com.threadswarm.imagefeedarchiver.parser.RssDOMFeedParser.java Source code

Java tutorial

Introduction

Here is the source code for com.threadswarm.imagefeedarchiver.parser.RssDOMFeedParser.java

Source

/*
 * Copyright 2014 steve(at)threadswarm.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.threadswarm.imagefeedarchiver.parser;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.threadswarm.imagefeedarchiver.model.RssChannel;
import com.threadswarm.imagefeedarchiver.model.RssItem;
import com.threadswarm.imagefeedarchiver.model.RssMediaContent;

/**
 * {@link FeedParser} implementation that parses an RSS document with the JDK DOM parser and
 * copies the channel, its items and each item's {@code media:content} entries into the
 * {@link RssChannel} / {@link RssItem} / {@link RssMediaContent} model.
 *
 * <p>The class holds no instance state. The {@link DocumentBuilder} it uses is kept in a
 * {@link ThreadLocal}, so each calling thread works with its own parser instance.
 */
public class RssDOMFeedParser implements FeedParser {

    private static final Logger LOGGER = LoggerFactory.getLogger(RssDOMFeedParser.class);

    /**
     * Parser features that are switched off so a feed cannot make the parser resolve external
     * entities or fetch an external DTD (XXE). A side effect is that entities declared only in
     * an external DTD are no longer available to the document.
     */
    private static final String[] DISABLED_PARSER_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /** Per-thread parser; holds {@code null} when no parser could be created for the thread. */
    private static final ThreadLocal<DocumentBuilder> DOC_BUILDER_LOCAL = new ThreadLocal<DocumentBuilder>() {
        @Override
        protected DocumentBuilder initialValue() {
            try {
                return newHardenedFactory().newDocumentBuilder();
            } catch (ParserConfigurationException e) {
                LOGGER.error("Unable to instantiate an XML parser, this should never happen", e);
                return null;
            }
        }
    };

    public RssDOMFeedParser() {
    }

    /**
     * Parses the given RSS document.
     *
     * @param feedBody the complete XML text of the feed
     * @return the channel described by the first {@code <channel>} element of the document
     * @throws FeedParserException if {@code feedBody} is {@code null}, no XML parser is available,
     *         the text is not well-formed XML, or the document has no {@code <channel>} element
     */
    @Override
    public RssChannel readFeed(String feedBody) throws FeedParserException {
        if (feedBody == null) {
            throw new FeedParserException("Unable to parse an RSS feed from a null String",
                    new IllegalArgumentException("feedBody must not be null"));
        }

        DocumentBuilder docBuilder = DOC_BUILDER_LOCAL.get();
        if (docBuilder == null) {
            // Drop the cached null so a later call on this thread tries to build a parser again.
            DOC_BUILDER_LOCAL.remove();
            throw new FeedParserException("No XML parser is available to parse the RSS feed",
                    new IllegalStateException("DocumentBuilder could not be instantiated"));
        }

        // reset() restores the builder to its factory configuration, so the hardening applied
        // in newHardenedFactory() stays in effect across calls.
        docBuilder.reset();
        try (StringReader reader = new StringReader(feedBody)) {
            Document document = docBuilder.parse(new InputSource(reader));
            return parseChannel(document);
        } catch (SAXException | IOException e) {
            LOGGER.error("An Exception was thrown while attempting to parse the feed as an XML document");
            throw new FeedParserException(
                    "An Exception occurred while attempting to parse an RSS feed from a String", e);
        } finally {
            docBuilder.reset();
        }
    }

    /**
     * Creates a factory with secure processing enabled and external entity / DTD loading
     * disabled. Features the underlying parser does not recognise are skipped with a warning
     * rather than preventing parser creation.
     */
    private static DocumentBuilderFactory newHardenedFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        setFeatureIfSupported(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        for (String feature : DISABLED_PARSER_FEATURES) {
            setFeatureIfSupported(factory, feature, false);
        }
        return factory;
    }

    /** Sets a single parser feature, logging (not failing) when the parser rejects it. */
    private static void setFeatureIfSupported(DocumentBuilderFactory factory, String feature, boolean enabled) {
        try {
            factory.setFeature(feature, enabled);
        } catch (ParserConfigurationException e) {
            LOGGER.warn("XML parser does not support feature '" + feature + "'; continuing without it", e);
        }
    }

    /**
     * Builds an {@link RssChannel} from the first {@code <channel>} element in the document.
     * Only the direct {@code title}, {@code description} and {@code item} children are read.
     *
     * @throws FeedParserException if the document contains no {@code <channel>} element
     */
    private RssChannel parseChannel(Document document) throws FeedParserException {
        NodeList channelNodes = document.getElementsByTagName("channel");
        if (channelNodes.getLength() == 0) {
            throw new FeedParserException("The feed does not contain a <channel> element",
                    new IllegalStateException("missing <channel> element"));
        }
        Element channelElement = (Element) channelNodes.item(0);

        RssChannel channel = new RssChannel();
        List<RssItem> rssItemList = new ArrayList<>();

        NodeList channelChildren = channelElement.getChildNodes();
        for (int x = 0; x < channelChildren.getLength(); x++) {
            Node node = channelChildren.item(x);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element childElement = (Element) node;
            switch (childElement.getTagName()) {
                case "title":
                    channel.setTitle(childElement.getTextContent());
                    break;
                case "description":
                    channel.setDescription(childElement.getTextContent());
                    break;
                case "item":
                    RssItem rssItem = parseItem(childElement);
                    if (rssItem != null) {
                        rssItemList.add(rssItem);
                    }
                    break;
                default:
                    // Every other child, including pubDate, is currently not mapped to the model.
                    break;
            }
        }

        channel.setItems(rssItemList);

        return channel;
    }

    /**
     * Builds an {@link RssItem} from an {@code <item>} element, reading its direct
     * {@code title}, {@code link}, {@code description} and {@code media:content} children.
     *
     * @return the populated item, or {@code null} if the element is not an {@code <item>}
     */
    private RssItem parseItem(Element itemElement) {
        if (!itemElement.getTagName().equals("item")) {
            return null;
        }

        RssItem rssItem = new RssItem();
        List<RssMediaContent> mediaContentList = new ArrayList<>();

        NodeList itemChildren = itemElement.getChildNodes();
        for (int x = 0; x < itemChildren.getLength(); x++) {
            Node node = itemChildren.item(x);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element childElement = (Element) node;
            switch (childElement.getTagName()) {
                case "title":
                    rssItem.setTitle(childElement.getTextContent());
                    break;
                case "link":
                    rssItem.setLink(childElement.getTextContent());
                    break;
                case "description":
                    rssItem.setDescription(childElement.getTextContent());
                    break;
                case "media:content":
                    // Matched on the literal prefixed tag name; the parser is not namespace-aware.
                    RssMediaContent mediaContent = parseMediaContent(childElement);
                    if (mediaContent != null) {
                        mediaContentList.add(mediaContent);
                    }
                    break;
                default:
                    break;
            }
        }
        rssItem.setMediaContent(mediaContentList);

        return rssItem;
    }

    /**
     * Builds an {@link RssMediaContent} from a {@code media:content} element. The {@code url},
     * {@code type} and {@code medium} attributes are copied as-is; {@code height}, {@code width}
     * and {@code fileSize} are set only when present and numeric.
     *
     * @return the populated media content, or {@code null} if the element is not
     *         {@code media:content}
     */
    private RssMediaContent parseMediaContent(Element mediaContentElement) {
        if (!mediaContentElement.getTagName().equals("media:content")) {
            return null;
        }

        RssMediaContent mediaContent = new RssMediaContent();

        //String based attributes
        mediaContent.setUrlString(mediaContentElement.getAttribute("url"));
        mediaContent.setType(mediaContentElement.getAttribute("type"));
        mediaContent.setMedium(mediaContentElement.getAttribute("medium"));

        //Numeric attributes
        Integer height = parseIntegerAttribute(mediaContentElement, "height");
        if (height != null) {
            mediaContent.setHeight(height);
        }

        Integer width = parseIntegerAttribute(mediaContentElement, "width");
        if (width != null) {
            mediaContent.setWidth(width);
        }

        Long fileSize = parseLongAttribute(mediaContentElement, "fileSize");
        if (fileSize != null) {
            mediaContent.setFileSize(fileSize);
        }

        return mediaContent;
    }

    /**
     * Reads an attribute as an {@link Integer}, ignoring surrounding whitespace.
     *
     * @return the parsed value, or {@code null} if the attribute is absent, empty or not an integer
     */
    private static Integer parseIntegerAttribute(Element element, String attributeName) {
        String raw = element.getAttribute(attributeName);
        if (raw == null || raw.trim().isEmpty()) {
            return null;
        }
        try {
            return Integer.valueOf(raw.trim());
        } catch (NumberFormatException e) {
            LOGGER.error("Unable to convert '" + attributeName + "' attribute to integer", e);
            return null;
        }
    }

    /**
     * Reads an attribute as a {@link Long}, ignoring surrounding whitespace.
     *
     * @return the parsed value, or {@code null} if the attribute is absent, empty or not a long
     */
    private static Long parseLongAttribute(Element element, String attributeName) {
        String raw = element.getAttribute(attributeName);
        if (raw == null || raw.trim().isEmpty()) {
            return null;
        }
        try {
            return Long.valueOf(raw.trim());
        } catch (NumberFormatException e) {
            LOGGER.error("Unable to convert '" + attributeName + "' attribute to Long", e);
            return null;
        }
    }

}