Java tutorial
/*
 * Copyright 2014 steve(at)threadswarm.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.threadswarm.imagefeedarchiver.parser;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.threadswarm.imagefeedarchiver.model.RssChannel;
import com.threadswarm.imagefeedarchiver.model.RssItem;
import com.threadswarm.imagefeedarchiver.model.RssMediaContent;

/**
 * {@link FeedParser} implementation that reads an RSS document with the JAXP DOM API.
 *
 * <p>The parser extracts the channel's {@code title} and {@code description}, and for every
 * {@code item} its {@code title}, {@code link}, {@code description} and {@code media:content}
 * children. Element names are matched against the qualified tag name exactly as written in the
 * document, so {@code media:content} is only recognised with the literal {@code media} prefix.
 *
 * <p>A {@link DocumentBuilder} is kept per thread in a {@link ThreadLocal} and reset before and
 * after every parse.
 */
public class RssDOMFeedParser implements FeedParser {

    private static final Logger LOGGER = LoggerFactory.getLogger(RssDOMFeedParser.class);

    /**
     * Parser features that are switched off so that a feed cannot make the parser read local
     * files or contact remote hosts through external entities or an external DTD.
     */
    private static final String[] DISABLED_PARSER_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    private static final ThreadLocal<DocumentBuilder> DOC_BUILDER_LOCAL =
            new ThreadLocal<DocumentBuilder>() {
                @Override
                protected DocumentBuilder initialValue() {
                    try {
                        return newHardenedFactory().newDocumentBuilder();
                    } catch (ParserConfigurationException e) {
                        LOGGER.error("Unable to instantiate an XML parser, this should never happen", e);
                        // Fail fast: handing out null here would only surface later as an
                        // unexplained NullPointerException inside readFeed.
                        throw new IllegalStateException("Unable to instantiate an XML parser", e);
                    }
                }
            };

    public RssDOMFeedParser() {
    }

    /**
     * Parses the given RSS document into an {@link RssChannel}.
     *
     * @param feedBody the complete XML text of the feed; must not be {@code null}
     * @return the channel described by the first {@code channel} element of the document
     * @throws FeedParserException if no XML parser can be created, the text is not well-formed
     *         XML, or the document contains no {@code channel} element
     * @throws NullPointerException if {@code feedBody} is {@code null}
     */
    @Override
    public RssChannel readFeed(String feedBody) throws FeedParserException {
        Objects.requireNonNull(feedBody, "feedBody must not be null");

        final DocumentBuilder docBuilder;
        try {
            docBuilder = DOC_BUILDER_LOCAL.get();
        } catch (IllegalStateException e) {
            throw new FeedParserException("Unable to create an XML parser for the current thread", e);
        }

        docBuilder.reset();
        Reader reader = new StringReader(feedBody);
        try {
            Document document = docBuilder.parse(new InputSource(reader));
            return parseChannel(document);
        } catch (SAXException | IOException e) {
            LOGGER.error("An Exception was thrown while attempting to parse the feed as an XML document");
            throw new FeedParserException(
                    "An Exception occurred while attempting to parse an RSS feed from a String", e);
        } finally {
            IOUtils.closeQuietly(reader);
            // Leave the thread's builder in its initial state for the next caller.
            docBuilder.reset();
        }
    }

    /**
     * Creates a factory with secure processing enabled and external entity / external DTD
     * resolution disabled.
     *
     * <p>Secure processing must be supported by every JAXP implementation, so a failure there is
     * propagated. The remaining features are implementation specific; if one is not recognised a
     * warning is logged and the factory is still returned.
     */
    private static DocumentBuilderFactory newHardenedFactory() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        for (String feature : DISABLED_PARSER_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (ParserConfigurationException e) {
                LOGGER.warn("The XML parser does not support feature '" + feature
                        + "', it could not be disabled", e);
            }
        }
        return factory;
    }

    /**
     * Builds an {@link RssChannel} from the first {@code channel} element of the document.
     *
     * @throws FeedParserException if the document has no {@code channel} element
     */
    private RssChannel parseChannel(Document document) throws FeedParserException {
        NodeList channelNodes = document.getElementsByTagName("channel");
        if (channelNodes.getLength() == 0) {
            throw new FeedParserException("The document does not contain an RSS <channel> element", null);
        }
        Element channelElement = (Element) channelNodes.item(0);

        RssChannel channel = new RssChannel();
        List<RssItem> rssItemList = new LinkedList<RssItem>();

        NodeList channelChildren = channelElement.getChildNodes();
        for (int x = 0; x < channelChildren.getLength(); x++) {
            Node node = channelChildren.item(x);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element childElement = (Element) node;
            // NOTE: "pubDate" is intentionally not handled; the channel's publication date is
            // not parsed yet.
            switch (childElement.getTagName()) {
                case "title":
                    channel.setTitle(childElement.getTextContent());
                    break;
                case "description":
                    channel.setDescription(childElement.getTextContent());
                    break;
                case "item":
                    RssItem rssItem = parseItem(childElement);
                    if (rssItem != null) {
                        rssItemList.add(rssItem);
                    }
                    break;
                default:
                    break;
            }
        }

        channel.setItems(rssItemList);
        return channel;
    }

    /**
     * Builds an {@link RssItem} from an {@code item} element.
     *
     * @return the parsed item, or {@code null} if the element is not an {@code item}
     */
    private RssItem parseItem(Element itemElement) {
        if (!itemElement.getTagName().equals("item")) {
            return null;
        }

        RssItem rssItem = new RssItem();
        List<RssMediaContent> mediaContentList = new LinkedList<RssMediaContent>();

        NodeList itemChildren = itemElement.getChildNodes();
        for (int x = 0; x < itemChildren.getLength(); x++) {
            Node node = itemChildren.item(x);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element childElement = (Element) node;
            switch (childElement.getTagName()) {
                case "title":
                    rssItem.setTitle(childElement.getTextContent());
                    break;
                case "link":
                    rssItem.setLink(childElement.getTextContent());
                    break;
                case "description":
                    rssItem.setDescription(childElement.getTextContent());
                    break;
                case "media:content":
                    RssMediaContent mediaContent = parseMediaContent(childElement);
                    if (mediaContent != null) {
                        mediaContentList.add(mediaContent);
                    }
                    break;
                default:
                    break;
            }
        }

        rssItem.setMediaContent(mediaContentList);
        return rssItem;
    }

    /**
     * Builds an {@link RssMediaContent} from a {@code media:content} element.
     *
     * <p>The {@code url}, {@code type} and {@code medium} attributes are copied as-is. The
     * {@code height}, {@code width} and {@code fileSize} attributes are only set when present
     * and numeric; a malformed value is logged and skipped.
     *
     * @return the parsed media content, or {@code null} if the element is not
     *         {@code media:content}
     */
    private RssMediaContent parseMediaContent(Element mediaContentElement) {
        if (!mediaContentElement.getTagName().equals("media:content")) {
            return null;
        }

        RssMediaContent mediaContent = new RssMediaContent();

        // String based attributes
        mediaContent.setUrlString(mediaContentElement.getAttribute("url"));
        mediaContent.setType(mediaContentElement.getAttribute("type"));
        mediaContent.setMedium(mediaContentElement.getAttribute("medium"));

        // Numeric attributes
        Integer height = readIntegerAttribute(mediaContentElement, "height");
        if (height != null) {
            mediaContent.setHeight(height);
        }

        Integer width = readIntegerAttribute(mediaContentElement, "width");
        if (width != null) {
            mediaContent.setWidth(width);
        }

        Long fileSize = readLongAttribute(mediaContentElement, "fileSize");
        if (fileSize != null) {
            mediaContent.setFileSize(fileSize);
        }

        return mediaContent;
    }

    /** Returns the named attribute as an {@link Integer}, or {@code null} if absent or not numeric. */
    private static Integer readIntegerAttribute(Element element, String attributeName) {
        String rawValue = element.getAttribute(attributeName);
        if (rawValue == null || rawValue.isEmpty()) {
            return null;
        }
        try {
            return Integer.valueOf(rawValue);
        } catch (NumberFormatException e) {
            LOGGER.error("Unable to convert '" + attributeName + "' attribute to integer", e);
            return null;
        }
    }

    /** Returns the named attribute as a {@link Long}, or {@code null} if absent or not numeric. */
    private static Long readLongAttribute(Element element, String attributeName) {
        String rawValue = element.getAttribute(attributeName);
        if (rawValue == null || rawValue.isEmpty()) {
            return null;
        }
        try {
            return Long.valueOf(rawValue);
        } catch (NumberFormatException e) {
            LOGGER.error("Unable to convert '" + attributeName + "' attribute to Long", e);
            return null;
        }
    }
}