Java tutorial
// // Informa -- RSS Library for Java // Copyright (c) 2002 by Niko Schmuck // // All rights reserved. This program and the accompanying materials // are made available under the terms of the Eclipse Public License v1.0 // which accompanies this distribution, and is available at // http://www.eclipse.org/legal/epl-v10.html // package de.nava.informa.parsers; import de.nava.informa.core.*; import de.nava.informa.utils.ParserUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jdom2.Attribute; import org.jdom2.Element; import org.jdom2.Namespace; import java.net.URL; import java.util.Date; import java.util.Iterator; import java.util.List; /** * Parser which reads in document instances according to the RSS 1.0 * (RDF) specification and generates a news channel object. * * @author Niko Schmuck */ class RSS_1_0_Parser implements ChannelParserIF { private static Log logger = LogFactory.getLog(RSS_1_0_Parser.class); /** * Private constructor suppresses generation of a (public) default constructor. */ private RSS_1_0_Parser() { } /** * Holder of the RSS_1_0_Parser instance. */ private static class RSS_1_0_ParserHolder { private static RSS_1_0_Parser instance = new RSS_1_0_Parser(); } /** * Get the RSS_1_0_Parser instance. */ public static RSS_1_0_Parser getInstance() { return RSS_1_0_ParserHolder.instance; } public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space (for retrieving descriptions) if (dcNS == null) { dcNS = defNS; } // RSS 1.0 Syndication Module namespace Namespace syNS = ParserUtils.getNamespace(root, "sy"); // RSS 1.0 Aggregation Module namespace Namespace agNS = ParserUtils.getNamespace(root, "ag"); // RSS 1.0 Administration Module namespace Namespace adminNS = ParserUtils.getNamespace(root, "admin"); // RSS 1.0 DCTerms Module namespace Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms"); // RSS 1.0 Annotation Module namespace Namespace annotateNS = ParserUtils.getNamespace(root, "annotate"); // RSS091 Module namespace Namespace rss091NS = ParserUtils.getNamespace(root, "rss091"); // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" }); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // ----------------------- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "creator", "managingEditor", "publisher", "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date", "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod", "updateFrequency", "updateBase" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_1_0); // description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // creator element Element creator = channel.getChild("creator", dcNS); if (creator == null) { creator = channel.getChild("managingEditor", rss091NS); } if (creator != null) { chnl.setCreator(creator.getTextTrim()); } // publisher element String publisher = channel.getChildTextTrim("publisher", dcNS); if (publisher == null) { Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS); if (elErrorReportsTo != null) { publisher = elErrorReportsTo.getAttributeValue("resource", ParserUtils.getNamespace(elErrorReportsTo, "rdf")); } } if (publisher == null) { publisher = channel.getChildTextTrim("webMaster", rss091NS); } chnl.setPublisher(publisher); // language element Element language = channel.getChild("language", dcNS); if (language == null) { language = channel.getChild("language", rss091NS); } if (language != null) { chnl.setLanguage(language.getTextTrim()); } // rights element Element copyright = channel.getChild("rights", dcNS); if (copyright == null) { copyright = channel.getChild("copyright", rss091NS); } if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", rss091NS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element // use namespace URI chnl.setDocs(defNS.getURI()); // 0..1 pubDate element Element pubDate = channel.getChild("date", dcNS); if (pubDate == null) { pubDate = channel.getChild("issued", dctermsNS); } if (pubDate == null) { pubDate = channel.getChild("pubdate", rss091NS); } if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate"); if (lastBuildDate == null) { lastBuildDate = channel.getChild("modified", dctermsNS); } if (lastBuildDate == null) { lastBuildDate = channel.getChild("lastBuildDate", rss091NS); } if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // RSS 1.0 Administration Module support // 0..1 generator element Element elGenerator = channel.getChild("generatorAgent", adminNS); if (elGenerator != null) { Attribute generator = elGenerator.getAttribute("resource", ParserUtils.getNamespace(elGenerator, "rdf")); if (generator != null) { chnl.setGenerator(generator.getValue()); } } // RSS 1.0 Syndication Module support // 0..1 update period element Element updatePeriod = channel.getChild("updatePeriod", syNS); if (updatePeriod != null) { try { ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod .valueFromText(updatePeriod.getTextTrim()); chnl.setUpdatePeriod(channelUpdatePeriod); } catch (IllegalArgumentException ex) { logger.warn(updatePeriod.getTextTrim(), ex); } } // 0..1 update frequency element Element updateFrequency = channel.getChild("updateFrequency", syNS); if (updateFrequency != null) { chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim())).intValue()); } // 0..1 update base element Element updateBase = channel.getChild("updateBase", syNS); if (updateBase != null) { chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim())); } if ((updatePeriod != null) && updateFrequency != null) { int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency()); chnl.setTtl(ttl); } // item elements List items = root.getChildren("item", defNS); Iterator i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } if (elDesc == null) { elDesc = item.getChild("description", dcNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); rssItem.setFound(dateParsed); // get creator element Element elCreator = item.getChild("creator", dcNS); if (elCreator != null) { rssItem.setCreator(elCreator.getTextTrim()); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently rssItem.setSubject(elSubject.getTextTrim()); } // get date element Element elDate = item.getChild("date", dcNS); if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } // get source element - default to Aggregation module, then try Dublin Core String sourceName = null; String sourceLocation = null; Date sourceTimestamp = null; Element elSourceURL = item.getChild("sourceURL", agNS); if (elSourceURL == null) { // No Aggregation module - try Dublin Core elSourceURL = item.getChild("source", dcNS); if (elSourceURL != null) { sourceLocation = elSourceURL.getTextTrim(); sourceName = "Source"; } } else { // Aggregation module sourceLocation = elSourceURL.getTextTrim(); Element elSourceName = item.getChild("source", agNS); if (elSourceName != null) { sourceName = elSourceName.getTextTrim(); } Element elSourceTimestamp = item.getChild("timestamp", agNS); if (elSourceTimestamp != null) { sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim()); } } if (sourceLocation != null) { ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, sourceTimestamp); rssItem.setSource(itemSource); } // comments element - use Annotation module Element elReference = item.getChild("reference", annotateNS); if (elReference != null) { Attribute resource = elReference.getAttribute("resource", ParserUtils.getNamespace(elReference, "rdf")); if (resource != null) { URL resourceURL = ParserUtils.getURL(resource.getValue()); if (resourceURL != null) { rssItem.setComments(resourceURL); } } } } // image element Element image = root.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // textinput element Element txtinp = root.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "description", "name", "link" }); String tiTitle = null; if (txtinp.getChild("title", defNS) != null) { tiTitle = txtinp.getChild("title", defNS).getTextTrim(); } String tiDescr = null; if (txtinp.getChild("description", defNS) != null) { tiDescr = txtinp.getChild("description", defNS).getTextTrim(); } String tiName = null; if (txtinp.getChild("name", defNS) != null) { tiName = txtinp.getChild("name", defNS).getTextTrim(); } URL tiLink = null; if (txtinp.getChild("link", defNS) != null) { tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim()); } TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink); chnl.setTextInput(rssTextInput); } chnl.setLastUpdated(dateParsed); return chnl; } /** * Returns the TTL value corresponding to updatePeriod and updateFrequency. * * @param updatePeriod Channel update period. * @param updateFrequency the update frequency. * @return the TTL value. */ private int getTTL(final ChannelUpdatePeriod updatePeriod, int updateFrequency) { int minutes; if (updatePeriod != null) { minutes = updatePeriod.getMinutesInPeriod(); } else { minutes = 24 * 60; } return updateFrequency == 0 ? minutes : minutes / updateFrequency; } }