// Java tutorial
// // Informa -- RSS Library for Java // Copyright (c) 2002 by Niko Schmuck // // All rights reserved. This program and the accompanying materials // are made available under the terms of the Eclipse Public License v1.0 // which accompanies this distribution, and is available at // http://www.eclipse.org/legal/epl-v10.html // package de.nava.informa.parsers; import de.nava.informa.core.*; import de.nava.informa.utils.AtomParserUtils; import de.nava.informa.utils.ParserUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jdom2.Element; import org.jdom2.Namespace; import java.net.URL; import java.util.Date; import java.util.Iterator; import java.util.List; /** * Parser which reads in document instances according to the Atom 0.3 * specification and generates a news channel object. Currently the * support for the atom syntax is not complete. * * @author Niko Schmuck */ class Atom_0_3_Parser implements ChannelParserIF { static public final Log LOGGER = LogFactory.getLog(Atom_0_3_Parser.class); /** * Private constructor suppresses generation of a (public) default constructor. */ private Atom_0_3_Parser() { } /** * Holder of the Atom_0_3_Parser instance. */ private static class Atom_0_3_ParserHolder { private static Atom_0_3_Parser instance = new Atom_0_3_Parser(); } /** * Get the Atom_0_3_Parser instance. */ public static Atom_0_3_Parser getInstance() { return Atom_0_3_ParserHolder.instance; } static String getValue(Element elt) { return AtomParserUtils.getValue(elt, elt.getAttributeValue("mode")); } /** * Returns the content from content element. */ static String getContent(Element elt) { if (elt == null) { return ""; } String value = getValue(elt); String type = getContentType(elt); if ("text/plain".equals(type)) { value = ParserUtils.escape(value); } return value; } /** * Returns the content type of element. Default is 'text/plain' according to Atom draft 0.3. 
*/ private static String getContentType(Element elt) { String type = elt.getAttributeValue("type"); return (type == null) ? "text/plain" : type; } /** * Returns copyright from element. */ static String getCopyright(Element elt) { return getTitle(elt); } /** * Looks for "content" elements and takes first from them or looks for "summary" element if * "content" not found. * * @param item item element. * @param namespace namespace. * @return description for item. */ public static String getDescription(Element item, Namespace namespace) { String strDesc = ""; Element elDesc; List contents = item.getChildren("content", namespace); if (contents.size() > 0) { elDesc = (Element) contents.get(0); } else { elDesc = item.getChild("summary", namespace); } if (elDesc != null) { strDesc = getValue(elDesc); } return strDesc; } /** * Returns the title from title element. */ static String getTitle(Element elt) { if (elt == null) { return ""; } String type = getContentType(elt); String value; if ("application/xhtml+xml".equals(type)) { value = elt.getValue(); } else { value = AtomParserUtils.getValue(elt, elt.getAttributeValue("mode")); if (!"text/plain".equals(type)) { value = ParserUtils.unEscape(value); } } return value; } /** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) */ public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(channel, "dc"); if (dcNS == null) { LOGGER.debug("No namespace for dublin core found"); dcNS = defNS; } LOGGER.debug("start parsing."); // get version attribute String formatVersion = "0.3"; if 
(channel.getAttribute("version") != null) { formatVersion = channel.getAttribute("version").getValue().trim(); LOGGER.debug("Atom version " + formatVersion + " specified in document."); } else { LOGGER.info("No format version specified, using default."); } // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl", "modified", "author", "generator", "copyright", "link", "entry" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // TODO: support attributes: type, mode chnl.setFormat(ChannelFormat.ATOM_0_3); // language String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE); if (language != null) { chnl.setLanguage(language); } // description element if (channel.getChild("description") != null) { chnl.setDescription(channel.getChildTextTrim("description", defNS)); } else { // fallback chnl.setDescription(channel.getChildTextTrim("tagline", defNS)); } // ttl in dc namespace Element ttl = channel.getChild("ttl", dcNS); if (ttl != null) { String ttlString = ttl.getTextTrim(); if (ttlString != null) { chnl.setTtl(Integer.parseInt(ttlString)); } } // lastbuild element : modified ? 
Element modified = channel.getChild("modified", defNS); if (modified != null) { chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim())); } // TODO : issued value /* if (modified != null) { modified = channel.getChild("issued", defNS); chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim())); } */ // author element Element author = channel.getChild("author", defNS); if (author != null) { ParserUtils.matchCaseOfChildren(author, "name"); chnl.setCreator(author.getChildTextTrim("name", defNS)); } // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(getCopyright(copyright)); } // n link elements // TODO : type attribut of link (text, application...) List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); if ((rel != null) && (href != null) && rel.equals("alternate")) { URL linkURL = ParserUtils.getURL(href); chnl.setSite(linkURL); break; } // TODO: further extraction of link information } // 1..n entry elements List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "issued", "subject" }); // get title element // TODO : deal with type attribut Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = getTitle(elTitle); LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { 
LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); curItem.setFound(dateParsed); // get issued element (required) Element elIssued = item.getChild("issued", defNS); if (elIssued == null) { // [adewale@gmail.com, 01-May-2005] Fix for blogs which have // 'created' dates, but not 'issued' dates -- in clear contravention // of the Atom 0.3 spec. Element elCreated = item.getChild("created", defNS); if (elCreated != null) { curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently curItem.setSubject(elSubject.getTextTrim()); } } // set to current date chnl.setLastUpdated(dateParsed); return chnl; } }