List of usage examples for org.jdom2 Element getAttributeValue
public String getAttributeValue(final String attname, final Namespace ns)
This returns the attribute value for the attribute with the given name and within the given Namespace, null if there is no such attribute, and the empty string if the attribute value is empty.
From source file:de.nava.informa.parsers.Atom_0_3_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *///from www.j a va 2 s . c o m public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(channel, "dc"); if (dcNS == null) { LOGGER.debug("No namespace for dublin core found"); dcNS = defNS; } LOGGER.debug("start parsing."); // get version attribute String formatVersion = "0.3"; if (channel.getAttribute("version") != null) { formatVersion = channel.getAttribute("version").getValue().trim(); LOGGER.debug("Atom version " + formatVersion + " specified in document."); } else { LOGGER.info("No format version specified, using default."); } // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl", "modified", "author", "generator", "copyright", "link", "entry" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // TODO: support attributes: type, mode chnl.setFormat(ChannelFormat.ATOM_0_3); // language String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE); if (language != null) { chnl.setLanguage(language); } // description element if (channel.getChild("description") != null) { chnl.setDescription(channel.getChildTextTrim("description", defNS)); } else { // fallback chnl.setDescription(channel.getChildTextTrim("tagline", defNS)); } // ttl in dc namespace Element ttl = channel.getChild("ttl", dcNS); if (ttl != null) { String ttlString = ttl.getTextTrim(); if (ttlString != null) { chnl.setTtl(Integer.parseInt(ttlString)); } } // lastbuild element : modified ? Element modified = channel.getChild("modified", defNS); if (modified != null) { chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim())); } // TODO : issued value /* if (modified != null) { modified = channel.getChild("issued", defNS); chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim())); } */ // author element Element author = channel.getChild("author", defNS); if (author != null) { ParserUtils.matchCaseOfChildren(author, "name"); chnl.setCreator(author.getChildTextTrim("name", defNS)); } // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(getCopyright(copyright)); } // n link elements // TODO : type attribut of link (text, application...) List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); if ((rel != null) && (href != null) && rel.equals("alternate")) { URL linkURL = ParserUtils.getURL(href); chnl.setSite(linkURL); break; } // TODO: further extraction of link information } // 1..n entry elements List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "issued", "subject" }); // get title element // TODO : deal with type attribut Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = getTitle(elTitle); LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); curItem.setFound(dateParsed); // get issued element (required) Element elIssued = item.getChild("issued", defNS); if (elIssued == null) { // [adewale@gmail.com, 01-May-2005] Fix for blogs which have // 'created' dates, but not 'issued' dates -- in clear contravention // of the Atom 0.3 spec. Element elCreated = item.getChild("created", defNS); if (elCreated != null) { curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently curItem.setSubject(elSubject.getTextTrim()); } } // set to current date chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_1_0_Parser.java
License:Open Source License
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); }// www . j a v a 2s . co m Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space (for retrieving descriptions) if (dcNS == null) { dcNS = defNS; } // RSS 1.0 Syndication Module namespace Namespace syNS = ParserUtils.getNamespace(root, "sy"); // RSS 1.0 Aggregation Module namespace Namespace agNS = ParserUtils.getNamespace(root, "ag"); // RSS 1.0 Administration Module namespace Namespace adminNS = ParserUtils.getNamespace(root, "admin"); // RSS 1.0 DCTerms Module namespace Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms"); // RSS 1.0 Annotation Module namespace Namespace annotateNS = ParserUtils.getNamespace(root, "annotate"); // RSS091 Module namespace Namespace rss091NS = ParserUtils.getNamespace(root, "rss091"); // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" }); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // ----------------------- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "creator", "managingEditor", "publisher", "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date", "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod", "updateFrequency", "updateBase" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_1_0); // description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // creator element Element creator = channel.getChild("creator", dcNS); if (creator == null) { creator = channel.getChild("managingEditor", rss091NS); } if (creator != null) { chnl.setCreator(creator.getTextTrim()); } // publisher element String publisher = channel.getChildTextTrim("publisher", dcNS); if (publisher == null) { Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS); if (elErrorReportsTo != null) { publisher = elErrorReportsTo.getAttributeValue("resource", ParserUtils.getNamespace(elErrorReportsTo, "rdf")); } } if (publisher == null) { publisher = channel.getChildTextTrim("webMaster", rss091NS); } chnl.setPublisher(publisher); // language element Element language = channel.getChild("language", dcNS); if (language == null) { language = channel.getChild("language", rss091NS); } if (language != null) { chnl.setLanguage(language.getTextTrim()); } // rights element Element copyright = channel.getChild("rights", dcNS); if (copyright == null) { copyright = channel.getChild("copyright", rss091NS); } if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", rss091NS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element // use namespace URI chnl.setDocs(defNS.getURI()); // 0..1 pubDate element Element pubDate = channel.getChild("date", dcNS); if (pubDate == null) { pubDate = channel.getChild("issued", dctermsNS); } if (pubDate == null) { pubDate = channel.getChild("pubdate", rss091NS); } if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate"); if (lastBuildDate == null) { lastBuildDate = channel.getChild("modified", dctermsNS); } if (lastBuildDate == null) { lastBuildDate = channel.getChild("lastBuildDate", rss091NS); } if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // RSS 1.0 Administration Module support // 0..1 generator element Element elGenerator = channel.getChild("generatorAgent", adminNS); if (elGenerator != null) { Attribute generator = elGenerator.getAttribute("resource", ParserUtils.getNamespace(elGenerator, "rdf")); if (generator != null) { chnl.setGenerator(generator.getValue()); } } // RSS 1.0 Syndication Module support // 0..1 update period element Element updatePeriod = channel.getChild("updatePeriod", syNS); if (updatePeriod != null) { try { ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod .valueFromText(updatePeriod.getTextTrim()); chnl.setUpdatePeriod(channelUpdatePeriod); } catch (IllegalArgumentException ex) { logger.warn(updatePeriod.getTextTrim(), ex); } } // 0..1 update frequency element Element updateFrequency = channel.getChild("updateFrequency", syNS); if (updateFrequency != null) { chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim())).intValue()); } // 0..1 update base element Element updateBase = channel.getChild("updateBase", syNS); if (updateBase != null) { chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim())); } if ((updatePeriod != null) && updateFrequency != null) { int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency()); chnl.setTtl(ttl); } // item elements List items = root.getChildren("item", defNS); Iterator i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } if (elDesc == null) { elDesc = item.getChild("description", dcNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); rssItem.setFound(dateParsed); // get creator element Element elCreator = item.getChild("creator", dcNS); if (elCreator != null) { rssItem.setCreator(elCreator.getTextTrim()); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently rssItem.setSubject(elSubject.getTextTrim()); } // get date element Element elDate = item.getChild("date", dcNS); if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } // get source element - default to Aggregation module, then try Dublin Core String sourceName = null; String sourceLocation = null; Date sourceTimestamp = null; Element elSourceURL = item.getChild("sourceURL", agNS); if (elSourceURL == null) { // No Aggregation module - try Dublin Core elSourceURL = item.getChild("source", dcNS); if (elSourceURL != null) { sourceLocation = elSourceURL.getTextTrim(); sourceName = "Source"; } } else { // Aggregation module sourceLocation = elSourceURL.getTextTrim(); Element elSourceName = item.getChild("source", agNS); if (elSourceName != null) { sourceName = elSourceName.getTextTrim(); } Element elSourceTimestamp = item.getChild("timestamp", agNS); if (elSourceTimestamp != null) { sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim()); } } if (sourceLocation != null) { ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, sourceTimestamp); rssItem.setSource(itemSource); } // comments element - use Annotation module Element elReference = item.getChild("reference", annotateNS); if (elReference != null) { Attribute resource = elReference.getAttribute("resource", ParserUtils.getNamespace(elReference, "rdf")); if (resource != null) { URL resourceURL = ParserUtils.getURL(resource.getValue()); if (resourceURL != null) { rssItem.setComments(resourceURL); } } } } // image element Element image = root.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // textinput element Element txtinp = root.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "description", "name", "link" }); String tiTitle = null; if (txtinp.getChild("title", defNS) != null) { tiTitle = txtinp.getChild("title", defNS).getTextTrim(); } String tiDescr = null; if (txtinp.getChild("description", defNS) != null) { tiDescr = txtinp.getChild("description", defNS).getTextTrim(); } String tiName = null; if (txtinp.getChild("name", defNS) != null) { tiName = txtinp.getChild("name", defNS).getTextTrim(); } URL tiLink = null; if (txtinp.getChild("link", defNS) != null) { tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim()); } TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink); chnl.setTextInput(rssTextInput); } chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_2_0_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *//*from w w w. j av a 2 s .c o m*/ public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can be created."); } Date dateParsed = new Date(); logger.debug("start parsing."); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space if (dcNS == null) { dcNS = defNS; } // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); // fall back to default name space if (contentNS == null) { contentNS = defNS; } ParserUtils.matchCaseOfChildren(root, "channel"); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // --- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "language", "item", "image", "textinput", "copyright", "rating", "docs", "generator", "pubDate", "lastBuildDate", "category", "managingEditor", "webMaster", "cloud" }); // 1 title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_2_0); // 1 description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // 1 link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // 1 language element chnl.setLanguage(channel.getChildTextTrim("language", defNS)); // 1..n item elements List items = channel.getChildren("item", defNS); for (Object item1 : items) { Element item = (Element) item1; ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "subject", "category", "pubDate", "date", "author", "creator", "comments", "guid", "source", "enclosure" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); // get subject element Element elSubject = item.getChild("subject", defNS); if (elSubject == null) { // fallback mechanism: get dc:subject element elSubject = item.getChild("subject", dcNS); } if (elSubject != null) { rssItem.setSubject(elSubject.getTextTrim()); } // get category list // get list of <category> elements List listCategory = item.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = item.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node rssItem.setCategories(catList); } } // get publication date Element elDate = item.getChild("pubDate", defNS); if (elDate == null) { // fallback mechanism: get dc:date element elDate = item.getChild("date", dcNS); } if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } rssItem.setFound(dateParsed); // get Author element Element elAuthor = item.getChild("author", defNS); if (elAuthor == null) { // fallback mechanism: get dc:creator element elAuthor = item.getChild("creator", dcNS); } if (elAuthor != null) rssItem.setCreator(elAuthor.getTextTrim()); // get Comments element Element elComments = item.getChild("comments", defNS); String strComments = ""; if (elComments != null) { strComments = elComments.getTextTrim(); } rssItem.setComments(ParserUtils.getURL(strComments)); // get guid element Element elGuid = item.getChild("guid", defNS); if (elGuid != null) { String guidUrl = elGuid.getTextTrim(); if (guidUrl != null) { boolean permaLink = true; Attribute permaLinkAttribute = elGuid.getAttribute("isPermaLink", defNS); if (permaLinkAttribute != null) { String permaLinkStr = permaLinkAttribute.getValue(); if (permaLinkStr != null) { permaLink = Boolean.valueOf(permaLinkStr); } } ItemGuidIF itemGuid = cBuilder.createItemGuid(rssItem, guidUrl, permaLink); rssItem.setGuid(itemGuid); } } // get source element Element elSource = item.getChild("source", defNS); if (elSource != null) { String sourceName = elSource.getTextTrim(); Attribute sourceAttribute = elSource.getAttribute("url", defNS); if (sourceAttribute != null) { String sourceLocation = sourceAttribute.getValue().trim(); ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null); rssItem.setSource(itemSource); } } // get enclosure element Element elEnclosure = item.getChild("enclosure", defNS); if (elEnclosure != null) { URL location = null; String type = null; int length = -1; Attribute urlAttribute = elEnclosure.getAttribute("url", defNS); if (urlAttribute != null) { location = ParserUtils.getURL(urlAttribute.getValue().trim()); } Attribute typeAttribute = elEnclosure.getAttribute("type", defNS); if (typeAttribute != null) { type = typeAttribute.getValue().trim(); } Attribute lengthAttribute = elEnclosure.getAttribute("length", defNS); if (lengthAttribute != null) { try { length = Integer.parseInt(lengthAttribute.getValue().trim()); } catch (NumberFormatException e) { logger.warn(e); } } ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem, location, type, length); rssItem.setEnclosure(itemEnclosure); } } // 0..1 image element Element image = channel.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing width: " + e.getMessage()); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing height: " + e.getMessage()); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // 0..1 textinput element Element txtinp = channel.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" }); TextInputIF rssTextInput = cBuilder.createTextInput(txtinp.getChildTextTrim("title", defNS), txtinp.getChildTextTrim("description", defNS), txtinp.getChildTextTrim("name", defNS), ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS))); chnl.setTextInput(rssTextInput); } // 0..1 copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", defNS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element Element docs = channel.getChild("docs", defNS); if (docs != null) { chnl.setDocs(docs.getTextTrim()); } // 0..1 Generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // 0..1 ttl element Element ttl = channel.getChild("ttl", defNS); if (ttl != null) { String ttlValue = ttl.getTextTrim(); try { chnl.setTtl(Integer.parseInt(ttlValue)); } catch (NumberFormatException e) { logger.warn("Invalid TTL format: '" + ttlValue + "'"); } } // 0..1 pubDate element Element pubDate = channel.getChild("pubDate", defNS); if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate", defNS); if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // get category list // get list of <category> elements List listCategory = channel.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = channel.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node chnl.setCategories(catList); } } // 0..1 managingEditor element Element managingEditor = channel.getChild("managingEditor", defNS); if (managingEditor != null) { chnl.setCreator(managingEditor.getTextTrim()); } // 0..1 webMaster element Element webMaster = channel.getChild("webMaster", defNS); if (webMaster != null) { chnl.setPublisher(webMaster.getTextTrim()); } // 0..1 cloud element Element cloud = channel.getChild("cloud", defNS); if (cloud != null) { String _port = cloud.getAttributeValue("port", defNS); int port = -1; if (_port != null) { try { port = Integer.parseInt(_port); } catch (NumberFormatException e) { logger.warn(e); } } chnl.setCloud(cBuilder.createCloud(cloud.getAttributeValue("domain", defNS), port, cloud.getAttributeValue("path", defNS), cloud.getAttributeValue("registerProcedure", defNS), cloud.getAttributeValue("protocol", defNS))); } chnl.setLastUpdated(dateParsed); // 0..1 skipHours element // 0..1 skipDays element return chnl; }