List of usage examples for org.jdom2 Element getChildTextTrim
public String getChildTextTrim(final String cname, final Namespace ns)
From source file:de.nava.informa.parsers.Atom_0_3_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *///from w ww . j a v a 2s.co m public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(channel, "dc"); if (dcNS == null) { LOGGER.debug("No namespace for dublin core found"); dcNS = defNS; } LOGGER.debug("start parsing."); // get version attribute String formatVersion = "0.3"; if (channel.getAttribute("version") != null) { formatVersion = channel.getAttribute("version").getValue().trim(); LOGGER.debug("Atom version " + formatVersion + " specified in document."); } else { LOGGER.info("No format version specified, using default."); } // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl", "modified", "author", "generator", "copyright", "link", "entry" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // TODO: support attributes: type, mode chnl.setFormat(ChannelFormat.ATOM_0_3); // language String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE); if (language != null) { chnl.setLanguage(language); } // description element if (channel.getChild("description") != null) { chnl.setDescription(channel.getChildTextTrim("description", defNS)); } else { // fallback chnl.setDescription(channel.getChildTextTrim("tagline", defNS)); } // ttl in dc namespace Element ttl = channel.getChild("ttl", dcNS); if (ttl != null) { String ttlString = 
ttl.getTextTrim(); if (ttlString != null) { chnl.setTtl(Integer.parseInt(ttlString)); } } // lastbuild element : modified ? Element modified = channel.getChild("modified", defNS); if (modified != null) { chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim())); } // TODO : issued value /* if (modified != null) { modified = channel.getChild("issued", defNS); chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim())); } */ // author element Element author = channel.getChild("author", defNS); if (author != null) { ParserUtils.matchCaseOfChildren(author, "name"); chnl.setCreator(author.getChildTextTrim("name", defNS)); } // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(getCopyright(copyright)); } // n link elements // TODO : type attribut of link (text, application...) 
List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); if ((rel != null) && (href != null) && rel.equals("alternate")) { URL linkURL = ParserUtils.getURL(href); chnl.setSite(linkURL); break; } // TODO: further extraction of link information } // 1..n entry elements List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "issued", "subject" }); // get title element // TODO : deal with type attribut Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = getTitle(elTitle); LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); curItem.setFound(dateParsed); // get issued element (required) Element elIssued = item.getChild("issued", defNS); if (elIssued == null) { // [adewale@gmail.com, 01-May-2005] Fix for blogs which have // 'created' dates, but not 'issued' dates -- in clear contravention // of the Atom 0.3 spec. 
Element elCreated = item.getChild("created", defNS); if (elCreated != null) { curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently curItem.setSubject(elSubject.getTextTrim()); } } // set to current date chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.Atom_1_0_Parser.java
License:Open Source License
/** * a semicolon separated list of authors *//*from w ww . j av a 2s . co m*/ static String getAuthorString(List authors, Namespace defNS) { String authorName = ""; for (Object author : authors) { Element authorElt = (Element) author; if (authorElt != null) { //TODO author may have more information like uri and email ParserUtils.matchCaseOfChildren(authorElt, "name"); if (!"".equals(authorName)) { // if more than one author, a ; separated list authorName += "; "; } authorName += authorElt.getChildTextTrim("name", defNS); } } return authorName; }
From source file:de.nava.informa.parsers.Atom_1_0_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *///ww w .j a v a 2s. com public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } else if ((defNS.getURI() == null) || !defNS.getURI().equals("http://www.w3.org/2005/Atom")) { LOGGER.warn("Namespace is not really supported, still trying assuming Atom 1.0 format"); } LOGGER.debug("start parsing."); // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "subtitle", "updated", "published", "author", "generator", "rights", "link", "entry" }); // TODO icon and logo: Feed element can have upto 1 logo and icon. // TODO id: Feed and all entries have a unique id string. This can // be the URL of the website. Supporting this will require API change. // TODO: Feed can optionally have category information // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); chnl.setFormat(ChannelFormat.ATOM_1_0); // description element if (channel.getChild("subtitle") != null) { chnl.setDescription(channel.getChildTextTrim("subtitle", defNS)); } // TODO: should we use summary element? // lastbuild element : updated ? 
Element updated = channel.getChild("updated", defNS); if (updated != null) { chnl.setPubDate(ParserUtils.getDate(updated.getTextTrim())); } // author element List authors = channel.getChildren("author", defNS); chnl.setCreator(getAuthorString(authors, defNS)); // TODO we are ignoring contributors information // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // TODO generator can have URI and version information // copyright element Element rights = channel.getChild("rights", defNS); if (rights != null) { chnl.setCopyright(AtomParserUtils.getValue(rights, getMode(rights))); } List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); URL linkUrl = null; while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link // if rel is not present, use first link without rel String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); // TODO we need to handle relative links also if ((rel == null) && (href != null) && (linkUrl == null)) { linkUrl = ParserUtils.getURL(href); } else if ((rel != null) && (href != null) && rel.equals("alternate")) { linkUrl = ParserUtils.getURL(href); break; } } if (linkUrl != null) { chnl.setSite(linkUrl); } List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "published", "author" }); // TODO entry, if copied from some other feed, may have source element // TODO each entry can have its own rights declaration // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = AtomParserUtils.getValue(elTitle, getMode(elTitle)); LOGGER.debug("Parsing 
title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); //TODO enclosure data curItem.setFound(dateParsed); List itemAuthors = item.getChildren("author", defNS); curItem.setCreator(getAuthorString(itemAuthors, defNS)); // get published element Element elIssued = item.getChild("published", defNS); if (elIssued == null) { // published element may not be present (but updated should be) Element elUpdated = item.getChild("updated", defNS); // TODO there should be some way to determining which one are we // returning if (elUpdated != null) { curItem.setDate(ParserUtils.getDate(elUpdated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get list of category elements List elCategoryList = item.getChildren("category", defNS); // categories present will be stored here Collection<CategoryIF> categories = new ArrayList<>(); // multiple category elements may be present for (Object elCategoryItem : elCategoryList) { Element elCategory = (Element) elCategoryItem; // notice: atom spec. forbids to have category "term" (="subject") // set as inner text of category tags, so we have to read it from // the "term" attribute if (elCategory != null) { // TODO: what if we have more than one category element present? // subject would be overwritten each loop and therefore represent only // the last category read, so does this make any sense? // TODO: what about adding functionality for accessing "label" or "scheme" attributes? 
// if set, a label should be displayed instead of the value set in term // we keep this line not to break up things which // use getSubject() to read an item category curItem.setSubject(elCategory.getAttributeValue("term")); CategoryIF c = new Category(elCategory.getAttributeValue("term")); // add current category to category list categories.add(c); } } // assign categories curItem.setCategories(categories); } // set to current date chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_1_0_Parser.java
License:Open Source License
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); }//from www . ja va 2 s . c om Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space (for retrieving descriptions) if (dcNS == null) { dcNS = defNS; } // RSS 1.0 Syndication Module namespace Namespace syNS = ParserUtils.getNamespace(root, "sy"); // RSS 1.0 Aggregation Module namespace Namespace agNS = ParserUtils.getNamespace(root, "ag"); // RSS 1.0 Administration Module namespace Namespace adminNS = ParserUtils.getNamespace(root, "admin"); // RSS 1.0 DCTerms Module namespace Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms"); // RSS 1.0 Annotation Module namespace Namespace annotateNS = ParserUtils.getNamespace(root, "annotate"); // RSS091 Module namespace Namespace rss091NS = ParserUtils.getNamespace(root, "rss091"); // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" }); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // ----------------------- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "creator", "managingEditor", "publisher", "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date", "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod", "updateFrequency", 
"updateBase" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_1_0); // description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // creator element Element creator = channel.getChild("creator", dcNS); if (creator == null) { creator = channel.getChild("managingEditor", rss091NS); } if (creator != null) { chnl.setCreator(creator.getTextTrim()); } // publisher element String publisher = channel.getChildTextTrim("publisher", dcNS); if (publisher == null) { Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS); if (elErrorReportsTo != null) { publisher = elErrorReportsTo.getAttributeValue("resource", ParserUtils.getNamespace(elErrorReportsTo, "rdf")); } } if (publisher == null) { publisher = channel.getChildTextTrim("webMaster", rss091NS); } chnl.setPublisher(publisher); // language element Element language = channel.getChild("language", dcNS); if (language == null) { language = channel.getChild("language", rss091NS); } if (language != null) { chnl.setLanguage(language.getTextTrim()); } // rights element Element copyright = channel.getChild("rights", dcNS); if (copyright == null) { copyright = channel.getChild("copyright", rss091NS); } if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", rss091NS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element // use namespace URI chnl.setDocs(defNS.getURI()); // 0..1 pubDate element Element pubDate = channel.getChild("date", dcNS); if (pubDate == null) { pubDate = channel.getChild("issued", dctermsNS); } if (pubDate == null) { pubDate = channel.getChild("pubdate", rss091NS); } if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } 
// 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate"); if (lastBuildDate == null) { lastBuildDate = channel.getChild("modified", dctermsNS); } if (lastBuildDate == null) { lastBuildDate = channel.getChild("lastBuildDate", rss091NS); } if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // RSS 1.0 Administration Module support // 0..1 generator element Element elGenerator = channel.getChild("generatorAgent", adminNS); if (elGenerator != null) { Attribute generator = elGenerator.getAttribute("resource", ParserUtils.getNamespace(elGenerator, "rdf")); if (generator != null) { chnl.setGenerator(generator.getValue()); } } // RSS 1.0 Syndication Module support // 0..1 update period element Element updatePeriod = channel.getChild("updatePeriod", syNS); if (updatePeriod != null) { try { ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod .valueFromText(updatePeriod.getTextTrim()); chnl.setUpdatePeriod(channelUpdatePeriod); } catch (IllegalArgumentException ex) { logger.warn(updatePeriod.getTextTrim(), ex); } } // 0..1 update frequency element Element updateFrequency = channel.getChild("updateFrequency", syNS); if (updateFrequency != null) { chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim())).intValue()); } // 0..1 update base element Element updateBase = channel.getChild("updateBase", syNS); if (updateBase != null) { chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim())); } if ((updatePeriod != null) && updateFrequency != null) { int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency()); chnl.setTtl(ttl); } // item elements List items = root.getChildren("item", defNS); Iterator i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" }); // get title 
element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } if (elDesc == null) { elDesc = item.getChild("description", dcNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); rssItem.setFound(dateParsed); // get creator element Element elCreator = item.getChild("creator", dcNS); if (elCreator != null) { rssItem.setCreator(elCreator.getTextTrim()); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently rssItem.setSubject(elSubject.getTextTrim()); } // get date element Element elDate = item.getChild("date", dcNS); if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } // get source element - default to Aggregation module, then try Dublin Core String sourceName = null; String sourceLocation = null; Date sourceTimestamp = null; Element elSourceURL = item.getChild("sourceURL", agNS); if (elSourceURL == null) { // No Aggregation module - try Dublin Core elSourceURL = item.getChild("source", dcNS); if (elSourceURL != null) { sourceLocation = elSourceURL.getTextTrim(); sourceName = "Source"; } } else { // Aggregation module sourceLocation = elSourceURL.getTextTrim(); Element elSourceName = item.getChild("source", agNS); if (elSourceName != null) { sourceName = elSourceName.getTextTrim(); } Element 
elSourceTimestamp = item.getChild("timestamp", agNS); if (elSourceTimestamp != null) { sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim()); } } if (sourceLocation != null) { ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, sourceTimestamp); rssItem.setSource(itemSource); } // comments element - use Annotation module Element elReference = item.getChild("reference", annotateNS); if (elReference != null) { Attribute resource = elReference.getAttribute("resource", ParserUtils.getNamespace(elReference, "rdf")); if (resource != null) { URL resourceURL = ParserUtils.getURL(resource.getValue()); if (resourceURL != null) { rssItem.setComments(resourceURL); } } } } // image element Element image = root.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // textinput element Element txtinp = root.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "description", "name", "link" }); String tiTitle = null; if (txtinp.getChild("title", defNS) != null) { tiTitle = txtinp.getChild("title", defNS).getTextTrim(); 
} String tiDescr = null; if (txtinp.getChild("description", defNS) != null) { tiDescr = txtinp.getChild("description", defNS).getTextTrim(); } String tiName = null; if (txtinp.getChild("name", defNS) != null) { tiName = txtinp.getChild("name", defNS).getTextTrim(); } URL tiLink = null; if (txtinp.getChild("link", defNS) != null) { tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim()); } TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink); chnl.setTextInput(rssTextInput); } chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_2_0_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *//*from www. j a va2s. c o m*/ public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can be created."); } Date dateParsed = new Date(); logger.debug("start parsing."); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space if (dcNS == null) { dcNS = defNS; } // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); // fall back to default name space if (contentNS == null) { contentNS = defNS; } ParserUtils.matchCaseOfChildren(root, "channel"); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // --- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "language", "item", "image", "textinput", "copyright", "rating", "docs", "generator", "pubDate", "lastBuildDate", "category", "managingEditor", "webMaster", "cloud" }); // 1 title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_2_0); // 1 description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // 1 link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // 1 language element chnl.setLanguage(channel.getChildTextTrim("language", defNS)); // 1..n item elements List items = channel.getChildren("item", defNS); for (Object item1 : items) { Element item = (Element) 
item1; ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "subject", "category", "pubDate", "date", "author", "creator", "comments", "guid", "source", "enclosure" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); // get subject element Element elSubject = item.getChild("subject", defNS); if (elSubject == null) { // fallback mechanism: get dc:subject element elSubject = item.getChild("subject", dcNS); } if (elSubject != null) { rssItem.setSubject(elSubject.getTextTrim()); } // get category list // get list of <category> elements List listCategory = item.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = item.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does 
not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node rssItem.setCategories(catList); } } // get publication date Element elDate = item.getChild("pubDate", defNS); if (elDate == null) { // fallback mechanism: get dc:date element elDate = item.getChild("date", dcNS); } if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } rssItem.setFound(dateParsed); // get Author element Element elAuthor = item.getChild("author", defNS); if (elAuthor == null) { // fallback mechanism: get dc:creator element elAuthor = item.getChild("creator", dcNS); } if (elAuthor != null) rssItem.setCreator(elAuthor.getTextTrim()); // get Comments element Element elComments = item.getChild("comments", defNS); String strComments = ""; if (elComments != null) { strComments = elComments.getTextTrim(); } rssItem.setComments(ParserUtils.getURL(strComments)); // get guid element Element elGuid = item.getChild("guid", defNS); if (elGuid != null) { String guidUrl = elGuid.getTextTrim(); if (guidUrl != null) { boolean permaLink = true; Attribute permaLinkAttribute = elGuid.getAttribute("isPermaLink", defNS); if (permaLinkAttribute != null) { String permaLinkStr = permaLinkAttribute.getValue(); if (permaLinkStr != null) { permaLink = Boolean.valueOf(permaLinkStr); } } ItemGuidIF itemGuid = 
cBuilder.createItemGuid(rssItem, guidUrl, permaLink); rssItem.setGuid(itemGuid); } } // get source element Element elSource = item.getChild("source", defNS); if (elSource != null) { String sourceName = elSource.getTextTrim(); Attribute sourceAttribute = elSource.getAttribute("url", defNS); if (sourceAttribute != null) { String sourceLocation = sourceAttribute.getValue().trim(); ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null); rssItem.setSource(itemSource); } } // get enclosure element Element elEnclosure = item.getChild("enclosure", defNS); if (elEnclosure != null) { URL location = null; String type = null; int length = -1; Attribute urlAttribute = elEnclosure.getAttribute("url", defNS); if (urlAttribute != null) { location = ParserUtils.getURL(urlAttribute.getValue().trim()); } Attribute typeAttribute = elEnclosure.getAttribute("type", defNS); if (typeAttribute != null) { type = typeAttribute.getValue().trim(); } Attribute lengthAttribute = elEnclosure.getAttribute("length", defNS); if (lengthAttribute != null) { try { length = Integer.parseInt(lengthAttribute.getValue().trim()); } catch (NumberFormatException e) { logger.warn(e); } } ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem, location, type, length); rssItem.setEnclosure(itemEnclosure); } } // 0..1 image element Element image = channel.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing width: " + e.getMessage()); } } Element imgHeight = 
image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing height: " + e.getMessage()); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // 0..1 textinput element Element txtinp = channel.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" }); TextInputIF rssTextInput = cBuilder.createTextInput(txtinp.getChildTextTrim("title", defNS), txtinp.getChildTextTrim("description", defNS), txtinp.getChildTextTrim("name", defNS), ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS))); chnl.setTextInput(rssTextInput); } // 0..1 copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", defNS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element Element docs = channel.getChild("docs", defNS); if (docs != null) { chnl.setDocs(docs.getTextTrim()); } // 0..1 Generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // 0..1 ttl element Element ttl = channel.getChild("ttl", defNS); if (ttl != null) { String ttlValue = ttl.getTextTrim(); try { chnl.setTtl(Integer.parseInt(ttlValue)); } catch (NumberFormatException e) { logger.warn("Invalid TTL format: '" + ttlValue + "'"); } } // 0..1 pubDate element Element pubDate = channel.getChild("pubDate", defNS); if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate", defNS); if (lastBuildDate != null) { 
chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // get category list // get list of <category> elements List listCategory = channel.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = channel.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node chnl.setCategories(catList); } } // 0..1 managingEditor element Element managingEditor = channel.getChild("managingEditor", defNS); if (managingEditor != null) { chnl.setCreator(managingEditor.getTextTrim()); } // 0..1 webMaster element Element webMaster = channel.getChild("webMaster", defNS); if (webMaster != null) { chnl.setPublisher(webMaster.getTextTrim()); } // 0..1 cloud element Element cloud = 
channel.getChild("cloud", defNS); if (cloud != null) { String _port = cloud.getAttributeValue("port", defNS); int port = -1; if (_port != null) { try { port = Integer.parseInt(_port); } catch (NumberFormatException e) { logger.warn(e); } } chnl.setCloud(cBuilder.createCloud(cloud.getAttributeValue("domain", defNS), port, cloud.getAttributeValue("path", defNS), cloud.getAttributeValue("registerProcedure", defNS), cloud.getAttributeValue("protocol", defNS))); } chnl.setLastUpdated(dateParsed); // 0..1 skipHours element // 0..1 skipDays element return chnl; }
From source file:io.smartspaces.workbench.project.jdom.JdomReader.java
License:Apache License
/**
 * Look up a child of the given element by name and namespace and hand back
 * its text with surrounding whitespace trimmed.
 *
 * <p>
 * The lookup is delegated to {@code Element.getChildTextTrim(key, namespace)};
 * whatever that call returns is returned unchanged.
 *
 * @param element
 *          container element
 * @param namespace
 *          namespace for the key element
 * @param key
 *          local name of the child element whose text is wanted
 *
 * @return the value produced by {@code getChildTextTrim} for the child
 *
 * @throws SmartSpacesException
 *           if the lookup raises any exception; the original exception is
 *           attached as the cause
 */
protected String getChildTextTrimmed(Element element, Namespace namespace, String key)
    throws SmartSpacesException {
  final String trimmedText;
  try {
    trimmedText = element.getChildTextTrim(key, namespace);
  } catch (Exception e) {
    // Wrap everything (including a null container) so callers see one exception type.
    throw new SimpleSmartSpacesException("Looking for text of child: " + key, e);
  }
  return trimmedText;
}
From source file:net.instantcom.mm7.DeliverReq.java
License:Open Source License
@Override public void load(Element element) { super.load(element); Element body = element.getChild("Body", MM7Message.ENVELOPE); Element req = body.getChild("DeliverReq", namespace); setMm7Version(req.getChildTextTrim("MM7Version", namespace)); Element sender = req.getChild("Sender", namespace); if (sender != null) { Address a = new Address(); a.load((Element) sender.getChildren().get(0)); setSender(a);// w w w. j a va2 s. c o m } else { setSender(null); } setRecipients(extractRecipients(req.getChild("Recipients", namespace))); setLinkedId(req.getChildTextTrim("LinkedID", namespace)); setSenderSPI(req.getChildTextTrim("SenderSPI", namespace)); setRecipientSPI(req.getChildTextTrim("RecipientSPI", namespace)); setReplyChargingId(req.getChildTextTrim("ReplyChargingID", namespace)); setSubject(req.getChildTextTrim("Subject", namespace)); setApplicId(req.getChildTextTrim("ApplicID", namespace)); setReplyApplicId(req.getChildTextTrim("ReplyApplicID", namespace)); setAuxApplicInfo(req.getChildTextTrim("AuxApplicInfo", namespace)); setPriority(Priority.valueOf(req.getChildTextTrim("Priority", namespace).toUpperCase())); setTimeStamp(new RelativeDate(req.getChildTextTrim("TimeStamp", namespace)).toDate()); }
From source file:net.instantcom.mm7.DeliverRsp.java
License:Open Source License
/**
 * Populates this DeliverRsp from a parsed SOAP envelope.
 *
 * <p>
 * Runs the superclass loading first, then copies the trimmed text of the
 * {@code ServiceCode} child of {@code Body/DeliverRsp} into the service code
 * property.
 *
 * @param element
 *            the SOAP envelope element containing the {@code Body}
 */
@Override
public void load(Element element) {
    super.load(element);

    final Element soapBody = element.getChild("Body", MM7Message.ENVELOPE);
    final Element response = soapBody.getChild("DeliverRsp", namespace);
    final String serviceCode = response.getChildTextTrim("ServiceCode", namespace);

    setServiceCode(serviceCode);
}
From source file:net.instantcom.mm7.DeliveryReportReq.java
License:Open Source License
@Override public void load(Element element) { super.load(element); Element body = element.getChild("Body", MM7Message.ENVELOPE); Element req = body.getChild("DeliveryReportReq", namespace); setMm7Version(req.getChildTextTrim("MM7Version", namespace)); setMessageID(req.getChildTextTrim("MessageID", namespace)); setRecipient(extractRecipient(req.getChild("Recipient", namespace))); Element sender = req.getChild("Sender", namespace); if (sender != null) { Address a = new Address(); a.load((Element) sender.getChildren().get(0)); setSender(a);//from w ww.j a va 2 s. c o m } else { setSender(null); } setDate(new RelativeDate(req.getChildTextTrim("Date", namespace)).toDate()); setMmStatus(req.getChildTextTrim("MMStatus", namespace)); setStatusText(req.getChildTextTrim("StatusText", namespace)); }
From source file:net.instantcom.mm7.MM7Message.java
License:Open Source License
@Override public void load(Element element) { Element body = element.getChild("Body", element.getNamespace()); // Extract MM7 namespace from SOAP body Iterator<?> i = body.getDescendants(new ElementFilter()); while (i.hasNext()) { Element e = (Element) i.next(); Namespace ns = e.getNamespace(); if (ns != null && ns.getURI().contains("MM7")) { this.namespace = ns; break; }//from w w w . j av a 2s .com } if (this.namespace == null) { throw new IllegalStateException("can't autodetect MM7 namespace: " + body.toString()); } Element header = element.getChild("Header", element.getNamespace()); setTransactionId(header.getChildTextTrim("TransactionID", namespace)); }