List of usage examples for org.jdom2 Namespace NO_NAMESPACE
Namespace NO_NAMESPACE
To view the source code for org.jdom2 Namespace NO_NAMESPACE.
Click Source Link
Namespace
for when not in a namespace From source file:com.sun.syndication.io.impl.Atom03Parser.java
License:Open Source License
private Content parseContent(Element e) { String value = null;//from w ww. ja v a2 s . co m String type = getAttributeValue(e, "type"); type = (type != null) ? type : "text/plain"; String mode = getAttributeValue(e, "mode"); if (mode == null) { mode = Content.XML; // default to xml content } if (mode.equals(Content.ESCAPED)) { // do nothing XML Parser took care of this value = e.getText(); } else if (mode.equals(Content.BASE64)) { value = Base64.decode(e.getText()); } else if (mode.equals(Content.XML)) { XMLOutputter outputter = new XMLOutputter(); List eContent = e.getContent(); Iterator i = eContent.iterator(); while (i.hasNext()) { org.jdom2.Content c = (org.jdom2.Content) i.next(); if (c instanceof Element) { Element eC = (Element) c; if (eC.getNamespace().equals(getAtomNamespace())) { ((Element) c).setNamespace(Namespace.NO_NAMESPACE); } } } value = outputter.outputString(eContent); } Content content = new Content(); content.setType(type); content.setMode(mode); content.setValue(value); return content; }
From source file:com.sun.syndication.io.impl.Atom10Parser.java
License:Open Source License
private String parseTextConstructToString(Element e) { String value = null;/* www . j a v a 2 s.c o m*/ String type = getAttributeValue(e, "type"); type = (type != null) ? type : Content.TEXT; if (type.equals(Content.XHTML) || (type.indexOf("/xml")) != -1 || (type.indexOf("+xml")) != -1) { // XHTML content needs special handling XMLOutputter outputter = new XMLOutputter(); List eContent = e.getContent(); Iterator i = eContent.iterator(); while (i.hasNext()) { org.jdom2.Content c = (org.jdom2.Content) i.next(); if (c instanceof Element) { Element eC = (Element) c; if (eC.getNamespace().equals(getAtomNamespace())) { ((Element) c).setNamespace(Namespace.NO_NAMESPACE); } } } value = outputter.outputString(eContent); } else { // Everything else comes in verbatim value = e.getText(); } return value; }
From source file:com.sun.syndication.io.impl.RSS091UserlandGenerator.java
License:Open Source License
protected Namespace getFeedNamespace() { return Namespace.NO_NAMESPACE; }
From source file:de.nava.informa.parsers.Atom_0_3_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *///from w w w . j ava2 s. c o m public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(channel, "dc"); if (dcNS == null) { LOGGER.debug("No namespace for dublin core found"); dcNS = defNS; } LOGGER.debug("start parsing."); // get version attribute String formatVersion = "0.3"; if (channel.getAttribute("version") != null) { formatVersion = channel.getAttribute("version").getValue().trim(); LOGGER.debug("Atom version " + formatVersion + " specified in document."); } else { LOGGER.info("No format version specified, using default."); } // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl", "modified", "author", "generator", "copyright", "link", "entry" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // TODO: support attributes: type, mode chnl.setFormat(ChannelFormat.ATOM_0_3); // language String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE); if (language != null) { chnl.setLanguage(language); } // description element if (channel.getChild("description") != null) { chnl.setDescription(channel.getChildTextTrim("description", defNS)); } else { // fallback chnl.setDescription(channel.getChildTextTrim("tagline", defNS)); } // ttl in dc namespace Element ttl = channel.getChild("ttl", dcNS); if (ttl != null) { String ttlString = ttl.getTextTrim(); if (ttlString != null) { chnl.setTtl(Integer.parseInt(ttlString)); } } // lastbuild element : modified ? Element modified = channel.getChild("modified", defNS); if (modified != null) { chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim())); } // TODO : issued value /* if (modified != null) { modified = channel.getChild("issued", defNS); chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim())); } */ // author element Element author = channel.getChild("author", defNS); if (author != null) { ParserUtils.matchCaseOfChildren(author, "name"); chnl.setCreator(author.getChildTextTrim("name", defNS)); } // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(getCopyright(copyright)); } // n link elements // TODO : type attribut of link (text, application...) List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); if ((rel != null) && (href != null) && rel.equals("alternate")) { URL linkURL = ParserUtils.getURL(href); chnl.setSite(linkURL); break; } // TODO: further extraction of link information } // 1..n entry elements List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "issued", "subject" }); // get title element // TODO : deal with type attribut Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = getTitle(elTitle); LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); curItem.setFound(dateParsed); // get issued element (required) Element elIssued = item.getChild("issued", defNS); if (elIssued == null) { // [adewale@gmail.com, 01-May-2005] Fix for blogs which have // 'created' dates, but not 'issued' dates -- in clear contravention // of the Atom 0.3 spec. Element elCreated = item.getChild("created", defNS); if (elCreated != null) { curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently curItem.setSubject(elSubject.getTextTrim()); } } // set to current date chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.Atom_1_0_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *///from www . j av a2 s .c om public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); } Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(channel); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; LOGGER.info("No default namespace found."); } else if ((defNS.getURI() == null) || !defNS.getURI().equals("http://www.w3.org/2005/Atom")) { LOGGER.warn("Namespace is not really supported, still trying assuming Atom 1.0 format"); } LOGGER.debug("start parsing."); // --- read in channel information // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "subtitle", "updated", "published", "author", "generator", "rights", "link", "entry" }); // TODO icon and logo: Feed element can have upto 1 logo and icon. // TODO id: Feed and all entries have a unique id string. This can // be the URL of the website. Supporting this will require API change. // TODO: Feed can optionally have category information // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); chnl.setFormat(ChannelFormat.ATOM_1_0); // description element if (channel.getChild("subtitle") != null) { chnl.setDescription(channel.getChildTextTrim("subtitle", defNS)); } // TODO: should we use summary element? // lastbuild element : updated ? Element updated = channel.getChild("updated", defNS); if (updated != null) { chnl.setPubDate(ParserUtils.getDate(updated.getTextTrim())); } // author element List authors = channel.getChildren("author", defNS); chnl.setCreator(getAuthorString(authors, defNS)); // TODO we are ignoring contributors information // generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // TODO generator can have URI and version information // copyright element Element rights = channel.getChild("rights", defNS); if (rights != null) { chnl.setCopyright(AtomParserUtils.getValue(rights, getMode(rights))); } List links = channel.getChildren("link", defNS); Iterator i = links.iterator(); URL linkUrl = null; while (i.hasNext()) { Element linkElement = (Element) i.next(); // use first 'alternate' link // if rel is not present, use first link without rel String rel = linkElement.getAttributeValue("rel"); String href = linkElement.getAttributeValue("href"); // TODO we need to handle relative links also if ((rel == null) && (href != null) && (linkUrl == null)) { linkUrl = ParserUtils.getURL(href); } else if ((rel != null) && (href != null) && rel.equals("alternate")) { linkUrl = ParserUtils.getURL(href); break; } } if (linkUrl != null) { chnl.setSite(linkUrl); } List items = channel.getChildren("entry", defNS); i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); // Lower the case of these tags to simulate case-insensitive parsing ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "content", "summary", "published", "author" }); // TODO entry, if copied from some other feed, may have source element // TODO each entry can have its own rights declaration // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = AtomParserUtils.getValue(elTitle, getMode(elTitle)); LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle); } if (LOGGER.isDebugEnabled()) { LOGGER.debug("Entry element found (" + strTitle + ")."); } // get link element String strLink = AtomParserUtils.getItemLink(item, defNS); // get description element String strDesc = getDescription(item, defNS); // generate new news item (link to article) ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); //TODO enclosure data curItem.setFound(dateParsed); List itemAuthors = item.getChildren("author", defNS); curItem.setCreator(getAuthorString(itemAuthors, defNS)); // get published element Element elIssued = item.getChild("published", defNS); if (elIssued == null) { // published element may not be present (but updated should be) Element elUpdated = item.getChild("updated", defNS); // TODO there should be some way to determining which one are we // returning if (elUpdated != null) { curItem.setDate(ParserUtils.getDate(elUpdated.getTextTrim())); } } else { curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim())); } // get list of category elements List elCategoryList = item.getChildren("category", defNS); // categories present will be stored here Collection<CategoryIF> categories = new ArrayList<>(); // multiple category elements may be present for (Object elCategoryItem : elCategoryList) { Element elCategory = (Element) elCategoryItem; // notice: atom spec. forbids to have category "term" (="subject") // set as inner text of category tags, so we have to read it from // the "term" attribute if (elCategory != null) { // TODO: what if we have more than one category element present? // subject would be overwritten each loop and therefore represent only // the last category read, so does this make any sense? // TODO: what about adding functionality for accessing "label" or "scheme" attributes? // if set, a label should be displayed instead of the value set in term // we keep this line not to break up things which // use getSubject() to read an item category curItem.setSubject(elCategory.getAttributeValue("term")); CategoryIF c = new Category(elCategory.getAttributeValue("term")); // add current category to category list categories.add(c); } } // assign categories curItem.setCategories(categories); } // set to current date chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_1_0_Parser.java
License:Open Source License
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can " + "be created."); }//from www .j a v a 2s. c o m Date dateParsed = new Date(); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } // RSS 1.0 Dublin Core Module namespace Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space (for retrieving descriptions) if (dcNS == null) { dcNS = defNS; } // RSS 1.0 Syndication Module namespace Namespace syNS = ParserUtils.getNamespace(root, "sy"); // RSS 1.0 Aggregation Module namespace Namespace agNS = ParserUtils.getNamespace(root, "ag"); // RSS 1.0 Administration Module namespace Namespace adminNS = ParserUtils.getNamespace(root, "admin"); // RSS 1.0 DCTerms Module namespace Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms"); // RSS 1.0 Annotation Module namespace Namespace annotateNS = ParserUtils.getNamespace(root, "annotate"); // RSS091 Module namespace Namespace rss091NS = ParserUtils.getNamespace(root, "rss091"); // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" }); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // ----------------------- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "creator", "managingEditor", "publisher", "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date", "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod", "updateFrequency", "updateBase" }); // title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_1_0); // description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // creator element Element creator = channel.getChild("creator", dcNS); if (creator == null) { creator = channel.getChild("managingEditor", rss091NS); } if (creator != null) { chnl.setCreator(creator.getTextTrim()); } // publisher element String publisher = channel.getChildTextTrim("publisher", dcNS); if (publisher == null) { Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS); if (elErrorReportsTo != null) { publisher = elErrorReportsTo.getAttributeValue("resource", ParserUtils.getNamespace(elErrorReportsTo, "rdf")); } } if (publisher == null) { publisher = channel.getChildTextTrim("webMaster", rss091NS); } chnl.setPublisher(publisher); // language element Element language = channel.getChild("language", dcNS); if (language == null) { language = channel.getChild("language", rss091NS); } if (language != null) { chnl.setLanguage(language.getTextTrim()); } // rights element Element copyright = channel.getChild("rights", dcNS); if (copyright == null) { copyright = channel.getChild("copyright", rss091NS); } if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", rss091NS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element // use namespace URI chnl.setDocs(defNS.getURI()); // 0..1 pubDate element Element pubDate = channel.getChild("date", dcNS); if (pubDate == null) { pubDate = channel.getChild("issued", dctermsNS); } if (pubDate == null) { pubDate = channel.getChild("pubdate", rss091NS); } if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate"); if (lastBuildDate == null) { lastBuildDate = channel.getChild("modified", dctermsNS); } if (lastBuildDate == null) { lastBuildDate = channel.getChild("lastBuildDate", rss091NS); } if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // RSS 1.0 Administration Module support // 0..1 generator element Element elGenerator = channel.getChild("generatorAgent", adminNS); if (elGenerator != null) { Attribute generator = elGenerator.getAttribute("resource", ParserUtils.getNamespace(elGenerator, "rdf")); if (generator != null) { chnl.setGenerator(generator.getValue()); } } // RSS 1.0 Syndication Module support // 0..1 update period element Element updatePeriod = channel.getChild("updatePeriod", syNS); if (updatePeriod != null) { try { ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod .valueFromText(updatePeriod.getTextTrim()); chnl.setUpdatePeriod(channelUpdatePeriod); } catch (IllegalArgumentException ex) { logger.warn(updatePeriod.getTextTrim(), ex); } } // 0..1 update frequency element Element updateFrequency = channel.getChild("updateFrequency", syNS); if (updateFrequency != null) { chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim())).intValue()); } // 0..1 update base element Element updateBase = channel.getChild("updateBase", syNS); if (updateBase != null) { chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim())); } if ((updatePeriod != null) && updateFrequency != null) { int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency()); chnl.setTtl(ttl); } // item elements List items = root.getChildren("item", defNS); Iterator i = items.iterator(); while (i.hasNext()) { Element item = (Element) i.next(); ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } if (elDesc == null) { elDesc = item.getChild("description", dcNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); rssItem.setFound(dateParsed); // get creator element Element elCreator = item.getChild("creator", dcNS); if (elCreator != null) { rssItem.setCreator(elCreator.getTextTrim()); } // get subject element Element elSubject = item.getChild("subject", dcNS); if (elSubject != null) { // TODO: Mulitple subject elements not handled currently rssItem.setSubject(elSubject.getTextTrim()); } // get date element Element elDate = item.getChild("date", dcNS); if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } // get source element - default to Aggregation module, then try Dublin Core String sourceName = null; String sourceLocation = null; Date sourceTimestamp = null; Element elSourceURL = item.getChild("sourceURL", agNS); if (elSourceURL == null) { // No Aggregation module - try Dublin Core elSourceURL = item.getChild("source", dcNS); if (elSourceURL != null) { sourceLocation = elSourceURL.getTextTrim(); sourceName = "Source"; } } else { // Aggregation module sourceLocation = elSourceURL.getTextTrim(); Element elSourceName = item.getChild("source", agNS); if (elSourceName != null) { sourceName = elSourceName.getTextTrim(); } Element elSourceTimestamp = item.getChild("timestamp", agNS); if (elSourceTimestamp != null) { sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim()); } } if (sourceLocation != null) { ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, sourceTimestamp); rssItem.setSource(itemSource); } // comments element - use Annotation module Element elReference = item.getChild("reference", annotateNS); if (elReference != null) { Attribute resource = elReference.getAttribute("resource", ParserUtils.getNamespace(elReference, "rdf")); if (resource != null) { URL resourceURL = ParserUtils.getURL(resource.getValue()); if (resourceURL != null) { rssItem.setComments(resourceURL); } } } } // image element Element image = root.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn(e); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // textinput element Element txtinp = root.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "description", "name", "link" }); String tiTitle = null; if (txtinp.getChild("title", defNS) != null) { tiTitle = txtinp.getChild("title", defNS).getTextTrim(); } String tiDescr = null; if (txtinp.getChild("description", defNS) != null) { tiDescr = txtinp.getChild("description", defNS).getTextTrim(); } String tiName = null; if (txtinp.getChild("name", defNS) != null) { tiName = txtinp.getChild("name", defNS).getTextTrim(); } URL tiLink = null; if (txtinp.getChild("link", defNS) != null) { tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim()); } TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink); chnl.setTextInput(rssTextInput); } chnl.setLastUpdated(dateParsed); return chnl; }
From source file:de.nava.informa.parsers.RSS_2_0_Parser.java
License:Open Source License
/** * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element) *//*from www. j a v a 2 s . c om*/ public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException { if (cBuilder == null) { throw new RuntimeException("Without builder no channel can be created."); } Date dateParsed = new Date(); logger.debug("start parsing."); Namespace defNS = ParserUtils.getDefaultNS(root); if (defNS == null) { defNS = Namespace.NO_NAMESPACE; logger.info("No default namespace found."); } Namespace dcNS = ParserUtils.getNamespace(root, "dc"); // fall back to default name space if (dcNS == null) { dcNS = defNS; } // Content namespace Namespace contentNS = ParserUtils.getNamespace(root, "content"); // fall back to default name space if (contentNS == null) { contentNS = defNS; } ParserUtils.matchCaseOfChildren(root, "channel"); // Get the channel element (only one occurs) Element channel = root.getChild("channel", defNS); if (channel == null) { logger.warn("Channel element could not be retrieved from feed."); throw new ParseException("No channel element found in feed."); } // --- read in channel information ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "link", "language", "item", "image", "textinput", "copyright", "rating", "docs", "generator", "pubDate", "lastBuildDate", "category", "managingEditor", "webMaster", "cloud" }); // 1 title element ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS)); // set channel format chnl.setFormat(ChannelFormat.RSS_2_0); // 1 description element chnl.setDescription(channel.getChildTextTrim("description", defNS)); // 1 link element chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS))); // 1 language element chnl.setLanguage(channel.getChildTextTrim("language", defNS)); // 1..n item elements List items = channel.getChildren("item", defNS); for (Object item1 : items) { Element item = (Element) item1; ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description", "subject", "category", "pubDate", "date", "author", "creator", "comments", "guid", "source", "enclosure" }); // get title element Element elTitle = item.getChild("title", defNS); String strTitle = "<No Title>"; if (elTitle != null) { strTitle = elTitle.getTextTrim(); } if (logger.isDebugEnabled()) { logger.debug("Item element found (" + strTitle + ")."); } // get link element Element elLink = item.getChild("link", defNS); String strLink = ""; if (elLink != null) { strLink = elLink.getTextTrim(); } // get description element Element elDesc = item.getChild("encoded", contentNS); if (elDesc == null) { elDesc = item.getChild("description", defNS); } String strDesc = ""; if (elDesc != null) { strDesc = elDesc.getTextTrim(); } // generate new RSS item (link to article) ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink)); // get subject element Element elSubject = item.getChild("subject", defNS); if (elSubject == null) { // fallback mechanism: get dc:subject element elSubject = item.getChild("subject", dcNS); } if (elSubject != null) { rssItem.setSubject(elSubject.getTextTrim()); } // get category list // get list of <category> elements List listCategory = item.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = item.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node rssItem.setCategories(catList); } } // get publication date Element elDate = item.getChild("pubDate", defNS); if (elDate == null) { // fallback mechanism: get dc:date element elDate = item.getChild("date", dcNS); } if (elDate != null) { rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim())); } rssItem.setFound(dateParsed); // get Author element Element elAuthor = item.getChild("author", defNS); if (elAuthor == null) { // fallback mechanism: get dc:creator element elAuthor = item.getChild("creator", dcNS); } if (elAuthor != null) rssItem.setCreator(elAuthor.getTextTrim()); // get Comments element Element elComments = item.getChild("comments", defNS); String strComments = ""; if (elComments != null) { strComments = elComments.getTextTrim(); } rssItem.setComments(ParserUtils.getURL(strComments)); // get guid element Element elGuid = item.getChild("guid", defNS); if (elGuid != null) { String guidUrl = elGuid.getTextTrim(); if (guidUrl != null) { boolean permaLink = true; Attribute permaLinkAttribute = elGuid.getAttribute("isPermaLink", defNS); if (permaLinkAttribute != null) { String permaLinkStr = permaLinkAttribute.getValue(); if (permaLinkStr != null) { permaLink = Boolean.valueOf(permaLinkStr); } } ItemGuidIF itemGuid = cBuilder.createItemGuid(rssItem, guidUrl, permaLink); rssItem.setGuid(itemGuid); } } // get source element Element elSource = item.getChild("source", defNS); if (elSource != null) { String sourceName = elSource.getTextTrim(); Attribute sourceAttribute = elSource.getAttribute("url", defNS); if (sourceAttribute != null) { String sourceLocation = sourceAttribute.getValue().trim(); ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null); rssItem.setSource(itemSource); } } // get enclosure element Element elEnclosure = item.getChild("enclosure", defNS); if (elEnclosure != null) { URL location = null; String type = null; int length = -1; Attribute urlAttribute = elEnclosure.getAttribute("url", defNS); if (urlAttribute != null) { location = ParserUtils.getURL(urlAttribute.getValue().trim()); } Attribute typeAttribute = elEnclosure.getAttribute("type", defNS); if (typeAttribute != null) { type = typeAttribute.getValue().trim(); } Attribute lengthAttribute = elEnclosure.getAttribute("length", defNS); if (lengthAttribute != null) { try { length = Integer.parseInt(lengthAttribute.getValue().trim()); } catch (NumberFormatException e) { logger.warn(e); } } ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem, location, type, length); rssItem.setEnclosure(itemEnclosure); } } // 0..1 image element Element image = channel.getChild("image", defNS); if (image != null) { ParserUtils.matchCaseOfChildren(image, new String[] { "title", "url", "link", "width", "height", "description" }); ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS), ParserUtils.getURL(image.getChildTextTrim("url", defNS)), ParserUtils.getURL(image.getChildTextTrim("link", defNS))); Element imgWidth = image.getChild("width", defNS); if (imgWidth != null) { try { rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing width: " + e.getMessage()); } } Element imgHeight = image.getChild("height", defNS); if (imgHeight != null) { try { rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim())); } catch (NumberFormatException e) { logger.warn("Error parsing height: " + e.getMessage()); } } Element imgDescr = image.getChild("description", defNS); if (imgDescr != null) { rssImage.setDescription(imgDescr.getTextTrim()); } chnl.setImage(rssImage); } // 0..1 textinput element Element txtinp = channel.getChild("textinput", defNS); if (txtinp != null) { ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" }); TextInputIF rssTextInput = cBuilder.createTextInput(txtinp.getChildTextTrim("title", defNS), txtinp.getChildTextTrim("description", defNS), txtinp.getChildTextTrim("name", defNS), ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS))); chnl.setTextInput(rssTextInput); } // 0..1 copyright element Element copyright = channel.getChild("copyright", defNS); if (copyright != null) { chnl.setCopyright(copyright.getTextTrim()); } // 0..1 Rating element Element rating = channel.getChild("rating", defNS); if (rating != null) { chnl.setRating(rating.getTextTrim()); } // 0..1 Docs element Element docs = channel.getChild("docs", defNS); if (docs != null) { chnl.setDocs(docs.getTextTrim()); } // 0..1 Generator element Element generator = channel.getChild("generator", defNS); if (generator != null) { chnl.setGenerator(generator.getTextTrim()); } // 0..1 ttl element Element ttl = channel.getChild("ttl", defNS); if (ttl != null) { String ttlValue = ttl.getTextTrim(); try { chnl.setTtl(Integer.parseInt(ttlValue)); } catch (NumberFormatException e) { logger.warn("Invalid TTL format: '" + ttlValue + "'"); } } // 0..1 pubDate element Element pubDate = channel.getChild("pubDate", defNS); if (pubDate != null) { chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim())); } // 0..1 lastBuildDate element Element lastBuildDate = channel.getChild("lastBuildDate", defNS); if (lastBuildDate != null) { chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim())); } // get category list // get list of <category> elements List listCategory = channel.getChildren("category", defNS); if (listCategory.size() < 1) { // fallback mechanism: get dc:category element listCategory = channel.getChildren("category", dcNS); } if (listCategory.size() > 0) { RecursiveHashtable<String> catTable = new RecursiveHashtable<String>(); // for each category, parse hierarchy for (Object aListCategory : listCategory) { RecursiveHashtable<String> currTable = catTable; Element elCategory = (Element) aListCategory; // get contents of category element String[] titles = elCategory.getTextNormalize().split("/"); for (String title : titles) { // tokenize category string to extract out hierarchy if (!currTable.containsKey(title)) { // if token does not exist in current map, add it with child Hashtable currTable.put(title, new RecursiveHashtable<String>()); } // reset current Hashtable to child's Hashtable then iterate to next token currTable = currTable.get(title); } } ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>(); // transform cat list & hierarchy into list of CategoryIF elements Enumeration<String> enumCategories = catTable.keys(); while (enumCategories.hasMoreElements()) { String key = enumCategories.nextElement(); // build category list: getCategoryList(parent, title, children) CategoryIF cat = getCategoryList(null, key, catTable.get(key)); catList.add(cat); } if (catList.size() > 0) { // if categories were actually created, then add list to item node chnl.setCategories(catList); } } // 0..1 managingEditor element Element managingEditor = channel.getChild("managingEditor", defNS); if (managingEditor != null) { chnl.setCreator(managingEditor.getTextTrim()); } // 0..1 webMaster element Element webMaster = channel.getChild("webMaster", defNS); if (webMaster != null) { chnl.setPublisher(webMaster.getTextTrim()); } // 0..1 cloud element Element cloud = channel.getChild("cloud", defNS); if (cloud != null) { String _port = cloud.getAttributeValue("port", defNS); int port = -1; if (_port != null) { try { port = Integer.parseInt(_port); } catch (NumberFormatException e) { logger.warn(e); } } chnl.setCloud(cBuilder.createCloud(cloud.getAttributeValue("domain", defNS), port, cloud.getAttributeValue("path", defNS), cloud.getAttributeValue("registerProcedure", defNS), cloud.getAttributeValue("protocol", defNS))); } chnl.setLastUpdated(dateParsed); // 0..1 skipHours element // 0..1 skipDays element return chnl; }
From source file:de.nava.informa.utils.XmlPathUtils.java
License:Open Source License
/** * Returns the Namespace corresponding to an element and a prefix. * * @param element the element./*from w w w . j ava 2s. c o m*/ * @param prefix the prefix. * @return the Namespace. */ private static Namespace getNamespace(final Element element, final String prefix) { Namespace namespace = (prefix == null) ? element.getNamespace("") : element.getNamespace(prefix); return (namespace == null) ? Namespace.NO_NAMESPACE : namespace; }
From source file:es.upm.dit.xsdinferencer.extraction.extractorImpl.JSONTypesExtractorImpl.java
License:Apache License
/** * This method sets a given namespace to the given element and any of its descendants whose * name is a provided one and have no namespace. * @param name the name of the searched elements * @param rootElement the root element to begin the search at * @param namespace the namespace to set *//*from w w w.j av a2 s . co m*/ private void setNamespaceToElementsByName(String name, Element rootElement, Namespace namespace) { IteratorIterable<Element> descendants = rootElement .getDescendants(Filters.element(name, Namespace.NO_NAMESPACE)); for (Element descendant : descendants) { descendant.setNamespace(namespace); } if (rootElement.getName().equals(name) && rootElement.getNamespace().equals(Namespace.NO_NAMESPACE)) { rootElement.setNamespace(namespace); } }
From source file:es.upm.dit.xsdinferencer.extraction.extractorImpl.JSONTypesExtractorImpl.java
License:Apache License
/** * This method undoes what addPrefixToJSONKeysEndingsRecursive method did at the original JSON: It looks * for elements whose name ends at a desired key ending preceeded by a prefix to remove and removes it. * @param rootElement the root element.//from ww w. ja v a 2s . c om * @param desiredKeyEnding the desired ending. * @param prefixToRemove the prefix to remove. */ private void removePrefixToElementNameEndings(Element rootElement, String desiredKeyEnding, String prefixToRemove) { String keyToSearch = prefixToRemove + desiredKeyEnding; for (Element element : rootElement.getDescendants(Filters.element())) { if (!(element.getName().endsWith(keyToSearch) && element.getNamespace().equals(Namespace.NO_NAMESPACE))) { continue; } String name = element.getName(); String newName = name.replaceAll(Pattern.quote(keyToSearch) + "$", desiredKeyEnding); element.setName(newName); } }