Example usage for org.jdom2 Element getChildTextTrim

List of usage examples for org.jdom2 Element getChildTextTrim

Introduction

On this page you can find example usages of org.jdom2 Element getChildTextTrim.

Prototype

public String getChildTextTrim(final String cname, final Namespace ns) 

Source Link

Document

Returns the trimmed textual content of the named child element, or null if there's no such child.

Usage

From source file:de.nava.informa.parsers.Atom_0_3_Parser.java

License:Open Source License

/**
 * Parses an Atom 0.3 feed element into a channel and its items.
 *
 * <p>Feed-level data read here: title, {@code xml:lang}, description (falling
 * back to tagline), ttl, modified date, author name, generator, copyright and
 * the first {@code alternate} link. One item is then created through the
 * builder for every {@code entry} child.
 *
 * @param cBuilder builder used to create the channel and its items; must not be {@code null}
 * @param channel  element whose children (title, entry, ...) describe the feed
 * @return the channel created by {@code cBuilder}, with its last-updated
 *         timestamp set to the moment parsing started
 * @throws ParseException declared by the parser contract; not thrown directly by this method
 * @throws RuntimeException if {@code cBuilder} is {@code null}
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can be created.");
    }

    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(channel);

    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        LOGGER.info("No default namespace found.");
    }

    // RSS 1.0 Dublin Core Module namespace
    Namespace dcNS = ParserUtils.getNamespace(channel, "dc");

    if (dcNS == null) {
        LOGGER.debug("No namespace for dublin core found");
        dcNS = defNS;
    }

    LOGGER.debug("start parsing.");

    // get version attribute (only logged; the channel format is fixed to ATOM_0_3 below)
    String formatVersion = "0.3";

    if (channel.getAttribute("version") != null) {
        formatVersion = channel.getAttribute("version").getValue().trim();
        LOGGER.debug("Atom version " + formatVersion + " specified in document.");
    } else {
        LOGGER.info("No format version specified, using default.");
    }

    // --- read in channel information

    // Lower the case of these tags to simulate case-insensitive parsing
    ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl",
            "modified", "author", "generator", "copyright", "link", "entry" });

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // TODO: support attributes: type, mode
    chnl.setFormat(ChannelFormat.ATOM_0_3);

    // language
    String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE);

    if (language != null) {
        chnl.setLanguage(language);
    }

    // description element; the existence check must use the same namespace as
    // the read, otherwise a namespaced feed always falls through to tagline
    if (channel.getChild("description", defNS) != null) {
        chnl.setDescription(channel.getChildTextTrim("description", defNS));
    } else {
        // fallback
        chnl.setDescription(channel.getChildTextTrim("tagline", defNS));
    }

    // ttl in dc namespace
    Element ttl = channel.getChild("ttl", dcNS);

    if (ttl != null) {
        String ttlString = ttl.getTextTrim();

        // an empty or non-numeric ttl is optional metadata: skip it instead of
        // letting NumberFormatException abort the whole feed
        if ((ttlString != null) && !ttlString.isEmpty()) {
            try {
                chnl.setTtl(Integer.parseInt(ttlString));
            } catch (NumberFormatException e) {
                LOGGER.warn("Ignoring non-numeric ttl value '" + ttlString + "'.");
            }
        }
    }

    //  lastbuild element : modified ?
    Element modified = channel.getChild("modified", defNS);

    if (modified != null) {
        chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim()));
    }

    // TODO : issued value
    /*
    if (modified != null) {
      modified = channel.getChild("issued", defNS);
      chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim()));
    }
    */

    // author element
    Element author = channel.getChild("author", defNS);

    if (author != null) {
        ParserUtils.matchCaseOfChildren(author, "name");
        chnl.setCreator(author.getChildTextTrim("name", defNS));
    }

    // generator element
    Element generator = channel.getChild("generator", defNS);

    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // copyright element
    Element copyright = channel.getChild("copyright", defNS);

    if (copyright != null) {
        chnl.setCopyright(getCopyright(copyright));
    }

    // n link elements
    // TODO : type attribut of link (text, application...)
    for (Element linkElement : channel.getChildren("link", defNS)) {
        // use first 'alternate' link
        String rel = linkElement.getAttributeValue("rel");
        String href = linkElement.getAttributeValue("href");

        if ((rel != null) && (href != null) && rel.equals("alternate")) {
            chnl.setSite(ParserUtils.getURL(href));

            break;
        }

        // TODO: further extraction of link information
    }

    // 1..n entry elements
    for (Element item : channel.getChildren("entry", defNS)) {
        // Lower the case of these tags to simulate case-insensitive parsing
        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "content", "summary", "issued", "subject" });

        // get title element
        // TODO : deal with type attribut
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";

        if (elTitle != null) {
            strTitle = getTitle(elTitle);
            LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle);
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Entry element found (" + strTitle + ").");
        }

        // get link element
        String strLink = AtomParserUtils.getItemLink(item, defNS);

        // get description element
        String strDesc = getDescription(item, defNS);

        // generate new news item (link to article)
        ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        curItem.setFound(dateParsed);

        // get issued element (required)
        Element elIssued = item.getChild("issued", defNS);

        if (elIssued == null) {
            // [adewale@gmail.com, 01-May-2005] Fix for blogs which have
            // 'created' dates, but not 'issued' dates -- in clear contravention
            // of the Atom 0.3 spec.
            Element elCreated = item.getChild("created", defNS);

            if (elCreated != null) {
                curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim()));
            }
        } else {
            curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
        }

        // get subject element
        Element elSubject = item.getChild("subject", dcNS);

        if (elSubject != null) {
            // TODO: Mulitple subject elements not handled currently
            curItem.setSubject(elSubject.getTextTrim());
        }
    }

    // set to current date
    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.Atom_1_0_Parser.java

License:Open Source License

/**
 * Builds a semicolon separated list of author names.
 *
 * <p>Each non-null entry of {@code authors} is cast to {@link Element}; its
 * {@code name} child (looked up in {@code defNS} after case-normalising the
 * tag) supplies the text. Authors without a name, or with an empty one, are
 * skipped so the result never contains the literal {@code "null"} or a
 * dangling separator.
 *
 * @param authors author elements; {@code null} entries are ignored
 * @param defNS   namespace used to look up each author's {@code name} child
 * @return the names joined with {@code "; "}, or an empty string when no
 *         author supplies a name
 */
static String getAuthorString(List authors, Namespace defNS) {
    StringBuilder authorNames = new StringBuilder();

    for (Object author : authors) {
        Element authorElt = (Element) author;

        if (authorElt == null) {
            continue;
        }

        //TODO author may have more information like uri and email
        ParserUtils.matchCaseOfChildren(authorElt, "name");

        // getChildTextTrim returns null when there is no such child
        String name = authorElt.getChildTextTrim("name", defNS);

        if ((name == null) || name.isEmpty()) {
            continue;
        }

        if (authorNames.length() > 0) {
            // if more than one author, a ; separated list
            authorNames.append("; ");
        }

        authorNames.append(name);
    }

    return authorNames.toString();
}

From source file:de.nava.informa.parsers.Atom_1_0_Parser.java

License:Open Source License

/**
 * Parses an Atom 1.0 feed element into a channel and its items.
 *
 * <p>Feed-level data read here: title, subtitle (stored as the description),
 * updated date, authors, generator, rights and the site link. For every
 * {@code entry} child an item is created through the builder and given its
 * authors, date (published, else updated) and categories.
 *
 * @param cBuilder builder used to create the channel and its items; must not be {@code null}
 * @param channel  element whose children (title, entry, ...) describe the feed
 * @return the channel created by {@code cBuilder}, with its last-updated
 *         timestamp set to the moment parsing started
 * @throws ParseException declared by the parser contract; not thrown directly by this method
 * @throws RuntimeException if {@code cBuilder} is {@code null}
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can be created.");
    }

    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(channel);

    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        LOGGER.info("No default namespace found.");
    } else if ((defNS.getURI() == null) || !defNS.getURI().equals("http://www.w3.org/2005/Atom")) {
        LOGGER.warn("Namespace is not really supported, still trying assuming Atom 1.0 format");
    }

    LOGGER.debug("start parsing.");

    // --- read in channel information

    // Lower the case of these tags to simulate case-insensitive parsing
    ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "subtitle", "updated", "published",
            "author", "generator", "rights", "link", "entry" });

    // TODO icon and logo: Feed element can have upto 1 logo and icon.
    // TODO id: Feed and all entries have a unique id string. This can
    // be the URL of the website. Supporting this will require API change.
    // TODO: Feed can optionally have category information

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    chnl.setFormat(ChannelFormat.ATOM_1_0);

    // description element; getChildTextTrim returns null when the child is
    // absent, so one namespaced lookup replaces the former namespace-less
    // existence check that never matched a namespaced feed
    String subtitle = channel.getChildTextTrim("subtitle", defNS);

    if (subtitle != null) {
        chnl.setDescription(subtitle);
    }

    // TODO: should we use summary element?

    // lastbuild element : updated ?
    Element updated = channel.getChild("updated", defNS);

    if (updated != null) {
        chnl.setPubDate(ParserUtils.getDate(updated.getTextTrim()));
    }

    // author element
    chnl.setCreator(getAuthorString(channel.getChildren("author", defNS), defNS));

    // TODO we are ignoring contributors information

    // generator element
    Element generator = channel.getChild("generator", defNS);

    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // TODO generator can have URI and version information

    // copyright element
    Element rights = channel.getChild("rights", defNS);

    if (rights != null) {
        chnl.setCopyright(AtomParserUtils.getValue(rights, getMode(rights)));
    }

    URL linkUrl = null;

    for (Element linkElement : channel.getChildren("link", defNS)) {
        // use first 'alternate' link
        // if rel is not present, use first link without rel
        String rel = linkElement.getAttributeValue("rel");
        String href = linkElement.getAttributeValue("href");

        // TODO we need to handle relative links also
        if ((rel == null) && (href != null) && (linkUrl == null)) {
            linkUrl = ParserUtils.getURL(href);
        } else if ((rel != null) && (href != null) && rel.equals("alternate")) {
            // an explicit alternate link wins over an earlier rel-less one
            linkUrl = ParserUtils.getURL(href);

            break;
        }
    }

    if (linkUrl != null) {
        chnl.setSite(linkUrl);
    }

    for (Element item : channel.getChildren("entry", defNS)) {
        // Lower the case of these tags to simulate case-insensitive parsing
        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "content", "summary", "published", "author" });

        // TODO entry, if copied from some other feed, may have source element
        // TODO each entry can have its own rights declaration

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";

        if (elTitle != null) {
            strTitle = AtomParserUtils.getValue(elTitle, getMode(elTitle));
            LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle);
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Entry element found (" + strTitle + ").");
        }

        // get link element
        String strLink = AtomParserUtils.getItemLink(item, defNS);

        // get description element
        String strDesc = getDescription(item, defNS);

        // generate new news item (link to article)
        ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        //TODO enclosure data
        curItem.setFound(dateParsed);

        curItem.setCreator(getAuthorString(item.getChildren("author", defNS), defNS));

        // get published element
        Element elIssued = item.getChild("published", defNS);

        if (elIssued == null) {
            // published element may not be present (but updated should be)
            Element elUpdated = item.getChild("updated", defNS);

            // TODO there should be some way to determining which one are we
            // returning
            if (elUpdated != null) {
                curItem.setDate(ParserUtils.getDate(elUpdated.getTextTrim()));
            }
        } else {
            curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
        }

        // categories present will be stored here
        Collection<CategoryIF> categories = new ArrayList<>();

        // multiple category elements may be present
        for (Element elCategory : item.getChildren("category", defNS)) {
            // notice: atom spec. forbids to have category "term" (="subject")
            // set as inner text of category tags, so we have to read it from
            // the "term" attribute

            if (elCategory == null) {
                continue;
            }

            // TODO: what if we have more than one category element present?
            // subject would be overwritten each loop and therefore represent only
            // the last category read, so does this make any sense?

            // TODO: what about adding functionality for accessing "label" or "scheme" attributes?
            // if set, a label should be displayed instead of the value set in term

            String term = elCategory.getAttributeValue("term");

            // we keep this line not to break up things which
            // use getSubject() to read an item category
            curItem.setSubject(term);

            // add current category to category list
            categories.add(new Category(term));
        }

        // assign categories
        curItem.setCategories(categories);
    }

    // set to current date
    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.RSS_1_0_Parser.java

License:Open Source License

/**
 * Parses an RSS 1.0 (RDF) document into a channel with its items, image and
 * text input.
 *
 * <p>Besides the core elements in the default namespace, values are read
 * from the Dublin Core, Syndication, Aggregation, Administration, DCTerms,
 * Annotation, RSS091 and Content module namespaces, looked up on
 * {@code root} by their conventional prefixes.
 *
 * @param cBuilder builder used to create the channel and its parts; must not be {@code null}
 * @param root     document root holding the {@code channel}, {@code item},
 *                 {@code image} and {@code textinput} children
 * @return the channel created by {@code cBuilder}, with its last-updated
 *         timestamp set to the moment parsing started
 * @throws ParseException   if {@code root} has no {@code channel} child
 * @throws RuntimeException if {@code cBuilder} is {@code null}
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can be created.");
    }
    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        logger.info("No default namespace found.");
    }

    // RSS 1.0 Dublin Core Module namespace
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    // fall back to default name space (for retrieving descriptions)
    if (dcNS == null) {
        dcNS = defNS;
    }

    // RSS 1.0 Syndication Module namespace
    Namespace syNS = ParserUtils.getNamespace(root, "sy");

    // RSS 1.0 Aggregation Module namespace
    Namespace agNS = ParserUtils.getNamespace(root, "ag");

    // RSS 1.0 Administration Module namespace
    Namespace adminNS = ParserUtils.getNamespace(root, "admin");

    // RSS 1.0 DCTerms Module namespace
    Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms");

    // RSS 1.0 Annotation Module namespace
    Namespace annotateNS = ParserUtils.getNamespace(root, "annotate");

    // RSS091 Module namespace
    Namespace rss091NS = ParserUtils.getNamespace(root, "rss091");

    // Content namespace
    Namespace contentNS = ParserUtils.getNamespace(root, "content");

    ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" });

    // Get the channel element (only one occurs)
    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
        logger.warn("Channel element could not be retrieved from feed.");
        throw new ParseException("No channel element found in feed.");
    }

    // ----------------------- read in channel information

    ParserUtils.matchCaseOfChildren(channel,
            new String[] { "title", "description", "link", "creator", "managingEditor", "publisher",
                    "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date",
                    "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod",
                    "updateFrequency", "updateBase" });

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // set channel format
    chnl.setFormat(ChannelFormat.RSS_1_0);

    // description element
    chnl.setDescription(channel.getChildTextTrim("description", defNS));

    // link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // creator element
    Element creator = channel.getChild("creator", dcNS);
    if (creator == null) {
        creator = channel.getChild("managingEditor", rss091NS);
    }
    if (creator != null) {
        chnl.setCreator(creator.getTextTrim());
    }

    // publisher element: dc:publisher, else admin:errorReportsTo, else rss091:webMaster
    String publisher = channel.getChildTextTrim("publisher", dcNS);
    if (publisher == null) {
        Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS);
        if (elErrorReportsTo != null) {
            publisher = elErrorReportsTo.getAttributeValue("resource",
                    ParserUtils.getNamespace(elErrorReportsTo, "rdf"));
        }
    }
    if (publisher == null) {
        publisher = channel.getChildTextTrim("webMaster", rss091NS);
    }
    chnl.setPublisher(publisher);

    // language element
    Element language = channel.getChild("language", dcNS);
    if (language == null) {
        language = channel.getChild("language", rss091NS);
    }
    if (language != null) {
        chnl.setLanguage(language.getTextTrim());
    }

    // rights element
    Element copyright = channel.getChild("rights", dcNS);
    if (copyright == null) {
        copyright = channel.getChild("copyright", rss091NS);
    }
    if (copyright != null) {
        chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 Rating element
    Element rating = channel.getChild("rating", rss091NS);
    if (rating != null) {
        chnl.setRating(rating.getTextTrim());
    }

    // 0..1 Docs element
    // use namespace URI
    chnl.setDocs(defNS.getURI());

    // 0..1 pubDate element
    Element pubDate = channel.getChild("date", dcNS);
    if (pubDate == null) {
        pubDate = channel.getChild("issued", dctermsNS);
    }
    if (pubDate == null) {
        pubDate = channel.getChild("pubdate", rss091NS);
    }
    if (pubDate != null) {
        chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    // NOTE(review): the first lookup uses no namespace, unlike its siblings - confirm this is intended
    Element lastBuildDate = channel.getChild("lastBuildDate");
    if (lastBuildDate == null) {
        lastBuildDate = channel.getChild("modified", dctermsNS);
    }
    if (lastBuildDate == null) {
        lastBuildDate = channel.getChild("lastBuildDate", rss091NS);
    }
    if (lastBuildDate != null) {
        chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // RSS 1.0 Administration Module support

    // 0..1 generator element
    Element elGenerator = channel.getChild("generatorAgent", adminNS);
    if (elGenerator != null) {
        Attribute generator = elGenerator.getAttribute("resource",
                ParserUtils.getNamespace(elGenerator, "rdf"));
        if (generator != null) {
            chnl.setGenerator(generator.getValue());
        }
    }

    // RSS 1.0 Syndication Module support

    // 0..1 update period element
    Element updatePeriod = channel.getChild("updatePeriod", syNS);
    if (updatePeriod != null) {
        try {
            ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod
                    .valueFromText(updatePeriod.getTextTrim());
            chnl.setUpdatePeriod(channelUpdatePeriod);
        } catch (IllegalArgumentException ex) {
            logger.warn(updatePeriod.getTextTrim(), ex);
        }
    }

    // 0..1 update frequency element
    Element updateFrequency = channel.getChild("updateFrequency", syNS);
    if (updateFrequency != null) {
        // parseInt replaces the deprecated Integer(String) constructor; both
        // throw NumberFormatException on non-numeric text
        chnl.setUpdateFrequency(Integer.parseInt(updateFrequency.getTextTrim()));
    }

    // 0..1 update base element
    Element updateBase = channel.getChild("updateBase", syNS);
    if (updateBase != null) {
        chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim()));
    }

    // ttl is derived only when both syndication values were present in the feed
    if ((updatePeriod != null) && (updateFrequency != null)) {
        int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency());
        chnl.setTtl(ttl);
    }

    // item elements (children of the root, not of the channel)
    for (Element item : root.getChildren("item", defNS)) {
        ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description",
                "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" });

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";
        if (elTitle != null) {
            strTitle = elTitle.getTextTrim();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Item element found (" + strTitle + ").");
        }

        // get link element
        Element elLink = item.getChild("link", defNS);
        String strLink = "";
        if (elLink != null) {
            strLink = elLink.getTextTrim();
        }

        // get description element: content:encoded, else description, else dc:description
        Element elDesc = item.getChild("encoded", contentNS);
        if (elDesc == null) {
            elDesc = item.getChild("description", defNS);
        }
        if (elDesc == null) {
            elDesc = item.getChild("description", dcNS);
        }
        String strDesc = "";
        if (elDesc != null) {
            strDesc = elDesc.getTextTrim();
        }

        // generate new RSS item (link to article)
        ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));
        rssItem.setFound(dateParsed);

        // get creator element
        Element elCreator = item.getChild("creator", dcNS);
        if (elCreator != null) {
            rssItem.setCreator(elCreator.getTextTrim());
        }

        // get subject element
        Element elSubject = item.getChild("subject", dcNS);
        if (elSubject != null) {
            // TODO: Mulitple subject elements not handled currently
            rssItem.setSubject(elSubject.getTextTrim());
        }

        // get date element
        Element elDate = item.getChild("date", dcNS);
        if (elDate != null) {
            rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
        }

        // get source element - default to Aggregation module, then try Dublin Core
        String sourceName = null;
        String sourceLocation = null;
        Date sourceTimestamp = null;

        Element elSourceURL = item.getChild("sourceURL", agNS);
        if (elSourceURL == null) { //  No Aggregation module - try Dublin Core
            elSourceURL = item.getChild("source", dcNS);
            if (elSourceURL != null) {
                sourceLocation = elSourceURL.getTextTrim();
                sourceName = "Source";
            }
        } else { // Aggregation module
            sourceLocation = elSourceURL.getTextTrim();
            Element elSourceName = item.getChild("source", agNS);
            if (elSourceName != null) {
                sourceName = elSourceName.getTextTrim();
            }
            Element elSourceTimestamp = item.getChild("timestamp", agNS);
            if (elSourceTimestamp != null) {
                sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim());
            }
        }

        if (sourceLocation != null) {
            ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation,
                    sourceTimestamp);
            rssItem.setSource(itemSource);
        }

        // comments element - use Annotation module
        Element elReference = item.getChild("reference", annotateNS);
        if (elReference != null) {
            Attribute resource = elReference.getAttribute("resource",
                    ParserUtils.getNamespace(elReference, "rdf"));
            if (resource != null) {
                URL resourceURL = ParserUtils.getURL(resource.getValue());
                if (resourceURL != null) {
                    rssItem.setComments(resourceURL);
                }
            }
        }
    }

    // image element
    Element image = root.getChild("image", defNS);
    if (image != null) {

        ParserUtils.matchCaseOfChildren(image,
                new String[] { "title", "url", "link", "width", "height", "description" });

        ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS),
                ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
                ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
        Element imgWidth = image.getChild("width", defNS);
        if (imgWidth != null) {
            try {
                rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn(e);
            }
        }
        Element imgHeight = image.getChild("height", defNS);
        if (imgHeight != null) {
            try {
                rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn(e);
            }
        }
        Element imgDescr = image.getChild("description", defNS);
        if (imgDescr != null) {
            rssImage.setDescription(imgDescr.getTextTrim());
        }
        chnl.setImage(rssImage);
    }

    // textinput element
    Element txtinp = root.getChild("textinput", defNS);
    if (txtinp != null) {

        // normalise the text input's own children; this previously passed
        // the image element, which may be null here
        ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" });

        // getChildTextTrim yields null for a missing child, matching the
        // null defaults used for absent text-input fields
        String tiTitle = txtinp.getChildTextTrim("title", defNS);
        String tiDescr = txtinp.getChildTextTrim("description", defNS);
        String tiName = txtinp.getChildTextTrim("name", defNS);

        URL tiLink = null;
        Element tiLinkElement = txtinp.getChild("link", defNS);
        if (tiLinkElement != null) {
            tiLink = ParserUtils.getURL(tiLinkElement.getTextTrim());
        }
        TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink);
        chnl.setTextInput(rssTextInput);
    }

    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.RSS_2_0_Parser.java

License:Open Source License

/**
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can be created.");
    }
    Date dateParsed = new Date();
    logger.debug("start parsing.");

    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        logger.info("No default namespace found.");
    }
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    // fall back to default name space
    if (dcNS == null) {
        dcNS = defNS;
    }

    // Content namespace
    Namespace contentNS = ParserUtils.getNamespace(root, "content");
    // fall back to default name space
    if (contentNS == null) {
        contentNS = defNS;
    }

    ParserUtils.matchCaseOfChildren(root, "channel");

    // Get the channel element (only one occurs)
    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
        logger.warn("Channel element could not be retrieved from feed.");
        throw new ParseException("No channel element found in feed.");
    }

    // --- read in channel information

    ParserUtils.matchCaseOfChildren(channel,
            new String[] { "title", "description", "link", "language", "item", "image", "textinput",
                    "copyright", "rating", "docs", "generator", "pubDate", "lastBuildDate", "category",
                    "managingEditor", "webMaster", "cloud" });

    // 1 title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // set channel format
    chnl.setFormat(ChannelFormat.RSS_2_0);

    // 1 description element
    chnl.setDescription(channel.getChildTextTrim("description", defNS));

    // 1 link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // 1 language element
    chnl.setLanguage(channel.getChildTextTrim("language", defNS));

    // 1..n item elements
    List items = channel.getChildren("item", defNS);
    for (Object item1 : items) {
        Element item = (Element) item1;

        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "encoded", "description", "subject", "category", "pubDate",
                        "date", "author", "creator", "comments", "guid", "source", "enclosure" });

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";
        if (elTitle != null) {
            strTitle = elTitle.getTextTrim();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Item element found (" + strTitle + ").");
        }

        // get link element
        Element elLink = item.getChild("link", defNS);
        String strLink = "";
        if (elLink != null) {
            strLink = elLink.getTextTrim();
        }

        // get description element
        Element elDesc = item.getChild("encoded", contentNS);
        if (elDesc == null) {
            elDesc = item.getChild("description", defNS);
        }
        String strDesc = "";
        if (elDesc != null) {
            strDesc = elDesc.getTextTrim();
        }

        // generate new RSS item (link to article)
        ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        // get subject element
        Element elSubject = item.getChild("subject", defNS);
        if (elSubject == null) {
            // fallback mechanism: get dc:subject element
            elSubject = item.getChild("subject", dcNS);
        }
        if (elSubject != null) {
            rssItem.setSubject(elSubject.getTextTrim());
        }

        // get category list
        // get list of <category> elements
        List listCategory = item.getChildren("category", defNS);
        if (listCategory.size() < 1) {
            // fallback mechanism: get dc:category element
            listCategory = item.getChildren("category", dcNS);
        }
        if (listCategory.size() > 0) {
            RecursiveHashtable<String> catTable = new RecursiveHashtable<String>();

            // for each category, parse hierarchy
            for (Object aListCategory : listCategory) {
                RecursiveHashtable<String> currTable = catTable;
                Element elCategory = (Element) aListCategory;
                // get contents of category element
                String[] titles = elCategory.getTextNormalize().split("/");
                for (String title : titles) {
                    // tokenize category string to extract out hierarchy
                    if (!currTable.containsKey(title)) {
                        // if token does not exist in current map, add it with child Hashtable
                        currTable.put(title, new RecursiveHashtable<String>());
                    }
                    // reset current Hashtable to child's Hashtable then iterate to next token
                    currTable = currTable.get(title);
                }
            }
            ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>();
            // transform cat list & hierarchy into list of CategoryIF elements
            Enumeration<String> enumCategories = catTable.keys();
            while (enumCategories.hasMoreElements()) {
                String key = enumCategories.nextElement();
                // build category list: getCategoryList(parent, title, children)
                CategoryIF cat = getCategoryList(null, key, catTable.get(key));
                catList.add(cat);
            }
            if (catList.size() > 0) {
                // if categories were actually created, then add list to item node
                rssItem.setCategories(catList);
            }
        }

        // get publication date
        Element elDate = item.getChild("pubDate", defNS);
        if (elDate == null) {
            // fallback mechanism: get dc:date element
            elDate = item.getChild("date", dcNS);
        }
        if (elDate != null) {
            rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
        }

        rssItem.setFound(dateParsed);

        // get Author element
        Element elAuthor = item.getChild("author", defNS);
        if (elAuthor == null) {
            // fallback mechanism: get dc:creator element
            elAuthor = item.getChild("creator", dcNS);
        }
        if (elAuthor != null)
            rssItem.setCreator(elAuthor.getTextTrim());

        // get Comments element
        Element elComments = item.getChild("comments", defNS);
        String strComments = "";
        if (elComments != null) {
            strComments = elComments.getTextTrim();
        }
        rssItem.setComments(ParserUtils.getURL(strComments));

        // get guid element
        Element elGuid = item.getChild("guid", defNS);
        if (elGuid != null) {
            String guidUrl = elGuid.getTextTrim();
            if (guidUrl != null) {
                boolean permaLink = true;
                Attribute permaLinkAttribute = elGuid.getAttribute("isPermaLink", defNS);
                if (permaLinkAttribute != null) {
                    String permaLinkStr = permaLinkAttribute.getValue();
                    if (permaLinkStr != null) {
                        permaLink = Boolean.valueOf(permaLinkStr);
                    }
                }
                ItemGuidIF itemGuid = cBuilder.createItemGuid(rssItem, guidUrl, permaLink);
                rssItem.setGuid(itemGuid);
            }
        }

        // get source element
        Element elSource = item.getChild("source", defNS);
        if (elSource != null) {
            String sourceName = elSource.getTextTrim();
            Attribute sourceAttribute = elSource.getAttribute("url", defNS);
            if (sourceAttribute != null) {
                String sourceLocation = sourceAttribute.getValue().trim();
                ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null);
                rssItem.setSource(itemSource);
            }
        }

        // get enclosure element
        Element elEnclosure = item.getChild("enclosure", defNS);
        if (elEnclosure != null) {
            URL location = null;
            String type = null;
            int length = -1;
            Attribute urlAttribute = elEnclosure.getAttribute("url", defNS);
            if (urlAttribute != null) {
                location = ParserUtils.getURL(urlAttribute.getValue().trim());
            }
            Attribute typeAttribute = elEnclosure.getAttribute("type", defNS);
            if (typeAttribute != null) {
                type = typeAttribute.getValue().trim();
            }
            Attribute lengthAttribute = elEnclosure.getAttribute("length", defNS);
            if (lengthAttribute != null) {
                try {
                    length = Integer.parseInt(lengthAttribute.getValue().trim());
                } catch (NumberFormatException e) {
                    logger.warn(e);
                }
            }
            ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem, location, type, length);
            rssItem.setEnclosure(itemEnclosure);
        }
    }

    // 0..1 image element
    Element image = channel.getChild("image", defNS);
    if (image != null) {

        ParserUtils.matchCaseOfChildren(image,
                new String[] { "title", "url", "link", "width", "height", "description" });

        ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS),
                ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
                ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
        Element imgWidth = image.getChild("width", defNS);
        if (imgWidth != null) {
            try {
                rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn("Error parsing width: " + e.getMessage());
            }
        }
        Element imgHeight = image.getChild("height", defNS);
        if (imgHeight != null) {
            try {
                rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn("Error parsing height: " + e.getMessage());
            }
        }
        Element imgDescr = image.getChild("description", defNS);
        if (imgDescr != null) {
            rssImage.setDescription(imgDescr.getTextTrim());
        }
        chnl.setImage(rssImage);
    }

    // 0..1 textinput element
    Element txtinp = channel.getChild("textinput", defNS);
    if (txtinp != null) {

        ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" });

        TextInputIF rssTextInput = cBuilder.createTextInput(txtinp.getChildTextTrim("title", defNS),
                txtinp.getChildTextTrim("description", defNS), txtinp.getChildTextTrim("name", defNS),
                ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS)));
        chnl.setTextInput(rssTextInput);
    }

    // 0..1 copyright element
    Element copyright = channel.getChild("copyright", defNS);
    if (copyright != null) {
        chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 Rating element
    Element rating = channel.getChild("rating", defNS);
    if (rating != null) {
        chnl.setRating(rating.getTextTrim());
    }

    // 0..1 Docs element
    Element docs = channel.getChild("docs", defNS);
    if (docs != null) {
        chnl.setDocs(docs.getTextTrim());
    }

    // 0..1 Generator element
    Element generator = channel.getChild("generator", defNS);
    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // 0..1 ttl element
    Element ttl = channel.getChild("ttl", defNS);
    if (ttl != null) {
        String ttlValue = ttl.getTextTrim();
        try {
            chnl.setTtl(Integer.parseInt(ttlValue));
        } catch (NumberFormatException e) {
            logger.warn("Invalid TTL format: '" + ttlValue + "'");
        }
    }

    // 0..1 pubDate element
    Element pubDate = channel.getChild("pubDate", defNS);
    if (pubDate != null) {
        chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    Element lastBuildDate = channel.getChild("lastBuildDate", defNS);
    if (lastBuildDate != null) {
        chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // get category list
    // get list of <category> elements
    List listCategory = channel.getChildren("category", defNS);
    if (listCategory.size() < 1) {
        // fallback mechanism: get dc:category element
        listCategory = channel.getChildren("category", dcNS);
    }
    if (listCategory.size() > 0) {
        RecursiveHashtable<String> catTable = new RecursiveHashtable<String>();
        // for each category, parse hierarchy
        for (Object aListCategory : listCategory) {
            RecursiveHashtable<String> currTable = catTable;
            Element elCategory = (Element) aListCategory;
            // get contents of category element
            String[] titles = elCategory.getTextNormalize().split("/");
            for (String title : titles) {
                // tokenize category string to extract out hierarchy
                if (!currTable.containsKey(title)) {
                    // if token does not exist in current map, add it with child Hashtable
                    currTable.put(title, new RecursiveHashtable<String>());
                }
                // reset current Hashtable to child's Hashtable then iterate to next token
                currTable = currTable.get(title);
            }
        }
        ArrayList<CategoryIF> catList = new ArrayList<CategoryIF>();
        // transform cat list & hierarchy into list of CategoryIF elements
        Enumeration<String> enumCategories = catTable.keys();
        while (enumCategories.hasMoreElements()) {
            String key = enumCategories.nextElement();
            // build category list: getCategoryList(parent, title, children)
            CategoryIF cat = getCategoryList(null, key, catTable.get(key));
            catList.add(cat);
        }
        if (catList.size() > 0) {
            // if categories were actually created, then add list to item node
            chnl.setCategories(catList);
        }
    }

    // 0..1 managingEditor element
    Element managingEditor = channel.getChild("managingEditor", defNS);
    if (managingEditor != null) {
        chnl.setCreator(managingEditor.getTextTrim());
    }

    // 0..1 webMaster element
    Element webMaster = channel.getChild("webMaster", defNS);
    if (webMaster != null) {
        chnl.setPublisher(webMaster.getTextTrim());
    }

    // 0..1 cloud element
    Element cloud = channel.getChild("cloud", defNS);
    if (cloud != null) {
        String _port = cloud.getAttributeValue("port", defNS);
        int port = -1;
        if (_port != null) {
            try {
                port = Integer.parseInt(_port);
            } catch (NumberFormatException e) {
                logger.warn(e);
            }
        }
        chnl.setCloud(cBuilder.createCloud(cloud.getAttributeValue("domain", defNS), port,
                cloud.getAttributeValue("path", defNS), cloud.getAttributeValue("registerProcedure", defNS),
                cloud.getAttributeValue("protocol", defNS)));
    }

    chnl.setLastUpdated(dateParsed);

    // 0..1 skipHours element
    // 0..1 skipDays element

    return chnl;
}

From source file:io.smartspaces.workbench.project.jdom.JdomReader.java

License:Apache License

/**
 * Look up a child element and return its text with surrounding whitespace removed.
 *
 * @param element
 *          container element
 * @param namespace
 *          namespace for the key element
 * @param key
 *          name of the child element to read
 *
 * @return trimmed text of the child, or {@code null} if there is no such child
 *
 * @throws SmartSpacesException
 *           if the lookup itself fails with any exception; the original exception is kept as
 *           the cause
 */
protected String getChildTextTrimmed(Element element, Namespace namespace, String key)
        throws SmartSpacesException {
    final String trimmedText;
    try {
        trimmedText = element.getChildTextTrim(key, namespace);
    } catch (Exception cause) {
        throw new SimpleSmartSpacesException("Looking for text of child: " + key, cause);
    }
    return trimmedText;
}

From source file:net.instantcom.mm7.DeliverReq.java

License:Open Source License

/**
 * Populates this request from a SOAP envelope element.
 *
 * <p>After delegating to the superclass, the {@code DeliverReq} element inside the envelope's
 * {@code Body} is read: MM7 version, sender (first child of {@code Sender}, or null when there is
 * no {@code Sender}), recipients, the various ID / SPI / subject / application fields, priority
 * and time stamp. Text fields whose element is missing are set to null.
 *
 * <p>{@code Priority} is only applied when the element is present; when it is absent the current
 * priority is left untouched instead of failing with a {@link NullPointerException}.
 *
 * @param element the SOAP envelope element
 * @throws IllegalArgumentException if the priority text does not name a {@code Priority} constant
 */
@Override
public void load(Element element) {
    super.load(element);

    Element body = element.getChild("Body", MM7Message.ENVELOPE);
    Element req = body.getChild("DeliverReq", namespace);

    setMm7Version(req.getChildTextTrim("MM7Version", namespace));

    Element sender = req.getChild("Sender", namespace);
    if (sender != null) {
        // the address is the first child element of Sender
        Address a = new Address();
        a.load((Element) sender.getChildren().get(0));
        setSender(a);
    } else {
        setSender(null);
    }

    setRecipients(extractRecipients(req.getChild("Recipients", namespace)));
    setLinkedId(req.getChildTextTrim("LinkedID", namespace));
    setSenderSPI(req.getChildTextTrim("SenderSPI", namespace));
    setRecipientSPI(req.getChildTextTrim("RecipientSPI", namespace));
    setReplyChargingId(req.getChildTextTrim("ReplyChargingID", namespace));
    setSubject(req.getChildTextTrim("Subject", namespace));
    setApplicId(req.getChildTextTrim("ApplicID", namespace));
    setReplyApplicId(req.getChildTextTrim("ReplyApplicID", namespace));
    setAuxApplicInfo(req.getChildTextTrim("AuxApplicInfo", namespace));

    // getChildTextTrim returns null for a missing element, so guard before upper-casing.
    // Locale.ROOT keeps the enum-name mapping independent of the JVM default locale
    // (e.g. a Turkish locale would turn "high" into "HİGH").
    String priorityText = req.getChildTextTrim("Priority", namespace);
    if (priorityText != null) {
        setPriority(Priority.valueOf(priorityText.toUpperCase(java.util.Locale.ROOT)));
    }
    setTimeStamp(new RelativeDate(req.getChildTextTrim("TimeStamp", namespace)).toDate());

}

From source file:net.instantcom.mm7.DeliverRsp.java

License:Open Source License

/**
 * Populates this response from a SOAP envelope element: delegates to the superclass, then reads
 * the trimmed {@code ServiceCode} text of the {@code DeliverRsp} element found in the envelope's
 * {@code Body}.
 *
 * @param element the SOAP envelope element
 */
@Override
public void load(Element element) {
    super.load(element);

    final Element response = element.getChild("Body", MM7Message.ENVELOPE).getChild("DeliverRsp", namespace);
    final String serviceCode = response.getChildTextTrim("ServiceCode", namespace);
    setServiceCode(serviceCode);
}

From source file:net.instantcom.mm7.DeliveryReportReq.java

License:Open Source License

/**
 * Populates this delivery report from a SOAP envelope element.
 *
 * <p>After delegating to the superclass, the {@code DeliveryReportReq} element inside the
 * envelope's {@code Body} is read: MM7 version, message ID, recipient, sender (first child of
 * {@code Sender}, or null when there is no {@code Sender}), date, status and status text.
 *
 * @param element the SOAP envelope element
 */
@Override
public void load(Element element) {
    super.load(element);

    final Element soapBody = element.getChild("Body", MM7Message.ENVELOPE);
    final Element report = soapBody.getChild("DeliveryReportReq", namespace);

    setMm7Version(report.getChildTextTrim("MM7Version", namespace));
    setMessageID(report.getChildTextTrim("MessageID", namespace));

    setRecipient(extractRecipient(report.getChild("Recipient", namespace)));

    final Element senderElement = report.getChild("Sender", namespace);
    if (senderElement == null) {
        setSender(null);
    } else {
        // the address is the first child element of Sender
        final Address senderAddress = new Address();
        senderAddress.load((Element) senderElement.getChildren().get(0));
        setSender(senderAddress);
    }

    final String dateText = report.getChildTextTrim("Date", namespace);
    setDate(new RelativeDate(dateText).toDate());

    final String status = report.getChildTextTrim("MMStatus", namespace);
    setMmStatus(status);

    final String statusText = report.getChildTextTrim("StatusText", namespace);
    setStatusText(statusText);
}

From source file:net.instantcom.mm7.MM7Message.java

License:Open Source License

/**
 * Populates the common message fields from a SOAP envelope element.
 *
 * <p>The MM7 namespace is taken from the first descendant element of the envelope's {@code Body}
 * whose namespace URI contains {@code "MM7"}; the transaction ID is then read from the
 * {@code TransactionID} child of the envelope's {@code Header} using that namespace.
 *
 * @param element the SOAP envelope element
 * @throws IllegalStateException if no namespace is set on this message after scanning the body
 */
@Override
public void load(Element element) {
    final Namespace envelopeNs = element.getNamespace();
    final Element body = element.getChild("Body", envelopeNs);

    // Extract MM7 namespace from SOAP body: first matching descendant wins
    for (Iterator<?> descendants = body.getDescendants(new ElementFilter()); descendants.hasNext();) {
        final Namespace candidate = ((Element) descendants.next()).getNamespace();
        if (candidate == null || !candidate.getURI().contains("MM7")) {
            continue;
        }
        this.namespace = candidate;
        break;
    }

    if (this.namespace == null) {
        throw new IllegalStateException("can't autodetect MM7 namespace: " + body.toString());
    }

    final Element header = element.getChild("Header", envelopeNs);
    final String transactionId = header.getChildTextTrim("TransactionID", namespace);
    setTransactionId(transactionId);
}