Example usage for org.jdom2 Element getChildren

List of usage examples for org.jdom2 Element getChildren

Introduction

On this page you can find example usages of org.jdom2.Element.getChildren.

Prototype

public List<Element> getChildren(final String cname, final Namespace ns) 

Source Link

Document

This returns a List of all the child elements nested directly (one level deep) within this element with the given local name and belonging to the given Namespace, returned as Element objects.

Usage

From source file:de.hbrs.oryx.yawl.converter.layout.YAWLLayoutConverter.java

License:Open Source License

/**
 * Converts all decorators of a YAWL task. There may be two decorators, each with alignment TOP, LEFT, RIGHT
 * or BOTTOM. A decorator whose {@code type} attribute contains "join" is stored as the join decorator, one
 * whose {@code type} contains "split" as the split decorator.
 *
 * <p>A decorator without a {@code type} attribute is neither and is skipped.
 *
 * @param yawlContainer
 *            the container element of the YAWL task
 * @param layoutInformation
 *            already converted layout of the YAWL task
 */
private void convertDecorator(final Element yawlContainer, final NetElementLayout layoutInformation) {
    // JDOM2 returns a typed list that is empty (never null) when nothing matches, so neither a raw type
    // nor a null check is required here.
    final List<Element> yawlDecoratorList = yawlContainer.getChildren("decorator", yawlNamespace);
    for (final Element yawlDecorator : yawlDecoratorList) {
        final NetElementLayout.DecoratorType decoratorType = convertDecoratorType(yawlDecorator);
        final String type = yawlDecorator.getAttributeValue("type");
        if (type == null) {
            // getAttributeValue returns null for a missing attribute; dereferencing it used to abort the
            // whole conversion with a NullPointerException.
            continue;
        }
        if (type.contains("join")) {
            layoutInformation.setJoinDecorator(decoratorType);
        }
        if (type.contains("split")) {
            layoutInformation.setSplitDecorator(decoratorType);
        }
    }
}

From source file:de.huberlin.german.korpling.laudatioteitool.SplitTEI.java

License:Apache License

/**
 * Writes every {@code teiHeader} found directly below the first-level {@code teiCorpus} element into its
 * own XML file inside the {@code DocumentHeader} sub-directory of the output directory and validates each
 * written file.
 *
 * <p>The file name is the {@code xml:id} of the header's {@code fileDesc}; if that is absent, the text of
 * the {@code titleStmt/title} element; if that is absent as well, a random UUID.
 *
 * @param doc the merged TEI document to split
 * @return the errors collected by the validator over all written files
 * @throws LaudatioException if the output sub-directory cannot be created
 * @throws IOException       if a header file cannot be written
 * @throws SAXException      declared for the validation step
 */
private TEIValidator.Errors extractDocumentHeaders(Document doc)
        throws LaudatioException, IOException, SAXException {
    TEIValidator validator = documentSchemeURL == null ? new TEIDocumentValidator()
            : new FromURLValidator(documentSchemeURL);

    File documentDir = new File(outputDirectory, "DocumentHeader");
    if (!documentDir.exists() && !documentDir.mkdir()) {
        throw new LaudatioException(
                messages.getString("COULD NOT CREATE DIRECTORY") + documentDir.getAbsolutePath());
    }

    Element documentRoot = Preconditions.checkNotNull(doc.getRootElement().getChild("teiCorpus", null));

    // the namespace does not depend on the individual header, so look it up once
    Namespace teiNS = Namespace.getNamespace("http://www.tei-c.org/ns/1.0");

    for (Element docHeader : documentRoot.getChildren("teiHeader", null)) {
        Preconditions.checkState("DocumentHeader".equals(docHeader.getAttributeValue("type")));

        // wrap a copy of the header in a fresh TEI root element of a new document
        Element tei = new Element("TEI", teiNS);
        tei.addContent(docHeader.clone());
        Document newDoc = new Document(tei);

        // reference the schema the file is validated against
        if (documentSchemeURL == null) {
            newDoc.addContent(0, new ProcessingInstruction("xml-model",
                    "href=\"" + TEIDocumentValidator.DEFAULT_SCHEME_URL + "\""));
        } else {
            newDoc.addContent(0, new ProcessingInstruction("xml-model", "href=\"" + documentSchemeURL + "\""));
        }

        // we need to append an empty "text" element after the header
        Element text = new Element("text", teiNS);
        text.setText("");
        tei.addContent(text);

        Element fileDesc = Preconditions
                .checkNotNull(tei.getChild("teiHeader", null).getChild("fileDesc", null));

        // fallback name if neither an id nor a title is available
        String outName = UUID.randomUUID().toString();

        String id = fileDesc.getAttributeValue("id", Namespace.XML_NAMESPACE);
        if (id != null) {
            outName = id;
        } else {
            Element titleStmt = Preconditions.checkNotNull(fileDesc.getChild("titleStmt", null));

            String title = titleStmt.getChildText("title", null);
            if (title != null) {
                outName = title;
            }
        }

        File outputFile = new File(documentDir, outName + ".xml");
        XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
        // Close the writer before validating: otherwise one file handle leaks per header and the
        // validator reads a file that is still open for writing.
        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8");
        try {
            xmlOut.output(newDoc, writer);
        } finally {
            writer.close();
        }
        log.info(messages.getString("WRITTEN DOCUMENT HEADER"), outputFile.getPath());

        validator.validate(outputFile);

    }
    return validator.getErrors();
}

From source file:de.huberlin.german.korpling.laudatioteitool.SplitTEI.java

License:Apache License

/**
 * Writes every {@code teiHeader} found directly below the second-level {@code teiCorpus} element into its
 * own XML file inside the {@code PreparationHeader} sub-directory of the output directory and validates
 * each written file.
 *
 * <p>The file name is the {@code corresp} attribute of the header's {@code titleStmt/title} element. When
 * the same {@code corresp} value occurs again, the occurrence count is appended ({@code value_2},
 * {@code value_3}, ...) and a warning is logged. Without a {@code corresp} attribute a random UUID is used.
 *
 * @param doc the merged TEI document to split
 * @return the errors collected by the validator over all written files
 * @throws LaudatioException if the output sub-directory cannot be created
 * @throws IOException       if a header file cannot be written
 * @throws SAXException      declared for the validation step
 */
private TEIValidator.Errors extractPreparationSteps(Document doc)
        throws LaudatioException, IOException, SAXException {
    TEIValidator validator = preparationSchemeURL == null ? new TEIPreparationValidator()
            : new FromURLValidator(preparationSchemeURL);
    Multiset<String> knownPreparationTitles = HashMultiset.create();

    File documentDir = new File(outputDirectory, "PreparationHeader");
    if (!documentDir.exists() && !documentDir.mkdir()) {
        throw new LaudatioException(
                messages.getString("COULD NOT CREATE DIRECTORY") + documentDir.getAbsolutePath());
    }

    // look the outer corpus up once and reuse it for the nested lookup
    Element corpusRoot = Preconditions.checkNotNull(doc.getRootElement().getChild("teiCorpus", null));
    Element preparationRoot = Preconditions.checkNotNull(corpusRoot.getChild("teiCorpus", null));

    // the namespace does not depend on the individual header, so look it up once
    Namespace teiNS = Namespace.getNamespace("http://www.tei-c.org/ns/1.0");

    for (Element preparationHeader : preparationRoot.getChildren("teiHeader", null)) {
        Preconditions.checkState("PreparationHeader".equals(preparationHeader.getAttributeValue("type")));

        // wrap a copy of the header in a fresh TEI root element of a new document
        Element tei = new Element("TEI", teiNS);
        tei.addContent(preparationHeader.clone());
        Document newDoc = new Document(tei);

        // reference the schema the file is validated against
        if (preparationSchemeURL == null) {
            newDoc.addContent(0, new ProcessingInstruction("xml-model",
                    "href=\"" + TEIPreparationValidator.DEFAULT_SCHEME_URL + "\""));
        } else {
            newDoc.addContent(0,
                    new ProcessingInstruction("xml-model", "href=\"" + preparationSchemeURL + "\""));
        }

        // we need to append an empty "text" element after the header
        Element text = new Element("text", teiNS);
        text.setText("");
        tei.addContent(text);

        Element fileDesc = Preconditions
                .checkNotNull(tei.getChild("teiHeader", null).getChild("fileDesc", null));

        // fallback name if the title carries no "corresp" attribute
        String outName = UUID.randomUUID().toString();

        Element titleStmt = Preconditions.checkNotNull(fileDesc.getChild("titleStmt", null));
        Element title = Preconditions.checkNotNull(titleStmt.getChild("title", null));
        String corresp = title.getAttributeValue("corresp");
        if (corresp != null) {
            if (knownPreparationTitles.contains(corresp)) {
                // repeated name: disambiguate with the running occurrence count
                knownPreparationTitles.add(corresp);
                outName = corresp + "_" + knownPreparationTitles.count(corresp);
                log.warn(messages.getString("MORE THAN ONE PREPARATION HEADER"), corresp);
            } else {
                outName = corresp;
                knownPreparationTitles.add(corresp);
            }
        }

        File outputFile = new File(documentDir, outName + ".xml");
        XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
        // Close the writer before validating: otherwise one file handle leaks per header and the
        // validator reads a file that is still open for writing.
        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8");
        try {
            xmlOut.output(newDoc, writer);
        } finally {
            writer.close();
        }
        log.info(messages.getString("WRITTEN PREPARATION HEADER"), outputFile.getPath());

        validator.validate(outputFile);

    }
    return validator.getErrors();
}

From source file:de.intranda.goobi.plugins.sru.SRUHelper.java

License:Open Source License

/**
 * Returns the {@code record} element (namespace {@code MARC}) nested in the first
 * {@code records/record/recordData} path (namespace {@code SRW}) of the given document, i.e. the payload
 * without the surrounding SRU response elements.
 *
 * @param document the SRU response document
 * @return the nested record element, or {@code null} if any element along the path is missing
 */
public static Element getRecordWithoutSruHeader(Document document) {
    Element root = document.getRootElement();

    // <srw:records>
    Element srwRecords = root.getChild("records", SRW);
    if (srwRecords == null) {
        return null;
    }

    // <srw:record>
    List<Element> srwRecordList = srwRecords.getChildren("record", SRW);
    if (srwRecordList == null || srwRecordList.isEmpty()) {
        return null;
    }

    // <srw:recordData>
    Element recordData = srwRecordList.get(0).getChild("recordData", SRW);
    if (recordData == null) {
        // a record without payload used to cause a NullPointerException; treat it like every other
        // missing part of the path
        return null;
    }

    return recordData.getChild("record", MARC);
}

From source file:de.nava.informa.parsers.Atom_0_3_Parser.java

License:Open Source License

/**
 * Looks for "content" elements and takes first from them or looks for "summary" element if
 * "content" not found./*from w w w .  j a va 2s.  com*/
 *
 * @param item      item element.
 * @param namespace namespace.
 * @return description for item.
 */
public static String getDescription(Element item, Namespace namespace) {
    String strDesc = "";
    Element elDesc;

    List contents = item.getChildren("content", namespace);

    if (contents.size() > 0) {
        elDesc = (Element) contents.get(0);
    } else {
        elDesc = item.getChild("summary", namespace);
    }

    if (elDesc != null) {
        strDesc = getValue(elDesc);
    }

    return strDesc;
}

From source file:de.nava.informa.parsers.Atom_0_3_Parser.java

License:Open Source License

/**
 * Parses an Atom 0.3 feed element into a channel and creates one item per "entry" child.
 *
 * @param cBuilder builder used to create the channel and its items; must not be {@code null}
 * @param channel  the feed element to read from
 * @return the channel populated from the feed
 * @throws ParseException declared by the parser interface
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can " + "be created.");
    }

    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(channel);

    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        LOGGER.info("No default namespace found.");
    }

    // RSS 1.0 Dublin Core Module namespace
    Namespace dcNS = ParserUtils.getNamespace(channel, "dc");

    if (dcNS == null) {
        LOGGER.debug("No namespace for dublin core found");
        dcNS = defNS;
    }

    LOGGER.debug("start parsing.");

    // get version attribute
    String formatVersion = "0.3";

    if (channel.getAttribute("version") != null) {
        formatVersion = channel.getAttribute("version").getValue().trim();
        LOGGER.debug("Atom version " + formatVersion + " specified in document.");
    } else {
        LOGGER.info("No format version specified, using default.");
    }

    // --- read in channel information

    // Lower the case of these tags to simulate case-insensitive parsing
    ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "description", "tagline", "ttl",
            "modified", "author", "generator", "copyright", "link", "entry" });

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // TODO: support attributes: type, mode
    chnl.setFormat(ChannelFormat.ATOM_0_3);

    // language
    String language = channel.getAttributeValue("lang", Namespace.XML_NAMESPACE);

    if (language != null) {
        chnl.setLanguage(language);
    }

    // description element
    // The existence check must use the same namespace as the read below; checking without a namespace
    // never matches in a feed that declares a default namespace.
    if (channel.getChild("description", defNS) != null) {
        chnl.setDescription(channel.getChildTextTrim("description", defNS));
    } else {
        // fallback
        chnl.setDescription(channel.getChildTextTrim("tagline", defNS));
    }

    // ttl in dc namespace
    Element ttl = channel.getChild("ttl", dcNS);

    if (ttl != null) {
        String ttlString = ttl.getTextTrim();

        // an empty or non-numeric ttl is ignored instead of aborting the whole parse with an
        // unchecked NumberFormatException
        if ((ttlString != null) && (ttlString.length() > 0)) {
            try {
                chnl.setTtl(Integer.parseInt(ttlString));
            } catch (NumberFormatException e) {
                LOGGER.warn("Ignoring invalid ttl value '" + ttlString + "'.");
            }
        }
    }

    //  lastbuild element : modified ?
    Element modified = channel.getChild("modified", defNS);

    if (modified != null) {
        chnl.setPubDate(ParserUtils.getDate(modified.getTextTrim()));
    }

    // TODO : issued value
    /*
    if (modified != null) {
      modified = channel.getChild("issued", defNS);
      chnl.setLastBuildDate (ParserUtils.getDate(modified.getTextTrim()));
    }
    */

    // author element
    Element author = channel.getChild("author", defNS);

    if (author != null) {
        ParserUtils.matchCaseOfChildren(author, "name");
        chnl.setCreator(author.getChildTextTrim("name", defNS));
    }

    // generator element
    Element generator = channel.getChild("generator", defNS);

    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // copyright element
    Element copyright = channel.getChild("copyright", defNS);

    if (copyright != null) {
        chnl.setCopyright(getCopyright(copyright));
    }

    // n link elements
    // TODO : type attribut of link (text, application...)
    List<Element> links = channel.getChildren("link", defNS);

    for (Element linkElement : links) {
        // use first 'alternate' link
        String rel = linkElement.getAttributeValue("rel");
        String href = linkElement.getAttributeValue("href");

        if ((rel != null) && (href != null) && rel.equals("alternate")) {
            URL linkURL = ParserUtils.getURL(href);

            chnl.setSite(linkURL);

            break;
        }

        // TODO: further extraction of link information
    }

    // 1..n entry elements
    List<Element> items = channel.getChildren("entry", defNS);

    for (Element item : items) {
        // Lower the case of these tags to simulate case-insensitive parsing
        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "content", "summary", "issued", "subject" });

        // get title element
        // TODO : deal with type attribut
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";

        if (elTitle != null) {
            strTitle = getTitle(elTitle);
            LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle);
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Entry element found (" + strTitle + ").");
        }

        // get link element
        String strLink = AtomParserUtils.getItemLink(item, defNS);

        // get description element
        String strDesc = getDescription(item, defNS);

        // generate new news item (link to article)
        ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        curItem.setFound(dateParsed);

        // get issued element (required)
        Element elIssued = item.getChild("issued", defNS);

        if (elIssued == null) {
            // [adewale@gmail.com, 01-May-2005] Fix for blogs which have
            // 'created' dates, but not 'issued' dates -- in clear contravention
            // of the Atom 0.3 spec.
            Element elCreated = item.getChild("created", defNS);

            if (elCreated != null) {
                curItem.setDate(ParserUtils.getDate(elCreated.getTextTrim()));
            }
        } else {
            curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
        }

        // get subject element
        Element elSubject = item.getChild("subject", dcNS);

        if (elSubject != null) {
            // TODO: Mulitple subject elements not handled currently
            curItem.setSubject(elSubject.getTextTrim());
        }
    }

    // set to current date
    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.Atom_1_0_Parser.java

License:Open Source License

/**
 * Determines the description of an entry: the first "content" child is used if there is one, otherwise
 * the "summary" child.
 *
 * @param item      item element.
 * @param namespace namespace.
 * @return description for item; an empty string if neither element exists.
 */
public static String getDescription(Element item, Namespace namespace) {
    // TODO there should be some way of knowing if we are returning summary or
    // content
    final List<Element> contentElements = item.getChildren("content", namespace);

    // prefer the first content element, fall back to the summary
    final Element source = contentElements.isEmpty()
            ? item.getChild("summary", namespace)
            : contentElements.get(0);

    if (source == null) {
        return "";
    }
    return AtomParserUtils.getValue(source, getMode(source));
}

From source file:de.nava.informa.parsers.Atom_1_0_Parser.java

License:Open Source License

/**
 * Parses an Atom 1.0 feed element into a channel and creates one item per "entry" child, including the
 * entry's authors, date and categories.
 *
 * @param cBuilder builder used to create the channel and its items; must not be {@code null}
 * @param channel  the feed element to read from
 * @return the channel populated from the feed
 * @throws ParseException declared by the parser interface
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element channel) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can " + "be created.");
    }

    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(channel);

    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        LOGGER.info("No default namespace found.");
    } else if ((defNS.getURI() == null) || !defNS.getURI().equals("http://www.w3.org/2005/Atom")) {
        LOGGER.warn("Namespace is not really supported, still trying assuming Atom 1.0 format");
    }

    LOGGER.debug("start parsing.");

    // --- read in channel information

    // Lower the case of these tags to simulate case-insensitive parsing
    ParserUtils.matchCaseOfChildren(channel, new String[] { "title", "subtitle", "updated", "published",
            "author", "generator", "rights", "link", "entry" });

    // TODO icon and logo: Feed element can have upto 1 logo and icon.
    // TODO id: Feed and all entries have a unique id string. This can
    // be the URL of the website. Supporting this will require API change.
    // TODO: Feed can optionally have category information

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    chnl.setFormat(ChannelFormat.ATOM_1_0);

    // description element
    // The existence check must use the same namespace as the read below; checking without a namespace
    // never matches in a feed that declares the Atom namespace as its default.
    if (channel.getChild("subtitle", defNS) != null) {
        chnl.setDescription(channel.getChildTextTrim("subtitle", defNS));
    }

    // TODO: should we use summary element?

    // lastbuild element : updated ?
    Element updated = channel.getChild("updated", defNS);

    if (updated != null) {
        chnl.setPubDate(ParserUtils.getDate(updated.getTextTrim()));
    }

    // author element
    List<Element> authors = channel.getChildren("author", defNS);

    chnl.setCreator(getAuthorString(authors, defNS));

    // TODO we are ignoring contributors information

    // generator element
    Element generator = channel.getChild("generator", defNS);

    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // TODO generator can have URI and version information

    // copyright element
    Element rights = channel.getChild("rights", defNS);

    if (rights != null) {
        chnl.setCopyright(AtomParserUtils.getValue(rights, getMode(rights)));
    }

    List<Element> links = channel.getChildren("link", defNS);

    URL linkUrl = null;

    for (Element linkElement : links) {
        // use first 'alternate' link
        // if rel is not present, use first link without rel
        String rel = linkElement.getAttributeValue("rel");
        String href = linkElement.getAttributeValue("href");

        // TODO we need to handle relative links also
        if ((rel == null) && (href != null) && (linkUrl == null)) {
            // remember the first rel-less link, but keep looking for an explicit 'alternate' one
            linkUrl = ParserUtils.getURL(href);
        } else if ((rel != null) && (href != null) && rel.equals("alternate")) {
            linkUrl = ParserUtils.getURL(href);

            break;
        }
    }

    if (linkUrl != null) {
        chnl.setSite(linkUrl);
    }

    List<Element> items = channel.getChildren("entry", defNS);

    for (Element item : items) {
        // Lower the case of these tags to simulate case-insensitive parsing
        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "content", "summary", "published", "author" });

        // TODO entry, if copied from some other feed, may have source element
        // TODO each entry can have its own rights declaration

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";

        if (elTitle != null) {
            strTitle = AtomParserUtils.getValue(elTitle, getMode(elTitle));
            LOGGER.debug("Parsing title " + elTitle.getTextTrim() + "->" + strTitle);
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Entry element found (" + strTitle + ").");
        }

        // get link element
        String strLink = AtomParserUtils.getItemLink(item, defNS);

        // get description element
        String strDesc = getDescription(item, defNS);

        // generate new news item (link to article)
        ItemIF curItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        //TODO enclosure data
        curItem.setFound(dateParsed);

        List<Element> itemAuthors = item.getChildren("author", defNS);

        curItem.setCreator(getAuthorString(itemAuthors, defNS));

        // get published element
        Element elIssued = item.getChild("published", defNS);

        if (elIssued == null) {
            // published element may not be present (but updated should be)
            Element elUpdated = item.getChild("updated", defNS);

            // TODO there should be some way to determining which one are we
            // returning
            if (elUpdated != null) {
                curItem.setDate(ParserUtils.getDate(elUpdated.getTextTrim()));
            }
        } else {
            curItem.setDate(ParserUtils.getDate(elIssued.getTextTrim()));
        }

        // get list of category elements
        List<Element> elCategoryList = item.getChildren("category", defNS);

        // categories present will be stored here
        Collection<CategoryIF> categories = new ArrayList<>();

        // multiple category elements may be present
        for (Element elCategory : elCategoryList) {
            // notice: atom spec. forbids to have category "term" (="subject")
            // set as inner text of category tags, so we have to read it from
            // the "term" attribute

            // TODO: what if we have more than one category element present?
            // subject would be overwritten each loop and therefore represent only
            // the last category read, so does this make any sense?

            // TODO: what about adding functionality for accessing "label" or "scheme" attributes?
            // if set, a label should be displayed instead of the value set in term

            // we keep this line not to break up things which
            // use getSubject() to read an item category
            curItem.setSubject(elCategory.getAttributeValue("term"));

            CategoryIF c = new Category(elCategory.getAttributeValue("term"));

            // add current category to category list
            categories.add(c);
        }

        // assign categories
        curItem.setCategories(categories);
    }

    // set to current date
    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.RSS_1_0_Parser.java

License:Open Source License

public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can " + "be created.");
    }//from  w w w . ja  va 2s  . com
    Date dateParsed = new Date();
    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        logger.info("No default namespace found.");
    }

    // RSS 1.0 Dublin Core Module namespace
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    // fall back to default name space (for retrieving descriptions)
    if (dcNS == null) {
        dcNS = defNS;
    }

    // RSS 1.0 Syndication Module namespace
    Namespace syNS = ParserUtils.getNamespace(root, "sy");

    // RSS 1.0 Aggregation Module namespace
    Namespace agNS = ParserUtils.getNamespace(root, "ag");

    // RSS 1.0 Administration Module namespace
    Namespace adminNS = ParserUtils.getNamespace(root, "admin");

    // RSS 1.0 DCTerms Module namespace
    Namespace dctermsNS = ParserUtils.getNamespace(root, "dcterms");

    // RSS 1.0 Annotation Module namespace
    Namespace annotateNS = ParserUtils.getNamespace(root, "annotate");

    // RSS091 Module namespace
    Namespace rss091NS = ParserUtils.getNamespace(root, "rss091");

    // Content namespace
    Namespace contentNS = ParserUtils.getNamespace(root, "content");

    ParserUtils.matchCaseOfChildren(root, new String[] { "channel", "item", "image", "textinput" });

    // Get the channel element (only one occurs)
    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
        logger.warn("Channel element could not be retrieved from feed.");
        throw new ParseException("No channel element found in feed.");
    }

    // ----------------------- read in channel information

    ParserUtils.matchCaseOfChildren(channel,
            new String[] { "title", "description", "link", "creator", "managingEditor", "publisher",
                    "errorReportsTo", "webMaster", "language", "rights", "copyright", "rating", "date",
                    "issued", "pubdate", "lastBuildDate", "modified", "generatorAgent", "updatePeriod",
                    "updateFrequency", "updateBase" });

    // title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // set channel format
    chnl.setFormat(ChannelFormat.RSS_1_0);

    // description element
    chnl.setDescription(channel.getChildTextTrim("description", defNS));

    // link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // creator element
    Element creator = channel.getChild("creator", dcNS);
    if (creator == null) {
        creator = channel.getChild("managingEditor", rss091NS);
    }
    if (creator != null) {
        chnl.setCreator(creator.getTextTrim());
    }

    // publisher element
    String publisher = channel.getChildTextTrim("publisher", dcNS);
    if (publisher == null) {
        Element elErrorReportsTo = channel.getChild("errorReportsTo", adminNS);
        if (elErrorReportsTo != null) {
            publisher = elErrorReportsTo.getAttributeValue("resource",
                    ParserUtils.getNamespace(elErrorReportsTo, "rdf"));
        }
    }
    if (publisher == null) {
        publisher = channel.getChildTextTrim("webMaster", rss091NS);
    }
    chnl.setPublisher(publisher);

    // language element
    Element language = channel.getChild("language", dcNS);
    if (language == null) {
        language = channel.getChild("language", rss091NS);
    }
    if (language != null) {
        chnl.setLanguage(language.getTextTrim());
    }

    // rights element
    Element copyright = channel.getChild("rights", dcNS);
    if (copyright == null) {
        copyright = channel.getChild("copyright", rss091NS);
    }
    if (copyright != null) {
        chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 Rating element
    Element rating = channel.getChild("rating", rss091NS);
    if (rating != null) {
        chnl.setRating(rating.getTextTrim());
    }

    // 0..1 Docs element
    // use namespace URI
    chnl.setDocs(defNS.getURI());

    // 0..1 pubDate element
    Element pubDate = channel.getChild("date", dcNS);
    if (pubDate == null) {
        pubDate = channel.getChild("issued", dctermsNS);
    }
    if (pubDate == null) {
        pubDate = channel.getChild("pubdate", rss091NS);
    }
    if (pubDate != null) {
        chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    Element lastBuildDate = channel.getChild("lastBuildDate");
    if (lastBuildDate == null) {
        lastBuildDate = channel.getChild("modified", dctermsNS);
    }
    if (lastBuildDate == null) {
        lastBuildDate = channel.getChild("lastBuildDate", rss091NS);
    }
    if (lastBuildDate != null) {
        chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // RSS 1.0 Administration Module support

    // 0..1 generator element
    Element elGenerator = channel.getChild("generatorAgent", adminNS);
    if (elGenerator != null) {
        Attribute generator = elGenerator.getAttribute("resource",
                ParserUtils.getNamespace(elGenerator, "rdf"));
        if (generator != null) {
            chnl.setGenerator(generator.getValue());
        }
    }

    // RSS 1.0 Syndication Module support

    // 0..1 update period element
    Element updatePeriod = channel.getChild("updatePeriod", syNS);
    if (updatePeriod != null) {
        try {
            ChannelUpdatePeriod channelUpdatePeriod = ChannelUpdatePeriod
                    .valueFromText(updatePeriod.getTextTrim());
            chnl.setUpdatePeriod(channelUpdatePeriod);
        } catch (IllegalArgumentException ex) {
            logger.warn(updatePeriod.getTextTrim(), ex);
        }
    }

    // 0..1 update frequency element
    Element updateFrequency = channel.getChild("updateFrequency", syNS);
    if (updateFrequency != null) {
        chnl.setUpdateFrequency((new Integer(updateFrequency.getTextTrim())).intValue());
    }

    // 0..1 update base element
    Element updateBase = channel.getChild("updateBase", syNS);
    if (updateBase != null) {
        chnl.setUpdateBase(ParserUtils.getDate(updateBase.getTextTrim()));
    }

    if ((updatePeriod != null) && updateFrequency != null) {
        int ttl = getTTL(chnl.getUpdatePeriod(), chnl.getUpdateFrequency());
        chnl.setTtl(ttl);
    }

    // item elements
    List items = root.getChildren("item", defNS);
    Iterator i = items.iterator();
    while (i.hasNext()) {
        Element item = (Element) i.next();

        ParserUtils.matchCaseOfChildren(item, new String[] { "title", "link", "encoded", "description",
                "creator", "subject", "date", "sourceURL", "source", "timestamp", "reference" });

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";
        if (elTitle != null) {
            strTitle = elTitle.getTextTrim();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Item element found (" + strTitle + ").");
        }

        // get link element
        Element elLink = item.getChild("link", defNS);
        String strLink = "";
        if (elLink != null) {
            strLink = elLink.getTextTrim();
        }

        // get description element
        Element elDesc = item.getChild("encoded", contentNS);
        if (elDesc == null) {
            elDesc = item.getChild("description", defNS);
        }
        if (elDesc == null) {
            elDesc = item.getChild("description", dcNS);
        }
        String strDesc = "";
        if (elDesc != null) {
            strDesc = elDesc.getTextTrim();
        }

        // generate new RSS item (link to article)
        ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));
        rssItem.setFound(dateParsed);

        // get creator element
        Element elCreator = item.getChild("creator", dcNS);
        if (elCreator != null) {
            rssItem.setCreator(elCreator.getTextTrim());
        }

        // get subject element
        Element elSubject = item.getChild("subject", dcNS);
        if (elSubject != null) {
            // TODO: Mulitple subject elements not handled currently
            rssItem.setSubject(elSubject.getTextTrim());
        }

        // get date element
        Element elDate = item.getChild("date", dcNS);
        if (elDate != null) {
            rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
        }

        // get source element - default to Aggregation module, then try Dublin Core
        String sourceName = null;
        String sourceLocation = null;
        Date sourceTimestamp = null;

        Element elSourceURL = item.getChild("sourceURL", agNS);
        if (elSourceURL == null) { //  No Aggregation module - try Dublin Core
            elSourceURL = item.getChild("source", dcNS);
            if (elSourceURL != null) {
                sourceLocation = elSourceURL.getTextTrim();
                sourceName = "Source";
            }
        } else { // Aggregation module
            sourceLocation = elSourceURL.getTextTrim();
            Element elSourceName = item.getChild("source", agNS);
            if (elSourceName != null) {
                sourceName = elSourceName.getTextTrim();
            }
            Element elSourceTimestamp = item.getChild("timestamp", agNS);
            if (elSourceTimestamp != null) {
                sourceTimestamp = ParserUtils.getDate(elSourceTimestamp.getTextTrim());
            }
        }

        if (sourceLocation != null) {
            ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation,
                    sourceTimestamp);
            rssItem.setSource(itemSource);
        }

        // comments element - use Annotation module
        Element elReference = item.getChild("reference", annotateNS);
        if (elReference != null) {
            Attribute resource = elReference.getAttribute("resource",
                    ParserUtils.getNamespace(elReference, "rdf"));
            if (resource != null) {
                URL resourceURL = ParserUtils.getURL(resource.getValue());
                if (resourceURL != null) {
                    rssItem.setComments(resourceURL);
                }
            }
        }

    }

    // image element
    Element image = root.getChild("image", defNS);
    if (image != null) {

        ParserUtils.matchCaseOfChildren(image,
                new String[] { "title", "url", "link", "width", "height", "description" });

        ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS),
                ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
                ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
        Element imgWidth = image.getChild("width", defNS);
        if (imgWidth != null) {
            try {
                rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn(e);
            }
        }
        Element imgHeight = image.getChild("height", defNS);
        if (imgHeight != null) {
            try {
                rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn(e);
            }
        }
        Element imgDescr = image.getChild("description", defNS);
        if (imgDescr != null) {
            rssImage.setDescription(imgDescr.getTextTrim());
        }
        chnl.setImage(rssImage);
    }

    // textinput element
    Element txtinp = root.getChild("textinput", defNS);
    if (txtinp != null) {

        ParserUtils.matchCaseOfChildren(image, new String[] { "title", "description", "name", "link" });

        String tiTitle = null;
        if (txtinp.getChild("title", defNS) != null) {
            tiTitle = txtinp.getChild("title", defNS).getTextTrim();
        }
        String tiDescr = null;
        if (txtinp.getChild("description", defNS) != null) {
            tiDescr = txtinp.getChild("description", defNS).getTextTrim();
        }
        String tiName = null;
        if (txtinp.getChild("name", defNS) != null) {
            tiName = txtinp.getChild("name", defNS).getTextTrim();
        }
        URL tiLink = null;
        if (txtinp.getChild("link", defNS) != null) {
            tiLink = ParserUtils.getURL(txtinp.getChild("link", defNS).getTextTrim());
        }
        TextInputIF rssTextInput = cBuilder.createTextInput(tiTitle, tiDescr, tiName, tiLink);
        chnl.setTextInput(rssTextInput);
    }

    chnl.setLastUpdated(dateParsed);

    return chnl;
}

From source file:de.nava.informa.parsers.RSS_2_0_Parser.java

License:Open Source License

/**
 * Parses an RSS 2.0 document into a channel.
 *
 * <p>The single <code>channel</code> child of <code>root</code> is read and its
 * metadata, items, image, text input, categories and cloud settings are copied
 * onto a channel created through <code>cBuilder</code>. Child elements are looked
 * up in the document's default namespace; the <code>dc</code> and
 * <code>content</code> namespaces (each falling back to the default namespace when
 * not declared) are used for the encoded description and as fallbacks for item
 * subject, category, date and creator.</p>
 *
 * <p>The <code>skipHours</code> and <code>skipDays</code> channel elements are not
 * read.</p>
 *
 * @param cBuilder builder used to create the channel and all of its parts; must not be null
 * @param root root element of the feed document
 * @return the channel populated from the feed
 * @throws ParseException if <code>root</code> has no <code>channel</code> child
 * @throws RuntimeException if <code>cBuilder</code> is null
 * @see de.nava.informa.core.ChannelParserIF#parse(de.nava.informa.core.ChannelBuilderIF, org.jdom2.Element)
 */
public ChannelIF parse(ChannelBuilderIF cBuilder, Element root) throws ParseException {
    if (cBuilder == null) {
        throw new RuntimeException("Without builder no channel can be created.");
    }
    // one timestamp shared by the channel (last updated) and every item (found)
    Date dateParsed = new Date();
    logger.debug("start parsing.");

    Namespace defNS = ParserUtils.getDefaultNS(root);
    if (defNS == null) {
        defNS = Namespace.NO_NAMESPACE;
        logger.info("No default namespace found.");
    }
    // Dublin Core namespace, falling back to the default namespace
    Namespace dcNS = ParserUtils.getNamespace(root, "dc");
    if (dcNS == null) {
        dcNS = defNS;
    }
    // Content namespace, falling back to the default namespace
    Namespace contentNS = ParserUtils.getNamespace(root, "content");
    if (contentNS == null) {
        contentNS = defNS;
    }

    ParserUtils.matchCaseOfChildren(root, "channel");

    // Get the channel element (only one occurs)
    Element channel = root.getChild("channel", defNS);
    if (channel == null) {
        logger.warn("Channel element could not be retrieved from feed.");
        throw new ParseException("No channel element found in feed.");
    }

    // --- read in channel information

    // Every channel child read below is listed here, including "ttl".
    ParserUtils.matchCaseOfChildren(channel,
            new String[] { "title", "description", "link", "language", "item", "image", "textinput",
                    "copyright", "rating", "docs", "generator", "ttl", "pubDate", "lastBuildDate",
                    "category", "managingEditor", "webMaster", "cloud" });

    // 1 title element
    ChannelIF chnl = cBuilder.createChannel(channel, channel.getChildTextTrim("title", defNS));

    // set channel format
    chnl.setFormat(ChannelFormat.RSS_2_0);

    // 1 description element
    chnl.setDescription(channel.getChildTextTrim("description", defNS));

    // 1 link element
    chnl.setSite(ParserUtils.getURL(channel.getChildTextTrim("link", defNS)));

    // 1 language element
    chnl.setLanguage(channel.getChildTextTrim("language", defNS));

    // 1..n item elements
    List<Element> items = channel.getChildren("item", defNS);
    for (Element item : items) {

        ParserUtils.matchCaseOfChildren(item,
                new String[] { "title", "link", "encoded", "description", "subject", "category", "pubDate",
                        "date", "author", "creator", "comments", "guid", "source", "enclosure" });

        // get title element
        Element elTitle = item.getChild("title", defNS);
        String strTitle = "<No Title>";
        if (elTitle != null) {
            strTitle = elTitle.getTextTrim();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Item element found (" + strTitle + ").");
        }

        // get link element
        Element elLink = item.getChild("link", defNS);
        String strLink = "";
        if (elLink != null) {
            strLink = elLink.getTextTrim();
        }

        // get description element: content:encoded wins over plain description
        Element elDesc = item.getChild("encoded", contentNS);
        if (elDesc == null) {
            elDesc = item.getChild("description", defNS);
        }
        String strDesc = "";
        if (elDesc != null) {
            strDesc = elDesc.getTextTrim();
        }

        // generate new RSS item (link to article)
        ItemIF rssItem = cBuilder.createItem(item, chnl, strTitle, strDesc, ParserUtils.getURL(strLink));

        // get subject element, falling back to dc:subject
        Element elSubject = item.getChild("subject", defNS);
        if (elSubject == null) {
            elSubject = item.getChild("subject", dcNS);
        }
        if (elSubject != null) {
            rssItem.setSubject(elSubject.getTextTrim());
        }

        // get category list; only set it if categories were actually created
        List<CategoryIF> itemCategories = parseCategories(item, defNS, dcNS);
        if (!itemCategories.isEmpty()) {
            rssItem.setCategories(itemCategories);
        }

        // get publication date, falling back to dc:date
        Element elDate = item.getChild("pubDate", defNS);
        if (elDate == null) {
            elDate = item.getChild("date", dcNS);
        }
        if (elDate != null) {
            rssItem.setDate(ParserUtils.getDate(elDate.getTextTrim()));
        }

        rssItem.setFound(dateParsed);

        // get author element, falling back to dc:creator
        Element elAuthor = item.getChild("author", defNS);
        if (elAuthor == null) {
            elAuthor = item.getChild("creator", dcNS);
        }
        if (elAuthor != null) {
            rssItem.setCreator(elAuthor.getTextTrim());
        }

        // get comments element (an empty string is passed on when it is absent)
        Element elComments = item.getChild("comments", defNS);
        String strComments = "";
        if (elComments != null) {
            strComments = elComments.getTextTrim();
        }
        rssItem.setComments(ParserUtils.getURL(strComments));

        // get guid element
        Element elGuid = item.getChild("guid", defNS);
        if (elGuid != null) {
            String guidUrl = elGuid.getTextTrim();
            // treated as a permalink unless the isPermaLink attribute says otherwise
            boolean permaLink = true;
            Attribute permaLinkAttribute = lookupAttribute(elGuid, "isPermaLink", defNS);
            if (permaLinkAttribute != null) {
                String permaLinkStr = permaLinkAttribute.getValue();
                if (permaLinkStr != null) {
                    permaLink = Boolean.parseBoolean(permaLinkStr);
                }
            }
            ItemGuidIF itemGuid = cBuilder.createItemGuid(rssItem, guidUrl, permaLink);
            rssItem.setGuid(itemGuid);
        }

        // get source element; only recorded when it carries a url attribute
        Element elSource = item.getChild("source", defNS);
        if (elSource != null) {
            String sourceName = elSource.getTextTrim();
            Attribute sourceAttribute = lookupAttribute(elSource, "url", defNS);
            if (sourceAttribute != null) {
                String sourceLocation = sourceAttribute.getValue().trim();
                ItemSourceIF itemSource = cBuilder.createItemSource(rssItem, sourceName, sourceLocation, null);
                rssItem.setSource(itemSource);
            }
        }

        // get enclosure element; missing attributes leave null / -1 defaults
        Element elEnclosure = item.getChild("enclosure", defNS);
        if (elEnclosure != null) {
            URL location = null;
            String type = null;
            int length = -1;
            Attribute urlAttribute = lookupAttribute(elEnclosure, "url", defNS);
            if (urlAttribute != null) {
                location = ParserUtils.getURL(urlAttribute.getValue().trim());
            }
            Attribute typeAttribute = lookupAttribute(elEnclosure, "type", defNS);
            if (typeAttribute != null) {
                type = typeAttribute.getValue().trim();
            }
            Attribute lengthAttribute = lookupAttribute(elEnclosure, "length", defNS);
            if (lengthAttribute != null) {
                try {
                    length = Integer.parseInt(lengthAttribute.getValue().trim());
                } catch (NumberFormatException e) {
                    // keep the -1 default for an unparsable length
                    logger.warn(e);
                }
            }
            ItemEnclosureIF itemEnclosure = cBuilder.createItemEnclosure(rssItem, location, type, length);
            rssItem.setEnclosure(itemEnclosure);
        }
    }

    // 0..1 image element
    Element image = channel.getChild("image", defNS);
    if (image != null) {

        ParserUtils.matchCaseOfChildren(image,
                new String[] { "title", "url", "link", "width", "height", "description" });

        ImageIF rssImage = cBuilder.createImage(image.getChildTextTrim("title", defNS),
                ParserUtils.getURL(image.getChildTextTrim("url", defNS)),
                ParserUtils.getURL(image.getChildTextTrim("link", defNS)));
        Element imgWidth = image.getChild("width", defNS);
        if (imgWidth != null) {
            try {
                rssImage.setWidth(Integer.parseInt(imgWidth.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn("Error parsing width: " + e.getMessage());
            }
        }
        Element imgHeight = image.getChild("height", defNS);
        if (imgHeight != null) {
            try {
                rssImage.setHeight(Integer.parseInt(imgHeight.getTextTrim()));
            } catch (NumberFormatException e) {
                logger.warn("Error parsing height: " + e.getMessage());
            }
        }
        Element imgDescr = image.getChild("description", defNS);
        if (imgDescr != null) {
            rssImage.setDescription(imgDescr.getTextTrim());
        }
        chnl.setImage(rssImage);
    }

    // 0..1 textinput element
    Element txtinp = channel.getChild("textinput", defNS);
    if (txtinp != null) {

        ParserUtils.matchCaseOfChildren(txtinp, new String[] { "title", "description", "name", "link" });

        TextInputIF rssTextInput = cBuilder.createTextInput(txtinp.getChildTextTrim("title", defNS),
                txtinp.getChildTextTrim("description", defNS), txtinp.getChildTextTrim("name", defNS),
                ParserUtils.getURL(txtinp.getChildTextTrim("link", defNS)));
        chnl.setTextInput(rssTextInput);
    }

    // 0..1 copyright element
    Element copyright = channel.getChild("copyright", defNS);
    if (copyright != null) {
        chnl.setCopyright(copyright.getTextTrim());
    }

    // 0..1 rating element
    Element rating = channel.getChild("rating", defNS);
    if (rating != null) {
        chnl.setRating(rating.getTextTrim());
    }

    // 0..1 docs element
    Element docs = channel.getChild("docs", defNS);
    if (docs != null) {
        chnl.setDocs(docs.getTextTrim());
    }

    // 0..1 generator element
    Element generator = channel.getChild("generator", defNS);
    if (generator != null) {
        chnl.setGenerator(generator.getTextTrim());
    }

    // 0..1 ttl element
    Element ttl = channel.getChild("ttl", defNS);
    if (ttl != null) {
        String ttlValue = ttl.getTextTrim();
        try {
            chnl.setTtl(Integer.parseInt(ttlValue));
        } catch (NumberFormatException e) {
            logger.warn("Invalid TTL format: '" + ttlValue + "'");
        }
    }

    // 0..1 pubDate element
    Element pubDate = channel.getChild("pubDate", defNS);
    if (pubDate != null) {
        chnl.setPubDate(ParserUtils.getDate(pubDate.getTextTrim()));
    }

    // 0..1 lastBuildDate element
    Element lastBuildDate = channel.getChild("lastBuildDate", defNS);
    if (lastBuildDate != null) {
        chnl.setLastBuildDate(ParserUtils.getDate(lastBuildDate.getTextTrim()));
    }

    // get category list; only set it if categories were actually created
    List<CategoryIF> channelCategories = parseCategories(channel, defNS, dcNS);
    if (!channelCategories.isEmpty()) {
        chnl.setCategories(channelCategories);
    }

    // 0..1 managingEditor element
    Element managingEditor = channel.getChild("managingEditor", defNS);
    if (managingEditor != null) {
        chnl.setCreator(managingEditor.getTextTrim());
    }

    // 0..1 webMaster element
    Element webMaster = channel.getChild("webMaster", defNS);
    if (webMaster != null) {
        chnl.setPublisher(webMaster.getTextTrim());
    }

    // 0..1 cloud element
    Element cloud = channel.getChild("cloud", defNS);
    if (cloud != null) {
        String portText = lookupAttributeValue(cloud, "port", defNS);
        int port = -1;
        if (portText != null) {
            try {
                port = Integer.parseInt(portText);
            } catch (NumberFormatException e) {
                // keep the -1 default for an unparsable port
                logger.warn(e);
            }
        }
        chnl.setCloud(cBuilder.createCloud(lookupAttributeValue(cloud, "domain", defNS), port,
                lookupAttributeValue(cloud, "path", defNS),
                lookupAttributeValue(cloud, "registerProcedure", defNS),
                lookupAttributeValue(cloud, "protocol", defNS)));
    }

    chnl.setLastUpdated(dateParsed);

    // 0..1 skipHours element - not handled
    // 0..1 skipDays element - not handled

    return chnl;
}

/**
 * Builds the category list for the <code>category</code> children of an element.
 *
 * <p>Children are taken from the default namespace; only if there are none are the
 * ones in the Dublin Core namespace used. Each category text is split on
 * <code>/</code> into a hierarchy, hierarchies sharing a prefix are merged, and one
 * category is created per distinct top-level title via <code>getCategoryList</code>.</p>
 *
 * @param parent element whose category children are read (an item or the channel)
 * @param defNS default namespace of the document
 * @param dcNS Dublin Core namespace used as fallback
 * @return the top-level categories; empty if there are none
 * @throws ParseException declared defensively so that anything
 *         <code>getCategoryList</code> may throw still propagates through
 *         <code>parse</code>; NOTE(review): drop if it declares no checked exception
 */
private List<CategoryIF> parseCategories(Element parent, Namespace defNS, Namespace dcNS)
        throws ParseException {
    List<Element> categoryElements = parent.getChildren("category", defNS);
    if (categoryElements.isEmpty()) {
        // fallback mechanism: get dc:category elements
        categoryElements = parent.getChildren("category", dcNS);
    }

    List<CategoryIF> categories = new ArrayList<CategoryIF>();
    if (categoryElements.isEmpty()) {
        return categories;
    }

    // merge all "a/b/c" paths into one tree keyed by title
    RecursiveHashtable<String> catTable = new RecursiveHashtable<String>();
    for (Element elCategory : categoryElements) {
        RecursiveHashtable<String> currTable = catTable;
        for (String title : elCategory.getTextNormalize().split("/")) {
            if (!currTable.containsKey(title)) {
                // unseen token at this level: add it with an empty child table
                currTable.put(title, new RecursiveHashtable<String>());
            }
            // descend into the child's table before handling the next token
            currTable = currTable.get(title);
        }
    }

    // transform the tree into CategoryIF objects: getCategoryList(parent, title, children)
    Enumeration<String> topLevelTitles = catTable.keys();
    while (topLevelTitles.hasMoreElements()) {
        String key = topLevelTitles.nextElement();
        categories.add(getCategoryList(null, key, catTable.get(key)));
    }
    return categories;
}

/**
 * Looks up an attribute in the given namespace and, if it is not found there,
 * as an unqualified (no-namespace) attribute.
 *
 * <p>A default namespace declaration does not apply to attributes, so an
 * unprefixed attribute such as <code>url</code> is never found by a lookup in a
 * non-empty default namespace; the second lookup covers that case.</p>
 *
 * @param element element carrying the attribute
 * @param name local name of the attribute
 * @param ns namespace to try first
 * @return the attribute, or null if neither lookup finds it
 */
private static Attribute lookupAttribute(Element element, String name, Namespace ns) {
    Attribute attribute = element.getAttribute(name, ns);
    if (attribute == null && !Namespace.NO_NAMESPACE.equals(ns)) {
        attribute = element.getAttribute(name);
    }
    return attribute;
}

/**
 * Returns the value of the attribute found by
 * {@link #lookupAttribute(Element, String, Namespace)}.
 *
 * @param element element carrying the attribute
 * @param name local name of the attribute
 * @param ns namespace to try first
 * @return the attribute value, or null if the attribute is absent
 */
private static String lookupAttributeValue(Element element, String name, Namespace ns) {
    Attribute attribute = lookupAttribute(element, name, ns);
    return attribute == null ? null : attribute.getValue();
}