Example usage for org.dom4j Node getText

List of usage examples for org.dom4j Node getText

Introduction

In this page you can find the example usage for org.dom4j Node getText.

Prototype

String getText();

Source Link

Document

Returns the text of this node.

Usage

From source file:de.fct.companian.analyze.mvn.helper.PomHelper.java

License:Apache License

/**
 * Resolves the parent POM declared in this POM against the local Maven repository below the
 * user's home directory and, when the file exists and is readable, extracts its information.
 *
 * @return the info extracted from the parent POM, or {@code null} when no complete parent
 *         declaration (artifactId, groupId and version) is present or the parent POM file
 *         does not exist or cannot be read
 * @throws DocumentException declared by this method; presumably raised while the parent POM
 *         is loaded or evaluated
 */
private PomInfo extractParentPom() throws DocumentException {
    // Look for the parent POM coordinates
    final Node artifactIdNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:artifactId");
    final Node groupIdNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:groupId");
    final Node versionNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:version");

    // Without a complete parent declaration there is nothing to resolve
    if (artifactIdNode == null || groupIdNode == null || versionNode == null) {
        return null;
    }

    final String artifactId = artifactIdNode.getText();
    final String groupId = groupIdNode.getText();
    final String version = versionNode.getText();

    // <user.home>/.m2/repository/<group path>/<artifactId>/<version>/<artifactId>-<version>.pom
    final String repositoryRoot = System.getProperty("user.home") + "/.m2/repository/";
    final String artifactDir = repositoryRoot + makePath(groupId) + "/" + artifactId + "/" + version + "/";
    final String parentPomName = artifactDir + artifactId + "-" + version + ".pom";
    if (logger.isInfoEnabled()) {
        logger.info("extractParentPom() looking for parent POM at " + parentPomName);
    }

    final File parentPom = new File(parentPomName);
    if (!parentPom.exists() || !parentPom.canRead()) {
        if (logger.isInfoEnabled()) {
            logger.info("extractParentPom() either the parent POM " + parentPomName
                    + " does not exist or can't be read");
        }
        return null;
    }

    if (logger.isDebugEnabled()) {
        logger.debug("extractParentPom() parent POM found - starting new extraction round");
    }
    return new PomHelper(parentPom).extractPomInfo();
}

From source file:de.innovationgate.wga.common.WGAXML.java

License:Apache License

/**
 * Performs normalization on the wga.xml by creating mandatory elements and attributes and doing some
 * additional validations, like converting obsolete structures, defining yet undefined domains etc.
 * @param doc The wga.xml document
 */
public static void normalize(Document doc) {
    // Normalizes the given document in place: strips an obsolete namespace, creates the
    // mandatory elements/attributes with their defaults, migrates obsolete structures and
    // finally creates a domain definition for every domain referenced by a database.
    // The order of the getOrCreate* calls determines the order of newly created nodes,
    // so statements must not be reordered.

    // Remove obsolete namespace
    String ns = "urn:de.innovationgate.webgate.api.query.domino.WGDatabaseImpl";
    Iterator nodes = doc.selectNodes("//*[namespace-uri(.)='" + ns + "']").iterator();
    Element element;
    while (nodes.hasNext()) {
        element = (Element) nodes.next();
        element.setQName(QName.get(element.getName()));
    }

    // Build necessary elements
    Element wga = (Element) doc.selectSingleNode("wga");

    // Licenses
    Element licenses = WGUtils.getOrCreateElement(wga, "licenses");
    Iterator licenseTags = licenses.elements("authorlicense").iterator();
    while (licenseTags.hasNext()) {
        Element licenseTag = (Element) licenseTags.next();
        // Always set the type (addAttribute instead of getOrCreateAttribute) - B0000486E
        licenseTag.addAttribute("type", "WGA.Client");
    }

    // administrators
    WGUtils.getOrCreateElement(wga, "administrators");

    // configuration
    Element configuration = WGUtils.getOrCreateElement(wga, "configuration");
    Element defaultdb = WGUtils.getOrCreateElement(configuration, "defaultdb");
    WGUtils.getOrCreateAttribute(defaultdb, "key", "");
    WGUtils.getOrCreateAttribute(defaultdb, "favicon", "");
    WGUtils.getOrCreateAttribute(defaultdb, "datacache", "10000");
    WGUtils.getOrCreateAttribute(defaultdb, "staticexpiration", "10");

    Element features = WGUtils.getOrCreateElement(configuration, "features");
    WGUtils.getOrCreateAttribute(features, "bi", "true");
    WGUtils.getOrCreateAttribute(features, "adminpage", "true");
    WGUtils.getOrCreateAttribute(features, "manager", "true");
    WGUtils.getOrCreateAttribute(features, "startpage", "true");
    WGUtils.getOrCreateAttribute(features, "webdav", "true");
    WGUtils.getOrCreateAttribute(features, "webservice", "true");
    WGUtils.getOrCreateAttribute(features, "adminport", "");
    WGUtils.getOrCreateAttribute(features, "authoringport", "");
    WGUtils.getOrCreateAttribute(features, "clusterport", "");

    Element warnings = WGUtils.getOrCreateElement(configuration, "warnings");
    WGUtils.getOrCreateAttribute(warnings, "enabled", "true");
    WGUtils.getOrCreateAttribute(warnings, "consoleOutput", "false");
    WGUtils.getOrCreateAttribute(warnings, "pageOutput", "true");

    Element tml = WGUtils.getOrCreateElement(configuration, "tml");
    WGUtils.getOrCreateAttribute(tml, "characterEncoding", "");
    WGUtils.getOrCreateAttribute(tml, "linkEncoding", "UTF-8");
    Element tmlheader = WGUtils.getOrCreateElement(tml, "tmlheader");
    WGUtils.getOrCreateAttribute(tmlheader, "buffer", "8kb");

    Element authoringconfig = WGUtils.getOrCreateElement(configuration, "authoringconfig");
    WGUtils.getOrCreateAttribute(authoringconfig, "dbfile", "");

    Element applog = WGUtils.getOrCreateElement(configuration, "applog");
    WGUtils.getOrCreateAttribute(applog, "level", "INFO");
    WGUtils.getOrCreateAttribute(applog, "logserver", "false");

    Element compression = WGUtils.getOrCreateElement(configuration, "compression");
    WGUtils.getOrCreateAttribute(compression, "enabled", "false");

    WGUtils.getOrCreateElement(configuration, "listeners");

    Element lucene = WGUtils.getOrCreateElement(configuration, "lucene");
    WGUtils.getOrCreateAttribute(lucene, "dir", "");
    WGUtils.getOrCreateAttribute(lucene, "enabled", "false");
    WGUtils.getOrCreateAttribute(lucene, "booleanQueryMaxClauseCount", "1024");
    WGUtils.getOrCreateAttribute(lucene, "maxDocsPerDBSession", "50");

    // read old lucene enabled dbs
    Attribute dbs = WGUtils.getOrCreateAttribute(lucene, "dbs", "");
    List oldLuceneEnabledDBKeys = WGUtils.deserializeCollection(dbs.getText(), ",");
    // remove old attribute for lucene enabled dbs
    lucene.remove(dbs);

    WGUtils.getOrCreateElement(configuration, "personalisation");

    // Element for TestCore - config
    Element testcore = WGUtils.getOrCreateElement(configuration, "testcore");
    WGUtils.getOrCreateAttribute(testcore, "dir", "");
    WGUtils.getOrCreateAttribute(testcore, "enabled", "false");

    // NOTE: this variable is reused further down for the per-database "design" element
    Element design = WGUtils.getOrCreateElement(configuration, "designsync");
    WGUtils.getOrCreateAttribute(design, "fileEncoding", "");
    WGUtils.getOrCreateAttribute(design, "interval", "1");
    WGUtils.getOrCreateAttribute(design, "throttling", "false");
    WGUtils.getOrCreateAttribute(design, "throttlingactivation", "10");

    WGUtils.getOrCreateElement(configuration, "jdbcdrivers");

    WGUtils.getOrCreateElement(configuration, "defaultdboptions");
    WGUtils.getOrCreateElement(configuration, "defaultpublisheroptions");

    Element mailConfig = WGUtils.getOrCreateElement(configuration, "mailconfig");
    WGUtils.getOrCreateAttribute(mailConfig, "mailHost", "");
    WGUtils.getOrCreateAttribute(mailConfig, "mailUser", "");
    WGUtils.getOrCreateAttribute(mailConfig, "mailPassword", "");
    WGUtils.getOrCreateAttribute(mailConfig, "mailFrom", "");
    WGUtils.getOrCreateAttribute(mailConfig, "mailTo", "");
    WGUtils.getOrCreateAttribute(mailConfig, "mailWGARootURL", "");
    WGUtils.getOrCreateAttribute(mailConfig, "useAsDefaultForWF", "false");
    WGUtils.getOrCreateAttribute(mailConfig, "enableAdminNotifications", "true");

    // Mappings
    Element mappings = WGUtils.getOrCreateElement(wga, "mappings");
    Attribute mappingLibraries = WGUtils.getOrCreateAttribute(mappings, "libraries", "");

    // Migrate the "libraries" attribute from <elementmappings> up to <mappings>
    Element elementmappings = WGUtils.getOrCreateElement(mappings, "elementmappings");
    if (elementmappings.attribute("libraries") != null && mappingLibraries.getText().equals("")) {
        mappingLibraries.setText(elementmappings.attributeValue("libraries", ""));
        elementmappings.remove(elementmappings.attribute("libraries"));
    }

    // Collect first, remove afterwards, so the list being iterated is not modified
    List elementsToRemove = new ArrayList();
    Iterator elementmappingTags = elementmappings.selectNodes("elementmapping").iterator();
    while (elementmappingTags.hasNext()) {
        Element elementmapping = (Element) elementmappingTags.next();
        if (elementmapping.attribute("binary") != null) {
            elementmapping.remove(elementmapping.attribute("binary"));
        }
        // remove old FOP implementation reference (F000040EE)
        String implClass = elementmapping.attributeValue("class", null);
        if (implClass != null && implClass.equals("de.innovationgate.wgpublisher.webtml.elements.FOP")) {
            elementsToRemove.add(elementmapping);
        }
    }
    Iterator toRemove = elementsToRemove.iterator();
    while (toRemove.hasNext()) {
        Element elementmapping = (Element) toRemove.next();
        elementmappings.remove(elementmapping);
    }

    Element mediamappings = WGUtils.getOrCreateElement(mappings, "mediamappings");
    Iterator mediamappingTags = mediamappings.selectNodes("mediamapping").iterator();
    while (mediamappingTags.hasNext()) {
        Element mediamapping = (Element) mediamappingTags.next();
        WGUtils.getOrCreateAttribute(mediamapping, "binary", "false");
        WGUtils.getOrCreateAttribute(mediamapping, "httplogin", "false");
    }

    WGUtils.getOrCreateElement(mappings, "encodermappings");
    WGUtils.getOrCreateElement(mappings, "syncmappings");

    Element analyzermappings = WGUtils.getOrCreateElement(mappings, "analyzermappings");
    WGUtils.getOrCreateAttribute(analyzermappings, "defaultAnalyzerClass",
            "de.innovationgate.wgpublisher.lucene.analysis.StandardAnalyzer");

    removeDefaultFileHandlerMappings(WGUtils.getOrCreateElement(mappings, "filehandlermappings"));

    WGUtils.getOrCreateElement(mappings, "filtermappings");

    Element scheduler = WGUtils.getOrCreateElement(wga, "scheduler");
    WGUtils.getOrCreateAttribute(scheduler, "loggingdir", "");

    // Domains
    Element domains = WGUtils.getOrCreateElement(wga, "domains");
    Iterator domainsIt = domains.elementIterator("domain");
    while (domainsIt.hasNext()) {
        Element domain = (Element) domainsIt.next();
        WGUtils.getOrCreateAttribute(domain, "name", "");
        WGUtils.getOrCreateAttribute(domain, "loginattempts", "5");
        WGUtils.getOrCreateAttribute(domain, "defaultmanager", "");
        Element login = WGUtils.getOrCreateElement(domain, "login");
        WGUtils.getOrCreateAttribute(login, "mode", "user");
        WGUtils.getOrCreateAttribute(login, "username", "");
        WGUtils.getOrCreateAttribute(login, "password", "");
        Element errorpage = WGUtils.getOrCreateElement(domain, "errorpage");
        WGUtils.getOrCreateAttribute(errorpage, "enabled", "false");
        WGUtils.getOrCreateElement(domain, "defaultdboptions");
        WGUtils.getOrCreateElement(domain, "defaultpublisheroptions");
    }

    // content dbs
    Element contentdbs = WGUtils.getOrCreateElement(wga, "contentdbs");
    Iterator contentdbTags = contentdbs.selectNodes("contentdb").iterator();
    // Names of all domains referenced by content and personalisation dbs
    Set usedDomains = new HashSet();
    while (contentdbTags.hasNext()) {
        Element contentdb = (Element) contentdbTags.next();
        WGUtils.getOrCreateAttribute(contentdb, "enabled", "true");
        WGUtils.getOrCreateAttribute(contentdb, "lazyconnect", "false");

        // Replace the obsolete "domino.local" implementation class name
        Element type = WGUtils.getOrCreateElement(contentdb, "type");
        String typeName = type.getStringValue();
        if (typeName.equals("de.innovationgate.webgate.api.domino.local.WGDatabaseImpl")) {
            type.setText("de.innovationgate.webgate.api.domino.WGDatabaseImpl");
        }

        // NOTE(review): typeName still holds the value read before the rewrite above, so the
        // lookup below uses the original class name - confirm this is intended
        boolean isFullContentStore = false;
        DbType dbType = DbType.getByImplClass(DbType.GENTYPE_CONTENT, typeName);
        if (dbType != null) {
            isFullContentStore = dbType.isFullContentStore();
        }

        //lowercase dbkey
        Element dbkey = WGUtils.getOrCreateElement(contentdb, "dbkey");
        dbkey.setText(dbkey.getText().trim().toLowerCase());

        WGUtils.getOrCreateElement(contentdb, "title");
        // Databases without a domain are assigned to "masterloginonly"
        Element domain = WGUtils.getOrCreateElement(contentdb, "domain");
        String domainStr = domain.getTextTrim();
        if (domainStr.equals("")) {
            domainStr = "masterloginonly";
            domain.setText("masterloginonly");
        }
        usedDomains.add(domainStr);
        WGUtils.getOrCreateElement(contentdb, "login");

        Element dboptions = WGUtils.getOrCreateElement(contentdb, "dboptions");
        stripOptionNamePrefixes(dboptions);

        WGUtils.getOrCreateElement(contentdb, "publisheroptions");
        WGUtils.getOrCreateElement(contentdb, "storedqueries");
        WGUtils.getOrCreateElement(contentdb, "fieldmappings");

        // Only full content stores keep a "shares" element
        if (isFullContentStore) {
            WGUtils.getOrCreateElement(contentdb, "shares");
        } else {
            if (contentdb.element("shares") != null) {
                contentdb.remove(contentdb.element("shares"));
            }
        }

        Element cache = WGUtils.getOrCreateElement(contentdb, "cache");
        WGUtils.getOrCreateAttribute(cache, "type", "de.innovationgate.wgpublisher.cache.WGACacheHSQLDB");
        WGUtils.getOrCreateAttribute(cache, "path", "");
        WGUtils.getOrCreateAttribute(cache, "maxpages", "5000");

        // Design - Migrate old designsync element
        Element designsync = contentdb.element("designsync");
        design = contentdb.element("design");
        if (designsync != null && design == null) {
            design = contentdb.addElement("design");
            if (designsync.attributeValue("enabled", "false").equals("true")) {
                design.addAttribute("provider", "sync");
            } else {
                design.addAttribute("provider", "none");
            }
            design.addAttribute("mode", designsync.attributeValue("mode", ""));
            design.addAttribute("key", designsync.attributeValue("key", ""));
            design.setText(designsync.getText());
        } else {
            design = WGUtils.getOrCreateElement(contentdb, "design");
            WGUtils.getOrCreateAttribute(design, "provider", "none");
            WGUtils.getOrCreateAttribute(design, "mode", "");
            WGUtils.getOrCreateAttribute(design, "key", "");
        }

        // create default lucene config for old enabled dbs
        if (oldLuceneEnabledDBKeys.contains(dbkey.getText().toLowerCase())) {
            Element luceneDBConfig = WGUtils.getOrCreateElement(contentdb, "lucene");
            WGUtils.getOrCreateAttribute(luceneDBConfig, "enabled", "true");
            WGUtils.getOrCreateElement(luceneDBConfig, "itemrules");
            // create defaultrule
            LuceneIndexItemRule.addDefaultRule(luceneDBConfig);
        }

        //lucene config per db
        Element luceneDBConfig = WGUtils.getOrCreateElement(contentdb, "lucene");
        WGUtils.getOrCreateAttribute(luceneDBConfig, "enabled", "false");
        WGUtils.getOrCreateElement(luceneDBConfig, "itemrules");
        //check for default rule
        List rules = (List) LuceneIndexItemRule.getRules(luceneDBConfig);
        if (rules.size() > 0) {
            //check if last rule is defaultrule
            LuceneIndexItemRule checkDefaultRule = (LuceneIndexItemRule) rules.get(rules.size() - 1);
            if (!checkDefaultRule.getItemExpression().equals(LuceneIndexItemRule.EXPRESSION_WILDCARD)) {
                //last rule is no defaultRule, create defaultRule
                LuceneIndexItemRule.addDefaultRule(luceneDBConfig);
            }
        } else {
            //no rules present, create defaultRule
            LuceneIndexItemRule.addDefaultRule(luceneDBConfig);
        }
        // lucene file rules
        WGUtils.getOrCreateElement(luceneDBConfig, "filerules");
        //check for default filerule
        rules = (List) LuceneIndexFileRule.getRules(luceneDBConfig);
        if (rules.size() > 0) {
            //check if last rule is defaultrule
            LuceneIndexFileRule checkDefaultRule = (LuceneIndexFileRule) rules.get(rules.size() - 1);
            if (!checkDefaultRule.isDefaultRule()) {
                //last rule is no defaultRule, create defaultRule
                LuceneIndexFileRule.addDefaultRule(luceneDBConfig);
            }
        } else {
            //no rules present, create defaultRule
            LuceneIndexFileRule.addDefaultRule(luceneDBConfig);
        }

        // client restrictions
        Element clientRestrictions = WGUtils.getOrCreateElement(contentdb, "clientrestrictions");
        WGUtils.getOrCreateAttribute(clientRestrictions, "enabled", "false");
        WGUtils.getOrCreateElement(clientRestrictions, "restrictions");
    }

    // Personalisation dbs
    Element persodbs = WGUtils.getOrCreateElement(wga, "personalisationdbs");
    Iterator persodbTags = persodbs.selectNodes("personalisationdb").iterator();
    while (persodbTags.hasNext()) {
        Element persodb = (Element) persodbTags.next();
        WGUtils.getOrCreateAttribute(persodb, "enabled", "true");
        WGUtils.getOrCreateAttribute(persodb, "lazyconnect", "false");

        // Replace the obsolete "domino.local" implementation class name
        Element type = WGUtils.getOrCreateElement(persodb, "type");
        if (type.getStringValue().equals("de.innovationgate.webgate.api.domino.local.WGDatabaseImpl")) {
            type.setText("de.innovationgate.webgate.api.domino.WGDatabaseImpl");
        }

        // Databases without a domain are assigned to "masterloginonly"
        Element domain = WGUtils.getOrCreateElement(persodb, "domain");
        String domainStr = domain.getTextTrim();
        if (domainStr.equals("")) {
            domainStr = "masterloginonly";
            domain.setText("masterloginonly");
        }
        usedDomains.add(domainStr);
        WGUtils.getOrCreateElement(persodb, "login");

        Element persConfig = WGUtils.getOrCreateElement(persodb, "persconfig");
        WGUtils.getOrCreateAttribute(persConfig, "mode", "auto");
        WGUtils.getOrCreateAttribute(persConfig, "statistics", "off");

        Element dboptions = WGUtils.getOrCreateElement(persodb, "dboptions");
        stripOptionNamePrefixes(dboptions);

        WGUtils.getOrCreateElement(persodb, "publisheroptions");
    }

    //  **** Post-Processings **** 

    // Turn stored queries into CDATA-Sections
    List queries = doc.selectNodes("/wga/contentdbs/contentdb/storedqueries/storedquery/query");
    for (Iterator iter = queries.iterator(); iter.hasNext();) {
        Element query = (Element) iter.next();
        Node text = query.selectSingleNode("text()");
        // instanceof is false for null, so no separate null check is needed
        if (text instanceof Text) {
            query.addCDATA(text.getText());
            query.remove(text);
        }
    }

    // Create domains from database definitions
    Iterator usedDomainsIt = usedDomains.iterator();
    String usedDomain;
    while (usedDomainsIt.hasNext()) {
        usedDomain = (String) usedDomainsIt.next();
        // NOTE(review): the domain name is embedded unescaped into the XPath expression;
        // a name containing a single quote would yield an invalid expression
        Element domain = (Element) domains.selectSingleNode("domain[@name='" + usedDomain + "']");
        if (domain == null) {
            domain = domains.addElement("domain");
            domain.addAttribute("name", usedDomain);
            Element login = domain.addElement("login");
            if (usedDomain.equals("masterloginonly")) {
                login.addAttribute("mode", "master");
            } else {
                login.addAttribute("mode", "user");
            }
            login.addAttribute("username", "");
            login.addAttribute("password", "");
            Element errorPage = domain.addElement("errorpage");
            errorPage.addAttribute("enabled", "false");
            domain.addElement("defaultdboptions");
            domain.addElement("defaultpublisheroptions");
        }
    }

    // Reorder content dbs, so design providers are first
    pullupDesignProviders(doc);
}

/**
 * Removes the prefix up to and including the first colon from the "name" attribute of every
 * "option" child of the given element. Options without a "name" attribute or without a colon
 * in their name are left untouched.
 * @param dboptions The element holding the "option" children
 */
private static void stripOptionNamePrefixes(Element dboptions) {
    Iterator options = dboptions.selectNodes("option").iterator();
    while (options.hasNext()) {
        Element option = (Element) options.next();
        String optionName = option.attributeValue("name");
        if (optionName == null) {
            // attributeValue() yields null for a missing attribute; nothing to strip
            continue;
        }
        int separatorPos = optionName.indexOf(":");
        if (separatorPos != -1) {
            option.addAttribute("name", optionName.substring(separatorPos + 1));
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.SemCorXMLReader.java

License:Apache License

/**
 * Walks over all sentence elements below the given element, collects their text and creates
 * the sentence, token-level and word sense annotations for them.
 *
 * @param jCas     the CAS the annotations are created in
 * @param element  the element whose sentence children are processed
 * @param offset   the character offset at which the text of the first sentence starts
 * @param idPrefix prefix used to construct an ID for word forms that carry no ID attribute
 * @return the concatenated text of all processed nodes, with newlines replaced by spaces
 * @throws CollectionException if a sentence contains an element that is neither a
 *         punctuation nor a word form element
 */
@SuppressWarnings("unchecked")
private StringBuffer processSentences(JCas jCas, Element element, int offset, String idPrefix)
        throws CollectionException {
    StringBuffer sentenceText = new StringBuffer();
    for (Iterator<Element> sentenceIterator = element.elementIterator(ELEMENT_SENTENCE); sentenceIterator
            .hasNext();) {
        Element sentence = sentenceIterator.next();
        Sentence sentenceAnnotation = new Sentence(jCas);
        sentenceAnnotation.setBegin(offset);
        String sentenceId = sentence.attributeValue(ATTR_SNUM);
        int wordFormCount = 0;

        for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) {
            Node node = nodeIterator.next();
            String nodeText = node.getText().replace('\n', ' ');
            // The text of every node is appended, whether or not it gets annotated below;
            // oldOffset marks the begin of this node, offset already points behind it
            int oldOffset = offset;
            offset += nodeText.length();
            sentenceText.append(nodeText);

            // Nodes without a name only contribute text
            if (node.getName() == null) {
                continue;
            }

            if (node.getName().equals(ELEMENT_PUNCTUATION)) {
                logger.trace("Found punctuation " + node.getText());
                continue;
            }

            if (!node.getName().equals(ELEMENT_WORDFORM)) {
                throw new CollectionException("unknown_element", new Object[] { node.getName() });
            }

            // Find or construct a unique ID for this word form
            wordFormCount++;
            totalWordFormCount++;
            Element wordForm = (Element) node;
            String wordFormId = wordForm.attributeValue(ATTR_ID);
            if (wordFormId == null) {
                wordFormId = idPrefix + ".s" + sentenceId + ".w" + wordFormCount;
            }
            logger.trace("Found wf id: " + wordFormId);

            String lemma = wordForm.attributeValue(ATTR_LEMMA);
            String pos = wordForm.attributeValue(ATTR_POS);

            // write DKPro Core annotations Token, Lemma, and POS
            if (shouldWriteCoreAnnotations) {
                Lemma lemmaAnno = null;
                if (lemma != null) {
                    // Must start at oldOffset like the POS and Token annotations; "offset"
                    // has already been advanced past this node
                    lemmaAnno = new Lemma(jCas, oldOffset, oldOffset + nodeText.length());
                    lemmaAnno.setValue(lemma);
                    lemmaAnno.addToIndexes();
                }

                de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS posAnno = null;
                if (pos != null) {
                    Type posTag = mappingProvider.getTagType(pos);
                    posAnno = (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) jCas.getCas()
                            .createAnnotation(posTag, oldOffset, oldOffset + nodeText.length());
                    posAnno.setPosValue(pos);
                    posAnno.addToIndexes();
                }

                Token tokenAnno = new Token(jCas, oldOffset, oldOffset + nodeText.length());
                tokenAnno.setLemma(lemmaAnno);
                tokenAnno.setPos(posAnno);
                tokenAnno.addToIndexes();
            }

            // Skip <wf> elements which are not marked as "done". The constant is the
            // receiver of equals() so that a missing cmd attribute counts as "not done"
            // instead of raising a NullPointerException
            if (skipUndone && !VAL_DONE.equals(wordForm.attributeValue(ATTR_CMD))) {
                logger.debug("Skipping wf " + wordFormId + ": not marked as 'done'");
                continue;
            }

            // Skip <wf> elements for which semantic tags could not be
            // assigned
            if (skipUnassignable && wordForm.attributeValue(ATTR_OT) != null) {
                logger.debug("Skipping wf " + wordFormId + ": ot=" + wordForm.attributeValue(ATTR_OT));
                continue;
            }

            // Find the number of valid sense tags for this word form.
            // Tags with a wnsn attribute value of "0"
            // (or "-1" according to some specifications) could not be
            // mapped and so are skipped.
            String wnsn = wordForm.attributeValue(ATTR_WNSN);
            if (skipWithoutWnsn && wnsn == null) {
                logger.debug("Skipping wf " + wordFormId + ": no wnsn");
                continue;
            }
            // NOTE(review): with skipWithoutWnsn disabled a missing wnsn attribute still
            // fails on the split below - the intended handling is not visible here
            int totalValidWf = 0;
            String wnsns[] = wnsn.split(";");
            for (String s : wnsns) {
                if (isValidWnsn(s)) {
                    totalValidWf++;
                }
            }
            if (skipWithoutWnsn && totalValidWf == 0) {
                logger.debug("Skipping wf " + wordFormId + ": wnsn=" + wordForm.attributeValue(ATTR_WNSN));
                continue;
            }

            // Skip word forms without a lemma
            if (skipWithoutLemma && lemma == null) {
                logger.warn("Skipping wf " + wordFormId + ": no lemma");
                continue;
            }

            // Skip word forms without a POS
            if (skipWithoutPos && pos == null) {
                logger.warn("Skipping " + wordFormId + ": no pos");
                continue;
            }
            try {
                pos = semCorPosToPOS(pos).toString();
            } catch (IllegalArgumentException e) {
                logger.warn("Skipping wf " + wordFormId + ": unrecognized pos=" + pos);
                continue;
            }

            // Create the necessary WSDItem and LexicalItemConstituent
            // annotations for this word form
            LexicalItemConstituent c = newLexicalItemConstituent(jCas, wordFormId, ELEMENT_WORDFORM, oldOffset,
                    nodeText.length());
            WSDItem w = newWsdItem(jCas, wordFormId, oldOffset, nodeText.length(), pos, lemma);
            w.setConstituents(new FSArray(jCas, 1));
            w.setConstituents(0, c);

            // Get an array of sense tags. Sense tags are found
            // in the lexsn attribute and are separated with
            // semicolons. Sometimes the head_word field contains
            // a superfluous character in parentheses which must
            // be removed. (These quirks are not documented in
            // the SemCor file format specification.)
            String lexsns[] = wordForm.attributeValue(ATTR_LEXSN).replaceAll("\\(.\\)", "").split(";");
            FSArray senseArray = new FSArray(jCas, totalValidWf);
            int validWfCount = 0;
            // lexsns[i] and wnsns[i] are read pairwise: only sense tags whose wnsn
            // counterpart is valid are turned into Sense annotations
            for (int i = 0; i < lexsns.length; i++) {
                if (isValidWnsn(wnsns[i])) {
                    Sense sense = new Sense(jCas);
                    sense.setId(lemma + "%" + lexsns[i]);
                    sense.setConfidence(1.0);
                    sense.addToIndexes();
                    senseArray.set(validWfCount++, sense);
                }
            }

            WSDResult wsdResult = new WSDResult(jCas, oldOffset, oldOffset + nodeText.length());
            wsdResult.setWsdItem(w);
            wsdResult.setSenses(senseArray);
            wsdResult.setSenseInventory(senseInventory);
            wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME);
            wsdResult.addToIndexes();
        }

        sentenceAnnotation.setEnd(offset);
        sentenceAnnotation.addToIndexes();
    }
    return sentenceText;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.WebCAGeXMLReader.java

License:Apache License

/**
 * Collects the document text from all child nodes of the given text element and creates the
 * word sense annotations for every head element found among them.
 *
 * @param jCas the CAS the annotations are created in
 * @param text the element whose child nodes make up the document
 * @return the concatenated text of all child nodes, with newlines replaced by spaces
 * @throws CollectionException if a named node is neither a satellite nor a head element and
 *         its node type name is not "Entity"
 */
@SuppressWarnings("unchecked")
private StringBuffer processText(JCas jCas, Element text) throws CollectionException {
    StringBuffer documentText = new StringBuffer();
    int offset = 0;

    // Loop over all nodes to get the document text in order
    for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) {

        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // Every node contributes its text to the document, including heads that are
        // skipped below; appending up front keeps the text and the offsets complete.
        // "begin" is the start of this node, "offset" already points behind it.
        int begin = offset;
        offset += nodeText.length();
        documentText.append(nodeText);

        // TODO: For now we ignore satellites. We should add support for
        // them.
        if (nodeName == null || nodeName.equals(ELEMENT_SAT)) {
            continue;
        }

        if (!nodeName.equals(ELEMENT_HEAD)) {
            // If the node is any other element, something is wrong
            if (!node.getNodeTypeName().equals("Entity")) {
                throw new CollectionException("unknown_element", new Object[] { node.getName() });
            }
            continue;
        }

        // The node is a head: create a LexicalItemConstituent and a WSDItem
        Element head = (Element) node;
        String headId = head.attributeValue(ATTR_ID);
        String lemma = head.attributeValue(ATTR_LEMMA);

        logger.trace("Reading instance " + headId);

        // Skip word forms without a POS
        String pos = head.attributeValue(ATTR_POS);
        if (pos == null) {
            logger.warn("No POS provided for " + headId + "; skipping");
            continue;
        }
        try {
            pos = webCAGePosToPOS(pos).toString();
        } catch (IllegalArgumentException e) {
            logger.warn("Unrecognized POS " + pos + " provided for " + headId + "; skipping");
            continue;
        }

        // Create the necessary WSDItem and LexicalItemConstituent
        // annotations for this word form
        LexicalItemConstituent c = newLexicalItemConstituent(jCas, headId, ELEMENT_HEAD, begin,
                nodeText.length());
        WSDItem w = newWsdItem(jCas, headId, begin, nodeText.length(), pos, lemma);
        w.setConstituents(new FSArray(jCas, 1));
        w.setConstituents(0, c);

        // Get an array of lexical unit IDs (LUIDs). LUIDs are found
        // in the luids attribute and are separated with
        // # characters.
        String luids[] = head.attributeValue(ATTR_LUIDS).split("#");
        FSArray senseArray = new FSArray(jCas, luids.length);
        for (int i = 0; i < luids.length; i++) {
            Sense sense = new Sense(jCas);
            // The first character of each LUID is dropped
            sense.setId(luids[i].substring(1));
            sense.setConfidence(1.0);
            sense.addToIndexes();
            senseArray.set(i, sense);
        }

        // The result spans the head it belongs to
        WSDResult wsdResult = new WSDResult(jCas, begin, offset);
        wsdResult.setWsdItem(w);
        wsdResult.setSenses(senseArray);
        wsdResult.setSenseInventory(senseInventory);
        wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME);
        wsdResult.addToIndexes();
    }
    return documentText;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Semeval1AWReader.java

License:Apache License

/**
 * Reads the next text element into the given CAS.
 *
 * <p>The document text is assembled, in order, from the nodes of every sentence element, with
 * newlines replaced by spaces. One {@code Sentence} annotation is added per sentence element,
 * and every head element yields a lexical item constituent and a WSD item spanning its text.
 *
 * @param jCas the CAS to populate
 * @throws IOException if setting the document metadata fails with a URI syntax error
 * @throws CollectionException if a sentence contains a named node other than a head element,
 *         or if the text contains no sentence element at all
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    int offset = 0;
    int numSentences = 0;
    // A StringBuilder keeps text assembly linear; repeated String concatenation in the
    // node loop would copy the whole document text on every node.
    StringBuilder documentText = new StringBuilder();
    Element text = textIterator.next();

    Iterator<Element> sentenceIterator = text.elementIterator(SENTENCE_ELEMENT_NAME);
    while (sentenceIterator.hasNext()) {
        Element sentence = sentenceIterator.next();
        Sentence sentenceAnnotation = new Sentence(jCas);
        sentenceAnnotation.setBegin(offset);

        Iterator<Node> nodeIterator = sentence.nodeIterator();
        while (nodeIterator.hasNext()) {
            Node node = nodeIterator.next();
            String nodeText = node.getText().replace('\n', ' ');
            String nodeName = node.getName();

            if (nodeName != null && nodeName.equals(HEAD_ELEMENT_NAME)) {
                // A head: create a LexicalItemConstituent and a WSDItem for it
                Element head = (Element) node;
                String id = head.attributeValue(ID_ATTRIBUTE_NAME);

                LexicalItemConstituent c = newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset,
                        nodeText.length());
                WSDItem w = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(),
                        head.attributeValue(POS_ATTRIBUTE_NAME), head.attributeValue(LEMMA_ATTRIBUTE_NAME));
                w.setConstituents(new FSArray(jCas, 1));
                w.setConstituents(0, c);
            } else if (nodeName != null) {
                // Any other named node is not expected inside a sentence
                throw new CollectionException("unknown_element", new Object[] { nodeName });
            }

            // Named or not, the node's text becomes part of the document text
            offset += nodeText.length();
            documentText.append(nodeText);
        }

        sentenceAnnotation.setEnd(offset);
        sentenceAnnotation.addToIndexes();
        numSentences++;
    }

    // The Semeval-1 DTD requires each text to have at least one sentence
    if (numSentences == 0) {
        throw new CollectionException("element_not_found",
                new Object[] { SENTENCE_ELEMENT_NAME, TEXT_ELEMENT_NAME });
    }

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    textCount++;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Semeval2AWReader.java

License:Apache License

/**
 * Reads the next text element into the given CAS.
 *
 * <p>The document text is assembled, in order, from the nodes of every sentence element, with
 * newlines replaced by spaces. Satellite elements yield lexical item constituents; head
 * elements yield a lexical item constituent and a WSD item. After each sentence the collected
 * items are handed to {@code populateLexicalItemConstituents} and a {@code Sentence}
 * annotation is added.
 *
 * @param jCas the CAS to populate
 * @throws IOException if setting the document metadata fails with a URI syntax error
 * @throws CollectionException if a sentence contains a named node that is neither a satellite
 *         nor a head and whose node type name is not {@code "Entity"}
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    int offset = 0;
    // A StringBuilder keeps text assembly linear; repeated String concatenation in the
    // node loop would copy the whole document text on every node.
    StringBuilder documentText = new StringBuilder();
    Element text = textIterator.next();

    Iterator<Element> sentenceIterator = text.elementIterator(SENTENCE_ELEMENT_NAME);
    while (sentenceIterator.hasNext()) {
        // Per-sentence collections, keyed by element ID
        Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>();
        Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>();
        Map<String, String> sats = new HashMap<String, String>();
        Element sentence = sentenceIterator.next();
        Sentence sentenceAnnotation = new Sentence(jCas);
        sentenceAnnotation.setBegin(offset);

        // Loop over all nodes to get the document text in order
        Iterator<Node> nodeIterator = sentence.nodeIterator();
        while (nodeIterator.hasNext()) {
            Node node = nodeIterator.next();
            String nodeText = node.getText().replace('\n', ' ');
            String nodeName = node.getName();

            // Unnamed nodes contribute text only; named nodes are inspected first
            if (nodeName != null) {
                if (nodeName.equals(SATELLITE_ELEMENT_NAME)) {
                    // A satellite: create a LexicalItemConstituent
                    String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME);
                    lics.put(id,
                            newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length()));
                } else if (nodeName.equals(HEAD_ELEMENT_NAME)) {
                    // A head: create a LexicalItemConstituent and a WSDItem
                    Element head = (Element) node;
                    String id = head.attributeValue(ID_ATTRIBUTE_NAME);
                    String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME);

                    lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()));
                    wsdItems.put(id, newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(),
                            head.attributeValue(POS_ATTRIBUTE_NAME), head.attributeValue(LEMMA_ATTRIBUTE_NAME)));

                    if (satellites != null) {
                        sats.put(id, satellites);
                    }
                } else if (!node.getNodeTypeName().equals("Entity")) {
                    // If the node is any other element, something is wrong
                    throw new CollectionException("unknown_element", new Object[] { nodeName });
                }
            }

            offset += nodeText.length();
            documentText.append(nodeText);
        }

        // Add a sentence annotation
        sentenceAnnotation.setEnd(offset);
        sentenceAnnotation.addToIndexes();

        populateLexicalItemConstituents(jCas, wsdItems, lics, sats);
    }

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    textCount++;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2AWReader.java

License:Apache License

/**
 * Reads the next text element into the given CAS.
 *
 * <p>The document text is assembled, in order, from the direct child nodes of the text
 * element, with newlines replaced by spaces. Satellite elements yield lexical item
 * constituents; head elements yield a lexical item constituent and a WSD item. Once all nodes
 * are processed the collected items are handed to {@code populateLexicalItemConstituents}.
 *
 * @param jCas the CAS to populate
 * @throws IOException if setting the document metadata fails with a URI syntax error
 * @throws CollectionException if the text contains a named node that is neither a satellite
 *         nor a head and whose node type name is not {@code "Entity"}
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    int offset = 0;
    // A StringBuilder keeps text assembly linear; repeated String concatenation in the
    // node loop would copy the whole document text on every node.
    StringBuilder documentText = new StringBuilder();
    Element text = textIterator.next();

    // Collections for the whole text, keyed by element ID
    Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>();
    Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>();
    Map<String, String> sats = new HashMap<String, String>();

    // Loop over all nodes to get the document text in order
    Iterator<Node> nodeIterator = text.nodeIterator();
    while (nodeIterator.hasNext()) {
        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // Unnamed nodes contribute text only; named nodes are inspected first
        if (nodeName != null) {
            if (nodeName.equals(SATELLITE_ELEMENT_NAME)) {
                // A satellite: create a LexicalItemConstituent
                String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME);
                LexicalItemConstituent lic = newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset,
                        nodeText.length());
                lics.put(id, lic);
            } else if (nodeName.equals(HEAD_ELEMENT_NAME)) {
                // A head: create a LexicalItemConstituent and a WSDItem
                Element head = (Element) node;
                String id = head.attributeValue(ID_ATTRIBUTE_NAME);
                String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME);

                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()));
                // No POS or lemma attribute is read here: null and the raw node text are
                // passed as the last two arguments.
                WSDItem wsdItem = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), null, nodeText);
                wsdItems.put(id, wsdItem);

                if (satellites != null) {
                    sats.put(id, satellites);
                }
            } else if (!node.getNodeTypeName().equals("Entity")) {
                // If the node is any other element, something is wrong
                throw new CollectionException("unknown_element", new Object[] { nodeName });
            }
        }

        offset += nodeText.length();
        documentText.append(nodeText);
    }

    populateLexicalItemConstituents(jCas, wsdItems, lics, sats);

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    textCount++;
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2LSReader.java

License:Apache License

/**
 * Reads the next instance element into the given CAS.
 *
 * <p>When the current lexelt has no further instances, the next lexelt is fetched and its POS
 * and lemma are derived from its item attribute. The document text is assembled, in order,
 * from the child nodes of the instance's context element, with newlines replaced by spaces.
 * Satellite elements yield lexical item constituents; head elements yield a lexical item
 * constituent and a WSD item, both keyed by the instance ID.
 *
 * @param jCas the CAS to populate
 * @throws IOException if setting the document metadata fails with a URI syntax error
 * @throws CollectionException if the instance has no context element, or if the context
 *         contains a named node that is neither a satellite nor a head and whose node type
 *         name is not {@code "Entity"}
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    // If there are no more <instance>s in this <lexelt>, get the next
    // <lexelt>
    if (!instanceIterator.hasNext()) {
        lexelt = lexeltIterator.next();
        lexeltPOS = getLexeltPOS(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME));
        lexeltLemma = getLexeltLemma(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME));
        textCount++;
        instanceIterator = lexelt.elementIterator(INSTANCE_ELEMENT_NAME);
    }

    Element instance = instanceIterator.next();
    Element context = instance.element(CONTEXT_ELEMENT_NAME);
    // Report a missing context explicitly instead of failing with a bare
    // NullPointerException when its nodes are iterated.
    if (context == null) {
        throw new CollectionException("element_not_found",
                new Object[] { CONTEXT_ELEMENT_NAME, INSTANCE_ELEMENT_NAME });
    }

    int offset = 0;
    // A StringBuilder keeps text assembly linear; repeated String concatenation in the
    // node loop would copy the whole document text on every node.
    StringBuilder documentText = new StringBuilder();
    Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>();
    Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>();
    Map<String, String> sats = new HashMap<String, String>();

    // Loop over all nodes to get the document text in order
    Iterator<Node> nodeIterator = context.nodeIterator();
    while (nodeIterator.hasNext()) {
        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        // Unnamed nodes contribute text only; named nodes are inspected first
        if (nodeName != null) {
            if (nodeName.equals(SATELLITE_ELEMENT_NAME)) {
                // A satellite: create a LexicalItemConstituent
                String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME);
                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length()));
            } else if (nodeName.equals(HEAD_ELEMENT_NAME)) {
                // A head: create a LexicalItemConstituent and a WSDItem.
                // The ID comes from the enclosing instance, not from the head itself.
                String id = instance.attributeValue(ID_ATTRIBUTE_NAME);
                String satellites = ((Element) node).attributeValue(SATELLITES_ATTRIBUTE_NAME);

                lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()));
                wsdItems.put(id,
                        newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), lexeltPOS, lexeltLemma));

                if (satellites != null) {
                    sats.put(id, satellites);
                }
            } else if (!node.getNodeTypeName().equals("Entity")) {
                // If the node is any other element, something is wrong
                throw new CollectionException("unknown_element", new Object[] { nodeName });
            }
        }

        offset += nodeText.length();
        documentText.append(nodeText);
    }

    populateLexicalItemConstituents(jCas, wsdItems, lics, sats);

    jCas.setDocumentText(documentText.toString());

    try {
        setDocumentMetadata(jCas, instance.attributeValue(ID_ATTRIBUTE_NAME));
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

}

From source file:dk.dma.nogoservice.ExtractBagData.java

License:Apache License

/**
 * Reads the geographic bounding box and the grid dimensions from the XML metadata file named
 * {@code bagFile + ".xml"}.
 *
 * @param bagFile path of the BAG file; the metadata is read from this path with ".xml" appended
 * @return grid data holding the west/east longitudes, south/north latitudes and the
 *         column/row counts found in the metadata
 * @throws ExitCodeException if the XML file does not exist
 * @throws NullPointerException if one of the expected XPath locations matches no node
 * @throws NumberFormatException if the text of a matched node is not a valid number
 */
@SneakyThrows(DocumentException.class)
private GridData readMetaData(String bagFile) {
    File xmlFile = new File(bagFile + ".xml");
    if (!xmlFile.exists()) {
        throw new ExitCodeException(NO_XML_FILE);
    }

    SAXReader reader = new SAXReader();
    Document document = reader.read(xmlFile);

    // we use fully qualified XPath, as we would like to fail if the format changes
    Node coordinateBox = requireNode(document,
            "/gmi:MI_Metadata/gmd:identificationInfo/bag:BAG_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox");
    Node westLon = requireNode(coordinateBox, "gmd:westBoundLongitude/gco:Decimal");
    Node eastLon = requireNode(coordinateBox, "gmd:eastBoundLongitude/gco:Decimal");
    Node southLat = requireNode(coordinateBox, "gmd:southBoundLatitude/gco:Decimal");
    Node northLat = requireNode(coordinateBox, "gmd:northBoundLatitude/gco:Decimal");

    Node columnNode = requireNode(document,
            "/gmi:MI_Metadata/gmd:spatialRepresentationInfo/gmd:MD_Georectified/gmd:axisDimensionProperties/gmd:MD_Dimension/gmd:dimensionName/gmd:MD_DimensionNameTypeCode[@codeListValue=\"column\"]/../../gmd:dimensionSize/gco:Integer");
    Node rowNode = requireNode(document,
            "/gmi:MI_Metadata/gmd:spatialRepresentationInfo/gmd:MD_Georectified/gmd:axisDimensionProperties/gmd:MD_Dimension/gmd:dimensionName/gmd:MD_DimensionNameTypeCode[@codeListValue=\"row\"]/../../gmd:dimensionSize/gco:Integer");

    GridData gridData = new GridData();
    gridData.setLo1(Float.parseFloat(westLon.getText()));
    gridData.setLo2(Float.parseFloat(eastLon.getText()));
    gridData.setLa1(Float.parseFloat(southLat.getText()));
    gridData.setLa2(Float.parseFloat(northLat.getText()));
    // Integer.parseInt, unlike Float.parseFloat, rejects surrounding whitespace, so trim
    // the element text before parsing.
    gridData.setNx(Integer.parseInt(columnNode.getText().trim()));
    gridData.setNy(Integer.parseInt(rowNode.getText().trim()));

    return gridData;
}

/**
 * Selects the single node matching {@code xpath} relative to {@code context}, failing with a
 * message that names the XPath when nothing matches.
 *
 * @param context the node the XPath is evaluated against
 * @param xpath the XPath expression to evaluate
 * @return the matched node, never {@code null}
 * @throws NullPointerException if the XPath matches no node
 */
private Node requireNode(Node context, String xpath) {
    return java.util.Objects.requireNonNull(context.selectSingleNode(xpath),
            "BAG metadata has no node matching XPath: " + xpath);
}

From source file:dk.netarkivet.common.utils.SimpleXml.java

License:Open Source License

/**
 * Returns the text of every node selected by the given key.
 *
 * @param key the path down to the elements to get; must be neither null nor empty
 * @return the text of each matching node, in the order the nodes were selected, or an empty
 *         list when nothing matches the key
 */
public List<String> getList(String key) {
    ArgumentNotValid.checkNotNullOrEmpty(key, "key");

    List<Node> matches = (List<Node>) getXPath(key).selectNodes(xmlDoc);
    if (matches != null && !matches.isEmpty()) {
        // Pre-size the result: exactly one string per matched node
        List<String> texts = new ArrayList<String>(matches.size());
        for (int i = 0; i < matches.size(); i++) {
            Node match = matches.get(i);
            texts.add(match.getText());
        }
        return texts;
    }
    return Collections.emptyList();
}