List of usage examples for the org.dom4j.Node method getText()
String getText();
Returns the text of this node.
From source file:de.fct.companian.analyze.mvn.helper.PomHelper.java
License:Apache License
private PomInfo extractParentPom() throws DocumentException { PomInfo pomInfo = null;/* www .j av a2 s.c o m*/ // Schaue nach Parent POM Node parentArtifactIdNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:artifactId"); Node parentGroupIdNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:groupId"); Node parentVersionNode = this.selectSingleNode("/mvn:project/mvn:parent/mvn:version"); if (parentArtifactIdNode != null && parentGroupIdNode != null && parentVersionNode != null) { String parentArtifactId = parentArtifactIdNode.getText(); String parentGroupId = parentGroupIdNode.getText(); String parentVersion = parentVersionNode.getText(); String userHome = System.getProperty("user.home"); String parentPomName = userHome + "/.m2/repository/" + makePath(parentGroupId) + "/" + parentArtifactId + "/" + parentVersion + "/" + parentArtifactId + "-" + parentVersion + ".pom"; if (logger.isInfoEnabled()) { logger.info("extractParentPom() looking for parent POM at " + parentPomName); } File parentPom = new File(parentPomName); if (parentPom.exists() && parentPom.canRead()) { if (logger.isDebugEnabled()) { logger.debug("extractParentPom() parent POM found - starting new extraction round"); } PomHelper parentHelper = new PomHelper(parentPom); pomInfo = parentHelper.extractPomInfo(); } else { if (logger.isInfoEnabled()) { logger.info("extractParentPom() either the parent POM " + parentPomName + " does not exist or can't be read"); } } } return pomInfo; }
From source file:de.innovationgate.wga.common.WGAXML.java
License:Apache License
/** * Performs normalization on the wga.xml by creating mandatory elements and attributes and doing some * additional validations, like converting obsolete structures, defining yet undefined domains etc. * @param doc The wga.xml/*from w ww . j a va2 s.c om*/ */ public static void normalize(Document doc) { // Remove obsolete namespace String ns = "urn:de.innovationgate.webgate.api.query.domino.WGDatabaseImpl"; Iterator nodes = doc.selectNodes("//*[namespace-uri(.)='" + ns + "']").iterator(); Element element; while (nodes.hasNext()) { element = (Element) nodes.next(); element.setQName(QName.get(element.getName())); } // Build necessary elements Element wga = (Element) doc.selectSingleNode("wga"); // Licenses Element licenses = WGUtils.getOrCreateElement(wga, "licenses"); Iterator licenseTags = licenses.elements("authorlicense").iterator(); while (licenseTags.hasNext()) { Element licenseTag = (Element) licenseTags.next(); //WGUtils.getOrCreateAttribute(licenseTag, "type", "WGA.Client"); // B0000486E licenseTag.addAttribute("type", "WGA.Client"); } // administrators WGUtils.getOrCreateElement(wga, "administrators"); // configuration Element configuration = WGUtils.getOrCreateElement(wga, "configuration"); Element defaultdb = WGUtils.getOrCreateElement(configuration, "defaultdb"); WGUtils.getOrCreateAttribute(defaultdb, "key", ""); WGUtils.getOrCreateAttribute(defaultdb, "favicon", ""); WGUtils.getOrCreateAttribute(defaultdb, "datacache", "10000"); WGUtils.getOrCreateAttribute(defaultdb, "staticexpiration", "10"); Element features = WGUtils.getOrCreateElement(configuration, "features"); WGUtils.getOrCreateAttribute(features, "bi", "true"); WGUtils.getOrCreateAttribute(features, "adminpage", "true"); WGUtils.getOrCreateAttribute(features, "manager", "true"); WGUtils.getOrCreateAttribute(features, "startpage", "true"); WGUtils.getOrCreateAttribute(features, "webdav", "true"); WGUtils.getOrCreateAttribute(features, "webservice", "true"); 
WGUtils.getOrCreateAttribute(features, "adminport", ""); WGUtils.getOrCreateAttribute(features, "authoringport", ""); WGUtils.getOrCreateAttribute(features, "clusterport", ""); Element warnings = WGUtils.getOrCreateElement(configuration, "warnings"); WGUtils.getOrCreateAttribute(warnings, "enabled", "true"); WGUtils.getOrCreateAttribute(warnings, "consoleOutput", "false"); WGUtils.getOrCreateAttribute(warnings, "pageOutput", "true"); Element tml = WGUtils.getOrCreateElement(configuration, "tml"); WGUtils.getOrCreateAttribute(tml, "characterEncoding", ""); WGUtils.getOrCreateAttribute(tml, "linkEncoding", "UTF-8"); Element tmlheader = WGUtils.getOrCreateElement(tml, "tmlheader"); WGUtils.getOrCreateAttribute(tmlheader, "buffer", "8kb"); Element authoringconfig = WGUtils.getOrCreateElement(configuration, "authoringconfig"); WGUtils.getOrCreateAttribute(authoringconfig, "dbfile", ""); Element applog = WGUtils.getOrCreateElement(configuration, "applog"); WGUtils.getOrCreateAttribute(applog, "level", "INFO"); WGUtils.getOrCreateAttribute(applog, "logserver", "false"); Element compression = WGUtils.getOrCreateElement(configuration, "compression"); WGUtils.getOrCreateAttribute(compression, "enabled", "false"); Element listeners = WGUtils.getOrCreateElement(configuration, "listeners"); Element lucene = WGUtils.getOrCreateElement(configuration, "lucene"); WGUtils.getOrCreateAttribute(lucene, "dir", ""); WGUtils.getOrCreateAttribute(lucene, "enabled", "false"); WGUtils.getOrCreateAttribute(lucene, "booleanQueryMaxClauseCount", "1024"); WGUtils.getOrCreateAttribute(lucene, "maxDocsPerDBSession", "50"); // read old lucene enabled dbs Attribute dbs = WGUtils.getOrCreateAttribute(lucene, "dbs", ""); List oldLuceneEnabledDBKeys = WGUtils.deserializeCollection(dbs.getText(), ","); // remove old attribute for lucene enabled dbs lucene.remove(dbs); Element persoconfig = WGUtils.getOrCreateElement(configuration, "personalisation"); // Element for TestCore - config Element testcore = 
WGUtils.getOrCreateElement(configuration, "testcore"); WGUtils.getOrCreateAttribute(testcore, "dir", ""); WGUtils.getOrCreateAttribute(testcore, "enabled", "false"); Element design = WGUtils.getOrCreateElement(configuration, "designsync"); WGUtils.getOrCreateAttribute(design, "fileEncoding", ""); WGUtils.getOrCreateAttribute(design, "interval", "1"); WGUtils.getOrCreateAttribute(design, "throttling", "false"); WGUtils.getOrCreateAttribute(design, "throttlingactivation", "10"); Element jdbcDrivers = WGUtils.getOrCreateElement(configuration, "jdbcdrivers"); WGUtils.getOrCreateElement(configuration, "defaultdboptions"); WGUtils.getOrCreateElement(configuration, "defaultpublisheroptions"); Element mailConfig = WGUtils.getOrCreateElement(configuration, "mailconfig"); WGUtils.getOrCreateAttribute(mailConfig, "mailHost", ""); WGUtils.getOrCreateAttribute(mailConfig, "mailUser", ""); WGUtils.getOrCreateAttribute(mailConfig, "mailPassword", ""); WGUtils.getOrCreateAttribute(mailConfig, "mailFrom", ""); WGUtils.getOrCreateAttribute(mailConfig, "mailTo", ""); WGUtils.getOrCreateAttribute(mailConfig, "mailWGARootURL", ""); WGUtils.getOrCreateAttribute(mailConfig, "useAsDefaultForWF", "false"); WGUtils.getOrCreateAttribute(mailConfig, "enableAdminNotifications", "true"); // Mappings Element mappings = WGUtils.getOrCreateElement(wga, "mappings"); Attribute mappingLibraries = WGUtils.getOrCreateAttribute(mappings, "libraries", ""); Element elementmappings = WGUtils.getOrCreateElement(mappings, "elementmappings"); if (elementmappings.attribute("libraries") != null && mappingLibraries.getText().equals("")) { mappingLibraries.setText(elementmappings.attributeValue("libraries", "")); elementmappings.remove(elementmappings.attribute("libraries")); } List elementsToRemove = new ArrayList(); Iterator elementmappingTags = elementmappings.selectNodes("elementmapping").iterator(); while (elementmappingTags.hasNext()) { Element elementmapping = (Element) elementmappingTags.next(); if 
(elementmapping.attribute("binary") != null) { elementmapping.remove(elementmapping.attribute("binary")); } // remove old FOP implementation reference (F000040EE) String implClass = elementmapping.attributeValue("class", null); if (implClass != null && implClass.equals("de.innovationgate.wgpublisher.webtml.elements.FOP")) { elementsToRemove.add(elementmapping); } } Iterator toRemove = elementsToRemove.iterator(); while (toRemove.hasNext()) { Element elementmapping = (Element) toRemove.next(); elementmappings.remove(elementmapping); } Element mediamappings = WGUtils.getOrCreateElement(mappings, "mediamappings"); Iterator mediamappingTags = mediamappings.selectNodes("mediamapping").iterator(); while (mediamappingTags.hasNext()) { Element mediamapping = (Element) mediamappingTags.next(); WGUtils.getOrCreateAttribute(mediamapping, "binary", "false"); WGUtils.getOrCreateAttribute(mediamapping, "httplogin", "false"); } WGUtils.getOrCreateElement(mappings, "encodermappings"); WGUtils.getOrCreateElement(mappings, "syncmappings"); Element analyzermappings = WGUtils.getOrCreateElement(mappings, "analyzermappings"); WGUtils.getOrCreateAttribute(analyzermappings, "defaultAnalyzerClass", "de.innovationgate.wgpublisher.lucene.analysis.StandardAnalyzer"); removeDefaultFileHandlerMappings(WGUtils.getOrCreateElement(mappings, "filehandlermappings")); WGUtils.getOrCreateElement(mappings, "filtermappings"); Element scheduler = WGUtils.getOrCreateElement(wga, "scheduler"); WGUtils.getOrCreateAttribute(scheduler, "loggingdir", ""); // Domains Element domains = WGUtils.getOrCreateElement(wga, "domains"); Iterator domainsIt = domains.elementIterator("domain"); while (domainsIt.hasNext()) { Element domain = (Element) domainsIt.next(); WGUtils.getOrCreateAttribute(domain, "name", ""); WGUtils.getOrCreateAttribute(domain, "loginattempts", "5"); WGUtils.getOrCreateAttribute(domain, "defaultmanager", ""); Element login = WGUtils.getOrCreateElement(domain, "login"); 
WGUtils.getOrCreateAttribute(login, "mode", "user"); WGUtils.getOrCreateAttribute(login, "username", ""); WGUtils.getOrCreateAttribute(login, "password", ""); Element errorpage = WGUtils.getOrCreateElement(domain, "errorpage"); WGUtils.getOrCreateAttribute(errorpage, "enabled", "false"); WGUtils.getOrCreateElement(domain, "defaultdboptions"); WGUtils.getOrCreateElement(domain, "defaultpublisheroptions"); } // content dbs Element contentdbs = WGUtils.getOrCreateElement(wga, "contentdbs"); Iterator contentdbTags = contentdbs.selectNodes("contentdb").iterator(); Set usedDomains = new HashSet(); while (contentdbTags.hasNext()) { Element contentdb = (Element) contentdbTags.next(); WGUtils.getOrCreateAttribute(contentdb, "enabled", "true"); WGUtils.getOrCreateAttribute(contentdb, "lazyconnect", "false"); Element type = WGUtils.getOrCreateElement(contentdb, "type"); String typeName = type.getStringValue(); if (typeName.equals("de.innovationgate.webgate.api.domino.local.WGDatabaseImpl")) { type.setText("de.innovationgate.webgate.api.domino.WGDatabaseImpl"); } boolean isFullContentStore = false; DbType dbType = DbType.getByImplClass(DbType.GENTYPE_CONTENT, typeName); if (dbType != null) { isFullContentStore = dbType.isFullContentStore(); } //lowercase dbkey Element dbkey = WGUtils.getOrCreateElement(contentdb, "dbkey"); dbkey.setText(dbkey.getText().trim().toLowerCase()); WGUtils.getOrCreateElement(contentdb, "title"); Element domain = WGUtils.getOrCreateElement(contentdb, "domain"); String domainStr = domain.getTextTrim(); if (domainStr.equals("")) { domainStr = "masterloginonly"; domain.setText("masterloginonly"); } usedDomains.add(domainStr); WGUtils.getOrCreateElement(contentdb, "login"); Element dboptions = WGUtils.getOrCreateElement(contentdb, "dboptions"); Iterator options = dboptions.selectNodes("option").iterator(); Element option; String optionName; while (options.hasNext()) { option = (Element) options.next(); optionName = option.attributeValue("name"); if 
(optionName.indexOf(":") != -1) { option.addAttribute("name", optionName.substring(optionName.indexOf(":") + 1)); } } WGUtils.getOrCreateElement(contentdb, "publisheroptions"); WGUtils.getOrCreateElement(contentdb, "storedqueries"); WGUtils.getOrCreateElement(contentdb, "fieldmappings"); if (isFullContentStore) { WGUtils.getOrCreateElement(contentdb, "shares"); } else { if (contentdb.element("shares") != null) { contentdb.remove(contentdb.element("shares")); } } Element cache = WGUtils.getOrCreateElement(contentdb, "cache"); WGUtils.getOrCreateAttribute(cache, "type", "de.innovationgate.wgpublisher.cache.WGACacheHSQLDB"); WGUtils.getOrCreateAttribute(cache, "path", ""); WGUtils.getOrCreateAttribute(cache, "maxpages", "5000"); // Design - Migrate old designsync element Element designsync = contentdb.element("designsync"); design = contentdb.element("design"); if (designsync != null && design == null) { design = contentdb.addElement("design"); if (designsync.attributeValue("enabled", "false").equals("true")) { design.addAttribute("provider", "sync"); } else { design.addAttribute("provider", "none"); } design.addAttribute("mode", designsync.attributeValue("mode", "")); design.addAttribute("key", designsync.attributeValue("key", "")); design.setText(designsync.getText()); } else { design = WGUtils.getOrCreateElement(contentdb, "design"); WGUtils.getOrCreateAttribute(design, "provider", "none"); WGUtils.getOrCreateAttribute(design, "mode", ""); WGUtils.getOrCreateAttribute(design, "key", ""); } // create default lucene config for old enabled dbs if (oldLuceneEnabledDBKeys.contains(dbkey.getText().toLowerCase())) { Element luceneDBConfig = WGUtils.getOrCreateElement(contentdb, "lucene"); WGUtils.getOrCreateAttribute(luceneDBConfig, "enabled", "true"); WGUtils.getOrCreateElement(luceneDBConfig, "itemrules"); // create defaultrule LuceneIndexItemRule.addDefaultRule(luceneDBConfig); } //lucene config per db Element luceneDBConfig = WGUtils.getOrCreateElement(contentdb, 
"lucene"); WGUtils.getOrCreateAttribute(luceneDBConfig, "enabled", "false"); WGUtils.getOrCreateElement(luceneDBConfig, "itemrules"); //check for default rule ArrayList rules = (ArrayList) LuceneIndexItemRule.getRules(luceneDBConfig); if (rules.size() > 0) { //check if last rule is defaultrule LuceneIndexItemRule checkDefaultRule = (LuceneIndexItemRule) rules.get(rules.size() - 1); if (!checkDefaultRule.getItemExpression().equals(LuceneIndexItemRule.EXPRESSION_WILDCARD)) { //last rule is no defaultRule, create defaultRule LuceneIndexItemRule.addDefaultRule(luceneDBConfig); } } else { //no rules present, create defaultRule LuceneIndexItemRule.addDefaultRule(luceneDBConfig); } // lucene file rules WGUtils.getOrCreateElement(luceneDBConfig, "filerules"); //check for default filerule rules = (ArrayList) LuceneIndexFileRule.getRules(luceneDBConfig); if (rules.size() > 0) { //check if last rule is defaultrule LuceneIndexFileRule checkDefaultRule = (LuceneIndexFileRule) rules.get(rules.size() - 1); if (!checkDefaultRule.isDefaultRule()) { //last rule is no defaultRule, create defaultRule LuceneIndexFileRule.addDefaultRule(luceneDBConfig); } } else { //no rules present, create defaultRule LuceneIndexFileRule.addDefaultRule(luceneDBConfig); } // client restrictions Element clientRestrictions = WGUtils.getOrCreateElement(contentdb, "clientrestrictions"); WGUtils.getOrCreateAttribute(clientRestrictions, "enabled", "false"); WGUtils.getOrCreateElement(clientRestrictions, "restrictions"); } // Personalisation dbs Element persodbs = WGUtils.getOrCreateElement(wga, "personalisationdbs"); Iterator persodbTags = persodbs.selectNodes("personalisationdb").iterator(); while (persodbTags.hasNext()) { Element persodb = (Element) persodbTags.next(); WGUtils.getOrCreateAttribute(persodb, "enabled", "true"); WGUtils.getOrCreateAttribute(persodb, "lazyconnect", "false"); Element type = WGUtils.getOrCreateElement(persodb, "type"); if 
(type.getStringValue().equals("de.innovationgate.webgate.api.domino.local.WGDatabaseImpl")) { type.setText("de.innovationgate.webgate.api.domino.WGDatabaseImpl"); } Element domain = WGUtils.getOrCreateElement(persodb, "domain"); String domainStr = domain.getTextTrim(); if (domainStr.equals("")) { domainStr = "masterloginonly"; domain.setText("masterloginonly"); } usedDomains.add(domainStr); WGUtils.getOrCreateElement(persodb, "login"); Element persConfig = WGUtils.getOrCreateElement(persodb, "persconfig"); WGUtils.getOrCreateAttribute(persConfig, "mode", "auto"); WGUtils.getOrCreateAttribute(persConfig, "statistics", "off"); Element dboptions = WGUtils.getOrCreateElement(persodb, "dboptions"); Iterator options = dboptions.selectNodes("option").iterator(); Element option; String optionName; while (options.hasNext()) { option = (Element) options.next(); optionName = option.attributeValue("name"); if (optionName.indexOf(":") != -1) { option.addAttribute("name", optionName.substring(optionName.indexOf(":") + 1)); } } WGUtils.getOrCreateElement(persodb, "publisheroptions"); } // **** Post-Processings **** // Turn stored queries into CDATA-Sections List queries = doc.selectNodes("/wga/contentdbs/contentdb/storedqueries/storedquery/query"); for (Iterator iter = queries.iterator(); iter.hasNext();) { Element query = (Element) iter.next(); Node text = query.selectSingleNode("text()"); if (text != null && text instanceof Text) { query.addCDATA(text.getText()); query.remove(text); } } // Create domains from database definitions Iterator usedDomainsIt = usedDomains.iterator(); String usedDomain; while (usedDomainsIt.hasNext()) { usedDomain = (String) usedDomainsIt.next(); Element domain = (Element) domains.selectSingleNode("domain[@name='" + usedDomain + "']"); if (domain == null) { domain = domains.addElement("domain"); domain.addAttribute("name", usedDomain); Element login = domain.addElement("login"); if (usedDomain.equals("masterloginonly")) { login.addAttribute("mode", 
"master"); } else { login.addAttribute("mode", "user"); } login.addAttribute("username", ""); login.addAttribute("password", ""); Element errorPage = domain.addElement("errorpage"); errorPage.addAttribute("enabled", "false"); Element defDBOptions = domain.addElement("defaultdboptions"); Element defPublisherOptions = domain.addElement("defaultpublisheroptions"); } } // Reorder content dbs, so design providers are first pullupDesignProviders(doc); }
From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.SemCorXMLReader.java
License:Apache License
@SuppressWarnings("unchecked") private StringBuffer processSentences(JCas jCas, Element element, int offset, String idPrefix) throws CollectionException { StringBuffer sentenceText = new StringBuffer(); for (Iterator<Element> sentenceIterator = element.elementIterator(ELEMENT_SENTENCE); sentenceIterator .hasNext();) {/*from ww w .ja va2 s. c o m*/ Element sentence = sentenceIterator.next(); Sentence sentenceAnnotation = new Sentence(jCas); sentenceAnnotation.setBegin(offset); String sentenceId = sentence.attributeValue(ATTR_SNUM); int wordFormCount = 0; for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); int oldOffset = offset; offset += nodeText.length(); sentenceText.append(nodeText); if (node.getName() == null) { continue; } if (node.getName().equals(ELEMENT_PUNCTUATION)) { logger.trace("Found punctuation " + node.getText()); continue; } if (node.getName().equals(ELEMENT_WORDFORM) == false) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } // Find or construct a unique ID for this word form wordFormCount++; totalWordFormCount++; Element wordForm = (Element) node; String wordFormId = wordForm.attributeValue(ATTR_ID); if (wordFormId == null) { wordFormId = idPrefix + ".s" + sentenceId + ".w" + wordFormCount; } logger.trace("Found wf id: " + wordFormId); String lemma = wordForm.attributeValue(ATTR_LEMMA); String pos = wordForm.attributeValue(ATTR_POS); // write DKPro Core annotations Token, Lemma, and POS if (shouldWriteCoreAnnotations) { Lemma lemmaAnno = null; if (lemma != null) { lemmaAnno = new Lemma(jCas, offset, oldOffset + nodeText.length()); lemmaAnno.setValue(lemma); lemmaAnno.addToIndexes(); } de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS posAnno = null; if (pos != null) { Type posTag = mappingProvider.getTagType(pos); posAnno = (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) 
jCas.getCas() .createAnnotation(posTag, oldOffset, oldOffset + nodeText.length()); posAnno.setPosValue(pos); posAnno.addToIndexes(); } Token tokenAnno = new Token(jCas, oldOffset, oldOffset + nodeText.length()); tokenAnno.setLemma(lemmaAnno); tokenAnno.setPos(posAnno); tokenAnno.addToIndexes(); } // Skip <wf> elements which are not marked as "done" if (skipUndone == true && wordForm.attributeValue(ATTR_CMD).equals(VAL_DONE) == false) { logger.debug("Skipping wf " + wordFormId + ": not marked as 'done'"); continue; } // Skip <wf> elements for which semantic tags could not be // assigned if (skipUnassignable == true && wordForm.attributeValue(ATTR_OT) != null) { logger.debug("Skipping wf " + wordFormId + ": ot=" + wordForm.attributeValue(ATTR_OT)); continue; } // Find the number of valid sense tags for this word form. // Tags with a wnsn attribute value of "0" // (or "-1" according to some specifications) could not be // mapped and so are skipped. String wnsn = wordForm.attributeValue(ATTR_WNSN); if (skipWithoutWnsn == true && wnsn == null) { logger.debug("Skipping wf " + wordFormId + ": no wnsn"); continue; } int totalValidWf = 0; String wnsns[] = wnsn.split(";"); for (String s : wnsns) { if (isValidWnsn(s)) { totalValidWf++; } } if (skipWithoutWnsn == true && totalValidWf == 0) { logger.debug("Skipping wf " + wordFormId + ": wnsn=" + wordForm.attributeValue(ATTR_WNSN)); continue; } // Skip word forms without a lemma if (skipWithoutLemma == true && lemma == null) { logger.warn("Sipping wf " + wordFormId + ": no lemma"); continue; } // Skip word forms without a POS if (skipWithoutPos == true && pos == null) { logger.warn("Skipping " + wordFormId + ": no pos"); continue; } try { pos = semCorPosToPOS(pos).toString(); } catch (IllegalArgumentException e) { logger.warn("Skipping wf " + wordFormId + ": unrecognized pos=" + pos); continue; } // Create the necessary WSDItem and LexicalItemConstituent // annotations for this word form LexicalItemConstituent c = 
newLexicalItemConstituent(jCas, wordFormId, ELEMENT_WORDFORM, oldOffset, nodeText.length()); WSDItem w = newWsdItem(jCas, wordFormId, oldOffset, nodeText.length(), pos, lemma); w.setConstituents(new FSArray(jCas, 1)); w.setConstituents(0, c); // Get an array of sense tags. Sense tags are found // in the lexsn attribute and are separated with // semicolons. Sometimes the head_word field contains // a superfluous character in parentheses which must // be removed. (These quirks are not documented in // the SemCor file format specification.) String lexsns[] = wordForm.attributeValue(ATTR_LEXSN).replaceAll("\\(.\\)", "").split(";"); FSArray senseArray = new FSArray(jCas, totalValidWf); int validWfCount = 0; for (int i = 0; i < lexsns.length; i++) { if (isValidWnsn(wnsns[i])) { Sense sense = new Sense(jCas); sense.setId(lemma + "%" + lexsns[i]); sense.setConfidence(1.0); sense.addToIndexes(); senseArray.set(validWfCount++, sense); } } WSDResult wsdResult = new WSDResult(jCas, oldOffset, oldOffset + nodeText.length()); wsdResult.setWsdItem(w); wsdResult.setSenses(senseArray); wsdResult.setSenseInventory(senseInventory); wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME); wsdResult.addToIndexes(); } sentenceAnnotation.setEnd(offset); sentenceAnnotation.addToIndexes(); } return sentenceText; }
From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.WebCAGeXMLReader.java
License:Apache License
/**
 * Walks over all child nodes of the given text element in document order, collects their
 * text and creates WSD annotations for every head element.
 *
 * @param jCas the CAS to which annotations are added
 * @param text the element whose child nodes are processed
 * @return the concatenated text of all child nodes, with newlines replaced by spaces
 * @throws CollectionException if an element other than a head or satellite is found and the
 *         node's type name is not "Entity"
 */
@SuppressWarnings("unchecked")
private StringBuffer processText(JCas jCas, Element text) throws CollectionException {
    StringBuffer documentText = new StringBuffer();
    int offset = 0;

    // Loop over all nodes to get the document text in order
    for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) {
        Node node = nodeIterator.next();
        String nodeText = node.getText().replace('\n', ' ');
        String nodeName = node.getName();

        if (nodeName == null || nodeName.equals(ELEMENT_SAT)) {
            // TODO: For now we ignore satellites. We should add support for them.
            // Unnamed nodes and satellites only contribute their text.
        }
        // If the node is a head, create a LexicalItemConstituent and a WSDItem
        else if (nodeName.equals(ELEMENT_HEAD)) {
            annotateHead(jCas, (Element) node, offset, nodeText.length());
        }
        // If the node is any other element, something is wrong
        else if (node.getNodeTypeName().equals("Entity") == false) {
            throw new CollectionException("unknown_element", new Object[] { node.getName() });
        }

        // The text of every node is kept, including heads whose annotation was skipped.
        // Previously a skipped head dropped its word from the document text.
        offset += nodeText.length();
        documentText.append(nodeText);
    }
    return documentText;
}

/**
 * Creates the WSDItem, LexicalItemConstituent, Sense and WSDResult annotations for one head
 * element. Heads without a POS, or with a POS that cannot be converted, are logged and left
 * unannotated.
 */
private void annotateHead(JCas jCas, Element head, int offset, int length) throws CollectionException {
    String headId = head.attributeValue(ATTR_ID);
    String lemma = head.attributeValue(ATTR_LEMMA);
    logger.trace("Reading instance " + headId);

    // Skip word forms without a POS
    String pos = head.attributeValue(ATTR_POS);
    if (pos == null) {
        logger.warn("No POS provided for " + headId + "; skipping");
        return;
    }
    try {
        pos = webCAGePosToPOS(pos).toString();
    } catch (IllegalArgumentException e) {
        logger.warn("Unrecognized POS " + pos + " provided for " + headId + "; skipping");
        return;
    }

    // Create the necessary WSDItem and LexicalItemConstituent
    // annotations for this word form
    LexicalItemConstituent c = newLexicalItemConstituent(jCas, headId, ELEMENT_HEAD, offset, length);
    WSDItem w = newWsdItem(jCas, headId, offset, length, pos, lemma);
    w.setConstituents(new FSArray(jCas, 1));
    w.setConstituents(0, c);

    // Get an array of lexical unit IDs (LUIDs). LUIDs are found
    // in the luids attribute and are separated with
    // # characters.
    String[] luids = head.attributeValue(ATTR_LUIDS).split("#");
    FSArray senseArray = new FSArray(jCas, luids.length);
    for (int i = 0; i < luids.length; i++) {
        Sense sense = new Sense(jCas);
        // The first character of each LUID is not part of the sense ID
        // NOTE(review): presumably a one-letter prefix - confirm against the data format.
        sense.setId(luids[i].substring(1));
        sense.setConfidence(1.0);
        sense.addToIndexes();
        senseArray.set(i, sense);
    }

    WSDResult wsdResult = new WSDResult(jCas);
    wsdResult.setWsdItem(w);
    wsdResult.setSenses(senseArray);
    wsdResult.setSenseInventory(senseInventory);
    wsdResult.setDisambiguationMethod(DISAMBIGUATION_METHOD_NAME);
    wsdResult.addToIndexes();
}
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Semeval1AWReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w w w . j a va2 s . com*/ public void getNext(JCas jCas) throws IOException, CollectionException { int offset = 0, numSentences = 0; String s = ""; Element text = textIterator.next(); for (Iterator<Element> sentenceIterator = text.elementIterator(SENTENCE_ELEMENT_NAME); sentenceIterator .hasNext();) { Element sentence = sentenceIterator.next(); Sentence sentenceAnnotation = new Sentence(jCas); sentenceAnnotation.setBegin(offset); for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); // If the node is a head, create a LexicalItemConstituent and a // WSDItem if (node.getName() != null && node.getName().equals(HEAD_ELEMENT_NAME)) { Element head = (Element) node; String id = head.attributeValue(ID_ATTRIBUTE_NAME); LexicalItemConstituent c = newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length()); WSDItem w = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), head.attributeValue(POS_ATTRIBUTE_NAME), head.attributeValue(LEMMA_ATTRIBUTE_NAME)); w.setConstituents(new FSArray(jCas, 1)); w.setConstituents(0, c); } else if (node.getName() != null) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } sentenceAnnotation.setEnd(offset); sentenceAnnotation.addToIndexes(); numSentences++; } // The Semeval-1 DTD requires each text to have at least one sentence if (numSentences == 0) { throw new CollectionException("element_not_found", new Object[] { SENTENCE_ELEMENT_NAME, TEXT_ELEMENT_NAME }); } jCas.setDocumentText(s); try { setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } textCount++; }
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Semeval2AWReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from w ww.j a v a 2s. c o m public void getNext(JCas jCas) throws IOException, CollectionException { int offset = 0; String s = ""; Element text = textIterator.next(); for (Iterator<Element> sentenceIterator = text.elementIterator(SENTENCE_ELEMENT_NAME); sentenceIterator .hasNext();) { Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>(); Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>(); Map<String, String> sats = new HashMap<String, String>(); Element sentence = sentenceIterator.next(); Sentence sentenceAnnotation = new Sentence(jCas); sentenceAnnotation.setBegin(offset); // Loop over all nodes to get the document text in order for (Iterator<Node> nodeIterator = sentence.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); String nodeName = node.getName(); if (nodeName == null) { offset += nodeText.length(); s += nodeText; continue; } // If the node is a satellite, create a LexicalItemConstituent if (nodeName.equals(SATELLITE_ELEMENT_NAME)) { String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length())); } // If the node is a head, create a LexicalItemConstituent and a WSDItem else if (nodeName.equals(HEAD_ELEMENT_NAME)) { Element head = (Element) node; String id = head.attributeValue(ID_ATTRIBUTE_NAME); String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length())); wsdItems.put(id, newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), head.attributeValue(POS_ATTRIBUTE_NAME), head.attributeValue(LEMMA_ATTRIBUTE_NAME))); if (satellites != null) sats.put(id, satellites); } // If the node is any other element, something is wrong else if (node.getNodeTypeName().equals("Entity") == false) { throw new 
CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } // Add a sentence annotation sentenceAnnotation.setEnd(offset); sentenceAnnotation.addToIndexes(); populateLexicalItemConstituents(jCas, wsdItems, lics, sats); } jCas.setDocumentText(s); try { setDocumentMetadata(jCas, text.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } textCount++; }
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2AWReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from ww w .java 2 s .c o m public void getNext(JCas jCas) throws IOException, CollectionException { int offset = 0; String s = ""; Element text = textIterator.next(); Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>(); Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>(); Map<String, String> sats = new HashMap<String, String>(); // Loop over all nodes to get the document text in order for (Iterator<Node> nodeIterator = text.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); String nodeName = node.getName(); if (nodeName == null) { offset += nodeText.length(); s += nodeText; continue; } // If the node is a satellite, create a LexicalItemConstituent if (nodeName.equals(SATELLITE_ELEMENT_NAME)) { String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME); LexicalItemConstituent lic = newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length()); lics.put(id, lic); } // If the node is a head, create a LexicalItemConstituent and a WSDItem else if (nodeName.equals(HEAD_ELEMENT_NAME)) { Element head = (Element) node; String id = head.attributeValue(ID_ATTRIBUTE_NAME); String satellites = head.attributeValue(SATELLITES_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length())); WSDItem wsdItem = newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), null, nodeText); wsdItems.put(id, wsdItem); if (satellites != null) { sats.put(id, satellites); } } // If the node is any other element, something is wrong else if (node.getNodeTypeName().equals("Entity") == false) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } populateLexicalItemConstituents(jCas, wsdItems, lics, sats); jCas.setDocumentText(s); try { setDocumentMetadata(jCas, 
text.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } textCount++; }
From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.Senseval2LSReader.java
License:Apache License
@SuppressWarnings("unchecked") @Override/* w ww . j a v a2 s .c o m*/ public void getNext(JCas jCas) throws IOException, CollectionException { // If there are no more <instance>s in this <lexelt>, get the next // <lexelt> if (instanceIterator.hasNext() == false) { lexelt = lexeltIterator.next(); lexeltPOS = getLexeltPOS(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME)); lexeltLemma = getLexeltLemma(lexelt.attributeValue(ITEM_ATTRIBUTE_NAME)); textCount++; instanceIterator = lexelt.elementIterator(INSTANCE_ELEMENT_NAME); } Element instance = instanceIterator.next(); Element context = instance.element(CONTEXT_ELEMENT_NAME); int offset = 0; String s = ""; Map<String, WSDItem> wsdItems = new HashMap<String, WSDItem>(); Map<String, LexicalItemConstituent> lics = new HashMap<String, LexicalItemConstituent>(); Map<String, String> sats = new HashMap<String, String>(); // Loop over all nodes to get the document text in order for (Iterator<Node> nodeIterator = context.nodeIterator(); nodeIterator.hasNext();) { Node node = nodeIterator.next(); String nodeText = node.getText().replace('\n', ' '); String nodeName = node.getName(); if (nodeName == null) { offset += nodeText.length(); s += nodeText; continue; } // If the node is a satellite, create a LexicalItemConstituent if (nodeName.equals(SATELLITE_ELEMENT_NAME)) { String id = ((Element) node).attributeValue(ID_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_SATELLITE, offset, nodeText.length())); } // If the node is a head, create a LexicalItemConstituent and a // WSDItem else if (nodeName.equals(HEAD_ELEMENT_NAME)) { String id = instance.attributeValue(ID_ATTRIBUTE_NAME); String satellites = ((Element) node).attributeValue(SATELLITES_ATTRIBUTE_NAME); lics.put(id, newLexicalItemConstituent(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length())); wsdItems.put(id, newWsdItem(jCas, id, LIC_TYPE_HEAD, offset, nodeText.length(), lexeltPOS, lexeltLemma)); if (satellites != null) { sats.put(id, satellites); } } // 
If the node is any other element, something is wrong else if (node.getNodeTypeName().equals("Entity") == false) { throw new CollectionException("unknown_element", new Object[] { node.getName() }); } offset += nodeText.length(); s += nodeText; } populateLexicalItemConstituents(jCas, wsdItems, lics, sats); jCas.setDocumentText(s); try { setDocumentMetadata(jCas, instance.attributeValue(ID_ATTRIBUTE_NAME)); } catch (URISyntaxException e) { throw new IOException(e); } }
From source file:dk.dma.nogoservice.ExtractBagData.java
License:Apache License
@SneakyThrows(DocumentException.class) private GridData readMetaData(String bagFile) { File xmlFile = new File(bagFile + ".xml"); if (!xmlFile.exists()) { throw new ExitCodeException(NO_XML_FILE); }/*ww w . ja v a 2s .c o m*/ SAXReader reader = new SAXReader(); Document document = reader.read(xmlFile); // we use fully qualified XPath, as we would like to fail if the format change Node coordinateBox = document.selectSingleNode( "/gmi:MI_Metadata/gmd:identificationInfo/bag:BAG_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox"); Node westLon = coordinateBox.selectSingleNode("gmd:westBoundLongitude/gco:Decimal"); Node eastLon = coordinateBox.selectSingleNode("gmd:eastBoundLongitude/gco:Decimal"); Node southLat = coordinateBox.selectSingleNode("gmd:southBoundLatitude/gco:Decimal"); Node northLat = coordinateBox.selectSingleNode("gmd:northBoundLatitude/gco:Decimal"); Node columnNode = document.selectSingleNode( "/gmi:MI_Metadata/gmd:spatialRepresentationInfo/gmd:MD_Georectified/gmd:axisDimensionProperties/gmd:MD_Dimension/gmd:dimensionName/gmd:MD_DimensionNameTypeCode[@codeListValue=\"column\"]/../../gmd:dimensionSize/gco:Integer"); Node rowNode = document.selectSingleNode( "/gmi:MI_Metadata/gmd:spatialRepresentationInfo/gmd:MD_Georectified/gmd:axisDimensionProperties/gmd:MD_Dimension/gmd:dimensionName/gmd:MD_DimensionNameTypeCode[@codeListValue=\"row\"]/../../gmd:dimensionSize/gco:Integer"); GridData gridData = new GridData(); gridData.setLo1(Float.parseFloat(westLon.getText())); gridData.setLo2(Float.parseFloat(eastLon.getText())); gridData.setLa1(Float.parseFloat(southLat.getText())); gridData.setLa2(Float.parseFloat(northLat.getText())); gridData.setNx(Integer.parseInt(columnNode.getText())); gridData.setNy(Integer.parseInt(rowNode.getText())); return gridData; }
From source file:dk.netarkivet.common.utils.SimpleXml.java
License:Open Source License
/** * Get list of all items matching the key. If no items exist matching the key, an empty list is returned. * * @param key the path down to elements to get * @return a list of items that match the supplied key *///from www . j a v a 2 s.c o m public List<String> getList(String key) { ArgumentNotValid.checkNotNullOrEmpty(key, "key"); List<Node> nodes = (List<Node>) getXPath(key).selectNodes(xmlDoc); if (nodes == null || nodes.size() == 0) { return Collections.emptyList(); } List<String> results = new ArrayList<String>(nodes.size()); for (Node node : nodes) { results.add(node.getText()); } return results; }