List of usage examples for opennlp.tools.parser Parse getChildren
public Parse[] getChildren()
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
/** * Pairs the nouns and adjectives together to produce a number of result * text fragments to be resolved to concepts. * * @param parse/*from w w w. j a v a 2 s. c o m*/ * The parse to process and pair. * @param anchors * The anchor map. * @param existingAnchorTrees * The anchor trees already added to the results (for reuse and * subtree-ing) * @return A collection of possible mappable entities composed of at least * one noun and possible adjectives (with sub-adjectives). */ private Collection<Tree<String>> pairNounAdjs(Parse parse, SortedMap<String, String> anchors, Map<String, Tree<String>> existingAnchorTrees) { Collection<Tree<String>> results = new ArrayList<>(); boolean createNewNounSet = false; ArrayList<String> nounPhrases = new ArrayList<>(); Parse[] children = parse.getChildren(); for (int i = children.length - 1; i >= 0; i--) { String childType = children[i].getType(); String childText = children[i].getCoveredText(); if (childType.startsWith("NN") || childType.equals("NP")) { // Note the noun, adding it to the front of the existing NP. 
if (createNewNounSet) nounPhrases.clear(); String existingNounPhrase = ""; if (!nounPhrases.isEmpty()) existingNounPhrase = nounPhrases.get(nounPhrases.size() - 1); String np = (childText + " " + existingNounPhrase).trim(); nounPhrases.add(np); // Add to the tree (if not a pure anchor) if (!anchors.containsKey(np)) results.add(new Tree<String>(reAnchorString(np, anchors))); } else if (childType.startsWith("JJ") || childType.equals("ADJP")) { // Only process if we have an NP if (!nounPhrases.isEmpty()) { // For every nounPhrase StringBuilder adjective = new StringBuilder(); for (int j = i; children[j].getType().startsWith("JJ") || children[j].getType().equals("ADJP"); j++) { // Build adjective combinations if (adjective.length() != 0) adjective.append(" "); adjective.append(children[j].getCoveredText()); for (String np : nounPhrases) { // Create the tree (with sub adjective tree) String adjNP = adjective + " " + np; Tree<String> adjP = null; // Check for an existing anchor tree if (existingAnchorTrees.containsKey(adjNP)) adjP = existingAnchorTrees.get(adjNP); else adjP = new Tree<String>(reAnchorString(adjNP, anchors)); if (!anchors.containsKey(adjective.toString())) adjP.addSubValue(reAnchorString(adjective.toString(), anchors)); // Add to the tree results.add(adjP); } } } createNewNounSet = true; } else { createNewNounSet = true; } } return results; }
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept, SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic, Collection<PartialAssertion> results) throws Exception { if (predicateStrs == null) { predicateStrs = new String[1]; predicateStrs[0] = ""; }/*from w ww . j a v a2 s. co m*/ Parse[] children = parse.getChildren(); String type = parse.getType(); String text = parse.getCoveredText(); // No children? Return value if (children.length == 0) return text; // Recurse to 'left' int childIndex = 0; String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length), focusConcept, anchors, wmi, cyc, heuristic, results); // If VP or PP, add to predicate boolean canCreate = true; if (left != null) { if (type.equals("VP")) predicateStrs[0] = left.trim(); else if (type.equals("PP")) { // If PP, split recursion into two predicates predicateStrs[0] = (predicateStrs[0] + " " + left).trim(); if (!predicateStrs[0].equals(left)) { predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1); predicateStrs[predicateStrs.length - 1] = left; } } } else canCreate = false; for (; childIndex < children.length; childIndex++) { Parse childParse = children[childIndex]; String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length), focusConcept, anchors, wmi, cyc, heuristic, results); if (result == null) { canCreate = false; } } if (type.equals("VP") || type.equals("PP")) return null; // Can create and we have a target and predicate(s) if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) { for (String predStr : predicateStrs) { AssertionArgument predicate = null; if (isCopula(predStr)) { predicate = CycConstants.ISA_GENLS.getConcept(); } else { // TODO Figure out a safe way to parse predicates. Probably // need to look at the parse code again. 
// predStr = reAnchorString(predStr, anchors); // predicate = new TextMappedConcept(predStr, true, true); } if (predicate == null) continue; // Return the possible noun strings Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors); logger_.trace("createAssertions: " + predicate.toString() + " " + nounStrs.toString().replaceAll("\\\\\n", " ")); // Recurse through the tree and build the partial assertions HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text); Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs, provenance); // Add the assertions for (PartialAssertion pa : currAssertions) if (!results.contains(pa)) { results.add(pa); canCreate = false; } } } if (!canCreate) return null; return text; }
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/** * It converts a penn treebank constituent tree into tokens oneline form. * /*from w w w .j a v a 2 s. c om*/ * @param parse * the parse tree * @param sb * the stringbuilder to add the trees */ private void getTokens(Parse parse, StringBuilder sb) { if (parse.isPosTag()) { if (!parse.getType().equals("-NONE-")) { sb.append(parse.getCoveredText()).append(" "); } } else { Parse children[] = parse.getChildren(); for (int i = 0; i < children.length; i++) { getTokens(children[i], sb); } } }
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/** * It converts a penn treebank constituent tree into Word_POS form * /*from w w w. j a v a2s. co m*/ * @param parse * @param sb */ private void getWordType(Parse parse, StringBuilder sb) { if (parse.isPosTag()) { if (!parse.getType().equals("-NONE-")) { sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" "); } } else { Parse children[] = parse.getChildren(); for (int i = 0; i < children.length; i++) { getWordType(children[i], sb); } } }
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java
/** * Creates linked constituent annotations + POS annotations * * @param aNode/*from ww w . ja v a2 s .co m*/ * the source tree * @param aParentFS * @param aCreatePos * sets whether to create or not to create POS tags * @param aCreateLemmas * sets whether to create or not to create Lemmas * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(JCas aJCas, Parse aNode, Annotation aParentFS, List<Token> aTokens) { // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag if (aNode.isPosTag()) { Token token = getToken(aTokens, aNode.getSpan().getStart(), aNode.getSpan().getEnd()); // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } // only add POS to index if we want POS-tagging if (createPosTags) { Type posTag = mappingProvider.getTagType(aNode.getType()); POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); posAnno.setPosValue(internTags ? aNode.getType().intern() : aNode.getType()); posAnno.addToIndexes(); token.setPos((POS) posAnno); } return token; } // Check if node is a constituent node on sentence or phrase-level else { String typeName = aNode.getType(); if (AbstractBottomUpParser.TOP_NODE.equals(typeName)) { typeName = "ROOT"; // in DKPro the root is ROOT, not TOP } // create the necessary objects and methods String constituentTypeName = CONPACKAGE + typeName; Type type = aJCas.getTypeSystem().getType(constituentTypeName); //if type is unknown, map to X-type if (type == null) { type = aJCas.getTypeSystem().getType(CONPACKAGE + "X"); } Constituent constAnno = (Constituent) aJCas.getCas().createAnnotation(type, aNode.getSpan().getStart(), aNode.getSpan().getEnd()); constAnno.setConstituentType(typeName); // link to parent if (aParentFS != null) { constAnno.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Parse child : aNode.getChildren()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aJCas, child, constAnno, aTokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray childArray = (FSArray) FSCollectionFactory.createFSArray(aJCas, childAnnotations); constAnno.setChildren(childArray); // write annotation for current node to index aJCas.addFsToIndexes(constAnno); return constAnno; } }
From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java
public static boolean allChildNodesArePOSTags(Parse p) { Parse[] subParses = p.getChildren(); for (int pi = 0; pi < subParses.length; pi++) if (!((Parse) subParses[pi]).isPosTag()) return false; return true;//from w w w. ja v a 2 s . co m }
From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java
/**
 * Collects the text of every NP node below the given parse whose children
 * are all POS tags. Other non-POS-tag children are searched recursively.
 *
 * @param p
 *            the parse to search
 * @return the matching noun phrase strings, in traversal order
 */
public ArrayList<String> getNounPhrases(Parse p) {
    ArrayList<String> collected = new ArrayList<String>();
    for (Parse child : p.getChildren()) {
        if (child.getType().equals("NP") && allChildNodesArePOSTags(child)) {
            // Cut the phrase out of the parent's text using the child's span.
            Span span = child.getSpan();
            collected.add(p.getText().substring(span.getStart(), span.getEnd()));
            continue;
        }
        if (!child.isPosTag()) {
            collected.addAll(getNounPhrases(child));
        }
    }
    return collected;
}
From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java
/**
 * Collects the text of every node below the given parse whose type starts
 * with {@code VB} and whose children are all POS tags. Other non-POS-tag
 * children are searched recursively.
 *
 * @param p
 *            the parse to search
 * @return the matching verb phrase strings, in traversal order
 */
public ArrayList<String> getVerbPhrases(Parse p) {
    ArrayList<String> verbPhrases = new ArrayList<String>();
    for (Parse child : p.getChildren()) {
        // NOTE(review): "VB" is a POS-tag prefix; confirm whether the phrase
        // label "VP" was intended for this test.
        if (child.getType().startsWith("VB") && allChildNodesArePOSTags(child)) {
            // Cut the phrase out of the parent's text using the child's span.
            Span span = child.getSpan();
            verbPhrases.add(p.getText().substring(span.getStart(), span.getEnd()));
        } else if (!child.isPosTag()) {
            // Recurse with the verb-phrase search. The previous code called
            // getNounPhrases here, which mixed noun phrases into the result.
            verbPhrases.addAll(getVerbPhrases(child));
        }
    }
    return verbPhrases;
}
From source file:org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java
private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) { double overallPunctScore = 0.0; double minSyntaxScore = 1.0; double overallSyntaxScore = 0.0; double numOfNoms = 0; double numLongNominals = 0; double syntaxCount = 0; int countPunct = 0; Queue<Parse> parseTree = new LinkedList<Parse>(); parseTree.add(parse);//from w w w .ja va2 s . c om double rootProb = parse.getProb(); while (parseTree.size() > 0) { Parse p = parseTree.remove(); if ((p.getChildCount() == 1) && (p.getProb() < 1)) { double prob = p.getProb(); String pType = p.getType(); if (StringUtils.equals(pType, ",") || StringUtils.equals(pType, ".") || StringUtils.equals(pType, "!") || StringUtils.equals(pType, "?") || StringUtils.equals(pType, ";") || StringUtils.equals(pType, ":")) { overallPunctScore += prob; countPunct++; } else { if (!StringUtils.equals(pType, "TOP") && !StringUtils.equals(pType, "S")) { // string s = sentText_; if ((pType.startsWith("NN")))// || p.Type.StartsWith("JJ")) { numOfNoms++; } else { if ((numOfNoms > 2) && (rootProb > -25.5)) numLongNominals++; // _numOfNoms = 0; } if (prob < minSyntaxScore) minSyntaxScore = prob; overallSyntaxScore += prob; syntaxCount++; } } } Parse[] children = p.getChildren(); for (Parse pc : children) parseTree.add(pc); } overallPunctScore = (countPunct == 0) ? 0.0 : overallPunctScore / countPunct; ConventionsDocumentQualityHolder values = new ConventionsDocumentQualityHolder(); values.setOverallPunctScore(overallPunctScore); values.setMinSyntaxScore(minSyntaxScore); values.setOverallSyntaxScore(overallSyntaxScore); values.setNumOfNoms(numOfNoms); values.setNumLongNominals(numLongNominals); values.setSyntaxCount(syntaxCount); return values; }
From source file:org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java
private boolean checkPassive(AbstractDocument doc, Parse p) { Queue<Parse> queue = new LinkedList<Parse>(); queue.add(p);//from ww w .j a v a2 s .c o m while (queue.size() > 0) { p = queue.remove(); String parseType = p.getType(); if ((parseType.length() >= 2) && StringUtils.equalsIgnoreCase(parseType.substring(0, 2), "VB")) { String word = p.getText().substring(p.getSpan().getStart(), p.getSpan().getStart() + p.getSpan().length()); List<String> roots = wordnet.getBaseWords(word, EnumPOS.VERB); if ((roots.size() > 0) && (StringUtils.endsWithIgnoreCase(roots.get(0), "be"))) { return true; } else return false; } else { for (Parse child : p.getChildren()) queue.add(child); } } return false; }