List of usage examples for opennlp.tools.parser.Parse.getCoveredText()
public String getCoveredText()
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
/** * Compose the hierarchical set of noun-adjective combinations from the NP. * First gets all anchors, then processes each JJ and NN combination, adding * them directly or as subvalues of the anchors. * // www . j av a2s . c om * @param parse * The parse to compose the strings from. * @param anchors * The anchors to insert during composition. * @return A Hierarchical Weighted Set of strings representing the order of * strings that should be attempted to assert. */ public Collection<Tree<String>> composeAdjNounsTree(Parse parse, SortedMap<String, String> anchors) { String text = parse.getCoveredText(); Collection<Tree<String>> results = new ArrayList<>(); // Add all visible anchors // Keep track of which text is in what anchors Map<String, Tree<String>> anchorMap = new HashMap<>(); results.addAll(extractAnchors(text, anchors, anchorMap)); // Work backwards through the children, adding nouns, then adjectives to // the nouns results.addAll(pairNounAdjs(parse, anchors, anchorMap)); return results; }
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
/**
 * Recursively walks a parse tree, collecting predicate strings from VP and
 * PP nodes and, on NP nodes, turning the noun strings into partial
 * assertions which are added to {@code results}.
 *
 * @param parse
 *            The parse node to process.
 * @param predicateStrs
 *            The predicate strings gathered so far; {@code null} is treated
 *            as a single empty predicate. Recursive calls receive a copy.
 * @param focusConcept
 *            The concept passed on to {@code recurseStringTree} when
 *            building assertions.
 * @param anchors
 *            The anchors passed on to {@code composeAdjNounsTree}.
 * @param wmi
 *            Only forwarded to the recursive calls in this method.
 * @param cyc
 *            Only forwarded to the recursive calls in this method.
 * @param heuristic
 *            The heuristic recorded in the provenance of created assertions.
 * @param results
 *            Output collection; assertions not already contained are added.
 * @return The covered text of this node, or {@code null} if the node is a
 *         VP/PP with children, if any child returned {@code null}, or if
 *         this call added at least one new assertion.
 * @throws Exception
 *             Propagated from the recursive calls and helper methods.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    // Start with a single empty predicate when none was supplied.
    if (predicateStrs == null) {
        predicateStrs = new String[1];
        predicateStrs[0] = "";
    }
    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();

    // No children? Return value
    if (children.length == 0)
        return text;

    // Recurse to 'left' (the first child); it gets its own copy of the
    // predicate array so its modifications do not leak back here.
    int childIndex = 0;
    String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length),
            focusConcept, anchors, wmi, cyc, heuristic, results);

    // If VP or PP, add to predicate
    boolean canCreate = true;
    if (left != null) {
        if (type.equals("VP"))
            // A VP replaces the current first predicate with the left text.
            predicateStrs[0] = left.trim();
        else if (type.equals("PP")) {
            // If PP, split recursion into two predicates: the existing
            // predicate extended by the left text, plus (when that differs
            // from the left text alone) the left text as an extra entry.
            predicateStrs[0] = (predicateStrs[0] + " " + left).trim();
            if (!predicateStrs[0].equals(left)) {
                predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1);
                predicateStrs[predicateStrs.length - 1] = left;
            }
        }
    } else
        // The first child returned null, so this node creates nothing.
        canCreate = false;

    // Process the remaining children with the (possibly updated) predicates.
    for (; childIndex < children.length; childIndex++) {
        Parse childParse = children[childIndex];
        String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length),
                focusConcept, anchors, wmi, cyc, heuristic, results);
        if (result == null) {
            canCreate = false;
        }
    }

    // VP and PP nodes with children always return null to their caller.
    if (type.equals("VP") || type.equals("PP"))
        return null;

    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            AssertionArgument predicate = null;
            if (isCopula(predStr)) {
                predicate = CycConstants.ISA_GENLS.getConcept();
            } else {
                // TODO Figure out a safe way to parse predicates. Probably
                // need to look at the parse code again.
                // predStr = reAnchorString(predStr, anchors);
                // predicate = new TextMappedConcept(predStr, true, true);
            }
            // Non-copula predicates are currently skipped (see TODO above).
            if (predicate == null)
                continue;

            // Return the possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));

            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);

            // Add the assertions; adding any new one clears canCreate, which
            // makes this call return null below.
            for (PartialAssertion pa : currAssertions)
                if (!results.contains(pa)) {
                    results.add(pa);
                    canCreate = false;
                }
        }
    }

    if (!canCreate)
        return null;
    return text;
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/**
 * Flattens a Penn Treebank constituent tree into its tokens on a single
 * line. Every POS-tag node whose type is not {@code -NONE-} contributes its
 * covered text followed by one space; all other nodes are descended into.
 *
 * @param parse
 *            the parse tree
 * @param sb
 *            the StringBuilder the tokens are appended to
 */
private void getTokens(Parse parse, StringBuilder sb) {
    // Non-terminal node: visit each child in order and stop.
    if (!parse.isPosTag()) {
        for (Parse child : parse.getChildren()) {
            getTokens(child, sb);
        }
        return;
    }

    // POS-tag node: emit the token unless it is a -NONE- node.
    boolean isNoneNode = parse.getType().equals("-NONE-");
    if (!isNoneNode) {
        sb.append(parse.getCoveredText());
        sb.append(" ");
    }
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/** * It converts a penn treebank constituent tree into Word_POS form * /* www .ja v a2s. c o m*/ * @param parse * @param sb */ private void getWordType(Parse parse, StringBuilder sb) { if (parse.isPosTag()) { if (!parse.getType().equals("-NONE-")) { sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" "); } } else { Parse children[] = parse.getChildren(); for (int i = 0; i < children.length; i++) { getWordType(children[i], sb); } } }