List of usage examples for `opennlp.tools.parser.Parse.getType()`
public String getType()
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
public static synchronized Parse parseLine(String cleanSentence) { Parse parse = null; while (parse == null) { parse = OpenNLP.parseLine(cleanSentence); // Could not parse if (parse.getType().equals("INC")) { try { IOManager.getInstance().writeFirstSentence(-1, cleanSentence); } catch (IOException e) { e.printStackTrace();/*from w w w. jav a 2 s . c om*/ } // Simplify the sentence for (Pattern p : SENTENCE_SIMPLIFIER) { String simplifiedSentence = p.matcher(cleanSentence).replaceFirst(""); // Replace the clean sentence if (!simplifiedSentence.equals(cleanSentence)) { cleanSentence = StringUtils.capitalize(simplifiedSentence); parse = null; break; } } } } return parse; }
From source file:knowledgeMiner.mining.ExtractionPattern.java
/**
 * Checks whether a parse node matches the POS type expected at the current
 * argument index; if so, spawns a new ExtractionPattern that captures it.
 *
 * @param currentParse The parse node under consideration.
 * @return A new EP with the parse recorded and the index advanced, or null
 *         if the parse is not a valid argument here.
 */
public ExtractionPattern checkParse(Parse currentParse) {
    // Prefix match: e.g. expected "NN" accepts "NN", "NNS", "NNP", ...
    if (!currentParse.getType().startsWith(posTypes_[index_]))
        return null;
    ExtractionPattern spawned = new ExtractionPattern(this);
    spawned.parseArgs_[index_] = currentParse;
    spawned.index_++;
    return spawned;
}
From source file:knowledgeMiner.mining.SentenceParserHeuristic.java
/**
 * Recursively walks a parse tree, accumulating predicate strings from
 * VP/PP nodes and creating partial assertions at NP nodes.
 *
 * @param parse         The current parse node.
 * @param predicateStrs Predicate strings accumulated so far (slot 0 is the
 *                      active predicate); may be null at the root.
 * @param focusConcept  The concept the assertions are about.
 * @param anchors       Text-anchor mappings used when composing noun trees.
 * @param wmi           WMI access socket.
 * @param cyc           Ontology access socket.
 * @param heuristic     The heuristic recorded as assertion provenance.
 * @param results       Output collection of partial assertions (mutated).
 * @return The covered text of this subtree, or null when this subtree was
 *         consumed as predicate material or assertions were created below.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    if (predicateStrs == null) {
        predicateStrs = new String[1];
        predicateStrs[0] = "";
    }
    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();
    // No children? Return value
    if (children.length == 0)
        return text;
    // Recurse to 'left' — a defensive copy so the child cannot mutate our slots.
    int childIndex = 0;
    String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length),
            focusConcept, anchors, wmi, cyc, heuristic, results);
    // If VP or PP, add to predicate
    boolean canCreate = true;
    if (left != null) {
        if (type.equals("VP"))
            predicateStrs[0] = left.trim();
        else if (type.equals("PP")) {
            // If PP, split recursion into two predicates: the combined
            // predicate in slot 0, plus the bare PP text appended at the end.
            predicateStrs[0] = (predicateStrs[0] + " " + left).trim();
            if (!predicateStrs[0].equals(left)) {
                predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1);
                predicateStrs[predicateStrs.length - 1] = left;
            }
        }
    } else
        canCreate = false;
    // Recurse through the remaining children; any null result blocks creation.
    for (; childIndex < children.length; childIndex++) {
        Parse childParse = children[childIndex];
        String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length),
                focusConcept, anchors, wmi, cyc, heuristic, results);
        if (result == null) {
            canCreate = false;
        }
    }
    // Predicate-bearing nodes never return text of their own.
    if (type.equals("VP") || type.equals("PP"))
        return null;
    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            AssertionArgument predicate = null;
            if (isCopula(predStr)) {
                predicate = CycConstants.ISA_GENLS.getConcept();
            } else {
                // TODO Figure out a safe way to parse predicates. Probably
                // need to look at the parse code again.
                // predStr = reAnchorString(predStr, anchors);
                // predicate = new TextMappedConcept(predStr, true, true);
            }
            // Non-copula predicates are currently disabled (see TODO above).
            if (predicate == null)
                continue;
            // Return the possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));
            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);
            // Add the assertions; creating any marks this subtree as consumed.
            for (PartialAssertion pa : currAssertions)
                if (!results.contains(pa)) {
                    results.add(pa);
                    canCreate = false;
                }
        }
    }
    if (!canCreate)
        return null;
    return text;
}
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java
/** * Creates linked constituent annotations + POS annotations * * @param aNode/*ww w . j av a 2 s . co m*/ * the source tree * @param aParentFS * @param aCreatePos * sets whether to create or not to create POS tags * @param aCreateLemmas * sets whether to create or not to create Lemmas * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(JCas aJCas, Parse aNode, Annotation aParentFS, List<Token> aTokens) { // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag if (aNode.isPosTag()) { Token token = getToken(aTokens, aNode.getSpan().getStart(), aNode.getSpan().getEnd()); // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } // only add POS to index if we want POS-tagging if (createPosTags) { Type posTag = mappingProvider.getTagType(aNode.getType()); POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd()); posAnno.setPosValue(internTags ? aNode.getType().intern() : aNode.getType()); posAnno.addToIndexes(); token.setPos((POS) posAnno); } return token; } // Check if node is a constituent node on sentence or phrase-level else { String typeName = aNode.getType(); if (AbstractBottomUpParser.TOP_NODE.equals(typeName)) { typeName = "ROOT"; // in DKPro the root is ROOT, not TOP } // create the necessary objects and methods String constituentTypeName = CONPACKAGE + typeName; Type type = aJCas.getTypeSystem().getType(constituentTypeName); //if type is unknown, map to X-type if (type == null) { type = aJCas.getTypeSystem().getType(CONPACKAGE + "X"); } Constituent constAnno = (Constituent) aJCas.getCas().createAnnotation(type, aNode.getSpan().getStart(), aNode.getSpan().getEnd()); constAnno.setConstituentType(typeName); // link to parent if (aParentFS != null) { constAnno.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Parse child : aNode.getChildren()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aJCas, child, constAnno, aTokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray childArray = (FSArray) FSCollectionFactory.createFSArray(aJCas, childAnnotations); constAnno.setChildren(childArray); // write annotation for current node to index aJCas.addFsToIndexes(constAnno); return constAnno; } }
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/**
 * Converts a Penn Treebank constituent tree into tokens-oneline form by
 * appending each covered leaf token to the builder.
 *
 * @param parse The parse tree.
 * @param sb    The string builder the tokens are appended to.
 */
private void getTokens(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        // Skip empty elements / traces, tagged "-NONE-" in the treebank.
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append(" ");
        }
        return;
    }
    for (Parse child : parse.getChildren()) {
        getTokens(child, sb);
    }
}
From source file:es.ehu.si.ixa.pipe.convert.Convert.java
/**
 * Converts a Penn Treebank constituent tree into Word_POS form, appending
 * each leaf as "coveredText_POS" to the builder.
 *
 * @param parse The parse tree.
 * @param sb    The string builder the pairs are appended to.
 */
private void getWordType(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        // Skip empty elements / traces, tagged "-NONE-" in the treebank.
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" ");
        }
        return;
    }
    for (Parse child : parse.getChildren()) {
        getWordType(child, sb);
    }
}
From source file:org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java
/**
 * Breadth-first walk over a parse tree that accumulates punctuation and
 * syntax probability scores plus nominal counts into a quality holder.
 *
 * @param parse The root of the sentence parse tree.
 * @return A holder with the aggregated punctuation/syntax scores and counts.
 */
private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {
    double overallPunctScore = 0.0;
    double minSyntaxScore = 1.0;
    double overallSyntaxScore = 0.0;
    double numOfNoms = 0;
    double numLongNominals = 0;
    double syntaxCount = 0;
    int countPunct = 0;
    Queue<Parse> parseTree = new LinkedList<Parse>();
    parseTree.add(parse);
    double rootProb = parse.getProb();
    while (parseTree.size() > 0) {
        Parse p = parseTree.remove();
        // Only score single-child nodes with a non-trivial probability.
        if ((p.getChildCount() == 1) && (p.getProb() < 1)) {
            double prob = p.getProb();
            String pType = p.getType();
            // Punctuation tags contribute to the punctuation score...
            if (StringUtils.equals(pType, ",") || StringUtils.equals(pType, ".")
                    || StringUtils.equals(pType, "!") || StringUtils.equals(pType, "?")
                    || StringUtils.equals(pType, ";") || StringUtils.equals(pType, ":")) {
                overallPunctScore += prob;
                countPunct++;
            } else {
                // ...all other tags except TOP/S contribute to the syntax score.
                if (!StringUtils.equals(pType, "TOP") && !StringUtils.equals(pType, "S")) {
                    if ((pType.startsWith("NN"))) {
                        numOfNoms++;
                    } else {
                        // A run of >2 nouns in a sufficiently probable parse
                        // counts as a long nominal.
                        if ((numOfNoms > 2) && (rootProb > -25.5))
                            numLongNominals++;
                        // NOTE(review): numOfNoms is never reset here (a reset
                        // is commented out: `_numOfNoms = 0;`), so runs of
                        // nouns accumulate across the whole tree — confirm
                        // this is intended.
                    }
                    if (prob < minSyntaxScore)
                        minSyntaxScore = prob;
                    overallSyntaxScore += prob;
                    syntaxCount++;
                }
            }
        }
        // Enqueue all children for the breadth-first traversal.
        Parse[] children = p.getChildren();
        for (Parse pc : children)
            parseTree.add(pc);
    }
    // Average the punctuation score; guard against division by zero.
    overallPunctScore = (countPunct == 0) ? 0.0 : overallPunctScore / countPunct;
    ConventionsDocumentQualityHolder values = new ConventionsDocumentQualityHolder();
    values.setOverallPunctScore(overallPunctScore);
    values.setMinSyntaxScore(minSyntaxScore);
    values.setOverallSyntaxScore(overallSyntaxScore);
    values.setNumOfNoms(numOfNoms);
    values.setNumLongNominals(numLongNominals);
    values.setSyntaxCount(syntaxCount);
    return values;
}
From source file:org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java
private boolean checkPassive(AbstractDocument doc, Parse p) { Queue<Parse> queue = new LinkedList<Parse>(); queue.add(p);//from w w w . j a v a 2 s . co m while (queue.size() > 0) { p = queue.remove(); String parseType = p.getType(); if ((parseType.length() >= 2) && StringUtils.equalsIgnoreCase(parseType.substring(0, 2), "VB")) { String word = p.getText().substring(p.getSpan().getStart(), p.getSpan().getStart() + p.getSpan().length()); List<String> roots = wordnet.getBaseWords(word, EnumPOS.VERB); if ((roots.size() > 0) && (StringUtils.endsWithIgnoreCase(roots.get(0), "be"))) { return true; } else return false; } else { for (Parse child : p.getChildren()) queue.add(child); } } return false; }