Example usage for opennlp.tools.parser Parse getChildren

List of usage examples for opennlp.tools.parser Parse getChildren

Introduction

On this page you can find example usages of opennlp.tools.parser Parse getChildren.

Prototype

public Parse[] getChildren() 

Source Link

Document

Returns the child constituents of this constituent.

Usage

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Pairs the nouns and adjectives together to produce a number of result
 * text fragments to be resolved to concepts.
 *
 * @param parse/*from w  w w.  j  a  v a  2  s.  c  o m*/
 *            The parse to process and pair.
 * @param anchors
 *            The anchor map.
 * @param existingAnchorTrees
 *            The anchor trees already added to the results (for reuse and
 *            subtree-ing)
 * @return A collection of possible mappable entities composed of at least
 *         one noun and possible adjectives (with sub-adjectives).
 */
private Collection<Tree<String>> pairNounAdjs(Parse parse, SortedMap<String, String> anchors,
        Map<String, Tree<String>> existingAnchorTrees) {
    Collection<Tree<String>> results = new ArrayList<>();
    boolean createNewNounSet = false;
    ArrayList<String> nounPhrases = new ArrayList<>();
    Parse[] children = parse.getChildren();
    for (int i = children.length - 1; i >= 0; i--) {
        String childType = children[i].getType();
        String childText = children[i].getCoveredText();
        if (childType.startsWith("NN") || childType.equals("NP")) {
            // Note the noun, adding it to the front of the existing NP.
            if (createNewNounSet)
                nounPhrases.clear();
            String existingNounPhrase = "";
            if (!nounPhrases.isEmpty())
                existingNounPhrase = nounPhrases.get(nounPhrases.size() - 1);
            String np = (childText + " " + existingNounPhrase).trim();
            nounPhrases.add(np);

            // Add to the tree (if not a pure anchor)
            if (!anchors.containsKey(np))
                results.add(new Tree<String>(reAnchorString(np, anchors)));
        } else if (childType.startsWith("JJ") || childType.equals("ADJP")) {
            // Only process if we have an NP
            if (!nounPhrases.isEmpty()) {
                // For every nounPhrase
                StringBuilder adjective = new StringBuilder();
                for (int j = i; children[j].getType().startsWith("JJ")
                        || children[j].getType().equals("ADJP"); j++) {
                    // Build adjective combinations
                    if (adjective.length() != 0)
                        adjective.append(" ");
                    adjective.append(children[j].getCoveredText());
                    for (String np : nounPhrases) {
                        // Create the tree (with sub adjective tree)
                        String adjNP = adjective + " " + np;
                        Tree<String> adjP = null;
                        // Check for an existing anchor tree
                        if (existingAnchorTrees.containsKey(adjNP))
                            adjP = existingAnchorTrees.get(adjNP);
                        else
                            adjP = new Tree<String>(reAnchorString(adjNP, anchors));
                        if (!anchors.containsKey(adjective.toString()))
                            adjP.addSubValue(reAnchorString(adjective.toString(), anchors));

                        // Add to the tree
                        results.add(adjP);
                    }
                }
            }
            createNewNounSet = true;
        } else {
            createNewNounSet = true;
        }
    }
    return results;
}

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Recursively walks a parse tree, collecting predicate strings from
 * {@code VP} and {@code PP} nodes and adding partial assertions for
 * {@code NP} nodes to {@code results}.
 * <p>
 * Every recursive call receives its own copy of the predicate array, so a
 * child cannot alter the array held by its parent.
 *
 * @param parse
 *            The node to process.
 * @param predicateStrs
 *            The predicate strings gathered so far; {@code null} is treated
 *            as a single empty predicate.
 * @param focusConcept
 *            Passed on to {@code recurseStringTree} when assertions are
 *            built.
 * @param anchors
 *            The anchor map, passed on to {@code composeAdjNounsTree}.
 * @param wmi
 *            Only forwarded to the recursive calls in this method.
 * @param cyc
 *            Only forwarded to the recursive calls in this method.
 * @param heuristic
 *            The heuristic recorded in the provenance of created assertions.
 * @param results
 *            Receives every partial assertion not already contained in it.
 * @return The covered text of the node, or {@code null} when the node is a
 *         {@code VP}/{@code PP}, when any child returned {@code null}, or
 *         when this call added an assertion to {@code results}.
 * @throws Exception
 *             Declared by the signature; which callee raises it is not
 *             visible in this method.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    // Normalise a missing predicate array to one empty predicate.
    if (predicateStrs == null) {
        predicateStrs = new String[1];
        predicateStrs[0] = "";
    }

    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();

    // No children? Return value
    if (children.length == 0)
        return text;

    // Recurse to 'left' (the first child); childIndex then points at the
    // second child for the loop further down.
    int childIndex = 0;
    String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length),
            focusConcept, anchors, wmi, cyc, heuristic, results);

    // If VP or PP, add to predicate
    boolean canCreate = true;
    if (left != null) {
        if (type.equals("VP"))
            // The first child's text replaces the leading predicate.
            predicateStrs[0] = left.trim();
        else if (type.equals("PP")) {
            // If PP, split recursion into two predicates: the combined
            // "<existing> <left>" form and, when that differs from the bare
            // text, the bare text appended as an extra entry.
            predicateStrs[0] = (predicateStrs[0] + " " + left).trim();
            if (!predicateStrs[0].equals(left)) {
                predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1);
                predicateStrs[predicateStrs.length - 1] = left;
            }
        }
    } else
        canCreate = false;

    // Process the remaining children; a null result from any of them blocks
    // assertion creation at this node.
    for (; childIndex < children.length; childIndex++) {
        Parse childParse = children[childIndex];
        String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length),
                focusConcept, anchors, wmi, cyc, heuristic, results);
        if (result == null) {
            canCreate = false;
        }
    }

    // VP and PP nodes never return their text to the caller.
    if (type.equals("VP") || type.equals("PP"))
        return null;

    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            AssertionArgument predicate = null;
            if (isCopula(predStr)) {
                predicate = CycConstants.ISA_GENLS.getConcept();
            } else {
                // TODO Figure out a safe way to parse predicates. Probably
                // need to look at the parse code again.
                // predStr = reAnchorString(predStr, anchors);
                // predicate = new TextMappedConcept(predStr, true, true);
            }

            // Non-copula predicates are currently skipped (see TODO above).
            if (predicate == null)
                continue;

            // Return the possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            // NOTE(review): as a regex, the pattern below matches a backslash
            // followed by a line feed - confirm that is the intended sequence.
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));

            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);

            // Add the assertions; once one has been added this node returns
            // null instead of its text.
            for (PartialAssertion pa : currAssertions)
                if (!results.contains(pa)) {
                    results.add(pa);
                    canCreate = false;
                }
        }
    }

    if (!canCreate)
        return null;

    return text;
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * It converts a penn treebank constituent tree into tokens oneline form.
 * /*from  w  w  w  .j  a v  a 2 s.  c om*/
 * @param parse
 *          the parse tree
 * @param sb
 *          the stringbuilder to add the trees
 */
private void getTokens(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append(" ");
        }
    } else {
        Parse children[] = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            getTokens(children[i], sb);
        }
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * It converts a penn treebank constituent tree into Word_POS form
 * /*from  w w  w.  j  a v a2s.  co  m*/
 * @param parse
 * @param sb
 */
private void getWordType(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" ");
        }
    } else {
        Parse children[] = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            getWordType(children[i], sb);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java

/**
 * Creates linked constituent annotations + POS annotations
 *
 * @param aNode/*from   ww  w  .  ja v a2 s  .co  m*/
 *            the source tree
 * @param aParentFS
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @param aCreateLemmas
 *            sets whether to create or not to create Lemmas
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(JCas aJCas, Parse aNode, Annotation aParentFS,
        List<Token> aTokens) {
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    if (aNode.isPosTag()) {
        Token token = getToken(aTokens, aNode.getSpan().getStart(), aNode.getSpan().getEnd());

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        // only add POS to index if we want POS-tagging
        if (createPosTags) {
            Type posTag = mappingProvider.getTagType(aNode.getType());
            POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
            posAnno.setPosValue(internTags ? aNode.getType().intern() : aNode.getType());
            posAnno.addToIndexes();
            token.setPos((POS) posAnno);
        }

        return token;
    }
    // Check if node is a constituent node on sentence or phrase-level
    else {
        String typeName = aNode.getType();
        if (AbstractBottomUpParser.TOP_NODE.equals(typeName)) {
            typeName = "ROOT"; // in DKPro the root is ROOT, not TOP
        }

        // create the necessary objects and methods
        String constituentTypeName = CONPACKAGE + typeName;

        Type type = aJCas.getTypeSystem().getType(constituentTypeName);

        //if type is unknown, map to X-type
        if (type == null) {
            type = aJCas.getTypeSystem().getType(CONPACKAGE + "X");
        }

        Constituent constAnno = (Constituent) aJCas.getCas().createAnnotation(type, aNode.getSpan().getStart(),
                aNode.getSpan().getEnd());
        constAnno.setConstituentType(typeName);

        // link to parent
        if (aParentFS != null) {
            constAnno.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Parse child : aNode.getChildren()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aJCas, child, constAnno, aTokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray childArray = (FSArray) FSCollectionFactory.createFSArray(aJCas, childAnnotations);
        constAnno.setChildren(childArray);

        // write annotation for current node to index
        aJCas.addFsToIndexes(constAnno);

        return constAnno;
    }
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Tells whether every direct child of the given parse is a POS-tag node.
 *
 * @param p
 *            the parse whose children are inspected
 * @return {@code false} as soon as a child that is not a POS tag is found,
 *         {@code true} otherwise (including when there are no children)
 */
public static boolean allChildNodesArePOSTags(Parse p) {
    for (Parse child : p.getChildren()) {
        if (!child.isPosTag()) {
            return false;
        }
    }
    return true;
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Collects the text of the noun phrases found below the given parse.
 * <p>
 * A child of type {@code NP} whose own children are all POS tags
 * contributes the part of the parse text covered by its span. Any other
 * child that is not a POS tag is searched recursively.
 *
 * @param p
 *            the parse to search
 * @return the noun phrase strings, in the order they were encountered
 */
public ArrayList<String> getNounPhrases(Parse p) {
    ArrayList<String> collected = new ArrayList<String>();

    for (Parse child : p.getChildren()) {
        boolean flatNounPhrase = child.getType().equals("NP") && allChildNodesArePOSTags(child);
        if (flatNounPhrase) {
            Span covered = child.getSpan();
            collected.add(p.getText().substring(covered.getStart(), covered.getEnd()));
            continue;
        }
        if (!child.isPosTag()) {
            collected.addAll(getNounPhrases(child));
        }
    }

    return collected;
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Collects the text of the verb phrases found below the given parse.
 * <p>
 * A child whose type starts with {@code VB} and whose own children are all
 * POS tags contributes the part of the parse text covered by its span. Any
 * other child that is not a POS tag is searched recursively for further
 * verb phrases.
 *
 * @param p
 *            the parse to search
 * @return the verb phrase strings, in the order they were encountered
 */
public ArrayList<String> getVerbPhrases(Parse p) {
    ArrayList<String> verbPhrases = new ArrayList<String>();

    for (Parse child : p.getChildren()) {
        if (child.getType().startsWith("VB") && allChildNodesArePOSTags(child)) {
            Span span = child.getSpan();
            verbPhrases.add(p.getText().substring(span.getStart(), span.getEnd()));
        } else if (!child.isPosTag()) {
            // Recurse with this method: descending through getNounPhrases
            // would add noun phrases to the verb phrase list.
            verbPhrases.addAll(getVerbPhrases(child));
        }
    }

    return verbPhrases;
}

From source file:org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java

/**
 * Traverses the parse tree breadth-first and gathers punctuation and
 * syntax statistics into a {@code ConventionsDocumentQualityHolder}.
 * <p>
 * Only nodes with exactly one child and a probability below 1 are scored.
 * Punctuation nodes feed the punctuation average; all other nodes except
 * {@code TOP} and {@code S} feed the syntax sum, minimum and count as well
 * as the nominal counters.
 *
 * @param parse
 *            the root of the tree to evaluate
 * @return the holder filled with the collected values
 */
private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {

    double punctScoreSum = 0.0;
    int punctNodes = 0;

    double lowestSyntaxScore = 1.0;
    double syntaxScoreSum = 0.0;
    double syntaxNodes = 0;

    double nominals = 0;
    double longNominals = 0;

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(parse);
    final double rootProb = parse.getProb();

    while (!pending.isEmpty()) {
        Parse node = pending.remove();

        if (node.getChildCount() == 1 && node.getProb() < 1) {
            double prob = node.getProb();
            String nodeType = node.getType();

            boolean punctuation = StringUtils.equals(nodeType, ",") || StringUtils.equals(nodeType, ".")
                    || StringUtils.equals(nodeType, "!") || StringUtils.equals(nodeType, "?")
                    || StringUtils.equals(nodeType, ";") || StringUtils.equals(nodeType, ":");
            boolean sentenceLevel = StringUtils.equals(nodeType, "TOP") || StringUtils.equals(nodeType, "S");

            if (punctuation) {
                punctScoreSum += prob;
                punctNodes++;
            } else if (!sentenceLevel) {
                if (nodeType.startsWith("NN")) {
                    nominals++;
                } else if (nominals > 2 && rootProb > -25.5) {
                    longNominals++;
                }

                if (prob < lowestSyntaxScore) {
                    lowestSyntaxScore = prob;
                }

                syntaxScoreSum += prob;
                syntaxNodes++;
            }
        }

        // Every node, scored or not, has its children queued.
        for (Parse child : node.getChildren()) {
            pending.add(child);
        }
    }

    // The punctuation score is reported as an average, 0.0 when no
    // punctuation node was scored.
    double punctScore;
    if (punctNodes == 0) {
        punctScore = 0.0;
    } else {
        punctScore = punctScoreSum / punctNodes;
    }

    ConventionsDocumentQualityHolder holder = new ConventionsDocumentQualityHolder();
    holder.setOverallPunctScore(punctScore);
    holder.setMinSyntaxScore(lowestSyntaxScore);
    holder.setOverallSyntaxScore(syntaxScoreSum);
    holder.setNumOfNoms(nominals);
    holder.setNumLongNominals(longNominals);
    holder.setSyntaxCount(syntaxNodes);

    return holder;
}

From source file:org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java

/**
 * Searches the parse tree breadth-first for the first node whose type
 * starts with "VB" (ignoring case) and reports whether the first base word
 * returned by {@code wordnet} for that node's text ends with "be"
 * (ignoring case).
 * <p>
 * The search stops at the first such node; later verbs are not inspected.
 *
 * @param doc
 *            not used by this method
 * @param p
 *            the root of the tree to search
 * @return {@code true} if the first verb node found has a first base word
 *         ending in "be"; {@code false} if it does not, if no base word is
 *         returned, or if the tree contains no verb node
 */
private boolean checkPassive(AbstractDocument doc, Parse p) {

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(p);

    while (!pending.isEmpty()) {
        Parse node = pending.remove();
        String nodeType = node.getType();

        boolean verbNode = nodeType.length() >= 2
                && StringUtils.equalsIgnoreCase(nodeType.substring(0, 2), "VB");

        if (!verbNode) {
            for (Parse child : node.getChildren()) {
                pending.add(child);
            }
            continue;
        }

        // Cut the verb out of the full text using the span of the node.
        int from = node.getSpan().getStart();
        int to = node.getSpan().getStart() + node.getSpan().length();
        String word = node.getText().substring(from, to);

        List<String> roots = wordnet.getBaseWords(word, EnumPOS.VERB);
        return !roots.isEmpty() && StringUtils.endsWithIgnoreCase(roots.get(0), "be");
    }
    return false;
}