Example usage for opennlp.tools.parser Parse getChildren

Introduction

On this page you can find example usages of the getChildren() method of opennlp.tools.parser.Parse.

Prototype

public Parse[] getChildren()

Source Link

Document

Returns the child constituents of this constituent.

Usage

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Pairs the nouns and adjectives together to produce a number of result
 * text fragments to be resolved to concepts.
 *
 * @param parse/*from w  w w.  j  a  v a  2  s.  c  o m*/
 *            The parse to process and pair.
 * @param anchors
 *            The anchor map.
 * @param existingAnchorTrees
 *            The anchor trees already added to the results (for reuse and
 *            subtree-ing)
 * @return A collection of possible mappable entities composed of at least
 *         one noun and possible adjectives (with sub-adjectives).
 */
private Collection<Tree<String>> pairNounAdjs(Parse parse, SortedMap<String, String> anchors,
        Map<String, Tree<String>> existingAnchorTrees) {
    Collection<Tree<String>> results = new ArrayList<>();
    boolean createNewNounSet = false;
    ArrayList<String> nounPhrases = new ArrayList<>();
    Parse[] children = parse.getChildren();
    for (int i = children.length - 1; i >= 0; i--) {
        String childType = children[i].getType();
        String childText = children[i].getCoveredText();
        if (childType.startsWith("NN") || childType.equals("NP")) {
            // Note the noun, adding it to the front of the existing NP.
            if (createNewNounSet)
                nounPhrases.clear();
            String existingNounPhrase = "";
            if (!nounPhrases.isEmpty())
                existingNounPhrase = nounPhrases.get(nounPhrases.size() - 1);
            String np = (childText + " " + existingNounPhrase).trim();
            nounPhrases.add(np);

            // Add to the tree (if not a pure anchor)
            if (!anchors.containsKey(np))
                results.add(new Tree<String>(reAnchorString(np, anchors)));
        } else if (childType.startsWith("JJ") || childType.equals("ADJP")) {
            // Only process if we have an NP
            if (!nounPhrases.isEmpty()) {
                // For every nounPhrase
                StringBuilder adjective = new StringBuilder();
                for (int j = i; children[j].getType().startsWith("JJ")
                        || children[j].getType().equals("ADJP"); j++) {
                    // Build adjective combinations
                    if (adjective.length() != 0)
                        adjective.append(" ");
                    adjective.append(children[j].getCoveredText());
                    for (String np : nounPhrases) {
                        // Create the tree (with sub adjective tree)
                        String adjNP = adjective + " " + np;
                        Tree<String> adjP = null;
                        // Check for an existing anchor tree
                        if (existingAnchorTrees.containsKey(adjNP))
                            adjP = existingAnchorTrees.get(adjNP);
                        else
                            adjP = new Tree<String>(reAnchorString(adjNP, anchors));
                        if (!anchors.containsKey(adjective.toString()))
                            adjP.addSubValue(reAnchorString(adjective.toString(), anchors));

                        // Add to the tree
                        results.add(adjP);
                    }
                }
            }
            createNewNounSet = true;
        } else {
            createNewNounSet = true;
        }
    }
    return results;
}

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Recursively walks a parse tree, collecting predicate text from VP and PP
 * nodes and adding partial assertions for NP nodes to {@code results}.
 * <p>
 * Every recursive call receives its own copy of the predicate array. The
 * array passed to this call is written to in place when this node is a VP
 * or PP.
 *
 * @param parse
 *            The node to process.
 * @param predicateStrs
 *            The predicate strings gathered so far; {@code null} is treated
 *            as a single empty predicate.
 * @param focusConcept
 *            Passed through to the assertion-building step.
 * @param anchors
 *            The anchor map.
 * @param wmi
 *            Passed through to recursive calls.
 * @param cyc
 *            Passed through to recursive calls.
 * @param heuristic
 *            Recorded in the provenance of created assertions.
 * @param results
 *            Receives every newly created partial assertion.
 * @return The covered text of the node, or {@code null} when the node is a
 *         VP/PP, when any child returned {@code null}, or when this call
 *         added at least one assertion to {@code results}.
 * @throws Exception
 *             Propagated from the recursive calls and helpers.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    if (predicateStrs == null)
        predicateStrs = new String[] { "" };

    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();

    // Leaf node: its text is the value.
    if (children.length == 0)
        return text;

    // The first ('left') child may contribute predicate text.
    String left = disambiguateTree(children[0], predicateStrs.clone(), focusConcept, anchors, wmi, cyc,
            heuristic, results);

    boolean canCreate = (left != null);
    if (canCreate) {
        if (type.equals("VP")) {
            predicateStrs[0] = left.trim();
        } else if (type.equals("PP")) {
            // A PP extends the current predicate and, when that differs from
            // the bare preposition text, also adds the bare text as an
            // additional predicate.
            String combined = (predicateStrs[0] + " " + left).trim();
            predicateStrs[0] = combined;
            if (!combined.equals(left)) {
                int previousLength = predicateStrs.length;
                predicateStrs = Arrays.copyOf(predicateStrs, previousLength + 1);
                predicateStrs[previousLength] = left;
            }
        }
    }

    // Remaining children; a single null result blocks creation at this node.
    for (int i = 1; i < children.length; i++) {
        String childResult = disambiguateTree(children[i], predicateStrs.clone(), focusConcept, anchors,
                wmi, cyc, heuristic, results);
        if (childResult == null)
            canCreate = false;
    }

    if (type.equals("VP") || type.equals("PP"))
        return null;

    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            AssertionArgument predicate = null;
            if (isCopula(predStr))
                predicate = CycConstants.ISA_GENLS.getConcept();
            // TODO Figure out a safe way to parse non-copula predicates.
            // Probably need to look at the parse code again. Until then
            // they are skipped.
            if (predicate == null)
                continue;

            // The possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));

            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);

            // Keep only assertions not seen before; adding any one of them
            // makes this call return null.
            for (PartialAssertion pa : currAssertions) {
                if (results.contains(pa))
                    continue;
                results.add(pa);
                canCreate = false;
            }
        }
    }

    return canCreate ? text : null;
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * It converts a penn treebank constituent tree into tokens oneline form.
 * /*from  w  w  w  .j  a v  a 2 s.  c om*/
 * @param parse
 *          the parse tree
 * @param sb
 *          the stringbuilder to add the trees
 */
private void getTokens(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append(" ");
        }
    } else {
        Parse children[] = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            getTokens(children[i], sb);
        }
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * It converts a penn treebank constituent tree into Word_POS form
 * /*from  w w  w.  j  a v a2s.  co  m*/
 * @param parse
 * @param sb
 */
private void getWordType(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" ");
        }
    } else {
        Parse children[] = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            getWordType(children[i], sb);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java

/**
 * Creates linked constituent annotations + POS annotations
 *
 * @param aNode/*from   ww  w  .  ja v a2 s  .co  m*/
 *            the source tree
 * @param aParentFS
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @param aCreateLemmas
 *            sets whether to create or not to create Lemmas
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(JCas aJCas, Parse aNode, Annotation aParentFS,
        List<Token> aTokens) {
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    if (aNode.isPosTag()) {
        Token token = getToken(aTokens, aNode.getSpan().getStart(), aNode.getSpan().getEnd());

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        // only add POS to index if we want POS-tagging
        if (createPosTags) {
            Type posTag = mappingProvider.getTagType(aNode.getType());
            POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
            posAnno.setPosValue(internTags ? aNode.getType().intern() : aNode.getType());
            posAnno.addToIndexes();
            token.setPos((POS) posAnno);
        }

        return token;
    }
    // Check if node is a constituent node on sentence or phrase-level
    else {
        String typeName = aNode.getType();
        if (AbstractBottomUpParser.TOP_NODE.equals(typeName)) {
            typeName = "ROOT"; // in DKPro the root is ROOT, not TOP
        }

        // create the necessary objects and methods
        String constituentTypeName = CONPACKAGE + typeName;

        Type type = aJCas.getTypeSystem().getType(constituentTypeName);

        //if type is unknown, map to X-type
        if (type == null) {
            type = aJCas.getTypeSystem().getType(CONPACKAGE + "X");
        }

        Constituent constAnno = (Constituent) aJCas.getCas().createAnnotation(type, aNode.getSpan().getStart(),
                aNode.getSpan().getEnd());
        constAnno.setConstituentType(typeName);

        // link to parent
        if (aParentFS != null) {
            constAnno.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Parse child : aNode.getChildren()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aJCas, child, constAnno, aTokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray childArray = (FSArray) FSCollectionFactory.createFSArray(aJCas, childAnnotations);
        constAnno.setChildren(childArray);

        // write annotation for current node to index
        aJCas.addFsToIndexes(constAnno);

        return constAnno;
    }
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Tells whether every direct child of the given parse is a POS-tag node.
 *
 * @param p
 *            the parse whose children are inspected
 * @return {@code false} as soon as a child that is not a POS tag is found;
 *         {@code true} otherwise, including when there are no children
 */
public static boolean allChildNodesArePOSTags(Parse p) {
    for (Parse child : p.getChildren()) {
        if (!child.isPosTag()) {
            return false;
        }
    }
    return true;
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Collects the text of noun phrases found below the given parse.
 * <p>
 * A child of type {@code NP} whose own children are all POS tags contributes
 * the substring of {@code p.getText()} covered by its span. Any other child
 * that is not itself a POS tag is searched recursively.
 *
 * @param p
 *            the parse to search
 * @return the collected noun-phrase strings, in traversal order
 */
public ArrayList<String> getNounPhrases(Parse p) {
    ArrayList<String> collected = new ArrayList<String>();

    for (Parse child : p.getChildren()) {
        boolean flatNounPhrase = child.getType().equals("NP") && allChildNodesArePOSTags(child);
        if (flatNounPhrase) {
            Span covered = child.getSpan();
            collected.add(p.getText().substring(covered.getStart(), covered.getEnd()));
        } else if (!child.isPosTag()) {
            collected.addAll(getNounPhrases(child));
        }
    }

    return collected;
}

From source file:opennlp.tools.apps.relevanceVocabs.PhraseProcessor.java

/**
 * Collects the text of verb constituents found below the given parse.
 * <p>
 * A child whose type starts with {@code VB} and whose own children are all
 * POS tags contributes the substring of {@code p.getText()} covered by its
 * span. Any other child that is not itself a POS tag is searched
 * recursively for further verb phrases.
 * <p>
 * NOTE(review): the type test uses the {@code VB} prefix rather than
 * {@code VP}; confirm that this is the intended constituent label.
 *
 * @param p
 *            the parse to search
 * @return the collected verb-phrase strings, in traversal order
 */
public ArrayList<String> getVerbPhrases(Parse p) {
    ArrayList<String> verbPhrases = new ArrayList<String>();

    for (Parse child : p.getChildren()) {
        if (child.getType().startsWith("VB") && allChildNodesArePOSTags(child)) {
            Span span = child.getSpan();
            verbPhrases.add(p.getText().substring(span.getStart(), span.getEnd()));
        } else if (!child.isPosTag()) {
            // Recurse with the verb-phrase search; recursing with
            // getNounPhrases here would mix noun phrases into the result.
            verbPhrases.addAll(getVerbPhrases(child));
        }
    }

    return verbPhrases;
}

From source file:org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java

/**
 * Walks the parse tree breadth-first and gathers punctuation and syntax
 * probability statistics.
 * <p>
 * Only nodes with exactly one child and a probability below 1 are scored.
 * Punctuation nodes feed the punctuation score, which is averaged at the
 * end. All other scored nodes except {@code TOP} and {@code S} feed the
 * syntax statistics; the overall syntax score is reported as a sum, with
 * the node count reported separately.
 *
 * @param parse
 *            the root of the tree to evaluate
 * @return a holder populated with the gathered values
 */
private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {

    double punctScoreSum = 0.0;
    double lowestSyntaxScore = 1.0;
    double syntaxScoreSum = 0.0;

    double nominalCount = 0;
    double longNominalCount = 0;
    double scoredSyntaxNodes = 0;

    int punctCount = 0;

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(parse);
    final double rootProb = parse.getProb();

    while (!pending.isEmpty()) {
        Parse node = pending.remove();

        if (node.getChildCount() == 1 && node.getProb() < 1) {
            double prob = node.getProb();
            String nodeType = node.getType();

            boolean isPunctuation = StringUtils.equals(nodeType, ",") || StringUtils.equals(nodeType, ".")
                    || StringUtils.equals(nodeType, "!") || StringUtils.equals(nodeType, "?")
                    || StringUtils.equals(nodeType, ";") || StringUtils.equals(nodeType, ":");

            if (isPunctuation) {
                punctScoreSum += prob;
                punctCount++;
            } else if (!(StringUtils.equals(nodeType, "TOP") || StringUtils.equals(nodeType, "S"))) {
                if (nodeType.startsWith("NN")) {
                    nominalCount++;
                } else if (nominalCount > 2 && rootProb > -25.5) {
                    // The nominal counter is not reset here, so it keeps
                    // growing over the whole tree.
                    longNominalCount++;
                }

                lowestSyntaxScore = (prob < lowestSyntaxScore) ? prob : lowestSyntaxScore;
                syntaxScoreSum += prob;
                scoredSyntaxNodes++;
            }
        }

        // Enqueue the children so the traversal stays breadth-first.
        for (Parse child : node.getChildren()) {
            pending.add(child);
        }
    }

    double meanPunctScore = (punctCount == 0) ? 0.0 : punctScoreSum / punctCount;

    ConventionsDocumentQualityHolder values = new ConventionsDocumentQualityHolder();
    values.setOverallPunctScore(meanPunctScore);
    values.setMinSyntaxScore(lowestSyntaxScore);
    values.setOverallSyntaxScore(syntaxScoreSum);
    values.setNumOfNoms(nominalCount);
    values.setNumLongNominals(longNominalCount);
    values.setSyntaxCount(scoredSyntaxNodes);

    return values;
}

From source file:org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java

/**
 * Searches the parse tree breadth-first for the first node whose type
 * starts with "VB" (case-insensitive) and reports whether the first base
 * form of its word ends with "be" (case-insensitive).
 * <p>
 * The search stops at the first such node; its children and the rest of
 * the queue are not examined.
 *
 * @param doc
 *            not used by this method
 * @param p
 *            the root of the tree to search
 * @return {@code true} if the first verb node found has a first base form
 *         ending with "be"; {@code false} if it does not, if no base form
 *         is returned, or if no verb node is found
 */
private boolean checkPassive(AbstractDocument doc, Parse p) {

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(p);

    while (!pending.isEmpty()) {
        Parse node = pending.remove();
        String nodeType = node.getType();

        boolean isVerb = (nodeType.length() >= 2)
                && StringUtils.equalsIgnoreCase(nodeType.substring(0, 2), "VB");
        if (!isVerb) {
            for (Parse child : node.getChildren()) {
                pending.add(child);
            }
            continue;
        }

        // Cut the verb's surface form out of the full text using its span.
        String fullText = node.getText();
        int begin = node.getSpan().getStart();
        int end = node.getSpan().getStart() + node.getSpan().length();
        String word = fullText.substring(begin, end);

        List<String> roots = wordnet.getBaseWords(word, EnumPOS.VERB);
        return (roots.size() > 0) && StringUtils.endsWithIgnoreCase(roots.get(0), "be");
    }
    return false;
}