Example usage for opennlp.tools.parser Parse getCoveredText

List of usage examples for opennlp.tools.parser Parse getCoveredText

Introduction

This page collects usage examples of the opennlp.tools.parser.Parse method getCoveredText().

Prototype

public String getCoveredText() 

Source Link

Usage

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Builds the collection of string trees for a noun phrase by combining the
 * anchor trees found in the phrase's covered text with the noun-adjective
 * pairings derived from the parse itself.
 *
 * @param parse
 *            The parse whose covered text and structure supply the strings.
 * @param anchors
 *            The anchors to insert during composition.
 * @return The anchor trees (as produced by {@code extractAnchors}) followed
 *         by the noun-adjective trees (as produced by {@code pairNounAdjs}).
 */
public Collection<Tree<String>> composeAdjNounsTree(Parse parse, SortedMap<String, String> anchors) {
    String coveredText = parse.getCoveredText();

    // Shared between both helper calls: extractAnchors receives it first,
    // pairNounAdjs receives the same instance afterwards.
    Map<String, Tree<String>> treesByAnchorText = new HashMap<>();

    // Start from the anchors visible in the covered text.
    Collection<Tree<String>> composed = new ArrayList<>(
            extractAnchors(coveredText, anchors, treesByAnchorText));

    // Then append the noun/adjective combinations derived from the parse.
    composed.addAll(pairNounAdjs(parse, anchors, treesByAnchorText));
    return composed;
}

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Recursively walks a parse tree, accumulating predicate strings from VP and
 * PP nodes and, at NP nodes, turning the accumulated predicates plus the
 * NP's noun strings into partial assertions that are added to
 * {@code results}.
 *
 * @param parse
 *            The parse node to process.
 * @param predicateStrs
 *            The predicate strings accumulated so far; {@code null} is
 *            treated as a single empty predicate. Recursive calls receive
 *            a copy of the array.
 * @param focusConcept
 *            The concept passed on to {@code recurseStringTree} when
 *            building assertions.
 * @param anchors
 *            The anchors passed on to {@code composeAdjNounsTree}.
 * @param wmi
 *            Only forwarded to the recursive calls in this method.
 * @param cyc
 *            Only forwarded to the recursive calls in this method.
 * @param heuristic
 *            The heuristic recorded in the provenance of created assertions.
 * @param results
 *            Output collection; newly built assertions not already present
 *            are added to it.
 * @return The covered text of {@code parse}, or {@code null} when the node
 *         is a VP or PP, when any child returned {@code null}, or when this
 *         call added at least one assertion to {@code results}.
 * @throws Exception
 *             Declared by the signature; presumably raised by the helper
 *             calls, which are not visible here.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    // Normalise a missing predicate array to one empty predicate so that
    // predicateStrs[0] is always addressable below.
    if (predicateStrs == null) {
        predicateStrs = new String[1];
        predicateStrs[0] = "";
    }

    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();

    // No children? Return value
    if (children.length == 0)
        return text;

    // Recurse to 'left' (the first child). The child gets a copy of the
    // predicates, so its own modifications do not leak back into this frame.
    int childIndex = 0;
    String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length),
            focusConcept, anchors, wmi, cyc, heuristic, results);

    // If VP or PP, add to predicate
    boolean canCreate = true;
    if (left != null) {
        if (type.equals("VP"))
            // VP: the first child's text replaces the primary predicate.
            predicateStrs[0] = left.trim();
        else if (type.equals("PP")) {
            // If PP, split recursion into two predicates: the primary
            // predicate extended with the first child's text, plus (when
            // that differs from the child's text alone) the child's text as
            // an extra trailing entry.
            predicateStrs[0] = (predicateStrs[0] + " " + left).trim();
            if (!predicateStrs[0].equals(left)) {
                predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1);
                predicateStrs[predicateStrs.length - 1] = left;
            }
        }
    } else
        canCreate = false;

    // Process the remaining children with the (possibly updated) predicates.
    // Every child is visited even after one returns null; a null result only
    // disables assertion creation at this node.
    for (; childIndex < children.length; childIndex++) {
        Parse childParse = children[childIndex];
        String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length),
                focusConcept, anchors, wmi, cyc, heuristic, results);
        if (result == null) {
            canCreate = false;
        }
    }

    // VP and PP nodes never yield a value to their parent.
    if (type.equals("VP") || type.equals("PP"))
        return null;

    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            // Only copula predicates are mapped at present; anything else
            // leaves predicate null and is skipped below.
            AssertionArgument predicate = null;
            if (isCopula(predStr)) {
                predicate = CycConstants.ISA_GENLS.getConcept();
            } else {
                // TODO Figure out a safe way to parse predicates. Probably
                // need to look at the parse code again.
                // predStr = reAnchorString(predStr, anchors);
                // predicate = new TextMappedConcept(predStr, true, true);
            }

            if (predicate == null)
                continue;

            // Return the possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            // NOTE(review): as written, the pattern matches a literal
            // backslash followed by a newline character; confirm whether a
            // plain newline match was intended.
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));

            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);

            // Add the assertions. Adding at least one new assertion clears
            // canCreate, so this node returns null to its parent.
            for (PartialAssertion pa : currAssertions)
                if (!results.contains(pa)) {
                    results.add(pa);
                    canCreate = false;
                }
        }
    }

    if (!canCreate)
        return null;

    return text;
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * Collects the tokens of a Penn Treebank constituent tree into a single
 * line: the covered text of every POS-tag node is appended to the builder,
 * followed by one space. POS-tag nodes of type "-NONE-" are skipped.
 *
 * @param parse
 *          the parse tree to traverse
 * @param sb
 *          the string builder that receives the tokens
 */
private void getTokens(Parse parse, StringBuilder sb) {
    // Interior node: descend into each child in order.
    if (!parse.isPosTag()) {
        for (Parse child : parse.getChildren()) {
            getTokens(child, sb);
        }
        return;
    }

    // POS-tag node: emit its text unless it is typed "-NONE-".
    boolean skipped = parse.getType().equals("-NONE-");
    if (!skipped) {
        sb.append(parse.getCoveredText());
        sb.append(" ");
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * It converts a penn treebank constituent tree into Word_POS form
 * /*  www  .ja v  a2s. c  o  m*/
 * @param parse
 * @param sb
 */
private void getWordType(Parse parse, StringBuilder sb) {
    if (parse.isPosTag()) {
        if (!parse.getType().equals("-NONE-")) {
            sb.append(parse.getCoveredText()).append("_").append(parse.getType()).append(" ");
        }
    } else {
        Parse children[] = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            getWordType(children[i], sb);
        }
    }
}