Example usage for opennlp.tools.parser Parse getType

List of usage examples for opennlp.tools.parser Parse getType

Introduction

On this page you can find usage examples for the getType() method of opennlp.tools.parser.Parse.

Prototype

public String getType() 

Source Link

Document

Returns the constituent label for this node of the parse.

Usage

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Parses a sentence, re-parsing a simplified version of it for as long as
 * the parser yields a parse of type "INC" (treated here as a failed parse)
 * and one of the simplifier patterns can still shorten the sentence.
 * <p>
 * Each failed sentence is handed to
 * {@code IOManager.writeFirstSentence(-1, sentence)}; an
 * {@link IOException} from that call is printed and otherwise ignored.
 *
 * @param cleanSentence
 *            The sentence to parse.
 * @return The first parse whose type is not "INC", or the last "INC" parse
 *         when no pattern in SENTENCE_SIMPLIFIER changes the sentence any
 *         further.
 */
public static synchronized Parse parseLine(String cleanSentence) {
    while (true) {
        Parse attempt = OpenNLP.parseLine(cleanSentence);
        if (!attempt.getType().equals("INC")) {
            return attempt;
        }

        // Record the sentence that could not be parsed.
        try {
            IOManager.getInstance().writeFirstSentence(-1, cleanSentence);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Apply the first simplifier that actually alters the sentence.
        boolean simplified = false;
        for (Pattern simplifier : SENTENCE_SIMPLIFIER) {
            String candidate = simplifier.matcher(cleanSentence).replaceFirst("");
            if (!candidate.equals(cleanSentence)) {
                cleanSentence = StringUtils.capitalize(candidate);
                simplified = true;
                break;
            }
        }

        // Nothing left to strip: give up and hand back the failed parse.
        if (!simplified) {
            return attempt;
        }
    }
}

From source file:knowledgeMiner.mining.ExtractionPattern.java

/**
 * Tests whether a parse can fill the argument slot this pattern is
 * currently waiting on. The parse qualifies when its type starts with the
 * POS type stored at the current index.
 * <p>
 * On a match, a new Extraction Pattern is constructed from this one, the
 * parse is stored in its argument array at this pattern's current index,
 * and the new pattern's index is advanced by one.
 *
 * @param currentParse
 *            The parse to test.
 * @return The new Extraction Pattern including the parse, or null if the
 *         parse does not match the expected POS type.
 */
public ExtractionPattern checkParse(Parse currentParse) {
    String parseType = currentParse.getType();
    if (!parseType.startsWith(posTypes_[index_])) {
        return null;
    }

    ExtractionPattern extended = new ExtractionPattern(this);
    extended.parseArgs_[index_] = currentParse;
    extended.index_++;
    return extended;
}

From source file:knowledgeMiner.mining.SentenceParserHeuristic.java

/**
 * Recursively walks a parse tree, accumulating predicate strings from VP
 * and PP nodes and creating partial assertions at NP nodes.
 * <p>
 * The first child of each node is processed separately from the rest: its
 * returned text is what gets folded into the predicate strings when this
 * node is a VP or PP. Every recursive call receives a copy of the current
 * predicate array; only this invocation's own array (which may be the
 * caller-supplied one) is modified here.
 *
 * @param parse
 *            The node to process.
 * @param predicateStrs
 *            The predicate strings gathered so far. If null, a
 *            single-element array holding the empty string is used.
 * @param focusConcept
 *            Passed on to recurseStringTree when building assertions.
 * @param anchors
 *            Passed on to composeAdjNounsTree.
 * @param wmi
 *            Only forwarded to the recursive calls in this method.
 * @param cyc
 *            Only forwarded to the recursive calls in this method.
 * @param heuristic
 *            Used to construct the provenance of created assertions.
 * @param results
 *            Receives every partial assertion not already contained in it.
 * @return The covered text of the node, or null if the node is a VP or PP,
 *         if any child returned null, or if this call added at least one
 *         new assertion to results.
 * @throws Exception
 *             Declared by the signature; propagated from the recursion and
 *             the helper calls.
 */
private String disambiguateTree(Parse parse, String[] predicateStrs, MappableConcept focusConcept,
        SortedMap<String, String> anchors, WMISocket wmi, OntologySocket cyc, MiningHeuristic heuristic,
        Collection<PartialAssertion> results) throws Exception {
    if (predicateStrs == null) {
        predicateStrs = new String[1];
        predicateStrs[0] = "";
    }

    Parse[] children = parse.getChildren();
    String type = parse.getType();
    String text = parse.getCoveredText();

    // No children? Return value
    if (children.length == 0)
        return text;

    // Recurse to 'left' (the first child); childIndex then points at the
    // second child for the loop further down.
    int childIndex = 0;
    String left = disambiguateTree(children[childIndex++], Arrays.copyOf(predicateStrs, predicateStrs.length),
            focusConcept, anchors, wmi, cyc, heuristic, results);

    // If VP or PP, add to predicate
    boolean canCreate = true;
    if (left != null) {
        if (type.equals("VP"))
            // A VP replaces the primary predicate with its first child's text
            predicateStrs[0] = left.trim();
        else if (type.equals("PP")) {
            // If PP, split recursion into two predicates: the primary
            // predicate extended by the preposition, plus (when that differs
            // from the preposition alone) the preposition appended on its own.
            predicateStrs[0] = (predicateStrs[0] + " " + left).trim();
            if (!predicateStrs[0].equals(left)) {
                predicateStrs = Arrays.copyOf(predicateStrs, predicateStrs.length + 1);
                predicateStrs[predicateStrs.length - 1] = left;
            }
        }
    } else
        canCreate = false;

    // Process the remaining children; a single null result blocks creation
    // at this node.
    for (; childIndex < children.length; childIndex++) {
        Parse childParse = children[childIndex];
        String result = disambiguateTree(childParse, Arrays.copyOf(predicateStrs, predicateStrs.length),
                focusConcept, anchors, wmi, cyc, heuristic, results);
        if (result == null) {
            canCreate = false;
        }
    }

    // VP and PP nodes always report null to their parent
    if (type.equals("VP") || type.equals("PP"))
        return null;

    // Can create and we have a target and predicate(s)
    if (canCreate && type.equals("NP") && !predicateStrs[0].isEmpty()) {
        for (String predStr : predicateStrs) {
            AssertionArgument predicate = null;
            if (isCopula(predStr)) {
                predicate = CycConstants.ISA_GENLS.getConcept();
            } else {
                // TODO Figure out a safe way to parse predicates. Probably
                // need to look at the parse code again.
                // predStr = reAnchorString(predStr, anchors);
                // predicate = new TextMappedConcept(predStr, true, true);
            }

            // Only copula predicates currently yield a predicate; all
            // others are skipped.
            if (predicate == null)
                continue;

            // Return the possible noun strings
            Collection<Tree<String>> nounStrs = composeAdjNounsTree(parse, anchors);
            // NOTE(review): as a regex, this literal matches a backslash
            // followed by a newline character - confirm that is intended.
            logger_.trace("createAssertions: " + predicate.toString() + " "
                    + nounStrs.toString().replaceAll("\\\\\n", " "));

            // Recurse through the tree and build the partial assertions
            HeuristicProvenance provenance = new HeuristicProvenance(heuristic, predStr + "+" + text);
            Collection<PartialAssertion> currAssertions = recurseStringTree(predicate, focusConcept, nounStrs,
                    provenance);

            // Add the assertions
            for (PartialAssertion pa : currAssertions)
                if (!results.contains(pa)) {
                    results.add(pa);
                    // Adding a new assertion makes this call return null
                    // below; the remaining predicates are still processed.
                    canCreate = false;
                }
        }
    }

    if (!canCreate)
        return null;

    return text;
}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java

/**
 * Creates linked constituent annotations + POS annotations
 *
 * @param aNode/*ww w .  j  av  a  2 s  . co  m*/
 *            the source tree
 * @param aParentFS
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @param aCreateLemmas
 *            sets whether to create or not to create Lemmas
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(JCas aJCas, Parse aNode, Annotation aParentFS,
        List<Token> aTokens) {
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    if (aNode.isPosTag()) {
        Token token = getToken(aTokens, aNode.getSpan().getStart(), aNode.getSpan().getEnd());

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        // only add POS to index if we want POS-tagging
        if (createPosTags) {
            Type posTag = mappingProvider.getTagType(aNode.getType());
            POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
            posAnno.setPosValue(internTags ? aNode.getType().intern() : aNode.getType());
            posAnno.addToIndexes();
            token.setPos((POS) posAnno);
        }

        return token;
    }
    // Check if node is a constituent node on sentence or phrase-level
    else {
        String typeName = aNode.getType();
        if (AbstractBottomUpParser.TOP_NODE.equals(typeName)) {
            typeName = "ROOT"; // in DKPro the root is ROOT, not TOP
        }

        // create the necessary objects and methods
        String constituentTypeName = CONPACKAGE + typeName;

        Type type = aJCas.getTypeSystem().getType(constituentTypeName);

        //if type is unknown, map to X-type
        if (type == null) {
            type = aJCas.getTypeSystem().getType(CONPACKAGE + "X");
        }

        Constituent constAnno = (Constituent) aJCas.getCas().createAnnotation(type, aNode.getSpan().getStart(),
                aNode.getSpan().getEnd());
        constAnno.setConstituentType(typeName);

        // link to parent
        if (aParentFS != null) {
            constAnno.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Parse child : aNode.getChildren()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aJCas, child, constAnno, aTokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray childArray = (FSArray) FSCollectionFactory.createFSArray(aJCas, childAnnotations);
        constAnno.setChildren(childArray);

        // write annotation for current node to index
        aJCas.addFsToIndexes(constAnno);

        return constAnno;
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * Appends the tokens of a constituent tree to a builder in one-line form.
 * The tree is walked depth-first; for every POS node whose type is not
 * "-NONE-", the covered text followed by a single space is appended.
 *
 * @param parse
 *          the parse tree
 * @param sb
 *          the stringbuilder that receives the tokens
 */
private void getTokens(Parse parse, StringBuilder sb) {
    if (!parse.isPosTag()) {
        // Inner node: descend into each child in order
        for (Parse child : parse.getChildren()) {
            getTokens(child, sb);
        }
        return;
    }

    if (parse.getType().equals("-NONE-")) {
        return;
    }
    sb.append(parse.getCoveredText()).append(" ");
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * Appends the tokens of a constituent tree to a builder in Word_POS form.
 * The tree is walked depth-first; for every POS node whose type is not
 * "-NONE-", the covered text, an underscore, the node type and a single
 * space are appended.
 *
 * @param parse
 *          the parse tree
 * @param sb
 *          the stringbuilder that receives the Word_POS pairs
 */
private void getWordType(Parse parse, StringBuilder sb) {
    if (!parse.isPosTag()) {
        // Inner node: descend into each child in order
        for (Parse child : parse.getChildren()) {
            getWordType(child, sb);
        }
        return;
    }

    String posTag = parse.getType();
    if (posTag.equals("-NONE-")) {
        return;
    }
    sb.append(parse.getCoveredText()).append("_").append(posTag).append(" ");
}

From source file:org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java

/**
 * Collects punctuation and syntax statistics from a parse tree.
 * <p>
 * The tree is traversed breadth-first. Only nodes with exactly one child
 * and a probability below 1 are scored:
 * <ul>
 * <li>nodes typed as one of {@code , . ! ? ; :} contribute to the
 * punctuation score;</li>
 * <li>nodes typed "TOP" or "S" are ignored;</li>
 * <li>every other node contributes to the syntax figures, and is counted as
 * a nominal when its type starts with "NN".</li>
 * </ul>
 * The nominal counter is never reset, so once more than two nominals have
 * been seen, every later scored non-nominal node counts as a long nominal
 * (provided the root probability exceeds -25.5).
 *
 * @param parse
 *            the root of the parse tree to evaluate
 * @return a holder with the mean punctuation probability (0.0 if no
 *         punctuation was scored), the lowest syntax probability (1.0 if
 *         none was scored), the sum of syntax probabilities, and the
 *         nominal, long-nominal and syntax-node counts
 */
private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {

    double punctProbSum = 0.0;
    int punctNodes = 0;

    double lowestSyntaxProb = 1.0;
    double syntaxProbSum = 0.0;
    double syntaxNodes = 0;

    double nominals = 0;
    double longNominals = 0;

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(parse);
    double rootProb = parse.getProb();

    while (!pending.isEmpty()) {
        Parse node = pending.remove();

        if (node.getChildCount() == 1 && node.getProb() < 1) {
            double prob = node.getProb();
            String nodeType = node.getType();

            boolean isPunctuation = StringUtils.equals(nodeType, ",") || StringUtils.equals(nodeType, ".")
                    || StringUtils.equals(nodeType, "!") || StringUtils.equals(nodeType, "?")
                    || StringUtils.equals(nodeType, ";") || StringUtils.equals(nodeType, ":");

            if (isPunctuation) {
                punctProbSum += prob;
                punctNodes++;
            } else {
                boolean isSentenceLevel = StringUtils.equals(nodeType, "TOP")
                        || StringUtils.equals(nodeType, "S");
                if (!isSentenceLevel) {
                    if (nodeType.startsWith("NN")) {
                        nominals++;
                    } else if (nominals > 2 && rootProb > -25.5) {
                        longNominals++;
                    }

                    if (prob < lowestSyntaxProb) {
                        lowestSyntaxProb = prob;
                    }
                    syntaxProbSum += prob;
                    syntaxNodes++;
                }
            }
        }

        // Queue the children regardless of whether this node was scored
        for (Parse child : node.getChildren()) {
            pending.add(child);
        }
    }

    double meanPunctProb = 0.0;
    if (punctNodes != 0) {
        meanPunctProb = punctProbSum / punctNodes;
    }

    ConventionsDocumentQualityHolder holder = new ConventionsDocumentQualityHolder();
    holder.setOverallPunctScore(meanPunctProb);
    holder.setMinSyntaxScore(lowestSyntaxProb);
    holder.setOverallSyntaxScore(syntaxProbSum);
    holder.setNumOfNoms(nominals);
    holder.setNumLongNominals(longNominals);
    holder.setSyntaxCount(syntaxNodes);
    return holder;
}

From source file:org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java

/**
 * Searches a parse tree breadth-first for the first node whose type begins
 * with "VB" (ignoring case) and decides on that node alone: the result is
 * true when the first base form returned by
 * {@code wordnet.getBaseWords(word, EnumPOS.VERB)} for the node's word ends
 * with "be" (ignoring case). Nodes below a "VB" node are never examined.
 *
 * @param doc
 *            not used by this method
 * @param p
 *            the root of the parse tree to search
 * @return true if the first "VB" node found has a first base form ending in
 *         "be"; false if it does not, if no base form is returned, or if
 *         the tree contains no "VB" node
 */
private boolean checkPassive(AbstractDocument doc, Parse p) {

    Queue<Parse> pending = new LinkedList<Parse>();
    pending.add(p);

    while (!pending.isEmpty()) {
        Parse node = pending.remove();
        String nodeType = node.getType();

        boolean isVerbNode = (nodeType.length() >= 2)
                && StringUtils.equalsIgnoreCase(nodeType.substring(0, 2), "VB");
        if (!isVerbNode) {
            for (Parse child : node.getChildren()) {
                pending.add(child);
            }
            continue;
        }

        // Cut the node's word out of the full text using its span
        String fullText = node.getText();
        int start = node.getSpan().getStart();
        String word = fullText.substring(start, start + node.getSpan().length());

        List<String> baseForms = wordnet.getBaseWords(word, EnumPOS.VERB);
        return !baseForms.isEmpty() && StringUtils.endsWithIgnoreCase(baseForms.get(0), "be");
    }
    return false;
}