org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java Source code

Java tutorial

Introduction

Here is the source code for org.opentestsystem.airose.docprocessors.ConventionsQualityDocProcessor.java

Source

/*******************************************************************************
 * Copyright (c) 2013 American Institutes for Research
 * 
 * This file is part of AIROSE.
 * 
 * AIROSE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * 
 * AIROSE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with AIROSE.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package org.opentestsystem.airose.docprocessors;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.Queue;

import opennlp.tools.parser.Parse;

import org.apache.commons.lang.StringUtils;
import org.opentestsystem.airose.common.abstractdocument.AbstractDocProcessors;
import org.opentestsystem.airose.common.abstractdocument.AbstractDocument;
import org.opentestsystem.airose.common.abstractdocument.AbstractResource;
import org.opentestsystem.airose.common.abstractdocument.DocProcessorException;
import org.opentestsystem.airose.docquality.ConventionsDocumentQualityHolder;

/**
 * Document processor that walks every parse tree of a document, collects punctuation and syntax
 * figures from the node probabilities, and stores the aggregated result on the document as a
 * {@link ConventionsDocumentQualityHolder}.
 */
public class ConventionsQualityDocProcessor extends AbstractDocProcessors {

    /** Node types whose probability is added to the punctuation score instead of the syntax score. */
    private static final String[] PUNCTUATION_TYPES = { ",", ".", "!", "?", ";", ":" };

    public ConventionsQualityDocProcessor(AbstractDocument doc) {
        super(doc);
    }

    /**
     * Evaluates every parse of the document, sums the per-parse figures and normalises them:
     * <ul>
     * <li>the overall syntax score is divided by the total number of scored syntax nodes
     * (or set to 0 when there are none);</li>
     * <li>when the document has at least one sentence, the punctuation score and the minimum
     * syntax score are divided by the sentence count, and the average number of syntax nodes
     * per sentence is recorded;</li>
     * <li>the nominal counters are left as plain sums.</li>
     * </ul>
     * The result is handed to the document through {@code setConventionsAssessments}.
     *
     * @param resourceMap not read by this processor
     * @return always {@code true}
     * @throws DocProcessorException declared by the overridden method
     */
    @Override
    public boolean processDocument(HashMap<String, AbstractResource> resourceMap) throws DocProcessorException {
        ConventionsDocumentQualityHolder totals = new ConventionsDocumentQualityHolder();
        for (Parse tree : getDocument().getParse()) {
            addInto(totals, evaluateSyntax(tree));
        }

        // Turn the summed syntax score into a per-node average.
        final double syntaxNodes = totals.getSyntaxCount();
        if (syntaxNodes == 0) {
            totals.setOverallSyntaxScore(0);
        } else {
            totals.setOverallSyntaxScore(totals.getOverallSyntaxScore() / syntaxNodes);
        }

        // Per-sentence averages are only computed when the document reports sentences.
        final double sentences = getDocument().getSentenceCount();
        if (sentences > 0) {
            totals.setOverallPunctScore(totals.getOverallPunctScore() / sentences);
            totals.setMinSyntaxScore(totals.getMinSyntaxScore() / sentences);
            totals.setAverageSyntaxNodesPerSentence(syntaxNodes / sentences);
        }

        getDocument().setConventionsAssessments(totals);
        return true;
    }

    /**
     * Adds each figure of {@code part} to the matching figure of {@code totals}.
     */
    private static void addInto(ConventionsDocumentQualityHolder totals, ConventionsDocumentQualityHolder part) {
        totals.setOverallPunctScore(totals.getOverallPunctScore() + part.getOverallPunctScore());
        totals.setMinSyntaxScore(totals.getMinSyntaxScore() + part.getMinSyntaxScore());
        totals.setOverallSyntaxScore(totals.getOverallSyntaxScore() + part.getOverallSyntaxScore());
        totals.setNumOfNoms(totals.getNumOfNoms() + part.getNumOfNoms());
        totals.setNumLongNominals(totals.getNumLongNominals() + part.getNumLongNominals());
        totals.setSyntaxCount(totals.getSyntaxCount() + part.getSyntaxCount());
    }

    /**
     * Tells whether {@code type} is one of {@link #PUNCTUATION_TYPES}; a {@code null} type is not.
     */
    private static boolean isPunctuationType(String type) {
        for (String candidate : PUNCTUATION_TYPES) {
            if (StringUtils.equals(type, candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Traverses one parse tree breadth-first and gathers its figures.
     * <p>
     * Only nodes with exactly one child and a probability below 1 are scored. Punctuation nodes
     * feed the punctuation score, which is returned as the mean over those nodes (0 when there
     * are none). Among the remaining nodes, "TOP" and "S" are ignored; every other node adds its
     * probability to the syntax score, bumps the syntax count and may lower the minimum syntax
     * score, which starts at 1.0.
     *
     * @param parse root of the tree to evaluate
     * @return a holder with the punctuation mean, minimum syntax score, summed syntax score,
     *         nominal count, long-nominal count and syntax node count for this tree
     */
    private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {
        Queue<Parse> pending = new LinkedList<Parse>();
        pending.add(parse);
        final double rootProb = parse.getProb();

        double punctTotal = 0.0;
        int punctNodes = 0;

        double lowestSyntaxProb = 1.0;
        double syntaxTotal = 0.0;
        double syntaxNodes = 0;

        double nominals = 0;
        double longNominals = 0;

        while (!pending.isEmpty()) {
            Parse node = pending.remove();

            // Queue the children up front so the guard clauses below can simply skip this node.
            for (Parse child : node.getChildren()) {
                pending.add(child);
            }

            boolean scorable = (node.getChildCount() == 1) && (node.getProb() < 1);
            if (!scorable) {
                continue;
            }

            double prob = node.getProb();
            String type = node.getType();

            if (isPunctuationType(type)) {
                punctTotal += prob;
                punctNodes++;
                continue;
            }

            if (StringUtils.equals(type, "TOP") || StringUtils.equals(type, "S")) {
                continue;
            }

            if (type.startsWith("NN")) {
                nominals++;
            } else if ((nominals > 2) && (rootProb > -25.5)) {
                // The nominal counter is intentionally not reset here, so once more than two
                // nominals have been seen every later non-nominal node counts as well.
                longNominals++;
            }

            if (prob < lowestSyntaxProb) {
                lowestSyntaxProb = prob;
            }
            syntaxTotal += prob;
            syntaxNodes++;
        }

        double punctMean;
        if (punctNodes == 0) {
            punctMean = 0.0;
        } else {
            punctMean = punctTotal / punctNodes;
        }

        ConventionsDocumentQualityHolder result = new ConventionsDocumentQualityHolder();
        result.setOverallPunctScore(punctMean);
        result.setMinSyntaxScore(lowestSyntaxProb);
        result.setOverallSyntaxScore(syntaxTotal);
        result.setNumOfNoms(nominals);
        result.setNumLongNominals(longNominals);
        result.setSyntaxCount(syntaxNodes);
        return result;
    }
}