org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java Source code

Java tutorial

Introduction

Here is the source code for org.opentestsystem.airose.docquality.processors.PassiveSentencesQualityProcessor.java

Source

/*******************************************************************************
 * Copyright (c) 2013 American Institutes for Research
 * 
 * This file is part of AIROSE.
 * 
 * AIROSE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * 
 * AIROSE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with AIROSE.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package org.opentestsystem.airose.docquality.processors;

import java.util.List;
import java.util.Queue;
import java.util.LinkedList;
import opennlp.tools.parser.Parse;

import org.apache.commons.lang.StringUtils;
import org.opentestsystem.airose.common.abstractdocument.AbstractDocument;
import org.opentestsystem.airose.common.abstractdocument.EnumPOS;
import org.opentestsystem.airose.db.entities.DocumentQuality;
import org.opentestsystem.airose.docquality.EnumDocumentQualityAttributes;
import org.opentestsystem.airose.wordnet.EssayScorerWordNetJAWS;

/**
 * Document quality processor that reports the proportion of sentences in a
 * document that are judged to be passive.
 *
 * <p>The judgement is a heuristic: each parse returned by the document is
 * walked breadth-first, and the parse counts as passive when the first node
 * whose type starts with "VB" has "be" as its first verb base form
 * according to WordNet. The number of such parses is divided by the sentence
 * count reported by {@link SentenceCountQualityProcessor}.
 */
public class PassiveSentencesQualityProcessor extends AbstractDocumentQualityProcessor {
    /** Prefix shared by the parse node types that are treated as verbs. */
    private static final String VERB_TAG_PREFIX = "VB";

    /** Base form that marks a verb as a form of "to be". */
    private static final String BE_LEMMA = "be";

    private final SentenceCountQualityProcessor sentenceCounter = new SentenceCountQualityProcessor();
    // NOTE: "getInstace" (sic) is the accessor name exposed by EssayScorerWordNetJAWS.
    private final EssayScorerWordNetJAWS wordnet = EssayScorerWordNetJAWS.getInstace();

    /**
     * Creates the processor.
     *
     * @param quality passed through unchanged to the superclass constructor
     */
    public PassiveSentencesQualityProcessor(DocumentQuality quality) {
        super(quality);
    }

    /**
     * Computes the ratio of passive parses to the sentence count of the document.
     *
     * @param doc the document to score
     * @return number of parses flagged as passive divided by the sentence count,
     *         or {@code 0} when the sentence count is not positive or the
     *         document exposes no parses
     */
    @Override
    public double assignQualityWeight(AbstractDocument doc) {
        double sentenceCount = sentenceCounter.assignQualityWeight(doc);
        if (sentenceCount > 0) {
            Parse[] parses = doc.getParse();
            if (parses == null) {
                // Defensive: without parses there is nothing that can be flagged as passive.
                return 0;
            }

            double passiveSentences = 0;
            for (Parse sentence : parses) {
                if (checkPassive(doc, sentence)) {
                    ++passiveSentences;
                }
            }
            return passiveSentences / sentenceCount;
        }
        return 0;
    }

    /**
     * @return {@link EnumDocumentQualityAttributes#PASSIVE_SENTENCES_PROPORTION}
     */
    @Override
    public EnumDocumentQualityAttributes getDocumentQualityAttributeType() {
        return EnumDocumentQualityAttributes.PASSIVE_SENTENCES_PROPORTION;
    }

    /**
     * Decides whether a parse is passive by inspecting the first verb node found
     * in a breadth-first traversal. Only that first verb is examined; the
     * traversal stops there whatever the outcome.
     *
     * @param doc the document the parse belongs to (not consulted by this check)
     * @param p   root of the parse tree to inspect
     * @return {@code true} when the first verb node's first base form is "be";
     *         {@code false} otherwise, including when no verb node exists
     */
    private boolean checkPassive(AbstractDocument doc, Parse p) {
        Queue<Parse> pending = new LinkedList<Parse>();
        pending.add(p);

        while (!pending.isEmpty()) {
            Parse node = pending.remove();

            if (!StringUtils.startsWithIgnoreCase(node.getType(), VERB_TAG_PREFIX)) {
                // Not a verb node: keep searching one level further down.
                for (Parse child : node.getChildren()) {
                    pending.add(child);
                }
                continue;
            }

            // The span indexes into the full text of the parse, so cut out the covered word.
            int start = node.getSpan().getStart();
            String word = node.getText().substring(start, start + node.getSpan().length());

            List<String> roots = wordnet.getBaseWords(word, EnumPOS.VERB);
            if (roots == null || roots.isEmpty()) {
                return false;
            }
            // Exact match on purpose: a suffix match would also accept every lemma
            // that merely ends in "be" (e.g. "describe", "subscribe", "probe").
            // NOTE(review): assumes getBaseWords yields plain lemmas - confirm.
            return StringUtils.equalsIgnoreCase(roots.get(0), BE_LEMMA);
        }
        return false;
    }
}