Java tutorial
/*******************************************************************************
 * Copyright (c) 2013 American Institutes for Research
 *
 * This file is part of AIROSE.
 *
 * AIROSE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * AIROSE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with AIROSE. If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package org.opentestsystem.airose.docprocessors;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.Queue;

import opennlp.tools.parser.Parse;

import org.apache.commons.lang.StringUtils;
import org.opentestsystem.airose.common.abstractdocument.AbstractDocProcessors;
import org.opentestsystem.airose.common.abstractdocument.AbstractDocument;
import org.opentestsystem.airose.common.abstractdocument.AbstractResource;
import org.opentestsystem.airose.common.abstractdocument.DocProcessorException;
import org.opentestsystem.airose.docquality.ConventionsDocumentQualityHolder;

/**
 * Document processor that walks every parse tree of a document, collects
 * punctuation, syntax and nominal statistics from the single-child nodes of
 * each tree, and stores the aggregated figures on the document as its
 * conventions assessments.
 */
public class ConventionsQualityDocProcessor extends AbstractDocProcessors {

    /** Node types that are counted as punctuation rather than as syntax nodes. */
    private static final String[] PUNCTUATION_TYPES = { ",", ".", "!", "?", ";", ":" };

    /**
     * Creates a processor bound to the given document.
     *
     * @param doc the document handed to the superclass constructor
     */
    public ConventionsQualityDocProcessor(AbstractDocument doc) {
        super(doc);
    }

    /**
     * Evaluates every parse of the document, sums the per-parse figures and
     * normalises them before storing the result via
     * {@code setConventionsAssessments}.
     * <p>
     * The overall syntax score is divided by the total syntax-node count (or
     * set to 0 when that count is 0). The punctuation score and the minimum
     * syntax score are divided by the sentence count, and the average number of
     * syntax nodes per sentence is set, only when the sentence count is
     * positive; otherwise those values are left as accumulated.
     *
     * @param resourceMap not read by this processor
     * @return always {@code true}
     * @throws DocProcessorException declared by the overridden method; not
     *         thrown directly by the code in this method
     */
    @Override
    public boolean processDocument(HashMap<String, AbstractResource> resourceMap) throws DocProcessorException {
        ConventionsDocumentQualityHolder totals = new ConventionsDocumentQualityHolder();
        for (Parse sentenceParse : getDocument().getParse()) {
            addInto(totals, evaluateSyntax(sentenceParse));
        }

        // Turn the summed syntax score into a per-node average.
        double totalSyntaxNodes = totals.getSyntaxCount();
        if (totalSyntaxNodes == 0) {
            totals.setOverallSyntaxScore(0);
        } else {
            totals.setOverallSyntaxScore(totals.getOverallSyntaxScore() / totalSyntaxNodes);
        }

        // Per-sentence averages are only meaningful when there are sentences.
        double sentences = getDocument().getSentenceCount();
        if (sentences > 0) {
            totals.setOverallPunctScore(totals.getOverallPunctScore() / sentences);
            totals.setMinSyntaxScore(totals.getMinSyntaxScore() / sentences);
            totals.setAverageSyntaxNodesPerSentence(totalSyntaxNodes / sentences);
        }

        getDocument().setConventionsAssessments(totals);
        return true;
    }

    /**
     * Adds each figure of {@code part} onto the matching figure of
     * {@code totals}.
     *
     * @param totals running sums, updated in place
     * @param part   figures of a single parse
     */
    private void addInto(ConventionsDocumentQualityHolder totals, ConventionsDocumentQualityHolder part) {
        totals.setOverallPunctScore(totals.getOverallPunctScore() + part.getOverallPunctScore());
        totals.setMinSyntaxScore(totals.getMinSyntaxScore() + part.getMinSyntaxScore());
        totals.setOverallSyntaxScore(totals.getOverallSyntaxScore() + part.getOverallSyntaxScore());
        totals.setNumOfNoms(totals.getNumOfNoms() + part.getNumOfNoms());
        totals.setNumLongNominals(totals.getNumLongNominals() + part.getNumLongNominals());
        totals.setSyntaxCount(totals.getSyntaxCount() + part.getSyntaxCount());
    }

    /**
     * Tells whether the given node type is one of the punctuation types.
     *
     * @param type node type, may be {@code null}
     * @return {@code true} when {@code type} equals one of
     *         {@link #PUNCTUATION_TYPES}
     */
    private static boolean isPunctuationType(String type) {
        for (String punctuation : PUNCTUATION_TYPES) {
            if (StringUtils.equals(type, punctuation)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Traverses one parse tree breadth-first and gathers its figures.
     * <p>
     * Only nodes with exactly one child and a {@code getProb()} value below 1
     * are inspected. Punctuation nodes contribute to the punctuation score,
     * which is returned as the mean over the punctuation nodes found (0.0 when
     * there are none). Every other node, except those typed {@code TOP} or
     * {@code S}, counts as a syntax node: its value is added to the syntax sum
     * (returned un-averaged), it may lower the minimum syntax score (which
     * starts at 1.0), and it feeds the nominal counters.
     *
     * @param parse root of the tree to evaluate
     * @return a holder carrying the figures of this tree
     */
    private ConventionsDocumentQualityHolder evaluateSyntax(Parse parse) {
        double punctProbSum = 0.0;
        int punctNodes = 0;
        double lowestSyntaxProb = 1.0;
        double syntaxProbSum = 0.0;
        double syntaxNodes = 0;
        double nounNodes = 0;
        double longNominals = 0;

        double rootProb = parse.getProb();

        Queue<Parse> pending = new LinkedList<Parse>();
        pending.add(parse);

        while (!pending.isEmpty()) {
            Parse node = pending.remove();
            double nodeProb = node.getProb();

            if (node.getChildCount() == 1 && nodeProb < 1) {
                String nodeType = node.getType();

                if (isPunctuationType(nodeType)) {
                    punctProbSum += nodeProb;
                    punctNodes++;
                } else if (!(StringUtils.equals(nodeType, "TOP") || StringUtils.equals(nodeType, "S"))) {
                    if (nodeType.startsWith("NN")) {
                        nounNodes++;
                    } else if (nounNodes > 2 && rootProb > -25.5) {
                        // The noun counter is never reset, so once more than two
                        // noun nodes have been seen in this tree every later
                        // non-noun syntax node lands here.
                        longNominals++;
                    }

                    if (nodeProb < lowestSyntaxProb) {
                        lowestSyntaxProb = nodeProb;
                    }
                    syntaxProbSum += nodeProb;
                    syntaxNodes++;
                }
            }

            // Queue the children so the whole tree gets visited level by level.
            for (Parse child : node.getChildren()) {
                pending.add(child);
            }
        }

        double meanPunctProb;
        if (punctNodes == 0) {
            meanPunctProb = 0.0;
        } else {
            meanPunctProb = punctProbSum / punctNodes;
        }

        ConventionsDocumentQualityHolder result = new ConventionsDocumentQualityHolder();
        result.setOverallPunctScore(meanPunctProb);
        result.setMinSyntaxScore(lowestSyntaxProb);
        result.setOverallSyntaxScore(syntaxProbSum);
        result.setNumOfNoms(nounNodes);
        result.setNumLongNominals(longNominals);
        result.setSyntaxCount(syntaxNodes);
        return result;
    }
}