org.dllearner.learningproblems.ScoreThreeValued.java Source code

Introduction

Here is the source code for org.dllearner.learningproblems.ScoreThreeValued.java
Source

/**
 * Copyright (C) 2007 - 2016, Jens Lehmann
 *
 * This file is part of DL-Learner.
 *
 * DL-Learner is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * DL-Learner is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.dllearner.learningproblems;

import java.text.DecimalFormat;
import java.util.Set;

import org.semanticweb.owlapi.model.OWLEntity;

import com.google.common.collect.Sets;

/**
 * Score of a three-valued (positive / neutral / negative) classification,
 * obtained by comparing the classification results with the ideal results.
 * With non-negative penalties the misclassification part of the score is
 * never positive.
 *
 * TODO: The implementation is not very efficient, because some things are
 * only computed to be able to present the score results. This means that
 * it would be better to compute only the necessary computations and do
 * the other ones only when they are needed to calculate statistical values.
 *
 * @author Jens Lehmann
 *
 * @param <T> the type of the classified entities
 */
public class ScoreThreeValued<T extends OWLEntity> extends ScorePosNeg<T> {

    private static final long serialVersionUID = -1780084688122949685L;

    public enum ScoreMethod {
        POSITIVE, FULL
    }

    // configuration options
    private final double accuracyPenalty;
    private final double errorPenalty;
    private final boolean penaliseNeutralExamples;
    private final double percentPerLengthUnit;

    // potential configuration options (not implemented as such, but one
    // could do so)
    private final boolean showCorrectClassifications = false;
    private static final ScoreMethod SCORE_METHOD = ScoreMethod.POSITIVE;

    // classification results and ideal results as handed to the constructor
    private final Set<T> posClassified;
    private final Set<T> neutClassified;
    private final Set<T> negClassified;
    private final Set<T> posExamples;
    private final Set<T> neutExamples;
    private final Set<T> negExamples;

    // classification matrix; "xAsY" holds the x-examples that were classified as y
    private Set<T> posAsNeg;
    private Set<T> negAsPos;
    private Set<T> posAsNeut;
    private Set<T> neutAsPos;
    private Set<T> neutAsNeg;
    private Set<T> negAsNeut;
    private Set<T> posAsPos;
    private Set<T> negAsNeg;
    private Set<T> neutAsNeut;

    private double score;
    private double accuracy;
    private double accuracyOnExamples;
    private double accuracyOnPositiveExamples;
    private double errorRate;

    private final int nrOfExamples;
    private final int conceptLength;

    /**
     * Creates the score and immediately computes the classification matrix
     * and all derived statistics.
     *
     * @param conceptLength length of the scored concept
     * @param accuracyPenalty penalty per inaccurately classified instance
     * @param errorPenalty penalty per wrongly classified instance
     * @param penaliseNeutralExamples whether neutral examples classified as
     *        positive or negative are penalised
     * @param percentPerLengthUnit amount subtracted from the score per unit
     *        of concept length
     * @param posClassified instances classified as positive
     * @param neutClassified instances classified as neutral
     * @param negClassified instances classified as negative
     * @param posExamples positive examples
     * @param neutExamples neutral examples
     * @param negExamples negative examples
     */
    public ScoreThreeValued(int conceptLength, double accuracyPenalty, double errorPenalty,
            boolean penaliseNeutralExamples, double percentPerLengthUnit, Set<T> posClassified,
            Set<T> neutClassified, Set<T> negClassified, Set<T> posExamples, Set<T> neutExamples,
            Set<T> negExamples) {
        this.conceptLength = conceptLength;
        this.accuracyPenalty = accuracyPenalty;
        this.errorPenalty = errorPenalty;
        this.penaliseNeutralExamples = penaliseNeutralExamples;
        this.percentPerLengthUnit = percentPerLengthUnit;
        this.posClassified = posClassified;
        this.neutClassified = neutClassified;
        this.negClassified = negClassified;
        this.posExamples = posExamples;
        this.neutExamples = neutExamples;
        this.negExamples = negExamples;
        // neutral examples are deliberately not counted here
        this.nrOfExamples = posExamples.size() + negExamples.size();
        computeClassificationMatrix();
        computeStatistics();
    }

    /**
     * Fills the 3x3 classification matrix by intersecting every example set
     * with every classification set.
     */
    private void computeClassificationMatrix() {
        // NOTE: per the Guava documentation Sets.intersection returns an
        // unmodifiable view backed by its arguments, not a copy.
        posAsNeg = Sets.intersection(posExamples, negClassified);
        negAsPos = Sets.intersection(negExamples, posClassified);
        posAsNeut = Sets.intersection(posExamples, neutClassified);
        neutAsPos = Sets.intersection(neutExamples, posClassified);
        neutAsNeg = Sets.intersection(neutExamples, negClassified);
        negAsNeut = Sets.intersection(negExamples, neutClassified);
        // these 3 computations are not important for the score, i.e. they
        // could be moved elsewhere if that improves performance
        posAsPos = Sets.intersection(posExamples, posClassified);
        negAsNeg = Sets.intersection(negExamples, negClassified);
        neutAsNeut = Sets.intersection(neutExamples, neutClassified);
    }

    /**
     * Computes the score and the statistical values (accuracies, error rate)
     * from the classification matrix.
     */
    private void computeStatistics() {
        double penalty = -posAsNeg.size() * errorPenalty - negAsPos.size() * errorPenalty
                - posAsNeut.size() * accuracyPenalty;

        if (SCORE_METHOD == ScoreMethod.FULL) {
            penalty -= negAsNeut.size() * accuracyPenalty;
        }

        if (penaliseNeutralExamples) {
            penalty -= (neutAsPos.size() * accuracyPenalty + neutAsNeg.size() * accuracyPenalty);
        }

        // TODO: one could use the accuracy penalty instead of the error
        // penalty here
        double worstValue = nrOfExamples * errorPenalty;
        // Normalise by the worst case. Without positive/negative examples, or
        // with a zero error penalty, the worst case is 0 and dividing would
        // yield NaN or -Infinity; the penalty is left unnormalised then.
        if (worstValue != 0) {
            penalty = penalty / worstValue;
        }
        score = penalty - percentPerLengthUnit * conceptLength;

        // the following computations could also be moved elsewhere for
        // performance reasons
        int domainSize = nrOfExamples + neutExamples.size();
        int correctlyClassified = posAsPos.size() + negAsNeg.size() + neutAsNeut.size();
        int correctOnExamples = posAsPos.size() + negAsNeg.size();
        int errors = posAsNeg.size() + negAsPos.size();

        // accuracy = correctly classified instances / number of domain elements
        accuracy = ratio(correctlyClassified, domainSize);

        // accuracy on examples = correctly classified instances / number of
        // positive and negative examples
        accuracyOnExamples = ratio(correctOnExamples, nrOfExamples);

        accuracyOnPositiveExamples = ratio(posAsPos.size(), posExamples.size());

        // error rate = completely wrongly classified instances / number of
        // positive and negative examples
        errorRate = ratio(errors, nrOfExamples);
    }

    /**
     * Divides two counts as doubles; an empty denominator yields 0 instead
     * of NaN.
     */
    private static double ratio(int numerator, int denominator) {
        return denominator == 0 ? 0.0 : (double) numerator / denominator;
    }

    /**
     * @return the computed score, i.e. the normalised misclassification
     *         penalty minus the concept length penalty
     */
    @Override
    public double getScoreValue() {
        return score;
    }

    /**
     * @return number of cases of individuals that got exactly the same
     *         classification with both definitions
     */
    public int getMatchRate() {
        return posAsPos.size() + negAsNeg.size();
    }

    /**
     * @return amount of individuals for which class-membership w.r.t.
     *         the given query could not be determined using the induced
     *         definition, while they actually belong (do not belong) to the
     *         query concept
     */
    public int getOmmissionErrorRate() {
        return posAsNeut.size() + negAsNeut.size();
    }

    /**
     * @return amount of individuals found not to belong to the query concept
     *         according to the induced definition, while they actually belong
     *         to it and vice-versa
     */
    public int getCommissionErrorRate() {
        return posAsNeg.size() + negAsPos.size();
    }

    /**
     * @return amount of individuals found to belong or not to belong to the
     *         query concept according to the induced definition, while either
     *         case is not logically derivable from the knowledge base with the
     *         original definition
     */
    public int getInductionRate() {
        return neutAsPos.size() + neutAsNeg.size();
    }

    /**
     * @return a multi-line report listing the score method, the inaccurately
     *         and wrongly classified instances and the statistical values
     */
    @Override
    public String toString() {
        DecimalFormat df = new DecimalFormat("0.00");
        StringBuilder sb = new StringBuilder();
        sb.append("score method ");
        sb.append(SCORE_METHOD == ScoreMethod.FULL ? "full" : "positive");
        if (!penaliseNeutralExamples) {
            sb.append(" (neutral examples not penalized)");
        }
        sb.append("\n");
        if (showCorrectClassifications) {
            sb.append("Correctly classified:\n");
            sb.append("  positive --> positive: ").append(posAsPos).append("\n");
            sb.append("  neutral --> neutral: ").append(neutAsNeut).append("\n");
            sb.append("  negative --> negative: ").append(negAsNeg).append("\n");
        }
        sb.append("Inaccurately classified (penalty of ").append(df.format(accuracyPenalty))
                .append(" per instance):\n");
        sb.append("  positive --> neutral: ").append(posAsNeut).append("\n");
        if (penaliseNeutralExamples) {
            sb.append("  neutral --> positive: ").append(neutAsPos).append("\n");
            sb.append("  neutral --> negative: ").append(neutAsNeg).append("\n");
        }
        if (SCORE_METHOD == ScoreMethod.FULL) {
            sb.append("  negative --> neutral: ").append(negAsNeut).append("\n");
        }
        sb.append("Classification errors (penalty of ").append(df.format(errorPenalty))
                .append(" per instance):\n");
        sb.append("  positive --> negative: ").append(posAsNeg).append("\n");
        sb.append("  negative --> positive: ").append(negAsPos).append("\n");
        sb.append("Statistics:\n");
        sb.append("  Score: ").append(df.format(score)).append("\n");
        sb.append("  Accuracy: ").append(df.format(accuracy * 100)).append("%\n");
        sb.append("  Accuracy on examples: ").append(df.format(accuracyOnExamples * 100)).append("%\n");
        sb.append("  Accuracy on positive examples: ").append(df.format(accuracyOnPositiveExamples * 100))
                .append("%\n");
        sb.append("  Error rate: ").append(df.format(errorRate * 100)).append("%\n");
        return sb.toString();
    }

    /**
     * @return the instances classified as negative, as passed to the constructor
     */
    public Set<T> getNegClassified() {
        return negClassified;
    }

    /**
     * @return the instances classified as positive, as passed to the constructor
     */
    public Set<T> getPosClassified() {
        return posClassified;
    }

    /**
     * @return the negative examples that were classified as positive
     */
    @Override
    public Set<T> getCoveredNegatives() {
        return negAsPos;
    }

    /**
     * @return the positive examples that were classified as positive
     */
    @Override
    public Set<T> getCoveredPositives() {
        return posAsPos;
    }

    /**
     * @return the positive examples that were classified as negative
     */
    @Override
    public Set<T> getNotCoveredPositives() {
        return posAsNeg;
    }

    /**
     * @return the negative examples that were classified as negative
     * @see org.dllearner.core.Score#getNotCoveredNegatives()
     */
    @Override
    public Set<T> getNotCoveredNegatives() {
        return negAsNeg;
    }

    /**
     * Creates a score over the same classification and example sets and the
     * same penalties, but for a different concept length.
     *
     * @param newLength the concept length to use for the new score
     * @return a newly computed score for the given length
     */
    @Override
    public ScorePosNeg<T> getModifiedLengthScore(int newLength) {
        return new ScoreThreeValued<>(newLength, accuracyPenalty, errorPenalty, penaliseNeutralExamples,
                percentPerLengthUnit, posClassified, neutClassified, negClassified, posExamples, neutExamples,
                negExamples);
    }

    /**
     * @return the fraction of all examples (positive, negative and neutral)
     *         that were classified correctly; 0 if there are no examples
     * @see org.dllearner.core.Score#getAccuracy()
     */
    @Override
    public double getAccuracy() {
        return accuracy;
    }
}