de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssociationMetrics.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.dkpro.bigdata.collocations.AssociationMetrics.java

Source

/*******************************************************************************
 * Copyright 2013
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.dkpro.bigdata.collocations;

import org.apache.commons.lang.NotImplementedException;

/**
 * Implementation of association metrics from Stefan Evert's www.collocations.de
 * 
 * @author zorn
 * 
 */
public class AssociationMetrics {
    /** Natural logarithm of 2, used to convert natural logarithms to base 2. */
    private static final double LN_2 = Math.log(2);

    // Observed frequencies of the 2x2 contingency table:
    //
    //            | col 1 | col 2 |
    //    --------+-------+-------+----
    //    row 1   |  o11  |  o12  | r1
    //    row 2   |  o21  |  o22  | r2
    //    --------+-------+-------+----
    //            |  c1   |  c2   | n
    double o11;
    double o12;
    double o21;
    double o22;
    // marginals (row sums, column sums) and the sample size
    double r1;
    double r2;
    double c1;
    double c2;
    double n;
    // expected frequencies under independence: e_ij = r_i * c_j / n
    double e11 = 0.0;
    double e12 = 0.0;
    double e21 = 0.0;
    double e22 = 0.0;
    // Upper-case duplicates of the observed frequencies. They are package-visible,
    // so they are kept and init() keeps them in sync with the lower-case fields.
    double O11;
    double O12;
    double O21;
    double O22;
    // Upper-case duplicates of the marginals, kept in sync by init() as well.
    double R1;
    double R2;
    double C1;
    double C2;
    double N;

    /**
     * Loads a 2x2 contingency table and derives marginals and expected frequencies
     * from it. Must be called before any of the metric methods.
     * <p>
     * If all four counts are zero, the sample size is zero and the expected
     * frequencies (and therefore most metrics) evaluate to {@code NaN}.
     *
     * @param o11 observed frequency of cell (1,1)
     * @param o12 observed frequency of cell (1,2)
     * @param o21 observed frequency of cell (2,1)
     * @param o22 observed frequency of cell (2,2)
     */
    public void init(long o11, long o12, long o21, long o22) {
        this.o11 = o11;
        this.o12 = o12;
        this.o21 = o21;
        this.o22 = o22;
        O11 = this.o11;
        O12 = this.o12;
        O21 = this.o21;
        O22 = this.o22;
        // Sum in double precision so very large counts cannot overflow long arithmetic.
        r1 = this.o11 + this.o12;
        r2 = this.o21 + this.o22;
        c1 = this.o11 + this.o21;
        c2 = this.o12 + this.o22;
        n = r1 + r2;
        R1 = r1;
        R2 = r2;
        C1 = c1;
        C2 = c2;
        N = n;
        // expected frequencies under the independence assumption
        e11 = (r1 * c1) / n;
        e12 = (r1 * c2) / n;
        e21 = (r2 * c1) / n;
        e22 = (r2 * c2) / n;
    }

    /**
     * Pointwise mutual information in bits.
     *
     * @return {@code log2(o11 / e11)}; negative infinity when {@code o11} is zero
     *         and {@code e11} is positive
     */
    public double pmi() {
        return Math.log(o11 / e11) / LN_2;
    }

    /**
     * Pearson's chi-squared statistic for the 2x2 table.
     *
     * @return {@code n * (o11 - e11)^2 / (e11 * e22)}, which is algebraically equal
     *         to {@code n * (o11*o22 - o12*o21)^2 / (r1*r2*c1*c2)}
     */
    public double chisquared() {
        double deviation = o11 - e11;
        return (n * deviation * deviation) / (e11 * e22);
    }

    /**
     * Chi-squared statistic with Yates' continuity correction.
     *
     * @return {@code n * (|o11*o22 - o12*o21| - n/2)^2 / (r1*r2*c1*c2)}
     */
    public double chisquared_corr() {
        double corrected = Math.abs(o11 * o22 - o12 * o21) - n / 2;
        return (n * corrected * corrected) / (r1 * r2 * c1 * c2);
    }

    /**
     * Mutual information using the natural logarithm.
     *
     * @return {@code ln(o11 / e11)}
     */
    public double mutual_information() {
        return Math.log(o11 / e11);
    }

    /**
     * Odds ratio with 0.5 added to every cell so that empty cells do not produce
     * a division by zero. No logarithm is applied to the ratio.
     *
     * @return {@code ((o11 + 0.5) * (o22 + 0.5)) / ((o12 + 0.5) * (o21 + 0.5))}
     */
    public double odds_ratio_discounted() {
        return ((o11 + 0.5) * (o22 + 0.5)) / ((o12 + 0.5) * (o21 + 0.5));
    }

    /**
     * Dice coefficient.
     *
     * @return {@code 2 * o11 / (r1 + c1)}
     */
    public double dice() {
        return (2 * o11 / (r1 + c1));
    }

    /**
     * Minimum sensitivity.
     *
     * @return the smaller of {@code o11 / r1} and {@code o11 / c1}
     */
    public double ms() {
        return Math.min(o11 / r1, o11 / c1);
    }

    /**
     * Geometric mean measure.
     *
     * @return {@code o11 / sqrt(n * e11)}; since {@code e11 = r1 * c1 / n} this
     *         equals {@code o11 / sqrt(r1 * c1)}
     */
    public double gmean() {
        return o11 / Math.sqrt(n * e11);
    }

    /**
     * Local mutual information: the mutual information of cell (1,1) weighted by
     * its observed frequency. Uses the natural logarithm, like
     * {@link #mutual_information()}.
     *
     * @return {@code o11 * ln(o11 / e11)}, or 0 when {@code o11} is zero
     */
    public double local_mi() {
        return weightedLogRatio(o11, e11);
    }

    /**
     * Average mutual information over all four cells of the table, using the
     * natural logarithm. Cells with an observed frequency of zero contribute 0.
     *
     * @return the sum over all cells of {@code o_ij * ln(o_ij / e_ij)}
     */
    public double average_mi() {
        return weightedLogRatio(o11, e11)
                + weightedLogRatio(o12, e12)
                + weightedLogRatio(o21, e21)
                + weightedLogRatio(o22, e22);
    }

    /**
     * Computes {@code observed * ln(observed / expected)}, taking the term to be 0
     * for an empty cell (the limit of x*ln(x) as x approaches 0) instead of the
     * NaN that {@code 0 * ln(0)} would yield.
     */
    private static double weightedLogRatio(double observed, double expected) {
        if (observed == 0.0) {
            return 0.0;
        }
        return observed * Math.log(observed / expected);
    }

    // TODO: Fisher's exact test is not implemented yet; it needs a binomial
    // coefficient helper that stays numerically stable for large counts.

}