com.cloudera.science.pig.EBGM.java Source code

Java tutorial

Introduction

Here is the source code for EBGM.java, from the package com.cloudera.science.pig.

Source

/**
 * Copyright 2011 Cloudera Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.science.pig;

import java.io.IOException;

import org.apache.commons.math.special.Gamma;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;

import com.cloudera.science.mgps.NFunction;
import com.cloudera.science.mgps.QFunction;

/**
 * A Pig UDF for calculating the Empirical Bayes Geometric Mean (EBGM) for the
 * multi-item association sets algorithm described in:
 * "Empirical Bayes screening for multi-item associations", DuMouchel and
 * Pregibon (2001). Available from:
 * 
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.6251&rep=rep1&type=pdf
 *
 * <p>The function is configured once with five parameters
 * ({@code alpha1, beta1, alpha2, beta2, p}) and is then evaluated per input
 * tuple on a pair {@code (n, e)}. Pig passes constructor arguments as
 * strings, which is why a {@code String}-based constructor is provided.
 */
public class EBGM extends EvalFunc<Double> {

    /**
     * Helper that evaluates {@code digamma(alpha + n) - log(beta + e)} for a
     * fixed {@code (alpha, beta)} pair.
     */
    private static final class DeltaFunction {
        private final double alpha;
        private final double beta;

        public DeltaFunction(double alpha, double beta) {
            this.alpha = alpha;
            this.beta = beta;
        }

        /**
         * @param n the count added to {@code alpha} before taking the digamma
         * @param e the value added to {@code beta} before taking the logarithm
         * @return {@code Gamma.digamma(alpha + n) - Math.log(beta + e)}
         */
        public double eval(int n, double e) {
            return Gamma.digamma(alpha + n) - Math.log(beta + e);
        }
    }

    private final DeltaFunction delta1;
    private final DeltaFunction delta2;
    // NOTE(review): presumably the posterior weight of the first mixture
    // component from the cited paper -- confirm against QFunction.
    private final QFunction q;

    /**
     * Creates the UDF from string-encoded parameters and delegates to the
     * numeric constructor.
     *
     * @param alpha1 decimal string for the first component's alpha
     * @param beta1 decimal string for the first component's beta
     * @param alpha2 decimal string for the second component's alpha
     * @param beta2 decimal string for the second component's beta
     * @param p decimal string for the value handed to {@link QFunction}
     * @throws NumberFormatException if any argument is not a parsable double
     * @throws NullPointerException if any argument is null
     */
    public EBGM(String alpha1, String beta1, String alpha2, String beta2, String p) {
        // parseDouble yields the primitive directly; valueOf would box and
        // then immediately unbox for the double-based constructor.
        this(Double.parseDouble(alpha1), Double.parseDouble(beta1), Double.parseDouble(alpha2),
                Double.parseDouble(beta2), Double.parseDouble(p));
    }

    /**
     * Creates the UDF from numeric parameters.
     *
     * @param alpha1 alpha of the first delta function and first {@link NFunction}
     * @param beta1 beta of the first delta function and first {@link NFunction}
     * @param alpha2 alpha of the second delta function and second {@link NFunction}
     * @param beta2 beta of the second delta function and second {@link NFunction}
     * @param p third argument passed to the {@link QFunction} constructor
     */
    public EBGM(double alpha1, double beta1, double alpha2, double beta2, double p) {
        this.delta1 = new DeltaFunction(alpha1, beta1);
        this.delta2 = new DeltaFunction(alpha2, beta2);
        this.q = new QFunction(new NFunction(alpha1, beta1), new NFunction(alpha2, beta2), p);
    }

    /**
     * Computes {@code exp(q * delta1(n, e) + (1 - q) * delta2(n, e))}, where
     * {@code q} is the value returned by the configured {@link QFunction} for
     * the same {@code (n, e)}.
     *
     * @param n the count (presumably the observed count of the cited paper --
     *        confirm against callers)
     * @param e the second input (presumably the expected count -- confirm
     *        against callers)
     * @return the exponentiated weighted combination of the two delta terms
     */
    public double eval(int n, double e) {
        double qval = q.eval(n, e);
        double expectedLog = qval * delta1.eval(n, e) + (1.0 - qval) * delta2.eval(n, e);
        return Math.exp(expectedLog);
    }

    /**
     * Pig entry point. Reads field 0 as the count {@code n} (converted with
     * {@link Number#intValue()}, so fractional values are truncated) and
     * field 1 as {@code e}, then delegates to {@link #eval(int, double)}.
     *
     * @param input the tuple holding {@code n} at position 0 and {@code e} at
     *        position 1
     * @return the EBGM score, or {@code null} when {@code input} is null or
     *         empty, or when either of the two fields is null
     * @throws IOException if the tuple fields cannot be accessed
     * @throws ClassCastException if a non-null field is not a {@link Number}
     */
    @Override
    public Double exec(Tuple input) throws IOException {
        // Follow the usual Pig UDF convention: null in, null out, rather than
        // failing the task with a NullPointerException.
        if (input == null || input.size() == 0) {
            return null;
        }
        Object count = input.get(0);
        Object expected = input.get(1);
        if (count == null || expected == null) {
            return null;
        }
        int n = ((Number) count).intValue();
        double e = ((Number) expected).doubleValue();
        return eval(n, e);
    }
}