it.crs4.seal.recab.RecabTableReducer.java Source code

Java tutorial

Introduction

Here is the source code for it.crs4.seal.recab.RecabTableReducer.java

Source

// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal.  If not, see <http://www.gnu.org/licenses/>.

package it.crs4.seal.recab;

import it.crs4.seal.common.IMRContext;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

public class RecabTableReducer {
    public static final String CONF_SMOOTHING = "seal.recab.smoothing";
    public static final float CONF_SMOOTHING_DEFAULT = 0.0f;

    public static final String CONF_MAX_QSCORE = "seal.recab.max-qscore";
    public static final int CONF_MAX_QSCORE_DEFAULT = 40;

    public static final double PERROR_EPS = 0.0001;

    protected double smoothing = CONF_SMOOTHING_DEFAULT;
    protected int maxQscore = CONF_MAX_QSCORE_DEFAULT;
    protected ObservationCount sum = new ObservationCount();
    protected StringBuilder sbuilder = new StringBuilder(100);
    protected Text outputValue = new Text();

    public void setup(Configuration conf) {
        smoothing = conf.getFloat(CONF_SMOOTHING, CONF_SMOOTHING_DEFAULT);
        if (smoothing < 0.0)
            throw new IllegalArgumentException(CONF_SMOOTHING + " can't be less than 0");

        maxQscore = conf.getInt(CONF_MAX_QSCORE, CONF_MAX_QSCORE_DEFAULT);
        if (maxQscore <= 0)
            throw new IllegalArgumentException(CONF_MAX_QSCORE + " must be greater than 0");
    }

    public void reduce(Text key, Iterable<ObservationCount> values, IMRContext<Text, Text> context)
            throws IOException, InterruptedException {
        sum.set(0, 0);
        sbuilder.delete(0, sbuilder.length());

        for (ObservationCount counts : values)
            sum.addToThis(counts);

        if (sum.getObservations() > 0) {
            outputValue.set(key);
            sbuilder.append(sum.getObservations()).append(RecabTable.TableDelim).append(sum.getMismatches())
                    .append(RecabTable.TableDelim).append(empiricalQuality(sum));

            String tmp = sbuilder.toString();
            outputValue.append(tmp.getBytes(RecabTable.ASCII), 0, tmp.length());

            context.write(null, outputValue);
        }
    }

    protected int empiricalQuality(ObservationCount observations) {
        double perror = (observations.getMismatches() + smoothing) / (observations.getObservations() + smoothing)
                + PERROR_EPS;
        int score = (int) Math.rint(-10.0 * Math.log10(perror));
        // bound score.  GATK bounds quality between 1 and MAX_REASONABLE_Q_SCORE
        if (score > maxQscore)
            score = maxQscore;
        else if (score <= 0)
            score = 1;

        return score;
    }
}