gedi.lfc.LfcAlignedReadsProcessor.java Source code

Java tutorial

Introduction

Here is the source code for gedi.lfc.LfcAlignedReadsProcessor.java

Source

/**
 * 
 *    Copyright 2017 Florian Erhard
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 */
package gedi.lfc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.commons.math3.distribution.BetaDistribution;

import gedi.core.data.annotation.Transcript;
import gedi.core.data.reads.AlignedReadsData;
import gedi.core.data.reads.ContrastMapping;
import gedi.core.processing.GenomicRegionProcessor;
import gedi.core.processing.OverlapMode;
import gedi.core.processing.ProcessorContext;
import gedi.core.region.GenomicRegion;
import gedi.core.region.GenomicRegionStorage;
import gedi.core.region.ImmutableReferenceGenomicRegion;
import gedi.core.region.MissingInformationIntronInformation;
import gedi.core.region.MutableReferenceGenomicRegion;
import gedi.util.ArrayUtils;
import gedi.util.datastructure.tree.redblacktree.IntervalTree;
import gedi.util.datastructure.tree.redblacktree.SimpleInterval;
import gedi.util.io.text.LineOrientedFile;

public class LfcAlignedReadsProcessor implements GenomicRegionProcessor {

    // TODO: once table framework is there, do not produce an output file but write into a table of the context! (a table output processor will then do the trick)

    private ContrastMapping before;
    private ContrastMapping after;
    private Downsampling downsampling;
    private double credi = 0.05;

    private boolean allreads = false;

    private LineOrientedFile out;

    double[] total;
    double[] buff;
    private int minCond = -1;
    private GenomicRegionStorage<Transcript> transcripts;

    public LfcAlignedReadsProcessor(ContrastMapping contrast, Downsampling downsampling, LineOrientedFile out) {
        this.before = contrast;
        this.downsampling = downsampling;
        this.out = out;

        //      if (contrast.getNumMergedConditions()!=2) 
        //         throw new RuntimeException("Must be binary contrast!");

    }

    public LfcAlignedReadsProcessor(ContrastMapping before, ContrastMapping after, Downsampling downsampling,
            LineOrientedFile out) {
        this.before = before;
        this.after = after;
        this.downsampling = downsampling;
        this.out = out;
    }

    public LfcAlignedReadsProcessor setAllreads(boolean allreads) {
        this.allreads = allreads;
        return this;
    }

    @Override
    public void begin(ProcessorContext context) throws IOException {
        out.startWriting();
        if (allreads) {
            out.writef("Gene\tLocation\tMode");
            if (minCond > -1)
                out.writef("\twith reads");
            ContrastMapping contr = after == null ? before : after;
            for (int i = 0; i < contr.getNumMergedConditions(); i++)
                out.writef("\t%s", contr.getMappedName(i));
            out.writeLine(transcripts != null ? "\tTranscripts" : "");

            total = new double[contr.getNumMergedConditions()];
            buff = new double[contr.getNumMergedConditions()];
        } else if (multimode()) {
            out.writef("Gene");
            ContrastMapping contr = after == null ? before : after;
            for (int i = 0; i < contr.getNumMergedConditions(); i++)
                out.writef("\t%s", contr.getMappedName(i));
            out.writeLine();

            total = new double[contr.getNumMergedConditions()];
            buff = new double[contr.getNumMergedConditions()];

        } else {
            out.writef("Gene\talpha\tbeta\t%.3g credible\tlog2 fold change\t%.3g credible\n", 0.5 * credi,
                    1 - 0.5 * credi);

            total = new double[2];
            buff = new double[2];
        }
    }

    public LfcAlignedReadsProcessor forceMultiMode() {
        setCredible(Double.NaN);
        return this;
    }

    public LfcAlignedReadsProcessor setCredible(double credi) {
        this.credi = credi;
        return this;
    }

    public LfcAlignedReadsProcessor setTranscripts(GenomicRegionStorage<Transcript> transcripts) {
        this.transcripts = transcripts;
        return this;
    }

    public GenomicRegionProcessor setMinConditionsWithReads(int minCond) {
        this.minCond = minCond;
        return this;
    }

    private boolean multimode() {
        return Double.isNaN(credi) || (after == null && before.getNumMergedConditions() != 2)
                || (after != null && after.getNumMergedConditions() != 2);
    }

    @Override
    public void beginRegion(MutableReferenceGenomicRegion<?> region, ProcessorContext context) {
        Arrays.fill(total, 0);
        Arrays.fill(buff, 0);
    }

    public void setBuffer(double[] buff) {
        this.total = buff;
    }

    @Override
    public void read(MutableReferenceGenomicRegion<?> region, MutableReferenceGenomicRegion<AlignedReadsData> read,
            ProcessorContext context) throws IOException {

        //      if (region.getRegion().contains(read.getRegion())){
        // compute downsampled and add

        //      if (read.getRegion().getTotalLength()>100) return;

        if (after == null)
            downsampling.getDownsampled(read.getData(), before, buff);
        else
            downsampling.getDownsampled(read.getData(), before, after, buff);

        //         if (read.getData().getNumConditions()==16) 
        //            System.out.println(read);
        //         else
        //         for (int i=0; i<read.getData().getNumConditions(); i++) {
        //            if (i/8==1 || i/8==3) {
        //               if (read.getData().getTotalCount(i)>0) {
        //                  System.out.print(read.getReference()+":"+read.getRegion()+" [");
        //                  for (int c=0; c<read.getData().getNumConditions(); c++) {
        //                     if (c/8==1 || c/8==3) {
        //                        if (c>8) System.out.print(", ");
        //                        System.out.print(read.getData().getTotalCount(c));
        //                     }
        //                  }
        //                  System.out.println("] "+read.getData().getMultiplicity(0));
        //                  break;
        //               }
        //            }
        //         }

        int wr = 0;
        if (minCond > 0) {
            for (int i = 0; i < buff.length; i++)
                if (buff[i] > 0)
                    wr++;
        }

        if (wr >= minCond)
            ArrayUtils.add(total, buff);

        if (allreads && wr >= minCond) {

            String mode = "";
            for (OverlapMode m : OverlapMode.values())
                if (m.test(region.getRegion(), context.get(ProcessorContext.EXON_TREE), read.getRegion())) {
                    mode = m.name();
                    break;
                }

            out.writef("%s\t%s:%s\t%s", region.getData(), read.getReference().toString(),
                    read.getRegion().toRegionString(), mode);
            if (minCond > -1)
                out.writef("\t%d", wr);
            for (int i = 0; i < buff.length; i++)
                out.writef("\t%.1f", buff[i]);

            // find all compatible transcripts
            if (transcripts != null) {
                out.writef("\t");
                int n = 0;
                for (ImmutableReferenceGenomicRegion<Transcript> tr : transcripts
                        .getReferenceRegionsIntersecting(read.getReference(), read.getRegion())) {
                    if (compatible(tr.getRegion(),
                            read.getRegion() instanceof MissingInformationIntronInformation
                                    ? ((MissingInformationIntronInformation) read.getRegion())
                                            .getInformationGenomicRegions()
                                    : new GenomicRegion[] { read.getRegion() }))
                        out.writef(n++ > 0 ? ",%s" : "%s", tr.getData().getTranscriptId());
                }
            }

            out.writeLine();
        }
        //         System.out.println(Arrays.toString(buff)+"\t"+read.getReference()+":"+read.getRegion()+"\t"+read.getData());

        //      }
    }

    private boolean compatible(GenomicRegion region, GenomicRegion[] info) {
        for (GenomicRegion i : info)
            if (!region.containsUnspliced(i))
                return false;
        return true;
    }

    @Override
    public void endRegion(MutableReferenceGenomicRegion<?> region, ProcessorContext context) throws IOException {
        if (allreads) {
        } else if (multimode()) {
            out.writef("%s", region.getData());
            for (int i = 0; i < total.length; i++)
                out.writef("\t%.1f", total[i]);
            out.writeLine();
        } else {
            BetaDistribution beta = new BetaDistribution(total[0] + 1, total[1] + 1);
            out.writef("%s\t%.1f\t%.1f\t%.4f\t%.4f\t%.4f\n", region.getData(), total[0] + 1, total[1] + 1,
                    pToLog2Fc(beta.inverseCumulativeProbability(0.5 * credi)),
                    pToLog2Fc((beta.getAlpha() - 1) / (beta.getAlpha() + beta.getBeta() - 2)),
                    pToLog2Fc(beta.inverseCumulativeProbability(1 - 0.5 * credi)));
        }

    }

    @Override
    public void end(ProcessorContext context) throws IOException {
        out.finishWriting();
    }

    private static double pToLog2Fc(double p) {
        if (Double.isNaN(p))
            return 0;
        return Math.log(p / (1 - p)) / Math.log(2);
    }

}