gedi.atac.Atac.java Source code

Java tutorial

Introduction

Here is the source code for gedi.atac.Atac.java

Source

/**
 * 
 *    Copyright 2017 Florian Erhard
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 */
package gedi.atac;

import gedi.core.data.annotation.Transcript;
import gedi.core.data.numeric.GenomicNumericProvider.PositionNumericIterator;
import gedi.core.data.numeric.diskrmq.DiskGenomicNumericBuilder;
import gedi.core.data.numeric.diskrmq.DiskGenomicNumericProvider;
import gedi.core.data.reads.AlignedReadsData;
import gedi.core.data.reads.ContrastMapping;
import gedi.core.data.reads.ReadCountMode;
import gedi.core.processing.GenomicRegionProcessor;
import gedi.core.processing.ProcessorContext;
import gedi.core.processing.sources.ProcessorSource;
import gedi.core.reference.Chromosome;
import gedi.core.reference.ReferenceSequence;
import gedi.core.reference.ReferenceSequenceConversion;
import gedi.core.reference.Strand;
import gedi.core.region.ArrayGenomicRegion;
import gedi.core.region.GenomicRegionPosition;
import gedi.core.region.GenomicRegionStorage;
import gedi.core.region.ImmutableReferenceGenomicRegion;
import gedi.core.region.MutableReferenceGenomicRegion;
import gedi.util.ArrayUtils;
import gedi.util.FileUtils;
import gedi.util.FunctorUtils;
import gedi.util.StringUtils;
import gedi.util.datastructure.array.NumericArray;
import gedi.util.datastructure.array.NumericArray.NumericArrayType;
import gedi.util.datastructure.collections.doublecollections.DoubleArrayList;
import gedi.util.datastructure.collections.intcollections.IntArrayList;
import gedi.util.dynamic.DynamicObject;
import gedi.util.functions.ExtendedIterator;
import gedi.util.io.text.LineIterator;
import gedi.util.io.text.LineOrientedFile;
import gedi.util.mutable.MutableMonad;
import gedi.util.userInteraction.progress.ConsoleProgress;
import gedi.util.userInteraction.progress.Progress;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.StreamSupport;

import org.apache.commons.math3.distribution.ChiSquaredDistribution;
import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;
import org.apache.commons.math3.special.Gamma;

public class Atac {

    public static void analyzeNfkb(DiskGenomicNumericProvider rmq, GenomicRegionStorage<?> tfbs) throws Exception {

        int size = tfbs.getRandomEntry().getRegion().getTotalLength();
        double[] buff = new double[size + 200];

        tfbs.iterateReferenceGenomicRegions().forEachRemaining(rgr -> {
            if (rgr.getRegion().getTotalLength() != size || rgr.getRegion().getNumParts() != 1)
                throw new RuntimeException();

            ArrayGenomicRegion reg = rgr.getRegion().extendAll(100, 100);
            PositionNumericIterator it = rmq.iterateValues(rgr.getReference().toPlusStrand(), reg);
            while (it.hasNext())
                buff[it.nextInt() - reg.getStart()] += it.getValue(0);
            it = rmq.iterateValues(rgr.getReference().toMinusStrand(), reg);
            while (it.hasNext())
                buff[it.nextInt() - reg.getStart()] += it.getValue(0);

            System.out.printf("%s:%s", rgr.getReference().toStrandIndependent(), rgr.getRegion().toRegionString());
            for (int i = 0; i < buff.length; i++)
                System.out.printf("\t%.0f", buff[i]);
            System.out.println();
            Arrays.fill(buff, 0);
        });

    }

    public static void analyzePromotors(GenomicRegionStorage<AlignedReadsData> storage,
            GenomicRegionStorage<String> promotors) throws Exception {

        int offset = 4;
        int numCond = storage.getRandomRecord().getNumConditions();

        ProcessorSource<String> p = new ProcessorSource<String>();
        p.setProgress(new ConsoleProgress());
        p.process(storage, ReferenceSequenceConversion.none, promotors, new GenomicRegionProcessor() {

            int[][] tn5;
            LineOrientedFile[] out = new LineOrientedFile[numCond];

            @Override
            public void begin(ProcessorContext context) throws Exception {
                for (int i = 0; i < out.length; i++) {
                    out[i] = new LineOrientedFile("promotor."
                            + storage.getMetaData().get("conditions").getEntry(i).getEntry("name").asString()
                            + ".csv");
                    out[i].startWriting();
                }
            }

            @Override
            public void beginRegion(MutableReferenceGenomicRegion<?> region, ProcessorContext context)
                    throws Exception {
                if (tn5 == null || tn5[0].length != region.getRegion().getTotalLength())
                    tn5 = new int[numCond][region.getRegion().getTotalLength()];
                else
                    for (int i = 0; i < tn5.length; i++)
                        Arrays.fill(tn5[i], 0);
            }

            @Override
            public void read(MutableReferenceGenomicRegion<?> region,
                    MutableReferenceGenomicRegion<AlignedReadsData> read, ProcessorContext context)
                    throws Exception {

                for (int c = 0; c < numCond; c++) {
                    int v = read.getData().getTotalCountForConditionInt(c, ReadCountMode.All);
                    addValue(region,
                            GenomicRegionPosition.Start.position(read.getReference(), read.getRegion(), offset), c,
                            v);
                    addValue(region,
                            GenomicRegionPosition.Stop.position(read.getReference(), read.getRegion(), -offset), c,
                            v);
                }
            }

            private void addValue(MutableReferenceGenomicRegion<?> region, int position, int condition, int value) {
                if (region.getRegion().contains(position)) {
                    position = region.getRegion().induce(position);
                    if (region.getReference().getStrand() == Strand.Minus)
                        position = region.getRegion().getTotalLength() - 1 - position;
                    tn5[condition][position] += value;
                }
            }

            @Override
            public void endRegion(MutableReferenceGenomicRegion<?> region, ProcessorContext context)
                    throws Exception {

                for (int i = 0; i < out.length; i++) {
                    out[i].writef(region.getData().toString());
                    for (int p = 0; p < tn5[i].length; p++)
                        out[i].writef("\t%d", tn5[i][p]);
                    out[i].writeLine();
                }

            }

            @Override
            public void end(ProcessorContext context) throws Exception {
                for (int i = 0; i < out.length; i++) {
                    out[i].finishWriting();
                }
            }
        });

    }

    public static void buildFseqBed(String path, GenomicRegionStorage<? extends AlignedReadsData> storage)
            throws IOException {

        int offset = 4;
        LineOrientedFile out = new LineOrientedFile(path);
        out.startWriting();
        ConsoleProgress progress = new ConsoleProgress();
        progress.init();

        storage.iterateMutableReferenceGenomicRegions(Chromosome.obtain("chr9"))
                .forEachRemaining(new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                    @Override
                    public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                        try {
                            int v = mrgr.getData().getTotalCountOverallInt(ReadCountMode.All);
                            if (v != 0) {
                                writeBed(out, mrgr.getReference(), GenomicRegionPosition.Start
                                        .position(mrgr.getReference(), mrgr.getRegion(), offset), "+", v);
                                writeBed(out, mrgr.getReference(), GenomicRegionPosition.Stop
                                        .position(mrgr.getReference(), mrgr.getRegion(), -offset), "-", v);
                            }
                            progress.setDescription(mrgr.getReference() + ":" + mrgr.getRegion());
                            progress.incrementProgress();
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }

                });

        progress.finish();
        out.finishWriting();
    }

    private static void writeBed(LineOrientedFile out, ReferenceSequence ref, int pos, String strand, int c)
            throws IOException {
        for (int i = 0; i < c; i++)
            out.writef("%s\t%d\t%d\t.\t.\t%s\n", ref.getName(), pos, pos + 25, strand);
    }

    public static void buildInsertionIndices(String prefix,
            GenomicRegionStorage<? extends AlignedReadsData> storage, boolean shortLong) throws IOException {

        int ncond = storage.getRandomRecord().getNumConditions();
        for (int i = 0; i < ncond; i++) {
            String name = storage.getMetaData().get("conditions").getEntry(i).getEntry("name").asString();
            if (name == null || name.length() == 0)
                name = i + "";

            if (shortLong) {
                String path = prefix + "." + name + ".short.rmq";
                if (!new File(path).exists()) {
                    System.out.println("Building short fragments " + name + " into: " + path);
                    buildInsertionIndex(path, true, false, storage, i, true, true);
                } else {
                    System.out.println("Skip " + name + ", file already exists: " + path);
                }

                path = prefix + "." + name + ".long.rmq";
                if (!new File(path).exists()) {
                    System.out.println("Building long fragments " + name + " into: " + path);
                    buildInsertionIndex(path, false, true, storage, i, true, true);
                } else {
                    System.out.println("Skip " + name + ", file already exists: " + path);
                }
            }

            String path = prefix + "." + name + ".rmq";
            if (!new File(path).exists()) {
                System.out.println("Building all fragments " + name + " into: " + path);
                buildInsertionIndex(path, true, true, storage, i, true, true);
            } else {
                System.out.println("Skip " + name + ", file already exists: " + path);
            }

        }
    }

    public static void buildCoverageIndices(String prefix, GenomicRegionStorage<? extends AlignedReadsData> storage,
            boolean shortLong) throws IOException {

        int ncond = storage.getRandomRecord().getNumConditions();
        for (int i = 0; i < ncond; i++) {
            String name = storage.getMetaData().get("conditions").getEntry(i).getEntry("name").asString();
            if (name == null || name.length() == 0)
                name = i + "";

            if (shortLong) {
                String path = prefix + "." + name + ".short.rmq";
                if (!new File(path).exists()) {
                    System.out.println("Building short fragments " + name + " into: " + path);
                    buildCoverageIndex(path, true, false, storage, i);
                } else {
                    System.out.println("Skip " + name + ", file already exists: " + path);
                }

                path = prefix + "." + name + ".long.rmq";
                if (!new File(path).exists()) {
                    System.out.println("Building long fragments " + name + " into: " + path);
                    buildCoverageIndex(path, false, true, storage, i);
                } else {
                    System.out.println("Skip " + name + ", file already exists: " + path);
                }
            }

            String path = prefix + "." + name + ".rmq";
            if (!new File(path).exists()) {
                System.out.println("Building all fragments " + name + " into: " + path);
                buildCoverageIndex(path, true, true, storage, i);
            } else {
                System.out.println("Skip " + name + ", file already exists: " + path);
            }

        }
    }

    public static void buildInsertionIndex(String path, boolean shortFrags, boolean longFrags,
            GenomicRegionStorage<? extends AlignedReadsData> storage, int condition, boolean start, boolean stop)
            throws IOException {

        DiskGenomicNumericBuilder build = new DiskGenomicNumericBuilder(path);
        int offset = 4;
        build.setReferenceSorted(true);

        storage.ei().progress(new ConsoleProgress(), (int) storage.size(), r -> r.toLocationStringRemovedIntrons())
                .forEachRemaining(new Consumer<ImmutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                    @Override
                    public void accept(ImmutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                        try {
                            int v = mrgr.getData().getTotalCountForConditionInt(condition, ReadCountMode.All);
                            int l = mrgr.getRegion().getTotalLength();
                            if ((l > 180 && longFrags) || (l <= 180 && shortFrags)) {
                                if (v != 0) {
                                    if (start)
                                        build.addValue(mrgr.getReference(), GenomicRegionPosition.Start
                                                .position(mrgr.getReference(), mrgr.getRegion(), offset), v);
                                    if (stop)
                                        build.addValue(mrgr.getReference(), GenomicRegionPosition.Stop
                                                .position(mrgr.getReference(), mrgr.getRegion(), -offset), v);
                                }
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }

                });

        build.build();

        FileUtils
                .writeAllText(
                        DynamicObject
                                .from("conditions",
                                        DynamicObject.from(new Object[] {
                                                storage.getMetaData().getEntry("conditions").getEntry(condition) }))
                                .toJson(),
                        new File(path + ".metadata.json"));
    }

    public static void buildCoverageIndex(String path, boolean shortFrags, boolean longFrags,
            GenomicRegionStorage<? extends AlignedReadsData> storage, int condition) throws IOException {

        DiskGenomicNumericBuilder build = new DiskGenomicNumericBuilder(path);
        int offset = 4;
        build.setReferenceSorted(true);
        NumericArray buff = NumericArray.createMemory(1, NumericArrayType.Integer);

        storage.ei().progress(new ConsoleProgress(), (int) storage.size(), r -> r.toLocationStringRemovedIntrons())
                .forEachRemaining(new Consumer<ImmutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                    @Override
                    public void accept(ImmutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                        try {
                            int v = mrgr.getData().getTotalCountForConditionInt(condition, ReadCountMode.All);
                            int l = mrgr.getRegion().getTotalLength();
                            if ((l > 180 && longFrags) || (l <= 180 && shortFrags)) {
                                if (v != 0) {
                                    buff.setInt(0, v);
                                    build.addCoverageEx(mrgr.getReference(), mrgr.getRegion(), buff);
                                }
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }

                });

        build.build(true);

        FileUtils
                .writeAllText(
                        DynamicObject
                                .from("conditions",
                                        DynamicObject.from(new Object[] {
                                                storage.getMetaData().getEntry("conditions").getEntry(condition) }))
                                .toJson(),
                        new File(path + ".metadata.json"));
    }

    public static void buildInsertionIndex(String path, GenomicRegionStorage<? extends AlignedReadsData> storage)
            throws IOException {

        DiskGenomicNumericBuilder build = new DiskGenomicNumericBuilder(path);
        int offset = 4;
        build.setReferenceSorted(true);

        TreeSet<String> refs = new TreeSet<String>();
        storage.getReferenceSequences().forEach(r -> refs.add(r.getName()));
        ConsoleProgress p = new ConsoleProgress();

        Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

            @Override
            public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                try {
                    int v = mrgr.getData().getTotalCountOverallInt(ReadCountMode.All);
                    if (v > 0) {
                        build.addValue(mrgr.getReference().toPlusStrand(),
                                GenomicRegionPosition.Start.position(mrgr.getReference(), mrgr.getRegion(), offset),
                                v);
                        build.addValue(mrgr.getReference().toPlusStrand(),
                                GenomicRegionPosition.Stop.position(mrgr.getReference(), mrgr.getRegion(), -offset),
                                v);
                    }
                    p.incrementProgress();
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }

        };

        for (String n : refs) {
            p.init();
            p.setDescription(n + "+");
            storage.iterateMutableReferenceGenomicRegions(Chromosome.obtain(n, Strand.Plus))
                    .forEachRemaining(adder);
            p.setDescription(n + "-");
            storage.iterateMutableReferenceGenomicRegions(Chromosome.obtain(n, Strand.Minus))
                    .forEachRemaining(adder);
            p.finish();
        }

        build.build();
    }

    public static void perChromosomeAndLengthStatistics(GenomicRegionStorage<? extends AlignedReadsData> storage,
            String chrPath, String lenPath) throws IOException {

        LineOrientedFile chr = new LineOrientedFile(chrPath);
        LineOrientedFile len = new LineOrientedFile(lenPath);
        chr.startWriting();
        len.startWriting();

        chr.write("chromosome");
        len.write("Length");
        for (DynamicObject cond : storage.getMetaData().get("conditions").asArray()) {
            chr.writef("\t%s", cond.getEntry("name").asString());
            len.writef("\t%s", cond.getEntry("name").asString());
        }
        chr.writeLine();
        len.writeLine();

        IntArrayList[] lenHisto = new IntArrayList[storage.getRandomRecord().getNumConditions()];
        for (int i = 0; i < lenHisto.length; i++)
            lenHisto[i] = new IntArrayList();

        for (ReferenceSequence ref : storage.getReferenceSequences()) {

            MutableMonad<long[]> count = new MutableMonad<long[]>();

            storage.iterateMutableReferenceGenomicRegions(ref)
                    .forEachRemaining(new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                        @Override
                        public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                            if (count.Item == null)
                                count.Item = new long[mrgr.getData().getNumConditions()];
                            for (int d = 0; d < mrgr.getData().getDistinctSequences(); d++)
                                //                  if (mrgr.getData().getMultiplicity(d)==1)
                                for (int i = 0; i < count.Item.length; i++) {
                                    count.Item[i] += mrgr.getData().getCount(d, i) > 0 ? 1 : 0;
                                    lenHisto[i].increment(mrgr.getRegion().getTotalLength(),
                                            mrgr.getData().getCount(d, i) > 0 ? 1 : 0);
                                }
                        }

                    });

            if (count.Item != null) {
                chr.write(ref.toString());
                for (int i = 0; i < count.Item.length; i++)
                    chr.writef("\t%s", count.Item[i]);
                chr.writeLine();
            }

        }

        int maxLength = 0;
        for (int i = 0; i < lenHisto.length; i++)
            maxLength = Math.max(maxLength, lenHisto[i].size() - 1);

        for (int l = 1; l <= maxLength; l++) {
            len.writef("%d", l);
            for (int i = 0; i < lenHisto.length; i++)
                len.writef("\t%d", lenHisto[i].getInt(l));
            len.writeLine();
        }
        chr.finishWriting();
        len.finishWriting();
    }

    public static void lengthPerTypeStatistics(GenomicRegionStorage<? extends AlignedReadsData> storage, String out,
            String aggOut, String... typePattern) throws IOException {

        IntArrayList[][] lenHisto = new IntArrayList[typePattern.length][storage.getRandomRecord()
                .getNumConditions()];
        for (int t = 0; t < lenHisto.length; t++)
            for (int i = 0; i < lenHisto[t].length; i++)
                lenHisto[t][i] = new IntArrayList();

        Pattern[] types = new Pattern[typePattern.length];
        for (int i = 0; i < types.length; i++)
            types[i] = Pattern.compile(typePattern[i]);

        for (ReferenceSequence ref : storage.getReferenceSequences()) {

            int ty = 0;
            for (; ty < types.length && !types[ty].matcher(ref.toPlusMinusString()).find(); ty++)
                ;

            if (ty < types.length)
                System.out.println(ref + " -> " + types[ty]);
            else
                System.out.println("Skipping " + ref);
            MutableMonad<long[]> count = new MutableMonad<long[]>();

            int tyind = ty;
            if (ty < types.length)
                storage.iterateMutableReferenceGenomicRegions(ref).forEachRemaining(
                        new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                            @Override
                            public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                                if (count.Item == null)
                                    count.Item = new long[mrgr.getData().getNumConditions()];

                                for (int i = 0; i < count.Item.length; i++) {
                                    count.Item[i] += mrgr.getData().getTotalCountForConditionInt(i,
                                            ReadCountMode.All);
                                    lenHisto[tyind][i].increment(mrgr.getRegion().getTotalLength(),
                                            mrgr.getData().getTotalCountForConditionInt(i, ReadCountMode.All));
                                }
                            }

                        });

        }

        LineOrientedFile o = new LineOrientedFile(out);
        o.startWriting();

        o.write("Type\tLength");
        for (DynamicObject cond : storage.getMetaData().get("conditions").asArray()) {
            o.writef("\t%s", cond.getEntry("name").asString());
        }
        o.writeLine();

        int maxLength = 0;
        for (int t = 0; t < lenHisto.length; t++)
            for (int i = 0; i < lenHisto.length; i++)
                maxLength = Math.max(maxLength, lenHisto[t][i].size() - 1);

        for (int t = 0; t < lenHisto.length; t++)
            for (int l = 1; l <= maxLength; l++) {
                o.writef("%s\t%d", typePattern[t], l);
                for (int i = 0; i < lenHisto[t].length; i++)
                    o.writef("\t%d", lenHisto[t][i].getInt(l));
                o.writeLine();
            }
        o.finishWriting();

        o = new LineOrientedFile(aggOut);
        o.startWriting();

        o.write("Type");
        for (DynamicObject cond : storage.getMetaData().get("conditions").asArray()) {
            o.writef("\t%s", cond.getEntry("name").asString());
        }
        o.writeLine();

        for (int t = 0; t < lenHisto.length; t++) {
            o.writef("%s", typePattern[t]);
            for (int i = 0; i < lenHisto[t].length; i++) {
                long sum = 0;
                for (int l = 1; l <= maxLength; l++)
                    sum += lenHisto[t][i].getInt(l);
                o.writef("\t%d", sum);
            }
            o.writeLine();
        }
        o.finishWriting();
    }

    public static void normalizationFactors(GenomicRegionStorage<? extends AlignedReadsData> storage,
            GenomicRegionStorage<?> peaks, String out, String peakout, String detailout, String... typePattern)
            throws IOException {

        int cond = storage.getRandomRecord().getNumConditions();
        int[][] allCounts = new int[typePattern.length][cond];
        int[][] peakCounts = new int[typePattern.length][cond];

        Pattern[] types = new Pattern[typePattern.length];
        for (int i = 0; i < types.length; i++)
            types[i] = Pattern.compile(typePattern[i]);

        new LineOrientedFile(detailout).delete();

        Set<ReferenceSequence> refs = new TreeSet<ReferenceSequence>();
        for (ReferenceSequence ref : storage.getReferenceSequences())
            refs.add(ref.toStrandIndependent());

        for (ReferenceSequence ref : refs) {

            int ty = 0;
            for (; ty < types.length && !types[ty].matcher(ref.toPlusMinusString()).find(); ty++)
                ;

            if (ty < types.length)
                System.out.println(ref + " -> " + types[ty]);
            else
                System.out.println("Skipping " + ref);

            HashMap<ImmutableReferenceGenomicRegion<?>, int[]> detail = new HashMap<ImmutableReferenceGenomicRegion<?>, int[]>();

            int tyind = ty;
            Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                @Override
                public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {

                    int f = GenomicRegionPosition.Start.position(ref, mrgr.getRegion(), 4);
                    int b = GenomicRegionPosition.Stop.position(ref, mrgr.getRegion(), -4);

                    int inpeak = 0;
                    if (StreamSupport.stream(peaks.iterateIntersectingMutableReferenceGenomicRegions(
                            ref.toStrandIndependent(), f, f + 1), false).peek(peak -> {
                                int[] c = detail.computeIfAbsent(peak.toImmutable(), x -> new int[cond]);
                                for (int i = 0; i < c.length; i++)
                                    c[i] += mrgr.getData().getTotalCountForConditionInt(i, ReadCountMode.All);
                            }).count() > 0)
                        inpeak++;

                    if (StreamSupport.stream(peaks.iterateIntersectingMutableReferenceGenomicRegions(
                            ref.toStrandIndependent(), b, b + 1), false).peek(peak -> {
                                int[] c = detail.computeIfAbsent(peak.toImmutable(), x -> new int[cond]);
                                for (int i = 0; i < c.length; i++)
                                    c[i] += mrgr.getData().getTotalCountForConditionInt(i, ReadCountMode.All);
                            }).count() > 0)
                        inpeak++;

                    for (int i = 0; i < allCounts[tyind].length; i++) {
                        allCounts[tyind][i] += mrgr.getData().getTotalCountForConditionInt(i, ReadCountMode.All);
                        if (inpeak > 0)
                            peakCounts[tyind][i] += mrgr.getData().getTotalCountForConditionInt(i,
                                    ReadCountMode.All) * inpeak;
                    }
                }

            };
            if (ty < types.length) {
                storage.iterateMutableReferenceGenomicRegions(ref).forEachRemaining(adder);
                storage.iterateMutableReferenceGenomicRegions(ref.toPlusStrand()).forEachRemaining(adder);
                storage.iterateMutableReferenceGenomicRegions(ref.toMinusStrand()).forEachRemaining(adder);
            }

            LineOrientedFile d = new LineOrientedFile(detailout);
            if (d.exists())
                d.startAppending();
            else {
                d.startWriting();
                d.write("Peak\tType");
                for (int i = 0; i < cond; i++)
                    d.writef("\t%d", i);
                d.writeLine();
            }

            for (ImmutableReferenceGenomicRegion<?> peak : detail.keySet()) {
                int[] count = detail.get(peak);
                d.writef("%s\t%s", peak.toLocationString(), typePattern[ty]);
                for (int c = 0; c < cond; c++)
                    d.writef("\t%d", count[c]);
                d.writeLine();
            }
            d.finishWriting();

        }

        LineOrientedFile o = new LineOrientedFile(out);
        o.startWriting();
        o.write("Type\tCondition Index\tCount\n");
        for (int i = 0; i < types.length; i++) {
            for (int c = 0; c < allCounts[i].length; c++) {
                o.writef("%s\t%d\t%d\n", typePattern[i], c, allCounts[i][c]);
            }
        }
        o.finishWriting();

        o = new LineOrientedFile(peakout);
        o.startWriting();
        o.write("Type\tCondition Index\tCount\n");
        for (int i = 0; i < types.length; i++) {
            for (int c = 0; c < allCounts[i].length; c++) {
                o.writef("%s\t%d\t%d\n", typePattern[i], c, peakCounts[i][c]);
            }
        }
        o.finishWriting();
    }

    private enum PeakAnnotation {
        Intergenic, GeneBody, Promotor, BidirectionalPromotor
    }

    public static void annotatePeaks(GenomicRegionStorage<Transcript> genes, GenomicRegionStorage<Transcript> trans,
            String peakFile, String out) throws IOException {
        LineIterator it = new LineOrientedFile(peakFile).lineIterator();
        LineOrientedFile o = new LineOrientedFile(out);
        o.startWriting();
        o.writef("%s\tType\tGene\n", it.next());

        while (it.hasNext()) {
            String line = it.next();
            ImmutableReferenceGenomicRegion<Object> rgr = ImmutableReferenceGenomicRegion
                    .parse(StringUtils.splitField(line, '\t', 0));

            ArrayList<ImmutableReferenceGenomicRegion<Transcript>> inter = new ArrayList<ImmutableReferenceGenomicRegion<Transcript>>();
            genes.getReferenceRegionsIntersecting(rgr.getReference().toPlusStrand(), rgr.getRegion(), inter);
            genes.getReferenceRegionsIntersecting(rgr.getReference().toMinusStrand(), rgr.getRegion(), inter);
            trans.getReferenceRegionsIntersecting(rgr.getReference().toPlusStrand(), rgr.getRegion(), inter);
            trans.getReferenceRegionsIntersecting(rgr.getReference().toMinusStrand(), rgr.getRegion(), inter);

            PeakAnnotation a = inter.size() > 0 ? PeakAnnotation.GeneBody : PeakAnnotation.Intergenic;
            Transcript tp = null;
            Transcript tm = null;

            for (ImmutableReferenceGenomicRegion<Transcript> tr : inter) {
                if (rgr.getRegion().contains(GenomicRegionPosition.FivePrime.position(tr))) {
                    if (a == PeakAnnotation.GeneBody)
                        tp = tm = null;

                    a = PeakAnnotation.Promotor;
                    if (tr.getReference().getStrand() == Strand.Plus)
                        tp = tr.getData();
                    else
                        tm = tr.getData();
                } else if (a == PeakAnnotation.GeneBody) {
                    if (tr.getReference().getStrand() == Strand.Plus)
                        tp = tr.getData();
                    else
                        tm = tr.getData();
                }
            }

            if (tp != null && tm != null && a == PeakAnnotation.Promotor)
                a = PeakAnnotation.BidirectionalPromotor;
            StringBuilder gene = new StringBuilder();
            if (tp != null)
                gene.append(tp.getGeneId());
            if (tm != null && tp == null)
                gene.append(tm.getGeneId());
            else if (tm != null && tp != null && a == PeakAnnotation.BidirectionalPromotor)
                gene.append(", ").append(tm.getGeneId());

            o.writef("%s\t%s\t%s\n", line, a.toString(), gene);
        }

        o.finishWriting();
    }

    public static void testInPeaks(GenomicRegionStorage<? extends AlignedReadsData> storage, String contrasts,
            String peakFile, String rmq, String compOut, String bicOut, String out, boolean randomizeContrasts)
            throws IOException {

        DiskGenomicNumericBuilder clusterRmq = new DiskGenomicNumericBuilder(rmq);
        LineIterator it = new LineOrientedFile(peakFile).lineIterator();
        LineOrientedFile o = new LineOrientedFile(out);
        o.startWriting();
        o.writef("%s\tComponents\tp.value\n", it.next());
        int offset = 4;

        ContrastMapping contr = new ContrastMapping();
        ExtendedIterator<String> coit = new LineOrientedFile(contrasts).lineIterator();
        if (randomizeContrasts) {
            String[] ca = coit.toArray(new String[0]);
            ArrayUtils.shuffleSlice(ca, 0, ca.length);
            coit = FunctorUtils.arrayIterator(ca);
        }
        coit.forEachRemaining(
                l -> contr.addMapping(contr.getNumOriginalConditions(), contr.getMappedIndexOrNext(l), l));

        LineOrientedFile co = new LineOrientedFile(compOut);
        co.startWriting();
        co.writef("Peak\tComponent");
        for (int i = 0; i < contr.getNumMergedConditions(); i++)
            co.writef("\t%s", contr.getMappedName(i));
        co.writeLine();

        LineOrientedFile bico = new LineOrientedFile(bicOut);
        bico.startWriting();
        bico.writef("Peak\tk\tBIC\n");

        Progress pr = new ConsoleProgress();
        pr.init();
        int peakCount = (int) new LineOrientedFile(peakFile).lineIterator().count() - 1;
        pr.setCount(peakCount);

        while (it.hasNext()) {
            String line = it.next();
            ImmutableReferenceGenomicRegion<Object> peak = ImmutableReferenceGenomicRegion
                    .parse(StringUtils.splitField(line, '\t', 0));

            pr.setDescription(peak.toString());
            pr.incrementProgress();

            HashMap<FixedDoublePoint, Integer> pToPos = new HashMap<FixedDoublePoint, Integer>();
            FixedDoublePoint[] m = new FixedDoublePoint[peak.getRegion().getTotalLength()];
            for (int i = 0; i < m.length; i++) {
                m[i] = new FixedDoublePoint(new double[contr.getNumMergedConditions()]);
                pToPos.put(m[i], peak.getRegion().map(i));
            }

            Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

                @Override
                public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                    try {

                        int start = GenomicRegionPosition.Start.position(mrgr.getReference(), mrgr.getRegion(),
                                offset);
                        if (peak.getRegion().contains(start))
                            addDownsampled(contr, m[peak.getRegion().induce(start)].getPoint(),
                                    mrgr.getData().getTotalCountsForConditions(ReadCountMode.All));

                        int stop = GenomicRegionPosition.Stop.position(mrgr.getReference(), mrgr.getRegion(),
                                -offset);
                        if (peak.getRegion().contains(stop))
                            addDownsampled(contr, m[peak.getRegion().induce(stop)].getPoint(),
                                    mrgr.getData().getTotalCountsForConditions(ReadCountMode.All));
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }

                private void addDownsampled(ContrastMapping contr, double[] re, double[] c) {
                    double max = ArrayUtils.max(c);
                    if (max > 0)
                        ArrayUtils.mult(c, 1 / max);
                    for (int i = 0; i < c.length; i++)
                        if (contr.getMappedIndex(i) > -1)
                            re[contr.getMappedIndex(i)] += c[i];
                }
            };
            storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toPlusStrand(),
                    peak.getRegion()).forEachRemaining(adder);
            storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toMinusStrand(),
                    peak.getRegion()).forEachRemaining(adder);

            //         double[] total = new double[cond];
            //         for (int i=0; i<m.length; i++) 
            //            for (int j=0; j<cond; j++)
            //               total[j]+=m[i].getPoint()[j];
            //         ArrayUtils.normalize(total);
            //         
            //         double ll = 0;
            //         for (int i=0; i<m.length; i++)
            //            ll+=ddirichlet1(m[i].getPoint(), total);
            //         

            DoubleArrayList ll = new DoubleArrayList();
            ll.add(0);
            DoubleArrayList bic = new DoubleArrayList();
            bic.add(0);

            ArrayList<FixedDoublePoint> list = new ArrayList<FixedDoublePoint>();
            for (FixedDoublePoint p : m)
                if (ArrayUtils.sum(p.getPoint()) > 0)
                    list.add(p);

            List<CentroidCluster<FixedDoublePoint>> ocl = null;
            double op = 0;

            for (int k = 1; k < Math.min(list.size(), 50); k++) {

                KMeansPlusPlusClusterer<FixedDoublePoint> kmeans = new KMeansPlusPlusClusterer<FixedDoublePoint>(k);
                List<CentroidCluster<FixedDoublePoint>> cl = kmeans.cluster(list);

                double cll = 0;
                for (CentroidCluster<FixedDoublePoint> c : cl) {
                    double[] total = new double[contr.getNumMergedConditions()];
                    Arrays.fill(total, 1);
                    for (FixedDoublePoint p : c.getPoints())
                        for (int j = 0; j < contr.getNumMergedConditions(); j++)
                            total[j] += p.getPoint()[j];
                    ArrayUtils.normalize(total);

                    for (FixedDoublePoint p : c.getPoints())
                        cll += ddirichlet1(p.getPoint(), total);
                }

                // LLR test
                double LLR = 2 * cll - 2 * ll.getLastDouble();
                double p = 1
                        - new ChiSquaredDistribution(contr.getNumMergedConditions() - 1).cumulativeProbability(LLR);

                bic.add(-2 * cll + 2 * (contr.getNumMergedConditions() - 1) * k);
                bico.writef("%s\t%d\t%.1f\n", peak.toLocationString(), k, bic.getLastDouble());

                // bonferroni correction
                p = p * peakCount;

                if (p > 0.01) {
                    if (ocl.size() > 1) {
                        for (int i = 0; i < ocl.size(); i++) {
                            co.writef("%s\t%d", peak.toLocationString(), i);
                            double[] total = new double[contr.getNumMergedConditions()];
                            Arrays.fill(total, 1);
                            for (FixedDoublePoint pp : ocl.get(i).getPoints()) {
                                clusterRmq.addValue(peak.getReference(), pToPos.get(pp).intValue(), (byte) i);
                                for (int j = 0; j < contr.getNumMergedConditions(); j++)
                                    total[j] += pp.getPoint()[j];
                            }
                            ArrayUtils.normalize(total);
                            for (int c = 0; c < contr.getNumMergedConditions(); c++)
                                co.writef("\t%.4f", total[c]);
                            co.writeLine();

                        }
                    }
                    break;
                }

                ll.add(cll);
                ocl = cl;
                op = p;
            }

            o.writef("%s\t%d\t%.4g\n", line, ll.size() - 1, ll.size() == 2 ? Double.NaN : op);
        }

        pr.finish();
        o.finishWriting();
        co.finishWriting();

        clusterRmq.build();
    }

    private static double ddirichlet1(double[] alpha1, double[] p) {
        double re = 0;
        double asum = 0;
        double gsum = 0;
        for (int i = 0; i < p.length; i++) {
            re += Math.log(p[i]) * (alpha1[i]);
            asum += alpha1[i] + 1;
            gsum += Gamma.logGamma(alpha1[i] + 1);
        }
        return re + Gamma.logGamma(asum) - gsum;
    }

    private static class FixedDoublePoint implements Clusterable, Serializable {

        /** Serializable version identifier. */
        private static final long serialVersionUID = 3946024775784901369L;

        /** Point coordinates. */
        private final double[] point;

        /**
         * Build an instance wrapping an double array.
         * <p>
         * The wrapped array is referenced, it is <em>not</em> copied.
         *
         * @param point the n-dimensional point in double space
         */
        public FixedDoublePoint(final double[] point) {
            this.point = point;
        }

        /**
         * Build an instance wrapping an integer array.
         * <p>
         * The wrapped array is copied to an internal double array.
         *
         * @param point the n-dimensional point in integer space
         */
        public FixedDoublePoint(final int[] point) {
            this.point = new double[point.length];
            for (int i = 0; i < point.length; i++) {
                this.point[i] = point[i];
            }
        }

        /** {@inheritDoc} */
        public double[] getPoint() {
            return point;
        }

        /** {@inheritDoc} */
        @Override
        public String toString() {
            return Arrays.toString(point);
        }

    }
}