diffhunter.DiffHunter.java Source code

Java tutorial

Introduction

Here is the source code for diffhunter.DiffHunter.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package diffhunter;

import com.sleepycat.je.Database;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FilenameUtils;

/**
 *
 * @author hashemis
 */
public class DiffHunter {

    /**
     * @param args the command line arguments
     * @throws org.apache.commons.cli.ParseException
     * @throws java.io.IOException
     */
    public static void main(String[] args) throws ParseException, IOException {

        //String test_ = Paths.get("J:\\VishalData\\additional\\", "Sasan" + "_BDB").toAbsolutePath().toString();

        // TODO code application logic here
        /*args = new String[]
        {
        "-i", "-b", "J:\\VishalData\\additional\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted.bed", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-o", "J:\\VishalData"
        };*/

        /*args = new String[]
        {
        "-c", "-r", "J:\\VishalData\\additional\\mouse_mm9.txt", "-1", "J:\\VishalData\\Ptbp2_Adult_testis_CLIP_mm9_plus_strand_sorted_BDB", "-2", "J:\\VishalData\\Ptbp2_E18_5_cortex_CLIP_mm9_plus_strand_sorted_BDB", "-w", "200", "-s", "50", "-o", "J:\\VishalData"
        };*/
        Options options = new Options();

        // add t option
        options.addOption("i", "index", false, "Indexing BED files.");
        options.addOption("b", "bed", true, "bed file to be indexed");
        options.addOption("o", "output", true, "Folder that the index/comparison file will be created.");
        options.addOption("r", "reference", true, "Reference annotation file to be used for indexing");
        options.addOption("c", "compare", false, "Finding differences between two conditions");
        options.addOption("1", "first", true, "First sample index location");
        options.addOption("2", "second", true, "Second sample index location");
        options.addOption("w", "window", true, "Length of window for identifying differences");
        options.addOption("s", "sliding", true, "Length of sliding");

        CommandLineParser parser = new BasicParser();
        CommandLine cmd = parser.parse(options, args);

        boolean indexing = false;
        boolean comparing = false;

        //Indexing!
        if (cmd.hasOption("i")) {
            //if(cmd.hasOption("1"))
            //System.err.println("sasan");

            //System.out.println("sasa");
            indexing = true;

        } else if (cmd.hasOption("c")) {
            //System.err.println("");
            comparing = true;

        } else {
            //System.err.println("Option is not deteced.");
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("diffhunter", options);
            return;
        }

        //Indexing is selected
        //
        if (indexing == true) {
            //Since indexing is true.
            //User have to provide file for indexing.
            if (!(cmd.hasOption("o") || cmd.hasOption("r") || cmd.hasOption("b"))) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("diffhunter", options);
                return;
            }
            String bedfile_ = cmd.getOptionValue("b");
            String reference_file = cmd.getOptionValue("r");
            String folder_loc = cmd.getOptionValue("o");

            String sample_name = FilenameUtils.getBaseName(bedfile_);

            try (Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(
                    Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString(), true, sample_name)) {
                Indexer indexing_ = new Indexer(reference_file);
                indexing_.Make_Index(B2, bedfile_,
                        Paths.get(folder_loc, sample_name + "_BDB").toAbsolutePath().toString());
                B2.close();

            }
        } else if (comparing == true) {
            if (!(cmd.hasOption("o") || cmd.hasOption("w") || cmd.hasOption("s") || cmd.hasOption("1")
                    || cmd.hasOption("2"))) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("diffhunter", options);
                return;
            }
            String folder_loc = cmd.getOptionValue("o");
            int window_ = Integer.parseInt(cmd.getOptionValue("w"));
            //int window_=600;

            int slide_ = Integer.parseInt(cmd.getOptionValue("s"));

            String first = cmd.getOptionValue("1").replace("_BDB", "");
            String second = cmd.getOptionValue("2").replace("_BDB", "");
            String reference_file = cmd.getOptionValue("r");
            //String folder_loc=cmd.getOptionValue("o");

            String sample_name_first = FilenameUtils.getBaseName(first);
            String sample_name_second = FilenameUtils.getBaseName(second);

            Database B1 = BerkeleyDB_Box.Get_BerkeleyDB(first + "_BDB", false, sample_name_first);
            Database B2 = BerkeleyDB_Box.Get_BerkeleyDB(second + "_BDB", false, sample_name_second);

            List<String> first_condition_genes = Files
                    .lines(Paths.get(first + "_BDB", sample_name_first + ".txt").toAbsolutePath())
                    .collect(Collectors.toList());
            List<String> second_condition_genes = Files
                    .lines(Paths.get(second + "_BDB", sample_name_second + ".txt").toAbsolutePath())
                    .collect(Collectors.toList());
            System.out.println("First and second condition are loaded!!! ");
            List<String> intersection_ = new ArrayList<>(first_condition_genes);
            intersection_.retainAll(second_condition_genes);

            BufferedWriter output = new BufferedWriter(
                    new FileWriter(Paths.get(folder_loc, "differences_" + window_ + "_s" + slide_ + "_c" + ".txt")
                            .toAbsolutePath().toString(), false));
            List<Result_Window> final_results = Collections.synchronizedList(new ArrayList<>());
            Worker_New worker_class = new Worker_New();
            worker_class.Read_Reference(reference_file);

            while (!intersection_.isEmpty()) {
                List<String> selected_genes = new ArrayList<>();
                //if (intersection_.size()<=10000){selected_genes.addAll(intersection_.subList(0, intersection_.size()));}
                //else selected_genes.addAll(intersection_.subList(0, 10000));
                if (intersection_.size() <= intersection_.size()) {
                    selected_genes.addAll(intersection_.subList(0, intersection_.size()));
                } else {
                    selected_genes.addAll(intersection_.subList(0, intersection_.size()));
                }
                intersection_.removeAll(selected_genes);
                //System.out.println("Intersection count is:"+intersection_.size());
                //final List<Result_Window> resultssss_=new ArrayList<>();
                IntStream.range(0, selected_genes.size()).parallel().forEach(i -> {
                    System.out.println(selected_genes.get(i) + "\tprocessing......");
                    String gene_of_interest = selected_genes.get(i);//"ENSG00000142657|PGD";//intersection_.get(6);////"ENSG00000163395|IGFN1";//"ENSG00000270066|SCARNA2";
                    int start = worker_class.dic_genes.get(gene_of_interest).start_loc;
                    int end = worker_class.dic_genes.get(gene_of_interest).end_loc;

                    Map<Integer, Integer> first_ = Collections.EMPTY_MAP;
                    try {
                        first_ = BerkeleyDB_Box.Get_Coord_Read(B1, gene_of_interest);
                    } catch (IOException | ClassNotFoundException ex) {
                        Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                    }

                    Map<Integer, Integer> second_ = Collections.EMPTY_MAP;
                    try {
                        second_ = BerkeleyDB_Box.Get_Coord_Read(B2, gene_of_interest);
                    } catch (IOException | ClassNotFoundException ex) {
                        Logger.getLogger(DiffHunter.class.getName()).log(Level.SEVERE, null, ex);
                    }
                    List<Window> top_windows_first = worker_class.Get_Top_Windows(window_, first_, slide_);
                    List<Window> top_windows_second = worker_class.Get_Top_Windows(window_, second_, slide_);
                    //System.out.println("passed for window peak call for gene \t"+selected_genes.get(i));
                    // System.out.println("top_window_first_Count\t"+top_windows_first.size());
                    // System.out.println("top_window_second_Count\t"+top_windows_second.size());
                    if (top_windows_first.isEmpty() && top_windows_second.isEmpty()) {
                        return;
                    }

                    List<Result_Window> res_temp = new Worker_New().Get_Significant_Windows(gene_of_interest, start,
                            end, top_windows_first, top_windows_second, second_, first_, sample_name_first,
                            sample_name_second, 0.01);
                    if (!res_temp.isEmpty()) {
                        final_results.addAll(res_temp);//final_results.addAll(worker_class.Get_Significant_Windows(gene_of_interest, start, end, top_windows_first, top_windows_second, second_, first_, first_condition, second_condition, 0.01));

                    } //System.out.println(selected_genes.get(i)+"\tprocessed.");

                });

                /*selected_genes.parallelStream().forEach(i ->
                 {
                    
                    
                 });*/
                List<Double> pvals = new ArrayList<>();

                for (int i = 0; i < final_results.size(); i++) {
                    pvals.add(final_results.get(i).p_value);
                }
                List<Double> qvals = MultipleTestCorrection.benjaminiHochberg(pvals);

                System.out.println("Writing to file...");
                output.append("Gene_Symbol\tContributing_Sample\tStart\tEnd\tOddsRatio\tp_Value\tFDR");
                output.newLine();

                for (int i = 0; i < final_results.size(); i++) {
                    Result_Window item = final_results.get(i);
                    output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t"
                            + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value
                            + "\t" + qvals.get(i)); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non);
                    output.newLine();
                }

                /* for (Result_Window item : final_results)
                 {
                output.append(item.associated_gene_symbol + "\t" + item.contributing_windows + "\t" + item.start_loc + "\t" + item.end_loc + "\t" + item.oddsratio_ + "\t" + item.p_value); //+ "\t" + item.average_other_readcount_cotributing + "\t" + item.average_other_readcount_cotributing + "\t" + item.average_window_readcount_non + "\t" + item.average_other_readcount_non);
                output.newLine();
                 }
                   */
                final_results.clear();

            }
            output.close();

        }
        System.out.println("Done.");

    }

}