org.bgi.flexlab.gaea.tools.mapreduce.markduplicate.MarkDuplicateOptions.java Source code

Java tutorial

Introduction

Here is the source code for org.bgi.flexlab.gaea.tools.mapreduce.markduplicate.MarkDuplicateOptions.java

Source

/*******************************************************************************
 * Copyright (c) 2017, BGI-Shenzhen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 *******************************************************************************/
package org.bgi.flexlab.gaea.tools.mapreduce.markduplicate;

import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.bgi.flexlab.gaea.data.mapreduce.options.HadoopOptions;
import org.bgi.flexlab.gaea.data.options.GaeaOptions;
import org.seqdoop.hadoop_bam.SAMFormat;

import java.io.IOException;
import java.util.ArrayList;

/**
 * Command-line options of the MarkDuplicate MapReduce tool.
 *
 * <p>An instance is filled by {@link #parse(String[])}. On the client side
 * {@link #setHadoopConf(String[], Configuration)} stores the raw arguments in the
 * job {@link Configuration} and resolves the input path into a flat list of files;
 * {@link #getOptionsFromHadoopConf(Configuration)} re-parses the stored arguments.
 *
 * <p>Created by huangzhibo on 2017/4/14.
 */
public class MarkDuplicateOptions extends GaeaOptions implements HadoopOptions {
    private final static String SOFTWARE_NAME = "MarkDuplicate";
    private final static String SOFTWARE_VERSION = "1.0";

    /** Configuration key under which the raw command-line arguments are stored. */
    private final static String ARGS_KEY = "args";

    private String input;
    /** Every regular file found under {@link #input}; filled by {@link #setHadoopConf}. */
    private ArrayList<Path> inputFileList;
    /** Raw value of {@code -I}: 0 selects BAM, any other value is treated as SAM. */
    private int inputFormat;
    private String output;
    /** Raw value of {@code -O}; the help text documents 0 as BAM and 1 as SAM. */
    private int outputFormat;
    private boolean outputDupRead;
    private boolean isSE;
    private int reducerNum;
    private int windowSize;
    private int extendSize;
    /** File system of the input path; only set once {@link #setHadoopConf} has run. */
    FileSystem fs;

    /**
     * Registers all supported options and prepares the help text.
     */
    public MarkDuplicateOptions() {
        // NOTE(review): the trailing 'true' on -i / -o presumably marks the option as
        // required (matches the "[required]" help text) -- confirm against GaeaOptions.
        addOption("i", "input", true, "input directory [required]", true);
        addOption("I", "inputFormat", true, "input Format. 0:BAM; 1:SAM [1]");
        addOption("o", "output", true, "output directory [required]", true);
        addOption("O", "outputFormat", true, "output Format. 0:BAM; 1:SAM [0]");
        addOption("D", "outputDupRead", false, "output Duplicates reads [true]");
        addOption("S", "isSE", false, "input is SE data [false]");
        addOption("R", "reducer", true, "reducer numbers [30]");
        addOption("W", "windowSize", true, "window size that sharding the data [100000]");
        addOption("E", "extendSize", true, "The extend size (must greater than read length) [100]");
        addOption("h", "help", false, "print help information.");
        FormatHelpInfo(SOFTWARE_NAME, SOFTWARE_VERSION);

        inputFileList = new ArrayList<>();
    }

    /**
     * Parses the command line and fills the option fields.
     *
     * <p>Prints the help text and terminates the JVM with exit code 1 when the
     * arguments cannot be parsed, when {@code -h} is given, or when no arguments
     * are supplied.
     *
     * @param args raw command-line arguments
     */
    @Override
    public void parse(String[] args) {
        try {
            cmdLine = parser.parse(options, args);
            if (cmdLine.hasOption("h")) {
                helpInfo.printHelp("Options:", options, true);
                System.exit(1);
            }
        } catch (ParseException e) {
            helpInfo.printHelp("Options:", options, true);
            System.exit(1);
        }

        // NOTE(review): probably never reached when -i/-o are required (an empty
        // command line would already have failed above) -- kept to preserve behavior.
        if (args.length == 0 || getOptionBooleanValue("h", false)) {
            printHelpInfotmation(SOFTWARE_NAME);
            System.exit(1);
        }

        // The second argument of each getter is passed as the fallback value; the exact
        // handling of flag options (-D, -S) is defined in GaeaOptions.
        input = getOptionValue("i", null);
        inputFormat = getOptionIntValue("I", 1);
        output = getOptionValue("o", null);
        outputFormat = getOptionIntValue("O", 0);
        outputDupRead = getOptionBooleanValue("D", true);
        isSE = getOptionBooleanValue("S", false);
        reducerNum = getOptionIntValue("R", 30);
        windowSize = getOptionIntValue("W", 100000);
        extendSize = getOptionIntValue("E", 100);
    }

    /**
     * Stores the raw arguments in {@code conf} and collects all input files.
     *
     * <p>Must be called after {@link #parse(String[])}, because it reads the parsed
     * input path. The input file list is rebuilt from scratch on every call.
     *
     * @param args raw command-line arguments, stored under the key {@code "args"}
     * @param conf job configuration that receives the arguments and supplies the file system
     * @throws RuntimeException wrapping the {@link IOException} if the file system
     *                          cannot be obtained or the input tree cannot be listed
     */
    @Override
    public void setHadoopConf(String[] args, Configuration conf) {
        // NOTE(review): Configuration stores string arrays comma-delimited, so an
        // argument that itself contains ',' will not round-trip through getStrings.
        conf.setStrings(ARGS_KEY, args);
        Path p = new Path(this.getInput());
        try {
            fs = p.getFileSystem(conf);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Start from an empty list so that a repeated call does not duplicate entries.
        inputFileList.clear();
        traversalInputPath(this.getInput());
    }

    /**
     * Restores the options from arguments previously stored by
     * {@link #setHadoopConf(String[], Configuration)}.
     *
     * @param conf configuration holding the arguments under the key {@code "args"}
     * @throws IllegalStateException if {@code conf} contains no stored arguments
     */
    @Override
    public void getOptionsFromHadoopConf(Configuration conf) {
        String[] args = conf.getStrings(ARGS_KEY);
        if (args == null) {
            // Fail with a clear message instead of an opaque NullPointerException.
            throw new IllegalStateException(
                    "No '" + ARGS_KEY + "' entry in the Hadoop configuration; "
                            + "setHadoopConf must be called before the job is submitted.");
        }
        this.parse(args);
    }

    /**
     * Adds every regular file below {@code input} to the input file list.
     *
     * <p>Terminates the JVM with exit code -1 if {@code input} does not exist.
     *
     * @param input file or directory to scan
     * @throws RuntimeException wrapping the {@link IOException} raised by the file system
     */
    private void traversalInputPath(String input) {
        Path path = new Path(input);
        try {
            if (!fs.exists(path)) {
                System.err.println("Input File Path is not exist! Please check -i var.");
                System.exit(-1);
            }
            collectInputFiles(fs.getFileStatus(path));
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Recursively collects regular files, descending into everything that is not a file.
     *
     * <p>Works on the {@link FileStatus} objects returned by the listing, so no extra
     * file system call is needed to classify each entry.
     *
     * @param status status of the entry to inspect
     * @throws IOException if a directory cannot be listed
     */
    private void collectInputFiles(FileStatus status) throws IOException {
        if (status.isFile()) {
            inputFileList.add(status.getPath());
            return;
        }
        for (FileStatus child : fs.listStatus(status.getPath())) {
            collectInputFiles(child);
        }
    }

    /**
     * @return the live list of input files collected by the last
     *         {@link #setHadoopConf(String[], Configuration)} call (empty before that)
     */
    public ArrayList<Path> getInputFileList() {
        return inputFileList;
    }

    /** @return the value of {@code -i} */
    public String getInput() {
        return input;
    }

    /** @return {@link SAMFormat#BAM} when {@code -I} is 0, otherwise {@link SAMFormat#SAM} */
    public SAMFormat getInputFormat() {
        return inputFormat == 0 ? SAMFormat.BAM : SAMFormat.SAM;
    }

    /** @return the value of {@code -o} */
    public String getOutput() {
        return output;
    }

    /** @return the raw integer value of {@code -O} (help text: 0 is BAM, 1 is SAM) */
    public int getOutputFormat() {
        return outputFormat;
    }

    /** @return the parsed {@code -D} setting */
    public boolean isOutputDupRead() {
        return outputDupRead;
    }

    /** @return the parsed {@code -S} setting */
    public boolean isSE() {
        return isSE;
    }

    /** @return the value of {@code -R} */
    public int getReducerNum() {
        return reducerNum;
    }

    /** @return the value of {@code -W} */
    public int getWindowSize() {
        return windowSize;
    }

    /** @return the value of {@code -E} */
    public int getExtendSize() {
        return extendSize;
    }
}