org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol.FastqQualityControlOptions.java Source code

Java tutorial

Introduction

Here is the source code for org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol.FastqQualityControlOptions.java

Source

/*******************************************************************************
 * Copyright (c) 2017, BGI-Shenzhen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 *******************************************************************************/
package org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol;

import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.bgi.flexlab.gaea.data.mapreduce.options.HadoopOptions;
import org.bgi.flexlab.gaea.data.options.GaeaOptions;

/**
 * Command-line options of the FASTQ quality-control MapReduce tool.
 *
 * <p>The constructor declares every supported option; {@link #parse(String[])}
 * reads the supplied arguments into the private fields exposed by the getters
 * below. The raw argument array can be stored in, and restored from, a Hadoop
 * {@link Configuration} via {@link #setHadoopConf(String[], Configuration)} and
 * {@link #getOptionsFromHadoopConf(Configuration)}.
 */
public class FastqQualityControlOptions extends GaeaOptions implements HadoopOptions {
    private static final String SOFTWARE_NAME = "FilterQualityControl";
    private static final String SOFTWARE_VERSION = "1.0";
    private static final int LENGTH_QUALITY = 64;

    /** Number of values carried by the {@code -s/--trim} option. */
    private static final int TRIM_FIELDS = 4;

    private String adapter1;
    private String adapter2;
    private String input1Fastq;
    private String input2Fastq;
    private String outputDir;
    private String multiSampleList;

    private double qualRate;
    private double NRate;

    private int minimumQualityScore;
    private int Q;
    private int[] trim;
    private int reducerNumber;
    private int lowQual;
    private int cest;
    private int minimum;
    private int bias;
    private int length_adaptor;
    private int readType;

    private boolean SEdata = false;
    // NOTE(review): never assigned anywhere in this class, so isFilterSE() always returns false.
    private boolean filterSE;
    private boolean qualTrim;
    private boolean dyncut = false;
    private boolean multiStatis = true;
    private boolean qualFreq = false;
    private boolean ignoredfastq1;
    private boolean ignoredfastq2;

    /**
     * Declares all command-line options and prepares the help banner.
     */
    public FastqQualityControlOptions() {
        addOption("1", "in1fq", true, "the first fastq");
        addOption("2", "in2fq", true, "the second fastq");
        addOption("3", "adapter1", true, "the first adapter list file");
        addOption("4", "adapter2", true, "the second adapter list file");
        addOption("l", "lowQuality", true, "low quality value (default:5)");
        addOption("Q", "qualitySystem", true,
                "Quality score schema(default:1)\n0: NCBI/Sanger or Illumina 1.8 and later; "
                        + " \n1: Illumina Pipeline 1.5 to 1.7;  \n2: Illumina Pipeline 1.3 and 1.4;  "
                        + "\n3: Illumina Pipeline 1.2 and earlier");
        addOption("C", "sanger", false, "change base quality to sanger");
        addOption("q", "qualityRate", true, "low quality rate(default:0.5)");
        addOption("t", "readType", true,
                "read name type.(default:0)\n0: reads_xxx/1\n1: reads_xx: 1:N:XX\n2: reads_xx");
        addOption("N", "NRate", true, "Maximum N rate(default:0.1)");
        addOption("s", "trim", true,
                "trim some bp of the read's head and tail, they means: (read1's head and tail and read2's head and tail  [0,0,0,0]");
        addOption("5", "ignored1fq", false, "not output reads from fastq1");
        addOption("6", "ignored2fq", false, "not output reads from fastq2");
        addOption("m", "multiSample", true, "Mulit samples list");
        addOption("n", "reducerNumber", true, "reducer number");
        addOption("o", "output", true, "output directory", true);
        addOption("d", "qualityFreq", false, "output quality frequency statice");
        addOption("M", "ignoredSample", false, "QC statictis together");
        addOption("D", "dyncut", false, "run dyncutadaptor process");
        addOption("S", "seed", true, "initial length of adaptor.(default:10)");
        addOption("T", "ignoredTail", true, "don't cut tail in the last n bp.(default:3)");
        addOption("A", "mismatch", true, "tolerate mismatchs.(default:1)");
        // The help text states the default that parse() really applies (30).
        addOption("B", "minimum", true, "don't keep seqences shorter.(default:30)");
        addOption("h", "help", false, "help information");

        FormatHelpInfo(SOFTWARE_NAME, SOFTWARE_VERSION);
    }

    /**
     * Parses the command line and fills every option field.
     *
     * <p>Terminates the JVM with exit status 1 when the arguments cannot be
     * parsed, when neither {@code -1} nor {@code -m} is supplied, or when the
     * quality schema given with {@code -Q} is not one of 0..3.
     *
     * @param args raw command-line arguments
     */
    @Override
    public void parse(String[] args) {
        try {
            cmdLine = parser.parse(options, args);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            printHelpInfotmation(SOFTWARE_NAME);
            System.exit(1);
        }

        // Query the parsed command line: 'options' only records which options are
        // declared (always true for "1" and "m"), not which ones the user passed.
        if (!cmdLine.hasOption("1") && !cmdLine.hasOption("m")) {
            System.err.println("must set -1 or -m");
            System.exit(1);
        }

        input1Fastq = getOptionValue("1", null);
        input2Fastq = getOptionValue("2", null);
        adapter1 = getOptionValue("3", null);
        adapter2 = getOptionValue("4", null);
        outputDir = getOptionValue("o", null);
        multiSampleList = getOptionValue("m", null);

        qualRate = getOptionDoubleValue("q", 0.5);
        NRate = getOptionDoubleValue("N", 0.1);

        Q = getOptionIntValue("Q", 1);
        setTrim(getOptionValue("s", "0,0,0,0"));
        reducerNumber = getOptionIntValue("n", 100);
        lowQual = getOptionIntValue("l", 5);
        cest = getOptionIntValue("T", 3);
        minimum = getOptionIntValue("B", 30);
        bias = getOptionIntValue("A", 1);
        length_adaptor = getOptionIntValue("S", 10);
        readType = getOptionIntValue("t", 0);

        qualTrim = getOptionBooleanValue("C", false);
        dyncut = getOptionBooleanValue("D", false);
        // NOTE(review): the second argument is the value used when the flag is absent;
        // what a present -M yields depends on getOptionBooleanValue in GaeaOptions - confirm.
        multiStatis = getOptionBooleanValue("M", true);
        qualFreq = getOptionBooleanValue("d", false);
        ignoredfastq1 = getOptionBooleanValue("5", false);
        ignoredfastq2 = getOptionBooleanValue("6", false);

        setQualitySystem(Q);
    }

    /**
     * Derives the minimum quality score from the quality schema id and reports
     * the selected schema on standard output. An unknown id is reported on
     * standard error and terminates the JVM with exit status 1.
     *
     * @param Q quality schema id (0..3) as given with {@code -Q}
     */
    private void setQualitySystem(int Q) {
        switch (Q) {
        case 0: // NCBI/Sanger or Illumina 1.8 and later
            minimumQualityScore = 33;
            System.out.println("> Quality score schema: NCBI/Sanger or Illumina 1.8 and later (ASCII 33 to 93)");
            break;
        case 1: // Illumina Pipeline 1.5 to 1.7
            minimumQualityScore = 64;
            System.out.println("> Quality score schema: Illumina Pipeline 1.5 to 1.7 (ASCII 64 to 104)");
            break;
        case 2: // Illumina Pipeline 1.3 and 1.4
            minimumQualityScore = 64;
            System.out.println(
                    "> Quality score schema: Illumina Pipeline 1.3 and 1.4 (ASCII 64 to 104, Values 0 (@) and 1 (A) are not used anymore. Value 2 (B) has special meaning and is used as a trim clipping.)");
            break;
        case 3: // Illumina Pipeline 1.2 and earlier
            minimumQualityScore = 59;
            System.out.println("> Quality score schema: Illumina Pipeline 1.2 and earlier (ASCII 59 to 104)");
            break;
        default:
            // Fatal configuration error: goes to stderr like the other fatal paths in parse().
            System.err.println(
                    "> Invailid value of quality score schema. Please check it. (0: NCBI/Sanger or Illumina 1.8 and later; 1: Illumina Pipeline 1.5 to 1.7; 2: Illumina Pipeline 1.3 and 1.4; 3: Illumina Pipeline 1.2 and earlier.)");
            System.exit(1);
        }
    }

    /** @return value of {@code -3/--adapter1}, or {@code null} when not given */
    public String getAdapter1() {
        return adapter1;
    }

    /** @return value of {@code -4/--adapter2}, or {@code null} when not given */
    public String getAdapter2() {
        return adapter2;
    }

    /** @return value of {@code -1/--in1fq}, or {@code null} when not given */
    public String getInputFastq1() {
        return input1Fastq;
    }

    /** @return value of {@code -2/--in2fq}, or {@code null} when not given */
    public String getInputFastq2() {
        return input2Fastq;
    }

    /** @return value of {@code -o/--output} */
    public String getOutputDirectory() {
        return outputDir;
    }

    /** @return value of {@code -l/--lowQuality} (default 5) */
    public int getLowQuality() {
        return lowQual;
    }

    /** @return value of {@code -q/--qualityRate} (default 0.5) */
    public double getQualityRate() {
        return qualRate;
    }

    /** @return value of {@code -N/--NRate} (default 0.1) */
    public double getNRate() {
        return NRate;
    }

    /** @return value of {@code -n/--reducerNumber} (default 100) */
    public int getReducerNumber() {
        return reducerNumber;
    }

    /**
     * @return the minimum quality score: 33, 64 or 59 depending on the quality
     *         schema, unless overridden through {@link #setQualityScore(int)}
     */
    public int getQuality() {
        return minimumQualityScore;
    }

    /** @return the fixed quality length constant (64) */
    public int getQualityLength() {
        return LENGTH_QUALITY;
    }

    /**
     * Overrides the minimum quality score derived from the quality schema.
     *
     * @param qual new minimum quality score
     */
    public void setQualityScore(int qual) {
        this.minimumQualityScore = qual;
    }

    /** @return flag read from {@code -5/--ignored1fq} */
    public boolean isIgnoredFastq1() {
        return ignoredfastq1;
    }

    /** @return flag read from {@code -6/--ignored2fq} */
    public boolean isIgnoredFastq2() {
        return ignoredfastq2;
    }

    /** @return value of {@code -m/--multiSample}, or {@code null} when not given */
    public String getMultiSampleList() {
        return multiSampleList;
    }

    /** @return the single-end flag; {@code false} until changed via {@link #setSEdata(boolean)} */
    public boolean isSEdata() {
        return SEdata;
    }

    /**
     * Sets the single-end flag; it is not derived from the command line.
     *
     * @param sEdata new flag value
     */
    public void setSEdata(boolean sEdata) {
        SEdata = sEdata;
    }

    /**
     * Parses the {@code -s/--trim} value into the four trim lengths.
     *
     * <p>Expected layout: read1HeadTrim,read1TailTrim,read2HeadTrim,read2TailTrim.
     * Whitespace around a number is ignored, missing or blank fields default to 0
     * and fields beyond the fourth are ignored.
     *
     * @param trimStr comma separated list of up to four integers
     * @throws NumberFormatException if a non-blank field is not an integer
     */
    private void setTrim(String trimStr) {
        trim = new int[TRIM_FIELDS]; // zero-initialised, so skipped fields stay 0
        String[] tokens = trimStr.trim().split(",");
        for (int i = 0; i < TRIM_FIELDS && i < tokens.length; i++) {
            String token = tokens[i].trim();
            if (!token.isEmpty()) {
                trim[i] = Integer.parseInt(token);
            }
        }
    }

    /**
     * @return the four trim lengths (read1 head, read1 tail, read2 head, read2
     *         tail); this is the internal array, not a copy
     */
    public int[] getTrim() {
        return trim;
    }

    /** @return the filterSE flag; this class never assigns it, so it is always {@code false} */
    public boolean isFilterSE() {
        return filterSE;
    }

    /** @return flag read from {@code -D/--dyncut} */
    public boolean isDyncut() {
        return dyncut;
    }

    /** @return value of {@code -T/--ignoredTail} (default 3) */
    public int getCest() {
        return cest;
    }

    /** @return value of {@code -A/--mismatch} (default 1) */
    public int getBias() {
        return bias;
    }

    /** @return value of {@code -S/--seed} (default 10) */
    public int getAdaptorLength() {
        return length_adaptor;
    }

    /** @return value of {@code -B/--minimum} (default 30) */
    public int getMinimum() {
        return minimum;
    }

    /** @return flag read from {@code -C/--sanger} */
    public boolean isQualityTrim() {
        return qualTrim;
    }

    /** @return flag read from {@code -M/--ignoredSample} ({@code true} when the flag is absent) */
    public boolean isMultiStatis() {
        return this.multiStatis;
    }

    /** @return quality schema id given with {@code -Q/--qualitySystem} (default 1) */
    public int getQualitySystem() {
        return this.Q;
    }

    /** @return flag read from {@code -d/--qualityFreq} */
    public boolean isQualityFrequency() {
        return qualFreq;
    }

    /**
     * Stores the raw argument array in the configuration under the key {@code "args"}.
     *
     * @param args raw command-line arguments
     * @param conf configuration to write to
     */
    @Override
    public void setHadoopConf(String[] args, Configuration conf) {
        conf.setStrings("args", args);
    }

    /**
     * Reads the argument array stored under the key {@code "args"} and parses it again.
     *
     * @param conf configuration to read from
     */
    @Override
    public void getOptionsFromHadoopConf(Configuration conf) {
        String[] args = conf.getStrings("args");
        this.parse(args);
    }

    /** @return value of {@code -t/--readType} (default 0) */
    public int getReadType() {
        return readType;
    }
}