org.opencb.hpg.bigdata.app.cli.hadoop.SequenceCommandExecutor.java Source code

Introduction

Here is the source code for org.opencb.hpg.bigdata.app.cli.hadoop.SequenceCommandExecutor.java, the command-line executor that dispatches the 'sequence' command's sub-commands (convert, stats and align) to the corresponding Hadoop MapReduce jobs.

Source

/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.hpg.bigdata.app.cli.hadoop;

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.opencb.hpg.bigdata.app.cli.CommandExecutor;
import org.opencb.hpg.bigdata.tools.sequence.Fastq2AvroMR;
import org.opencb.hpg.bigdata.tools.sequence.stats.ReadKmersMR;
import org.opencb.hpg.bigdata.tools.sequence.stats.ReadStatsMR;
import org.opencb.hpg.bigdata.core.utils.PathUtils;

/**
 * Created by imedina on 03/02/15.
 */
public class SequenceCommandExecutor extends CommandExecutor {

    private CliOptionsParser.SequenceCommandOptions sequenceCommandOptions;

    public SequenceCommandExecutor(CliOptionsParser.SequenceCommandOptions sequenceCommandOptions) {
        this.sequenceCommandOptions = sequenceCommandOptions;
    }

    /**
     * Parses the specific 'sequence' sub-command options and executes the corresponding sub-command.
     *
     * @throws Exception if the underlying MapReduce job fails
     */
    public void execute() throws Exception {
        String subCommand = sequenceCommandOptions.getParsedSubCommand();

        switch (subCommand) {
        case "convert":
            convert();
            break;
        case "stats":
            stats();
            break;
        case "align":
            System.out.println("Sub-command 'align': Not yet implemented for the command 'sequence' !");
            break;
        default:
            break;
        }
    }

    private void convert() throws Exception {
        CliOptionsParser.ConvertSequenceCommandOptions convertSequenceCommandOptions = sequenceCommandOptions.convertSequenceCommandOptions;

        // get input parameters
        String input = convertSequenceCommandOptions.input;
        String output = convertSequenceCommandOptions.output;
        String codecName = convertSequenceCommandOptions.compression;

        // sanity check: the CLI passes the literal string "null" to mean "no compression codec"
        if (codecName.equals("null")) {
            codecName = null;
        }

        // run MapReduce job to convert to GA4GH/Avro model
        Fastq2AvroMR.run(input, output, codecName);
    }

    private void stats() throws Exception {
        CliOptionsParser.StatsSequenceCommandOptions statsSequenceCommandOptions = sequenceCommandOptions.statsSequenceCommandOptions;

        // get input parameters
        String input = statsSequenceCommandOptions.input;
        String output = statsSequenceCommandOptions.output;
        int kvalue = statsSequenceCommandOptions.kmers;

        // prepare the HDFS output folder
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

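        // use the current timestamp as a unique name for a temporary HDFS working
        // directory; it is removed once the results have been copied to the local output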
        String outHdfsDirname = Long.toString(new Date().getTime());

        // run MapReduce job to compute stats
        ReadStatsMR.run(input, outHdfsDirname, kvalue);

        // post-processing: copy the reducer output to the local output folder
        // and remove the temporary HDFS directory
        Path outFile = new Path(outHdfsDirname + "/part-r-00000");

        if (!fs.exists(outFile)) {
            logger.error("Stats results file not found: {}", outFile.getName());
        } else {
            String outRawFileName = output + "/stats.json";
            fs.copyToLocalFile(outFile, new Path(outRawFileName));

            //Utils.parseStatsFile(outRawFileName, out);
        }
        fs.delete(new Path(outHdfsDirname), true);
    }

    @Deprecated
    private void kmers(String input, String output, int kvalue) throws Exception {
        // clean paths
        String in = PathUtils.clean(input);
        String out = PathUtils.clean(output);

        if (!PathUtils.isHdfs(input)) {
            throw new IOException("To run fastq kmers, the input file '" + input
                    + "' must be stored in HDFS/Hadoop. Use the command 'convert fastq2sa' to import your file.");
        }

        ReadKmersMR.run(in, out, kvalue);
    }
}
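
Usage example

A minimal sketch of how this executor could be driven from a command-line entry point, following the dispatch logic in execute() above. The CliOptionsParser calls shown here (parse and getSequenceCommandOptions) and the CLI flag names are assumptions made for illustration only; the SequenceCommandExecutor constructor and execute() call are the parts taken from the listing.

package org.opencb.hpg.bigdata.app.cli.hadoop;

public class SequenceCliExample {

    public static void main(String[] args) throws Exception {
        // hypothetical CLI arguments; flag names are for illustration only,
        // e.g. {"sequence", "convert", "--input", "reads.fq", "--output", "reads.avro"}
        CliOptionsParser parser = new CliOptionsParser();
        parser.parse(args);                                             // assumed API
        CliOptionsParser.SequenceCommandOptions sequenceOptions =
                parser.getSequenceCommandOptions();                     // assumed API

        // dispatches to convert(), stats() or the 'align' placeholder message
        SequenceCommandExecutor executor = new SequenceCommandExecutor(sequenceOptions);
        executor.execute();
    }
}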