gov.jgi.meta.exec.CapCommand.java Source code

Java tutorial

Introduction

Here is the source code for gov.jgi.meta.exec.CapCommand.java

Source

/*
 * Copyright (c) 2010, The Regents of the University of California, through Lawrence Berkeley
 * National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy).
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided
 * that the following conditions are met:
 *
 * (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the
 * following disclaimer.
 *
 * (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions
 * and the following disclaimer in the documentation and/or other materials provided with the distribution.
 *
 * (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept.
 * of Energy, nor the names of its contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the
 * features, functionality or performance of the source code ("Enhancements") to anyone; however,
 * if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley
 * National Laboratory, without imposing a separate written license agreement for such Enhancements,
 * then you hereby grant the following license: a  non-exclusive, royalty-free perpetual license to install,
 * use, modify, prepare derivative works, incorporate into other computer software, distribute, and
 * sublicense such enhancements or derivative works thereof, in binary and source code form.
 */

package gov.jgi.meta.exec;

import com.devdaily.system.SystemCommandExecutor;
import gov.jgi.meta.MetaUtils;
import gov.jgi.meta.hadoop.input.FastaBlockLineReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.log4j.Logger;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * Wraps execution of the command-line cap3 program.
 *
 * Use this by creating a new CapCommand and then invoking the
 * exec method, e.g.:
 *
 * CapCommand b = new CapCommand();
 * r = b.exec(...)
 *
 * The constructors ask {@code MetaUtils.createTempDir} for a working
 * directory; call {@link #cleanup()} once the instance is no longer needed.
 */
public class CapCommand implements CommandLineProgram {
    /** options passed to cap3 when the configuration supplies none */
    String DEFAULTCOMMANDLINE = "-k 0";
    /** executable used when the configuration supplies none */
    String DEFAULTCOMMANDPATH = "cap3";
    /** parent temporary directory used when the configuration supplies none */
    String DEFAULTTMPDIR = "/tmp/cap3";

    /**
     * logger
     */
    Logger log = Logger.getLogger(CapCommand.class);

    /**
     * the commandline to execute (all options except the input/output)
     */
    String commandLine = null;

    /**
     * the location of the executable in the filesystem
     */
    String commandPath = null;

    /**
     * the location of where to copy input for failed executions
     * (null disables the recovery copy)
     */
    String recoveryPath = null;

    /**
     * temporary directory to use for intermediate files
     */
    String tmpDir = null;

    /**
     * working directory created inside tmpDir; null once cleanup() has deleted it
     */
    File tmpDirFile = null;

    /**
     * the contents of the stdout after executing the command
     */
    String stdout = null;

    /**
     * the contents of the stderr after executing the command
     */
    String stderr = null;

    /**
     * the shell return value after executing the cap3 command
     */
    int exitValue = 0;

    /**
     * flag to determine whether to clean up directories after
     * execution
     */
    Boolean doCleanup = true;

    /**
     * new cap3 command based on default parameters
     *
     * @throws IOException if tmp directories can't be created
     */
    public CapCommand() throws IOException {
        commandLine = DEFAULTCOMMANDLINE;
        commandPath = DEFAULTCOMMANDPATH;
        tmpDir = DEFAULTTMPDIR;
        tmpDirFile = MetaUtils.createTempDir("cap3_", tmpDir);
    }

    /**
     * new cap3 command based on values stored in the configuration.
     * <p/>
     * Looks for the following config values: cap3.commandline,
     * cap3.commandpath, assembler.tmpdir, recoverypath and
     * assembler.cleanup. Missing values fall back to the defaults
     * (recoverypath has no default; assembler.cleanup defaults to true).
     *
     * @param config is the hadoop configuration with overriding values
     *               for commandline options and paths
     * @throws IOException if command can't be configured
     */
    public CapCommand(Configuration config) throws IOException {
        commandLine = config.get("cap3.commandline", DEFAULTCOMMANDLINE);
        commandPath = config.get("cap3.commandpath", DEFAULTCOMMANDPATH);
        tmpDir = config.get("assembler.tmpdir", DEFAULTTMPDIR);

        // stays null when not configured, which disables the recovery copy
        recoveryPath = config.get("recoverypath");

        doCleanup = config.getBoolean("assembler.cleanup", true);

        /*
         * create a working space inside tmpDir
         */
        tmpDirFile = MetaUtils.createTempDir("cap3_", tmpDir);
    }

    /**
     * Does not delete the tmp space; callers must invoke {@link #cleanup()}
     * explicitly.
     *
     * @throws Throwable if the superclass finalizer fails
     */
    @Override
    protected void finalize() throws Throwable {
        super.finalize();
    }

    /**
     * clean up all footsteps of program.
     * <p/>
     * Deletes the working directory unless cleanup has been disabled.
     * Safe to call more than once: later calls find nothing to delete.
     */
    public void cleanup() {
        if (tmpDirFile == null) {
            // already cleaned up (or never created)
            return;
        }

        if (!this.doCleanup) {
            log.info("cleanup disabled, keeping tmp file: " + tmpDirFile.getPath());
            return;
        }

        log.info("deleting tmp file: " + tmpDirFile.getPath());
        MetaUtils.recursiveDelete(tmpDirFile);
        tmpDirFile = null;
    }

    /**
     * @return the working directory of this command, or null after a
     *         successful cleanup
     */
    public File getTmpDir() {
        return tmpDirFile;
    }

    /**
     * execute the cap3 command and return the contents of the contigs file it produces
     *
     * @param groupId     not used by this implementation
     * @param seqDatabase is the key/value map of sequences to assemble, keyed by name
     * @param context     optional reducer context used for status and counter updates; may be null
     * @return the map filled by FastaBlockLineReader from reads.fa.cap.contigs
     *         (presumably contig name to sequence); empty if cap3 wrote no contigs file
     * @throws IOException          if file system error occurred
     * @throws InterruptedException if execution got interrupted
     * @throws RuntimeException     if cap3 exits with a non-zero status
     */
    public Map<String, String> exec(String groupId, Map<String, String> seqDatabase, Reducer.Context context)
            throws IOException, InterruptedException {
        long executionStartTime = System.currentTimeMillis();

        if (tmpDirFile == null) {
            // without this guard the input path would become the literal "null/reads.fa"
            throw new IOException("cap3 working directory is not available (already cleaned up?)");
        }

        log.info("Preparing Assembly execution");
        if (context != null) {
            context.setStatus("Preparing Assembly execution");
        }

        /*
         * dump the database from the map to a file
         */
        String seqFilepath = MetaUtils.sequenceToLocalFile(seqDatabase, tmpDirFile + "/reads.fa");
        if (seqFilepath == null) {
            throw new IOException("unable to find file: " + tmpDirFile + "/reads.fa");
        }

        if (context != null) {
            context.setStatus("running cap3 with " + seqDatabase.size() + " reads");
        }

        /*
         * now set up a cap3 execution
         */
        List<String> commands = new ArrayList<String>();
        commands.add("/bin/sh");
        commands.add("-c");
        commands.add("umask 000; " + commandPath + " " + seqFilepath + " " + commandLine);

        log.info("command = " + commands);

        SystemCommandExecutor commandExecutor = new SystemCommandExecutor(commands);
        exitValue = commandExecutor.executeCommand();

        // stdout and stderr of the command are returned as StringBuilder objects
        stdout = commandExecutor.getStandardOutputFromCommand().toString();
        stderr = commandExecutor.getStandardErrorFromCommand().toString();

        log.info("exit = " + exitValue);
        if (exitValue != 0) {
            if (recoveryPath != null) {
                copyToRecoveryPath(seqFilepath);
            }
            log.info("stdout = " + stdout);
        }
        log.info("stderr = " + stderr);

        // exitValue still holds cap3's status here: the recovery copy keeps its own exit code
        if (exitValue != 0) {
            throw new RuntimeException("Failed to execute cap3 using '" + commands + "'. Reason: " + stderr);
        }

        long executionTime = System.currentTimeMillis() - executionStartTime;
        if (context != null) {
            context.getCounter("reduce.assembly", "EXECUTION_TIME").increment(executionTime);
        }

        return readContigs();
    }

    /**
     * Copies the input of a failed run into a timestamped directory below recoveryPath.
     * Best effort: a failing copy is logged and does not change exitValue.
     *
     * @param seqFilepath path of the reads file handed to cap3
     * @throws IOException          if the copy command can't be started
     * @throws InterruptedException if the copy command got interrupted
     */
    private void copyToRecoveryPath(String seqFilepath) throws IOException, InterruptedException {
        // HH (24-hour clock) keeps directory names unambiguous
        String tmpDateDir = new SimpleDateFormat("yyyy-MM-dd:HH:mm:ss").format(new Date());
        String recoveryDir = recoveryPath + "/" + tmpDateDir + "/";

        // "sh -c" runs only its first operand as a script, so mkdir and cp
        // have to be chained inside a single command string
        List<String> recoveryCopyCommands = new ArrayList<String>();
        recoveryCopyCommands.add("/bin/sh");
        recoveryCopyCommands.add("-c");
        recoveryCopyCommands.add("mkdir -p " + recoveryDir + " && cp -r " + seqFilepath + " " + recoveryDir);

        log.info("about to execute copy to recovery path, command = " + recoveryCopyCommands);

        SystemCommandExecutor recoveryCopyCommandExecutor = new SystemCommandExecutor(recoveryCopyCommands);
        int recoveryExitValue = recoveryCopyCommandExecutor.executeCommand();
        String recoveryCommandsStderr = recoveryCopyCommandExecutor.getStandardErrorFromCommand().toString();

        if (recoveryExitValue != 0) {
            log.warn("Failed to execute '" + recoveryCopyCommands + "'. Reason: " + recoveryCommandsStderr);
        }
    }

    /**
     * Reads reads.fa.cap.contigs from the working directory.
     *
     * @return the map filled by the reader; empty when the contigs file does not exist
     * @throws IOException          if the file exists but can't be read or closed
     * @throws InterruptedException propagated from closing the reader, should it raise one
     */
    private Map<String, String> readContigs() throws IOException, InterruptedException {
        Map<String, String> contigs = new HashMap<String, String>();
        String contigsPath = tmpDirFile.getPath() + "/reads.fa.cap.contigs";

        FileInputStream fstream = null;
        FastaBlockLineReader in = null;

        try {
            fstream = new FileInputStream(contigsPath);
            in = new FastaBlockLineReader(fstream);
            in.readLine(new Text(), contigs);
        } catch (FileNotFoundException ignored) {
            // in this case, its not an error, it may be that cap3 could not create any contigs
            log.info("no contigs file found at " + contigsPath);
        } catch (Exception e) {
            log.error("unable to read output file: " + e);
            throw new IOException(e);
        } finally {
            // close both resources even if closing the first one fails
            try {
                if (in != null) {
                    in.close();
                }
            } finally {
                if (fstream != null) {
                    fstream.close();
                }
            }
        }

        return contigs;
    }
}