uk.ac.tgac.conan.process.asm.tools.AllpathsLgV50.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.tgac.conan.process.asm.tools.AllpathsLgV50.java

Source

/**
 * RAMPART - Robust Automatic MultiPle AssembleR Toolkit
 * Copyright (C) 2013  Daniel Mapleson - TGAC
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **/
package uk.ac.tgac.conan.process.asm.tools;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.kohsuke.MetaInfServices;
import uk.ac.ebi.fgpt.conan.model.param.AbstractProcessParams;
import uk.ac.ebi.fgpt.conan.model.param.ConanParameter;
import uk.ac.ebi.fgpt.conan.model.param.ParamMap;
import uk.ac.ebi.fgpt.conan.service.ConanExecutorService;
import uk.ac.tgac.conan.core.data.Library;
import uk.ac.tgac.conan.core.data.Organism;
import uk.ac.tgac.conan.process.asm.*;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

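/**
 * Wrapper that drives the ALLPATHS-LG assembler: it writes the library/group CSV inputs and the ploidy file,
 * then builds a single shell command chaining CacheLibs.pl, CacheGroups.pl, CacheToAllPathsInputs.pl and
 * RunAllPathsLG.
 *
 * User: maplesod
 * Date: 13/08/13
 * Time: 15:24
 */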
@MetaInfServices(uk.ac.tgac.conan.process.asm.Assembler.class)
public class AllpathsLgV50 extends AbstractAssembler implements Subsampler {

    /** Executable name handed to the superclass constructor as this process's executable. */
    public static final String EXE = "RunAllPathsLG";
    /** Identifier of this assembler wrapper; passed to both the superclass constructor and the {@link Args} constructor. */
    public static final String NAME = "AllpathsLg_V50";

    /**
     * Records the requested coverage level on this assembler's {@link Args}.
     *
     * @param desiredCoverage coverage value to store
     */
    @Override
    public void setDesiredCoverage(int desiredCoverage) {
        final Args assemblerArgs = getArgs();
        assemblerArgs.setDesiredCoverage(desiredCoverage);
    }

    /**
     * Reads back the coverage level currently held by this assembler's {@link Args}.
     *
     * @return the stored desired coverage
     */
    @Override
    public int getDesiredCoverage() {
        final Args assemblerArgs = getArgs();
        return assemblerArgs.getDesiredCoverage();
    }

    /**
     * Immutable holder for the results of writing the group CSV files: the two per-encoding CSV files
     * and the ordered list of group names that were generated.
     */
    private static class GroupInfo {

        /** Group CSV for phred33 libraries. */
        private final File inGroupsPhred33;

        /** Group CSV for phred64 libraries. */
        private final File inGroupsPhred64;

        /** Generated group names. */
        private final List<String> groupNames;

        /**
         * @param inGroupsPhred33 group CSV for phred33 libraries
         * @param inGroupsPhred64 group CSV for phred64 libraries
         * @param groupNames      generated group names
         */
        private GroupInfo(File inGroupsPhred33, File inGroupsPhred64, List<String> groupNames) {
            this.groupNames = groupNames;
            this.inGroupsPhred64 = inGroupsPhred64;
            this.inGroupsPhred33 = inGroupsPhred33;
        }

        /** @return the phred33 group CSV */
        private File getInGroupsPhred33() {
            return this.inGroupsPhred33;
        }

        /** @return the phred64 group CSV */
        private File getInGroupsPhred64() {
            return this.inGroupsPhred64;
        }

        /** @return the generated group names */
        private List<String> getGroupNames() {
            return this.groupNames;
        }
    }

    // Directory layout used by ALLPATHS-LG; all of these are assigned in setup().
    // refDir is the output directory's child named after the organism (spaces replaced by underscores).
    private File refDir;
    // "cache" directory under refDir; the only directory setup() creates explicitly.
    private File cacheDir;
    // "data" directory under refDir; the ploidy file is written here.
    private File dataDir;
    // "rampart" directory under dataDir.
    private File runDir;
    // "ASSEMBLIES/test" under runDir; the contigs and scaffolds files are resolved against it.
    private File assembliesDir;

    // Library description CSV ("in_libs.csv" in the output directory), written by setup().
    private File inLibs;
    // Group CSV files and group names produced by setup(); read when building the command.
    private GroupInfo groupInfo;
    // Genome size taken from the organism in setup(); 0 until then.
    private long genomeSize;

    /**
     * Creates an assembler with default {@link Args} and a null executor service.
     */
    public AllpathsLgV50() {
        this(null);
    }

    /**
     * Creates an assembler with default {@link Args}.
     *
     * @param conanExecutorService executor service forwarded to the superclass
     */
    public AllpathsLgV50(ConanExecutorService conanExecutorService) {
        this(conanExecutorService, new Args());
    }

    /**
     * Creates an assembler from an executor service and a prepared set of arguments.
     *
     * @param conanExecutorService executor service forwarded to the superclass
     * @param args                 arguments forwarded to the superclass
     */
    public AllpathsLgV50(ConanExecutorService conanExecutorService, Args args) {
        super(NAME, EXE, args, new Params(), conanExecutorService);

        // Genome size is unknown until setup() reads it from the organism
        genomeSize = 0L;
    }

    /**
     * Gives typed access to the process arguments held by the superclass.
     *
     * @return the process arguments cast to {@link Args}
     */
    public Args getArgs() {
        final Object processArgs = this.getProcessArgs();
        return (Args) processArgs;
    }

    /**
     * @return always {@code false}; this wrapper exposes no unitigs file
     */
    @Override
    public boolean makesUnitigs() {
        final boolean producesUnitigs = false;
        return producesUnitigs;
    }

    /**
     * @return always {@code true}; a contigs file is exposed via {@link #getContigsFile()}
     */
    @Override
    public boolean makesContigs() {
        final boolean producesContigs = true;
        return producesContigs;
    }

    /**
     * @return always {@code true}; a scaffolds file is exposed via {@link #getScaffoldsFile()}
     */
    @Override
    public boolean makesScaffolds() {
        final boolean producesScaffolds = true;
        return producesScaffolds;
    }

    /**
     * @return always {@code false}; this wrapper exposes no bubble file
     */
    @Override
    public boolean makesBubbles() {
        final boolean producesBubbles = false;
        return producesBubbles;
    }

    /**
     * @return always {@code null}, consistent with {@link #makesUnitigs()} returning {@code false}
     */
    @Override
    public File getUnitigsFile() {
        final File noUnitigs = null;
        return noUnitigs;
    }

    /**
     * @return the file named {@code final.contigs.fasta} resolved against the assemblies directory
     */
    @Override
    public File getContigsFile() {
        final String contigsFileName = "final.contigs.fasta";
        return new File(this.assembliesDir, contigsFileName);
    }

    /**
     * @return the file named {@code final.assembly.fasta} resolved against the assemblies directory
     */
    @Override
    public File getScaffoldsFile() {
        final String scaffoldsFileName = "final.assembly.fasta";
        return new File(this.assembliesDir, scaffoldsFileName);
    }

    /**
     * @return always {@code null}, consistent with {@link #makesBubbles()} returning {@code false}
     */
    @Override
    public File getBubbleFile() {
        final File noBubbles = null;
        return noBubbles;
    }

    /**
     * @return the scaffolds file, which this wrapper treats as its best assembly
     */
    @Override
    public File getBestAssembly() {
        final File scaffolds = getScaffoldsFile();
        return scaffolds;
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean doesSubsampling() {
        final boolean subsamples = true;
        return subsamples;
    }

    /**
     * @return {@link Assembler.Type#DE_BRUIJN_AUTO}
     */
    @Override
    public Assembler.Type getType() {
        final Assembler.Type assemblerType = Assembler.Type.DE_BRUIJN_AUTO;
        return assemblerType;
    }

    /**
     * ALLPATHS requires at least a "fragment" library (i.e. an overlapping paired end library), and a "jumping"
     * library (i.e. a normal paired end or mate pair library).
     *
     * @param libraries the candidate libraries to inspect
     * @return {@code true} only if the list holds at least one OPE library and at least one PE or MP library
     */
    @Override
    public boolean acceptsLibraries(List<Library> libraries) {

        boolean hasFragment = false;
        boolean hasJumping = false;

        for (final Library candidate : libraries) {
            final Library.Type libType = candidate.getType();

            hasFragment |= (libType == Library.Type.OPE);
            hasJumping |= (libType == Library.Type.PE || libType == Library.Type.MP);
        }

        return hasFragment && hasJumping;
    }

    /**
     * Stores the given libraries on this assembler's {@link Args}.
     *
     * @param libraries libraries to record
     */
    @Override
    public void setLibraries(List<Library> libraries) {
        final Args assemblerArgs = getArgs();
        assemblerArgs.setLibs(libraries);
    }

    /**
     * Prepares the on-disk inputs that the generated command refers to.
     * <p>
     * Writes the library CSV and the group CSV files into the output directory, derives the
     * reference/cache/data/run/assemblies directories from the organism name, creates the cache directory,
     * records the organism's genome size and writes the ploidy file into the data directory.
     *
     * @throws IOException if no named organism has been set on the arguments, if the cache directory cannot be
     *                     created, or if one of the input files cannot be written
     */
    @Override
    public void setup() throws IOException {

        // Quick handle to args
        Args args = this.getArgs();

        // Everything below dereferences the organism (name, genome size, ploidy) and Args leaves it null by
        // default, so report the misconfiguration clearly before any file is written.
        Organism organism = args.getOrganism();
        if (organism == null || organism.getName() == null) {
            throw new IOException(
                    "Cannot set up ALLPATHS-LG: an organism with a name must be specified in the arguments");
        }

        // Create lib input file
        this.inLibs = new File(args.getOutputDir(), "in_libs.csv");
        this.createLibFile(this.inLibs, args.getLibs());

        // Create groups input files (sets inGroupsPhredXX)
        this.groupInfo = this.createGroupFiles(args.getLibs());

        String fileSystemFriendlyName = organism.getName().replaceAll(" ", "_");

        // Define ALLPATHS dirs
        this.refDir = new File(args.getOutputDir(), fileSystemFriendlyName);
        this.cacheDir = new File(refDir, "cache");
        this.dataDir = new File(refDir, "data");
        this.runDir = new File(dataDir, "rampart");
        this.assembliesDir = new File(runDir, "ASSEMBLIES/test");

        // Create required ALLPATHS dirs
        if (!this.cacheDir.exists() && !this.cacheDir.mkdirs()) {
            throw new IOException(
                    "Could not create cache directory for ALLPATHS-LG: " + this.cacheDir.getAbsolutePath());
        }

        // Get genome size (may take a while if acquiring from fasta)
        this.genomeSize = organism.getGenomeSize();

        // Create ploidy file
        // NOTE(review): dataDir is never created explicitly; this presumably relies on FileUtils.writeLines
        // creating missing parent directories - confirm against the Commons IO version in use.
        this.createPloidyFile(organism.getPloidy(), new File(dataDir, "ploidy"));
    }

    /**
     * Writes the ploidy value as the single line of the given file.
     *
     * @param ploidy     ploidy value to write
     * @param ploidyFile destination file
     * @throws IOException if the file cannot be written
     */
    protected void createPloidyFile(int ploidy, File ploidyFile) throws IOException {

        final List<String> content = new ArrayList<String>(1);
        content.add(String.valueOf(ploidy));

        FileUtils.writeLines(ploidyFile, content);
    }

    /**
     * Writes the library description CSV: a header row followed by one row per library.
     * <p>
     * OPE libraries are written as "fragment" with fragment size/deviation filled in; PE ("jumping") and MP
     * ("long_jump") libraries get insert size/deviation instead; SE libraries are written as unpaired "long".
     * The deviation is the average insert size multiplied by the insert error tolerance, truncated to an int.
     * The genomic start/end columns are always left empty.
     *
     * @param libFile   destination CSV file
     * @param libraries libraries to describe
     * @throws IOException if the file cannot be written
     */
    protected void createLibFile(File libFile, List<Library> libraries) throws IOException {

        final String separator = ", ";
        final List<String> rows = new ArrayList<>();

        // Header row
        rows.add(
                "library_name, project_name, organism_name, type, paired, frag_size, frag_stddev, insert_size, insert_stddev, read_orientation, genomic_start, genomic_end");

        for (final Library library : libraries) {

            // Columns that do not apply to a library type stay empty
            String type = "";
            String paired = "";
            String fragSize = "";
            String fragDev = "";
            String insertSize = "";
            String insertDev = "";
            String readOrientation = "";

            final Library.Type libType = library.getType();

            switch (libType) {
            case OPE:
                type = "fragment";
                paired = "1";
                fragSize = Integer.toString(library.getAverageInsertSize());
                fragDev = Integer
                        .toString((int) (library.getAverageInsertSize() * library.getInsertErrorTolerance()));
                if (library.getSeqOrientation() == Library.SeqOrientation.FR) {
                    readOrientation = "inward";
                } else {
                    readOrientation = "outward";
                }
                break;
            case PE:
            case MP:
                // Both jumping flavours fill the insert columns; only the type label differs
                type = (libType == Library.Type.PE) ? "jumping" : "long_jump";
                paired = "1";
                insertSize = Integer.toString(library.getAverageInsertSize());
                insertDev = Integer
                        .toString((int) (library.getAverageInsertSize() * library.getInsertErrorTolerance()));
                if (library.getSeqOrientation() == Library.SeqOrientation.FR) {
                    readOrientation = "inward";
                } else {
                    readOrientation = "outward";
                }
                break;
            case SE:
                type = "long";
                paired = "0";
                break;
            default:
                // Any other type is written with blank type-specific columns
                break;
            }

            final String libraryName = library.getName();
            final String organismName = this.getArgs().getOrganism().getName().replaceAll(" ", "_");

            final String[] columns = {
                    libraryName,
                    "rampart",
                    organismName,
                    type,
                    paired,
                    fragSize,
                    fragDev,
                    insertSize,
                    insertDev,
                    readOrientation,
                    "",
                    ""
            };

            rows.add(StringUtils.join(columns, separator));
        }

        FileUtils.writeLines(libFile, rows);
    }

    /**
     * Writes the group CSV files (one per quality encoding) and collects the generated group names.
     * <p>
     * Each library gets a group name made of its type plus a per-type running number. SE libraries reference
     * their single file by absolute path; all other libraries reference a glob pattern covering both files.
     * Libraries whose phred encoding is unset or PHRED_64 go into the phred64 CSV, all others into the phred33
     * CSV. A CSV is only written when it holds at least one library row.
     *
     * @param libraries libraries to group
     * @return the written CSV files (null for an encoding with no libraries) and the group names in library order
     * @throws IOException if a glob pattern cannot be built or a CSV cannot be written
     */
    private GroupInfo createGroupFiles(List<Library> libraries) throws IOException {

        final String header = "group_name, library_name, file_name";

        final List<String> namesInOrder = new ArrayList<>();
        final List<String> linesPhred33 = new ArrayList<>();
        final List<String> linesPhred64 = new ArrayList<>();

        linesPhred33.add(header);
        linesPhred64.add(header);

        // Running counters used to number the groups of each library type
        int opeSeen = 0;
        int peSeen = 0;
        int mpSeen = 0;
        int seSeen = 0;

        for (final Library library : libraries) {

            final Library.Type libType = library.getType();
            final String prefix = libType.toString();
            final String groupName;

            switch (libType) {
            case OPE:
                groupName = prefix + (++opeSeen);
                break;
            case PE:
                groupName = prefix + (++peSeen);
                break;
            case MP:
                groupName = prefix + (++mpSeen);
                break;
            case SE:
                groupName = prefix + (++seSeen);
                break;
            default:
                // Unnumbered for any other type
                groupName = prefix;
            }

            namesInOrder.add(groupName);

            final String fileSpec;
            if (libType == Library.Type.SE) {
                fileSpec = library.getFile1().getAbsolutePath();
            } else {
                fileSpec = createGlobPattern(library.getFile1(), library.getFile2());
            }

            final String row = StringUtils.join(new String[] { groupName, library.getName(), fileSpec }, ", ");

            // An unset encoding is treated the same as phred64
            final boolean isPhred33 = library.getPhred() != null && library.getPhred() != Library.Phred.PHRED_64;
            if (isPhred33) {
                linesPhred33.add(row);
            } else {
                linesPhred64.add(row);
            }
        }

        final File outputDir = this.getArgs().getOutputDir();

        // Only write a CSV when it contains more than the header
        File phred33Csv = null;
        if (linesPhred33.size() > 1) {
            phred33Csv = new File(outputDir, "in_groups_phred33.csv");
            FileUtils.writeLines(phred33Csv, linesPhred33);
        }

        File phred64Csv = null;
        if (linesPhred64.size() > 1) {
            phred64Csv = new File(outputDir, "in_groups_phred64.csv");
            FileUtils.writeLines(phred64Csv, linesPhred64);
        }

        return new GroupInfo(phred33Csv, phred64Csv, namesInOrder);
    }

    /**
     * Builds a glob pattern matching both files of a pair by replacing the single character in which their
     * absolute paths differ with {@code ?}.
     *
     * @param file1 first file of the pair
     * @param file2 second file of the pair
     * @return the absolute path of {@code file1} with the differing character replaced by {@code ?}
     * @throws IOException if the absolute paths differ in length, are identical, or differ in more than one
     *                     character
     */
    public String createGlobPattern(File file1, File file2) throws IOException {

        final String first = file1.getAbsolutePath();
        final String second = file2.getAbsolutePath();
        final int length = first.length();

        if (length != second.length()) {
            throw new IOException("Can't create GLOB pattern of these two file paths: " + first + ", "
                    + second + "; "
                    + "paths must be the same length and differ in only one character (i.e. the file pair id)");
        }

        // Count every mismatching position, remembering the most recent one
        int mismatches = 0;
        int mismatchAt = 0;
        int pos = 0;
        while (pos < length) {
            if (first.charAt(pos) != second.charAt(pos)) {
                mismatchAt = pos;
                mismatches++;
            }
            pos++;
        }

        if (mismatches == 0) {
            throw new IOException("File paths are identical can't create glob pattern: " + first);
        }

        if (mismatches > 1) {
            throw new IOException("Found " + mismatches + " differences in file paths: " + first + ", "
                    + second + " Can only create glob pattern if there is a single character difference");
        }

        // Exactly one mismatch: swap that character for the single-character wildcard
        final char[] pattern = first.toCharArray();
        pattern[mismatchAt] = '?';
        return new String(pattern);
    }

    /**
     * Assembles the full shell command: cache the libraries, cache the groups (once per quality encoding that
     * has a group CSV), convert the cache into ALLPATHS inputs, then run ALLPATHS-LG. The individual commands
     * are joined with {@code "; "}.
     *
     * @return the combined command line
     */
    @Override
    public String getCommand() {

        final List<String> steps = new ArrayList<String>();
        final GroupInfo groups = this.groupInfo;

        // Step 1: cache libraries
        steps.add(makeCacheLibsCommand());

        // Step 2: cache groups, only for encodings that actually have a CSV
        final File phred33Csv = groups.getInGroupsPhred33();
        if (phred33Csv != null) {
            steps.add(makeCacheGroupsCommand(phred33Csv, false));
        }

        final File phred64Csv = groups.getInGroupsPhred64();
        if (phred64Csv != null) {
            steps.add(makeCacheGroupsCommand(phred64Csv, true));
        }

        // Step 3: cache to ALLPATHS-LG inputs
        steps.add(makeCacheToInputsCommand(groups.getGroupNames()));

        // Step 4: run ALLPATHS-LG
        steps.add(makeRunCommand());

        return StringUtils.join(steps, "; ");
    }

    /**
     * Builds the {@code CacheLibs.pl} invocation that adds the library CSV to the cache directory.
     *
     * @return the command line
     */
    private String makeCacheLibsCommand() {

        final String cachePath = this.cacheDir.getAbsolutePath();
        final String libsCsvPath = this.inLibs.getAbsolutePath();

        return String.format("CacheLibs.pl CACHE_DIR=%s IN_LIBS_CSV=%s ACTION=Add OVERWRITE=True",
                cachePath, libsCsvPath);
    }

    /**
     * Builds the {@code CacheGroups.pl} invocation that adds one group CSV to the cache directory, using a
     * {@code tmp} directory inside the cache directory as TMP_DIR.
     *
     * @param inputCsv group CSV to add
     * @param phred64  whether to pass {@code PHRED_64=True} ({@code False} otherwise)
     * @return the command line
     */
    protected String makeCacheGroupsCommand(File inputCsv, boolean phred64) {

        final File tmpDir = new File(this.cacheDir, "tmp");

        final List<String> tokens = new ArrayList<String>();
        tokens.add("CacheGroups.pl");
        tokens.add("CACHE_DIR=" + this.cacheDir.getAbsolutePath());
        tokens.add("IN_GROUPS_CSV=" + inputCsv.getAbsolutePath());
        tokens.add("TMP_DIR=" + tmpDir.getAbsolutePath());
        tokens.add("ACTION=Add");
        if (phred64) {
            tokens.add("PHRED_64=True");
        } else {
            tokens.add("PHRED_64=False");
        }
        tokens.add("OVERWRITE=True");

        return StringUtils.join(tokens, " ");
    }

    /**
     * Builds the {@code CacheToAllPathsInputs.pl} invocation that turns the cached groups into ALLPATHS inputs
     * in the data directory.
     * <p>
     * {@code GENOME_SIZE} is only passed when a positive genome size is known. {@code COVERAGES} is only passed
     * when, in addition, a desired coverage has been set; previously an unset coverage (-1) produced a dangling
     * {@code COVERAGES=} argument with no value.
     *
     * @param groupNames names of the groups to convert
     * @return the command line
     */
    protected String makeCacheToInputsCommand(List<String> groupNames) {

        Args args = this.getArgs();
        String groupsString = makeGroupsString(groupNames);
        // Empty when no desired coverage has been set
        String coverageString = makeCoverageString(groupNames.size(), args.getDesiredCoverage());

        StringBuilder command = new StringBuilder("CacheToAllPathsInputs.pl ");
        command.append("CACHE_DIR=").append(this.cacheDir.getAbsolutePath()).append(" ");
        command.append("DATA_DIR=").append(this.dataDir.getAbsolutePath()).append(" ");
        command.append("GROUPS=").append(groupsString).append(" ");

        if (this.genomeSize > 0) {
            command.append("GENOME_SIZE=").append(this.genomeSize).append(" ");

            // Never emit a value-less COVERAGES= argument
            if (!coverageString.isEmpty()) {
                command.append("COVERAGES=").append(coverageString).append(" ");
            }
        }

        command.append("PLOIDY=").append(args.getOrganism().getPloidy());

        return command.toString();
    }

    /**
     * Formats group names as a single-quoted, brace-wrapped, comma-separated list, e.g. {@code '{a,b}'}.
     *
     * @param groupNames names to format
     * @return the formatted list
     */
    private String makeGroupsString(List<String> groupNames) {

        final StringBuilder formatted = new StringBuilder();
        formatted.append("'{");
        formatted.append(StringUtils.join(groupNames, ","));
        formatted.append("}'");
        return formatted.toString();
    }

    /**
     * Formats the desired coverage repeated {@code size} times as a single-quoted, brace-wrapped,
     * comma-separated list, e.g. {@code '{50,50}'}.
     *
     * @param size            number of times to repeat the coverage value
     * @param desiredCoverage coverage value, or -1 when none has been set
     * @return the formatted list, or an empty string when {@code desiredCoverage} is -1
     */
    private String makeCoverageString(int size, int desiredCoverage) {

        if (desiredCoverage == -1) {
            return "";
        }

        final String value = Integer.toString(desiredCoverage);
        final StringBuilder formatted = new StringBuilder("'{");

        for (int index = 0; index < size; index++) {
            if (index > 0) {
                formatted.append(',');
            }
            formatted.append(value);
        }

        return formatted.append("}'").toString();
    }

    /**
     * Builds the {@code RunAllPathsLG} invocation, using the output directory as PRE, the organism name (spaces
     * replaced by underscores) as REFERENCE_NAME and the configured thread count.
     *
     * @return the command line
     */
    protected String makeRunCommand() {

        final Args runArgs = this.getArgs();
        final String prePath = runArgs.getOutputDir().getAbsolutePath();
        final String referenceName = runArgs.getOrganism().getName().replaceAll(" ", "_");

        final StringBuilder command = new StringBuilder("RunAllPathsLG");
        command.append(" PRE=").append(prePath);
        command.append(" REFERENCE_NAME=").append(referenceName);
        command.append(" DATA_SUBDIR=data");
        command.append(" RUN=rampart");
        command.append(" THREADS=").append(runArgs.getThreads());
        command.append(" OVERWRITE=True");
        command.append(" TARGETS=full_eval");

        return command.toString();
    }

    /**
     * Arguments for {@link AllpathsLgV50}: the desired coverage and the organism, on top of whatever the
     * superclass holds (output directory, libraries, threads, memory).
     * <p>
     * The command-line / argument-map hooks are currently unimplemented stubs; values are exchanged through
     * the setters and through {@link #setDeBruijnAutoArgs(GenericDeBruijnAutoArgs)}.
     */
    @MetaInfServices(DeBruijnAutoArgs.class)
    public static class Args extends AbstractAssemblerArgs implements DeBruijnAutoArgs {

        /** Sentinel meaning that no desired coverage has been set. */
        public static final int DEFAULT_DESIRED_COVERAGE = -1;
        public static final int DEFAULT_THREADS = 1;

        private int desiredCoverage;
        private Organism organism;

        /**
         * Creates arguments with no desired coverage and no organism.
         */
        public Args() {
            super(new Params(), NAME);

            this.desiredCoverage = DEFAULT_DESIRED_COVERAGE;
            this.organism = null;
        }

        /**
         * @return the parameter set held by the superclass, cast to {@link Params}
         */
        public Params getParams() {
            return (Params) this.params;
        }

        /**
         * @param desiredCoverage coverage value to store
         */
        public void setDesiredCoverage(int desiredCoverage) {
            this.desiredCoverage = desiredCoverage;
        }

        /**
         * @return the stored desired coverage, or {@link #DEFAULT_DESIRED_COVERAGE} if none was set
         */
        public int getDesiredCoverage() {
            return desiredCoverage;
        }

        /**
         * @return the organism, or {@code null} if none was set
         */
        public Organism getOrganism() {
            return organism;
        }

        /**
         * @param organism organism to store
         */
        public void setOrganism(Organism organism) {
            this.organism = organism;
        }

        /**
         * No-op: {@link Params} defines no parameters, so there is nothing to read from the command line.
         */
        @Override
        public void parseCommandLine(CommandLine cmdLine) {
            // Intentionally empty
        }

        /**
         * NOTE(review): not implemented - always returns {@code null}; verify that no caller relies on a
         * non-null map.
         */
        @Override
        public ParamMap getArgMap() {
            return null;
        }

        /**
         * No-op: there are no options to set.
         */
        @Override
        protected void setOptionFromMapEntry(ConanParameter param, String value) {
            // Intentionally empty
        }

        /**
         * No-op: there are no arguments to set.
         */
        @Override
        protected void setArgFromMapEntry(ConanParameter param, String value) {
            // Intentionally empty
        }

        /**
         * NOTE(review): not implemented - the supplied map is ignored.
         */
        @Override
        public void setFromArgMap(ParamMap pvp) {
            // Intentionally empty
        }

        /**
         * Copies the output directory, libraries, threads, memory and organism into a generic args object.
         * The desired coverage is not part of the copy.
         *
         * @return a new generic args object populated from this instance
         */
        @Override
        public GenericDeBruijnAutoArgs getDeBruijnAutoArgs() {

            GenericDeBruijnAutoArgs args = new GenericDeBruijnAutoArgs();

            args.setOutputDir(this.getOutputDir());
            args.setLibraries(this.getLibs());
            args.setThreads(this.getThreads());
            args.setMemory(this.getMaxMemUsageMB());
            args.setOrganism(this.organism);

            return args;
        }

        /**
         * Overwrites the output directory, libraries, threads, memory and organism from a generic args object.
         *
         * @param args source of the values
         */
        @Override
        public void setDeBruijnAutoArgs(GenericDeBruijnAutoArgs args) {

            this.outputDir = args.getOutputDir();
            this.libs = args.getLibraries();
            this.threads = args.getThreads();
            this.maxMemUsageMB = args.getMemory();

            this.organism = args.getOrganism();
        }
    }

    /**
     * Parameter set for {@link AllpathsLgV50}; it currently declares no parameters.
     */
    public static class Params extends AbstractProcessParams {

        /**
         * @return a new, empty parameter array
         */
        @Override
        public ConanParameter[] getConanParametersAsArray() {
            final ConanParameter[] none = new ConanParameter[] {};
            return none;
        }
    }

}