Java tutorial
/* This file is part of RUbioSeq-GUI. RUbioSeq-GUI is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. RUbioSeq-GUI is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with RUbioSeq-GUI. If not, see <http://www.gnu.org/licenses/>. */ package es.uvigo.ei.sing.rubioseq.gui.view.models.experiments; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.Arrays; import java.util.LinkedList; import java.util.List; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.input.SAXBuilder; import org.jdom2.output.Format; import org.jdom2.output.XMLOutputter; import es.uvigo.ei.sing.rubioseq.gui.macros.InvalidRUbioSeqFile; import es.uvigo.ei.sing.rubioseq.gui.macros.RUbioSeqFile; import es.uvigo.ei.sing.rubioseq.gui.util.Utils; import es.uvigo.ei.sing.rubioseq.gui.view.models.experiments.Sample.SampleType; import es.uvigo.ei.sing.rubioseq.gui.view.models.progress.Measurable; import es.uvigo.ei.sing.rubioseq.gui.view.models.progress.RUbioSeqEvent; /** * * @author hlfernandez * */ public class CopyNumberVariationExperiment implements Measurable, RUbioSeqExperiment { public static final String CONFIG_DATA = "configData"; public static final String GENREF = "GenRef"; public static final String DBSNPANNOT = "DbSnpAnnot"; public static final String INDELANNOT = "IndelAnnot"; public static final String INTERVALS = "Intervals"; public static final String KNOWNINDELS = "KnownIndels"; public static final String PLATTFORM = "Platform"; public static final String CHECKCASAVA = "checkCasava"; public static final String DIROUTBASE = "dirOutBase"; public static final String PROJECTID = "ProjectId"; public static final String USERNAME = "UserName"; public static final String INDIRPREPROCESS = "InDirPreProcess"; public static final String SAMPLE = "Sample"; public static final String SAMPLE_NAME = "SampleName"; public static final String SAMPLE_FILES = "SampleFiles"; public static final String SAMPLE_SUFFIX = "SampleSuffix"; public static final String SAMPLE_TYPE = "SampleType"; public static final String FASTQC = "fastqc"; public static final String EXTRACONTRA = "extraContra"; public final static String BASELINE = "baseline"; public static final String MDFLAG = "MDFlag"; public static final String QUEUESGEPROJECT = "queueSGEProject"; private RUbioSeqFile genRefPath;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/reference/hg19.chr20.fa"; private RUbioSeqFile dbSnpAnnotPath;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/reference/dbsnp_132.hg19.chr20.vcf"; private RUbioSeqFile indelAnnotPath;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/reference/refseqhg19.chr20.rod"; private RUbioSeqFile intervalsPath;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/reference/intervals_hg19_chr20.bed"; private List<KnownIndels> knownIndels = new LinkedList<KnownIndels>(); private PlattformTech plattform; private Integer checkCasava = 1; private Integer checkCasava_DV = 1; private RUbioSeqFile dirOutBase;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/"; private String projectId = "experiment-output"; private String userName = "Undefined"; private String userName_DV = "Undefined"; private RUbioSeqFile dataInDirpreProcess;// = "/media/hlfernandez/sd128GB/Investigacion/Colaboraciones/RubioSeq/data/VariantTestData/"; private Integer fastqc = 0; private Integer fastqc_DV = 0; private Integer mDFlag = 1; private Integer mDFlag_DV = 1; private String extraContra = ""; private String extraContra_DV = ""; private RUbioSeqFile baseline; private RUbioSeqFile baseline_DV = null; private String queueSGEProject = ""; private String queueSGEProject_DV = ""; private List<Sample> samples = new LinkedList<Sample>(); public enum PlattformTech { ILLUMINA("illumina"), SOLID("solid"), ION("ion"); private final String displayName; private PlattformTech(String displayName) { this.displayName = displayName; } public String getDisplayName() { return displayName; } } public List<PlattformTech> getPlattformValues() { return Arrays.asList(PlattformTech.values()); } public List<Integer> getCasavaValues() { return Arrays.asList(new Integer[] { 0, 1 }); } public List<Integer> getFastqcValues() { return Arrays.asList(new Integer[] { 0, 1 }); } public List<Integer> getMdFlagValues() { return Arrays.asList(new Integer[] { 0, 1 }); } public RUbioSeqFile getGenRefPath() { return genRefPath; } public void setGenRefPath(RUbioSeqFile genRefPath) { this.genRefPath = genRefPath; } public RUbioSeqFile getDbSnpAnnotPath() { return dbSnpAnnotPath; } public void setDbSnpAnnotPath(RUbioSeqFile dbSnpAnnotPath) { this.dbSnpAnnotPath = dbSnpAnnotPath; } public RUbioSeqFile getIndelAnnotPath() { return indelAnnotPath; } public void setIndelAnnotPath(RUbioSeqFile indelAnnotPath) { this.indelAnnotPath = indelAnnotPath; } public RUbioSeqFile getIntervalsPath() { return intervalsPath; } public void setIntervalsPath(RUbioSeqFile intervalsPath) { this.intervalsPath = intervalsPath; } public List<KnownIndels> getKnownIndels() { return knownIndels; } public void setKnownIndels(List<KnownIndels> knownIndels) { this.knownIndels = knownIndels; } public PlattformTech getPlattform() { return plattform; } public void setPlattform(PlattformTech plattform) { this.plattform = plattform; } public Integer getCheckCasava() { return checkCasava; } public void setCheckCasava(Integer checkCasava) { this.checkCasava = checkCasava; } public RUbioSeqFile getDirOutBase() { return dirOutBase; } public void setDirOutBase(RUbioSeqFile dirOutBase) { this.dirOutBase = dirOutBase; } public String getProjectId() { return projectId; } public void setProjectId(String projectId) { this.projectId = projectId; } public String getUserName() { return userName; } public void setUserName(String userName) { this.userName = userName; } public RUbioSeqFile getDataInDirpreProcess() { return dataInDirpreProcess; } public void setDataInDirpreProcess(RUbioSeqFile dataInDirpreProcess) { this.dataInDirpreProcess = dataInDirpreProcess; } public String getExtraContra() { return extraContra; } public void setExtraContra(String extraContra) { this.extraContra = extraContra; } public RUbioSeqFile getBaseline() { return baseline; } public void setBaseline(RUbioSeqFile baseline) { this.baseline = baseline; } public Integer getFastqc() { return fastqc; } public void setFastqc(Integer fastqc) { this.fastqc = fastqc; } public Integer getmDFlag() { return mDFlag; } public void setmDFlag(Integer mDFlag) { this.mDFlag = mDFlag; } public String getQueueSGEProject() { return queueSGEProject; } public void setQueueSGEProject(String queueSGEProject) { this.queueSGEProject = queueSGEProject; } public List<Sample> getSamples() { return samples; } public void setSamples(List<Sample> samples) { this.samples = samples; } public void generateXMLConfigurationFile(File output) throws IOException { Document configurationFile = new Document(); Element configData = new Element(CONFIG_DATA); configurationFile.setRootElement(configData); configData.setAttribute(ExperimentUtils.BRANCH, ExperimentUtils.BRANCH_CNV); Element genRef = new Element(GENREF); genRef.addContent(this.getGenRefPath().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(genRef); Element dbSnpAnnot = new Element(DBSNPANNOT); dbSnpAnnot.addContent(this.getDbSnpAnnotPath().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(dbSnpAnnot); Element indelAnnot = new Element(INDELANNOT); indelAnnot.addContent(this.getIndelAnnotPath().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(indelAnnot); Element intervals = new Element(INTERVALS); intervals.addContent(this.getIntervalsPath().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(intervals); if (this.getKnownIndels().size() > 0) { for (KnownIndels kI : this.getKnownIndels()) { Element knownIndels = new Element(KNOWNINDELS); knownIndels.addContent(kI.getFile().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(knownIndels); } } Element plattform = new Element(PLATTFORM); plattform.addContent(this.getPlattform().getDisplayName()); configurationFile.getRootElement().addContent(plattform); if (!this.getCheckCasava().equals(checkCasava_DV)) { Element checkCasava = new Element(CHECKCASAVA); checkCasava.addContent(this.getCheckCasava().toString()); configurationFile.getRootElement().addContent(checkCasava); } Element dirOutBase = new Element(DIROUTBASE); dirOutBase.addContent(this.getDirOutBase().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(dirOutBase); Element projectId = new Element(PROJECTID); projectId.addContent(this.getProjectId()); configurationFile.getRootElement().addContent(projectId); if (!this.getUserName().equals(userName_DV)) { Element userName = new Element(USERNAME); userName.addContent(this.getUserName()); configurationFile.getRootElement().addContent(userName); } Element dataInDirpreProcess = new Element(INDIRPREPROCESS); dataInDirpreProcess.addContent(this.getDataInDirpreProcess().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(dataInDirpreProcess); for (Sample s : this.getSamples()) { Element sample = new Element(SAMPLE); Element sampleName = new Element(SAMPLE_NAME); sampleName.addContent(s.getSampleName()); sample.addContent(sampleName); Element sampleFiles = new Element(SAMPLE_FILES); sampleFiles.addContent(s.getSampleFiles()); sample.addContent(sampleFiles); Element sampleSuffix = new Element(SAMPLE_SUFFIX); sampleSuffix.addContent(s.getSampleSuffix()); sample.addContent(sampleSuffix); Element sampleType = new Element(SAMPLE_TYPE); sampleType.addContent(s.getSampleType().getDisplayName()); sample.addContent(sampleType); configurationFile.getRootElement().addContent(sample); } if (!this.getFastqc().equals(fastqc_DV)) { Element fastqc = new Element(FASTQC); fastqc.addContent(this.getFastqc().toString()); configurationFile.getRootElement().addContent(fastqc); } if (!this.getExtraContra().equals(extraContra_DV)) { Element extraContra = new Element(EXTRACONTRA); extraContra.addContent(this.getExtraContra().toString()); configurationFile.getRootElement().addContent(extraContra); } if (this.getBaseline() != baseline_DV) { Element baseline = new Element(BASELINE); baseline.addContent(this.getBaseline().getFile().getAbsolutePath()); configurationFile.getRootElement().addContent(baseline); } if (!this.getmDFlag().equals(mDFlag_DV)) { Element mDFlag = new Element(MDFLAG); mDFlag.addContent(this.getmDFlag().toString()); configurationFile.getRootElement().addContent(mDFlag); } if (!this.getQueueSGEProject().equals(queueSGEProject_DV)) { Element queueSGEProject = new Element(QUEUESGEPROJECT); queueSGEProject.addContent(this.getQueueSGEProject().toString()); configurationFile.getRootElement().addContent(queueSGEProject); } XMLOutputter xmlOutput = new XMLOutputter(); xmlOutput.setFormat(Format.getPrettyFormat()); xmlOutput.output(configurationFile, new FileWriter(output)); } public Integer getCheckCasava_DV() { return checkCasava_DV; } public String getUserName_DV() { return userName_DV; } public Integer getFastqc_DV() { return fastqc_DV; } public Integer getmDFlag_DV() { return mDFlag_DV; } public RUbioSeqFile getBaseline_DV() { return baseline_DV; } public String getExtraContra_DV() { return extraContra_DV; } public String getQueueSGEProject_DV() { return queueSGEProject_DV; } public static String getWorkingDirectory(File inputFile) { SAXBuilder saxBuilder = new SAXBuilder(); try { Document document = saxBuilder.build(inputFile); Element configData = document.getRootElement(); Element dirOutBase = configData.getChild(DIROUTBASE); Element projectId = configData.getChild(PROJECTID); if (dirOutBase.getValue().endsWith("/")) { return dirOutBase.getValue() + projectId.getValue(); } else { return dirOutBase.getValue() + "/" + projectId.getValue(); } } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return ""; } public String getWorkingDirectory() { return this.getDirOutBase().getFile().getAbsolutePath() + "/" + this.getProjectId(); } public boolean validElement(Element e) { return e != null && e.getValue() != null && !e.getValue().equals(""); } public void loadDataFromFile(File editFile) throws InvalidRubioSeqParameterException { SAXBuilder saxBuilder = new SAXBuilder(); try { Document document = saxBuilder.build(editFile); Element configData = document.getRootElement(); Element genRef = configData.getChild(GENREF); if (validElement(genRef)) { this.setGenRefPath(Utils.getRUbioSeqFile(genRef.getValue())); } Element dbSnpAnnot = configData.getChild(DBSNPANNOT); if (validElement(dbSnpAnnot)) { this.setDbSnpAnnotPath(Utils.getRUbioSeqFile(dbSnpAnnot.getValue())); } Element indelAnnot = configData.getChild(INDELANNOT); if (validElement(indelAnnot)) { this.setIndelAnnotPath(Utils.getRUbioSeqFile(indelAnnot.getValue())); } Element intervals = configData.getChild(INTERVALS); if (validElement(intervals)) { this.setIntervalsPath(Utils.getRUbioSeqFile(intervals.getValue())); } for (Element knownIndels : configData.getChildren(KNOWNINDELS)) { if (validElement(knownIndels)) { KnownIndels kI = new KnownIndels(); kI.setFile(Utils.getRUbioSeqFile(knownIndels.getValue())); this.knownIndels.add(kI); } } Element plattform = configData.getChild(PLATTFORM); if (validElement(plattform)) { String plattformValue = plattform.getValue().toLowerCase(); if (plattformValue.equals("illumina")) { this.setPlattform(PlattformTech.ILLUMINA); } else if (plattformValue.equals("ion")) { this.setPlattform(PlattformTech.ION); } else if (plattformValue.equals("solid")) { this.setPlattform(PlattformTech.SOLID); } } Element checkCasava = configData.getChild(CHECKCASAVA); if (validElement(checkCasava)) { this.setCheckCasava(Integer.valueOf(checkCasava.getValue())); } Element dirOutBase = configData.getChild(DIROUTBASE); if (validElement(dirOutBase)) { this.setDirOutBase(Utils.getRUbioSeqFile(dirOutBase.getValue())); } Element projectId = configData.getChild(PROJECTID); if (validElement(projectId)) { this.setProjectId(projectId.getValue()); } Element userName = configData.getChild(USERNAME); if (validElement(userName)) { this.setUserName(userName.getValue()); } Element dataInDirpreProcess = configData.getChild(INDIRPREPROCESS); if (validElement(dataInDirpreProcess)) { this.setDataInDirpreProcess(Utils.getRUbioSeqFile(dataInDirpreProcess.getValue())); } for (Element sampleElement : configData.getChildren(SAMPLE)) { Sample newSample = new Sample(); Element sampleName = sampleElement.getChild(SAMPLE_NAME); if (validElement(sampleName)) { newSample.setSampleName(sampleName.getValue()); } Element sampleFiles = sampleElement.getChild(SAMPLE_FILES); if (validElement(sampleFiles)) { newSample.setSampleFiles(sampleFiles.getValue()); } Element sampleSuffix = sampleElement.getChild(SAMPLE_SUFFIX); if (validElement(sampleSuffix)) { newSample.setSampleSuffix(sampleSuffix.getValue()); } Element sampleType = sampleElement.getChild(SAMPLE_TYPE); if (validElement(sampleType)) { newSample.setSampleType( sampleType.getValue().equals("1") ? SampleType.SingleEnd : SampleType.PairedEnd); } this.getSamples().add(newSample); } Element fastqc = configData.getChild(FASTQC); if (validElement(fastqc)) { this.setFastqc(Integer.valueOf(fastqc.getValue())); } Element extraContra = configData.getChild(EXTRACONTRA); if (validElement(extraContra)) { this.setExtraContra(extraContra.getValue()); } Element baseline = configData.getChild(BASELINE); if (validElement(baseline)) { this.setBaseline(Utils.getRUbioSeqFile(baseline.getValue())); } Element mDFlag = configData.getChild(MDFLAG); if (validElement(mDFlag)) { this.setmDFlag(Integer.valueOf(mDFlag.getValue())); } Element queueSGEProject = configData.getChild(QUEUESGEPROJECT); if (validElement(queueSGEProject)) { this.setQueueSGEProject(queueSGEProject.getValue()); } } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { throw new InvalidRubioSeqParameterException(InvalidRubioSeqParameterException.DEFAULT_MESSAGE); } } @Override public List<String> getPathsToWatch() { List<String> pathsToWatch = new LinkedList<String>(); String workingDirectory = this.getWorkingDirectory(); for (Sample s : this.getSamples()) { pathsToWatch.add(workingDirectory + "/" + s.getSampleName()); } return pathsToWatch; } @Override public double getIncrement(RUbioSeqEvent event, int numStages) { System.err.print("\t[SingleNucleotideVariantExperiment] Get increment of execution level " + event.getExecutionLevel()); int k = event.getTotalSteps(); switch (event.getExecutionLevel()) { case 0: return 0; case 1: /* * Stage 1. Short-read alignment with a combination of BWA and BFAST aligners. * By sample. * Possible cases: * - samples not splitted: Total should be incremented in (1 / (samples x stages x k) ) x 100 * - samples splitted: Total should be incremented in (1 / ((sample_splits + 1 )x samples x stages x k) ) x 100 * (sample_splits + 1: this 1 extra is for the concatenation step that will be received as a SPLIT_FILES_CONCAT event). */ int sampleSplits = getSampleSplits(event.getSampleName()); if (sampleSplits == 1) { System.err.println("\t" + (double) 100 / (double) (getNumSamples() * numStages * k)); return (double) 100 / (double) (getNumSamples() * numStages * k); } else { System.err.println( "\t" + (double) 100 / (double) (getNumSamples() * numStages * k * (sampleSplits + 1))); return (double) 100 / (double) (getNumSamples() * numStages * k * (sampleSplits + 1)); } case 2: /* * 2. GATK-based realign, marks duplication and recalibration steps. * By sample. * Total should be incremented in (1 / (samples x stages x k) ) x 100 */ System.err.println("\t" + 100 / (getNumSamples() * numStages * k)); return (double) 100 / (double) (getNumSamples() * numStages * k); case 3: /* * 3. Cnvs detection(CONTRA). * If there is a baseline, progress is by (tumor) sample. If there is not a baseline and the * user provides a normal sample, progress is by pairs of samples. * Possible cases: * - If T/N Total should be incremented in (1 / ((samples/2) x stages x k) ) x 100 (By pairs of samples) * - If baseline Total should be incremented in (1 / (samples x stages x k) ) x 100 (By sample) */ if (getBaseline() != getBaseline_DV()) { System.err.println("\t" + 100 / (getNumSamples() * numStages * k)); return (double) 100 / (double) (getNumSamples() * numStages * k); } else { System.err.println("\t" + 100 / ((getNumSamples() / 2) * numStages * k)); return (double) 100 / (double) ((double) (getNumSamples() / (double) 2) * numStages * k); } default: /* * If execution level is not 1,2,3 or 4, we must se if it is an extra stage. */ if (event.getExtraStage().equals(RUbioSeqEvent.ExtraStage.SPLIT_FILES_CONCAT) && event.getMessage().equals(RUbioSeqEvent.SPLIT_FILES_CONCAT_MESSAGE)) { System.err.println("\t" + (double) 100 / (double) (getNumSamples() * numStages * k * (getSampleSplits(event.getSampleName()) + 1))); return (double) 100 / (double) (getNumSamples() * numStages * k * (getSampleSplits(event.getSampleName()) + 1)); } return 0; } } public int getNumSamples() { return this.getSamples().size(); } public int getSampleSplits(String sampleName) { for (Sample s : this.getSamples()) { if (s.getSampleName().equals(sampleName) // We get the sampleName (case of log_CON_sampleName.txt) || s.getSampleFiles().contains(sampleName) // We are passed the name of a sampleSplit (case of log_S1_sampleSplitName.txt) ) { try { String[] sampleSplit = s.getSampleFiles().split(":"); return sampleSplit.length; } catch (Exception e) { return 0; } } } return 0; } public boolean checkPaths() { if (this.getGenRefPath() instanceof InvalidRUbioSeqFile || this.getDbSnpAnnotPath() instanceof InvalidRUbioSeqFile || this.getIndelAnnotPath() instanceof InvalidRUbioSeqFile || this.getIntervalsPath() instanceof InvalidRUbioSeqFile || this.getDirOutBase() instanceof InvalidRUbioSeqFile || (this.getBaseline() != null && this.getBaseline() instanceof InvalidRUbioSeqFile) || this.getDataInDirpreProcess() instanceof InvalidRUbioSeqFile) return false; else return true; } @Override public boolean checkConfiguration() { return this.checkPaths(); } @Override public int getStagesCount(int executionLevel) { return executionLevel > 0 ? 1 : 3; } }