es.uvigo.ei.sing.rubioseq.gui.view.models.experiments.MethylationExperiment.java Source code

Java tutorial

Introduction

Here is the source code for es.uvigo.ei.sing.rubioseq.gui.view.models.experiments.MethylationExperiment.java

Source

/*
   This file is part of RUbioSeq-GUI.
    
   RUbioSeq-GUI is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
    
   RUbioSeq-GUI is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
    
   You should have received a copy of the GNU General Public License
   along with RUbioSeq-GUI.  If not, see <http://www.gnu.org/licenses/>.
*/
package es.uvigo.ei.sing.rubioseq.gui.view.models.experiments;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;

import es.uvigo.ei.sing.rubioseq.gui.macros.InvalidRUbioSeqFile;
import es.uvigo.ei.sing.rubioseq.gui.macros.RUbioSeqFile;
import es.uvigo.ei.sing.rubioseq.gui.util.Utils;
import es.uvigo.ei.sing.rubioseq.gui.view.models.progress.Measurable;
import es.uvigo.ei.sing.rubioseq.gui.view.models.progress.RUbioSeqEvent;

/**
 * Model of a methylation experiment: it stores the experiment parameters, writes them to an XML
 * configuration file, reads them back from such a file and computes progress increments.
 * <p>
 * Fields whose name ends in {@code _DV} hold the default value of the parameter with the same
 * name. Optional parameters are only written to the XML file when their current value differs
 * from that default.
 *
 * @author hlfernandez
 */
public class MethylationExperiment implements Measurable, RUbioSeqExperiment {

    /* Names of the XML elements used in the configuration file. */
    public static final String CONFIG_DATA = "configData";
    public static final String REFERENCEPATH = "referencePath";
    public static final String INTERVALS = "intervals";
    public static final String PLATTFORM = "platform";
    public static final String PROJECT_COMPLETE_PATH = "projectCompletePath";
    public static final String SAMPLE1 = "sample1";
    public static final String SAMPLE2 = "sample2";
    public static final String READSPATH = "readsPath";
    public static final String SEED_LENGTH = "seed_length";
    public static final String NUM_MIS = "num_mis";
    public static final String TRIMTAGLENGTH = "trimTagLength";
    public static final String MINQUAL = "minQual";
    public static final String DEPTHFILTER = "depthFilter";
    public static final String METHYLTYPE = "methylType";
    public static final String CONTEXT = "context";
    public static final String MULTIEXEC = "multiexec";
    public static final String FASTQC = "fastqc";
    public static final String QUEUESGEPROJECT = "queueSGEProject";

    private RUbioSeqFile referencePath;
    private RUbioSeqFile intervalsPath;
    private final RUbioSeqFile intervalsPath_DV = null;
    private PlattformTech plattform;
    private RUbioSeqFile projectCompletePath;
    private List<MethylationSample> samples = new LinkedList<MethylationSample>();
    private RUbioSeqFile readsPath;
    private Integer seedLength = 28; /* Minimum: 5 */
    private final Integer seedLength_DV = 28;
    private Integer numMis = 2; /* 0..3 */
    private final Integer numMis_DV = 2;
    private String trimTagLength = "";
    private final String trimTagLength_DV = "";
    private Double minQual = 0.0;
    private final Double minQual_DV = 0.0;
    private Double depthFilter = 0.0;
    private final Double depthFilter_DV = 0.0;
    private MethylType methylType;
    private ContextType contextType = ContextType.ALL;
    private final ContextType contextType_DV = ContextType.ALL;
    private Integer multiExec = 0;
    private final Integer multiExec_DV = 0;
    private Integer fastqc = 0;
    private final Integer fastqc_DV = 0;
    private String queueSGEProject = "";
    private final String queueSGEProject_DV = "";

    /**
     * @return the values accepted for the {@code multiexec} parameter (0 and 1).
     */
    public List<Integer> getMultiExecValues() {
        return Arrays.asList(0, 1);
    }

    /**
     * @return the values accepted for the {@code fastqc} parameter (0 and 1).
     */
    public List<Integer> getFastqcValues() {
        return Arrays.asList(0, 1);
    }

    /**
     * Methylation contexts selectable for the {@code context} parameter.
     */
    public enum ContextType {
        ALL("ALL"), CPG("CpG"), CHG("CHG"), CHH("CHH");

        private final String displayName;

        private ContextType(String displayName) {
            this.displayName = displayName;
        }

        public String getDisplayName() {
            return displayName;
        }
    }

    /**
     * @return every {@link ContextType} constant, in declaration order.
     */
    public List<ContextType> getContextTypeValues() {
        return Arrays.asList(ContextType.values());
    }

    /**
     * Values selectable for the {@code methylType} parameter.
     */
    public enum MethylType {
        LISTER("Lister"), COKUS("Cokus");

        private final String displayName;

        private MethylType(String displayName) {
            this.displayName = displayName;
        }

        public String getDisplayName() {
            return displayName;
        }
    }

    /**
     * @return every {@link MethylType} constant, in declaration order.
     */
    public List<MethylType> getMethylTypeValues() {
        return Arrays.asList(MethylType.values());
    }

    /**
     * Values selectable for the {@code platform} parameter. The display name is the text written
     * to the XML configuration file.
     */
    public enum PlattformTech {
        ILLUMINA("illumina"), FASTQ("fastq");

        private final String displayName;

        private PlattformTech(String displayName) {
            this.displayName = displayName;
        }

        public String getDisplayName() {
            return displayName;
        }
    }

    /**
     * @return every {@link PlattformTech} constant, in declaration order.
     */
    public List<PlattformTech> getPlattformValues() {
        return Arrays.asList(PlattformTech.values());
    }

    public RUbioSeqFile getReferencePath() {
        return referencePath;
    }

    public void setReferencePath(RUbioSeqFile referencePath) {
        this.referencePath = referencePath;
    }

    public RUbioSeqFile getIntervalsPath() {
        return intervalsPath;
    }

    public void setIntervalsPath(RUbioSeqFile intervalsPath) {
        this.intervalsPath = intervalsPath;
    }

    public PlattformTech getPlattform() {
        return plattform;
    }

    public void setPlattform(PlattformTech plattform) {
        this.plattform = plattform;
    }

    public RUbioSeqFile getProjectCompletePath() {
        return projectCompletePath;
    }

    public void setProjectCompletePath(RUbioSeqFile projectCompletePath) {
        this.projectCompletePath = projectCompletePath;
    }

    /**
     * @return the live, mutable list of samples of this experiment (not a copy).
     */
    public List<MethylationSample> getSamples() {
        return samples;
    }

    public void setSamples(List<MethylationSample> samples) {
        this.samples = samples;
    }

    public RUbioSeqFile getReadsPath() {
        return readsPath;
    }

    public void setReadsPath(RUbioSeqFile readsPath) {
        this.readsPath = readsPath;
    }

    public Integer getSeedLength() {
        return seedLength;
    }

    public void setSeedLength(Integer seedLength) {
        this.seedLength = seedLength;
    }

    public Integer getSeedLength_DV() {
        return seedLength_DV;
    }

    public Integer getNumMis() {
        return numMis;
    }

    public void setNumMis(Integer numMis) {
        this.numMis = numMis;
    }

    public Double getMinQual() {
        return minQual;
    }

    public void setMinQual(Double minQual) {
        this.minQual = minQual;
    }

    public Double getDepthFilter() {
        return depthFilter;
    }

    public void setDepthFilter(Double depthFilter) {
        this.depthFilter = depthFilter;
    }

    public MethylType getMethylType() {
        return methylType;
    }

    public void setMethylType(MethylType methylType) {
        this.methylType = methylType;
    }

    public ContextType getContextType() {
        return contextType;
    }

    public void setContextType(ContextType contextType) {
        this.contextType = contextType;
    }

    public Integer getMultiExec() {
        return multiExec;
    }

    public void setMultiExec(Integer multiExec) {
        this.multiExec = multiExec;
    }

    public Integer getFastqc() {
        return fastqc;
    }

    public void setFastqc(Integer fastqc) {
        this.fastqc = fastqc;
    }

    public String getQueueSGEProject() {
        return queueSGEProject;
    }

    public void setQueueSGEProject(String queueSGEProject) {
        this.queueSGEProject = queueSGEProject;
    }

    public RUbioSeqFile getIntervalsPath_DV() {
        return intervalsPath_DV;
    }

    public Integer getNumMis_DV() {
        return numMis_DV;
    }

    public Double getMinQual_DV() {
        return minQual_DV;
    }

    public Double getDepthFilter_DV() {
        return depthFilter_DV;
    }

    public ContextType getContextType_DV() {
        return contextType_DV;
    }

    public Integer getMultiExec_DV() {
        return multiExec_DV;
    }

    public Integer getFastqc_DV() {
        return fastqc_DV;
    }

    public String getQueueSGEProject_DV() {
        return queueSGEProject_DV;
    }

    public String getTrimTagLength() {
        return trimTagLength;
    }

    public void setTrimTagLength(String trimTagLength) {
        this.trimTagLength = trimTagLength;
    }

    public String getTrimTagLength_DV() {
        return trimTagLength_DV;
    }

    /**
     * Writes this experiment as a pretty-printed XML configuration file.
     * <p>
     * Reference path, reads path, project path, platform, methyl type and the samples are always
     * written; the remaining parameters are written only when they differ from their default
     * value. An optional parameter that is {@code null} is treated as not configured and omitted.
     *
     * @param output the file to write; an existing file is overwritten.
     * @throws IOException if the file cannot be opened or written.
     */
    public void generateXMLConfigurationFile(File output) throws IOException {
        Document configurationFile = new Document();
        Element configData = new Element(CONFIG_DATA);
        configurationFile.setRootElement(configData);
        configData.setAttribute(ExperimentUtils.BRANCH, ExperimentUtils.BRANCH_METHYLATION);

        // Mandatory parameters.
        appendChild(configData, REFERENCEPATH, this.getReferencePath().getFile().getAbsolutePath());
        appendChild(configData, READSPATH, this.getReadsPath().getFile().getAbsolutePath());
        appendChild(configData, PROJECT_COMPLETE_PATH, this.getProjectCompletePath().getFile().getAbsolutePath());
        appendChild(configData, PLATTFORM, this.getPlattform().getDisplayName());
        appendChild(configData, METHYLTYPE, this.getMethylType().toString());

        // All sample1 elements are written first and then all sample2 elements; only the file
        // name is stored, not the full path.
        for (MethylationSample ms : this.getSamples()) {
            appendChild(configData, SAMPLE1, ms.getSample1().getFile().getName());
        }
        for (MethylationSample ms : this.getSamples()) {
            if (ms.getSample2() != null) {
                appendChild(configData, SAMPLE2, ms.getSample2().getFile().getName());
            }
        }

        // Optional parameters, in the order used by the configuration file.
        appendIfNotDefault(configData, SEED_LENGTH, this.getSeedLength(), seedLength_DV);
        appendIfNotDefault(configData, NUM_MIS, this.getNumMis(), numMis_DV);

        // The default intervals path is null, so this is effectively a "was it set" check.
        RUbioSeqFile intervals = this.getIntervalsPath();
        if (intervals != intervalsPath_DV) {
            appendChild(configData, INTERVALS, intervals.getFile().getAbsolutePath());
        }

        // NOTE(review): the context is written with toString() (e.g. "CPG"), not with its
        // display name ("CpG"); loadDataFromFile accepts both spellings.
        appendIfNotDefault(configData, CONTEXT, this.getContextType(), contextType_DV);
        appendIfNotDefault(configData, TRIMTAGLENGTH, this.getTrimTagLength(), trimTagLength_DV);
        appendIfNotDefault(configData, MINQUAL, this.getMinQual(), minQual_DV);
        appendIfNotDefault(configData, FASTQC, this.getFastqc(), fastqc_DV);
        appendIfNotDefault(configData, DEPTHFILTER, this.getDepthFilter(), depthFilter_DV);
        appendIfNotDefault(configData, QUEUESGEPROJECT, this.getQueueSGEProject(), queueSGEProject_DV);
        appendIfNotDefault(configData, MULTIEXEC, this.getMultiExec(), multiExec_DV);

        XMLOutputter xmlOutput = new XMLOutputter();
        xmlOutput.setFormat(Format.getPrettyFormat());

        // Writing through an OutputStream lets JDOM encode the bytes with the encoding it
        // declares in the XML prolog (a FileWriter would use the platform charset instead).
        // The stream is always closed, even when writing fails.
        FileOutputStream out = new FileOutputStream(output);
        try {
            xmlOutput.output(configurationFile, out);
        } finally {
            out.close();
        }
    }

    /** Appends a child element called {@code name} containing {@code text} to {@code parent}. */
    private static void appendChild(Element parent, String name, String text) {
        Element child = new Element(name);
        child.addContent(text);
        parent.addContent(child);
    }

    /**
     * Appends a child element with the textual form of {@code value}, unless {@code value} is
     * {@code null} or equal to {@code defaultValue}.
     */
    private static void appendIfNotDefault(Element parent, String name, Object value, Object defaultValue) {
        if (value != null && !value.equals(defaultValue)) {
            appendChild(parent, name, value.toString());
        }
    }

    /**
     * Reads the project path stored in an XML configuration file.
     *
     * @param inputFile the XML configuration file to read.
     * @return the content of its {@code projectCompletePath} element, or an empty string when the
     *         file cannot be read or parsed or does not contain that element.
     */
    public static String getWorkingDirectory(File inputFile) {
        SAXBuilder saxBuilder = new SAXBuilder();
        try {
            Document document = saxBuilder.build(inputFile);
            Element projectCompletePath = document.getRootElement().getChild(PROJECT_COMPLETE_PATH);

            // A file without the element is reported like any other unreadable file.
            if (projectCompletePath != null) {
                return projectCompletePath.getValue();
            }
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * @return the absolute path of the project path of this experiment.
     */
    public String getWorkingDirectory() {
        return this.getProjectCompletePath().getFile().getAbsolutePath();
    }

    /**
     * @param e the element to check; may be {@code null}.
     * @return {@code true} if the element exists and has a non-empty value.
     */
    public boolean validElement(Element e) {
        return e != null && e.getValue() != null && !e.getValue().equals("");
    }

    /**
     * Loads the experiment parameters from an XML configuration file. Parameters whose element is
     * missing or empty keep their current value; the sample list is always rebuilt.
     * <p>
     * XML parsing and I/O errors are only printed to the standard error stream.
     *
     * @param editFile the XML configuration file to load.
     * @throws InvalidRubioSeqParameterException if any other error happens while reading the
     *         parameters (for instance, a value that is not a valid number).
     */
    public void loadDataFromFile(File editFile) throws InvalidRubioSeqParameterException {
        SAXBuilder saxBuilder = new SAXBuilder();
        try {
            Document document = saxBuilder.build(editFile);
            Element configData = document.getRootElement();

            Element referencePath = configData.getChild(REFERENCEPATH);
            if (validElement(referencePath)) {
                this.setReferencePath(Utils.getRUbioSeqFile(referencePath.getValue()));
            }

            Element intervals = configData.getChild(INTERVALS);
            if (validElement(intervals)) {
                this.setIntervalsPath(Utils.getRUbioSeqFile(intervals.getValue()));
            }

            Element plattform = configData.getChild(PLATTFORM);
            if (validElement(plattform)) {
                PlattformTech parsedPlattform = parsePlattform(plattform.getValue());
                if (parsedPlattform != null) {
                    this.setPlattform(parsedPlattform);
                }
            }

            Element projectCompletePath = configData.getChild(PROJECT_COMPLETE_PATH);
            if (validElement(projectCompletePath)) {
                this.setProjectCompletePath(Utils.getRUbioSeqFile(projectCompletePath.getValue(), false));
            }

            Element readsPath = configData.getChild(READSPATH);
            if (validElement(readsPath)) {
                this.setReadsPath(Utils.getRUbioSeqFile(readsPath.getValue()));
            }

            // sample1 and sample2 elements are paired by position: the i-th sample2 element (if
            // there is one) belongs to the i-th sample1 element.
            Iterator<Element> sample1It = configData.getChildren(SAMPLE1).iterator();
            Iterator<Element> sample2It = configData.getChildren(SAMPLE2).iterator();
            this.samples = new LinkedList<MethylationSample>();

            while (sample1It.hasNext()) {
                MethylationSample newMS = new MethylationSample();

                Element sample1 = sample1It.next();
                if (validElement(sample1)) {
                    newMS.setSample1(resolveReadFile(sample1.getValue()));
                }

                if (sample2It.hasNext()) {
                    Element sample2 = sample2It.next();
                    if (validElement(sample2)) {
                        newMS.setSample2(resolveReadFile(sample2.getValue()));
                    }
                }

                this.getSamples().add(newMS);
            }

            Element seedLength = configData.getChild(SEED_LENGTH);
            if (validElement(seedLength)) {
                this.setSeedLength(Integer.valueOf(seedLength.getValue()));
            }

            Element numMiss = configData.getChild(NUM_MIS);
            if (validElement(numMiss)) {
                this.setNumMis(Integer.valueOf(numMiss.getValue()));
            }

            // minQual is a Double and must be stored in minQual (not in numMis).
            Element minQual = configData.getChild(MINQUAL);
            if (validElement(minQual)) {
                this.setMinQual(Double.valueOf(minQual.getValue()));
            }

            Element depthFilter = configData.getChild(DEPTHFILTER);
            if (validElement(depthFilter)) {
                this.setDepthFilter(Double.valueOf(depthFilter.getValue()));
            }

            Element methylType = configData.getChild(METHYLTYPE);
            if (validElement(methylType)) {
                MethylType parsedMethylType = parseMethylType(methylType.getValue());
                if (parsedMethylType != null) {
                    this.setMethylType(parsedMethylType);
                }
            }

            Element contextType = configData.getChild(CONTEXT);
            if (validElement(contextType)) {
                ContextType parsedContextType = parseContextType(contextType.getValue());
                if (parsedContextType != null) {
                    this.setContextType(parsedContextType);
                }
            }

            Element multiExec = configData.getChild(MULTIEXEC);
            if (validElement(multiExec)) {
                this.setMultiExec(Integer.valueOf(multiExec.getValue()));
            }

            Element trimTagLength = configData.getChild(TRIMTAGLENGTH);
            if (validElement(trimTagLength)) {
                this.setTrimTagLength(trimTagLength.getValue());
            }

            Element fastqc = configData.getChild(FASTQC);
            if (validElement(fastqc)) {
                this.setFastqc(Integer.valueOf(fastqc.getValue()));
            }

            Element queueSGEProject = configData.getChild(QUEUESGEPROJECT);
            if (validElement(queueSGEProject)) {
                this.setQueueSGEProject(queueSGEProject.getValue());
            }

        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            throw new InvalidRubioSeqParameterException(InvalidRubioSeqParameterException.DEFAULT_MESSAGE);
        }
    }

    /** Builds the file of a sample from its file name, which is relative to the reads path. */
    private RUbioSeqFile resolveReadFile(String fileName) {
        String readsDirectory = Utils
                .getDirectoryNameWithFinalSlash(this.getReadsPath().getFile().getAbsolutePath());
        return Utils.getRUbioSeqFile(readsDirectory + fileName);
    }

    /** Returns the platform whose display name matches {@code value} ignoring case, or null. */
    private static PlattformTech parsePlattform(String value) {
        for (PlattformTech candidate : PlattformTech.values()) {
            if (candidate.getDisplayName().equalsIgnoreCase(value)) {
                return candidate;
            }
        }
        return null;
    }

    /** Returns the methyl type whose constant name matches {@code value} ignoring case, or null. */
    private static MethylType parseMethylType(String value) {
        for (MethylType candidate : MethylType.values()) {
            if (candidate.name().equalsIgnoreCase(value)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the context whose constant name or display name matches {@code value} ignoring
     * case, or null. Both spellings are accepted because generateXMLConfigurationFile writes the
     * constant name ("CPG") whereas the display name is "CpG".
     */
    private static ContextType parseContextType(String value) {
        for (ContextType candidate : ContextType.values()) {
            if (candidate.name().equalsIgnoreCase(value) || candidate.getDisplayName().equalsIgnoreCase(value)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Checks the consistency of the sample list: every sample must have a first file and either
     * no sample or every sample must have a second file.
     *
     * @return {@code true} if the sample list is consistent.
     */
    public boolean checkSamples() {
        int countSample1Instances = 0;
        int countSample2Instances = 0;
        for (MethylationSample ms : this.getSamples()) {
            if (ms.getSample1() == null) {
                return false;
            }
            countSample1Instances++;
            if (ms.getSample2() != null) {
                countSample2Instances++;
            }
        }
        return countSample2Instances == 0 || countSample1Instances == countSample2Instances;
    }

    @Override
    public List<String> getPathsToWatch() {
        List<String> pathsToWatch = new LinkedList<String>();
        String workingDirectory = this.getWorkingDirectory();
        if (this.getMultiExec().equals(1) && this.getSamples().size() > 1) {
            // Joint multisample mode --> All logs go to "multi"
            pathsToWatch.add(workingDirectory + "/" + "multi");
        } else {
            // Must watch one directory by <SampleName>
            for (MethylationSample s : this.getSamples()) {
                pathsToWatch.add(workingDirectory + "/" + s.getSample1().getFile().getName());
            }
        }
        return pathsToWatch;
    }

    @Override
    public double getIncrement(RUbioSeqEvent event, int numStages) {
        System.err.print("\t[MethylationExperiment] Get increment of execution level " + event.getExecutionLevel());
        int k = event.getTotalSteps();
        switch (event.getExecutionLevel()) {
        case 0:
            return 0;
        case 1:
            /*
             * Stage 1. Sequence alignment and methylation calling.
             * By sample. This phase may have an optional extra substage if the trimTagLength
             * parameter has been configured.
             * Possible cases:
             *    Without trimming: total is incremented in (1 / (samples x stages x k)) x 100
             *    With trimming, not paired: (1 / (2 x samples x stages x k)) x 100 (one step for
             *       the trim and another one for the alignment and duplicates marking).
             *    With trimming, paired: (1 / (3 x samples x stages x k)) x 100 (one step for trim
             *       pair1, another for trim pair2 and another one for the alignment and
             *       duplicates marking).
             */
            if (!this.getTrimTagLength().equals("")) {
                if (this.isPaired()) {
                    return reportIncrement(3 * getNumSamples() * numStages * k, " [With trimming + Paired]");
                }
                return reportIncrement(2 * getNumSamples() * numStages * k, " [With trimming + Not Paired]");
            }
            return reportIncrement(getNumSamples() * numStages * k, " [Without trimming]");
        case 2:
            /*
             * Stage 2. Methylation calls extraction.
             * This stage has two substages (two log files): extraction of methylation calls and
             * formatting of methylation files.
             * Two possible cases: by sample if multiExec = 0 and by group of samples if
             * multiExec = 1.
             *    By sample: (1 / (2 x samples x stages x k)) x 100 (formatting is done for each
             *       sample).
             *    By group of samples: (1 / ((samples + 1) x stages x k)) x 100.
             * NOTE(review): the previous version of this comment stated 2 x (samples + 1) for the
             * group case, but the code has always divided by (samples + 1) -- confirm which one
             * is intended.
             */
            if (getMultiExec().equals(0)) {
                return reportIncrement(2 * getNumSamples() * numStages * k, " [Multiexec = 0]");
            }
            return reportIncrement((getNumSamples() + 1) * numStages * k, " [Multiexec = 1]");
        case 3:
            /*
             * Stage 3. Intervals methylation calculation.
             * Two possible cases: by sample if multiExec = 0 and by group of samples if
             * multiExec = 1.
             *    By sample: (1 / (samples x stages x k)) x 100
             *    By group of samples: (1 / (stages x k)) x 100
             */
            if (getMultiExec().equals(0)) {
                return reportIncrement(getNumSamples() * numStages * k, " [Multiexec = 0]");
            }
            return reportIncrement(numStages * k, " [Multiexec = 1]");
        default:
            /*
             * Any other execution level produces no increment.
             */
            return 0;
        }
    }

    /**
     * Computes {@code 100 / divisor} with floating point arithmetic, prints it to the standard
     * error stream followed by {@code label} and returns it. The printed value is the returned
     * one, and a zero divisor yields an infinite value instead of an ArithmeticException.
     */
    private static double reportIncrement(int divisor, String label) {
        double increment = (double) 100 / (double) divisor;
        System.err.println("\t" + increment + label);
        return increment;
    }

    /**
     * @return {@code true} if the first sample has a second file; {@code false} otherwise,
     *         including when there are no samples.
     */
    public boolean isPaired() {
        Iterator<MethylationSample> it = this.getSamples().iterator();
        return it.hasNext() && it.next().getSample2() != null;
    }

    public int getNumSamples() {
        return this.getSamples().size();
    }

    /**
     * @return {@code false} if the reference, project, intervals or reads path is an
     *         {@link InvalidRUbioSeqFile}; {@code true} otherwise.
     */
    public boolean checkPaths() {
        // instanceof is false for null, so paths that are not set are not reported as invalid.
        return !(this.getReferencePath() instanceof InvalidRUbioSeqFile
                || this.getProjectCompletePath() instanceof InvalidRUbioSeqFile
                || this.getIntervalsPath() instanceof InvalidRUbioSeqFile
                || this.getReadsPath() instanceof InvalidRUbioSeqFile);
    }

    @Override
    public boolean checkConfiguration() {
        return this.checkPaths();
    }

    @Override
    public int getStagesCount(int executionLevel) {
        // A specific execution level (> 0) counts as a single stage.
        if (executionLevel > 0) {
            return 1;
        }
        // Otherwise there are two stages, plus a third one when an intervals path is set.
        return getIntervalsPath() != null ? 3 : 2;
    }
}