org.isatools.isatab.export.pride.DraftPrideExporter.java Source code

Java tutorial

Introduction

Here is the source code for org.isatools.isatab.export.pride.DraftPrideExporter.java

Source

/*
    
 The ISAconverter, ISAvalidator & BII Management Tool are components of the ISA software suite (http://www.isa-tools.org)
    
 Exhibit A
 The ISAconverter, ISAvalidator & BII Management Tool are licensed under the Mozilla Public License (MPL) version
 1.1/GPL version 2.0/LGPL version 2.1
    
 "The contents of this file are subject to the Mozilla Public License
 Version 1.1 (the "License"). You may not use this file except in compliance with the License.
 You may obtain copies of the Licenses at http://www.mozilla.org/MPL/MPL-1.1.html.
    
 Software distributed under the License is distributed on an "AS IS"
 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 License for the specific language governing rights and limitations
 under the License.
    
 The Original Code is the ISAconverter, ISAvalidator & BII Management Tool.
    
 The Initial Developer of the Original Code is the ISA Team (Eamonn Maguire, eamonnmag@gmail.com;
 Philippe Rocca-Serra, proccaserra@gmail.com; Susanna-Assunta Sansone, sa.sanson@gmail.com;
 http://www.isa-tools.org). All portions of the code written by the ISA Team are Copyright (c)
 2007-2011 ISA Team. All Rights Reserved.
    
 Contributor(s):
 Rocca-Serra P, Brandizi M, Maguire E, Sklyar N, Taylor C, Begley K, Field D,
 Harris S, Hide W, Hofmann O, Neumann S, Sterk P, Tong W, Sansone SA. ISA software suite:
 supporting standards-compliant experimental annotation and enabling curation at the community level.
 Bioinformatics 2010;26(18):2354-6.
    
 Alternatively, the contents of this file may be used under the terms of either the GNU General
 Public License Version 2 or later (the "GPL") - http://www.gnu.org/licenses/gpl-2.0.html, or
 the GNU Lesser General Public License Version 2.1 or later (the "LGPL") -
 http://www.gnu.org/licenses/lgpl-2.1.html, in which case the provisions of the GPL
 or the LGPL are applicable instead of those above. If you wish to allow use of your version
 of this file only under the terms of either the GPL or the LGPL, and not to allow others to
 use your version of this file under the terms of the MPL, indicate your decision by deleting
 the provisions above and replace them with the notice and other provisions required by the
 GPL or the LGPL. If you do not delete the provisions above, a recipient may use your version
 of this file under the terms of any one of the MPL, the GPL or the LGPL.
    
 Sponsors:
 The ISA Team and the ISA software suite have been funded by the EU Carcinogenomics project
 (http://www.carcinogenomics.eu), the UK BBSRC (http://www.bbsrc.ac.uk), the UK NERC-NEBC
 (http://nebc.nerc.ac.uk) and in part by the EU NuGO consortium (http://www.nugo.org/everyone).
    
 */

package org.isatools.isatab.export.pride;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.isatools.isatab.mapping.AssayTypeEntries;
import org.isatools.isatab.mapping.ISATABMapper;
import org.isatools.tablib.exceptions.TabIOException;
import org.isatools.tablib.exceptions.TabMissingValueException;
import org.isatools.tablib.utils.BIIObjectStore;
import org.isatools.tablib.utils.logging.TabNDC;
import uk.ac.ebi.bioinvindex.model.*;
import uk.ac.ebi.bioinvindex.model.processing.*;
import uk.ac.ebi.bioinvindex.model.term.*;
import uk.ac.ebi.bioinvindex.model.xref.ReferenceSource;
import uk.ac.ebi.bioinvindex.utils.datasourceload.DataLocationManager;
import uk.ac.ebi.bioinvindex.utils.i18n;
import uk.ac.ebi.bioinvindex.utils.processing.ExperimentalPipelineVisitor;
import uk.ac.ebi.bioinvindex.utils.processing.ProcessingVisitAction;
import uk.ac.ebi.pride.model.implementation.core.ExperimentImpl;
import uk.ac.ebi.pride.model.implementation.core.ProtocolStepImpl;
import uk.ac.ebi.pride.model.implementation.mzData.CvParamImpl;
import uk.ac.ebi.pride.model.implementation.mzData.UserParamImpl;
import uk.ac.ebi.pride.model.interfaces.core.Experiment;
import uk.ac.ebi.pride.model.interfaces.core.Identification;
import uk.ac.ebi.pride.model.interfaces.core.ProtocolStep;
import uk.ac.ebi.pride.model.interfaces.mzdata.CvParam;
import uk.ac.ebi.pride.model.interfaces.mzdata.MzData;
import uk.ac.ebi.pride.model.interfaces.mzdata.Param;
import uk.ac.ebi.pride.model.interfaces.mzdata.UserParam;
import uk.ac.ebi.pride.xml.MzDataXMLUnmarshaller;
import uk.ac.ebi.pride.xml.XMLMarshaller;

import java.io.*;
import java.util.*;

/**
 * The PRIDE exporter. This is a first version, TODO: needs to be refactored, modularised etc.
 * <p/>
 * <dl><dt>Date:</dt><dd>May 14, 2008</dd></dl>
 *
 * @author brandizi
 */
public class DraftPrideExporter implements ProcessingVisitAction {
    // The object store the BII model is read from (exposed by getStore())
    private final BIIObjectStore store;
    // Base directory of the export; export() writes into the pride/ sub-directory of this path
    private final String exportPath;
    // Base directory of the ISATAB submission; data file names met in the pipeline are prefixed with it
    private final String importPath;

    // Steps in the current assay (re-created by processAssay(), one entry appended by visit() per protocol application)
    private List<ProtocolStep> prideProtoSteps;
    // General params about the current assay (re-created by processAssay(), filled by visit() with factor values)
    private Set<Param> prideExperimentParams;
    // The mzData for the current assay: URL of the raw MS data node as found by visit();
    // export2PrideExperiment() prefixes it with importPath before reading the file
    private String mzDataPath;

    // The PRIDE files for the current assay (visit() stores them already prefixed with importPath)
    private String identificationsPath, peptidesPath, modificationsPath;

    // Pipeline visitor built around this exporter; processAssay() uses it to traverse the graph of an assay
    private final ExperimentalPipelineVisitor graphVisitor = new ExperimentalPipelineVisitor(this);

    protected static final Logger log = Logger.getLogger(DraftPrideExporter.class);

    /**
     * @return the object store this exporter was created with
     */
    public BIIObjectStore getStore() {
        return this.store;
    }

    /**
     * Creates an exporter bound to a BII object store and to a pair of input/output locations.
     *
     * @param store      the store holding the BII objects to be exported to PRIDE. It is usually produced by
     *                   {@link ISATABMapper} and is expected to describe a single investigation.
     * @param importPath the location the ISATAB was loaded from; mzData and identification files are looked up
     *                   relative to it
     * @param exportPath the location the results are written to. A pride/ directory is created inside it.
     */
    public DraftPrideExporter(BIIObjectStore store, String importPath, String exportPath) {
        this.exportPath = exportPath;
        this.importPath = importPath;
        this.store = store;
    }

    /**
     * Callback run for each element met while the experimental pipeline is walked by
     * {@link ExperimentalPipelineVisitor}. Depending on the kind of element, it collects:
     * <ul>
     * <li>one PRIDE protocol step per protocol application of a {@link Processing};</li>
     * <li>the characteristics having the factor role of a {@link MaterialNode};</li>
     * <li>the mzData location and the proteins/peptides/PTMs file locations of a {@link DataNode}.</li>
     * </ul>
     * Everything is accumulated in the per-assay fields of this exporter. Other kinds of elements are ignored.
     *
     * @param graphElement the pipeline element being visited
     * @return always true
     * @throws TabMissingValueException if a protocol type has no reference source
     */
    public boolean visit(GraphElement graphElement) {
        if (graphElement instanceof Processing<?, ?>) {
            exportProcessing((Processing<?, ?>) graphElement);
        } else if (graphElement instanceof MaterialNode) {
            collectMaterialFactors(((MaterialNode) graphElement).getMaterial());
        } else if (graphElement instanceof DataNode) {
            collectDataFiles(((DataNode) graphElement).getData());
        }
        return true;
    }

    /**
     * Appends to the current protocol steps one PRIDE step for each protocol application of the processing.
     */
    private void exportProcessing(Processing<?, ?> processing) {
        for (ProtocolApplication protoApp : processing.getProtocolApplications()) {
            // TODO: filter on the protocol type

            Set<CvParam> cvparams = new HashSet<CvParam>();
            HashSet<UserParam> uparams = new HashSet<UserParam>();

            // Processes the protocol
            //
            Protocol protocol = protoApp.getProtocol();
            if (protocol != null) {
                ProtocolType type = protocol.getType();
                if (type != null) {
                    ReferenceSource typeSrc = type.getSource();
                    if (typeSrc == null) {
                        throw new TabMissingValueException(i18n.msg("ontoterm_without_source", type));
                    }
                    uparams.add(new UserParamImpl("BioInvestigation Index Protocol Type", 0, type.getName()));
                    cvparams.add(new CvParamImpl(type.getAcc(), typeSrc.getAcc(), type.getName(), 0, ""));
                }
                // The index of the next two params depends on how many user params were added so far
                uparams.add(new UserParamImpl("BioInvestigation Index Protocol Name", 2 + uparams.size(),
                        protocol.getName()));
                uparams.add(new UserParamImpl("BioInvestigation Index Protocol Description", 2 + uparams.size(),
                        protocol.getDescription()));
            } else {
                log.trace("PRIDE exporting, no protocol defined for the processing " + processing.getAcc());
            }

            // Adds up the parameters
            //
            for (ParameterValue param : protoApp.getParameterValues()) {
                Parameter ptype = param.getType();
                Collection<Param> prideParams = exportPropertyValue(param, ptype.getOrder() + 3);

                // Factor values are reported at the experiment level as well
                if (ptype.getRole() == PropertyRole.FACTOR) {
                    prideExperimentParams.addAll(prideParams);
                }

                for (Param prideParam : prideParams) {
                    if (prideParam instanceof CvParam) {
                        cvparams.add((CvParam) prideParam);
                    } else if (prideParam instanceof UserParam) {
                        uparams.add((UserParam) prideParam);
                    }
                }
            }

            // The step index is the position the step takes in the list
            ProtocolStep prideProtoStep = new ProtocolStepImpl(prideProtoSteps.size(), cvparams, uparams);
            prideProtoSteps.add(prideProtoStep);
        }
    }

    /**
     * Adds to the experiment params the characteristics of the material that have the factor role.
     */
    private void collectMaterialFactors(Material material) {
        for (CharacteristicValue characteristic : material.getCharacteristicValues()) {
            Characteristic ctype = characteristic.getType();

            if (ctype.getRole() == PropertyRole.FACTOR) {
                prideExperimentParams.addAll(exportPropertyValue(characteristic, ctype.getOrder()));
            }
        }
    }

    /**
     * Records the files attached to a data node: the mzData URL for raw MS data, and the proteins, peptides and
     * PTMs files (prefixed with the import path) annotated on derived or normalized MS data. A blank or missing
     * annotation leaves the corresponding path untouched.
     */
    private void collectDataFiles(Data data) {
        String dataTypeAcc = data.getType().getAcc();

        if ("bii:ms_spec_raw_data".equals(dataTypeAcc)) {
            mzDataPath = data.getUrl();
        }

        if ("bii:ms_spec_derived_data".equals(dataTypeAcc)
                || "bii:ms_spec_normalized_data".equals(dataTypeAcc)) {
            String proteinsFile = StringUtils.trimToNull(data.getSingleAnnotationValue("proteinsFile"));
            if (proteinsFile != null) {
                identificationsPath = importPath + "/" + proteinsFile;
            }

            String peptidesFile = StringUtils.trimToNull(data.getSingleAnnotationValue("peptidesFile"));
            if (peptidesFile != null) {
                peptidesPath = importPath + "/" + peptidesFile;
            }

            String ptmsFile = StringUtils.trimToNull(data.getSingleAnnotationValue("ptmsFile"));
            if (ptmsFile != null) {
                modificationsPath = importPath + "/" + ptmsFile;
            }
        }
    }

    /**
     * Processes a BII property value and computes the corresponding list of {@link Param} which represents it
     * in PRIDE-XML.
     * <p/>
     * The list contains, in this order: the param(s) describing the property type (a user param when the type
     * is free text, a CV param when the first ontology term of the type has the same name as the type, both
     * otherwise); a CV param for the first ontology term of the value, if any; a "Factor Type" user param when
     * the type has the factor role. Only the first ontology term of the value and of the type is considered.
     * <p/>
     * Side effect: an ontology term with a blank accession gets its accession set to the empty string.
     *
     * @param value        the property value to export; its unit value, if present, is appended to the value string
     * @param initialOrder offset added to the order of the property type to get the index of every param
     * @return the PRIDE params representing the value
     */
    private static <PT extends Property<?>> List<Param> exportPropertyValue(PropertyValue<PT> value,
            int initialOrder) {
        OntologyTerm valoe = firstOntologyTerm(value.getOntologyTerms());

        PT type = value.getType();
        OntologyTerm typeoe = firstOntologyTerm(type.getOntologyTerms());

        List<Param> result = new ArrayList<Param>();

        String valueStr = value.getValue();
        UnitValue valueUnit = value.getUnit();
        if (valueUnit != null) {
            valueStr += " " + valueUnit.getValue();
        }

        // All the params generated for this value share the same index
        int order = initialOrder + type.getOrder();

        // Export the type
        //
        if (typeoe == null) {
            // The type is free text, let's map the value with a UD entry
            result.add(new UserParamImpl(type.getValue(), order, valueStr));
        } else {
            // Otherwise, we can create a CV entry
            //
            String typeoeAcc = StringUtils.trimToNull(typeoe.getAcc());
            String typeoeName = StringUtils.trimToEmpty(typeoe.getName());
            String typeValue = StringUtils.trimToEmpty(type.getValue());
            if (typeoeAcc == null) {
                log.error("The term '" + typeoeName
                        + "' has an empty accession, we are exporting it to PRIDE, but it won't probably work");
                typeoe.setAcc("");
                // Keep the local copy aligned with the term, so that the CV param below gets "" and not null
                typeoeAcc = "";
            }
            if (typeoeName.equals(typeValue)) {
                result.add(new CvParamImpl(typeoeAcc, typeoe.getSource().getAcc(), typeValue, order, valueStr));
            } else {
                // we have a free text + OE, we need two params
                result.add(new UserParamImpl(typeValue, order, valueStr));
                result.add(new CvParamImpl(typeoe.getAcc(), typeoe.getSource().getAcc(), typeoe.getName(),
                        order, typeValue));
            }
        }

        // Is the value an OE too? => Another annotation
        //
        if (valoe != null) {
            if (StringUtils.trimToNull(valoe.getAcc()) == null) {
                log.error("The term '" + valoe
                        + "' has an empty accession, we are exporting it to PRIDE, but it won't probably work");
                valoe.setAcc("");
            }

            result.add(new CvParamImpl(valoe.getAcc(), valoe.getSource().getAcc(), valoe.getName(), order,
                    valueStr));
        }

        // Is it a factor? We need a further annotation in case
        if (type.getRole() == PropertyRole.FACTOR) {
            result.add(new UserParamImpl("Factor Type", order, type.getValue()));
        }
        return result;
    }

    /**
     * Returns the first term of the collection, or null when the collection is null or empty.
     */
    private static OntologyTerm firstOntologyTerm(Collection<OntologyTerm> terms) {
        if (terms == null || terms.isEmpty()) {
            return null;
        }
        return terms.iterator().next();
    }

    /**
     * Issues the assay processing, which goes through the pipeline graph starting from the material node of
     * the assay, first backward and then forward. The per-assay state of the exporter (protocol steps,
     * experiment params, data file paths) is cleared first and then filled by {@link #visit(GraphElement)}.
     *
     * @param assay the assay to be processed
     * @throws TabMissingValueException if no identifications, peptides or PTMs file was found in the pipeline
     */
    private void processAssay(Assay assay) {
        prideProtoSteps = new ArrayList<ProtocolStep>();
        prideExperimentParams = new HashSet<Param>();
        // mzDataPath is cleared together with the other paths, otherwise an assay having no raw data node
        // would silently reuse the mzData file found for the previously processed assay
        mzDataPath = identificationsPath = peptidesPath = modificationsPath = null;

        graphVisitor.reset();
        Material material = assay.getMaterial();
        MaterialNode mnode = material.getMaterialNode();
        graphVisitor.visitBackward(mnode);
        graphVisitor.visitForward(mnode, true);

        if (identificationsPath == null) {
            throw new TabMissingValueException(i18n.msg("missing_identification_file", material.getName()));
        }

        if (peptidesPath == null) {
            throw new TabMissingValueException(i18n.msg("missing_peptide_file", material.getName()));
        }

        if (modificationsPath == null) {
            throw new TabMissingValueException(i18n.msg("missing_ptm_file", material.getName()));
        }
    }

    /**
     * Builds the identifications of the current assay from the proteins, peptides and PTMs files that were
     * found while the assay pipeline was visited.
     *
     * @return the collection backed by the three files
     */
    private Collection<Identification> export2Identifications() {
        File proteinsFile = new File(identificationsPath);
        File peptidesFile = new File(peptidesPath);
        File ptmsFile = new File(modificationsPath);
        return new IdentificationCsvCollection(proteinsFile, peptidesFile, ptmsFile);
    }

    /**
     * Exports the study attached to the assay as a PRIDE experiment.
     * <p/>
     * The study title is used both as title and as short label. When the system property
     * "bioinvindex.converters.prideml.backlink" is set, its value is appended to the title, after having
     * replaced every occurrence of the placeholder ${study-acc} with the study accession.
     * <p/>
     * TODO: add CVterms for investigations (or in description)
     *
     * @param assay the assay to be exported
     * @return the PRIDE experiment, with no accession set (the PRIDE team assigns it)
     * @throws TabMissingValueException if the assay pipeline lacks one of the identification files
     * @throws TabIOException           if the mzData file cannot be read
     */
    private Experiment export2PrideExperiment(Assay assay) {

        Study study = assay.getStudy();

        String prideTitle = study.getTitle();

        // TODO: constant class
        String backLink = StringUtils
                .trimToNull(System.getProperty("bioinvindex.converters.prideml.backlink", null));
        if (backLink != null) {
            // Literal replacement: with replaceAll() a '$' or '\' in the accession would be interpreted as
            // a group reference/escape of the replacement syntax
            backLink = backLink.replace("${study-acc}", study.getAcc());
            prideTitle += backLink;
        }

        // The protocol steps
        processAssay(assay);

        // The factors
        List<UserParam> exuparams = new ArrayList<UserParam>();
        List<CvParam> excvparams = new ArrayList<CvParam>();
        for (Param param : prideExperimentParams) {
            // Tested against the interface, the same way visit() tells the two kinds of param apart
            if (param instanceof UserParam) {
                exuparams.add((UserParam) param);
            } else {
                excvparams.add((CvParam) param);
            }
        }

        // The mzData file
        String mzdataPath = importPath + "/" + mzDataPath;
        MzData mzdata;
        try {
            Reader reader = new BufferedReader(new FileReader(new File(mzdataPath)));
            try {
                // NOTE(review): assumes the unmarshaller reads the whole document before returning, so that
                // the reader can be released right after the call - confirm against the PRIDE core library
                mzdata = new MzDataXMLUnmarshaller().unMarshall(reader);
            } finally {
                // The file handle is released whether or not the parsing succeeds
                reader.close();
            }
        } catch (IOException ex) {
            throw new TabIOException(i18n.msg("missing_mzdata", mzdataPath, ex.getMessage()), ex);
        }

        Experiment prideExperiment = new ExperimentImpl(
                // TODO: shall we support resubmission? In this case we must provide the accession that
                // we get from PRIDE, after first dispatching. Wherever this is put in the ISATAB.
                // At the moment the assay accession is not used, the PRIDE team assigns the accession.
                null, // accession
                prideTitle, // title
                null, // refs
                prideTitle, // Short label
                prideProtoSteps, // proto steps
                export2Identifications(), // identifications
                "Protocol steps as specified by the ISATAB submission", // protocol name
                mzdata, // mzData
                excvparams, // additional CV params
                exuparams // additional UDF params
        );

        return prideExperiment;
    }

    /**
     * Does the whole export job: for every study in the store, each assay whose dispatch target (as returned by
     * {@link AssayTypeEntries#getDispatchTargetIdFromLabels}) is "prideml" is converted into a PRIDE experiment
     * and written to the file pride/&lt;name&gt;.xml under the export path, where the name is obtained from the
     * assay accession via {@link DataLocationManager#accession2FileName}. The pride/ directory is created when
     * missing.
     * <p/>
     * The investigation (if any), the study and the assay being worked on are pushed on the {@link TabNDC}
     * while they are processed.
     *
     * @throws TabIOException if the output directory cannot be created or the PRIDE file cannot be written
     */
    public void export() {
        for (final Study study : store.valuesOfType(Study.class)) {
            TabNDC ndc = TabNDC.getInstance();
            final Investigation investigation = study.getUniqueInvestigation();
            if (investigation != null) {
                ndc.pushObject(investigation);
            }
            ndc.pushObject(study);

            log.trace("PRIDEExporter, Working on study " + study.getAcc());

            for (Assay assay : study.getAssays()) {
                if (!"prideml".equals(AssayTypeEntries.getDispatchTargetIdFromLabels(assay))) {
                    continue;
                }

                ndc.pushObject(assay);

                log.trace("PRIDEExporter, Working on assay " + assay.getAcc());

                // Exports the experiment associated to the assay
                Experiment prideExperiment = export2PrideExperiment(assay);
                Collection<Experiment> prideExperiments = new ArrayList<Experiment>();
                prideExperiments.add(prideExperiment);

                XMLMarshaller marshaller = new XMLMarshaller();
                String assayXPath = exportPath + "/pride";
                File assayXDir = new File(assayXPath);
                try {
                    if (!assayXDir.exists()) {
                        FileUtils.forceMkdir(assayXDir);
                    }

                    String prideMlRelPath = DataLocationManager.accession2FileName(assay.getAcc());
                    FileWriter writer = new FileWriter(new File(assayXPath + "/" + prideMlRelPath + ".xml"));
                    try {
                        marshaller.marshallExperiments(prideExperiments, writer);
                    } finally {
                        // Closing flushes any pending output and releases the file handle, even when the
                        // marshalling fails; it has no effect if the stream was already closed
                        writer.close();
                    }
                } catch (IOException ex) {
                    throw new TabIOException(
                            i18n.msg("cannot_export_pride_todir", assay.getAcc(), assayXPath, ex.getMessage()), ex);
                } // try-catch

                ndc.popObject(); // assay
            } // for assay

            ndc.popObject(); // study
            if (investigation != null) {
                ndc.popObject();
            } // investigation
        } // for study

    } // export()

}