fr.cirad.mgdb.exporting.markeroriented.BEDExportHandler.java Source code

Java tutorial

Introduction

Here is the source code for fr.cirad.mgdb.exporting.markeroriented.BEDExportHandler.java

Source

/*******************************************************************************
 * MGDB Export - Mongo Genotype DataBase, export handlers
 * Copyright (C) 2016 <CIRAD>
 *     
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * See <http://www.gnu.org/licenses/gpl-3.0.html> for details about
 * GNU Affero General Public License V3.
 *******************************************************************************/
package fr.cirad.mgdb.exporting.markeroriented;

import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.log4j.Logger;
import org.springframework.data.mongodb.core.MongoTemplate;

import com.mongodb.DBCursor;
import com.mongodb.DBObject;

import fr.cirad.mgdb.model.mongo.maintypes.Individual;
import fr.cirad.mgdb.model.mongo.maintypes.VariantData;
import fr.cirad.mgdb.model.mongo.subtypes.ReferencePosition;
import fr.cirad.mgdb.model.mongo.subtypes.SampleId;
import fr.cirad.tools.ProgressIndicator;
import fr.cirad.tools.mongo.MongoTemplateManager;

/**
 * The Class BEDExportHandler.
 * 
 * @author Guilhem SEMPERE, Florian PHILIPPE
 */
public class BEDExportHandler extends AbstractMarkerOrientedExportHandler {

    /** The Constant LOG. */
    static final Logger LOG = Logger.getLogger(BEDExportHandler.class);

    /* (non-Javadoc)
     * @see fr.cirad.mgdb.exporting.IExportHandler#getExportFormatName()
     */
    @Override
    public String getExportFormatName() {
        return "BED";
    }

    /* (non-Javadoc)
     * @see fr.cirad.mgdb.exporting.IExportHandler#getExportFormatDescription()
     */
    @Override
    public String getExportFormatDescription() {
        return "Exports data in BED Format. See <a target='_blank' href='http://genome.ucsc.edu/FAQ/FAQformat.html#format1'>http://genome.ucsc.edu/FAQ/FAQformat.html#format1</a> for more details";
    }

    /* (non-Javadoc)
     * @see fr.cirad.mgdb.exporting.IExportHandler#getStepList()
     */
    @Override
    public List<String> getStepList() {
        return Arrays.asList(new String[] { "Exporting data to BED format" });
    }

    /* (non-Javadoc)
     * @see fr.cirad.mgdb.exporting.markeroriented.AbstractMarkerOrientedExportHandler#exportData(java.io.OutputStream, java.lang.String, java.util.List, fr.cirad.tools.ProgressIndicator, com.mongodb.DBCursor, java.util.Map, int, int, java.util.Map)
     */
    @Override
    public void exportData(OutputStream outputStream, String sModule, List<SampleId> sampleIDs,
            ProgressIndicator progress, DBCursor markerCursor, Map<Comparable, Comparable> markerSynonyms,
            int nMinimumGenotypeQuality, int nMinimumReadDepth, Map<String, InputStream> readyToExportFiles)
            throws Exception {
        MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
        ZipOutputStream zos = new ZipOutputStream(outputStream);

        if (readyToExportFiles != null)
            for (String readyToExportFile : readyToExportFiles.keySet()) {
                zos.putNextEntry(new ZipEntry(readyToExportFile));
                InputStream inputStream = readyToExportFiles.get(readyToExportFile);
                byte[] dataBlock = new byte[1024];
                int count = inputStream.read(dataBlock, 0, 1024);
                while (count != -1) {
                    zos.write(dataBlock, 0, count);
                    count = inputStream.read(dataBlock, 0, 1024);
                }
            }

        int markerCount = markerCursor.count();

        List<String> selectedIndividualList = new ArrayList<String>();
        for (Individual ind : getIndividualsFromSamples(sModule, sampleIDs))
            selectedIndividualList.add(ind.getId());

        String exportName = sModule + "_" + markerCount + "variants_" + selectedIndividualList.size()
                + "individuals";
        zos.putNextEntry(new ZipEntry(exportName + ".bed"));

        short nProgress = 0, nPreviousProgress = 0;
        int nChunkSize = Math.min(2000, markerCount), nLoadedMarkerCount = 0;
        while (markerCursor.hasNext()) {
            int nLoadedMarkerCountInLoop = 0;
            Map<Comparable, String> markerChromosomalPositions = new LinkedHashMap<Comparable, String>();
            boolean fStartingNewChunk = true;
            markerCursor.batchSize(nChunkSize);
            while (markerCursor.hasNext() && (fStartingNewChunk || nLoadedMarkerCountInLoop % nChunkSize != 0)) {
                DBObject exportVariant = markerCursor.next();
                DBObject refPos = (DBObject) exportVariant.get(VariantData.FIELDNAME_REFERENCE_POSITION);
                markerChromosomalPositions.put((Comparable) exportVariant.get("_id"),
                        refPos.get(ReferencePosition.FIELDNAME_SEQUENCE) + ":"
                                + refPos.get(ReferencePosition.FIELDNAME_START_SITE));
                nLoadedMarkerCountInLoop++;
                fStartingNewChunk = false;
            }

            for (Comparable variantId : markerChromosomalPositions.keySet()) // read data and write results into temporary files (one per sample)
            {
                String[] chromAndPos = markerChromosomalPositions.get(variantId).split(":");
                zos.write((chromAndPos[0] + "\t" + (Long.parseLong(chromAndPos[1]) - 1) + "\t"
                        + (Long.parseLong(chromAndPos[1]) - 1) + "\t" + variantId + "\t" + "0" + "\t" + "+")
                                .getBytes());
                zos.write((LINE_SEPARATOR).getBytes());
            }

            if (progress.hasAborted())
                return;

            nLoadedMarkerCount += nLoadedMarkerCountInLoop;
            nProgress = (short) (nLoadedMarkerCount * 100 / markerCount);
            if (nProgress > nPreviousProgress) {
                progress.setCurrentStepProgress(nProgress);
                nPreviousProgress = nProgress;
            }
        }

        zos.close();
        progress.setCurrentStepProgress((short) 100);
    }
}