org.gitools.matrix.format.TdmMatrixFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.gitools.matrix.format.TdmMatrixFormat.java

Source

/*
 * #%L
 * gitools-core
 * %%
 * Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the 
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public 
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package org.gitools.matrix.format;

import edu.upf.bg.mtabix.MTabixConfig;
import edu.upf.bg.mtabix.MTabixIndex;
import edu.upf.bg.mtabix.compress.BlockCompressedStreamConstants;
import edu.upf.bg.mtabix.parse.DefaultKeyParser;
import org.apache.commons.io.IOUtils;
import org.gitools.api.PersistenceException;
import org.gitools.api.analysis.IProgressMonitor;
import org.gitools.api.matrix.IMatrix;
import org.gitools.api.matrix.IMatrixDimension;
import org.gitools.api.matrix.IMatrixLayer;
import org.gitools.api.matrix.IMatrixLayers;
import org.gitools.api.resource.IResourceLocator;
import org.gitools.matrix.model.MatrixLayer;
import org.gitools.matrix.model.MatrixLayers;
import org.gitools.matrix.model.hashmatrix.HashMatrix;
import org.gitools.matrix.model.mtabixmatrix.MTabixMatrix;
import org.gitools.utils.readers.text.CSVReader;
import org.gitools.utils.readers.text.RawFlatTextWriter;
import org.gitools.utils.translators.DoubleTranslator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.enterprise.context.ApplicationScoped;
import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import static com.google.common.collect.Lists.newArrayList;
import static org.gitools.api.matrix.MatrixDimensionKey.COLUMNS;
import static org.gitools.api.matrix.MatrixDimensionKey.ROWS;

/**
 * Matrix format for "tdm" text files.
 * <p>
 * A tdm file starts with a header line whose first two fields name the column and row identifiers
 * and whose remaining fields are layer identifiers. Every following line describes one cell:
 * column id, row id and one value per layer.
 * <p>
 * When an mtabix index can be located for the resource, {@link #readResource} returns an
 * {@link MTabixMatrix} backed by that index; otherwise the whole file is loaded into a
 * {@link HashMatrix}.
 */
@ApplicationScoped
public class TdmMatrixFormat extends AbstractMatrixFormat {

    private static final Logger LOGGER = LoggerFactory.getLogger(TdmMatrixFormat.class);

    /**
     * File extension handled by this format. The misspelled identifier is kept as-is because the
     * constant is public and may be referenced from other classes.
     */
    public static final String EXSTENSION = "tdm";

    /** Suffix appended to the resource name to obtain the name of its mtabix index. */
    private static final String MTABIX_SUFFIX = ".gz.mtabix";

    /** Creates the format and registers it under the {@link #EXSTENSION} extension. */
    public TdmMatrixFormat() {
        super(EXSTENSION);
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean isDefaultExtension() {
        return true;
    }

    /**
     * Reads a matrix from the given resource.
     *
     * @param resourceLocator locator of the tdm data
     * @param progressMonitor monitor polled for cancellation while the body is loaded
     * @return an index-backed matrix when an mtabix index is available, otherwise a fully loaded
     *         in-memory matrix
     * @throws PersistenceException if the header has fewer than three fields (or the input is
     *                              empty), or wrapping any other failure, including cancellation
     */
    @Override
    protected IMatrix readResource(IResourceLocator resourceLocator, IProgressMonitor progressMonitor)
            throws PersistenceException {

        try {

            MTabixIndex index = readMtabixIndex(resourceLocator, progressMonitor);

            // The layers array is created raw and assigned to a generic array type, exactly as the
            // element type is only ever MatrixLayer<Double>.
            @SuppressWarnings("unchecked")
            MatrixLayer<Double>[] layers;

            // try-with-resources releases the stream on every exit path, including failures
            // and cancellation.
            try (InputStream in = resourceLocator.openInputStream(progressMonitor)) {

                CSVReader parser = new CSVReader(new InputStreamReader(in));

                // readNext() yields null at end of input, so an empty file has no header at all
                String[] header = parser.readNext();
                if (header == null || header.length < 3) {
                    throw new PersistenceException("At least 3 fields expected on one line.");
                }

                // The first two header fields are the identifiers; the rest are layer names
                layers = new MatrixLayer[header.length - 2];
                for (int i = 2; i < header.length; i++) {
                    layers[i - 2] = new MatrixLayer<>(header[i], Double.class);
                }

                if (index == null) {

                    // No index: load all the matrix into memory
                    HashMatrix resultsMatrix = new HashMatrix(new MatrixLayers<MatrixLayer>(layers), ROWS, COLUMNS);

                    String[] fields;
                    while ((fields = parser.readNext()) != null) {

                        if (progressMonitor.isCancelled()) {
                            throw new CancellationException();
                        }

                        checkLine(fields, header, parser.getLineNumber());

                        final String columnId = fields[0];
                        final String rowId = fields[1];

                        for (int i = 2; i < fields.length; i++) {
                            Double value = DoubleTranslator.get().stringToValue(fields[i]);
                            resultsMatrix.set(layers[i - 2], value, rowId, columnId);
                        }
                    }

                    return resultsMatrix;
                }
            }

            // An index is available: only the header was needed, the stream is already closed
            return new MTabixMatrix(index, new MatrixLayers<MatrixLayer>(layers), ROWS, COLUMNS);

        } catch (PersistenceException e) {
            // Already the right type; wrapping it again would only bury the message
            throw e;
        } catch (Exception e) {
            throw new PersistenceException(e);
        }

    }

    /**
     * Tries to load the mtabix index that belongs to the given resource.
     * <p>
     * For resources whose URL path ends in "zip" the index entry and the data are first copied to
     * temporary files (deleted on JVM exit); otherwise the index is looked up through the
     * resource's reference locator.
     *
     * @param resourceLocator locator of the tdm data
     * @param progressMonitor monitor handed to the locator when the zipped data is opened
     * @return the loaded index, or {@code null} when no usable index exists
     * @throws IOException        if the index or the data cannot be copied or read
     * @throws URISyntaxException if a resource URL cannot be converted to a URI
     */
    private MTabixIndex readMtabixIndex(IResourceLocator resourceLocator, IProgressMonitor progressMonitor)
            throws IOException, URISyntaxException {

        String indexName = resourceLocator.getName() + MTABIX_SUFFIX;
        URL dataURL = resourceLocator.getURL();

        File dataFile;
        File indexFile;

        if (!dataURL.getPath().endsWith("zip")) {

            URL indexURL = resourceLocator.getReferenceLocator(indexName).getURL();

            // The index needs plain files; new File(URI) rejects any other scheme, so treat
            // such resources as "not indexed" and let the caller load them into memory.
            if (!isLocalFile(dataURL) || !isLocalFile(indexURL)) {
                return null;
            }

            dataFile = new File(dataURL.toURI());
            indexFile = new File(indexURL.toURI());

        } else {

            // The zip handle is only needed to extract the index entry; close it right after
            try (ZipFile zipFile = new ZipFile(resourceLocator.getReadFile())) {
                ZipEntry entry = zipFile.getEntry(indexName);

                if (entry == null) {
                    return null;
                }

                // Copy index to a temporary file
                indexFile = File.createTempFile("gitools-cache-", "zip_mtabix");
                indexFile.deleteOnExit();
                copyToFile(zipFile.getInputStream(entry), indexFile);
            }

            // Copy data to a temporary file
            dataFile = File.createTempFile("gitools-cache-", "zip_bgz");
            dataFile.deleteOnExit();
            copyToFile(resourceLocator.getParentLocator().openInputStream(progressMonitor), dataFile);

        }

        if (!indexFile.exists()) {
            return null;
        }

        MTabixConfig mtabixConfig = new MTabixConfig(dataFile, indexFile, new DefaultKeyParser(1, 0));
        MTabixIndex index = new MTabixIndex(mtabixConfig);
        index.loadIndex();

        return index;

    }

    /** Tells whether the URL is non-null and uses the {@code file} protocol. */
    private static boolean isLocalFile(URL url) {
        return url != null && "file".equalsIgnoreCase(url.getProtocol());
    }

    /** Copies the whole stream into the target file and closes both ends, even on failure. */
    private static void copyToFile(InputStream source, File target) throws IOException {
        try (InputStream in = source; OutputStream out = new FileOutputStream(target)) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Writes the matrix to the given resource and then tries to build its mtabix index.
     * <p>
     * A failure while building the index is only logged; the data file has been written by then.
     *
     * @param resourceLocator destination of the tdm data
     * @param results         matrix to save
     * @param monitor         progress monitor; one unit of work is reported per column
     * @throws PersistenceException wrapping any failure while writing the data, including
     *                              cancellation
     */
    @Override
    protected void writeResource(IResourceLocator resourceLocator, IMatrix results, IProgressMonitor monitor)
            throws PersistenceException {

        monitor.begin("Saving results...", results.getColumns().size());

        // Only the outermost writer is a managed resource: closing it closes the wrapped
        // streams exactly once, also when writeCells fails or is cancelled.
        try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
                resourceLocator.openOutputStream(monitor),
                BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE * 100))) {
            writeCells(writer, results, monitor);
        } catch (PersistenceException e) {
            throw e;
        } catch (Exception e) {
            throw new PersistenceException(e);
        }

        try {
            writeMtabixIndex(resourceLocator, results, monitor);
        } catch (Exception e) {
            // Best effort: the matrix can still be read without an index
            LOGGER.warn("Error creating mtabix index", e);
        }

    }

    /**
     * Builds the mtabix index for data that has just been written.
     *
     * @param resourceLocator locator of the written tdm data
     * @param results         matrix whose column and row identifiers are handed to the index
     * @param monitor         monitor passed on when the index locator is closed
     */
    private void writeMtabixIndex(IResourceLocator resourceLocator, IMatrix results, IProgressMonitor monitor)
            throws URISyntaxException, IOException, NoSuchAlgorithmException {

        IResourceLocator mtabix = resourceLocator.getReferenceLocator(resourceLocator.getName() + MTABIX_SUFFIX);

        // Key 0 holds the column identifiers and key 1 the row identifiers, matching the
        // position of those fields on every data line.
        Map<Integer, List<String>> identifiers = new HashMap<>(2);
        identifiers.put(0, newArrayList(results.getColumns()));
        identifiers.put(1, newArrayList(results.getRows()));

        MTabixConfig mtabixConfig = new MTabixConfig(resourceLocator.getWriteFile(), mtabix.getWriteFile(),
                new DefaultKeyParser(1, 0), identifiers);

        MTabixIndex index = new MTabixIndex(mtabixConfig);
        index.buildIndex();
        mtabix.close(monitor);

    }

    /**
     * Writes the header and one tab-separated line per non-empty cell.
     * <p>
     * Cells are visited column by column. A cell whose value is null in every layer is skipped;
     * otherwise null layer values are written as "-".
     *
     * @param writer          destination of the text
     * @param resultsMatrix   matrix to serialise
     * @param progressMonitor receives one unit of work per column and is polled for cancellation
     * @throws CancellationException if the monitor reports cancellation after a column is written
     */
    private void writeCells(Writer writer, IMatrix resultsMatrix, IProgressMonitor progressMonitor) {

        RawFlatTextWriter out = new RawFlatTextWriter(writer, '\t', '"');

        // Header: the two identifier fields followed by one field per layer
        out.writeQuotedValue("column");
        out.writeSeparator();
        out.writeQuotedValue("row");

        for (IMatrixLayer layer : resultsMatrix.getLayers()) {
            out.writeSeparator();
            out.writeQuotedValue(layer.getId());
        }

        out.writeNewLine();

        IMatrixDimension columns = resultsMatrix.getColumns();
        IMatrixDimension rows = resultsMatrix.getRows();

        IMatrixLayers layers = resultsMatrix.getLayers();
        int layerCount = layers.size();

        // Reused for every cell; each slot is overwritten before the line is written
        String[] values = new String[layerCount];

        for (String column : columns) {

            for (String row : rows) {
                boolean allNulls = true;
                for (int l = 0; l < layerCount; l++) {
                    IMatrixLayer layer = layers.get(l);
                    Object value = resultsMatrix.get(layer, row, column);

                    //TODO Use IMatrixLayer translator
                    if (value instanceof Double) {
                        values[l] = DoubleTranslator.get().valueToString((Double) value);
                        allNulls = false;
                    } else if (value != null) {
                        values[l] = value.toString();
                        allNulls = false;
                    } else {
                        values[l] = "-";
                    }
                }

                if (!allNulls) {
                    out.writeValue(column);
                    out.writeSeparator();
                    out.writeValue(row);

                    for (int l = 0; l < layerCount; l++) {
                        out.writeSeparator();
                        out.writeValue(values[l]);
                    }

                    out.writeNewLine();
                }
            }
            progressMonitor.worked(1);
            if (progressMonitor.isCancelled()) {
                throw new CancellationException();
            }
        }

    }

    /**
     * Reads the layer names from the header line of a tdm file.
     *
     * @param file the tdm file; names ending in ".gz" are read through a gzip stream
     * @return the header fields after the first two (the layer names)
     * @throws PersistenceException if no file is given, the header has fewer than three fields
     *                              (or the file is empty), or wrapping any other failure
     * @deprecated marked as deprecated in the original code; no replacement is visible in this file
     */
    @Deprecated
    public static String[] readHeader(File file) throws PersistenceException {

        // The reader is closed on every exit path; a null resource is simply skipped on close
        try (Reader reader = openReader(file)) {

            if (reader == null) {
                throw new PersistenceException("No file given to read the header from.");
            }

            CSVReader parser = new CSVReader(reader);

            // read header; readNext() yields null when the file is empty
            String[] line = parser.readNext();
            if (line == null || line.length < 3) {
                throw new PersistenceException("At least 3 columns expected.");
            }

            int numAttributes = line.length - 2;
            String[] matrixHeaders = new String[numAttributes];
            System.arraycopy(line, 2, matrixHeaders, 0, numAttributes);
            return matrixHeaders;

        } catch (PersistenceException e) {
            throw e;
        } catch (Exception e) {
            throw new PersistenceException(e);
        }
    }

    /**
     * Opens a text reader on the given file, decompressing it when its name ends in ".gz".
     * The platform default charset is used, as in the original implementation.
     *
     * @param path file to open, may be {@code null}
     * @return a reader on the file, or {@code null} when {@code path} is {@code null}
     * @throws IOException if the file cannot be opened or is not valid gzip data
     */
    private static Reader openReader(File path) throws IOException {
        if (path == null) {
            return null;
        }

        if (path.getName().endsWith(".gz")) {
            InputStream raw = new FileInputStream(path);
            try {
                return new InputStreamReader(new GZIPInputStream(raw));
            } catch (IOException e) {
                // The gzip header could not be read: do not leave the file handle open
                raw.close();
                throw e;
            }
        } else {
            return new BufferedReader(new FileReader(path));
        }
    }

}