org.opencb.opencga.app.cli.analysis.executors.VariantCommandExecutor.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.opencga.app.cli.analysis.executors.VariantCommandExecutor.java

Source

/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.app.cli.analysis.executors;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.opencga.analysis.AnalysisExecutionException;
import org.opencb.opencga.analysis.execution.plugins.PluginExecutor;
import org.opencb.opencga.analysis.execution.plugins.hist.VariantHistogramAnalysis;
import org.opencb.opencga.analysis.execution.plugins.ibs.IbsAnalysis;
import org.opencb.opencga.app.cli.analysis.options.VariantCommandOptions;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.models.DataStore;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.VariantSearchException;
import org.opencb.opencga.storage.core.manager.variant.VariantStorageManager;
import org.opencb.opencga.storage.core.manager.variant.operations.StorageOperation;
import org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.analysis.VariantSampleFilter;
import org.opencb.opencga.storage.core.variant.annotation.DefaultVariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotatorException;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
import org.opencb.opencga.storage.core.variant.stats.DefaultVariantStatisticsManager;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.*;

import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation.LOAD;
import static org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation.TRANSFORM;

/**
 * Created by imedina on 02/03/15.
 */
public class VariantCommandExecutor extends AnalysisCommandExecutor {

    //    private AnalysisCliOptionsParser.VariantCommandOptions variantCommandOptions;
    private VariantCommandOptions variantCommandOptions;
    private VariantStorageEngine variantStorageEngine;

    /**
     * Builds the executor around the parsed variant command options.
     *
     * @param variantCommandOptions parsed options of the variant command; its
     *                              {@code commonCommandOptions} are handed to the parent executor
     */
    public VariantCommandExecutor(VariantCommandOptions variantCommandOptions) {
        super(variantCommandOptions.commonCommandOptions);
        this.variantCommandOptions = variantCommandOptions;
    }

    /**
     * Reads the sub-command parsed by JCommander, configures the executor, obtains the session id
     * and then runs the handler that matches the sub-command.
     * A sub-command with no matching handler is only logged as an error.
     *
     * @throws Exception anything raised while configuring or by the selected handler
     */
    @Override
    public void execute() throws Exception {
        logger.debug("Executing variant command line");

        final String subCommand = getParsedSubCommand(variantCommandOptions.jCommander);
        configure();

        sessionId = getSessionId(variantCommandOptions.commonCommandOptions);

        // Cases are listed alphabetically; there is no fall-through between them.
        switch (subCommand) {
            case "annotate":
                annotate();
                break;
            case "delete":
                delete();
                break;
            case "export-frequencies":
                exportFrequencies();
                break;
            case "histogram":
                histogram();
                break;
            case "ibs":
                ibs();
                break;
            case "import":
                importData();
                break;
            case "index":
                index();
                break;
            case "index-search":
                indexSearch();
                break;
            case "query":
                query();
                break;
            case "samples":
                samples();
                break;
            case "stats":
                stats();
                break;
            default:
                logger.error("Subcommand not valid");
                break;
        }
    }

    /**
     * Obtains a variant storage engine from the factory and stores it in the
     * {@code variantStorageEngine} field.
     * When the data store names no storage engine, the factory's no-argument lookup is used;
     * otherwise the engine is looked up by that name.
     *
     * @param dataStore data store whose storage engine name selects the engine
     * @return the engine that was stored in the field
     */
    private VariantStorageEngine initVariantStorageManager(DataStore dataStore)
            throws CatalogException, IllegalAccessException, InstantiationException, ClassNotFoundException {
        final String engineName = dataStore.getStorageEngine();
        this.variantStorageEngine = isEmpty(engineName)
                ? storageEngineFactory.getVariantStorageEngine()
                : storageEngineFactory.getVariantStorageEngine(engineName);
        return this.variantStorageEngine;
    }

    /**
     * Runs the {@link IbsAnalysis} plugin for the study given on the command line.
     * The samples and the output directory are passed as plugin parameters only when not empty.
     */
    private void ibs() throws CatalogException, AnalysisExecutionException {
        final VariantCommandOptions.VariantIbsCommandOptions ibsCli = variantCommandOptions.ibsVariantCommandOptions;

        final ObjectMap pluginParams = new ObjectMap();
        pluginParams.putIfNotEmpty(IbsAnalysis.SAMPLES, ibsCli.samples);
        pluginParams.putIfNotEmpty(IbsAnalysis.OUTDIR, ibsCli.outdir);

        // The executor is created before the study id is resolved, as in a single chained call.
        final PluginExecutor pluginExecutor = new PluginExecutor(catalogManager, sessionId);
        pluginExecutor.execute(IbsAnalysis.class, "default",
                catalogManager.getStudyId(ibsCli.study, sessionId), pluginParams);
    }

    /**
     * Runs the "export-frequencies" sub-command by copying its options onto the query
     * command options and then delegating to {@link #query()}.
     * <p>
     * The output format is lower-cased and every occurrence of {@code "tsv"} in it is replaced
     * with {@code "stats"}. The returned-samples option is set to the empty string.
     * Note that the shared query options object is modified in place.
     *
     * @throws Exception anything raised by {@link #query()}
     */
    private void exportFrequencies() throws Exception {

        VariantCommandOptions.VariantExportStatsCommandOptions exportCliOptions = variantCommandOptions.exportVariantStatsCommandOptions;
        VariantCommandOptions.VariantQueryCommandOptions queryCliOptions = variantCommandOptions.queryVariantCommandOptions;

        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. under a Turkish locale 'I' would otherwise become a dotless 'ı').
        queryCliOptions.commonOptions.outputFormat = exportCliOptions.commonOptions.outputFormat
                .toLowerCase(Locale.ROOT)
                .replace("tsv", "stats");
        queryCliOptions.study = exportCliOptions.studies;
        queryCliOptions.genericVariantQueryOptions.returnStudy = exportCliOptions.studies;
        queryCliOptions.numericOptions.limit = exportCliOptions.numericOptions.limit;
        queryCliOptions.numericOptions.skip = exportCliOptions.numericOptions.skip;
        queryCliOptions.genericVariantQueryOptions.region = exportCliOptions.region;
        queryCliOptions.genericVariantQueryOptions.regionFile = exportCliOptions.regionFile;
        queryCliOptions.output = exportCliOptions.output;
        queryCliOptions.genericVariantQueryOptions.gene = exportCliOptions.gene;
        queryCliOptions.numericOptions.count = exportCliOptions.numericOptions.count;
        queryCliOptions.genericVariantQueryOptions.returnSample = "";

        query();
    }

    /**
     * Runs the "query" sub-command. Depending on the options, exactly one of the following happens:
     * <ul>
     *   <li>count: prints the number of results to standard output;</li>
     *   <li>group-by: prints the group-by result as pretty-printed JSON;</li>
     *   <li>rank: prints the rank result as pretty-printed JSON;</li>
     *   <li>otherwise: exports the matching variants in the requested output format.</li>
     * </ul>
     * An output format of {@code "TEXT"} is replaced with {@code "VCF"} before anything else.
     *
     * @throws Exception anything raised while parsing the query or by the storage manager
     */
    private void query() throws Exception {

        VariantCommandOptions.VariantQueryCommandOptions cliOptions = variantCommandOptions.queryVariantCommandOptions;

        if ("TEXT".equals(cliOptions.commonOptions.outputFormat)) {
            cliOptions.commonOptions.outputFormat = "VCF";
        }

        Map<Long, String> studyIds = getStudyIds(sessionId);
        Query query = VariantQueryCommandUtils.parseQuery(cliOptions, studyIds);
        QueryOptions queryOptions = VariantQueryCommandUtils.parseQueryOptions(cliOptions);
        queryOptions.put("summary", cliOptions.genericVariantQueryOptions.summary);

        VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        if (cliOptions.numericOptions.count) {
            QueryResult<Long> result = variantManager.count(query, sessionId);
            System.out.println("Num. results\t" + result.getResult().get(0));
        } else if (StringUtils.isNotEmpty(cliOptions.genericVariantQueryOptions.groupBy)) {
            ObjectMapper objectMapper = new ObjectMapper();
            QueryResult<?> groupBy = variantManager.groupBy(cliOptions.genericVariantQueryOptions.groupBy, query,
                    queryOptions, sessionId);
            // Labelled as a group-by result; this branch used to print the "rank = " label.
            System.out.println("groupBy = "
                    + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(groupBy));
        } else if (StringUtils.isNotEmpty(cliOptions.genericVariantQueryOptions.rank)) {
            ObjectMapper objectMapper = new ObjectMapper();

            QueryResult<?> rank = variantManager.rank(query, cliOptions.genericVariantQueryOptions.rank, 10, true,
                    sessionId);
            System.out.println("rank = " + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(rank));
        } else {
            if (cliOptions.genericVariantQueryOptions.annotations != null) {
                queryOptions.add("annotations", cliOptions.genericVariantQueryOptions.annotations);
            }
            VariantWriterFactory.VariantOutputFormat outputFormat = VariantWriterFactory
                    .toOutputFormat(cliOptions.commonOptions.outputFormat, cliOptions.output);
            variantManager.exportData(cliOptions.output, outputFormat, query, queryOptions, sessionId);
        }
    }

    /**
     * Runs the "import" sub-command: turns the input given on the command line into a URI and
     * asks the variant storage manager to import it into the given study.
     */
    private void importData() throws URISyntaxException, CatalogException, StorageEngineException, IOException {
        final VariantCommandOptions.VariantImportCommandOptions importCli = variantCommandOptions.importVariantCommandOptions;
        final VariantStorageManager storageManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        storageManager.importData(UriUtils.createUri(importCli.input), importCli.study, sessionId);
    }

    /**
     * Placeholder for the "delete" sub-command, which is not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    private void delete() {
        throw new UnsupportedOperationException("The variant 'delete' subcommand is not implemented");
    }

    /**
     * Runs the "index" sub-command: collects the indexing options given on the command line into
     * a {@link QueryOptions} and asks the variant storage manager to index the file(s).
     * The free-form common params are added last, so they overwrite any key set before them.
     */
    private void index() throws CatalogException, AnalysisExecutionException, IOException, ClassNotFoundException,
            StorageEngineException, InstantiationException, IllegalAccessException, URISyntaxException {
        final VariantCommandOptions.VariantIndexCommandOptions indexCli = variantCommandOptions.indexVariantCommandOptions;

        final QueryOptions indexParams = new QueryOptions();

        // Load / transform flags.
        indexParams.put(LOAD, indexCli.genericVariantIndexOptions.load);
        indexParams.put(TRANSFORM, indexCli.genericVariantIndexOptions.transform);

        // Storage engine options.
        indexParams.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), indexCli.genericVariantIndexOptions.calculateStats);
        indexParams.put(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), indexCli.genericVariantIndexOptions.extraFields);
        indexParams.put(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), indexCli.genericVariantIndexOptions.excludeGenotype);
        indexParams.put(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), indexCli.genericVariantIndexOptions.aggregated);
        indexParams.put(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(),
                indexCli.genericVariantIndexOptions.aggregationMappingFile);
        indexParams.put(VariantStorageEngine.Options.GVCF.key(), indexCli.genericVariantIndexOptions.gvcf);

        // Catalog path and already-transformed files, only when given.
        indexParams.putIfNotNull(StorageOperation.CATALOG_PATH, indexCli.catalogPath);
        indexParams.putIfNotNull(VariantFileIndexerStorageOperation.TRANSFORMED_FILES, indexCli.transformedPaths);

        // Annotation options; the annotator is only set when one was given.
        indexParams.put(VariantStorageEngine.Options.ANNOTATE.key(), indexCli.genericVariantIndexOptions.annotate);
        indexParams.putIfNotNull(VariantAnnotationManager.ANNOTATION_SOURCE, indexCli.genericVariantIndexOptions.annotator);
        indexParams.put(VariantAnnotationManager.OVERWRITE_ANNOTATIONS,
                indexCli.genericVariantIndexOptions.overwriteAnnotations);

        indexParams.put(VariantStorageEngine.Options.RESUME.key(), indexCli.genericVariantIndexOptions.resume);
        indexParams.putAll(indexCli.commonOptions.params);

        final VariantStorageManager storageManager = new VariantStorageManager(catalogManager, storageEngineFactory);
        storageManager.index(indexCli.study, indexCli.fileId, indexCli.outdir, indexParams, sessionId);
    }

    /**
     * Runs the "index-search" sub-command: asks the variant storage manager to build the search
     * index for the study given on the command line.
     * <p>
     * The study is read from the "index" command options object.
     * NOTE(review): the free-form common params are not forwarded, because {@code searchIndex}
     * is called with the study and session id only — confirm whether an overload taking
     * options exists and should be used.
     */
    private void indexSearch() throws CatalogException, AnalysisExecutionException, IOException,
            ClassNotFoundException, StorageEngineException, InstantiationException, IllegalAccessException,
            URISyntaxException, VariantSearchException {
        VariantCommandOptions.VariantIndexCommandOptions cliOptions = variantCommandOptions.indexVariantCommandOptions;

        VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        variantManager.searchIndex(cliOptions.study, sessionId);
    }

    /**
     * Runs the "stats" sub-command: collects the statistics options given on the command line
     * and asks the variant storage manager to calculate stats for the requested cohorts.
     * The cohort ids are a comma-separated string; a blank value yields an empty cohort list.
     * The free-form common params are added last, so they overwrite any key set before them.
     */
    private void stats() throws CatalogException, AnalysisExecutionException, IOException, ClassNotFoundException,
            StorageEngineException, InstantiationException, IllegalAccessException, URISyntaxException {
        final VariantCommandOptions.VariantStatsCommandOptions statsCli = variantCommandOptions.statsVariantCommandOptions;

        final VariantStorageManager storageManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        final QueryOptions statsParams = new QueryOptions();
        statsParams.append(DefaultVariantStatisticsManager.OUTPUT_FILE_NAME, statsCli.genericVariantStatsOptions.fileName);
        statsParams.append(VariantStorageEngine.Options.OVERWRITE_STATS.key(), statsCli.genericVariantStatsOptions.overwriteStats);
        statsParams.append(VariantStorageEngine.Options.UPDATE_STATS.key(), statsCli.genericVariantStatsOptions.updateStats);
        statsParams.append(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), statsCli.genericVariantStatsOptions.aggregated);
        statsParams.append(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(),
                statsCli.genericVariantStatsOptions.aggregationMappingFile);
        statsParams.append(VariantStorageEngine.Options.RESUME.key(), statsCli.genericVariantStatsOptions.resume);
        statsParams.append(StorageOperation.CATALOG_PATH, statsCli.catalogPath);
        statsParams.putIfNotEmpty(VariantStorageEngine.Options.FILE_ID.key(), statsCli.genericVariantStatsOptions.fileId);

        statsParams.putAll(statsCli.commonOptions.params);

        final List<String> cohorts = StringUtils.isNotBlank(statsCli.cohortIds)
                ? Arrays.asList(statsCli.cohortIds.split(","))
                : Collections.<String>emptyList();

        storageManager.stats(statsCli.study, cohorts, statsCli.outdir, statsParams, sessionId);
    }

    /**
     * Runs the "annotate" sub-command: builds a variant filter (region, chromosome, gene,
     * consequence type) and the annotation options from the command line, then asks the variant
     * storage manager to annotate the given project/study.
     * The free-form common params are added last, so they overwrite any key set before them.
     */
    private void annotate() throws StorageEngineException, IOException, URISyntaxException,
            VariantAnnotatorException, CatalogException, AnalysisExecutionException, IllegalAccessException,
            InstantiationException, ClassNotFoundException {

        final VariantCommandOptions.VariantAnnotateCommandOptions annotateCli = variantCommandOptions.annotateVariantCommandOptions;
        final VariantStorageManager storageManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        // Filter built from the command-line filter options.
        final Query filter = new Query();
        filter.append(VariantDBAdaptor.VariantQueryParams.REGION.key(), annotateCli.genericVariantAnnotateOptions.filterRegion);
        filter.append(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key(),
                annotateCli.genericVariantAnnotateOptions.filterChromosome);
        filter.append(VariantDBAdaptor.VariantQueryParams.GENE.key(), annotateCli.genericVariantAnnotateOptions.filterGene);
        filter.append(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key(),
                annotateCli.genericVariantAnnotateOptions.filterAnnotConsequenceType);

        final QueryOptions annotateParams = new QueryOptions();
        annotateParams.put(VariantAnnotationManager.OVERWRITE_ANNOTATIONS,
                annotateCli.genericVariantAnnotateOptions.overwriteAnnotations);
        annotateParams.put(VariantAnnotationManager.CREATE, annotateCli.genericVariantAnnotateOptions.create);
        annotateParams.putIfNotEmpty(VariantAnnotationManager.LOAD_FILE, annotateCli.genericVariantAnnotateOptions.load);
        annotateParams.putIfNotEmpty(VariantAnnotationManager.CUSTOM_ANNOTATION_KEY,
                annotateCli.genericVariantAnnotateOptions.customAnnotationKey);
        annotateParams.putIfNotNull(VariantAnnotationManager.ANNOTATION_SOURCE,
                annotateCli.genericVariantAnnotateOptions.annotator);
        annotateParams.putIfNotEmpty(DefaultVariantAnnotationManager.FILE_NAME,
                annotateCli.genericVariantAnnotateOptions.fileName);
        annotateParams.put(StorageOperation.CATALOG_PATH, annotateCli.catalogPath);
        annotateParams.putAll(annotateCli.commonOptions.params);

        storageManager.annotate(annotateCli.project, annotateCli.study, filter, annotateCli.outdir, annotateParams,
                sessionId);
    }

    /**
     * Runs the "samples" sub-command and prints the result to standard output.
     * <p>
     * With the {@code all} option, prints the samples returned by the sample filter for ALL
     * variants with the given genotypes, one sample per line. Otherwise prints a matrix with one
     * row per sample and one column per variant, marking with {@code X} the variants the filter
     * returned for that sample and with {@code -} the others.
     *
     * @throws Exception anything raised while parsing the query or by the sample filter
     */
    private void samples() throws Exception {

        VariantCommandOptions.VariantSamplesFilterCommandOptions cliOptions = variantCommandOptions.samplesFilterCommandOptions;

        Query query = VariantQueryCommandUtils.parseBasicVariantQuery(cliOptions.variantQueryOptions, new Query());

        VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory);

        VariantSampleFilter variantSampleFilter = new VariantSampleFilter(variantManager.iterable(sessionId));

        if (StringUtils.isNotEmpty(cliOptions.samples)) {
            query.append(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(),
                    Arrays.asList(cliOptions.samples.split(",")));
        }
        if (StringUtils.isNotEmpty(cliOptions.study)) {
            query.append(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), cliOptions.study);
        }

        List<String> genotypes = Arrays.asList(cliOptions.genotypes.split(","));
        if (cliOptions.all) {
            Collection<String> samplesInAllVariants = variantSampleFilter.getSamplesInAllVariants(query, genotypes);
            System.out.println("##Samples in ALL variants with genotypes " + genotypes);
            for (String sample : samplesInAllVariants) {
                System.out.println(sample);
            }
        } else {
            Map<String, Set<Variant>> samplesInAnyVariants = variantSampleFilter.getSamplesInAnyVariants(query,
                    genotypes);
            System.out.println("##Samples in ANY variants with genotypes " + genotypes);

            // Columns are ordered by start position. A TreeSet discards any element its comparator
            // reports as equal to an existing one, so comparing by start alone would silently drop
            // distinct variants that share a start. The textual form is used as a tie-break.
            Set<Variant> variants = new TreeSet<>((v1, v2) -> {
                int byStart = v1.getStart().compareTo(v2.getStart());
                return byStart != 0 ? byStart : v1.toString().compareTo(v2.toString());
            });
            samplesInAnyVariants.forEach((sample, v) -> variants.addAll(v));

            // Header row: one column per variant.
            System.out.print(StringUtils.rightPad("#SAMPLE", 10));
            for (Variant variant : variants) {
                System.out.print(StringUtils.center(variant.toString(), 15));
            }
            System.out.println();

            // One row per sample.
            samplesInAnyVariants.forEach((sample, v) -> {
                System.out.print(StringUtils.rightPad(sample, 10));
                for (Variant variant : variants) {
                    if (v.contains(variant)) {
                        System.out.print(StringUtils.center("X", 15));
                    } else {
                        System.out.print(StringUtils.center("-", 15));
                    }
                }
                System.out.println();
            });

        }
    }

    /**
     * Runs the {@link VariantHistogramAnalysis} plugin for the study given on the command line.
     * The plugin parameters are, in insertion order: the free-form common params, the interval
     * (as a string), the output directory, and finally every entry of the parsed variant query.
     *
     * @throws Exception anything raised while parsing the query or by the plugin executor
     */
    private void histogram() throws Exception {
        final VariantCommandOptions.VariantHistogramCommandOptions histogramCli = variantCommandOptions.histogramCommandOptions;

        final ObjectMap pluginParams = new ObjectMap();
        pluginParams.putAll(histogramCli.commonOptions.params);
        pluginParams.put(VariantHistogramAnalysis.INTERVAL, histogramCli.interval.toString());
        pluginParams.put(VariantHistogramAnalysis.OUTDIR, histogramCli.outdir);

        final Query variantQuery = VariantQueryCommandUtils.parseBasicVariantQuery(histogramCli.variantQueryOptions,
                new Query());
        pluginParams.putAll(variantQuery);

        // The executor is created before the study id is resolved, as in a single chained call.
        final PluginExecutor pluginExecutor = new PluginExecutor(catalogManager, sessionId);
        pluginExecutor.execute(VariantHistogramAnalysis.class, "default",
                catalogManager.getStudyId(histogramCli.study, sessionId), pluginParams);
    }
}