Java tutorial
/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.app.cli.analysis.executors; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.analysis.AnalysisExecutionException; import org.opencb.opencga.analysis.execution.plugins.PluginExecutor; import org.opencb.opencga.analysis.execution.plugins.hist.VariantHistogramAnalysis; import org.opencb.opencga.analysis.execution.plugins.ibs.IbsAnalysis; import org.opencb.opencga.app.cli.analysis.options.VariantCommandOptions; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.models.DataStore; import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.manager.variant.VariantStorageManager; import org.opencb.opencga.storage.core.manager.variant.operations.StorageOperation; import org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.analysis.VariantSampleFilter; import org.opencb.opencga.storage.core.variant.annotation.DefaultVariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotatorException; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; import org.opencb.opencga.storage.core.variant.stats.DefaultVariantStatisticsManager; import java.io.IOException; import java.net.URISyntaxException; import java.util.*; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation.LOAD; import static org.opencb.opencga.storage.core.manager.variant.operations.VariantFileIndexerStorageOperation.TRANSFORM; /** * Created by imedina on 02/03/15. */ public class VariantCommandExecutor extends AnalysisCommandExecutor { // private AnalysisCliOptionsParser.VariantCommandOptions variantCommandOptions; private VariantCommandOptions variantCommandOptions; private VariantStorageEngine variantStorageEngine; public VariantCommandExecutor(VariantCommandOptions variantCommandOptions) { super(variantCommandOptions.commonCommandOptions); this.variantCommandOptions = variantCommandOptions; } @Override public void execute() throws Exception { logger.debug("Executing variant command line"); // String subCommandString = variantCommandOptions.getParsedSubCommand(); String subCommandString = getParsedSubCommand(variantCommandOptions.jCommander); configure(); sessionId = getSessionId(variantCommandOptions.commonCommandOptions); switch (subCommandString) { case "ibs": ibs(); break; case "delete": delete(); break; case "query": query(); break; case "export-frequencies": exportFrequencies(); break; case "import": importData(); break; case "index": index(); break; case "index-search": indexSearch(); break; case "stats": stats(); break; case "annotate": annotate(); break; case "samples": samples(); break; case "histogram": histogram(); break; default: logger.error("Subcommand not valid"); break; } } private VariantStorageEngine initVariantStorageManager(DataStore dataStore) throws CatalogException, IllegalAccessException, InstantiationException, ClassNotFoundException { String storageEngine = dataStore.getStorageEngine(); if (isEmpty(storageEngine)) { this.variantStorageEngine = storageEngineFactory.getVariantStorageEngine(); } else { this.variantStorageEngine = storageEngineFactory.getVariantStorageEngine(storageEngine); } return variantStorageEngine; } private void ibs() throws CatalogException, AnalysisExecutionException { VariantCommandOptions.VariantIbsCommandOptions cliOptions = variantCommandOptions.ibsVariantCommandOptions; ObjectMap params = new ObjectMap(); params.putIfNotEmpty(IbsAnalysis.SAMPLES, cliOptions.samples); params.putIfNotEmpty(IbsAnalysis.OUTDIR, cliOptions.outdir); new PluginExecutor(catalogManager, sessionId).execute(IbsAnalysis.class, "default", catalogManager.getStudyId(cliOptions.study, sessionId), params); } private void exportFrequencies() throws Exception { VariantCommandOptions.VariantExportStatsCommandOptions exportCliOptions = variantCommandOptions.exportVariantStatsCommandOptions; // AnalysisCliOptionsParser.ExportVariantStatsCommandOptions exportCliOptions = variantCommandOptions.exportVariantStatsCommandOptions; // AnalysisCliOptionsParser.QueryVariantCommandOptions queryCliOptions = variantCommandOptions.queryVariantCommandOptions; VariantCommandOptions.VariantQueryCommandOptions queryCliOptions = variantCommandOptions.queryVariantCommandOptions; queryCliOptions.commonOptions.outputFormat = exportCliOptions.commonOptions.outputFormat.toLowerCase() .replace("tsv", "stats"); queryCliOptions.study = exportCliOptions.studies; queryCliOptions.genericVariantQueryOptions.returnStudy = exportCliOptions.studies; queryCliOptions.numericOptions.limit = exportCliOptions.numericOptions.limit; // queryCliOptions.sort = true; queryCliOptions.numericOptions.skip = exportCliOptions.numericOptions.skip; queryCliOptions.genericVariantQueryOptions.region = exportCliOptions.region; queryCliOptions.genericVariantQueryOptions.regionFile = exportCliOptions.regionFile; queryCliOptions.output = exportCliOptions.output; queryCliOptions.genericVariantQueryOptions.gene = exportCliOptions.gene; queryCliOptions.numericOptions.count = exportCliOptions.numericOptions.count; queryCliOptions.genericVariantQueryOptions.returnSample = ""; query(); } private void query() throws Exception { // AnalysisCliOptionsParser.QueryVariantCommandOptions cliOptions = variantCommandOptions.queryVariantCommandOptions; VariantCommandOptions.VariantQueryCommandOptions cliOptions = variantCommandOptions.queryVariantCommandOptions; if ("TEXT".equals(cliOptions.commonOptions.outputFormat)) { cliOptions.commonOptions.outputFormat = "VCF"; } Map<Long, String> studyIds = getStudyIds(sessionId); Query query = VariantQueryCommandUtils.parseQuery(cliOptions, studyIds); QueryOptions queryOptions = VariantQueryCommandUtils.parseQueryOptions(cliOptions); queryOptions.put("summary", cliOptions.genericVariantQueryOptions.summary); VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); if (cliOptions.numericOptions.count) { QueryResult<Long> result = variantManager.count(query, sessionId); System.out.println("Num. results\t" + result.getResult().get(0)); } else if (StringUtils.isNotEmpty(cliOptions.genericVariantQueryOptions.groupBy)) { ObjectMapper objectMapper = new ObjectMapper(); QueryResult groupBy = variantManager.groupBy(cliOptions.genericVariantQueryOptions.groupBy, query, queryOptions, sessionId); System.out .println("rank = " + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(groupBy)); } else if (StringUtils.isNotEmpty(cliOptions.genericVariantQueryOptions.rank)) { ObjectMapper objectMapper = new ObjectMapper(); QueryResult rank = variantManager.rank(query, cliOptions.genericVariantQueryOptions.rank, 10, true, sessionId); System.out.println("rank = " + objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(rank)); } else { if (cliOptions.genericVariantQueryOptions.annotations != null) { queryOptions.add("annotations", cliOptions.genericVariantQueryOptions.annotations); } VariantWriterFactory.VariantOutputFormat outputFormat = VariantWriterFactory .toOutputFormat(cliOptions.commonOptions.outputFormat, cliOptions.output); variantManager.exportData(cliOptions.output, outputFormat, query, queryOptions, sessionId); } } private void importData() throws URISyntaxException, CatalogException, StorageEngineException, IOException { VariantCommandOptions.VariantImportCommandOptions importVariantOptions = variantCommandOptions.importVariantCommandOptions; VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); variantManager.importData(UriUtils.createUri(importVariantOptions.input), importVariantOptions.study, sessionId); } private void delete() { throw new UnsupportedOperationException(); } private void index() throws CatalogException, AnalysisExecutionException, IOException, ClassNotFoundException, StorageEngineException, InstantiationException, IllegalAccessException, URISyntaxException { VariantCommandOptions.VariantIndexCommandOptions cliOptions = variantCommandOptions.indexVariantCommandOptions; QueryOptions queryOptions = new QueryOptions(); queryOptions.put(LOAD, cliOptions.genericVariantIndexOptions.load); queryOptions.put(TRANSFORM, cliOptions.genericVariantIndexOptions.transform); queryOptions.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), cliOptions.genericVariantIndexOptions.calculateStats); queryOptions.put(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), cliOptions.genericVariantIndexOptions.extraFields); queryOptions.put(VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(), cliOptions.genericVariantIndexOptions.excludeGenotype); queryOptions.put(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), cliOptions.genericVariantIndexOptions.aggregated); queryOptions.put(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), cliOptions.genericVariantIndexOptions.aggregationMappingFile); queryOptions.put(VariantStorageEngine.Options.GVCF.key(), cliOptions.genericVariantIndexOptions.gvcf); queryOptions.putIfNotNull(StorageOperation.CATALOG_PATH, cliOptions.catalogPath); queryOptions.putIfNotNull(VariantFileIndexerStorageOperation.TRANSFORMED_FILES, cliOptions.transformedPaths); queryOptions.put(VariantStorageEngine.Options.ANNOTATE.key(), cliOptions.genericVariantIndexOptions.annotate); if (cliOptions.genericVariantIndexOptions.annotator != null) { queryOptions.put(VariantAnnotationManager.ANNOTATION_SOURCE, cliOptions.genericVariantIndexOptions.annotator); } queryOptions.put(VariantAnnotationManager.OVERWRITE_ANNOTATIONS, cliOptions.genericVariantIndexOptions.overwriteAnnotations); queryOptions.put(VariantStorageEngine.Options.RESUME.key(), cliOptions.genericVariantIndexOptions.resume); queryOptions.putAll(cliOptions.commonOptions.params); VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); variantManager.index(cliOptions.study, cliOptions.fileId, cliOptions.outdir, queryOptions, sessionId); } private void indexSearch() throws CatalogException, AnalysisExecutionException, IOException, ClassNotFoundException, StorageEngineException, InstantiationException, IllegalAccessException, URISyntaxException, VariantSearchException { VariantCommandOptions.VariantIndexCommandOptions cliOptions = variantCommandOptions.indexVariantCommandOptions; QueryOptions queryOptions = new QueryOptions(); queryOptions.putAll(cliOptions.commonOptions.params); VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); variantManager.searchIndex(cliOptions.study, sessionId); } private void stats() throws CatalogException, AnalysisExecutionException, IOException, ClassNotFoundException, StorageEngineException, InstantiationException, IllegalAccessException, URISyntaxException { VariantCommandOptions.VariantStatsCommandOptions cliOptions = variantCommandOptions.statsVariantCommandOptions; VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); QueryOptions options = new QueryOptions() .append(DefaultVariantStatisticsManager.OUTPUT_FILE_NAME, cliOptions.genericVariantStatsOptions.fileName) // .append(AnalysisFileIndexer.CREATE, cliOptions.create) // .append(AnalysisFileIndexer.LOAD, cliOptions.load) .append(VariantStorageEngine.Options.OVERWRITE_STATS.key(), cliOptions.genericVariantStatsOptions.overwriteStats) .append(VariantStorageEngine.Options.UPDATE_STATS.key(), cliOptions.genericVariantStatsOptions.updateStats) .append(VariantStorageEngine.Options.AGGREGATED_TYPE.key(), cliOptions.genericVariantStatsOptions.aggregated) .append(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), cliOptions.genericVariantStatsOptions.aggregationMappingFile) .append(VariantStorageEngine.Options.RESUME.key(), cliOptions.genericVariantStatsOptions.resume) .append(StorageOperation.CATALOG_PATH, cliOptions.catalogPath); options.putIfNotEmpty(VariantStorageEngine.Options.FILE_ID.key(), cliOptions.genericVariantStatsOptions.fileId); options.putAll(cliOptions.commonOptions.params); List<String> cohorts; if (StringUtils.isNotBlank(cliOptions.cohortIds)) { cohorts = Arrays.asList(cliOptions.cohortIds.split(",")); } else { cohorts = Collections.emptyList(); } variantManager.stats(cliOptions.study, cohorts, cliOptions.outdir, options, sessionId); } private void annotate() throws StorageEngineException, IOException, URISyntaxException, VariantAnnotatorException, CatalogException, AnalysisExecutionException, IllegalAccessException, InstantiationException, ClassNotFoundException { VariantCommandOptions.VariantAnnotateCommandOptions cliOptions = variantCommandOptions.annotateVariantCommandOptions; VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); Query query = new Query() .append(VariantDBAdaptor.VariantQueryParams.REGION.key(), cliOptions.genericVariantAnnotateOptions.filterRegion) .append(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key(), cliOptions.genericVariantAnnotateOptions.filterChromosome) .append(VariantDBAdaptor.VariantQueryParams.GENE.key(), cliOptions.genericVariantAnnotateOptions.filterGene) .append(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key(), cliOptions.genericVariantAnnotateOptions.filterAnnotConsequenceType); QueryOptions options = new QueryOptions(); options.put(VariantAnnotationManager.OVERWRITE_ANNOTATIONS, cliOptions.genericVariantAnnotateOptions.overwriteAnnotations); options.put(VariantAnnotationManager.CREATE, cliOptions.genericVariantAnnotateOptions.create); options.putIfNotEmpty(VariantAnnotationManager.LOAD_FILE, cliOptions.genericVariantAnnotateOptions.load); options.putIfNotEmpty(VariantAnnotationManager.CUSTOM_ANNOTATION_KEY, cliOptions.genericVariantAnnotateOptions.customAnnotationKey); options.putIfNotNull(VariantAnnotationManager.ANNOTATION_SOURCE, cliOptions.genericVariantAnnotateOptions.annotator); options.putIfNotEmpty(DefaultVariantAnnotationManager.FILE_NAME, cliOptions.genericVariantAnnotateOptions.fileName); options.put(StorageOperation.CATALOG_PATH, cliOptions.catalogPath); options.putAll(cliOptions.commonOptions.params); variantManager.annotate(cliOptions.project, cliOptions.study, query, cliOptions.outdir, options, sessionId); } private void samples() throws Exception { VariantCommandOptions.VariantSamplesFilterCommandOptions cliOptions = variantCommandOptions.samplesFilterCommandOptions; // Map<Long, String> studyIds = getStudyIds(sessionId); Query query = VariantQueryCommandUtils.parseBasicVariantQuery(cliOptions.variantQueryOptions, new Query()); VariantStorageManager variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); VariantSampleFilter variantSampleFilter = new VariantSampleFilter(variantManager.iterable(sessionId)); if (StringUtils.isNotEmpty(cliOptions.samples)) { query.append(VariantDBAdaptor.VariantQueryParams.RETURNED_SAMPLES.key(), Arrays.asList(cliOptions.samples.split(","))); } if (StringUtils.isNotEmpty(cliOptions.study)) { query.append(VariantDBAdaptor.VariantQueryParams.STUDIES.key(), cliOptions.study); } List<String> genotypes = Arrays.asList(cliOptions.genotypes.split(",")); if (cliOptions.all) { Collection<String> samplesInAllVariants = variantSampleFilter.getSamplesInAllVariants(query, genotypes); System.out.println("##Samples in ALL variants with genotypes " + genotypes); for (String sample : samplesInAllVariants) { System.out.println(sample); } } else { Map<String, Set<Variant>> samplesInAnyVariants = variantSampleFilter.getSamplesInAnyVariants(query, genotypes); System.out.println("##Samples in ANY variants with genotypes " + genotypes); Set<Variant> variants = new TreeSet<>((v1, o2) -> v1.getStart().compareTo(o2.getStart())); samplesInAnyVariants.forEach((sample, v) -> variants.addAll(v)); System.out.print(StringUtils.rightPad("#SAMPLE", 10)); // System.out.print("|"); for (Variant variant : variants) { System.out.print(StringUtils.center(variant.toString(), 15)); // System.out.print("|"); } System.out.println(); samplesInAnyVariants.forEach((sample, v) -> { System.out.print(StringUtils.rightPad(sample, 10)); // System.out.print("|"); for (Variant variant : variants) { if (v.contains(variant)) { System.out.print(StringUtils.center("X", 15)); } else { System.out.print(StringUtils.center("-", 15)); } // System.out.print("|"); } System.out.println(); }); } } private void histogram() throws Exception { VariantCommandOptions.VariantHistogramCommandOptions cliOptions = variantCommandOptions.histogramCommandOptions; ObjectMap params = new ObjectMap(); params.putAll(cliOptions.commonOptions.params); params.put(VariantHistogramAnalysis.INTERVAL, cliOptions.interval.toString()); params.put(VariantHistogramAnalysis.OUTDIR, cliOptions.outdir); Query query = VariantQueryCommandUtils.parseBasicVariantQuery(cliOptions.variantQueryOptions, new Query()); params.putAll(query); new PluginExecutor(catalogManager, sessionId).execute(VariantHistogramAnalysis.class, "default", catalogManager.getStudyId(cliOptions.study, sessionId), params); } }