Java tutorial
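This page walks through org.opencb.opencga.analysis.storage.variant.VariantStorage, the OpenCGA analysis class that drives variant statistics and annotation from Catalog. The non-deprecated calculateStats(...) computes and loads cohort statistics in-process through the storage engine's VariantStatisticsManager, while the deprecated calculateStats(...) overload and annotateVariants(...) build an opencga-analysis command line and register it as a Catalog job. The full source follows, reformatted; a short usage sketch closes the page.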
/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.analysis.storage.variant;

import org.apache.commons.lang3.RandomStringUtils;
import org.opencb.biodata.models.variant.VariantSource.Aggregation;
import org.opencb.biodata.tools.variant.stats.VariantAggregatedStatsCalculator;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.analysis.AnalysisExecutionException;
import org.opencb.opencga.analysis.JobFactory;
import org.opencb.opencga.analysis.storage.AnalysisFileIndexer;
import org.opencb.opencga.analysis.variant.AbstractFileIndexer;
import org.opencb.opencga.analysis.variant.CatalogStudyConfigurationFactory;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogDBException;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.models.*;
import org.opencb.opencga.catalog.monitor.executors.AbstractExecutor;
import org.opencb.opencga.catalog.monitor.executors.old.ExecutorManager;
import org.opencb.opencga.core.common.Config;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.StorageManagerFactory;
import org.opencb.opencga.storage.core.exceptions.StorageManagerException;
import org.opencb.opencga.storage.core.metadata.StudyConfiguration;
import org.opencb.opencga.storage.core.variant.StudyConfigurationManager;
import org.opencb.opencga.storage.core.variant.VariantStorageManager;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
import org.opencb.opencga.storage.core.variant.stats.VariantStatisticsManager;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
import static org.opencb.opencga.storage.core.variant.VariantStorageManager.Options;

/**
 * Created by jacobo on 06/03/15.
 */
public class VariantStorage extends AbstractFileIndexer {

    public VariantStorage(CatalogManager catalogManager) {
        super(catalogManager, LoggerFactory.getLogger(VariantStorage.class));
    }

    public void calculateStats(long studyId, List<Long> cohortIds, String catalogOutDirIdStr, String outdirStr,
                               String sessionId, QueryOptions options)
            throws AnalysisExecutionException, CatalogException, IOException, URISyntaxException {
        Job.Type step = Job.Type.COHORT_STATS;
        String fileIdStr = options.getString(Options.FILE_ID.key(), null);
        boolean overwriteStats = options.getBoolean(Options.OVERWRITE_STATS.key(), false);
        boolean updateStats = options.getBoolean(Options.UPDATE_STATS.key(), false);
        final Long fileId = fileIdStr == null ? null : catalogManager.getFileId(fileIdStr, sessionId);

        // Outdir must be empty
        URI outdirUri = UriUtils.createDirectoryUri(outdirStr);
        final Path outdir = Paths.get(outdirUri);
        outdirMustBeEmpty(outdir);

        cohortIds = checkCohorts(studyId, cohortIds, options, sessionId);
        Map<Long, Cohort> cohortsMap = checkCanCalculateCohorts(studyId, cohortIds, updateStats, sessionId);

        String region = options.getString(VariantDBAdaptor.VariantQueryParams.REGION.key());
        String outputFileName = buildOutputFileName(cohortIds, options, cohortsMap, region);

        Long catalogOutDirId;
        if (catalogOutDirIdStr != null) {
            catalogOutDirId = catalogManager.getFileManager().getId(catalogOutDirIdStr, studyId, sessionId);
            if (catalogOutDirId <= 0) {
                throw new CatalogException("Output directory " + catalogOutDirIdStr + " could not be found within catalog.");
            }
        } else {
            catalogOutDirId = null;
        }

        QueryOptions calculateStatsOptions = new QueryOptions(options)
//                .append(VariantStorageManager.Options.LOAD_BATCH_SIZE.key(), 100)
//                .append(VariantStorageManager.Options.LOAD_THREADS.key(), 6)
                .append(Options.OVERWRITE_STATS.key(), overwriteStats)
                .append(Options.UPDATE_STATS.key(), updateStats);
        calculateStatsOptions.putIfNotNull(Options.FILE_ID.key(), fileId);
        calculateStatsOptions.putIfNotEmpty(VariantDBAdaptor.VariantQueryParams.REGION.key(), region);

        // If the study is aggregated and a mapping file is provided, pass it to storage
        // and create in catalog the cohorts described in the mapping file
        Aggregation aggregation = getAggregation(studyId, options, sessionId);
        String aggregationMappingFile = options.getString(Options.AGGREGATION_MAPPING_PROPERTIES.key());
        if (Aggregation.isAggregated(aggregation) && !aggregationMappingFile.isEmpty()) {
            try (InputStream is = FileUtils.newInputStream(Paths.get(aggregationMappingFile))) {
                Properties properties = new Properties();
                properties.load(is);
                calculateStatsOptions.append(Options.AGGREGATION_MAPPING_PROPERTIES.key(), properties);
            }
        }

        DataStore dataStore = AbstractFileIndexer.getDataStore(catalogManager, studyId, File.Bioformat.VARIANT, sessionId);
        StudyConfiguration studyConfiguration = updateStudyConfiguration(sessionId, studyId, dataStore);

        Thread hook = buildHook(cohortIds, sessionId, outdir);
        writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.RUNNING, "Job has just started"));
        Runtime.getRuntime().addShutdownHook(hook);
        // Up to this point, catalog has not been modified
        try {
            // Modify cohort status to "CALCULATING"
            updateCohorts(cohortIds, sessionId, Cohort.CohortStatus.CALCULATING);

            VariantStorageManager variantStorageManager
                    = StorageManagerFactory.get().getVariantStorageManager(dataStore.getStorageEngine());
            VariantStatisticsManager variantStatisticsManager = new VariantStatisticsManager();
            VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(dataStore.getDbName());

            Map<String, Integer> cohortNameIdMap = new HashMap<>(cohortIds.size());
            Map<String, Set<String>> cohortSamplesMap = new HashMap<>(cohortIds.size());
            for (Map.Entry<Long, Cohort> entry : cohortsMap.entrySet()) {
                cohortNameIdMap.put(entry.getValue().getName(), entry.getKey().intValue());
                cohortSamplesMap.put(entry.getValue().getName(), entry.getValue().getSamples()
                        .stream()
                        .map(sampleId -> studyConfiguration.getSampleIds().inverse().get(sampleId.intValue()))
                        .collect(Collectors.toSet()));
            }

            URI stats = variantStatisticsManager.createStats(dbAdaptor, outdirUri.resolve(outputFileName),
                    cohortSamplesMap, cohortNameIdMap, studyConfiguration, calculateStatsOptions);

            writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.RUNNING, "Job still running. Statistics created."));
            variantStatisticsManager.loadStats(dbAdaptor, stats, studyConfiguration, options);

            if (catalogOutDirId != null) {
                copyResults(Paths.get(outdirUri), catalogOutDirId, sessionId);
            }

            writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.DONE, "Job completed"));
            // Modify cohort status to "READY"
            updateCohorts(cohortIds, sessionId, Cohort.CohortStatus.READY);
        } catch (Exception e) {
            // Error!
            logger.error("Error executing stats. Set cohorts status to " + Cohort.CohortStatus.INVALID, e);
            writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.ERROR, "Job with error : " + e.getMessage()));
            // Modify to "INVALID"
            updateCohorts(cohortIds, sessionId, Cohort.CohortStatus.INVALID);
            throw new AnalysisExecutionException("Error calculating statistics.", e);
        } finally {
            // Remove hook
            Runtime.getRuntime().removeShutdownHook(hook);
        }
    }
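    /*
     * Cohort status lifecycle enforced by calculateStats() above:
     *
     *   NONE/INVALID --calculateStats()--> CALCULATING --success--> READY
     *                                      CALCULATING --failure--> INVALID
     *
     * A shutdown hook (see buildHook() below) additionally marks the job as ERROR
     * and the cohorts as INVALID if the JVM dies while the job status file still
     * says RUNNING.
     */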
    /**
     * Accepts options:
     *      {@link Options#FILE_ID}
     *      {@link Options#UPDATE_STATS}
     *      {@link Options#AGGREGATION_MAPPING_PROPERTIES}
     *      {@link VariantStatisticsManager#OUTPUT_FILE_NAME}
     *      {@link VariantDBAdaptor.VariantQueryParams#REGION}
     *      {@link ExecutorManager#EXECUTE}
     *      {@link ExecutorManager#SIMULATE}
     *      {@link AnalysisFileIndexer#LOG_LEVEL}
     *      {@link AnalysisFileIndexer#PARAMETERS}
     *
     * @param outDirId  Catalog id of the output directory
     * @param cohortIds Cohorts to calculate stats for
     * @param sessionId User's sessionId
     * @param options   Execution options
     * @return The created job
     * @throws AnalysisExecutionException on execution error
     * @throws CatalogException if there is an error in Catalog
     * @throws IOException on I/O error
     * @deprecated use {@link #calculateStats(long, List, String, String, String, QueryOptions)}
     */
    @Deprecated
    public QueryResult<Job> calculateStats(Long outDirId, List<Long> cohortIds, String sessionId, QueryOptions options)
            throws AnalysisExecutionException, CatalogException, IOException {
        if (options == null) {
            options = new QueryOptions();
        }
        final boolean execute = options.getBoolean(ExecutorManager.EXECUTE);
        final boolean simulate = options.getBoolean(ExecutorManager.SIMULATE);
        String fileIdStr = options.getString(Options.FILE_ID.key(), null);
        boolean updateStats = options.getBoolean(Options.UPDATE_STATS.key(), false);
        final Long fileId = fileIdStr == null ? null : catalogManager.getFileId(fileIdStr, sessionId);
        final long start = System.currentTimeMillis();

        cohortIds = checkCohorts(-1, cohortIds, options, sessionId);
        long studyId = catalogManager.getStudyIdByCohortId(cohortIds.get(0));
        Map<Long, Cohort> cohortsMap = checkCanCalculateCohorts(studyId, cohortIds, updateStats, sessionId);

        String region = options.getString(VariantDBAdaptor.VariantQueryParams.REGION.key());
        String outputFileName = buildOutputFileName(cohortIds, options, cohortsMap, region);

        updateCohorts(cohortIds, sessionId, Cohort.CohortStatus.CALCULATING);

        File outDir;
        if (outDirId == null || outDirId <= 0) {
//            outDir = catalogManager.getFileParent(indexedFileId, null, sessionId).first();
            outDir = catalogManager.getAllFiles(studyId, new Query(FileDBAdaptor.QueryParams.PATH.key(), ""),
                    new QueryOptions(), sessionId).first();
        } else {
            outDir = catalogManager.getFile(outDirId, null, sessionId).first();
        }

        /** Create temporal Job Outdir **/
        final String randomString = "I_" + RandomStringUtils.randomAlphanumeric(10);
        final URI temporalOutDirUri;
        if (simulate) {
            temporalOutDirUri = AnalysisFileIndexer.createSimulatedOutDirUri(randomString);
        } else {
            temporalOutDirUri = catalogManager.createJobOutDir(studyId, randomString, sessionId);
        }

        /** Create command line **/
        String opencgaAnalysisBinPath = Paths.get(Config.getOpenCGAHome(), "bin",
                AnalysisFileIndexer.OPENCGA_ANALYSIS_BIN_NAME).toString();
        DataStore dataStore = AbstractFileIndexer.getDataStore(catalogManager, studyId, File.Bioformat.VARIANT, sessionId);

        StringBuilder sb = new StringBuilder().append(opencgaAnalysisBinPath).append(" variant stats ")
                .append(" --study-id ").append(studyId)
                .append(" --session-id ").append(sessionId)
                .append(" --output-filename ").append(outputFileName)
                .append(" --job-id ").append(randomString);
        if (fileId != null) {
            sb.append(" --file-id ").append(fileId);
        }
        if (options.containsKey(AnalysisFileIndexer.LOG_LEVEL)) {
            sb.append(" --log-level ").append(options.getString(AnalysisFileIndexer.LOG_LEVEL));
        }
        if (isNotEmpty(region)) {
            sb.append(" --region ").append(region);
        }
        if (updateStats) {
            sb.append(" --update-stats ");
        }

        // If the study is aggregated and a mapping file is provided, pass it to storage
        // and create in catalog the cohorts described in the mapping file
        Study study = catalogManager.getStudy(studyId, new QueryOptions("include", "projects.studies.attributes"), sessionId)
                .first();
        Aggregation studyAggregation = Aggregation.valueOf(study.getAttributes()
                .getOrDefault(Options.AGGREGATED_TYPE.key(), Aggregation.NONE).toString());
        if (Aggregation.isAggregated(studyAggregation)
                && !options.getString(Options.AGGREGATION_MAPPING_PROPERTIES.key()).isEmpty()) {
            sb.append(" --aggregation-mapping-file ")
                    .append(options.getString(Options.AGGREGATION_MAPPING_PROPERTIES.key()));
        }

        if (!cohortsMap.isEmpty()) {
            sb.append(" --cohort-ids ");
            for (Iterator<Long> iterator = cohortsMap.keySet().iterator(); iterator.hasNext();) {
                Long cohortId = iterator.next();
                sb.append(cohortId);
                if (iterator.hasNext()) {
                    sb.append(",");
                }
            }
        }
        if (options.containsKey(AnalysisFileIndexer.PARAMETERS)) {
            List<String> extraParams = options.getAsStringList(AnalysisFileIndexer.PARAMETERS);
            for (String extraParam : extraParams) {
                sb.append(" ").append(extraParam);
            }
        }
        String commandLine = sb.toString();
        logger.debug("CommandLine to calculate stats {}", commandLine);

        /** Update StudyConfiguration **/
        if (!simulate) {
            updateStudyConfiguration(sessionId, studyId, dataStore);
        }

        /** Create job **/
        String jobName = "calculate-stats";
        String jobDescription = "Stats calculation for cohort "
                + cohortsMap.values().stream().map(Cohort::getName).collect(Collectors.toList());
        HashMap<String, Object> attributes = new HashMap<>();
        attributes.put(Job.TYPE, Job.Type.COHORT_STATS);
        attributes.put("cohortIds", cohortIds);
        HashMap<String, Object> resourceManagerAttributes = new HashMap<>();
        JobFactory jobFactory = new JobFactory(catalogManager);
        return jobFactory.createJob(studyId, jobName, AnalysisFileIndexer.OPENCGA_ANALYSIS_BIN_NAME, jobDescription,
                outDir, Collections.emptyList(), sessionId, randomString, temporalOutDirUri, commandLine, execute,
                simulate, attributes, resourceManagerAttributes);
    }

    protected Thread buildHook(List<Long> cohortIds, String sessionId, Path outdir) {
        return new Thread(() -> {
            try {
                // If the status has not been changed by the method and is still running, we assume that the execution failed.
                Job.JobStatus status = readJobStatus(outdir);
                if (status.getName().equalsIgnoreCase(Job.JobStatus.RUNNING)) {
                    writeJobStatus(outdir, new Job.JobStatus(Job.JobStatus.ERROR, "Job finished with an error."));
                    updateCohorts(cohortIds, sessionId, Cohort.CohortStatus.INVALID);
                }
            } catch (IOException | CatalogException e) {
                logger.error("Error modifying " + AbstractExecutor.JOB_STATUS_FILE, e);
            }
        });
    }

    protected String buildOutputFileName(List<Long> cohortIds, QueryOptions options, Map<Long, Cohort> cohortsMap,
                                         String region) {
        final String outputFileName;
        if (isNotEmpty(options.getString(VariantStatisticsManager.OUTPUT_FILE_NAME))) {
            outputFileName = options.getString(VariantStatisticsManager.OUTPUT_FILE_NAME);
        } else {
            StringBuilder outputFileNameBuilder = new StringBuilder("stats_");
            if (isNotEmpty(region)) {
                outputFileNameBuilder.append(region).append("_");
            }
            for (Iterator<Long> iterator = cohortIds.iterator(); iterator.hasNext();) {
                Long cohortId = iterator.next();
                outputFileNameBuilder.append(cohortsMap.get(cohortId).getName());
                if (iterator.hasNext()) {
                    outputFileNameBuilder.append('_');
                }
            }
            outputFileName = outputFileNameBuilder.toString();
        }
        return outputFileName;
    }

    /**
     * Must provide either a list of cohorts or an aggregation_mapping_properties file.
     *
     * @param studyId   Study id
     * @param cohortIds List of cohorts
     * @param options   Options, where the aggregation mapping properties file will be
     * @param sessionId User's sessionId
     * @return Checked list of cohorts
     * @throws CatalogException if there is an error in Catalog
     * @throws IOException if the aggregation mapping file cannot be read
     */
    protected List<Long> checkCohorts(long studyId, List<Long> cohortIds, QueryOptions options, String sessionId)
            throws CatalogException, IOException {
        if (cohortIds == null || cohortIds.isEmpty()) {
            String tagMap = options.getString(Options.AGGREGATION_MAPPING_PROPERTIES.key());
            if (isBlank(tagMap)) {
                throw new CatalogException("Cohort list null or empty");
            } else {
                cohortIds = createCohortsByAggregationMapFile(studyId, tagMap, sessionId);
            }
        }
        return cohortIds;
    }
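    /*
     * Note: when no cohort list is given, checkCohorts() falls back to the tag map named in
     * Options.AGGREGATION_MAPPING_PROPERTIES and registers in Catalog one cohort per cohort
     * name declared in that properties file (see createCohortsByAggregationMapFile() below).
     * This is how aggregated studies, whose VCFs carry cohort-level frequencies instead of
     * per-sample genotypes, obtain their cohorts without an explicit sample list.
     */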
    private List<Long> createCohortsByAggregationMapFile(long studyId, String aggregationMapFile, String sessionId)
            throws IOException, CatalogException {
        List<Long> cohorts = new ArrayList<>();
        Properties tagmap = new Properties();
        // Load the tag map with try-with-resources so the stream is always closed
        try (InputStream is = new FileInputStream(aggregationMapFile)) {
            tagmap.load(is);
        }
        Map<String, Long> catalogCohorts = catalogManager
                .getAllCohorts(studyId, null, new QueryOptions(QueryOptions.INCLUDE, "name,id"), sessionId)
                .getResult().stream().collect(Collectors.toMap(Cohort::getName, Cohort::getId));
        for (String cohortName : VariantAggregatedStatsCalculator.getCohorts(tagmap)) {
            if (!catalogCohorts.containsKey(cohortName)) {
                QueryResult<Cohort> cohort = catalogManager.createCohort(studyId, cohortName, Study.Type.COLLECTION, "",
                        Collections.emptyList(), null, sessionId);
                logger.info("Creating cohort {}", cohortName);
                cohorts.add(cohort.first().getId());
            } else {
                logger.debug("cohort {} was already created", cohortName);
                cohorts.add(catalogCohorts.get(cohortName));
            }
        }
        return cohorts;
    }

    /**
     * If the study is aggregated and a mapping file is provided, pass it to storage
     * and create in catalog the cohorts described in the mapping file.
     *
     * If the study aggregation was not defined, updateStudy with the provided aggregation type.
     *
     * @param studyId   StudyId where calculate stats
     * @param options   Options
     * @param sessionId User's sessionId
     * @return Effective study aggregation type
     * @throws CatalogException if something is wrong with catalog
     */
    public Aggregation getAggregation(long studyId, QueryOptions options, String sessionId) throws CatalogException {
        Study study = catalogManager.getStudy(studyId, new QueryOptions("include", "projects.studies.attributes"), sessionId)
                .first();
        Aggregation argsAggregation = options.get(Options.AGGREGATED_TYPE.key(), Aggregation.class, Aggregation.NONE);
        String studyAggregationStr = study.getAttributes()
                .getOrDefault(Options.AGGREGATED_TYPE.key(), Aggregation.NONE).toString();
        Aggregation studyAggregation = Aggregation.valueOf(studyAggregationStr);

        final Aggregation aggregation;
        if (Aggregation.isAggregated(argsAggregation)) {
            if (!studyAggregation.equals(argsAggregation)) {
                // FIXME: Throw an exception?
                logger.warn("Calculating statistics with aggregation " + argsAggregation + " instead of " + studyAggregation);
            }
            aggregation = argsAggregation;
            // If studyAggregation is not defined, update study aggregation
            if (!study.getAttributes().containsKey(Options.AGGREGATED_TYPE.key())) {
                // Update study aggregation
                Map<String, Aggregation> attributes = Collections.singletonMap(Options.AGGREGATED_TYPE.key(), argsAggregation);
                ObjectMap parameters = new ObjectMap("attributes", attributes);
                catalogManager.modifyStudy(studyId, parameters, sessionId);
            }
        } else {
            aggregation = studyAggregation;
        }
        return aggregation;
    }

    /**
     * Check if a set of given cohorts is available to calculate statistics.
     *
     * @param studyId     Study id that must contain all the cohorts
     * @param cohortIds   Set of cohorts
     * @param updateStats Update already existing stats
     * @param sessionId   User's sessionId
     * @return Map from cohort id to cohort
     * @throws CatalogException if a cohort status does not allow the calculation
     *                          or the cohorts belong to a different study
     */
    protected Map<Long, Cohort> checkCanCalculateCohorts(long studyId, List<Long> cohortIds, boolean updateStats,
                                                         String sessionId) throws CatalogException {
        Set<Long> studyIdSet = new HashSet<>();
        Map<Long, Cohort> cohortMap = new HashMap<>(cohortIds.size());
        for (Long cohortId : cohortIds) {
            Cohort cohort = catalogManager.getCohort(cohortId, null, sessionId).first();
            long studyIdByCohortId = catalogManager.getStudyIdByCohortId(cohortId);
            studyIdSet.add(studyIdByCohortId);
            switch (cohort.getStatus().getName()) {
                case Cohort.CohortStatus.NONE:
                case Cohort.CohortStatus.INVALID:
                    break;
                case Cohort.CohortStatus.READY:
                    if (updateStats) {
                        catalogManager.modifyCohort(cohortId, new ObjectMap("status.name", Cohort.CohortStatus.INVALID),
                                new QueryOptions(), sessionId);
                        break;
                    }
                    // Intentional fall through: a READY cohort cannot be recalculated unless updateStats is set
                case Cohort.CohortStatus.CALCULATING:
                    throw new CatalogException("Unable to calculate stats for cohort "
                            + "{ id: " + cohort.getId() + " name: \"" + cohort.getName() + "\" }"
                            + " with status \"" + cohort.getStatus().getName() + "\"");
            }
            cohortMap.put(cohortId, cohort);
//            QueryResult<Sample> sampleQueryResult = catalogManager.getAllSamples(studyIdByCohortId, new Query("id", cohort.getSamples()), new QueryOptions(), sessionId);
        }

        // Check that all cohorts are from the same study
        if (studyIdSet.size() != 1) {
            throw new CatalogException("Error: CohortIds are from multiple studies: " + studyIdSet.toString());
        }
        if (!new ArrayList<>(studyIdSet).get(0).equals(studyId)) {
            throw new CatalogException("Error: CohortIds are from a different study than provided: " + studyIdSet.toString());
        }

        return cohortMap;
    }

    protected void updateCohorts(List<Long> cohortIds, String sessionId, String status) throws CatalogException {
        for (Long cohortId : cohortIds) {
            catalogManager.modifyCohort(cohortId, new ObjectMap("status.name", status), new QueryOptions(), sessionId);
        }
    }
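    /*
     * Note: like the deprecated calculateStats() above, annotateVariants() below does not
     * annotate in-process. It builds an opencga-analysis command line ("... variant annotate
     * --study-id ... --session-id ..."), refreshes the StudyConfiguration, and registers the
     * command as a Catalog Job through JobFactory; the ExecutorManager.EXECUTE and
     * ExecutorManager.SIMULATE options decide whether the job is run or only simulated.
     */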
    /**
     * Accepts options:
     *      {@link ExecutorManager#EXECUTE}
     *      {@link ExecutorManager#SIMULATE}
     *      {@link AnalysisFileIndexer#LOG_LEVEL}
     *      {@link AnalysisFileIndexer#PARAMETERS}
     *      {@link AnalysisFileIndexer#CREATE}
     *      {@link AnalysisFileIndexer#LOAD}
     *      {@link VariantDBAdaptor.VariantQueryParams#REGION}
     *      {@link VariantDBAdaptor.VariantQueryParams#GENE}
     *      {@link VariantDBAdaptor.VariantQueryParams#CHROMOSOME}
     *      {@link VariantDBAdaptor.VariantQueryParams#ANNOT_CONSEQUENCE_TYPE}
     *      {@link VariantAnnotationManager#OVERWRITE_ANNOTATIONS}
     *      {@link VariantAnnotationManager#FILE_NAME}
     *      {@link VariantAnnotationManager#ANNOTATION_SOURCE}
     *      {@link VariantAnnotationManager#CUSTOM_ANNOTATION_KEY}
     *      {@link VariantAnnotationManager#SPECIES}
     *      {@link VariantAnnotationManager#ASSEMBLY}
     *
     * @param studyId   Study id
     * @param outDirId  Catalog id of the output directory
     * @param sessionId User's sessionId
     * @param options   Execution options
     * @return The created job
     * @throws CatalogException if there is an error in Catalog
     * @throws AnalysisExecutionException on execution error
     */
    public QueryResult<Job> annotateVariants(long studyId, long outDirId, String sessionId, QueryOptions options)
            throws CatalogException, AnalysisExecutionException {
        if (options == null) {
            options = new QueryOptions();
        }
        final boolean execute = options.getBoolean(ExecutorManager.EXECUTE);
        final boolean simulate = options.getBoolean(ExecutorManager.SIMULATE);
        final long start = System.currentTimeMillis();

        File outDir = catalogManager.getFile(outDirId, null, sessionId).first();
        List<Long> inputFiles = new ArrayList<>();

        /** Create temporal Job Outdir **/
        final URI temporalOutDirUri;
        final String randomString = "I_" + RandomStringUtils.randomAlphanumeric(10);
        if (simulate) {
            temporalOutDirUri = AnalysisFileIndexer.createSimulatedOutDirUri(randomString);
        } else {
            temporalOutDirUri = catalogManager.createJobOutDir(studyId, randomString, sessionId);
        }

        /** Create command line **/
        String opencgaAnalysisBinPath = Paths.get(Config.getOpenCGAHome(), "bin",
                AnalysisFileIndexer.OPENCGA_ANALYSIS_BIN_NAME).toString();
        DataStore dataStore = AbstractFileIndexer.getDataStore(catalogManager, studyId, File.Bioformat.VARIANT, sessionId);

        StringBuilder sb = new StringBuilder().append(opencgaAnalysisBinPath).append(" variant annotate ")
                .append(" --study-id ").append(studyId)
                .append(" --session-id ").append(sessionId)
                .append(" --job-id ").append(randomString)
                .append(" --outdir-id ").append(outDir.getId());
        if (isNotEmpty(options.getString(AnalysisFileIndexer.LOAD))) {
            String fileIdstr = options.getString(AnalysisFileIndexer.LOAD);
            long fileId = catalogManager.getFileId(fileIdstr);
            if (fileId < 0) {
                throw CatalogDBException.idNotFound("File", fileIdstr);
            }
            sb.append(" --load ").append(fileId);
            inputFiles.add(fileId);
        }
        if (options.getBoolean(AnalysisFileIndexer.CREATE, false)) {
            sb.append(" --create ");
        }
        if (options.getBoolean(VariantAnnotationManager.OVERWRITE_ANNOTATIONS)) {
            sb.append(" --overwrite-annotations ");
        }
        // TODO: Read from Catalog?
        if (isNotEmpty(options.getString(VariantAnnotationManager.SPECIES))) {
            sb.append(" --species ").append(options.getString(VariantAnnotationManager.SPECIES));
        }
        if (isNotEmpty(options.getString(VariantAnnotationManager.ASSEMBLY))) {
            sb.append(" --assembly ").append(options.getString(VariantAnnotationManager.ASSEMBLY));
        }
        if (isNotEmpty(options.getString(VariantAnnotationManager.FILE_NAME))) {
            sb.append(" --output-filename ").append(options.getString(VariantAnnotationManager.FILE_NAME));
        }
        if (isNotEmpty(options.getString(VariantAnnotationManager.CUSTOM_ANNOTATION_KEY))) {
            sb.append(" --custom-name ").append(options.getString(VariantAnnotationManager.CUSTOM_ANNOTATION_KEY));
        }
        if (isNotEmpty(options.getString(VariantAnnotationManager.ANNOTATION_SOURCE))) {
            sb.append(" --annotator ").append(options.getString(VariantAnnotationManager.ANNOTATION_SOURCE));
        }
        if (isNotEmpty(options.getString(VariantDBAdaptor.VariantQueryParams.REGION.key()))) {
            sb.append(" --filter-region ").append(options.getString(VariantDBAdaptor.VariantQueryParams.REGION.key()));
        }
        if (isNotEmpty(options.getString(VariantDBAdaptor.VariantQueryParams.GENE.key()))) {
            sb.append(" --filter-gene ").append(options.getString(VariantDBAdaptor.VariantQueryParams.GENE.key()));
        }
        if (isNotEmpty(options.getString(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key()))) {
            sb.append(" --filter-chromosome ").append(options.getString(VariantDBAdaptor.VariantQueryParams.CHROMOSOME.key()));
        }
        if (isNotEmpty(options.getString(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key()))) {
            sb.append(" --filter-annot-consequence-type ")
                    .append(options.getString(VariantDBAdaptor.VariantQueryParams.ANNOT_CONSEQUENCE_TYPE.key()));
        }
        if (options.containsKey(AnalysisFileIndexer.LOG_LEVEL)) {
            sb.append(" --log-level ").append(options.getString(AnalysisFileIndexer.LOG_LEVEL));
        }
        if (options.containsKey(AnalysisFileIndexer.PARAMETERS)) {
            List<String> extraParams = options.getAsStringList(AnalysisFileIndexer.PARAMETERS);
            for (String extraParam : extraParams) {
                sb.append(" ").append(extraParam);
            }
        }
        String commandLine = sb.toString();
        logger.debug("CommandLine to annotate variants {}", commandLine);

        /** Update StudyConfiguration **/
        if (!simulate) {
            try {
                StudyConfigurationManager studyConfigurationManager = StorageManagerFactory.get()
                        .getVariantStorageManager(dataStore.getStorageEngine())
                        .getDBAdaptor(dataStore.getDbName())
                        .getStudyConfigurationManager();
                new CatalogStudyConfigurationFactory(catalogManager)
                        .updateStudyConfigurationFromCatalog(studyId, studyConfigurationManager, sessionId);
            } catch (StorageManagerException | ClassNotFoundException | InstantiationException | IllegalAccessException e) {
                // Log instead of printing the stack trace to stderr; the job can still be created
                logger.error("Error updating StudyConfiguration", e);
            }
        }

        /** Create job **/
        String jobDescription = "Variant annotation";
        String jobName = "annotate-variants";
        JobFactory jobFactory = new JobFactory(catalogManager);
        HashMap<String, Object> resourceManagerAttributes = new HashMap<>();
        resourceManagerAttributes.put(Job.JOB_SCHEDULER_NAME, randomString);
        return jobFactory.createJob(studyId, jobName, AnalysisFileIndexer.OPENCGA_ANALYSIS_BIN_NAME, jobDescription,
                outDir, inputFiles, sessionId, randomString, temporalOutDirUri, commandLine, execute, simulate,
                new HashMap<>(), resourceManagerAttributes);
    }
}
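A minimal usage sketch follows. It assumes an already-initialized CatalogManager and a valid session token; every literal below (study id, cohort ids, output path, directory id) is a hypothetical placeholder, not a value taken from the code above.

import java.util.Arrays;
import java.util.List;

import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.storage.variant.VariantStorage;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.catalog.monitor.executors.old.ExecutorManager;

import static org.opencb.opencga.storage.core.variant.VariantStorageManager.Options;

public class VariantStorageExample {

    public static void main(String[] args) throws Exception {
        CatalogManager catalogManager = null;           // obtain from your OpenCGA installation
        String sessionId = "mySessionId";               // token from a previous login
        long studyId = 42L;                             // an existing study
        List<Long> cohortIds = Arrays.asList(10L, 11L); // existing cohorts of that study

        VariantStorage variantStorage = new VariantStorage(catalogManager);

        // Compute and load cohort statistics, allowing already-existing stats to be updated.
        // catalogOutDirIdStr is null, so results stay in the local output directory.
        QueryOptions statsOptions = new QueryOptions()
                .append(Options.UPDATE_STATS.key(), true);
        variantStorage.calculateStats(studyId, cohortIds, null, "file:///tmp/stats/",
                sessionId, statsOptions);

        // Queue a variant annotation job writing into catalog directory 1,
        // executing it immediately rather than simulating.
        QueryOptions annotOptions = new QueryOptions(ExecutorManager.EXECUTE, true);
        variantStorage.annotateVariants(studyId, 1L, sessionId, annotOptions);
    }
}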