org.opencb.hpg.bigdata.app.cli.local.VariantCommandExecutor.java Source code

Introduction

Here is the source code for org.opencb.hpg.bigdata.app.cli.local.VariantCommandExecutor.java
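
Before the listing, a minimal sketch of how this executor is typically driven from a CLI entry point. This is illustrative only: LocalCliOptionsParser's parse(args) method and getVariantCommandOptions() getter are assumptions, since the parser itself is not shown in this listing.

// Hypothetical driver, not part of the listing below.
public class VariantCli {
    public static void main(String[] args) throws Exception {
        LocalCliOptionsParser parser = new LocalCliOptionsParser();
        parser.parse(args); // assumed parse entry point
        VariantCommandExecutor executor =
                new VariantCommandExecutor(parser.getVariantCommandOptions()); // assumed getter
        executor.execute(); // dispatches to convert(), annotate() or query()
    }
}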

Source

/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.hpg.bigdata.app.cli.local;

import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import org.apache.avro.Schema;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SparkSession;
import org.opencb.biodata.formats.variant.vcf4.FullVcfCodec;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.VariantAvro;
import org.opencb.biodata.models.variant.protobuf.VariantProto;
import org.opencb.biodata.tools.variant.converter.Converter;
import org.opencb.biodata.tools.variant.converter.VariantContextToVariantConverter;
import org.opencb.biodata.tools.variant.converter.VariantContextToVariantProtoConverter;
import org.opencb.commons.io.DataReader;
import org.opencb.commons.run.ParallelTaskRunner;
import org.opencb.commons.utils.FileUtils;
import org.opencb.hpg.bigdata.app.cli.CommandExecutor;
import org.opencb.hpg.bigdata.core.avro.VariantAvroAnnotator;
import org.opencb.hpg.bigdata.core.avro.VariantAvroSerializer;
import org.opencb.hpg.bigdata.core.converters.variation.ProtoEncoderTask;
import org.opencb.hpg.bigdata.core.converters.variation.VariantAvroEncoderTask;
import org.opencb.hpg.bigdata.core.converters.variation.VariantContext2VariantConverter;
import org.opencb.hpg.bigdata.core.io.VariantContextBlockIterator;
import org.opencb.hpg.bigdata.core.io.VcfBlockIterator;
import org.opencb.hpg.bigdata.core.io.avro.AvroFileWriter;
import org.opencb.hpg.bigdata.core.lib.SparkConfCreator;
import org.opencb.hpg.bigdata.core.lib.VariantDataset;
import org.opencb.hpg.bigdata.core.parquet.VariantParquetConverter;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Created by imedina on 25/06/15.
 */
public class VariantCommandExecutor extends CommandExecutor {

    private LocalCliOptionsParser.VariantCommandOptions variantCommandOptions;

    public VariantCommandExecutor(LocalCliOptionsParser.VariantCommandOptions variantCommandOptions) {
        //      super(variantCommandOptions.c, fastqCommandOptions.verbose, fastqCommandOptions.conf);
        this.variantCommandOptions = variantCommandOptions;
    }

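    /**
     * Dispatches on the parsed subcommand ("convert", "annotate" or "query"),
     * initializing logging and configuration from that subcommand's common
     * options before delegating to the matching method.
     */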
    @Override
    public void execute() throws Exception {
        String subCommandString = variantCommandOptions.getParsedSubCommand();
        switch (subCommandString) {
        case "convert":
            init(variantCommandOptions.convertVariantCommandOptions.commonOptions.logLevel,
                    variantCommandOptions.convertVariantCommandOptions.commonOptions.verbose,
                    variantCommandOptions.convertVariantCommandOptions.commonOptions.conf);
            convert();
            break;
        case "annotate":
            init(variantCommandOptions.convertVariantCommandOptions.commonOptions.logLevel,
                    variantCommandOptions.convertVariantCommandOptions.commonOptions.verbose,
                    variantCommandOptions.convertVariantCommandOptions.commonOptions.conf);
            annotate();
        case "query":
            init(variantCommandOptions.queryVariantCommandOptions.commonOptions.logLevel,
                    variantCommandOptions.queryVariantCommandOptions.commonOptions.verbose,
                    variantCommandOptions.queryVariantCommandOptions.commonOptions.conf);
            query();
            break;
        default:
            break;
        }
    }

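    /**
     * Converts the input VCF file to Avro or Parquet. Applies optional region
     * filters and defaults the output path to "<input>.<format>" when no
     * output is given.
     */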
    private void convert() throws Exception {
        // check mandatory parameter 'input file'
        Path inputPath = Paths.get(variantCommandOptions.convertVariantCommandOptions.input);
        FileUtils.checkFile(inputPath);

        // check mandatory parameter 'to'
        String to = variantCommandOptions.convertVariantCommandOptions.to;
        if (!to.equals("avro") && !to.equals("parquet") && !to.equals("json")) {
            throw new IllegalArgumentException(
                    "Unknown serialization format: " + to + ". Valid values: avro, parquet and json");
        }

        // check output
        String output = variantCommandOptions.convertVariantCommandOptions.output;
        boolean stdOutput = variantCommandOptions.convertVariantCommandOptions.stdOutput;
        if (stdOutput) {
            output = "STDOUT";
        } else {
            if (output != null && !output.isEmpty()) {
                Path parent = Paths.get(output).toAbsolutePath().getParent();
                if (parent != null) { // null if output is a file in the current directory
                    FileUtils.checkDirectory(parent, true); // Throws an exception if it does not exist
                }
            } else {
                output = inputPath.toString() + "." + to;
            }
        }

        // compression
        String compression = variantCommandOptions.convertVariantCommandOptions.compression;

        // region filter
        List<Region> regions = null;
        if (StringUtils.isNotEmpty(variantCommandOptions.convertVariantCommandOptions.regions)) {
            regions = Region.parseRegions(variantCommandOptions.convertVariantCommandOptions.regions);
        }

        switch (to) {
        case "avro":
            VariantAvroSerializer avroSerializer = new VariantAvroSerializer(compression);
            if (regions != null) {
                regions.forEach(avroSerializer::addRegionFilter);
            }
            avroSerializer.toAvro(inputPath.toString(), output);
            break;
        case "parquet":
            InputStream is = new FileInputStream(inputPath.toFile());
            VariantParquetConverter parquetConverter = new VariantParquetConverter();
            if (regions != null) {
                regions.forEach(parquetConverter::addRegionFilter);
            }
            parquetConverter.toParquet(is, output);
            is.close();
            break;
        default:
            System.out.println("Invalid format: " + to);
            break;
        }

    }

    //    private void convert2() throws Exception {
    //        Path inputPath = Paths.get(variantCommandOptions.convertVariantCommandOptions.input);
    //        FileUtils.checkFile(inputPath);
    //
    //        // Creating file writer. If the 'output' parameter is passed and it is different from
    //        // STDOUT, then a file is created if the parent folder exists; otherwise STDOUT is used.
    //        String output = variantCommandOptions.convertVariantCommandOptions.output;
    //        boolean isFile = false;
    //        OutputStream outputStream;
    //        if (output != null && !output.isEmpty() && !output.equalsIgnoreCase("STDOUT")) {
    //            Path parent = Paths.get(output).toAbsolutePath().getParent();
    //            if (parent != null) { // null if output is a file in the current directory
    //                FileUtils.checkDirectory(parent, true); // Throws exception, if does not exist
    //            }
    //            outputStream = new FileOutputStream(output);
    //            isFile = true;
    //        } else {
    //            outputStream = System.out;
    //            output = "STDOUT";
    //        }
    //
    //        String dataModel = variantCommandOptions.convertVariantCommandOptions.dataModel;
    //        dataModel = (dataModel != null && !dataModel.isEmpty()) ? dataModel : "opencb";
    //
    //        String compression = variantCommandOptions.convertVariantCommandOptions.compression;
    //        compression = (compression == null || compression.isEmpty()) ? "auto" :  compression.toLowerCase();
    //
    //        if (!variantCommandOptions.convertVariantCommandOptions.toJson
    //                && !variantCommandOptions.convertVariantCommandOptions.toAvro
    //                && !variantCommandOptions.convertVariantCommandOptions.toProtoBuf
    //                && !variantCommandOptions.convertVariantCommandOptions.fromAvro) {
    ////            variantCommandOptions.convertVariantCommandOptions.toAvro = true;
    //            variantCommandOptions.convertVariantCommandOptions.toParquet = true;
    //        }
    //
    //        /*
    //         * JSON converter. Mode 'auto' sets gzip if the file name ends with .gz
    //         */
    //        if (variantCommandOptions.convertVariantCommandOptions.toJson) {
    //            if (compression.equals("auto")) {
    //                if (output.endsWith(".gz")) {
    //                    compression = "gzip";
    //                } else if (output.equalsIgnoreCase("STDOUT") || output.endsWith("json")) {
    //                    compression = "";
    //                } else {
    //                    throw new IllegalArgumentException("Unknown compression extension for " + output);
    //                }
    //            }
    //
    //            if (compression.equals("gzip")) {
    //                outputStream = new GZIPOutputStream(outputStream);
    //            }
    //            convertToJson(inputPath, dataModel, outputStream);
    //        }
    //
    //        /*
    //         * Protocol Buffer 3 converter. Mode 'auto' sets gzip if the file name ends with .gz
    //         */
    //        if (variantCommandOptions.convertVariantCommandOptions.toProtoBuf) {
    //            if (compression.equals("auto")) {
    //                if (output.endsWith(".gz")) {
    //                    compression = "gzip";
    //                } else if (output.equalsIgnoreCase("STDOUT")
    //                        || output.endsWith("pb")
    //                        || output.endsWith("pb3")
    //                        || output.endsWith("proto")) {
    //                    compression = "";
    //                } else {
    //                    throw new IllegalArgumentException("Unknown compression extension for " + output);
    //                }
    //            }
    //
    //            if (compression.equals("gzip")) {
    //                outputStream = new GZIPOutputStream(outputStream);
    //            }
    //            convertToProtoBuf(inputPath, outputStream);
    //        }
    //
    //        /*
    //         * Avro converter. Mode 'auto' sets gzip if the file name ends with .gz
    //         */
    //        if (variantCommandOptions.convertVariantCommandOptions.toAvro) {
    //            // if compression mode is set to 'auto' it is inferred from files extension
    //            if (compression.equals("auto")) {
    //                // if output is a defined file and contains an extension
    //                if (output.contains(".")) {
    //                    String[] split = output.split("\\.");
    //                    switch (split[split.length - 1]) {
    //                        case "gz":
    //                        case "deflate":
    //                            compression = "deflate";
    //                            break;
    //                        case "sz":
    //                        case "snz":
    //                            compression = "snappy";
    //                            break;
    //                        case "bzip2":
    //                            compression = "bzip2";
    //                            break;
    //                        case "xz":
    //                            compression = "xz";
    //                            break;
    //                        default:
    //                            compression = "deflate";
    //                            break;
    //                    }
    //                } else {    // if we reach this point, output is very likely set to STDOUT
    //                    compression = "deflate";
    //                }
    //            }
    //
    //            System.out.println("compression = " + compression);
    //            VariantAvroSerializer avroSerializer = new VariantAvroSerializer(compression);
    //            avroSerializer.toAvro(inputPath.toString(), output);
    //
    //            /*
    //            convertToAvro(inputPath, compression, dataModel, outputStream);
    //
    //            if (isFile) {
    //                String metaFile = output + ".meta";
    //                logger.info("Write metadata into " + metaFile);
    //                try (FileOutputStream out = new FileOutputStream(metaFile)) {
    //                    writeAvroStats(new AvroFileWriter<>(VariantFileMetadata.getClassSchema(), compression, out), output);
    //                }
    //            }
    //            */
    //        }
    //
    //        if (variantCommandOptions.convertVariantCommandOptions.toParquet) {
    //            InputStream is = new FileInputStream(variantCommandOptions.convertVariantCommandOptions.input);
    //            VariantParquetConverter parquetConverter = new VariantParquetConverter();
    ////            parquetConverter.addRegionFilter(new Region("1", 1, 800000))
    ////                    .addRegionFilter(new Region("1", 798801, 222800000))
    ////                    .addFilter(v -> v.getStudies().get(0).getFiles().get(0).getAttributes().get("NS").equals("60"));
    //            parquetConverter.toParquet(is, variantCommandOptions.convertVariantCommandOptions.output + "2");
    //
    //            is.close();
    //        }
    //
    //        if (outputStream != null) {
    //            outputStream.flush();
    //            outputStream.close();
    //        }
    //    }

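    /**
     * Reads the input VCF with htsjdk's VCFFileReader and writes one JSON
     * document per variant, using either the OpenCB or the GA4GH data model.
     */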
    private void convertToJson(Path inputPath, String dataModel, OutputStream outputStream) throws IOException {
        VCFFileReader reader = new VCFFileReader(inputPath.toFile(), false);
        switch (dataModel.toLowerCase()) {
        case "opencb": {
            VariantContextToVariantConverter variantContextToVariantConverter = new VariantContextToVariantConverter(
                    "", "", Collections.emptyList());
            Variant variant;
            for (VariantContext variantContext : reader) {
                variant = variantContextToVariantConverter.convert(variantContext);
                outputStream.write(variant.toJson().getBytes());
                outputStream.write('\n');
            }
            break;
        }
        case "ga4gh": {
            // GA4GH Avro data models used
            VariantContext2VariantConverter variantContext2VariantConverter = new VariantContext2VariantConverter();
            org.ga4gh.models.Variant variant;
            for (VariantContext variantContext : reader) {
                variant = variantContext2VariantConverter.forward(variantContext);
                outputStream.write(variant.toString().getBytes());
                outputStream.write('\n');
            }
            break;
        }
        default:
            throw new IllegalArgumentException("Unknown dataModel \"" + dataModel + "\"");
        }
        reader.close();
    }

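    /**
     * Streams the input VCF (a file, or stdin when the path is "-") through a
     * ParallelTaskRunner and writes delimited Protocol Buffers 3 variant
     * messages to the output stream.
     */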
    private void convertToProtoBuf(Path inputPath, OutputStream outputStream) throws Exception {
        // Creating reader
        VcfBlockIterator iterator = (StringUtils.equals("-", inputPath.toAbsolutePath().toString()))
                ? new VcfBlockIterator(new BufferedInputStream(System.in), new FullVcfCodec())
                : new VcfBlockIterator(inputPath.toFile(), new FullVcfCodec());

        LocalCliOptionsParser.ConvertVariantCommandOptions cliOptions = variantCommandOptions.convertVariantCommandOptions;
        int numTasks = Math.max(cliOptions.numThreads, 1);
        int batchSize = Integer.parseInt(cliOptions.options.getOrDefault("batch.size", "50"));
        int bufferSize = Integer.parseInt(cliOptions.options.getOrDefault("buffer.size", "100000"));
        int capacity = numTasks + 1;
        ParallelTaskRunner.Config config = new ParallelTaskRunner.Config(numTasks, batchSize, capacity, true,
                false);

        ParallelTaskRunner<CharSequence, ByteBuffer> runner = new ParallelTaskRunner<>(iterator.toLineDataReader(),
                () -> { // Task supplier: supplies one task instance per thread.

                    // VCFCodec is not thread-safe: one instance MUST exist per thread
                    VCFCodec codec = new FullVcfCodec(iterator.getHeader(), iterator.getVersion());
                    VariantContextBlockIterator blockIterator = new VariantContextBlockIterator(codec);
                    Converter<VariantContext, VariantProto.Variant> converter = new VariantContextToVariantProtoConverter();
                    return new ProtoEncoderTask<>(
                            charBuffer -> converter.convert(blockIterator.convert(charBuffer)), bufferSize);
                }, batch -> {
                    batch.forEach(byteBuffer -> {
                        try {
                            outputStream.write(byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit());
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
                    return true;
                }, config);
        runner.run();
        outputStream.close();

        //        InputStream inputStream = new FileInputStream(variantCommandOptions.convertVariantCommandOptions.output);
        //        if (outputStream instanceof GZIPOutputStream) {
        //            inputStream = new GZIPInputStream(inputStream);
        //        }
        //        VariantProto.Variant variant;
        //        int i = 0;
        //        try {
        //            while ((variant = VariantProto.Variant.parseDelimitedFrom(inputStream)) != null) {
        //                i++;
        //            System.out.println(variant.getChromosome() + ":" + variant.getStart()
        //                    + ":" + variant.getReference() + ":" + variant.getAlternate());
        ////            System.out.println("variant = " + variant.toString());
        //            }
        //        } finally {
        //            System.out.println("Num variants = " + i);
        //            inputStream.close();
        //        }
    }

    /*
    private void convertToAvro2(Path inputPath, String compression, String dataModel, OutputStream outputStream) throws Exception {
        VariantContextToVariantConverter converter = new VariantContextToVariantConverter("", "");
        VCFFileReader vcfFileReader = new VCFFileReader(inputPath.toFile(), false);
        VCFHeader fileHeader = vcfFileReader.getFileHeader();
        CloseableIterator<VariantContext> iterator = vcfFileReader.iterator();
        while (iterator.hasNext()) {
            VariantContext variantContext = iterator.next();
            System.out.println("======================================");
            System.out.println("variantContext = " + variantContext);
            System.out.println("variantContext.getCommonInfo().getAttributes() = " + variantContext.getCommonInfo().getAttributes());
            System.out.println("variantContext.getGenotypes().isLazyWithData() = " + variantContext.getGenotypes().isLazyWithData());
            ((LazyGenotypesContext) variantContext.getGenotypes()).decode();
            System.out.println("variantContext.getGenotypes().getUnparsedGenotypeData() = "
                    + ((LazyGenotypesContext) variantContext.getGenotypes()).getUnparsedGenotypeData());
            // System.out.println("variantContext.toStringDecodeGenotypes() = " + variantContext.toStringDecodeGenotypes());
            System.out.println("variantContext.getGenotypes().get(0) = " + variantContext.getGenotypes().get(0).hasAnyAttribute("GC"));
            System.out.println("variantContext.getGenotypes().get(0).getExtendedAttributes() = " + variantContext.getGenotypes().get(0)
                    .getExtendedAttributes());
            Variant variant = converter.convert(variantContext);
            System.out.println("variant = " + variant);
            System.out.println("======================================");
        }
    }
    */

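    /**
     * Parallel VCF-to-Avro encoder: each thread runs a VariantAvroEncoderTask
     * that feeds an AvroFileWriter, using the OpenCB VariantAvro schema or the
     * GA4GH Variant schema depending on the requested data model.
     */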
    private void convertToAvro(Path inputPath, String compression, String dataModel, OutputStream outputStream)
            throws Exception {
        // Creating reader
        VcfBlockIterator iterator = (StringUtils.equals("-", inputPath.toAbsolutePath().toString()))
                ? new VcfBlockIterator(new BufferedInputStream(System.in), new FullVcfCodec())
                : new VcfBlockIterator(inputPath.toFile(), new FullVcfCodec());
        DataReader<CharBuffer> vcfDataReader = iterator.toCharBufferDataReader();

        ArrayList<String> sampleNamesInOrder = iterator.getHeader().getSampleNamesInOrder();
        //        System.out.println("sampleNamesInOrder = " + sampleNamesInOrder);

        // main loop
        int numTasks = Math.max(variantCommandOptions.convertVariantCommandOptions.numThreads, 1);
        int batchSize = 1024 * 1024; //Batch size in bytes
        int capacity = numTasks + 1;
        //            VariantConverterContext variantConverterContext = new VariantConverterContext();

        //        long start = System.currentTimeMillis();

        //        final VariantContextToVariantConverter converter = new VariantContextToVariantConverter("", "", sampleNamesInOrder);
        //        List<CharBuffer> read;
        //        while ((read = vcfDataReader.read()) != null) {
        //            converter.convert(read.)
        //        }

        //        Old implementation:

        ParallelTaskRunner.Config config = new ParallelTaskRunner.Config(numTasks, batchSize, capacity, false);
        ParallelTaskRunner<CharBuffer, ByteBuffer> runner;
        switch (dataModel.toLowerCase()) {
        case "opencb": {
            Schema classSchema = VariantAvro.getClassSchema();
            // Converter
            final VariantContextToVariantConverter converter = new VariantContextToVariantConverter("", "",
                    sampleNamesInOrder);
            // Writer
            AvroFileWriter<VariantAvro> avroFileWriter = new AvroFileWriter<>(classSchema, compression,
                    outputStream);

            runner = new ParallelTaskRunner<>(vcfDataReader,
                    () -> new VariantAvroEncoderTask<>(iterator.getHeader(), iterator.getVersion(),
                            variantContext -> converter.convert(variantContext).getImpl(), classSchema),
                    avroFileWriter, config);
            break;
        }
        case "ga4gh": {
            Schema classSchema = org.ga4gh.models.Variant.getClassSchema();
            // Converter
            final VariantContext2VariantConverter converter = new VariantContext2VariantConverter();
            converter.setVariantSetId(""); //TODO: Set VariantSetId
            // Writer
            AvroFileWriter<org.ga4gh.models.Variant> avroFileWriter = new AvroFileWriter<>(classSchema, compression,
                    outputStream);

            runner = new ParallelTaskRunner<>(vcfDataReader,
                    () -> new VariantAvroEncoderTask<>(iterator.getHeader(), iterator.getVersion(), converter,
                            classSchema),
                    avroFileWriter, config);
            break;
        }
        default:
            throw new IllegalArgumentException("Unknown dataModel \"" + dataModel + "\"");
        }
        long start = System.currentTimeMillis();
        runner.run();

        logger.debug("Time " + (System.currentTimeMillis() - start) / 1000.0 + "s");
    }

    /*
    private void writeAvroStats(AvroFileWriter<VariantFileMetadata> aw, String file) throws IOException {
        try {
            aw.open();
            Builder builder = VariantFileMetadata.newBuilder();
            builder.setStudyId(file).setFileId(file);
            Map<String, Object> meta = new HashMap<>();
            meta.put("FILTER_DEFAULT", "PASS");
            meta.put("QUAL_DEFAULT", StringUtils.EMPTY);
            meta.put("INFO_DEFAULT", "END,BLOCKAVG_min30p3a");
            meta.put("FORMAT_DEFAULT", "GT:GQX:DP:DPF");
            builder.setMetadata(meta);
            builder.setAggregation(Aggregation.NONE);
            builder.setStats(null);
            builder.setHeader(null);
            aw.writeDatum(builder.build());
        } finally {
            try {
                aw.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    */

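    /**
     * Loads the input dataset into a Spark SQL VariantDataset, registers it as
     * the temp view "vcf", applies the requested filters (several of them only
     * partially implemented) and saves the result in Avro format.
     */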
    public void query() throws Exception {
        // check mandatory parameter 'input file'
        Path inputPath = Paths.get(variantCommandOptions.queryVariantCommandOptions.input);
        FileUtils.checkFile(inputPath);

        // TODO: take the Spark home from configuration instead of hard-coding it
        SparkConf sparkConf = SparkConfCreator.getConf("variant query", "local", 1, true,
                "/home/jtarraga/soft/spark-2.0.0/");
        System.out.println("sparkConf = " + sparkConf.toDebugString());
        SparkSession sparkSession = new SparkSession(new SparkContext(sparkConf));

        VariantDataset vd = new VariantDataset();

        vd.load(variantCommandOptions.queryVariantCommandOptions.input, sparkSession);
        vd.createOrReplaceTempView("vcf");

        // query for ID
        if (StringUtils.isNotEmpty(variantCommandOptions.queryVariantCommandOptions.ids)) {
            String[] ids = StringUtils.split(variantCommandOptions.queryVariantCommandOptions.ids, ",");
            vd.idFilter(ids[0]);
            if (ids.length > 1) {
                logger.warn("Query for multiple IDs, not yet implemented. Currently, it queries for the first ID.");
            }
        }

        // query for type
        if (StringUtils.isNotEmpty(variantCommandOptions.queryVariantCommandOptions.types)) {
            String[] types = StringUtils.split(variantCommandOptions.queryVariantCommandOptions.types, ",");

            if (types.length == 1) {
                vd.typeFilter(types[0]);
            } else {
                vd.typeFilter(new ArrayList<>(Arrays.asList(types)));
            }
        }

        // query for region
        if (StringUtils.isNotEmpty(variantCommandOptions.queryVariantCommandOptions.regions)) {
            Region.parseRegions(variantCommandOptions.queryVariantCommandOptions.regions);
            logger.warn("Query for region, not yet implemented.");
        }

        // query for SO term name
        if (StringUtils.isNotEmpty(variantCommandOptions.queryVariantCommandOptions.so_names)) {
            String[] names = StringUtils.split(variantCommandOptions.queryVariantCommandOptions.so_names, ",");
            vd.annotationFilter("consequenceTypes.sequenceOntologyTerms.name", names[0]);
            if (names.length > 1) {
                logger.warn("Query for multiple SO term names (consequence type), not yet implemented. "
                        + "Currently, it queries for the first SO term name.");
            }
        }

        // query for SO term accession
        if (StringUtils.isNotEmpty(variantCommandOptions.queryVariantCommandOptions.so_accessions)) {
            String[] accessions = StringUtils.split(variantCommandOptions.queryVariantCommandOptions.so_accessions,
                    ",");
            vd.annotationFilter("consequenceTypes.sequenceOntologyTerms.accession", accessions[0]);
            if (accessions.length > 1) {
                logger.warn("Query for multiple SO term accessions (consequence type), not yet implemented. "
                        + "Currently, it queries for the first SO term accession.");
            }
        }

        // apply previous filters
        vd.update();

        // save the dataset
        logger.warn("The current query implementation saves the resulting dataset in Avro format.");
        vd.write().format("com.databricks.spark.avro")
                .save(variantCommandOptions.queryVariantCommandOptions.output);
    }

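    /**
     * Annotates an Avro variant file with VariantAvroAnnotator and writes the
     * annotated records to the output path.
     */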
    public void annotate() throws IOException {
        VariantAvroAnnotator variantAvroAnnotator = new VariantAvroAnnotator();

        Path input = Paths.get(variantCommandOptions.annotateVariantCommandOptions.input);
        Path output = Paths.get(variantCommandOptions.annotateVariantCommandOptions.output);
        variantAvroAnnotator.annotate(input, output);
    }

}
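
Usage note: a minimal sketch of querying a converted dataset directly with VariantDataset, mirroring the calls made in query() above. The file names and the Spark home path are placeholder assumptions.

// Hypothetical stand-alone query built from the same calls used in query().
SparkConf sparkConf = SparkConfCreator.getConf("variant query", "local", 1, true,
        "/path/to/spark"); // placeholder Spark home
SparkSession sparkSession = new SparkSession(new SparkContext(sparkConf));

VariantDataset vd = new VariantDataset();
vd.load("variants.vcf.avro", sparkSession); // placeholder input produced by convert()
vd.createOrReplaceTempView("vcf");
vd.typeFilter("SNV");  // single-type filter, as in query()
vd.update();           // apply the accumulated filters
vd.write().format("com.databricks.spark.avro").save("filtered.avro"); // placeholder output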