uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.ebi.eva.pipeline.io.writers.VepAnnotationMongoWriter.java

Source

/*
 * Copyright 2016 EMBL - European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package uk.ac.ebi.eva.pipeline.io.writers;

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;

import uk.ac.ebi.eva.utils.MongoDBHelper;

import org.opencb.biodata.models.variant.annotation.VariantAnnotation;
import org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantAnnotationConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.item.data.MongoItemWriter;
import org.springframework.data.mongodb.core.MongoOperations;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Writes a list of {@link VariantAnnotation} into MongoDB.
 *
 * <p>New annotations are added to the already existing document whose {@code _id} matches the storage id
 * built from the annotation. When two (or more) annotations in the same chunk refer to the same variant, they
 * are merged and written together:
 *
 * <pre>
 * 20_63963_G/A    20:63963   A  ENSG00000178591    ENST00000382410    Transcript upstream_gene_variant  -  -  -  -  -  -  DISTANCE=4388;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=protein_coding;CANONICAL=YES;CCDS=CCDS12989.2;ENSP=ENSP00000371847;SWISSPROT=DB125_HUMAN;TREMBL=B2R4E8_HUMAN;UNIPARC=UPI00001A36DE
 * 20_63963_G/A    20:63963   A  ENSG00000178591    ENST00000608838    Transcript upstream_gene_variant  -  -  -  -  -  -  DISTANCE=3928;STRAND=1;SYMBOL=DEFB125;SYMBOL_SOURCE=HGNC;HGNC_ID=18105;BIOTYPE=processed_transcript
 * </pre>
 *
 * Will be:
 *
 * <pre>
 * "annot" : {
 *  "ct" : [
 *      { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000382410", "codon" : "-", "strand" : "+", "bt" : "protein_coding", "aaChange" : "-", "so" : [ 1631 ] },
 *      { "gn" : "DEFB125", "ensg" : "ENSG00000178591", "enst" : "ENST00000608838", "codon" : "-", "strand" : "+", "bt" : "processed_transcript", "aaChange" : "-", "so" : [ 1631 ] } ],
 *  "xrefs" : [
 *      { "id" : "DEFB125", "src" : "HGNC" },
 *      { "id" : "ENST00000382410", "src" : "ensemblTranscript" },
 *      { "id" : "ENST00000608838", "src" : "ensemblTranscript" },
 *      { "id" : "ENSG00000178591", "src" : "ensemblGene" } ]
 * }
 * </pre>
 *
 * @author Diego Poggioli
 */
public class VepAnnotationMongoWriter extends MongoItemWriter<VariantAnnotation> {
    private static final Logger logger = LoggerFactory.getLogger(VepAnnotationMongoWriter.class);

    private final MongoOperations mongoOperations;
    private final DBObjectToVariantAnnotationConverter converter;
    private String collection;

    /**
     * Creates a writer bound to the given Mongo operations. The target collection still has to be provided
     * through {@link #setCollection(String)} before anything is written.
     *
     * @param mongoOperations operations used both as the inherited writer's template and to reach the collection
     */
    public VepAnnotationMongoWriter(MongoOperations mongoOperations) {
        this.mongoOperations = mongoOperations;
        this.converter = new DBObjectToVariantAnnotationConverter();
        // Register the template here so that both constructors leave the inherited writer configured alike.
        setTemplate(mongoOperations);
    }

    /**
     * Creates a writer bound to the given Mongo operations and target collection.
     *
     * @param mongoOperations operations used both as the inherited writer's template and to reach the collection
     * @param collection name of the collection holding the variant documents to be updated
     */
    public VepAnnotationMongoWriter(MongoOperations mongoOperations, String collection) {
        this(mongoOperations);
        setCollection(collection);
    }

    /**
     * Sets the target collection on the parent writer and keeps a local copy, which is the one used by the
     * update issued from this class.
     *
     * @param collection name of the collection holding the variant documents to be updated
     */
    @Override
    public void setCollection(String collection) {
        super.setCollection(collection);
        this.collection = collection;
    }

    /**
     * Groups the annotations by the storage id of the variant they refer to.
     *
     * @param variantAnnotations annotations to group
     * @return map from storage id to the annotations sharing that id, in their original relative order
     */
    private Map<String, List<VariantAnnotation>> groupVariantAnnotationById(
            List<? extends VariantAnnotation> variantAnnotations) {
        // A plain loop is used on purpose: the equivalent stream/Collectors.groupingBy(this::...) form did not
        // compile with early Java 8 builds (see
        // http://stackoverflow.com/questions/37368060/why-this-code-compiles-with-jdk8u45-and-above-but-not-with-jdk8u25)
        Map<String, List<VariantAnnotation>> variantAnnotationsByStorageId = new HashMap<>();
        for (VariantAnnotation variantAnnotation : variantAnnotations) {
            String id = buildStorageIdFromVariantAnnotation(variantAnnotation);
            variantAnnotationsByStorageId.computeIfAbsent(id, key -> new ArrayList<>()).add(variantAnnotation);
        }

        return variantAnnotationsByStorageId;
    }

    /**
     * Writes the annotations, issuing one write per distinct variant: annotations that share a storage id are
     * first merged into a single {@link VariantAnnotation}.
     *
     * @param variantAnnotations annotations to persist
     */
    @Override
    protected void doWrite(List<? extends VariantAnnotation> variantAnnotations) {
        Map<String, List<VariantAnnotation>> variantAnnotationsByStorageId = groupVariantAnnotationById(
                variantAnnotations);

        for (Map.Entry<String, List<VariantAnnotation>> annotationsIn : variantAnnotationsByStorageId.entrySet()) {
            String storageId = annotationsIn.getKey();
            List<VariantAnnotation> annotations = annotationsIn.getValue();

            // Every group holds at least one element; the first one receives the content of the others.
            VariantAnnotation variantAnnotation = annotations.get(0);

            if (annotations.size() > 1) {
                variantAnnotation = concatenateOtherAnnotations(variantAnnotation,
                        annotations.subList(1, annotations.size()));
            }

            writeVariantAnnotationInMongoDb(storageId, variantAnnotation);
        }
    }

    /**
     * Append multiple annotations into a single {@link VariantAnnotation}.
     * Updated fields are ConsequenceTypes and Hgvs; the target annotation is modified in place. A missing
     * (null) list in the target is created on demand for both fields.
     *
     * @param variantAnnotation annotation where other annotations will be appended
     * @param otherAnnotationsToConcatenate annotations to be appended
     * @return a single {@link VariantAnnotation} ready to be persisted (the same instance received as target)
     */
    private VariantAnnotation concatenateOtherAnnotations(VariantAnnotation variantAnnotation,
            List<VariantAnnotation> otherAnnotationsToConcatenate) {

        for (VariantAnnotation annotationToAppend : otherAnnotationsToConcatenate) {

            //update ConsequenceTypes
            if (annotationToAppend.getConsequenceTypes() != null) {
                // Same guard as for Hgvs below: the target may not have a list yet.
                if (variantAnnotation.getConsequenceTypes() == null) {
                    variantAnnotation.setConsequenceTypes(new ArrayList<>());
                }
                variantAnnotation.getConsequenceTypes().addAll(annotationToAppend.getConsequenceTypes());
            }

            //update Hgvs
            if (annotationToAppend.getHgvs() != null) {
                if (variantAnnotation.getHgvs() == null) {
                    variantAnnotation.setHgvs(new ArrayList<>());
                }
                variantAnnotation.getHgvs().addAll(annotationToAppend.getHgvs());
            }
        }

        return variantAnnotation;
    }

    /**
     * Adds the "ct" and "xrefs" values of the converted annotation to the "annot.ct" and "annot.xrefs" arrays of
     * the document whose {@code _id} is the given storage id. Both fields go in a single {@code $addToSet}
     * update; nothing is sent when the converted annotation has neither of them.
     *
     * @param storageId {@code _id} of the document to update
     * @param variantAnnotation annotation to convert and store
     */
    private void writeVariantAnnotationInMongoDb(String storageId, VariantAnnotation variantAnnotation) {
        logger.debug("Writing annotations into mongo id: {}", storageId);

        DBObject storageVariantAnnotation = converter.convertToStorageType(variantAnnotation);

        BasicDBObject addToSet = new BasicDBObject();

        Object consequenceTypes = storageVariantAnnotation.get("ct");
        if (consequenceTypes != null) {
            addToSet.append("annot.ct", new BasicDBObject("$each", consequenceTypes));
        }

        Object xrefs = storageVariantAnnotation.get("xrefs");
        if (xrefs != null) {
            addToSet.append("annot.xrefs", new BasicDBObject("$each", xrefs));
        }

        if (addToSet.isEmpty()) {
            return;
        }

        BasicDBObject find = new BasicDBObject("_id", storageId);
        mongoOperations.getCollection(collection).update(find, new BasicDBObject("$addToSet", addToSet));
    }

    /**
     * Builds the storage id of the variant an annotation refers to, from its chromosome, start, reference
     * allele and alternative allele.
     *
     * @param variantAnnotation annotation whose coordinates and alleles are used
     * @return storage id as produced by {@link MongoDBHelper#buildStorageId}
     */
    private String buildStorageIdFromVariantAnnotation(VariantAnnotation variantAnnotation) {
        return MongoDBHelper.buildStorageId(variantAnnotation.getChromosome(), variantAnnotation.getStart(),
                variantAnnotation.getReferenceAllele(), variantAnnotation.getAlternativeAllele());
    }

}