org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.opencga.storage.mongodb.variant.DBObjectToVariantConverter.java

Source

/*
 * Copyright 2015 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.mongodb.variant;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;

import java.util.*;

import org.opencb.biodata.models.variant.VariantSourceEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.annotation.VariantAnnotation;
import org.opencb.commons.utils.CryptoUtils;
import org.opencb.datastore.core.ComplexTypeConverter;

/**
 * Converts between the {@link Variant} data model and the {@link DBObject}
 * documents used to store variants in MongoDB.
 * <p>
 * Source entries (stored under {@link #STUDIES_FIELD}) and cohort statistics
 * (stored under {@link #STATS_FIELD}) are only converted when the matching
 * converter was supplied at construction time. Annotations are read from
 * storage but never written by this class: an empty list is stored in their
 * place. Variant IDs are read from storage but not written either.
 *
 * @author Cristina Yenyxe Gonzalez Garcia &lt;cyenyxe@ebi.ac.uk&gt;
 */
public class DBObjectToVariantConverter implements ComplexTypeConverter<Variant, DBObject> {

    public final static String CHROMOSOME_FIELD = "chromosome";
    public final static String START_FIELD = "start";
    public final static String END_FIELD = "end";
    public final static String LENGTH_FIELD = "length";
    public final static String REFERENCE_FIELD = "reference";
    public final static String ALTERNATE_FIELD = "alternate";
    public final static String IDS_FIELD = "ids";
    public final static String TYPE_FIELD = "type";

    public final static String HGVS_FIELD = "hgvs";
    public final static String HGVS_NAME_FIELD = "name";
    public final static String HGVS_TYPE_FIELD = "type";

    public final static String STUDIES_FIELD = "studies";
    public final static String ANNOTATION_FIELD = "annotation";
    public final static String STATS_FIELD = "stats";

    /**
     * Maps data-model field names to the field names used in the stored
     * document. Intended to be read through {@link #toShortFieldName(String)}.
     */
    public final static Map<String, String> fieldsMap;

    static {
        fieldsMap = new HashMap<>();
        fieldsMap.put("chromosome", CHROMOSOME_FIELD);
        fieldsMap.put("start", START_FIELD);
        fieldsMap.put("end", END_FIELD);
        fieldsMap.put("length", LENGTH_FIELD);
        fieldsMap.put("reference", REFERENCE_FIELD);
        fieldsMap.put("alternate", ALTERNATE_FIELD);
        fieldsMap.put("ids", IDS_FIELD);
        fieldsMap.put("type", TYPE_FIELD);
        fieldsMap.put("hgvs", HGVS_FIELD);
        fieldsMap.put("sourceEntries", STUDIES_FIELD);
        fieldsMap.put("annotation", ANNOTATION_FIELD);
        // Cohort statistics live at the top level of the stored document,
        // not inside each study.
        fieldsMap.put("sourceEntries.cohortStats", STATS_FIELD);
    }

    private final DBObjectToVariantSourceEntryConverter variantSourceEntryConverter;
    private final DBObjectToVariantAnnotationConverter variantAnnotationConverter;
    private final DBObjectToVariantStatsConverter statsConverter;

    /**
     * Create a converter between Variant and DBObject entities when there is
     * no need to convert the files the variant was read from nor its
     * statistics.
     */
    public DBObjectToVariantConverter() {
        this(null, null);
    }

    /**
     * Create a converter between Variant and DBObject entities. A converter for
     * the files the variant was read from can be provided in case those
     * should be processed during the conversion.
     *
     * @param variantSourceEntryConverter The object used to convert the files,
     *                                    or {@code null} to skip them
     * @param statsConverter              The object used to convert the cohort
     *                                    statistics, or {@code null} to skip them
     */
    public DBObjectToVariantConverter(DBObjectToVariantSourceEntryConverter variantSourceEntryConverter,
            DBObjectToVariantStatsConverter statsConverter) {
        this.variantSourceEntryConverter = variantSourceEntryConverter;
        this.variantAnnotationConverter = new DBObjectToVariantAnnotationConverter();
        this.statsConverter = statsConverter;
    }

    /**
     * Builds a {@link Variant} from its stored document.
     *
     * @param object The stored document; must contain the chromosome, start,
     *               end, reference and alternate fields
     * @return The variant, including IDs, HGVS names and annotation when
     *         present, plus source entries and statistics when the matching
     *         converters are available
     */
    @Override
    public Variant convertToDataModelType(DBObject object) {
        String chromosome = (String) object.get(CHROMOSOME_FIELD);
        // Read coordinates through Number so that values stored as Long or
        // Double are accepted as well as Integer.
        int start = ((Number) object.get(START_FIELD)).intValue();
        int end = ((Number) object.get(END_FIELD)).intValue();
        String reference = (String) object.get(REFERENCE_FIELD);
        String alternate = (String) object.get(ALTERNATE_FIELD);
        Variant variant = new Variant(chromosome, start, end, reference, alternate);

        // A field that is present but null is treated like a missing one.
        Object storedIds = object.get(IDS_FIELD);
        if (storedIds != null) {
            @SuppressWarnings("unchecked")
            Collection<String> ids = (Collection<String>) storedIds;
            variant.setIds(new HashSet<>(ids));
        }

        // Transform HGVS: List of map entries -> Map of lists
        BasicDBList mongoHgvs = (BasicDBList) object.get(HGVS_FIELD);
        if (mongoHgvs != null) {
            for (Object o : mongoHgvs) {
                DBObject dbo = (DBObject) o;
                variant.addHgvs((String) dbo.get(HGVS_TYPE_FIELD), (String) dbo.get(HGVS_NAME_FIELD));
            }
        }

        // Files
        if (variantSourceEntryConverter != null) {
            BasicDBList mongoFiles = (BasicDBList) object.get(STUDIES_FIELD);
            if (mongoFiles != null) {
                for (Object o : mongoFiles) {
                    DBObject dbo = (DBObject) o;
                    variant.addSourceEntry(variantSourceEntryConverter.convertToDataModelType(dbo));
                }
            }
        }

        // Annotations: stored either as a single document or as a list, of
        // which only the first element is used.
        Object storedAnnotation = object.get(ANNOTATION_FIELD);
        DBObject mongoAnnotation;
        if (storedAnnotation instanceof List) {
            List<?> annotations = (List<?>) storedAnnotation;
            mongoAnnotation = annotations.isEmpty() ? null : (DBObject) annotations.get(0);
        } else {
            mongoAnnotation = (DBObject) storedAnnotation;
        }
        if (mongoAnnotation != null) {
            VariantAnnotation annotation = variantAnnotationConverter.convertToDataModelType(mongoAnnotation);
            // The coordinates are taken from the variant itself.
            annotation.setChromosome(variant.getChromosome());
            annotation.setAlternateAllele(variant.getAlternate());
            annotation.setReferenceAllele(variant.getReference());
            annotation.setStart(variant.getStart());
            variant.setAnnotation(annotation);
        }

        // Statistics
        if (statsConverter != null && object.containsField(STATS_FIELD)) {
            DBObject stats = (DBObject) object.get(STATS_FIELD);
            statsConverter.convertCohortsToDataModelType(stats, variant);
        }
        return variant;
    }

    /**
     * Builds the document to store for a {@link Variant}.
     *
     * @param variant The variant to convert
     * @return The document, with its {@code _id} computed by
     *         {@link #buildStorageId(Variant)}, the {@code _at.chunkIds}
     *         helper field, the HGVS names, an empty annotation list, and the
     *         studies and statistics when the matching converters are
     *         available
     */
    @Override
    public DBObject convertToStorageType(Variant variant) {
        // Attributes easily calculated. IDs are deliberately not included.
        BasicDBObject mongoVariant = new BasicDBObject("_id", buildStorageId(variant))
                .append(CHROMOSOME_FIELD, variant.getChromosome())
                .append(START_FIELD, variant.getStart())
                .append(END_FIELD, variant.getEnd())
                .append(LENGTH_FIELD, variant.getLength())
                .append(REFERENCE_FIELD, variant.getReference())
                .append(ALTERNATE_FIELD, variant.getAlternate())
                .append(TYPE_FIELD, variant.getType().name());

        // Internal fields used for query optimization (dictionary named "_at")
        BasicDBObject internalFields = new BasicDBObject();
        mongoVariant.append("_at", internalFields);

        // Two different chunk sizes are calculated for different resolution levels: 1k and 10k
        BasicDBList chunkIds = new BasicDBList();
        String chunkSmall = variant.getChromosome() + "_"
                + variant.getStart() / VariantMongoDBWriter.CHUNK_SIZE_SMALL + "_"
                + VariantMongoDBWriter.CHUNK_SIZE_SMALL / 1000 + "k";
        String chunkBig = variant.getChromosome() + "_"
                + variant.getStart() / VariantMongoDBWriter.CHUNK_SIZE_BIG + "_"
                + VariantMongoDBWriter.CHUNK_SIZE_BIG / 1000 + "k";
        chunkIds.add(chunkSmall);
        chunkIds.add(chunkBig);
        internalFields.append("chunkIds", chunkIds);

        // Transform HGVS: Map of lists -> List of map entries
        BasicDBList hgvs = new BasicDBList();
        for (Map.Entry<String, Set<String>> entry : variant.getHgvs().entrySet()) {
            for (String value : entry.getValue()) {
                hgvs.add(new BasicDBObject(HGVS_TYPE_FIELD, entry.getKey()).append(HGVS_NAME_FIELD, value));
            }
        }
        mongoVariant.append(HGVS_FIELD, hgvs);

        // Files
        if (variantSourceEntryConverter != null) {
            BasicDBList mongoFiles = new BasicDBList();
            for (VariantSourceEntry archiveFile : variant.getSourceEntries().values()) {
                mongoFiles.add(variantSourceEntryConverter.convertToStorageType(archiveFile));
            }
            mongoVariant.append(STUDIES_FIELD, mongoFiles);
        }

        // Annotations are not written by this converter; an empty list is
        // stored as a placeholder.
        mongoVariant.append(ANNOTATION_FIELD, Collections.emptyList());

        // Statistics
        if (statsConverter != null) {
            mongoVariant.put(STATS_FIELD, statsConverter.convertCohortsToStorageType(variant.getSourceEntries()));
        }

        return mongoVariant;
    }

    /**
     * Builds the storage identifier of a variant from its chromosome, start,
     * reference and alternate.
     *
     * @param v The variant to identify
     * @return The identifier, as built by
     *         {@link #buildStorageId(String, int, String, String)}
     */
    public String buildStorageId(Variant v) {
        return buildStorageId(v.getChromosome(), v.getStart(), v.getReference(), v.getAlternate());
    }

    /**
     * Builds a storage identifier with the form
     * {@code chromosome_start_reference_alternate}.
     * <p>
     * An allele equal to {@code "-"} is left empty, and an allele whose length
     * reaches {@code Variant.SV_THRESHOLD} is replaced by the result of
     * {@code CryptoUtils.encryptSha1}.
     *
     * @param chromosome Chromosome of the variant
     * @param start      Start position of the variant
     * @param reference  Reference allele
     * @param alternate  Alternate allele
     * @return The identifier
     */
    public String buildStorageId(String chromosome, int start, String reference, String alternate) {
        StringBuilder builder = new StringBuilder(chromosome);
        builder.append("_");
        builder.append(start);
        builder.append("_");
        appendAllele(builder, reference);
        builder.append("_");
        appendAllele(builder, alternate);
        return builder.toString();
    }

    /**
     * Appends the identifier fragment of one allele: nothing for {@code "-"},
     * the allele itself when shorter than {@code Variant.SV_THRESHOLD}, and
     * its SHA-1 otherwise.
     */
    private static void appendAllele(StringBuilder builder, String allele) {
        if (allele.equals("-")) {
            return;
        }
        if (allele.length() < Variant.SV_THRESHOLD) {
            builder.append(allele);
        } else {
            builder.append(new String(CryptoUtils.encryptSha1(allele)));
        }
    }

    /**
     * Translates a data-model field name into the name used in storage.
     * <p>
     * The full name is looked up first, so dotted names registered in
     * {@link #fieldsMap} (such as {@code "sourceEntries.cohortStats"}) are
     * honoured. Any other dotted name is resolved through the segment that
     * precedes its first dot.
     *
     * @param longFieldName Field name in the data model
     * @return The storage field name, or {@code null} if there is no mapping
     */
    public static String toShortFieldName(String longFieldName) {
        String shortFieldName = fieldsMap.get(longFieldName);
        if (shortFieldName != null) {
            return shortFieldName;
        }
        int dot = longFieldName.indexOf('.');
        if (dot < 0) {
            return null;
        }
        return fieldsMap.get(longFieldName.substring(0, dot));
    }

}