org.opencb.opencga.storage.mongodb.variant.converters.VariantStringIdConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.opencb.opencga.storage.mongodb.variant.converters.VariantStringIdConverter.java

Source

/*
 * Copyright 2015-2016 OpenCB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.opencb.opencga.storage.mongodb.variant.converters;

import org.apache.commons.lang.StringUtils;
import org.bson.Document;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.commons.datastore.core.ComplexTypeConverter;
import org.opencb.commons.utils.CryptoUtils;

/**
 * Creates a sorted key for MongoDB.
 *
 * Format:
 * CHR:POS:REF:ALT
 *
 * Where CHR starts with " " if it's a single digit chromosome, to sort 2 digits chromosomes.
 * Where POS is left padded with spaces up to 10 characters.
 * Where REF and ALT are a SHA1 of the original allele if it is longer than {@link Variant#SV_THRESHOLD},
 * and are left empty if the allele is "-".
 *
 * Created on 12/05/16
 *
 * @author Jacobo Coll <jacobo167@gmail.com>
 */
public class VariantStringIdConverter implements ComplexTypeConverter<Variant, Document> {

    public static final String SEPARATOR = ":";
    public static final char SEPARATOR_CHAR = ':';
    public static final String ID_FIELD = "_id";
    public static final String END_FIELD = "end";
    public static final String REF_FIELD = "ref";
    public static final String ALT_FIELD = "alt";

    /** Number of fields in a full id: CHR, POS, REF and ALT. */
    private static final int ID_FIELDS = 4;
    /** Width that the start position is left padded to. */
    private static final int POSITION_WIDTH = 10;

    /**
     * Parses a string id with the format CHR:POS:REF:ALT into a {@link Variant}.
     *
     * The id is split into at most four fields, so any separator found after the third one is kept as part
     * of the alternate instead of silently truncating it. The padding spaces of CHR and POS are trimmed.
     * REF and ALT are passed to the Variant exactly as they appear in the id: alleles that
     * {@link #buildId(String, int, String, String)} replaced by a digest or left empty are not restored.
     *
     * @param object String id, as produced by {@link #buildId(String, int, String, String)}
     * @return Variant built from the four fields of the id
     * @throws NumberFormatException          if the POS field is not an integer
     * @throws ArrayIndexOutOfBoundsException if the id has less than four fields
     */
    public Variant convertToDataModelType(String object) {
        // A positive limit keeps trailing empty fields (like -1 does), but stops splitting after the third
        // separator, so an alternate that contains ':' is preserved as a whole.
        String[] split = object.split(SEPARATOR, ID_FIELDS);
        return new Variant(split[0].trim(), Integer.parseInt(split[1].trim()), split[2], split[3]);
    }

    /**
     * Builds a {@link Variant} from a stored document.
     *
     * Chromosome and start are read from the two first fields of the {@link #ID_FIELD} value. End, reference
     * and alternate are read from the {@link #END_FIELD}, {@link #REF_FIELD} and {@link #ALT_FIELD} fields,
     * so the alleles do not depend on the REF and ALT parts of the id.
     *
     * @param object Document with the fields written by {@link #convertToStorageType(Variant)}
     * @return Variant with the values read from the document
     */
    @Override
    public Variant convertToDataModelType(Document object) {
        String[] split = object.getString(ID_FIELD).split(SEPARATOR, -1);
        return new Variant(split[0].trim(), Integer.parseInt(split[1].trim()), object.getInteger(END_FIELD),
                object.getString(REF_FIELD), object.getString(ALT_FIELD));
    }

    /**
     * Creates the document representation of a variant: the generated id plus the reference, alternate and
     * end values as returned by the variant.
     *
     * @param variant Variant to convert
     * @return Document with the fields {@link #ID_FIELD}, {@link #REF_FIELD}, {@link #ALT_FIELD} and
     *         {@link #END_FIELD}
     */
    @Override
    public Document convertToStorageType(Variant variant) {
        return new Document(ID_FIELD, buildId(variant)).append(REF_FIELD, variant.getReference())
                .append(ALT_FIELD, variant.getAlternate()).append(END_FIELD, variant.getEnd());
    }

    /**
     * Builds the string id of a variant from its chromosome, start, reference and alternate.
     *
     * @param variant Variant to build the id for
     * @return String id with the format CHR:POS:REF:ALT
     */
    public String buildId(Variant variant) {
        return buildId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate());
    }

    /**
     * Builds a string id with the format CHR:POS:REF:ALT.
     *
     * CHR and POS are padded as described in {@link #buildId(String, int)}. For each allele: if it is longer
     * than {@link Variant#SV_THRESHOLD} the output of {@link CryptoUtils#encryptSha1} is written instead of
     * the allele, if it is "-" nothing is written, otherwise the allele is written as is.
     *
     * @param chromosome Chromosome
     * @param start      Start position
     * @param reference  Reference allele. Must not be null
     * @param alternate  Alternate allele. Must not be null
     * @return String id
     */
    public String buildId(String chromosome, int start, String reference, String alternate) {
        StringBuilder stringBuilder = buildId(chromosome, start, new StringBuilder());

        stringBuilder.append(SEPARATOR_CHAR);
        appendAllele(reference, stringBuilder);
        stringBuilder.append(SEPARATOR_CHAR);
        appendAllele(alternate, stringBuilder);

        return stringBuilder.toString();
    }

    /**
     * Builds the CHR:POS prefix of an id.
     *
     * @param chromosome Chromosome. A single digit chromosome is prefixed with a space
     * @param start      Start position. Left padded with spaces up to 10 characters
     * @return Prefix of the id, without trailing separator
     */
    public static String buildId(String chromosome, int start) {
        return buildId(chromosome, start, new StringBuilder()).toString();
    }

    /**
     * Appends the CHR:POS prefix of an id to the given builder.
     *
     * @param chromosome    Chromosome
     * @param start         Start position
     * @param stringBuilder Builder to append to
     * @return The same builder received as argument
     */
    private static StringBuilder buildId(String chromosome, int start, StringBuilder stringBuilder) {
        appendChromosome(chromosome, stringBuilder)
                .append(SEPARATOR_CHAR)
                .append(StringUtils.leftPad(Integer.toString(start), POSITION_WIDTH, " "));
        return stringBuilder;
    }

    /**
     * Converts a chromosome into the form used in the id.
     *
     * @param chromosome Chromosome
     * @return The chromosome, prefixed with a space if it is a single digit
     */
    public static String convertChromosome(String chromosome) {
        return appendChromosome(chromosome, new StringBuilder()).toString();
    }

    /**
     * Appends a chromosome to the builder, adding a leading space if the chromosome is exactly one
     * character long and that character is a digit. Any other chromosome is appended unchanged.
     *
     * @param chromosome    Chromosome
     * @param stringBuilder Builder to append to
     * @return The same builder received as argument
     */
    protected static StringBuilder appendChromosome(String chromosome, StringBuilder stringBuilder) {
        if (chromosome.length() == 1 && Character.isDigit(chromosome.charAt(0))) {
            stringBuilder.append(' ');
        }
        return stringBuilder.append(chromosome);
    }

    /**
     * Appends the id representation of one allele. Shared by the reference and the alternate.
     *
     * @param allele        Allele to append. Must not be null
     * @param stringBuilder Builder to append to
     */
    private static void appendAllele(String allele, StringBuilder stringBuilder) {
        if (allele.length() > Variant.SV_THRESHOLD) {
            // NOTE(review): the return type of CryptoUtils.encryptSha1 is not visible from this file. If it
            // is a byte[], this String constructor decodes it without an explicit charset - confirm that the
            // resulting key is the same on every platform before changing anything here, as it is stored.
            stringBuilder.append(new String(CryptoUtils.encryptSha1(allele)));
        } else if (!allele.equals("-")) {
            // "-" is left out, so that part of the id stays empty
            stringBuilder.append(allele);
        }
    }
}