Example usage for com.mongodb BasicDBList BasicDBList

List of usage examples for com.mongodb BasicDBList BasicDBList


On this page you can find example usages of com.mongodb BasicDBList BasicDBList.



Source Link


From source file:org.opencb.cellbase.mongodb.db.variation.ClinicalMongoDBAdaptor.java

License:Apache License

private DBObject getClinvarRegionAggregationFilterDBObject(List<Region> regionList) {
    BasicDBList orDBList = new BasicDBList();
    for (Region region : regionList) {
        BasicDBList andDBList = new BasicDBList();
        andDBList.add(new BasicDBObject("chromosome", region.getChromosome()));
        andDBList.add(new BasicDBObject("end", new BasicDBObject("$gte", region.getStart())));
        andDBList.add(new BasicDBObject("start", new BasicDBObject("$lte", region.getEnd())));
        orDBList.add(new BasicDBObject("$and", andDBList));
    }//  w  w w.  ja v  a  2s . c o  m

    return new BasicDBObject("$match", new BasicDBObject("$or", orDBList));

From source file:org.opencb.cellbase.mongodb.db.variation.VariationMongoDBAdaptor.java

License:Apache License

/**
 * Queries phenotype annotations for a list of regions.
 *
 * <p>NOTE(review): this excerpt is truncated. Several closing braces, statement
 * continuations and the lines that fill {@code sourceIds}, {@code ids} and
 * {@code queries} are not visible, so the comments below describe only what the
 * visible lines show.
 *
 * @param regions regions to query; {@code regions.size()} presizes the id list
 * @param options query options; the optional "source" list is read from it
 * @return whatever {@code executeQueryList2} returns for the collected ids and queries
 *         against {@code mongoVariationPhenotypeDBCollection2}
 */
public List<QueryResult> getAllPhenotypeByRegion(List<Region> regions, QueryOptions options) {
    QueryBuilder builder = null;
    List<DBObject> queries = new ArrayList<>();

    // NOTE(review): the next two lines are the body of a block comment whose
    // opening and closing delimiters are missing from this excerpt.
     * If source is present in options is it parsed and prepare first,
     * otherwise ti will be done for each iteration of regions.
    List<Object> source = options.getList("source", null);
    BasicDBList sourceIds = new BasicDBList();
    // NOTE(review): the body of this 'if' (presumably copying 'source' into
    // 'sourceIds') is not visible here.
    if (source != null && source.size() > 0) {

    //        List<Region> regions = Region.parseRegions(options.getString("region"));
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // NOTE(review): 'region' is a Region, so equals("") compares it with a String;
        // presumably only the null check is effective - confirm against Region.equals.
        if (region != null && !region.equals("")) {
            // If regions is 1 position then query can be optimize using chunks
            if (region.getStart() == region.getEnd()) {
                // NOTE(review): the next two statements are cut off mid-expression
                // (remaining arguments / range conditions are missing).
                String chunkId = getChunkIdPrefix(region.getChromosome(), region.getStart(),
                builder = QueryBuilder.start("_chunkIds").is(chunkId).and("end")
            } else {
                // Multi-position region: filter by chromosome; the rest of the
                // condition chain is missing from this excerpt.
                builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")

            // Restrict to the requested sources when any were collected.
            // 'sourceIds' is never reassigned in the visible code, so the null check
            // looks redundant.
            if (sourceIds != null && sourceIds.size() > 0) {
                builder = builder.and("source").in(sourceIds);

    return executeQueryList2(ids, queries, options, mongoVariationPhenotypeDBCollection2);

From source file:org.opencb.cellbase.mongodb.db.variation.VariationMongoDBAdaptor.java

License:Apache License

/**
 * Queries variations for a list of regions, optionally restricted by phenotype or
 * by consequence type.
 *
 * <p>When the "phenotype" option is non-empty, each region query gets an additional
 * {@code phenotype} condition and the queries run against
 * {@code mongoVariationPhenotypeDBCollection2}. Otherwise the optional
 * comma-separated "consequence_type" option is applied as an {@code $in}-style filter
 * on {@code transcriptVariations.consequenceTypes}.
 *
 * <p>NOTE(review): this excerpt is truncated. Closing braces, the tail of the
 * region condition chains and the lines that fill {@code ids} and
 * {@code consequenceTypeDBList} are not visible.
 *
 * @param regions regions to query
 * @param options query options; "phenotype" and "consequence_type" are read
 * @return the result of {@code executeQueryList2} for the built queries
 */
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    List<DBObject> queries = new ArrayList<>();
    List<String> ids = new ArrayList<>(regions.size());

    String phenotype = options.getString("phenotype");
    if (phenotype != null && !phenotype.equals("")) {
        for (Region region : regions) {
            // NOTE(review): the statement below is cut off after and("start").
            QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start")
            builder = builder.and("phenotype").is(phenotype);
            queries.add(builder.get());
        return executeQueryList2(ids, queries, options, mongoVariationPhenotypeDBCollection2);
    } else {
        String consequenceTypes = options.getString("consequence_type", null);
        BasicDBList consequenceTypeDBList = new BasicDBList();
        if (consequenceTypes != null && !consequenceTypes.equals("")) {
            // NOTE(review): loop body (presumably adding each 'ct' to
            // 'consequenceTypeDBList') is not visible here.
            for (String ct : consequenceTypes.split(",")) {

        for (Region region : regions) {
            //         QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getSequenceName()).and("end").greaterThan(region.getStart()).and("start").lessThan(region.getEnd());
            // NOTE(review): the statement below is cut off after and("start").
            QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start")
            // Apply the consequence type filter only when at least one was requested.
            if (consequenceTypeDBList.size() > 0) {
                builder = builder.and("transcriptVariations.consequenceTypes").in(consequenceTypeDBList);

        return executeQueryList2(ids, queries, options);

From source file:org.opencb.cellbase.mongodb.db.VariationMongoDBAdaptor.java

License:Apache License

/**
 * Queries phenotype annotations for a list of regions against the
 * "variation_phenotype_annotation" collection.
 *
 * <p>NOTE(review): this excerpt is truncated. Several closing braces, statement
 * continuations and the lines that fill {@code sourceIds}, {@code ids} and
 * {@code queries} are not visible, so the comments below describe only what the
 * visible lines show.
 *
 * @param regions regions to query; {@code regions.size()} presizes the id list
 * @param options query options; the optional "source" list is read from it
 * @return whatever {@code executeQueryList} returns for the collected ids and queries
 */
public List<QueryResult> getAllPhenotypeByRegion(List<Region> regions, QueryOptions options) {
    QueryBuilder builder = null;
    List<DBObject> queries = new ArrayList<>();

    // NOTE(review): the next two lines are the body of a block comment whose
    // opening and closing delimiters are missing from this excerpt.
     * If source is present in options is it parsed and prepare first,
     * otherwise ti will be done for each iteration of regions.
    List<Object> source = options.getList("source", null);
    BasicDBList sourceIds = new BasicDBList();
    // NOTE(review): the body of this 'if' (presumably copying 'source' into
    // 'sourceIds') is not visible here.
    if (source != null && source.size() > 0) {

    //        List<Region> regions = Region.parseRegions(options.getString("region"));
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // NOTE(review): 'region' is a Region, so equals("") compares it with a String;
        // presumably only the null check is effective - confirm against Region.equals.
        if (region != null && !region.equals("")) {
            // If regions is 1 position then query can be optimize using chunks
            if (region.getStart() == region.getEnd()) {
                // NOTE(review): the next two statements are cut off mid-expression
                // (remaining arguments / range conditions are missing).
                String chunkId = getChunkIdPrefix(region.getChromosome(), region.getStart(),
                builder = QueryBuilder.start("_chunkIds").is(chunkId).and("end")
            } else {
                // Multi-position region: filter by chromosome; the rest of the
                // condition chain is missing from this excerpt.
                builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")

            // Restrict to the requested sources when any were collected.
            // 'sourceIds' is never reassigned in the visible code, so the null check
            // looks redundant.
            if (sourceIds != null && sourceIds.size() > 0) {
                builder = builder.and("source").in(sourceIds);

    return executeQueryList(ids, queries, options, db.getCollection("variation_phenotype_annotation"));

From source file:org.opencb.cellbase.mongodb.db.VariationMongoDBAdaptor.java

License:Apache License

/**
 * Queries variations for a list of regions, optionally restricted by phenotype or
 * by consequence type.
 *
 * <p>When the "phenotype" option is non-empty, each region query gets an additional
 * {@code phenotype} condition and the queries run against the
 * "variation_phenotype_annotation" collection. Otherwise the optional
 * comma-separated "consequence_type" option is applied as an {@code $in}-style filter
 * on {@code transcriptVariations.consequenceTypes}.
 *
 * <p>NOTE(review): this excerpt is truncated. Closing braces, the tail of the
 * region condition chains and the lines that fill {@code ids} and
 * {@code consequenceTypeDBList} are not visible.
 *
 * @param regions regions to query
 * @param options query options; "phenotype" and "consequence_type" are read
 * @return the result of {@code executeQueryList} for the built queries
 */
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    List<DBObject> queries = new ArrayList<>();
    List<String> ids = new ArrayList<>(regions.size());

    String phenotype = options.getString("phenotype");
    if (phenotype != null && !phenotype.equals("")) {
        for (Region region : regions) {
            // NOTE(review): the statement below is cut off after and("start").
            QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start")
            builder = builder.and("phenotype").is(phenotype);
            queries.add(builder.get());
        return executeQueryList(ids, queries, options, db.getCollection("variation_phenotype_annotation"));
    } else {
        String consequenceTypes = options.getString("consequence_type", null);
        BasicDBList consequenceTypeDBList = new BasicDBList();
        if (consequenceTypes != null && !consequenceTypes.equals("")) {
            // NOTE(review): loop body (presumably adding each 'ct' to
            // 'consequenceTypeDBList') is not visible here.
            for (String ct : consequenceTypes.split(",")) {

        for (Region region : regions) {
            //         QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getSequenceName()).and("end").greaterThan(region.getStart()).and("start").lessThan(region.getEnd());
            // NOTE(review): the statement below is cut off after and("start").
            QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start")
            // Apply the consequence type filter only when at least one was requested.
            if (consequenceTypeDBList.size() > 0) {
                builder = builder.and("transcriptVariations.consequenceTypes").in(consequenceTypeDBList);

        return executeQueryList(ids, queries, options);

From source file:org.opencb.cellbase.mongodb.db.XRefsMongoDBAdaptor.java

License:Apache License

/**
 * Builds one aggregation pipeline per id to look up cross-references (xrefs) and
 * executes them with {@code executeAggregationList}.
 *
 * <p>Stages created per id in the visible code: a {@code $match} on
 * {@code transcripts.xrefs.id}, two {@code $unwind} stages ({@code $transcripts},
 * {@code $transcripts.xrefs}), an optional {@code $match} on
 * {@code transcripts.xrefs.dbName} when the "dbname" option is set, a {@code $group}
 * on id/dbName/dbDisplayName/description and a {@code $project} flattening the
 * grouped {@code _id}.
 *
 * <p>NOTE(review): this excerpt is truncated. Only {@code commands.add(match)} is
 * visible; the lines adding the other stages, filling {@code dbnameDBList} and
 * adding {@code commandsArray} to {@code commandsList} are missing, as are several
 * closing braces.
 *
 * @param ids xref ids to look up; one pipeline is built for each
 * @param options query options; the optional "dbname" list is read from it
 * @return the result of {@code executeAggregationList(ids, commandsList, options)}
 */
public List<QueryResult> getAllByDBNameList(List<String> ids, QueryOptions options) {

    // Original author's remark: "Mel de romer" (Catalan, literally "rosemary honey").
    // Intended pipeline, in mongo shell syntax:
    //{$match: {"transcripts.xrefs.id": "ENST00000544455"}},
    //{$unwind: "$transcripts"},
    //{$unwind: "$transcripts.xrefs"},
    //{$match: {"transcripts.xrefs.dbNameShort":{$in:["go"]}}},
    //{$group:{_id:{id:"$transcripts.xrefs.id", dbNameShort:"$transcripts.xrefs.dbNameShort", description:"$transcripts.xrefs.description"}}},

    // Biotype if gene given: db.core.find({"transcripts.xrefs.id": "BRCA2"}, {"biotype":1})
    // Biotype if protein/transcript given: db.core.aggregate({$match: {"transcripts.xrefs.id": "ENST00000470094"}}, {$unwind: "$transcripts"}, {$match: {"transcripts.xrefs.id": "ENST00000470094"}}, {$group:{_id:{biotype:"$transcripts.biotype"}}}, {$project:{"transcripts.biotype":1}})
    List<DBObject[]> commandsList = new ArrayList<>(ids.size());
    for (String id : ids) {
        // NOTE(review): 'commands' holds pipeline stages, yet its capacity hint is the
        // number of ids; harmless, but probably unintended.
        List<DBObject> commands = new ArrayList<>(ids.size());

        DBObject match = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.id", id));
        DBObject unwind = new BasicDBObject("$unwind", "$transcripts");
        DBObject unwind2 = new BasicDBObject("$unwind", "$transcripts.xrefs");

        commands.add(match);

        //Check dbname option exists
        List<Object> list = options.getList("dbname", null);
        if (list != null && list.size() > 0) {
            // Extra $match restricting xrefs to the requested database names.
            BasicDBList dbnameDBList = new BasicDBList();
            DBObject dbnameMatch = new BasicDBObject("$match",
                    new BasicDBObject("transcripts.xrefs.dbName", new BasicDBObject("$in", dbnameDBList)));

        // Group by the xref fields so each distinct xref appears once.
        DBObject group = new BasicDBObject("$group", new BasicDBObject("_id",
                new BasicDBObject("id", "$transcripts.xrefs.id").append("dbName", "$transcripts.xrefs.dbName")
                        .append("dbDisplayName", "$transcripts.xrefs.dbDisplayName")
                        .append("description", "$transcripts.xrefs.description")));

        // Lift the grouped fields out of _id and drop _id itself.
        DBObject project = new BasicDBObject("$project",
                new BasicDBObject("_id", 0).append("id", "$_id.id").append("dbName", "$_id.dbName")
                        .append("dbDisplayName", "$_id.dbDisplayName")
                        .append("description", "$_id.description"));

        //ArrayList to array
        DBObject[] commandsArray = commands.toArray(new DBObject[0]);

    return executeAggregationList(ids, commandsList, options);

From source file:org.opencb.cellbase.mongodb.impl.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves conservation scores for a list of regions.
 *
 * <p>Visible flow: one query is built per region (a single {@code _chunkIds} lookup
 * when the region's start and end fall in the same chunk, otherwise a
 * chromosome/range query), all queries are run through {@code executeQueryList2},
 * and each result's chunks are then merged into per-source value lists covering
 * {@code region.getStart()..region.getEnd()}.
 *
 * <p>NOTE(review): this excerpt is heavily truncated. Closing braces, the bodies of
 * several {@code if}/{@code for} statements, the tail of some expressions and the
 * lines that fill {@code ids} or store {@code resultList} back into the query
 * result are not visible.
 *
 * @param regionList raw list that is assigned to a {@code List<Region>} without a check
 * @param options query options passed through to {@code executeQueryList2}
 * @return the list returned by {@code executeQueryList2}
 */
@Deprecated
public List<QueryResult> getAllScoresByRegionList(List regionList, QueryOptions options) {
    //TODO not finished yet
    List<Document> queries = new ArrayList<>();
    List<String> ids = new ArrayList<>(regionList.size());
    // Only referenced by the commented-out code further down in this excerpt.
    List<Integer> integerChunkIds;

    // Unchecked assignment from the raw List parameter.
    List<Region> regions = regionList;
    for (Region region : regions) {
        integerChunkIds = new ArrayList<>();
        // positions below 1 are not allowed
        // NOTE(review): the bodies of the two checks below are not visible here.
        if (region.getStart() < 1) {
        if (region.getEnd() < 1) {

        QueryBuilder builder;
        // NOTE(review): both getChunkId calls are cut off after the first argument.
        int regionChunkStart = getChunkId(region.getStart(),
        int regionChunkEnd = getChunkId(region.getEnd(),
        if (regionChunkStart == regionChunkEnd) {
            // Start and end share a chunk: a single chunk-id lookup is enough.
            builder = QueryBuilder.start("_chunkIds").is(getChunkIdPrefix(region.getChromosome(),
                    region.getStart(), MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE));
        } else {
            //                for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
            //                    integerChunkIds.add(chunkId);
            //                }
            //    //            QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosomeInfo()).and("chunkId").in(hunkIds);
            //                builder = QueryBuilder.start("chromosome").is(region.getChromosomeInfo()).and("chunkId").in(integerChunkIds);
            // NOTE(review): the statement below is cut off after and("end").
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")

        queries.add(new Document(builder.get().toMap()));

        //            logger.debug(builder.get().toString());

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
    //        List<QueryResult> queryResults = executeQueryList(ids, queries, options);

    // Results are matched to regions by position: queryResults.get(i) pairs with regions.get(i).
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        List<Document> list = (List<Document>) queryResult.getResult();

        // source name -> one value slot per position of the region (raw HashMap construction).
        Map<String, List<Float>> typeMap = new HashMap();

        //            int start = region.getStart();

        for (int j = 0; j < list.size(); j++) {
            Document chunk = list.get(j);

            if (!chunk.isEmpty()) {
                //                    BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
                ArrayList valuesChunk = chunk.get("values", ArrayList.class);

                if (valuesChunk != null) { // TODO: temporary patch to skip empty chunks - remove as soon as conservation is reloaded
                    String source = chunk.getString("source");
                    List<Float> valuesList;
                    if (!typeMap.containsKey(source)) {
                        // First chunk for this source: allocate a list sized to the region length.
                        valuesList = new ArrayList<>(region.getEnd() - region.getStart() + 1);
                        // NOTE(review): loop body (presumably pre-filling the list so
                        // that set() below is valid) is not visible here.
                        for (int val = 0; val < region.getEnd() - region.getStart() + 1; val++) {
                        typeMap.put(source, valuesList);
                    } else {
                        valuesList = typeMap.get(source);

                    //                        valuesChunk = (BasicDBList) chunk.get("values");
                    // Re-reads the same "values" field already fetched above.
                    valuesChunk = chunk.get("values", ArrayList.class);
                    // Offset into the chunk at which the region begins (0 when the
                    // chunk starts at or after the region start).
                    int pos = 0;
                    if (region.getStart() > chunk.getInteger("start")) {
                        pos = region.getStart() - chunk.getInteger("start");

                    // Copy chunk values that fall inside the region into the slot for
                    // their position relative to the region start.
                    for (; pos < valuesChunk.size()
                            && (pos + chunk.getInteger("start") <= region.getEnd()); pos++) {
                        // Values are cast to Double and narrowed to Float via the boxed constructor.
                        valuesList.set(pos + chunk.getInteger("start") - region.getStart(),
                                new Float((Double) valuesChunk.get(pos)));
                } else {


            // Flatten the per-source values into Score objects, skipping unset (null) slots.
            BasicDBList resultList = new BasicDBList();
            for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
                for (Float value : elem.getValue()) {
                    if (value != null) {
                        resultList.add(new Score(new Double(value), elem.getKey()));
            // NOTE(review): both branches below are empty in this excerpt.
            if (!resultList.isEmpty()) {
            } else {

    return queryResults;

From source file:org.opencb.cellbase.mongodb.impl.ProteinMongoDBAdaptor.java

License:Apache License

/**
 * Builds the protein variant annotation for an amino-acid change in a transcript.
 *
 * <p>Visible flow: substitution scores are fetched with a transcript/position/aa
 * query; if {@code aaAlternate} has an entry in {@code aaShortNameMap}, an
 * aggregation pipeline ($match, $project, $unwind, $match, $group) is run through
 * {@code executeAggregation2} and its first result, if any, is passed to
 * {@code processProteinVariantData}. The elapsed time is stored on the query result.
 *
 * <p>NOTE(review): this excerpt is truncated. Closing braces, the lines that add
 * {@code firstOr}/{@code secondOr} to {@code orList} and the lines that attach
 * {@code proteinVariantAnnotation} to {@code queryResult} are not visible.
 * {@code aaReference} is not used in the visible code.
 *
 * @param ensemblTranscriptId transcript id, matched against {@code dbReference.id}
 * @param position position matched against the feature location fields
 * @param aaReference reference amino acid (unused in the visible lines)
 * @param aaAlternate alternate amino acid; key into {@code aaShortNameMap}
 * @param options not used in the visible lines
 * @return {@code queryResult}, with id and db time set in the visible code
 */
public QueryResult<ProteinVariantAnnotation> getVariantAnnotation(String ensemblTranscriptId, int position,
        String aaReference, String aaAlternate, QueryOptions options) {
    QueryResult<ProteinVariantAnnotation> queryResult = new QueryResult<>();
    queryResult.setId(ensemblTranscriptId + "/" + position + "/" + aaAlternate);
    long dbTimeStart = System.currentTimeMillis();

    ProteinVariantAnnotation proteinVariantAnnotation = new ProteinVariantAnnotation();
    //        proteinVariantAnnotation.setSubstitutionScores(getProteinSubstitutionScores(ensemblTranscriptId, position, aaAlternate));
    Query query = new Query("transcript", ensemblTranscriptId).append("position", position).append("aa",
            aaAlternate);
    //        try {
    //            if (ensemblTranscriptId.equals("ENST00000383037") || ensemblTranscriptId.equals("ENST00000428666")) {
    //                int a = 1;
    //            }
    proteinVariantAnnotation.setSubstitutionScores(getSubstitutionScores(query, null).getResult());
    //        } catch (Exception e) {
    //            int a = 1;
    //        }

    QueryResult proteinVariantData = null;
    String shortAlternativeAa = aaShortNameMap.get(aaAlternate);
    // The aggregation only runs when the alternate amino acid has a short-name mapping.
    if (shortAlternativeAa != null) {
        List<Bson> pipeline = new ArrayList<>();

        //            BasicDBList andDBList1 = new BasicDBList();
        //            andDBList1.add(new Document("dbReference.id", ensemblTranscriptId));
        //            andDBList1.add(new Document("feature.location.position.position", position));
        //            andDBList1.add(new Document("feature.variation", shortAlternativeAa));
        //            pipeline.add(new Document("$match", new Document("$and", andDBList1)));

        // Stage 1: keep documents that reference the transcript.
        pipeline.add(new Document("$match", new Document("dbReference.id", ensemblTranscriptId)));

        // Stage 2: keep only the fields used by the later stages.
        Document projection = new Document();
        projection.put("accession", 1);
        projection.put("keyword", 1);
        projection.put("feature", 1);
        pipeline.add(new Document("$project", projection));

        // Stage 3: one document per feature.
        pipeline.add(new Document("$unwind", "$feature"));

        // Stage 4: first alternative - feature at exactly this position with this variation.
        BasicDBList andDBList2 = new BasicDBList();
        andDBList2.add(new Document("feature.location.position.position", position));
        andDBList2.add(new Document("feature.variation", shortAlternativeAa));
        Document firstOr = new Document("$and", andDBList2);
        // Second alternative - feature whose begin..end range contains the position.
        BasicDBList andDBList3 = new BasicDBList();
        andDBList3.add(new Document("feature.location.end.position", new Document("$gte", position)));
        andDBList3.add(new Document("feature.location.begin.position", new Document("$lte", position)));
        Document secondOr = new Document();
        secondOr.put("$and", andDBList3);
        // NOTE(review): 'orList' is passed to $or while still empty in the visible
        // code; the adds of firstOr/secondOr appear to be missing from this excerpt.
        BasicDBList orList = new BasicDBList();
        pipeline.add(new Document("$match", new Document("$or", orList)));
        //            pipeline.add(new Document("$match", firstOr));
        // Stage 5: regroup by accession, collecting distinct keywords and features.
        Document groupFields = new Document();
        groupFields.put("_id", "$accession");
        groupFields.put("keyword", new Document("$addToSet", "$keyword"));
        groupFields.put("feature", new Document("$addToSet", "$feature"));
        pipeline.add(new Document("$group", groupFields));

        //TODO: finish the aggregation pipeline
        //            QueryBuilder builder = QueryBuilder.start("dbReference.id").is(ensemblTranscriptId)
        //                    .and("feature.location.position.position").is(position)
        //                    .and("feature.variation").is(shortAlternativeAa);
        //            Document firstOr = new Document();
        //            firstOr.put("location.position.position", position);
        //            firstOr.put("variation", shortAlternativeAa);
        //            BasicDBList andList = new BasicDBList();
        //            andList.add(new Document("location.end.position", new Document("$gte", position)));
        //            andList.add(new Document("location.begin.position", new Document("$lte", position)));
        //            Document secondOr = new Document();
        //            secondOr.put("$and", andList);
        //            BasicDBList orList = new BasicDBList();
        //            orList.add(firstOr);
        //            orList.add(secondOr);
        //            Document elemMatch = new Document();
        //            elemMatch.put("$elemMatch", new Document("$or", orList));
        //            Document projection = new Document();
        //            projection.put("feature", elemMatch);
        //            QueryOptions localQueryOptions = new QueryOptions();
        //            localQueryOptions.put("elemMatch",projection);
        //            localQueryOptions.put("include","accession,keyword,feature");
        //            proteinVariantData = executeQuery(ensemblTranscriptId + "_" + String.valueOf(position) + "_"
        //                            + aaAlternate, new Document(builder.get().toMap()), localQueryOptions);
        proteinVariantData = executeAggregation2(
                ensemblTranscriptId + "_" + String.valueOf(position) + "_" + aaAlternate, pipeline,
                new QueryOptions());
        // Only the first aggregated document is used.
        if (proteinVariantData.getNumResults() > 0) {
            proteinVariantAnnotation = processProteinVariantData(proteinVariantAnnotation, shortAlternativeAa,
                    (Document) proteinVariantData.getResult().get(0));

    long dbTimeEnd = System.currentTimeMillis();
    // Elapsed milliseconds narrowed to int.
    queryResult.setDbTime(Long.valueOf(dbTimeEnd - dbTimeStart).intValue());

    //        if (proteinVariantAnnotation.getSubstitutionScores() != null || proteinVariantAnnotation.getUniprotAccession() != null) {
    //        }
    return queryResult;

From source file:org.opencb.cellbase.mongodb.loader.converters.VariantEffectConverter.java

License:Apache License

/**
 * Converts a {@code VariantAnnotation} into the document stored in MongoDB.
 *
 * <p>The document gets the fields {@code chr}, {@code start}, {@code end} and
 * {@code ref}, an {@code eff} list with one entry per stored ALT allele
 * ({@code alt} plus its {@code val} list of consequence types), and the optional
 * fields {@code freqs}, {@code pss} and {@code reg} when the corresponding parsed
 * object is non-empty.
 *
 * <p>NOTE(review): this excerpt is truncated. Closing braces, the lines adding items
 * to {@code consequenceTypeDBList} / {@code consequenceTypeSchemaList}, the
 * {@code break} statements of the {@code switch}, the {@code setHgvsc} calls and the
 * tail of two call expressions are not visible. The commented-out block near the end
 * of the method is an earlier version with the same structure and shows those
 * statements in full.
 *
 * @param variantAnnotation annotation to convert; its effects map is keyed by ALT allele
 * @return the populated {@code mongoDbSchema} object
 */
public DBObject convertToStorageSchema(VariantAnnotation variantAnnotation) {
    BasicDBObject mongoDbSchema = new BasicDBObject("chr", variantAnnotation.getChromosome())
            .append("start", variantAnnotation.getStart()).append("end", variantAnnotation.getEnd())
            .append("ref", variantAnnotation.getReferenceAllele());

    // All consequence types for the different ALT are stored
    // in an array. A compression using '*' is implemented.
    BasicDBList consequenceTypeSchemaList = new BasicDBList();

    // 'keys' contains all the ALT alleles simulated
    Set<String> keys = variantAnnotation.getEffects().keySet();
    Iterator<String> iterator = keys.iterator();

    // Note:
    // During the simulation 2 types of variants are simulated:
    // - common: which means only one allele is simulated since no different results are produced by VEP. This is
    //          the case of UPSTREAM, DOWNSTREAM and INTRONS variants
    // - all: the 3 possible SNV and '-' are simulated. This is done for EXONS and REGULATORY regions
    //        System.out.println("num. alleles:\t"+keys.size());
    //        if(keys.size() == 3) {
    //            System.out.println("==3: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n");
    //        }
    //        if(keys.size() == 2 && !keys.contains("T") && !keys.contains("C")) {
    //            System.out.println("==2: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n");
    //        }

    // If the different allele at this genomic position can produce different results the 4 possible SNV ALT alleles
    // are pre-computed: one of these A, C, G or T; and '-'
    // No more are possible can be found (in theory), but maybe some funny SNPs like AA/TTA exist so
    // if "size >= 4" then all possible values are stored.
    if (keys.size() >= 4) {
        //            System.out.println(">=4: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n");
        while (iterator.hasNext()) {
            String key = iterator.next();
            List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);

            // One entry per ALT allele, stored under its own allele string.
            BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key);
            BasicDBList consequenceTypeDBList = new BasicDBList();
            for (VariantEffect consequenceType : consequenceTypes) {
                // NOTE(review): the add of this item to consequenceTypeDBList is not visible here.
                BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType);
            consequenceTypeDBObject.append("val", consequenceTypeDBList);

            // Add Consequence Type object of this ALT to the List
        // If less than the 4 ALT alleles are found means that either 1 ALT allele and the '-' allele were pre-computed
        // (to minimize the number of calls to VEP), or a few multiallelic SNPs were found at that position.
        // ALT allele must be stored as '*' allele when possible to save space.
    } else {
        boolean commonFound = false;
        while (iterator.hasNext()) {
            String key = iterator.next();
            if (key.equals("-")) {
                // The '-' allele is stored under its own key.
                List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);

                BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key);
                BasicDBList consequenceTypeDBList = new BasicDBList();
                for (VariantEffect consequenceType : consequenceTypes) {
                    // NOTE(review): the add of this item to consequenceTypeDBList is not visible here.
                    BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType);
                consequenceTypeDBObject.append("val", consequenceTypeDBList);

                // Add Consequence Type object of this ALT to the List
            } else {
                // Only the first non '-' is stored using '*'
                if (!commonFound) {
                    commonFound = true;
                    List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);

                    BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", "*");
                    BasicDBList consequenceTypeDBList = new BasicDBList();
                    for (VariantEffect consequenceType : consequenceTypes) {
                        // HGVS must be encoded with '*'
                        if (consequenceType.getHgvsc() != null) {
                            if (consequenceType.getFeatureStrand().equals("1")) {
                                // Strand "1": the allele appears as-is after '>' in the HGVS string.
                                // NOTE(review): 's' is not used in the visible lines; the
                                // commented-out version below calls consequenceType.setHgvsc(s).
                                String s = consequenceType.getHgvsc().replace(">" + key, ">*");
                            } else {
                                // Other strand: the complementary base is replaced instead.
                                // NOTE(review): no 'break' statements are visible here, so as
                                // written every case would fall through; the commented-out
                                // version below has a 'break' after each assignment.
                                String complement = "";
                                switch (key) {
                                case "A":
                                    complement = "T";
                                case "T":
                                    complement = "A";
                                case "C":
                                    complement = "G";
                                case "G":
                                    complement = "C";
                                String s = consequenceType.getHgvsc().replace(">" + complement, ">*");
                        // NOTE(review): the call below is cut off after the opening parenthesis.
                        BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(
                    consequenceTypeDBObject.append("val", consequenceTypeDBList);

                    // Add Consequence Type object of this ALT to the List

        // Earlier, commented-out version of the allele handling above (kept as found):
        //            // If '-' is found then an allele independent position has been found and 1 allele and '-' are expected
        //            if(keys.contains("-")) {
        //                System.out.println("if: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n");
        ////                System.out.println(keys+" "+variantAnnotation.getStart()+"-"+variantAnnotation.getEnd());
        //                boolean commonFound = false;
        //                while(iterator.hasNext()) {
        //                    String key = iterator.next();
        //                    if(key.equals("-")) {
        //                        List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);
        //                        BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key);
        //                        BasicDBList consequenceTypeDBList = new BasicDBList();
        //                        for(VariantEffect consequenceType: consequenceTypes) {
        //                            BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType);
        //                            consequenceTypeDBList.add(consequenceTypeItemDBObject);
        //                        }
        //                        consequenceTypeDBObject.append("val", consequenceTypeDBList);
        //                        // Add Consequence Type object of this ALT to the List
        //                        consequenceTypeSchemaList.add(consequenceTypeDBObject);
        //                    }else {
        //                        // Only the first non '-' is stored using '*'
        //                        if(!commonFound) {
        //                            commonFound = true;
        //                            List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);
        //                            BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", "*");
        //                            BasicDBList consequenceTypeDBList = new BasicDBList();
        //                            for(VariantEffect consequenceType: consequenceTypes) {
        //                                // HGVS must be encoded with '*'
        //                                if(consequenceType.getHgvsc() != null) {
        //                                    if(consequenceType.getFeatureStrand().equals("1")) {
        //                                        String s = consequenceType.getHgvsc().replace(">"+key, ">*");
        //                                        consequenceType.setHgvsc(s);
        //                                    }else {
        //                                        String complement = "";
        //                                        switch (key) {
        //                                            case "A": complement = "T"; break;
        //                                            case "T": complement = "A"; break;
        //                                            case "C": complement = "G"; break;
        //                                            case "G": complement = "C"; break;
        //                                        }
        //                                        String s = consequenceType.getHgvsc().replace(">"+complement, ">*");
        //                                        consequenceType.setHgvsc(s);
        //                                    }
        //                                }
        //                                BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType);
        //                                consequenceTypeDBList.add(consequenceTypeItemDBObject);
        //                            }
        //                            consequenceTypeDBObject.append("val", consequenceTypeDBList);
        //                            // Add Consequence Type object of this ALT to the List
        //                            consequenceTypeSchemaList.add(consequenceTypeDBObject);
        //                        }
        //                    }
        //                }
        //            }
        //            // If no '-' is found we process all of them.
        //            else {
        //                // 1077844
        //                System.out.println("else: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n");
        //                while(iterator.hasNext()) {
        //                    String key = iterator.next();
        //                    List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key);
        //                    BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key);
        //                    BasicDBList consequenceTypeDBList = new BasicDBList();
        //                    for(VariantEffect consequenceType: consequenceTypes) {
        //                        BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType);
        //                        consequenceTypeDBList.add(consequenceTypeItemDBObject);
        //                    }
        //                    consequenceTypeDBObject.append("val", consequenceTypeDBList);
        //                    // Add Consequence Type object of this ALT to the List
        //                    consequenceTypeSchemaList.add(consequenceTypeDBObject);
        //                }
        //            }
    mongoDbSchema.append("eff", consequenceTypeSchemaList);

    // Parsing Frequencies from VEP
    // Stored under "freqs" only when the parsed object is non-empty.
    BasicDBObject frequencyDBObject = parseFrequencies(variantAnnotation.getFrequencies());
    if (frequencyDBObject.size() != 0) {
        mongoDbSchema.append("freqs", frequencyDBObject);

    // Parsing ProteinSubstitutionScores from VEP
    // NOTE(review): the call below is cut off after the opening parenthesis.
    BasicDBObject proteinSubstitutionScoresDBObject = parseProteinSubstituionScores(
    if (proteinSubstitutionScoresDBObject.size() != 0) {
        mongoDbSchema.append("pss", proteinSubstitutionScoresDBObject);

    // Parsing the regulatory effect; stored under "reg" only when non-empty.
    BasicDBObject regulatoryEffectDBObject = parseRegulatoryEffect(variantAnnotation.getRegulatoryEffect());
    if (regulatoryEffectDBObject.size() != 0) {
        mongoDbSchema.append("reg", regulatoryEffectDBObject);

    // Parsing genes (currently disabled)
    //        BasicDBObject genesDBObject = parseGenes(variantAnnotation.getEffects());
    //        if(genesDBObject.size() != 0) {
    //            mongoDbSchema.append("gn", genesDBObject);
    //        }

    return mongoDbSchema;

From source file:org.opencb.opencga.storage.mongodb.alignment.BamManager.java

License:Apache License

/**
 * Reads the alignments of a BAM file that fall in a genomic region and renders them as text.
 *
 * Depending on {@code params} the returned string is one of:
 *  - a fixed error literal, when the cellbasehost or species parameter is present but empty,
 *    or when the index check fails;
 *  - {@code intervalList.toString()}, when histogram is true;
 *  - otherwise the content of a StringBuilder holding JSON-style key/value pairs for every
 *    mapped read, followed by a coverage object with the keys all, a, c, g and t.
 *
 * @param fullFilePath path of the BAM file; the index is looked up at the same path plus .bai
 * @param regionStr    region in the textual form accepted by {@code Region.parseRegion}
 * @param params       optional settings; only the first value of each key is read. Keys used here:
 *                     cellbasehost, species, view_as_pairs, show_softclipping, histogram, interval
 * @return the error literal, the histogram text or the read/coverage text described above
 * @throws IOException declared by this method; presumably raised by the JSON writer or by the
 *                     sequence lookup - TODO confirm which call actually throws
 */
public String getByRegion(Path fullFilePath, String regionStr, Map<String, List<String>> params)
        throws IOException {
    // Wall-clock start, only used for the TOTAL timing printed just before returning.
    long totalTime = System.currentTimeMillis();

    Region region = Region.parseRegion(regionStr);
    String chr = region.getChromosome();
    int start = region.getStart();
    int end = region.getEnd();

    logger.info("chr: " + chr + " start: " + start + " end: " + end);

    // Optional overrides. cellbasehost and species are assigned without a local declaration,
    // so they are variables declared outside this method (presumably instance fields).
    if (params.get("cellbasehost") != null) {
        cellbasehost = params.get("cellbasehost").get(0);
        if (cellbasehost.equals("")) {
            return "{'error':'cellbase host not valid'}";
        }
    if (params.get("species") != null) {
        species = params.get("species").get(0);
        if (species.equals("")) {
            return "{'error':'species not valid'}";

    // Rendering switches; each one keeps its default unless the matching key is present.
    Boolean viewAsPairs = false;
    if (params.get("view_as_pairs") != null) {
        viewAsPairs = Boolean.parseBoolean(params.get("view_as_pairs").get(0));
    Boolean showSoftclipping = false;
    if (params.get("show_softclipping") != null) {
        showSoftclipping = Boolean.parseBoolean(params.get("show_softclipping").get(0));
    Boolean histogram = false;
    if (params.get("histogram") != null) {
        histogram = Boolean.parseBoolean(params.get("histogram").get(0));
    // Width, in bases, of one histogram interval.
    int interval = 200000;
    if (params.get("interval") != null) {
        interval = Integer.parseInt(params.get("interval").get(0));

    File inputBamFile = new File(fullFilePath.toString());
    File inputBamIndexFile = new File(fullFilePath + ".bai");

    // NOTE(review): a File reference produced by new File(...) is never null, so this branch
    // cannot be taken as written; an existence check was presumably intended - confirm.
    if (inputBamIndexFile == null) {
        logger.info("BamManager: " + "creating bam index for: " + fullFilePath);
        // createIndex(inputBamFile, inputBamIndexFile);
        return "{error:'no index found'}";

    // t is reused as a stopwatch for each of the timed phases below.
    long t = System.currentTimeMillis();
    SAMFileReader inputSam = new SAMFileReader(inputBamFile, inputBamIndexFile);
    System.out.println("new SamFileReader in " + (System.currentTimeMillis() - t) + "ms");
    System.out.println("hasIndex " + inputSam.hasIndex());

    t = System.currentTimeMillis();
    // The trailing false is presumably the contained flag, i.e. overlapping reads are returned
    // as well - TODO confirm against the SAMFileReader API.
    SAMRecordIterator recordsFound = inputSam.query(chr, start, end, false);
    System.out.println("query SamFileReader in " + (System.currentTimeMillis() - t) + "ms");

    ArrayList<SAMRecord> records = new ArrayList<SAMRecord>();
    t = System.currentTimeMillis();
    while (recordsFound.hasNext()) {
        SAMRecord record = recordsFound.next();
    System.out.println(records.size() + " elements added in: " + (System.currentTimeMillis() - t) + "ms");

     * Check histogram
    // Histogram mode: reads are binned by alignment start into fixed-width intervals and the
    // method returns the interval list instead of the per-read output.
    if (histogram) {
        int numIntervals = (region.getEnd() - region.getStart()) / interval + 1;
        System.out.println("numIntervals :" + numIntervals);
        int[] intervalCount = new int[numIntervals];

        for (SAMRecord record : records) {
            //            System.out.println("---*-*-*-*-" + numIntervals);
            //            System.out.println("---*-*-*-*-" + record.getAlignmentStart());
            //            System.out.println("---*-*-*-*-" + interval);
            // Only reads whose alignment start lies inside the region get an interval index.
            if (record.getAlignmentStart() >= region.getStart()
                    && record.getAlignmentStart() <= region.getEnd()) {
                int intervalIndex = (record.getAlignmentStart() - region.getStart()) / interval; // truncate
                //               System.out.print(intervalIndex + " ");

        // One object per interval: inclusive start/end coordinates, ordinal and the stored count.
        int intervalStart = region.getStart();
        int intervalEnd = intervalStart + interval - 1;
        BasicDBList intervalList = new BasicDBList();
        for (int i = 0; i < numIntervals; i++) {
            BasicDBObject intervalObj = new BasicDBObject();
            intervalObj.put("start", intervalStart);
            intervalObj.put("end", intervalEnd);
            intervalObj.put("interval", i);
            intervalObj.put("defaultValue", intervalCount[i]);
            intervalStart = intervalEnd + 1;
            intervalEnd = intervalStart + interval - 1;

        return intervalList.toString();

    t = System.currentTimeMillis();
    // Reference sequence for the region and its reverse complement (helpers defined elsewhere).
    String forwardSequence = getSequence(chr, start, end);
    String reverseSequence = revcomp(forwardSequence);
    // System.out.println(forwardSequence);
    // System.out.println(reverseSequence);
    System.out.println("Get genome sequence in " + (System.currentTimeMillis() - t) + "ms");
    // One slot per region position: total coverage plus one counter array per base.
    short[] coverageArray = new short[end - start + 1];
    short[] aBaseArray = new short[end - start + 1];
    short[] cBaseArray = new short[end - start + 1];
    short[] gBaseArray = new short[end - start + 1];
    short[] tBaseArray = new short[end - start + 1];

    // Pair view: order the reads by read name.
    if (viewAsPairs) {
        t = System.currentTimeMillis();
        Collections.sort(records, new Comparator<SAMRecord>() {
            // NOTE(review): returns -1 whenever o1, its read name or o2 is null, whatever the
            // argument order, and o2.getReadName() is not null-checked before compareTo -
            // confirm that null records and null names cannot occur here.
            public int compare(SAMRecord o1, SAMRecord o2) {
                if (o1 != null && o1.getReadName() != null && o2 != null) {
                    return o1.getReadName().compareTo(o2.getReadName());
                return -1;
        System.out.println(records.size() + " elements sorted in: " + (System.currentTimeMillis() - t) + "ms");

    t = System.currentTimeMillis();
    // sb accumulates the text that is finally returned.
    StringBuilder sb = new StringBuilder();
    StringBuilder attrString;
    String readStr;
    int readPos;
    //        logger.info("Processing SAM records");
    for (SAMRecord record : records) {
        //            logger.info(record.getReadName());

        // Only mapped reads are rendered.
        Boolean condition = (!record.getReadUnmappedFlag());
        if (condition) {
            // Optional SAM attributes as tag/value string pairs; backslashes and double quotes
            // inside the value are escaped.
            attrString = new StringBuilder();
            for (SAMRecord.SAMTagAndValue attr : record.getAttributes()) {
                attrString.append("\"" + attr.tag + "\":\""
                        + attr.value.toString().replace("\\", "\\\\").replace("\"", "\\\"") + "\",");
            // Remove last comma
            if (attrString.length() > 1) {
                attrString.replace(attrString.length() - 1, attrString.length(), "");

            readStr = record.getReadString();

            // Always-true guard: the commented-out conditions below used to restrict the
            // output to specific reads while debugging.
            if (true) {// TEST
                // if(record.getReadNegativeStrandFlag()
                // ){
                // if(record.getReadName().equals("SRR081241.8998181") ||
                // record.getReadName().equals("SRR081241.645807")
                // ){

                // System.out.println("#############################################################################################################################################");
                // System.out.println("#############################################################################################################################################");
                // System.out.println("Unclipped Start:"+(record.getUnclippedStart()-start));
                // System.out.println("Unclipped End:"+(record.getUnclippedEnd()-start+1));
                // System.out.println(record.getCigarString()+"   Alig Length:"+(record.getAlignmentEnd()-record.getAlignmentStart()+1)+"   Unclipped length:"+(record.getUnclippedEnd()-record.getUnclippedStart()+1));

                // Reference slice spanning the unclipped extent of the read. The +500 shift
                // presumably matches flanking bases that getSequence adds before start - TODO confirm.
                String refStr = forwardSequence.substring((500 + record.getUnclippedStart() - start),
                        (500 + record.getUnclippedEnd() - start + 1));

                // System.out.println("refe:"+refStr+"  refe.length:"+refStr.length());
                // System.out.println("read:"+readStr+"  readStr.length:"+readStr.length()+"   getReadLength:"+record.getReadLength());
                StringBuilder diffStr = new StringBuilder();

                // index walks the read string, indexRef walks refStr.
                int index = 0;
                int indexRef = 0;
                // System.out.println(jsonObjectMapper.toJson(record.getCigar().getCigarElements()));

                //                    logger.info("checking cigar: " + record.getCigar().toString());
                for (int i = 0; i < record.getCigar().getCigarElements().size(); i++) {
                    CigarElement cigarEl = record.getCigar().getCigarElement(i);
                    CigarOperator cigarOp = cigarEl.getOperator();
                    int cigarLen = cigarEl.getLength();
                    //                        logger.info(cigarOp + " found" + " index:" + index + " indexRef:" + indexRef + " cigarLen:" + cigarLen);

                    // M, EQ and X: diff the read against the reference; both cursors advance.
                    if (cigarOp == CigarOperator.M || cigarOp == CigarOperator.EQ
                            || cigarOp == CigarOperator.X) {
                        String subref = refStr.substring(indexRef, indexRef + cigarLen);
                        String subread = readStr.substring(index, index + cigarLen);
                        diffStr.append(getDiff(subref, subread));
                        index = index + cigarLen;
                        indexRef = indexRef + cigarLen;
                    // I: inserted bases are appended in lower case; only the read cursor advances.
                    if (cigarOp == CigarOperator.I) {
                        diffStr.append(readStr.substring(index, index + cigarLen).toLowerCase());
                        index = index + cigarLen;
                        // TODO save insertions
                    // D and N: only the reference cursor advances.
                    if (cigarOp == CigarOperator.D) {
                        for (int bi = 0; bi < cigarLen; bi++) {
                        indexRef = indexRef + cigarLen;
                    if (cigarOp == CigarOperator.N) {
                        for (int bi = 0; bi < cigarLen; bi++) {
                        indexRef = indexRef + cigarLen;
                    // S: both cursors advance in either branch; without showSoftclipping the
                    // clipped bases are rendered as spaces.
                    if (cigarOp == CigarOperator.S) {
                        if (showSoftclipping) {
                            String subread = readStr.substring(index, index + cigarLen);
                            index = index + cigarLen;
                            indexRef = indexRef + cigarLen;
                        } else {
                            for (int bi = 0; bi < cigarLen; bi++) {
                                diffStr.append(" ");
                            index = index + cigarLen;
                            indexRef = indexRef + cigarLen;
                    // H and P: only the reference cursor advances.
                    if (cigarOp == CigarOperator.H) {
                        for (int bi = 0; bi < cigarLen; bi++) {
                        indexRef = indexRef + cigarLen;
                    if (cigarOp == CigarOperator.P) {
                        for (int bi = 0; bi < cigarLen; bi++) {
                        indexRef = indexRef + cigarLen;
                    // if(cigarOp == CigarOperator.EQ) {
                    // }
                    // if(cigarOp == CigarOperator.X) {
                    // }
                // System.out.println("diff:"+diffStr);
                // diffStr with the spaces removed; decides below whether a diff entry is written.
                String empty = diffStr.toString().replace(" ", "");
                // System.out.println("diff:"+diffStr);

                sb.append("\"start\":" + record.getAlignmentStart() + ",");
                sb.append("\"end\":" + record.getAlignmentEnd() + ",");
                sb.append("\"unclippedStart\":" + record.getUnclippedStart() + ",");
                sb.append("\"unclippedEnd\":" + record.getUnclippedEnd() + ",");
                sb.append("\"chromosome\":\"" + chr + "\",");
                sb.append("\"flags\":\"" + record.getFlags() + "\",");// the strand will be calculated from the flags
                sb.append("\"cigar\":\"" + record.getCigarString() + "\",");
                sb.append("\"name\":\"" + record.getReadName() + "\",");
                // Only the length of the first alignment block is written.
                sb.append("\"blocks\":\"" + record.getAlignmentBlocks().get(0).getLength() + "\",");

                sb.append("\"attributes\":" + attrString.toString() + ",");

                // sb.append("\"readGroupId\":\""+record.getReadGroup().getId()+"\",");
                // sb.append("\"readGroupPlatform\":\""+record.getReadGroup().getPlatform()+"\",");
                // sb.append("\"readGroupLibrary\":\""+record.getReadGroup().getLibrary()+"\",");
                sb.append("\"referenceName\":\"" + record.getReferenceName() + "\",");
                        + record.getBaseQualityString().replace("\\", "\\\\").replace("\"", "\\\"") + "\",");// escaped because a raw double-quote character cannot be parsed from JavaScript
                // sb.append("\"baseQualityString\":\""+jsonObjectMapper.toJson(baseQualityArray)+"\",");//
                // the " char unables parse from javascript
                sb.append("\"header\":\"" + record.getHeader().toString() + "\",");
                sb.append("\"readLength\":" + record.getReadLength() + ",");
                sb.append("\"mappingQuality\":" + record.getMappingQuality() + ",");

                sb.append("\"mateReferenceName\":\"" + record.getMateReferenceName() + "\",");
                sb.append("\"mateAlignmentStart\":" + record.getMateAlignmentStart() + ",");
                sb.append("\"inferredInsertSize\":" + record.getInferredInsertSize() + ",");

                // The diff entry is skipped when the diff consists of spaces only.
                if (!empty.isEmpty()) {
                    sb.append("\"diff\":\"" + diffStr + "\",");

                sb.append("\"read\":\"" + readStr + "\"");

            } // IF TEST BY READ NAME

            //                logger.info("Creating coverage array");
            // TODO cigar check for correct coverage calculation and
            // refgenomeOffset and readOffset accumulate the shifts contributed by earlier CIGAR
            // elements; offset is the read's alignment start relative to the region start.
            int refgenomeOffset = 0;
            int readOffset = 0;
            int offset = record.getAlignmentStart() - start;
            for (int i = 0; i < record.getCigar().getCigarElements().size(); i++) {
                if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.M) {
                    //                        logger.info("start: "+start);
                    //                        logger.info("r a start: "+record.getAlignmentStart());
                    //                        logger.info("refgenomeOffset: "+refgenomeOffset);
                    //                        logger.info("r c lenght: "+record.getCigar().getCigarElement(i).getLength());
                    //                        logger.info(record.getAlignmentStart() - start + refgenomeOffset);
                    //                        logger.info("readStr: "+readStr.length());
                    //                        logger.info("readStr: "+readStr.length());

                    // j is the region-relative position (index into the coverage arrays);
                    // cont counts the bases consumed inside this M element.
                    for (int j = record.getAlignmentStart() - start + refgenomeOffset, cont = 0; cont < record
                            .getCigar().getCigarElement(i).getLength(); j++, cont++) {
                        // Positions outside the requested region are skipped.
                        if (j >= 0 && j < coverageArray.length) {
                            readPos = j - offset;
                            // if(record.getAlignmentStart() == 32877696){
                            // System.out.println(i-(record.getAlignmentStart()-start));
                            // System.out.println(record.getAlignmentStart()-start);
                            // }
                            // System.out.print(" - "+(cont+readOffset));
                            // System.out.print("|"+readStr.length());
                            // Position of the current base inside the read string.
                            int total = cont + readOffset;
                            // if(total < readStr.length()){
                            //                                logger.info(readStr.length());
                            switch (readStr.charAt(total)) {
                            case 'A':
                            case 'C':
                            case 'G':
                            case 'T':
                            // }
                // Offset bookkeeping per operator: I shifts only the read offset, D only the
                // reference offset, H neither, and every other operator shifts both; each shift
                // is the element length minus one.
                if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.I) {
                    readOffset += record.getCigar().getCigarElement(i).getLength() - 1;
                } else if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.D) {
                    refgenomeOffset += record.getCigar().getCigarElement(i).getLength() - 1;
                } else if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.H) {
                    //Ignored Hardclipping and do not update offset pointers
                } else {
                    refgenomeOffset += record.getCigar().getCigarElement(i).getLength() - 1;
                    readOffset += record.getCigar().getCigarElement(i).getLength() - 1;
                //                    if (record.getCigar().getCigarElement(i).getOperator() != CigarOperator.I) {
                //                    } else if(){
                //                    }
            //                logger.info("coverage array created");
        //            logger.info(" ");

    // Remove last comma
    int sbLength = sb.length();
    int sbLastPos = sbLength - 1;
    if (sbLength > 1 && sb.charAt(sbLastPos) == ',') {
        sb.replace(sbLastPos, sbLength, "");

    // //FIXME
    // sb.append("]");
    // sb.append(",\"coverage\":"+jsonObjectMapper.toJson(coverageArray));
    // sb.append("}");

    // FIXME
    // Coverage object: the total array followed by the per-base arrays, each serialised by
    // jsonObjectWriter (declared outside this method).
    sb.append(",\"coverage\":{\"all\":" + jsonObjectWriter.writeValueAsString(coverageArray));
    sb.append(",\"a\":" + jsonObjectWriter.writeValueAsString(aBaseArray));
    sb.append(",\"c\":" + jsonObjectWriter.writeValueAsString(cBaseArray));
    sb.append(",\"g\":" + jsonObjectWriter.writeValueAsString(gBaseArray));
    sb.append(",\"t\":" + jsonObjectWriter.writeValueAsString(tBaseArray));

    String json = sb.toString();
    System.out.println("Result String created in " + (System.currentTimeMillis() - t) + "ms");

    // IOUtils.write("/tmp/dqslastgetByRegionCall", json);


    System.out.println("TOTAL " + (System.currentTimeMillis() - totalTime) + "ms");
    return json;