Example usage for com.mongodb BasicDBObject get

List of usage examples for com.mongodb BasicDBObject get

Introduction

This page collects usage examples for the get method of com.mongodb.BasicDBObject.

Prototype

public Object get(final String key) 

Source Link

Document

Gets a value from this object

Usage

From source file:org.opencb.cellbase.mongodb.db.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves the conservation scores covering each of the given regions.
 *
 * <p>Start and end positions below 1 are clamped to 1 on the supplied {@code Region}
 * objects (the regions are mutated). A region that falls inside a single chunk is
 * queried by its {@code _chunkIds} value; any other region is queried by chromosome
 * and start/end overlap.
 *
 * <p>For every region the result of the matching {@code QueryResult} is replaced by a
 * {@code BasicDBList} holding, for each score type found, one entry per position of
 * the region: a {@code Score} where a value was stored, {@code null} otherwise.
 *
 * @param regions regions to query; their start/end may be adjusted in place
 * @param options query options forwarded to the query executor
 * @return the query results, in the same order as {@code regions}
 */
@Override
public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // The whole region lives in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize));
        } else {
            // The region spans several chunks: fetch every chunk overlapping it.
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }
    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);

    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        @SuppressWarnings("unchecked")
        List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // One position-indexed list of values per score type.
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (BasicDBObject chunk : list) {
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                // Pre-fill with nulls so positions without a stored value stay null.
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && (pos + chunkStart <= regionEnd); pos++) {
                // Stored values may come back as any numeric type, not only Double.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            for (Float value : elem.getValue()) {
                resultList.add(value != null
                        ? new Score(Double.valueOf(value.doubleValue()), elem.getKey())
                        : null);
            }
        }
        queryResult.setResult(resultList);
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves, for each region, one conserved-region feature per score type.
 *
 * <p>Start and end positions below 1 are clamped to 1 and regions longer than
 * 10000bp are truncated to {@code start + 10000}; both adjustments are applied to the
 * supplied {@code Region} objects. A region that falls inside a single chunk is
 * queried by its {@code _chunkIds} value; any other region is queried by chromosome
 * and start/end overlap.
 *
 * <p>The result of each {@code QueryResult} is replaced by a {@code BasicDBList} of
 * {@code ConservedRegionFeature} objects, each carrying a position-indexed list of
 * values ({@code null} where no value was stored).
 *
 * @param regions regions to query; their start/end may be adjusted in place
 * @param options query options forwarded to the query executor
 * @return the query results, in the same order as {@code regions}
 */
@Deprecated
@Override
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        // Max region size is 10000bp
        if (region.getEnd() - region.getStart() > 10000) {
            region.setEnd(region.getStart() + 10000);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // The whole region lives in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), this.chunkSize));
        } else {
            // The region spans several chunks: fetch every chunk overlapping it.
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.debug(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        List<?> list = queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // One position-indexed list of values per score type.
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (Object element : list) {
            BasicDBObject chunk = (BasicDBObject) element;
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                // Pre-fill with nulls so positions without a stored value stay null.
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && (pos + chunkStart <= regionEnd); pos++) {
                // Stored values may come back as any numeric type, not only Double.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            resultList.add(new ConservedRegionFeature(region.getChromosome(), region.getStart(),
                    region.getEnd(), elem.getKey(), elem.getValue()));
        }
        queryResult.setResult(resultList);
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves the conservation scores covering each of the given regions.
 *
 * <p>Start and end positions below 1 are clamped to 1 on the supplied {@code Region}
 * objects (the regions are mutated). A region that falls inside a single chunk is
 * queried by its {@code _chunkIds} value; any other region is queried by chromosome
 * and start/end overlap.
 *
 * <p>For every region the result of the matching {@code QueryResult} is replaced by a
 * {@code BasicDBList} with one {@code Score} per position that has a stored value
 * (positions without a value are omitted), or by {@code null} when no score was found.
 *
 * @param regions regions to query; their start/end may be adjusted in place
 * @param options query options forwarded to the query executor
 * @return the query results, in the same order as {@code regions}
 */
@Override
public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // The whole region lives in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize));
        } else {
            // The region spans several chunks: fetch every chunk overlapping it.
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.debug(query.toString());
    }
    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);

    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        @SuppressWarnings("unchecked")
        List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // One position-indexed list of values per score type.
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (BasicDBObject chunk : list) {
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                // Pre-fill with nulls so positions without a stored value stay null.
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && (pos + chunkStart <= regionEnd); pos++) {
                // Stored values may come back as any numeric type, not only Double.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            for (Float value : elem.getValue()) {
                if (value != null) {
                    resultList.add(new Score(Double.valueOf(value.doubleValue()), elem.getKey()));
                }
            }
        }
        if (!resultList.isEmpty()) {
            queryResult.setResult(resultList);
        } else {
            queryResult.setResult(null);
        }
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.GenomeMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves the genome sequence covering each of the given regions.
 *
 * <p>Start and end positions below 1 are clamped to 1 on the supplied {@code Region}
 * objects. For each region every chunk id between the chunk of its start and the
 * chunk of its end is queried through the {@code _chunkIds} field; the {@code sequence}
 * values of the returned chunks are concatenated and the part matching the region is
 * cut out.
 *
 * <p>The result of each {@code QueryResult} is replaced by a single-element list with
 * a {@code GenomeSequenceFeature}. When the query returned no chunk at all the
 * {@code QueryResult} is left untouched instead of failing on the missing first chunk.
 *
 * @param regions regions to query; their start/end may be adjusted in place
 * @param options query options forwarded to the query executor
 * @return the query results, in the same order as {@code regions}
 */
@Override
public List<QueryResult> getAllSequencesByRegionList(List<Region> regions, QueryOptions options) {
    String chunkIdSuffix = this.chunkSize / 1000 + "k";

    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        List<String> chunkIds = new ArrayList<>();
        for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
            chunkIds.add(region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix);
        }

        DBObject query = QueryBuilder.start("_chunkIds").in(chunkIds).get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options, genomeSequenceCollection);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);

        List<?> list = queryResult.getResult();
        if (list == null || list.isEmpty()) {
            // No chunk found: there is no first chunk to read sequenceType/assembly from.
            continue;
        }

        StringBuilder sb = new StringBuilder();
        for (Object element : list) {
            sb.append(((BasicDBObject) element).get("sequence"));
        }
        String sequence = sb.toString();

        int startStr = getOffset(region.getStart());
        int endStr = startStr + (region.getEnd() - region.getStart()) + 1;

        String subStr = "";

        if (getChunkId(region.getStart(), this.chunkSize) > 0) {
            if (sequence.length() > 0 && sequence.length() >= endStr) {
                subStr = sequence.substring(startStr, endStr);
            }
        } else {
            // Regions starting in the first chunk use offsets shifted by one.
            if (sequence.length() > 0 && sequence.length() + 1 >= endStr) {
                subStr = sequence.substring(startStr - 1, endStr - 1);
            }
        }

        BasicDBObject firstChunk = (BasicDBObject) list.get(0);
        String sequenceType = firstChunk.getString("sequenceType");
        String assembly = firstChunk.getString("assembly");
        logger.info("((BasicDBObject)list.get(0)).getString(\"sequenceType\") = {}", sequenceType);
        logger.info("((BasicDBObject)list.get(0)).getString(\"assembly\") = {}", assembly);

        GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(),
                region.getStart(), region.getEnd(), 1, sequenceType, assembly, subStr);

        queryResult.setResult(Arrays.asList(genomeSequenceFeature));
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.GenomeSequenceMongoDBAdaptor.java

License:Apache License

/**
 * Retrieves the genome sequence covering each of the given regions.
 *
 * <p>Start and end positions below 1 are clamped to 1 on the supplied {@code Region}
 * objects. For each region every chunk id between the chunk of its start and the
 * chunk of its end is queried through the {@code _chunkIds} field; the {@code sequence}
 * values of the returned chunks are concatenated and the part matching the region is
 * cut out.
 *
 * <p>The result of each {@code QueryResult} is replaced by a single-element list with
 * a {@code GenomeSequenceFeature}. When the query returned no chunk at all the
 * {@code QueryResult} is left untouched instead of failing on the missing first chunk.
 *
 * @param regions regions to query; their start/end may be adjusted in place
 * @param options query options forwarded to the query executor
 * @return the query results, in the same order as {@code regions}
 */
@Override
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    String chunkIdSuffix = this.chunkSize / 1000 + "k";

    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        int regionChunkStart = getChunk(region.getStart());
        int regionChunkEnd = getChunk(region.getEnd());
        List<String> chunkIds = new ArrayList<>();
        for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
            chunkIds.add(region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix);
        }

        DBObject query = QueryBuilder.start("_chunkIds").in(chunkIds).get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);

        List<?> list = queryResult.getResult();
        if (list == null || list.isEmpty()) {
            // No chunk found: there is no first chunk to read sequenceType/assembly from.
            continue;
        }

        StringBuilder sb = new StringBuilder();
        for (Object element : list) {
            sb.append(((BasicDBObject) element).get("sequence"));
        }
        String sequence = sb.toString();

        int startStr = getOffset(region.getStart());
        int endStr = startStr + (region.getEnd() - region.getStart()) + 1;

        String subStr = "";

        if (getChunk(region.getStart()) > 0) {
            if (sequence.length() > 0 && sequence.length() >= endStr) {
                subStr = sequence.substring(startStr, endStr);
            }
        } else {
            // Regions starting in the first chunk use offsets shifted by one.
            if (sequence.length() > 0 && sequence.length() + 1 >= endStr) {
                subStr = sequence.substring(startStr - 1, endStr - 1);
            }
        }

        BasicDBObject firstChunk = (BasicDBObject) list.get(0);
        GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(),
                region.getStart(), region.getEnd(), 1, firstChunk.getString("sequenceType"),
                firstChunk.getString("assembly"), subStr);

        queryResult.setResult(Arrays.asList(genomeSequenceFeature));
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.regulatory.TfbsMongoDBAdaptor.java

License:Apache License

/**
 * Returns one {@code QueryResult} per target gene id, holding the {@code tfbs} list of
 * the transcript whose upper-cased id equals that target id.
 *
 * <p>An aggregation pipeline ($match on {@code transcripts.xrefs.id}, $unwind of
 * {@code transcripts}, $project of {@code transcripts.id} and {@code transcripts.tfbs})
 * is built for every id.
 *
 * <p>NOTE(review): the call that executed those pipelines is commented out, so every
 * {@code QueryResult} is created empty here and the filtering loop only has an effect
 * once the aggregation is re-enabled. Confirm the intended executor and collection.
 *
 * @param targetGeneIdList ids to look up
 * @param options query options (unused while the aggregation is disabled)
 * @return one result per entry of {@code targetGeneIdList}, in the same order
 */
@Override
public List<QueryResult> getAllByTargetGeneIdList(List<String> targetGeneIdList, QueryOptions options) {
    //        DBCollection coreMongoDBCollection = db.getCollection("gene");

    List<DBObject[]> commandList = new ArrayList<>(targetGeneIdList.size());
    for (String targetGeneId : targetGeneIdList) {
        BasicDBObject projectObj = new BasicDBObject("_id", 0);
        projectObj.append("transcripts.id", 1);
        projectObj.append("transcripts.tfbs", 1);

        DBObject[] commands = new DBObject[3];
        commands[0] = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.id", targetGeneId));
        commands[1] = new BasicDBObject("$unwind", "$transcripts");
        commands[2] = new BasicDBObject("$project", projectObj);
        commandList.add(commands);
    }

    //        List<QueryResult> queryResults = executeAggregationList(targetGeneIdList, commandList, options, coreMongoDBCollection);
    List<QueryResult> queryResults = new ArrayList<>(targetGeneIdList.size());
    for (int i = 0; i < targetGeneIdList.size(); i++) {
        // Each result is matched against its own id (previously always the first one).
        String targetGeneId = targetGeneIdList.get(i);
        QueryResult queryResult = new QueryResult();
        // Register the result so the returned list is aligned with the input ids.
        queryResults.add(queryResult);

        Object rawResult = queryResult.getResult();
        if (!(rawResult instanceof BasicDBList)) {
            // Nothing to filter (e.g. no aggregation output available).
            continue;
        }

        for (Object element : (BasicDBList) rawResult) {
            BasicDBObject gene = (BasicDBObject) element;
            BasicDBObject transcript = (BasicDBObject) gene.get("transcripts");
            if (transcript == null) {
                continue;
            }
            String transcriptId = transcript.getString("id");
            if (transcriptId != null && transcriptId.toUpperCase().equals(targetGeneId)) {
                BasicDBList tfbs = (BasicDBList) transcript.get("tfbs");
                queryResult.setResult(tfbs);
                break;
            }
        }
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Adds Sequence Ontology term names to {@code SoNames} for a variant located in the
 * coding region of a transcript (presumably a positive-strand transcript, judging by
 * the method name -- confirm against the caller).
 *
 * <p>Three cases are distinguished:
 * <ul>
 *   <li>deletion ({@code variantAlt} is "-"),</li>
 *   <li>insertion ({@code variantRef} is "-" and {@code cdnaVariantStart} is not null),</li>
 *   <li>anything else, handled as an SNV.</li>
 * </ul>
 * For SNVs that reach the codon analysis, the amino acid change and codon change are
 * also written to {@code consequenceTypeTemplate}, and Sift/Polyphen scores are added
 * when the protein function predictor returns exactly one result. If no branch set a
 * coding annotation, "coding_sequence_variant" is added as a fallback.
 *
 * @param splicing                whether the variant was flagged as splicing by the caller
 * @param transcriptSequence      transcript sequence used to extract the affected codon
 * @param transcriptEnd           transcript end coordinate, compared with {@code genomicCodingEnd}
 * @param genomicCodingEnd        genomic coding end coordinate
 * @param cdnaCodingStart         cDNA position where the coding sequence starts
 * @param cdnaCodingEnd           cDNA position where the coding sequence ends
 * @param cdnaVariantStart        cDNA start of the variant; null if the variant is intronic
 * @param cdnaVariantEnd          cDNA end of the variant; may be null
 * @param transcriptFlags         transcript flags (checked for "cds_start_NF" / "cds_end_NF"); may be null
 * @param variantRef              reference allele ("-" for insertions)
 * @param variantAlt              alternative allele ("-" for deletions)
 * @param SoNames                 output set receiving the SO term names
 * @param consequenceTypeTemplate consequence type updated with aa change, codon and scores
 */
private void solvePositiveCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptEnd,
        Integer genomicCodingEnd, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {

    Boolean codingAnnotationAdded = false; // This will indicate whether it is needed to add the "coding_sequence_variant" annotation or not

    if (variantAlt.equals("-")) { // Deletion
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null
                || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
                codingAnnotationAdded = true;
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation will be solved in the line below.
                solveStopCodonPositiveDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion. Be careful: insertion coordinates are special, alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd
            codingAnnotationAdded = true;
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
                SoNames.add("initiator_codon_variant");
            }
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                SoNames.add("incomplete_terminal_codon_variant");
            }
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt,
                    SoNames);
            //                if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0
            //                    if (cdnaVariantStart != null && cdnaVariantStart > (cdnaCodingEnd - 3)) { // -3 because alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd
            //                        char[] modifiedCodonArray = solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt);
            //                        if(isStopCodon(String.valueOf(modifiedCodonArray))) {
            //                            SoNames.add("stop_retained_variant");
            //                        } else {
            //                            SoNames.add("stop_lost");
            //                        }
            //                    }
            //                } else {
            // Be careful, strict > since this is a insertion, inserted nts are pasted on the left of cdnaVariantStart
            //                }
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                        SoNames.add("incomplete_terminal_codon_variant"); //  If not, avoid calculating reference/modified codon
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        String referenceCodon = transcriptSequence.substring(modifiedCodonStart - 1,
                                modifiedCodonStart + 2); // -1 and +2 because of base 0 String indexing
                        char[] modifiedCodonArray = referenceCodon.toCharArray();
                        modifiedCodonArray[variantPhaseShift] = variantAlt.toCharArray()[0];
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(referenceCodon);
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));
                        if (isSynonymousCodon.get(referenceCodon).get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(referenceCodon)) {
                                SoNames.add("stop_retained_variant");
                            } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift",
                                                siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Set consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters
                        char[] referenceCodonArray = referenceCodon.toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    // Fallback: no more specific coding annotation was produced above.
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Adds to {@code SoNames} the Sequence Ontology terms that describe the effect of a variant on the coding
 * sequence of a transcript, working on cDNA coordinates. Judging from the method name and from the
 * reverse-complement applied to the codon in the SNV branch, this is the reverse-strand counterpart of the
 * coding-effect solver (NOTE(review): confirm against the caller).
 * <p>
 * Three cases are distinguished:
 * <ul>
 *   <li>deletion ({@code variantAlt} is "-"): initiator codon, inframe/frameshift, stop codon and
 *       incomplete terminal codon terms;</li>
 *   <li>insertion ({@code variantRef} is "-" and {@code cdnaVariantStart} is not null): initiator codon,
 *       incomplete terminal codon, inframe/frameshift and stop codon terms;</li>
 *   <li>anything else is handled as an SNV: the affected codon is rebuilt, translated and classified
 *       (synonymous, stop retained/gained/lost, initiator codon, missense). In this case the amino acid
 *       change and codon strings are also written into {@code consequenceTypeTemplate}, together with
 *       Sift/Polyphen scores when exactly one record is returned for the substitution.</li>
 * </ul>
 * If none of the cases above flags a coding annotation, "coding_sequence_variant" is added as a fallback.
 *
 * @param splicing                true if the variant was flagged as affecting splicing; codon-level prediction
 *                                is skipped in that case
 * @param transcriptSequence      transcript sequence the codon is extracted from (indexed from its end here)
 * @param transcriptStart         transcript start coordinate; compared with {@code genomicCodingStart}
 * @param genomicCodingStart      coding start coordinate; compared with {@code transcriptStart}
 * @param cdnaCodingStart         coding start in cDNA coordinates
 * @param cdnaCodingEnd           coding end in cDNA coordinates
 * @param cdnaVariantStart        variant start in cDNA coordinates; may be null
 * @param cdnaVariantEnd          variant end in cDNA coordinates; may be null
 * @param transcriptFlags         annotation flags of the transcript ("cds_start_NF", "cds_end_NF" are checked);
 *                                may be null
 * @param variantRef              reference allele, "-" for insertions
 * @param variantAlt              alternate allele, "-" for deletions
 * @param SoNames                 set that receives the SO term names (modified in place)
 * @param consequenceTypeTemplate consequence type being filled for the current transcript (modified in place
 *                                in the SNV case)
 */
private void solveNegativeCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptStart,
        Integer genomicCodingStart, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {

    // Becomes true as soon as a specific coding term is produced; drives the fallback at the end of the method
    Boolean codingAnnotationAdded = false;

    if (variantAlt.equals("-")) { // Deletion
        // Deletion starting within the first three coding nts, unless the CDS start is flagged as incomplete
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null
                || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            // Remainder of the coding length divided by 3; used below to locate the last codon
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
                codingAnnotationAdded = true;
                // The length of the deleted reference decides between inframe and frameshift
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation will be solved in the line below.
                solveStopCodonNegativeDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            // Deletion reaches the last codon of the coding sequence
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion  TODO: I've seen insertions within Cellbase-mongo with a ref != -
            codingAnnotationAdded = true;
            // Insertion within the first two coding nts, unless the CDS start is flagged as incomplete
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
                SoNames.add("initiator_codon_variant");
            }
            // Unlike the deletion branch, the phase is computed here from the transcript sequence length
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                SoNames.add("incomplete_terminal_codon_variant");
            }
            // The length of the inserted allele decides between inframe and frameshift
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt,
                    SoNames); // Be careful, cdnaVariantEnd is being used in this case!!!

            // Disabled earlier approach to the stop codon annotation, kept as found:
            //                if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0
            //                    if (cdnaVariantEnd != null && cdnaVariantEnd > (cdnaCodingEnd - 3)) {  // -3 because alternative nts are pasted on the left of >>>genomic<<<VariantStart
            //                        char[] modifiedCodonArray = solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt); // Be careful, cdnaVariantEnd is being used in this case!!!
            //                        if(isStopCodon(String.valueOf(modifiedCodonArray))) {
            //                            SoNames.add("stop_retained_variant");
            //                        } else {
            //                            SoNames.add("stop_lost");
            //                        }
            //                    }
            //                } else {
            //                }
            //                if(cdnaVariantStart != null) {
            //                if(!splicing && cdnaVariantStart != null) {
            //                }
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                        SoNames.add("incomplete_terminal_codon_variant"); // If that is the case and variant occurs in the last complete/incomplete codon, no coding prediction is needed
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        // Offset (0-2) of the variant within its codon, and cDNA position of the codon's first nt
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        // The codon is taken counting from the end of transcriptSequence and then reversed
                        String reverseCodon = new StringBuilder(transcriptSequence.substring(
                                transcriptSequence.length() - modifiedCodonStart - 2,
                                transcriptSequence.length() - modifiedCodonStart + 1)).reverse().toString(); // Right limit of the substring sums +1 because substring does not include that position
                        // Complement every nt of the reversed codon to obtain the reference codon
                        char[] referenceCodon = reverseCodon.toCharArray();
                        referenceCodon[0] = complementaryNt.get(referenceCodon[0]);
                        referenceCodon[1] = complementaryNt.get(referenceCodon[1]);
                        referenceCodon[2] = complementaryNt.get(referenceCodon[2]);
                        // Alternative codon: reference codon with the complemented alternate nt at the variant offset
                        char[] modifiedCodonArray = referenceCodon.clone();
                        modifiedCodonArray[variantPhaseShift] = complementaryNt
                                .get(variantAlt.toCharArray()[0]);
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(String.valueOf(referenceCodon));
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));

                        if (isSynonymousCodon.get(String.valueOf(referenceCodon))
                                .get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_retained_variant");
                            } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2
                                // Look up precomputed substitution scores for this transcript, aa position and alternative aa
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                // Scores are only used when exactly one record is returned
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    // "ss" is read as the Sift score and "se" as the key of its description
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift",
                                                siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    // "ps" is read as the Polyphen score and "pe" as the key of its description
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Fill consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters
                        char[] referenceCodonArray = String.valueOf(referenceCodon).toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    // No specific coding term could be derived: fall back to the generic one
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Computes the consequence types (Sequence Ontology terms) of a variant.
 * <p>
 * Genes within 5000 nt of the variant and regulatory features overlapping it are fetched first. Every
 * transcript of every returned gene is then classified:
 * <ul>
 *   <li>the variant spans the whole transcript: "transcript_ablation";</li>
 *   <li>the variant overlaps the transcript: the strand-specific coding or non-coding solver is chosen from
 *       the numeric biotype code and fills the SO terms and the consequence type template;</li>
 *   <li>otherwise only upstream/downstream terms are looked for.</li>
 * </ul>
 * When no transcript yields a consequence type, "intergenic_variant" is reported. If any regulatory feature
 * overlaps the variant, "regulatory_region_variant" is appended, plus "TF_binding_site_variant" when one of
 * the features is a TF binding site (or motif).
 *
 * @param variant variant to annotate; a reference allele equal to "-" marks an insertion
 * @param options query options forwarded to the regulatory region query only (the gene query uses its own
 *                "include" projection)
 * @return a QueryResult whose id is {@code variant.toString()}, whose result is the list of
 *         {@code ConsequenceType} objects and whose dbTime covers both database queries
 */
@Override
public QueryResult getAllConsequenceTypesByVariant(GenomicVariant variant, QueryOptions options) {

    HashSet<String> soNames = new HashSet<>();
    List<ConsequenceType> consequenceTypeList = new ArrayList<>();
    QueryResult queryResult = new QueryResult();
    // Single template object, reset and reused for every transcript
    ConsequenceType consequenceTypeTemplate = new ConsequenceType();

    //TODO: Check deletion input format to ensure that variantEnd is correctly calculated
    Integer variantEnd = variant.getPosition() + variant.getReference().length() - 1;
    Boolean isInsertion = variant.getReference().equals("-");
    Integer variantStart;
    if (isInsertion) {
        // Insertions are placed between position-1 and position
        variantStart = variant.getPosition() - 1;
    } else {
        variantStart = variant.getPosition();
    }

    // Execute both queries and measure the time spent in the database
    long dbTimeStart = System.currentTimeMillis();
    QueryOptions geneQueryOptions = new QueryOptions();
    geneQueryOptions.add("include",
            "name,id,transcripts.id,transcripts.start,transcripts.end,transcripts.strand,transcripts.cdsLength,transcripts.annotationFlags,transcripts.biotype,transcripts.genomicCodingStart,transcripts.genomicCodingEnd,transcripts.cdnaCodingStart,transcripts.cdnaCodingEnd,transcripts.exons.start,transcripts.exons.end,transcripts.exons.sequence,transcripts.exons.phase,mirna.matures,mirna.sequence,mirna.matures.cdnaStart,mirna.matures.cdnaEnd");
    // The 5000 nt margin lets upstream/downstream variants reach their gene
    QueryResult geneQueryResult = geneDBAdaptor.getAllByRegion(
            new Region(variant.getChromosome(), variantStart - 5000, variantEnd + 5000), geneQueryOptions);
    QueryResult regulatoryQueryResult = regulatoryRegionDBAdaptor
            .getAllByRegion(new Region(variant.getChromosome(), variantStart, variantEnd), options);
    long dbTimeEnd = System.currentTimeMillis();

    // Only iteration is needed, so any List implementation returned by the adaptor is accepted
    List<?> geneInfoList = (List<?>) geneQueryResult.getResult();

    for (Object geneInfoObject : geneInfoList) {
        BasicDBObject geneInfo = (BasicDBObject) geneInfoObject;
        consequenceTypeTemplate.setGeneName((String) geneInfo.get("name"));
        consequenceTypeTemplate.setEnsemblGeneId((String) geneInfo.get("id"));

        BasicDBList transcriptInfoList = (BasicDBList) geneInfo.get("transcripts");
        for (Object transcriptInfoObject : transcriptInfoList) {
            BasicDBObject transcriptInfo = (BasicDBObject) transcriptInfoObject;
            String ensemblTranscriptId = (String) transcriptInfo.get("id");
            Integer transcriptStart = (Integer) transcriptInfo.get("start");
            Integer transcriptEnd = (Integer) transcriptInfo.get("end");
            String transcriptStrand = (String) transcriptInfo.get("strand");
            Integer cdsLength = (Integer) transcriptInfo.get("cdsLength");
            BasicDBList transcriptFlags = (BasicDBList) transcriptInfo.get("annotationFlags");

            // Missing or unknown biotypes are mapped to code 45 (explicit check instead of catching the
            // NullPointerException raised by unboxing a missing map entry)
            String biotypeName = (String) transcriptInfo.get("biotype");
            Integer biotypeCode = biotypeName == null ? null : biotypes.get(biotypeName);
            int transcriptBiotype = biotypeCode == null ? 45 : biotypeCode;

            // Reset every per-transcript field before solving this transcript
            soNames.clear();
            consequenceTypeTemplate.setEnsemblTranscriptId(ensemblTranscriptId);
            consequenceTypeTemplate.setcDnaPosition(null);
            consequenceTypeTemplate.setCdsPosition(null);
            consequenceTypeTemplate.setAaPosition(null);
            consequenceTypeTemplate.setAaChange(null);
            consequenceTypeTemplate.setCodon(null);
            // NOTE(review): the strand is read from the gene document although the "include" projection above
            // only lists transcripts.strand - confirm that the gene-level field is actually returned
            consequenceTypeTemplate.setStrand((String) geneInfo.get("strand"));
            consequenceTypeTemplate.setBiotype((String) transcriptInfo.get("biotype"));
            consequenceTypeTemplate.setProteinSubstitutionScores(null);
            BasicDBObject miRnaInfo = null;

            // Both strands follow the same decision tree; only the solvers and the flanking terms differ
            boolean positiveStrand = transcriptStrand.equals("+");

            if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed
                consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                        consequenceTypeTemplate.getEnsemblGeneId(),
                        consequenceTypeTemplate.getEnsemblTranscriptId(),
                        consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                        Collections.singletonList("transcript_ablation")));
            } else if (regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd)
                    && !(isInsertion && (variantEnd.equals(transcriptStart) || // Insertion just before the first transcript nt
                            variantStart.equals(transcriptEnd)))) { // Insertion just after the last transcript nt
                if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion
                    soNames.add("feature_truncation");
                }
                // Biotype codes not listed below (e.g. 8 and 9) produce no consequence type for the transcript
                switch (transcriptBiotype) {
                /**
                 * Coding biotypes
                 */
                case 30:
                    soNames.add("NMD_transcript_variant");
                    // intentional fall-through: code 30 is then solved like any other coding transcript
                case 1:
                case 3:
                case 4:
                case 6:
                case 10: // TR_C_gene
                case 11: // TR_D_gene
                case 12: // TR_J_gene
                case 14: // TR_V_gene
                case 20:
                case 23: // protein_coding
                case 34: // non_stop_decay
                case 36:
                case 50: // translated_unprocessed_pseudogene
                case 51: // LRG_gene
                    if (positiveStrand) {
                        solveCodingPositiveTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength,
                                transcriptFlags, consequenceTypeTemplate);
                    } else {
                        solveCodingNegativeTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength,
                                transcriptFlags, consequenceTypeTemplate);
                    }
                    consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                            consequenceTypeTemplate.getEnsemblGeneId(),
                            consequenceTypeTemplate.getEnsemblTranscriptId(),
                            consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                            consequenceTypeTemplate.getcDnaPosition(),
                            consequenceTypeTemplate.getCdsPosition(),
                            consequenceTypeTemplate.getAaPosition(),
                            consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(),
                            consequenceTypeTemplate.getProteinSubstitutionScores(),
                            new ArrayList<>(soNames)));
                    break;
                /**
                 * pseudogenes, antisense should not be annotated as non-coding genes
                 */
                case 39:
                case 40:
                case 41:
                case 42:
                case 43:
                case 44:
                case 49:
                    if (positiveStrand) {
                        solveNonCodingPositiveTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, null, variantStart, variantEnd,
                                consequenceTypeTemplate);
                    } else {
                        solveNonCodingNegativeTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, null, variantStart, variantEnd,
                                consequenceTypeTemplate);
                    }
                    consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                            consequenceTypeTemplate.getEnsemblGeneId(),
                            consequenceTypeTemplate.getEnsemblTranscriptId(),
                            consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                            consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(soNames)));
                    break;
                /**
                 * Non-coding biotypes
                 */
                case 18: // miRNA
                    miRnaInfo = (BasicDBObject) geneInfo.get("mirna");
                    // intentional fall-through: miRNAs are solved as non-coding, with their miRNA info attached
                case 2: //
                case 5: //
                case 7: // IG_V_pseudogene
                case 13:
                case 15:
                case 0: // 3prime_overlapping_ncrna
                case 16: // antisense  TODO: move to coding?
                case 17: // lincRNA
                case 19:
                case 21: // processed_pseudogene
                case 22: // processed_transcript
                case 24: // pseudogene
                case 25:
                case 26: // sense_intronic
                case 27: // sense_overlapping
                case 28:
                case 29:
                case 31: // unprocessed_pseudogene
                case 32: // transcribed_unprocessed_pseudogene
                case 33: // retained_intron
                case 35: // unitary_pseudogene
                case 37: // transcribed_processed_pseudogene
                case 38:
                case 45:
                case 46:
                case 47:
                case 48:
                    if (positiveStrand) {
                        solveNonCodingPositiveTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd,
                                consequenceTypeTemplate);
                    } else {
                        solveNonCodingNegativeTranscript(isInsertion, variant, soNames, transcriptInfo,
                                transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd,
                                consequenceTypeTemplate);
                    }
                    consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                            consequenceTypeTemplate.getEnsemblGeneId(),
                            consequenceTypeTemplate.getEnsemblTranscriptId(),
                            consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                            consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(soNames)));
                    break;
                }
            } else {
                // The two flanking terms are passed in swapped order for transcripts that are not on "+"
                if (positiveStrand) {
                    solveTranscriptFlankingRegions(soNames, transcriptStart, transcriptEnd, variantStart,
                            variantEnd, "upstream_gene_variant", "downstream_gene_variant");
                } else {
                    solveTranscriptFlankingRegions(soNames, transcriptStart, transcriptEnd, variantStart,
                            variantEnd, "downstream_gene_variant", "upstream_gene_variant");
                }
                if (soNames.size() > 0) { // Variant does not overlap gene region, just may have upstream/downstream annotations
                    consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                            consequenceTypeTemplate.getEnsemblGeneId(),
                            consequenceTypeTemplate.getEnsemblTranscriptId(),
                            consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                            new ArrayList<>(soNames)));
                }
            }
        }
    }

    if (consequenceTypeList.isEmpty()) {
        consequenceTypeList.add(new ConsequenceType("intergenic_variant"));
    }

    List<?> regulatoryInfoList = (List<?>) regulatoryQueryResult.getResult();
    if (!regulatoryInfoList.isEmpty()) {
        consequenceTypeList.add(new ConsequenceType("regulatory_region_variant"));
        // Stop at the first TF binding site; iterating avoids indexed access on a linked list
        boolean tfbsFound = false;
        for (Object regulatoryInfoObject : regulatoryInfoList) {
            BasicDBObject regulatoryInfo = (BasicDBObject) regulatoryInfoObject;
            String regulatoryType = (String) regulatoryInfo.get("featureType");
            if (regulatoryType.equals("TF_binding_site") || regulatoryType.equals("TF_binding_site_motif")) {
                tfbsFound = true;
                break;
            }
        }
        if (tfbsFound) {
            consequenceTypeList.add(new ConsequenceType("TF_binding_site_variant"));
        }
    }

    // setting queryResult fields
    queryResult.setId(variant.toString());
    queryResult.setDbTime((int) (dbTimeEnd - dbTimeStart));
    queryResult.setNumResults(consequenceTypeList.size());
    queryResult.setResult(consequenceTypeList);

    return queryResult;
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Walks the exons of a coding transcript in list order, translating the genomic variant
 * coordinates into cDNA coordinates, checking every intron for splice-site overlap and,
 * unless the variant is reported as lying within a single intron, delegating the final
 * consequence calculation to {@code solveCodingPositiveTranscriptEffect}.
 *
 * <p>Fields read from {@code transcriptInfo}: {@code genomicCodingStart},
 * {@code genomicCodingEnd}, {@code cdnaCodingStart}, {@code cdnaCodingEnd} and {@code exons}.
 * Every exon document must provide {@code start}, {@code end} and {@code sequence}; exons up
 * to the first one reaching {@code genomicCodingStart} must also provide {@code phase}.
 * The exon list is presumably sorted by ascending genomic position (positive strand) -
 * the code relies on that order but does not verify it.
 *
 * @param isInsertion             whether the variant is an insertion; enables the one-sided
 *                                cDNA coordinate completion performed after the exon walk
 * @param variant                 variant being annotated; only its reference and alternative
 *                                alleles are read here
 * @param SoNames                 accumulator handed to {@code solveJunction} and to the effect solver
 * @param transcriptInfo          transcript document (see the field list above)
 * @param transcriptStart         forwarded untouched to the effect solver
 * @param transcriptEnd           forwarded untouched to the effect solver
 * @param variantStart            genomic start of the variant
 * @param variantEnd              genomic end of the variant
 * @param cdsLength               forwarded untouched to the effect solver
 * @param transcriptFlags         forwarded untouched to the effect solver
 * @param consequenceTypeTemplate receives the cDNA position of the variant start when that
 *                                start falls inside an exon; also forwarded to the effect solver
 */
private void solveCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames,
        BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart,
        Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags,
        ConsequenceType consequenceTypeTemplate) {
    // Values read from the document stay boxed: several of them are only forwarded to the
    // effect solver and must not be unboxed here.
    Integer genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart");
    Integer genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd");
    Integer cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart");
    Integer cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd");
    BasicDBList exonInfoList = (BasicDBList) transcriptInfo.get("exons");

    // ---- First exon: handled outside the loop because no intron precedes it ----
    BasicDBObject exonInfo = (BasicDBObject) exonInfoList.get(0);
    Integer exonStart = (Integer) exonInfo.get("start");
    Integer exonEnd = (Integer) exonInfo.get("end");
    String firstExonSequence = (String) exonInfo.get("sequence");
    // append(String) renders a null as "null", exactly like String concatenation does.
    StringBuilder sequenceBuilder = new StringBuilder().append(firstExonSequence);

    // cDNA coordinate of the last base of the most recently accumulated exon.
    int cdnaExonEnd = exonEnd - exonStart + 1;
    Integer cdnaVariantStart = null; // stays null while the variant start is not inside an exon
    Integer cdnaVariantEnd = null;   // stays null while the variant end is not inside an exon
    boolean splicing = false;
    // junctionSolution is filled in by solveJunction: [0] is OR-ed into the splicing flag,
    // [1] signals that both variant ends fall within the same intron.
    Boolean[] junctionSolution = { false, false };

    int firstCdsPhase = -1; // -1 until the first exon reaching genomicCodingStart is seen
    if (genomicCodingStart <= exonEnd) {
        firstCdsPhase = (int) exonInfo.get("phase");
    }

    if (variantStart >= exonStart) {
        if (variantStart <= exonEnd) { // Variant start within the exon
            cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart);
            consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart);
            if (variantEnd <= exonEnd) { // Both variant ends within the exon  ----||||S|||||E||||----
                cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
            }
        }
    } else if (variantEnd <= exonEnd) {
        // Only the variant end is taken to be within the exon. A variant end located before
        // this exon is not contemplated, since this is the first exon.
        cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
    } // Otherwise the variant spans the whole exon: nothing to record.

    // ---- Remaining exons: each iteration first solves the intron that precedes the exon ----
    // variantAhead starts as true so that, whenever a second exon exists, the first junction
    // is always examined.
    boolean variantAhead = true;
    for (int exonCounter = 1; exonCounter < exonInfoList.size() && variantAhead; exonCounter++) {
        exonInfo = (BasicDBObject) exonInfoList.get(exonCounter);
        exonStart = (Integer) exonInfo.get("start");
        int prevSpliceSite = exonEnd + 1; // first intronic base after the previous exon
        exonEnd = (Integer) exonInfo.get("end");
        sequenceBuilder.append((String) exonInfo.get("sequence"));
        if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { // first coding exon reached
            firstCdsPhase = (int) exonInfo.get("phase");
        }

        solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames,
                "splice_donor_variant", "splice_acceptor_variant", junctionSolution);
        splicing = splicing || junctionSolution[0];

        int exonLength = exonEnd - exonStart + 1;
        if (variantStart >= exonStart) {
            cdnaExonEnd += exonLength;
            if (variantStart <= exonEnd) { // Variant start within the exon
                cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart);
                consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart);
                if (variantEnd <= exonEnd) { // Both variant ends within the exon  ----||||S|||||E||||----
                    cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
                }
            }
        } else if (variantEnd <= exonEnd) {
            if (variantEnd >= exonStart) { // Only variant end within the exon  ----||||||||||E||||----
                cdnaExonEnd += exonLength;
                cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
            } else { // Variant lies entirely before this exon: stop walking
                variantAhead = false;
            }
        } else { // Variant spans the whole exon (starts before it, ends after it)
            cdnaExonEnd += exonLength;
        }
    }

    // Both variant ends within the same intron: nothing else to solve here.
    if (junctionSolution[1]) {
        return;
    }

    if (isInsertion) {
        if (cdnaVariantStart == null && cdnaVariantEnd != null) { // insertion at the 3' end of an intron
            cdnaVariantStart = cdnaVariantEnd - 1;
        } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // insertion at the 5' end of an intron
            cdnaVariantEnd = cdnaVariantStart + 1;
        }
    }

    // With a single exon the loop never ran, so the stored sequence is handed over untouched
    // (including a missing one) instead of its StringBuilder rendering.
    String transcriptSequence = exonInfoList.size() > 1 ? sequenceBuilder.toString() : firstExonSequence;

    // NOTE(review): the original code warned "Be careful, originalVariantStart is used here!"
    // next to the cDNA arguments; no variable of that name exists in this method - confirm
    // whether the warning is stale.
    solveCodingPositiveTranscriptEffect(splicing, transcriptSequence, transcriptStart, transcriptEnd,
            genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd,
            cdnaVariantStart, cdnaVariantEnd,
            cdsLength, transcriptFlags, firstCdsPhase, variant.getReference(), variant.getAlternative(),
            SoNames, consequenceTypeTemplate);
}