Example usage for com.mongodb BasicDBObject get

Introduction

On this page you can find example usages of com.mongodb BasicDBObject get.

Prototype

public Object get(final String key)

Source Link

Document

Gets a value from this object

Usage

From source file:org.opencb.cellbase.mongodb.db.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Fetches per-position conservation scores for each of the given regions.
 *
 * <p>One Mongo query is built per region: a region that falls inside a single chunk is matched on
 * {@code _chunkIds}; a region spanning several chunks is matched on {@code chromosome} plus an
 * overlap condition on {@code start}/{@code end}. Each returned chunk document is read through its
 * {@code type}, {@code start} and {@code values} fields, where {@code values[k]} is taken as the
 * score at position {@code start + k}.
 *
 * <p>Side effect: {@code start} and {@code end} of every passed {@link Region} are raised to 1 when
 * they are below 1.
 *
 * @param regions regions to query
 * @param options query options forwarded to {@code executeQueryList2}
 * @return one {@link QueryResult} per region, in input order; its result is replaced by a
 *         {@link BasicDBList} holding, for every score type found, one entry per region position
 *         (a {@code Score}, or {@code null} where no value was stored)
 */
@Override
public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // Whole region lies in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize));
        } else {
            // Region spans several chunks: select every chunk overlapping [start, end].
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }
    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);

    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // score type -> one slot per region position (null until a chunk provides a value)
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (BasicDBObject chunk : list) {
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && pos + chunkStart <= regionEnd; pos++) {
                // Number (not Double) so integer-typed stored values do not fail the cast.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            for (Float value : elem.getValue()) {
                resultList.add(value != null
                        ? new Score(Double.valueOf(value.doubleValue()), elem.getKey())
                        : null);
            }
        }
        queryResult.setResult(resultList);
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Fetches conservation values for each of the given regions, grouped by score type.
 *
 * <p>One Mongo query is built per region: a region that falls inside a single chunk is matched on
 * {@code _chunkIds}; a region spanning several chunks is matched on {@code chromosome} plus an
 * overlap condition on {@code start}/{@code end}. Each returned chunk document is read through its
 * {@code type}, {@code start} and {@code values} fields, where {@code values[k]} is taken as the
 * value at position {@code start + k}.
 *
 * <p>Side effects on the passed {@link Region} objects: {@code start} and {@code end} are raised to
 * 1 when below 1, and {@code end} is lowered to {@code start + 10000} when the span
 * ({@code end - start}) exceeds 10000.
 *
 * @param regions regions to query
 * @param options query options forwarded to {@code executeQueryList2}
 * @return one {@link QueryResult} per region, in input order; its result is replaced by a
 *         {@link BasicDBList} with one {@code ConservedRegionFeature} per score type found
 */
@Deprecated
@Override
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        // Max region size is 10000bp
        if (region.getEnd() - region.getStart() > 10000) {
            region.setEnd(region.getStart() + 10000);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // Whole region lies in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), this.chunkSize));
        } else {
            // Region spans several chunks: select every chunk overlapping [start, end].
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.debug(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        List<?> list = queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // score type -> one slot per region position (null until a chunk provides a value)
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (Object element : list) {
            BasicDBObject chunk = (BasicDBObject) element;
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && pos + chunkStart <= regionEnd; pos++) {
                // Number (not Double) so integer-typed stored values do not fail the cast.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            resultList.add(new ConservedRegionFeature(region.getChromosome(), region.getStart(),
                    region.getEnd(), elem.getKey(), elem.getValue()));
        }
        queryResult.setResult(resultList);
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java

License:Apache License

/**
 * Fetches the conservation scores stored for each of the given regions.
 *
 * <p>One Mongo query is built per region: a region that falls inside a single chunk is matched on
 * {@code _chunkIds}; a region spanning several chunks is matched on {@code chromosome} plus an
 * overlap condition on {@code start}/{@code end}. Each returned chunk document is read through its
 * {@code type}, {@code start} and {@code values} fields, where {@code values[k]} is taken as the
 * score at position {@code start + k}.
 *
 * <p>Side effect: {@code start} and {@code end} of every passed {@link Region} are raised to 1 when
 * they are below 1.
 *
 * @param regions regions to query
 * @param options query options forwarded to {@code executeQueryList2}
 * @return one {@link QueryResult} per region, in input order; its result is replaced by a
 *         {@link BasicDBList} of {@code Score} objects (positions without a value are omitted), or
 *         by {@code null} when no score at all was found for the region
 */
@Override
public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) {
    //TODO not finished yet
    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        QueryBuilder builder;
        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        if (regionChunkStart == regionChunkEnd) {
            // Whole region lies in one chunk: look it up directly by chunk id.
            builder = QueryBuilder.start("_chunkIds")
                    .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize));
        } else {
            // Region spans several chunks: select every chunk overlapping [start, end].
            builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end")
                    .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd());
        }

        DBObject query = builder.get();
        queries.add(query);
        ids.add(region.toString());

        logger.debug(query.toString());
    }
    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);

    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);
        List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult();

        int regionStart = region.getStart();
        int regionEnd = region.getEnd();
        int regionLength = regionEnd - regionStart + 1;

        // score type -> one slot per region position (null until a chunk provides a value)
        Map<String, List<Float>> typeMap = new HashMap<>();

        for (BasicDBObject chunk : list) {
            String type = chunk.getString("type");
            List<Float> valuesList = typeMap.get(type);
            if (valuesList == null) {
                valuesList = new ArrayList<>(regionLength);
                for (int val = 0; val < regionLength; val++) {
                    valuesList.add(null);
                }
                typeMap.put(type, valuesList);
            }

            BasicDBList valuesChunk = (BasicDBList) chunk.get("values");
            int chunkStart = chunk.getInt("start");

            // Skip the part of the chunk that lies before the region start.
            int pos = Math.max(0, regionStart - chunkStart);
            for (; pos < valuesChunk.size() && pos + chunkStart <= regionEnd; pos++) {
                // Number (not Double) so integer-typed stored values do not fail the cast.
                valuesList.set(pos + chunkStart - regionStart,
                        ((Number) valuesChunk.get(pos)).floatValue());
            }
        }

        BasicDBList resultList = new BasicDBList();
        for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) {
            for (Float value : elem.getValue()) {
                if (value != null) {
                    resultList.add(new Score(Double.valueOf(value.doubleValue()), elem.getKey()));
                }
            }
        }
        // An empty score list is reported as a null result.
        queryResult.setResult(resultList.isEmpty() ? null : resultList);
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.core.GenomeMongoDBAdaptor.java

License:Apache License

/**
 * Fetches the genome sequence covered by each of the given regions.
 *
 * <p>For every region the ids of all chunks between the chunk of {@code start} and the chunk of
 * {@code end} are computed (format {@code <chromosome>_<chunkId>_<chunkSize/1000>k}) and queried
 * through {@code _chunkIds}. The {@code sequence} fields of the returned chunk documents are
 * concatenated in result order and the region's slice is cut out of that string.
 *
 * <p>Side effect: {@code start} and {@code end} of every passed {@link Region} are raised to 1 when
 * they are below 1.
 *
 * @param regions regions to query
 * @param options query options forwarded to {@code executeQueryList2}
 * @return one {@link QueryResult} per region, in input order; its result is replaced by a
 *         single-element list holding a {@code GenomeSequenceFeature}. When the query returned no
 *         chunk for a region, that region's result is left untouched instead of failing.
 */
@Override
public List<QueryResult> getAllSequencesByRegionList(List<Region> regions, QueryOptions options) {
    String chunkIdSuffix = this.chunkSize / 1000 + "k";

    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        int regionChunkStart = getChunkId(region.getStart(), this.chunkSize);
        int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize);
        List<String> chunkIds = new ArrayList<>();
        for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
            chunkIds.add(region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix);
        }

        DBObject query = QueryBuilder.start("_chunkIds").in(chunkIds).get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options, genomeSequenceCollection);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);

        List<?> list = queryResult.getResult();
        if (list == null || list.isEmpty()) {
            // No chunk stored for this region: there is no first chunk to read the
            // sequence type/assembly from, so keep the (empty) query result as is.
            continue;
        }

        StringBuilder sb = new StringBuilder();
        for (Object element : list) {
            sb.append(((BasicDBObject) element).get("sequence"));
        }

        int startStr = getOffset(region.getStart());
        int endStr = startStr + (region.getEnd() - region.getStart()) + 1;

        String subStr = "";
        if (getChunkId(region.getStart(), this.chunkSize) > 0) {
            if (sb.length() > 0 && sb.length() >= endStr) {
                subStr = sb.substring(startStr, endStr);
            }
        } else {
            // Region starts in chunk 0: the slice is shifted one position to the left.
            if (sb.length() > 0 && sb.length() + 1 >= endStr) {
                subStr = sb.substring(startStr - 1, endStr - 1);
            }
        }

        BasicDBObject firstChunk = (BasicDBObject) list.get(0);
        String sequenceType = firstChunk.getString("sequenceType");
        String assembly = firstChunk.getString("assembly");
        logger.info("((BasicDBObject)list.get(0)).getString(\"sequenceType\") = {}", sequenceType);
        logger.info("((BasicDBObject)list.get(0)).getString(\"assembly\") = {}", assembly);

        GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(),
                region.getStart(), region.getEnd(), 1, sequenceType, assembly, subStr);

        queryResult.setResult(Arrays.asList(genomeSequenceFeature));
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.GenomeSequenceMongoDBAdaptor.java

License:Apache License

/**
 * Fetches the genome sequence covered by each of the given regions.
 *
 * <p>For every region the ids of all chunks between the chunk of {@code start} and the chunk of
 * {@code end} are computed (format {@code <chromosome>_<chunkId>_<chunkSize/1000>k}) and queried
 * through {@code _chunkIds}. The {@code sequence} fields of the returned chunk documents are
 * concatenated in result order and the region's slice is cut out of that string.
 *
 * <p>Side effect: {@code start} and {@code end} of every passed {@link Region} are raised to 1 when
 * they are below 1.
 *
 * @param regions regions to query
 * @param options query options forwarded to {@code executeQueryList2}
 * @return one {@link QueryResult} per region, in input order; its result is replaced by a
 *         single-element list holding a {@code GenomeSequenceFeature}. When the query returned no
 *         chunk for a region, that region's result is left untouched instead of failing.
 */
@Override
public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) {
    String chunkIdSuffix = this.chunkSize / 1000 + "k";

    List<DBObject> queries = new ArrayList<>(regions.size());
    List<String> ids = new ArrayList<>(regions.size());
    for (Region region : regions) {
        // positions below 1 are not allowed
        if (region.getStart() < 1) {
            region.setStart(1);
        }
        if (region.getEnd() < 1) {
            region.setEnd(1);
        }

        int regionChunkStart = getChunk(region.getStart());
        int regionChunkEnd = getChunk(region.getEnd());
        List<String> chunkIds = new ArrayList<>();
        for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) {
            chunkIds.add(region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix);
        }

        DBObject query = QueryBuilder.start("_chunkIds").in(chunkIds).get();
        queries.add(query);
        ids.add(region.toString());

        logger.info(query.toString());
    }

    List<QueryResult> queryResults = executeQueryList2(ids, queries, options);
    for (int i = 0; i < regions.size(); i++) {
        Region region = regions.get(i);
        QueryResult queryResult = queryResults.get(i);

        List<?> list = queryResult.getResult();
        if (list == null || list.isEmpty()) {
            // No chunk stored for this region: there is no first chunk to read the
            // sequence type/assembly from, so keep the (empty) query result as is.
            continue;
        }

        StringBuilder sb = new StringBuilder();
        for (Object element : list) {
            sb.append(((BasicDBObject) element).get("sequence"));
        }

        int startStr = getOffset(region.getStart());
        int endStr = startStr + (region.getEnd() - region.getStart()) + 1;

        String subStr = "";
        if (getChunk(region.getStart()) > 0) {
            if (sb.length() > 0 && sb.length() >= endStr) {
                subStr = sb.substring(startStr, endStr);
            }
        } else {
            // Region starts in chunk 0: the slice is shifted one position to the left.
            if (sb.length() > 0 && sb.length() + 1 >= endStr) {
                subStr = sb.substring(startStr - 1, endStr - 1);
            }
        }

        BasicDBObject firstChunk = (BasicDBObject) list.get(0);
        GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(),
                region.getStart(), region.getEnd(), 1, firstChunk.getString("sequenceType"),
                firstChunk.getString("assembly"), subStr);

        queryResult.setResult(Arrays.asList(genomeSequenceFeature));
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.regulatory.TfbsMongoDBAdaptor.java

License:Apache License

/**
 * Looks up the TFBS list of the transcript matching each given target gene id.
 *
 * <p>For every id an aggregation pipeline is prepared ({@code $match} on
 * {@code transcripts.xrefs.id}, {@code $unwind} of {@code transcripts}, {@code $project} of
 * {@code transcripts.id} and {@code transcripts.tfbs}).
 *
 * <p>NOTE(review): the pipelines are currently not executed - the original call to
 * {@code executeAggregationList(targetGeneIdList, commandList, options, coreMongoDBCollection)}
 * was disabled - so each id is answered with a freshly created {@link QueryResult}. The
 * post-processing below is kept so it works once execution is restored.
 *
 * @param targetGeneIdList ids to look up
 * @param options query options (unused while aggregation execution is disabled)
 * @return one {@link QueryResult} per id, in input order; when a result holds a
 *         {@link BasicDBList} of gene documents, it is replaced by the {@code tfbs} list of the
 *         first transcript whose upper-cased {@code id} equals the requested id
 */
@Override
public List<QueryResult> getAllByTargetGeneIdList(List<String> targetGeneIdList, QueryOptions options) {
    List<DBObject[]> commandList = new ArrayList<>(targetGeneIdList.size());
    for (String targetGeneId : targetGeneIdList) {
        DBObject match = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.id", targetGeneId));
        DBObject unwind = new BasicDBObject("$unwind", "$transcripts");
        BasicDBObject projectObj = new BasicDBObject("_id", 0);
        projectObj.append("transcripts.id", 1);
        projectObj.append("transcripts.tfbs", 1);
        DBObject project = new BasicDBObject("$project", projectObj);
        commandList.add(new DBObject[] {match, unwind, project});
    }

    List<QueryResult> queryResults = new ArrayList<>(targetGeneIdList.size());
    for (int i = 0; i < targetGeneIdList.size(); i++) {
        // Use the i-th id (not always the first one) so every result is matched against its own id.
        String targetGeneId = targetGeneIdList.get(i);
        QueryResult queryResult = new QueryResult();
        // Keep the returned list aligned with the input ids.
        queryResults.add(queryResult);

        Object rawResult = queryResult.getResult();
        if (!(rawResult instanceof BasicDBList)) {
            // Nothing to post-process (e.g. no result set on a fresh QueryResult).
            continue;
        }
        BasicDBList list = (BasicDBList) rawResult;

        for (Object element : list) {
            BasicDBObject gene = (BasicDBObject) element;
            BasicDBObject transcript = (BasicDBObject) gene.get("transcripts");
            String transcriptId = transcript.getString("id");
            if (transcriptId.toUpperCase().equals(targetGeneId)) {
                BasicDBList tfbs = (BasicDBList) transcript.get("tfbs");
                queryResult.setResult(tfbs);
                break;
            }
        }
    }

    return queryResults;
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Adds the Sequence Ontology term names describing the coding effect of a variant to
 * {@code SoNames}. (The "Positive" in the method name presumably refers to transcripts on the
 * positive strand - TODO confirm.)
 *
 * <p>The variant is treated as a deletion when {@code variantAlt} is "-", as an insertion when
 * {@code variantRef} is "-" and {@code cdnaVariantStart} is not null, and is otherwise handled by
 * the SNV branch. In the SNV branch (only when {@code splicing} is false) the reference codon and
 * the modified codon are compared; the amino acid change and the codon pair are stored on
 * {@code consequenceTypeTemplate}, and for non-synonymous changes with
 * {@code cdnaVariantEnd < cdnaCodingEnd - 2} the "Sift"/"Polyphen" scores returned by
 * {@code proteinFunctionPredictorDBAdaptor} are attached to it. If no branch marked a coding
 * annotation as added, "coding_sequence_variant" is added at the end.
 *
 * @param splicing when true, the deletion-type and SNV codon analysis is skipped
 * @param transcriptSequence transcript sequence; cDNA positions are 1-based indexes into it
 * @param transcriptEnd compared with {@code genomicCodingEnd} to detect a transcript without stop codon
 * @param genomicCodingEnd see {@code transcriptEnd}
 * @param cdnaCodingStart cDNA position where the coding sequence starts
 * @param cdnaCodingEnd cDNA position where the coding sequence ends
 * @param cdnaVariantStart cDNA start position of the variant; null if the variant is intronic
 * @param cdnaVariantEnd cDNA end position of the variant; may be null
 * @param transcriptFlags transcript flags, checked for "cds_start_NF" and "cds_end_NF"; may be null
 * @param variantRef reference allele, "-" for insertions
 * @param variantAlt alternative allele, "-" for deletions
 * @param SoNames output set the SO term names are added to
 * @param consequenceTypeTemplate receives amino acid change, codon and protein substitution scores
 */
private void solvePositiveCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptEnd,
        Integer genomicCodingEnd, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {

    Boolean codingAnnotationAdded = false; // This will indicate whether it is needed to add the "coding_sequence_variant" annotation or not

    if (variantAlt.equals("-")) { // Deletion
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null
                || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
                codingAnnotationAdded = true;
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation will be solved in the line below.
                solveStopCodonPositiveDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion. Be careful: insertion coordinates are special, alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd
            codingAnnotationAdded = true;
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
                SoNames.add("initiator_codon_variant");
            }
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                SoNames.add("incomplete_terminal_codon_variant");
            }
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt,
                    SoNames);
            //                if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0
            //                    if (cdnaVariantStart != null && cdnaVariantStart > (cdnaCodingEnd - 3)) { // -3 because alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd
            //                        char[] modifiedCodonArray = solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt);
            //                        if(isStopCodon(String.valueOf(modifiedCodonArray))) {
            //                            SoNames.add("stop_retained_variant");
            //                        } else {
            //                            SoNames.add("stop_lost");
            //                        }
            //                    }
            //                } else {
            // Be careful, strict > since this is a insertion, inserted nts are pasted on the left of cdnaVariantStart
            //                }
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                        SoNames.add("incomplete_terminal_codon_variant"); //  If not, avoid calculating reference/modified codon
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        // Offset of the variant inside its codon (0, 1 or 2) and 1-based start of that codon
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        String referenceCodon = transcriptSequence.substring(modifiedCodonStart - 1,
                                modifiedCodonStart + 2); // -1 and +2 because of base 0 String indexing
                        char[] modifiedCodonArray = referenceCodon.toCharArray();
                        modifiedCodonArray[variantPhaseShift] = variantAlt.toCharArray()[0];
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(referenceCodon);
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));
                        if (isSynonymousCodon.get(referenceCodon).get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(referenceCodon)) {
                                SoNames.add("stop_retained_variant");
                            } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                // Scores are only attached when the lookup returned exactly one document
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift",
                                                siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Set consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters
                        char[] referenceCodonArray = referenceCodon.toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    // Fall back to the generic term when none of the branches above flagged a coding annotation
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Adds to {@code SoNames} the Sequence Ontology terms that describe the effect of a variant on the coding
 * sequence of a transcript and, for SNVs, fills the amino acid change, the codon and the protein
 * substitution scores of {@code consequenceTypeTemplate}.
 *
 * <p>Three cases are distinguished: deletions ({@code variantAlt} equals "-"), insertions
 * ({@code variantRef} equals "-" and {@code cdnaVariantStart} is not null) and everything else, which is
 * handled as an SNV. If none of the branches adds a specific coding annotation, the generic
 * "coding_sequence_variant" term is added.
 *
 * <p>Codons are read from the end of {@code transcriptSequence}, reversed and complemented, which matches
 * the negative-strand handling suggested by the method name.
 *
 * @param splicing                when true, the inframe/frameshift deletion annotation and the SNV codon
 *                                prediction are skipped
 * @param transcriptSequence      sequence from which the affected codon is extracted
 * @param transcriptStart         compared with {@code genomicCodingStart} to detect transcripts whose last
 *                                codon may be incomplete
 * @param genomicCodingStart      see {@code transcriptStart}
 * @param cdnaCodingStart         cDNA coordinate of the coding start; may be lower than 1 according to the
 *                                inline comments (cds_start_NF and phase != 0)
 * @param cdnaCodingEnd           cDNA coordinate of the coding end
 * @param cdnaVariantStart        cDNA coordinate of the variant start; null if the variant is intronic
 * @param cdnaVariantEnd          cDNA coordinate of the variant end; null-checked only in the deletion branch
 * @param transcriptFlags         annotation flags of the transcript ("cds_start_NF", "cds_end_NF"); may be null
 * @param variantRef              reference allele, "-" for insertions
 * @param variantAlt              alternative allele, "-" for deletions
 * @param SoNames                 output set receiving the Sequence Ontology term names
 * @param consequenceTypeTemplate its transcript id and amino acid position are read to query the protein
 *                                substitution scores; its aaChange, codon and scores are written for SNVs
 */
private void solveNegativeCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptStart,
        Integer genomicCodingStart, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {

    // Becomes true as soon as a term more specific than "coding_sequence_variant" has been added
    Boolean codingAnnotationAdded = false;

    if (variantAlt.equals("-")) { // Deletion
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null
                || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            // Remainder of the coding length divided by 3; used to locate the last (possibly incomplete) codon
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
                codingAnnotationAdded = true;
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation will be solved in the line below.
                solveStopCodonNegativeDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            // Deletion reaches the last codon of the coding sequence
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion  TODO: I've seen insertions within Cellbase-mongo with a ref != -
            codingAnnotationAdded = true;
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
                SoNames.add("initiator_codon_variant");
            }
            // NOTE(review): here the phase is derived from the sequence length, whereas the deletion branch
            // derives it from cdnaCodingEnd - confirm the difference is intended
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                SoNames.add("incomplete_terminal_codon_variant");
            }
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            // NOTE(review): cdnaVariantEnd is not null-checked in this branch - confirm the helper accepts null
            solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt,
                    SoNames); // Be careful, cdnaVariantEnd is being used in this case!!!

            //                if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0
            //                    if (cdnaVariantEnd != null && cdnaVariantEnd > (cdnaCodingEnd - 3)) {  // -3 because alternative nts are pasted on the left of >>>genomic<<<VariantStart
            //                        char[] modifiedCodonArray = solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt); // Be careful, cdnaVariantEnd is being used in this case!!!
            //                        if(isStopCodon(String.valueOf(modifiedCodonArray))) {
            //                            SoNames.add("stop_retained_variant");
            //                        } else {
            //                            SoNames.add("stop_lost");
            //                        }
            //                    }
            //                } else {
            //                }
            //                if(cdnaVariantStart != null) {
            //                if(!splicing && cdnaVariantStart != null) {
            //                }
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    // NOTE(review): cdnaVariantEnd is unboxed below without a null check - presumably it is
                    // never null when cdnaVariantStart is set and there is no splicing; confirm
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { //  Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3.
                        SoNames.add("incomplete_terminal_codon_variant"); // If that is the case and variant occurs in the last complete/incomplete codon, no coding prediction is needed
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        // Offset (0-2) of the variant within its codon and cDNA position of that codon's first nt
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        // The codon is taken counting from the end of the sequence and reversed
                        String reverseCodon = new StringBuilder(transcriptSequence.substring(
                                transcriptSequence.length() - modifiedCodonStart - 2,
                                transcriptSequence.length() - modifiedCodonStart + 1)).reverse().toString(); // Right limit of the substring sums +1 because substring does not include that position
                        // Complement each nucleotide of the reversed codon
                        char[] referenceCodon = reverseCodon.toCharArray();
                        referenceCodon[0] = complementaryNt.get(referenceCodon[0]);
                        referenceCodon[1] = complementaryNt.get(referenceCodon[1]);
                        referenceCodon[2] = complementaryNt.get(referenceCodon[2]);
                        // Alternative codon: same codon with the complemented alternative nt at the variant offset
                        char[] modifiedCodonArray = referenceCodon.clone();
                        modifiedCodonArray[variantPhaseShift] = complementaryNt
                                .get(variantAlt.toCharArray()[0]);
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(String.valueOf(referenceCodon));
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));

                        if (isSynonymousCodon.get(String.valueOf(referenceCodon))
                                .get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_retained_variant");
                            } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2
                                // Look up the substitution scores for this transcript, position and new amino acid
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                // Scores are only used when the lookup returns exactly one result
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    // "ss" is parsed as the Sift score; "se" is the key into siftDescriptions
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift",
                                                siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    // "ps" is parsed as the Polyphen score; "pe" is the key into polyphenDescriptions
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Fill consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters
                        char[] referenceCodonArray = String.valueOf(referenceCodon).toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    // Fall back to the generic term when no specific coding annotation could be assigned
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Computes the consequence types (Sequence Ontology terms) of a variant.
 *
 * <p>Genes located within 5000 positions of the variant and regulatory regions overlapping the variant
 * are queried. For every transcript of every returned gene one of the following happens:
 * <ul>
 *   <li>the variant spans the whole transcript: "transcript_ablation" is reported;</li>
 *   <li>the variant overlaps the transcript: the strand- and biotype-specific solver is run and its
 *       terms are reported (biotype codes not listed in the switch produce no entry);</li>
 *   <li>otherwise: upstream/downstream terms are reported if the flanking-region solver finds any.</li>
 * </ul>
 * If no transcript yields a consequence type, "intergenic_variant" is reported. A
 * "regulatory_region_variant" entry is appended when any regulatory region is returned, plus a
 * "TF_binding_site_variant" entry when one of them is a TF binding site (or motif).
 *
 * @param variant variant to annotate; a reference allele equal to "-" denotes an insertion
 * @param options query options forwarded to the regulatory region query
 * @return a QueryResult whose id is {@code variant.toString()}, whose result is the list of
 *         {@code ConsequenceType} objects and whose dbTime is the elapsed time in ms of both queries
 */
@Override
public QueryResult getAllConsequenceTypesByVariant(GenomicVariant variant, QueryOptions options) {

    // Boxed/concrete types are kept for every value handed to the solve* helpers so that the calls
    // resolve exactly as before.
    HashSet<String> soNames = new HashSet<>();
    List<ConsequenceType> consequenceTypeList = new ArrayList<>();
    QueryResult queryResult = new QueryResult();
    ConsequenceType consequenceTypeTemplate = new ConsequenceType();

    //TODO: Check deletion input format to ensure that variantEnd is correctly calculated
    Integer variantEnd = variant.getPosition() + variant.getReference().length() - 1;
    Boolean isInsertion = variant.getReference().equals("-");
    Integer variantStart;
    if (isInsertion) {
        variantStart = variant.getPosition() - 1;
    } else {
        variantStart = variant.getPosition();
    }

    // Execute queries and calculate time
    long dbTimeStart = System.currentTimeMillis();
    QueryOptions geneQueryOptions = new QueryOptions();
    geneQueryOptions.add("include",
            "name,id,transcripts.id,transcripts.start,transcripts.end,transcripts.strand,transcripts.cdsLength,transcripts.annotationFlags,transcripts.biotype,transcripts.genomicCodingStart,transcripts.genomicCodingEnd,transcripts.cdnaCodingStart,transcripts.cdnaCodingEnd,transcripts.exons.start,transcripts.exons.end,transcripts.exons.sequence,transcripts.exons.phase,mirna.matures,mirna.sequence,mirna.matures.cdnaStart,mirna.matures.cdnaEnd");
    // The gene query is widened by 5000 positions on each side so that upstream/downstream terms can be solved
    QueryResult geneQueryResult = geneDBAdaptor.getAllByRegion(
            new Region(variant.getChromosome(), variantStart - 5000, variantEnd + 5000), geneQueryOptions);
    QueryResult regulatoryQueryResult = regulatoryRegionDBAdaptor
            .getAllByRegion(new Region(variant.getChromosome(), variantStart, variantEnd), options);
    long dbTimeEnd = System.currentTimeMillis();

    // Only the List contract is needed; do not depend on the concrete list implementation
    List<?> geneInfoList = (List<?>) geneQueryResult.getResult();
    for (Object geneInfoObject : geneInfoList) {
        BasicDBObject geneInfo = (BasicDBObject) geneInfoObject;
        consequenceTypeTemplate.setGeneName((String) geneInfo.get("name"));
        consequenceTypeTemplate.setEnsemblGeneId((String) geneInfo.get("id"));

        BasicDBList transcriptInfoList = (BasicDBList) geneInfo.get("transcripts");
        for (Object transcriptInfoObject : transcriptInfoList) {
            BasicDBObject transcriptInfo = (BasicDBObject) transcriptInfoObject;
            String ensemblTranscriptId = (String) transcriptInfo.get("id");
            Integer transcriptStart = (Integer) transcriptInfo.get("start");
            Integer transcriptEnd = (Integer) transcriptInfo.get("end");
            String transcriptStrand = (String) transcriptInfo.get("strand");
            Integer cdsLength = (Integer) transcriptInfo.get("cdsLength");
            BasicDBList transcriptFlags = (BasicDBList) transcriptInfo.get("annotationFlags");
            String biotypeName = (String) transcriptInfo.get("biotype");

            // Biotypes missing from the hardcoded table are handled with code 45 (one of the
            // non-coding case labels below); an explicit check replaces catching the unboxing NPE.
            Integer biotypeCode = null;
            if (biotypeName != null) {
                biotypeCode = biotypes.get(biotypeName);
            }
            int transcriptBiotype = 45;
            if (biotypeCode != null) {
                transcriptBiotype = biotypeCode;
            }

            // Reset the per-transcript state of the shared template
            soNames.clear();
            consequenceTypeTemplate.setEnsemblTranscriptId(ensemblTranscriptId);
            consequenceTypeTemplate.setcDnaPosition(null);
            consequenceTypeTemplate.setCdsPosition(null);
            consequenceTypeTemplate.setAaPosition(null);
            consequenceTypeTemplate.setAaChange(null);
            consequenceTypeTemplate.setCodon(null);
            // The "include" projection above only requests transcripts.strand, so the gene-level value
            // may be absent; fall back to the transcript strand in that case.
            String geneStrand = (String) geneInfo.get("strand");
            consequenceTypeTemplate.setStrand(geneStrand != null ? geneStrand : transcriptStrand);
            consequenceTypeTemplate.setBiotype(biotypeName);
            consequenceTypeTemplate.setProteinSubstitutionScores(null);

            boolean positiveStrand = transcriptStrand.equals("+");

            if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed
                consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                        consequenceTypeTemplate.getEnsemblGeneId(),
                        consequenceTypeTemplate.getEnsemblTranscriptId(),
                        consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                        Collections.singletonList("transcript_ablation")));
                continue;
            }

            // Check variant overlaps transcript start/end coordinates
            boolean overlapsTranscript = regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd)
                    && !(isInsertion && (variantEnd.equals(transcriptStart) // Insertion just before the first transcript nt
                            || variantStart.equals(transcriptEnd))); // Insertion just after the last transcript nt

            if (!overlapsTranscript) {
                // Upstream/downstream labels are swapped for transcripts that are not on the "+" strand
                if (positiveStrand) {
                    solveTranscriptFlankingRegions(soNames, transcriptStart, transcriptEnd, variantStart,
                            variantEnd, "upstream_gene_variant", "downstream_gene_variant");
                } else {
                    solveTranscriptFlankingRegions(soNames, transcriptStart, transcriptEnd, variantStart,
                            variantEnd, "downstream_gene_variant", "upstream_gene_variant");
                }
                if (!soNames.isEmpty()) { // Variant does not overlap gene region, just may have upstream/downstream annotations
                    consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                            consequenceTypeTemplate.getEnsemblGeneId(),
                            consequenceTypeTemplate.getEnsemblTranscriptId(),
                            consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                            new ArrayList<>(soNames)));
                }
                continue;
            }

            if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion
                soNames.add("feature_truncation");
            }

            BasicDBObject miRnaInfo = null;
            switch (transcriptBiotype) {
            /**
             * Coding biotypes
             */
            case 30:
                soNames.add("NMD_transcript_variant");
                // intentional fall through: the transcript is then solved as a coding one
            case 1:
            case 3:
            case 4:
            case 6:
            case 10: // TR_C_gene
            case 11: // TR_D_gene
            case 12: // TR_J_gene
            case 14: // TR_V_gene
            case 20:
            case 23: // protein_coding
            case 34: // non_stop_decay
            case 36:
            case 50: // translated_unprocessed_pseudogene
            case 51: // LRG_gene
                if (positiveStrand) {
                    solveCodingPositiveTranscript(isInsertion, variant, soNames, transcriptInfo,
                            transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength,
                            transcriptFlags, consequenceTypeTemplate);
                } else {
                    solveCodingNegativeTranscript(isInsertion, variant, soNames, transcriptInfo,
                            transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength,
                            transcriptFlags, consequenceTypeTemplate);
                }
                consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                        consequenceTypeTemplate.getEnsemblGeneId(),
                        consequenceTypeTemplate.getEnsemblTranscriptId(),
                        consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                        consequenceTypeTemplate.getcDnaPosition(),
                        consequenceTypeTemplate.getCdsPosition(),
                        consequenceTypeTemplate.getAaPosition(),
                        consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(),
                        consequenceTypeTemplate.getProteinSubstitutionScores(),
                        new ArrayList<>(soNames)));
                break;
            /**
             * Non-coding biotypes. Only miRNA transcripts carry miRNA information; for every other
             * code miRnaInfo stays null.
             */
            case 18: // miRNA
                miRnaInfo = (BasicDBObject) geneInfo.get("mirna");
                // intentional fall through: solved as a non-coding transcript with miRNA information
            /**
             * pseudogenes, antisense should not be annotated as non-coding genes
             */
            case 39:
            case 40:
            case 41:
            case 42:
            case 43:
            case 44:
            case 49:
            case 2: //
            case 5: //
            case 7: // IG_V_pseudogene
            case 13:
            case 15:
            case 0: // 3prime_overlapping_ncrna
            case 16: // antisense  TODO: move to coding?
            case 17: // lincRNA
            case 19:
            case 21: // processed_pseudogene
            case 22: // processed_transcript
            case 24: // pseudogene
            case 25:
            case 26: // sense_intronic
            case 27: // sense_overlapping
            case 28:
            case 29:
            case 31: // unprocessed_pseudogene
            case 32: // transcribed_unprocessed_pseudogene
            case 33: // retained_intron
            case 35: // unitary_pseudogene
            case 37: // transcribed_processed_pseudogene
            case 38:
            case 45:
            case 46:
            case 47:
            case 48:
                if (positiveStrand) {
                    solveNonCodingPositiveTranscript(isInsertion, variant, soNames, transcriptInfo,
                            transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd,
                            consequenceTypeTemplate);
                } else {
                    solveNonCodingNegativeTranscript(isInsertion, variant, soNames, transcriptInfo,
                            transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd,
                            consequenceTypeTemplate);
                }
                consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(),
                        consequenceTypeTemplate.getEnsemblGeneId(),
                        consequenceTypeTemplate.getEnsemblTranscriptId(),
                        consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(),
                        consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(soNames)));
                break;
            default:
                // Biotype codes without a case label produce no consequence type for this transcript
                break;
            }
        }
    }

    if (consequenceTypeList.isEmpty()) {
        consequenceTypeList.add(new ConsequenceType("intergenic_variant"));
    }

    List<?> regulatoryInfoList = (List<?>) regulatoryQueryResult.getResult();
    if (!regulatoryInfoList.isEmpty()) {
        consequenceTypeList.add(new ConsequenceType("regulatory_region_variant"));
        // A single TF_binding_site_variant entry is added as soon as one TF binding site is found
        for (Object regulatoryInfoObject : regulatoryInfoList) {
            String regulatoryType = (String) ((BasicDBObject) regulatoryInfoObject).get("featureType");
            if ("TF_binding_site".equals(regulatoryType) || "TF_binding_site_motif".equals(regulatoryType)) {
                consequenceTypeList.add(new ConsequenceType("TF_binding_site_variant"));
                break;
            }
        }
    }

    // setting queryResult fields
    queryResult.setId(variant.toString());
    queryResult.setDbTime((int) (dbTimeEnd - dbTimeStart));
    queryResult.setNumResults(consequenceTypeList.size());
    queryResult.setResult(consequenceTypeList);

    return queryResult;
}

From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java

License:Apache License

/**
 * Walks the exons of a coding transcript, translates the genomic variant coordinates into cDNA coordinates and
 * delegates the effect prediction to {@code solveCodingPositiveTranscriptEffect}.
 * <p>
 * Exons are read from the {@code "exons"} list of {@code transcriptInfo} in list order (presumably ascending
 * genomic order for a positive-strand transcript - confirm against the caller). For every exon after the first
 * one, the gap between the previous exon end and this exon start is handed to {@code solveJunction}. The walk
 * stops at the first exon that starts after the variant end. The exon sequences visited so far are concatenated
 * and passed on as the transcript sequence.
 * <p>
 * If {@code solveJunction} leaves {@code junctionSolution[1]} set (per the original author: both variant ends
 * fall within the same intron), nothing is forwarded to {@code solveCodingPositiveTranscriptEffect}.
 *
 * @param isInsertion             forwarded to {@code solveJunction}; when true and only one cDNA end of the
 *                                variant could be located, the other end is placed adjacent to it
 * @param variant                 only its reference and alternative values are read here
 * @param SoNames                 forwarded to {@code solveJunction} and
 *                                {@code solveCodingPositiveTranscriptEffect}
 * @param transcriptInfo          document read for the keys {@code genomicCodingStart}, {@code genomicCodingEnd},
 *                                {@code cdnaCodingStart}, {@code cdnaCodingEnd} and {@code exons}; each exon is
 *                                read for {@code start}, {@code end}, {@code sequence} and {@code phase}.
 *                                The exon list must contain at least one element
 * @param transcriptStart         forwarded unchanged
 * @param transcriptEnd           forwarded unchanged
 * @param variantStart            variant start, compared against exon {@code start}/{@code end} values
 * @param variantEnd              variant end, compared against exon {@code start}/{@code end} values
 * @param cdsLength               forwarded unchanged
 * @param transcriptFlags         forwarded unchanged
 * @param consequenceTypeTemplate receives the cDNA position whenever the variant start falls within an exon;
 *                                also forwarded
 */
private void solveCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames,
        BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart,
        Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags,
        ConsequenceType consequenceTypeTemplate) {
    Integer genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart");
    Integer genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd");
    Integer cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart");
    Integer cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd");
    BasicDBList exonInfoList = (BasicDBList) transcriptInfo.get("exons");

    // ---- First exon: there is no preceding junction to solve ----
    BasicDBObject exonInfo = (BasicDBObject) exonInfoList.get(0);
    int exonStart = (Integer) exonInfo.get("start");
    int exonEnd = (Integer) exonInfo.get("end");
    String firstExonSequence = (String) exonInfo.get("sequence");
    // Accumulates the exon sequences; append(String) renders null as "null", exactly like String concatenation
    StringBuilder sequenceBuilder = new StringBuilder().append(firstExonSequence);
    int cdnaExonEnd = exonEnd - exonStart + 1; // cDNA coordinate of the last base of the current exon
    Integer cdnaVariantStart = null; // stays null while the variant start has not been located within an exon
    Integer cdnaVariantEnd = null; // stays null while the variant end has not been located within an exon
    Boolean[] junctionSolution = { false, false }; // filled in by solveJunction
    boolean splicing = false;
    int firstCdsPhase = -1; // -1 until the first coding exon is reached

    if (genomicCodingStart <= exonEnd) {
        firstCdsPhase = (int) exonInfo.get("phase");
    }
    if (variantStart >= exonStart) {
        if (variantStart <= exonEnd) { // Variant start within the exon
            cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart);
            consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart);
            if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon  ----||||S|||||E||||----
                cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
            }
        }
    } else if (variantEnd <= exonEnd) {
        // Only variant end within the exon  ----||||||||||E||||----
        // We do not contemplate that variant end can be located before this exon since this is the first exon
        cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
    } // Otherwise the variant includes the whole exon: it starts before the exon and ends after it

    // ---- Remaining exons: solve the junction that precedes each one, then locate the variant ----
    boolean variantAhead = true; // a first iteration is needed to ensure the junction is solved in case needed
    for (int exonCounter = 1; exonCounter < exonInfoList.size() && variantAhead; exonCounter++) {
        exonInfo = (BasicDBObject) exonInfoList.get(exonCounter);
        exonStart = (Integer) exonInfo.get("start");
        int prevSpliceSite = exonEnd + 1; // first position after the previous exon
        exonEnd = (Integer) exonInfo.get("end");
        sequenceBuilder.append((String) exonInfo.get("sequence"));
        if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { // Set firstCdsPhase only when the first coding exon is reached
            firstCdsPhase = (int) exonInfo.get("phase");
        }
        solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames,
                "splice_donor_variant", "splice_acceptor_variant", junctionSolution);
        splicing = (splicing || junctionSolution[0]);

        int exonLength = exonEnd - exonStart + 1;
        if (variantStart >= exonStart) {
            cdnaExonEnd += exonLength;
            if (variantStart <= exonEnd) { // Variant start within the exon
                cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart);
                consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart);
                if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon  ----||||S|||||E||||----
                    cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
                }
            }
        } else if (variantEnd > exonEnd) {
            // Variant includes the whole exon: it starts before the exon and ends after it
            cdnaExonEnd += exonLength;
        } else if (variantEnd >= exonStart) { // Only variant end within the exon  ----||||||||||E||||----
            cdnaExonEnd += exonLength;
            cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd);
        } else { // Variant does not include this exon, variant is located before this exon
            variantAhead = false;
        }
    }

    // Is not intron variant (both ends fall within the same intron)
    if (!junctionSolution[1]) {
        if (isInsertion) {
            if (cdnaVariantStart == null && cdnaVariantEnd != null) { // To account for those insertions in the 3' end of an intron
                cdnaVariantStart = cdnaVariantEnd - 1;
            } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // To account for those insertions in the 5' end of an intron
                cdnaVariantEnd = cdnaVariantStart + 1;
            }
        }
        // With a single exon nothing was concatenated, so hand over the first exon sequence untouched
        // (it may be null); otherwise use the accumulated sequence.
        String transcriptSequence = exonInfoList.size() > 1 ? sequenceBuilder.toString() : firstExonSequence;
        solveCodingPositiveTranscriptEffect(splicing, transcriptSequence, transcriptStart, transcriptEnd,
                genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd,
                cdnaVariantStart, cdnaVariantEnd,
                cdsLength, transcriptFlags, firstCdsPhase, variant.getReference(), variant.getAlternative(),
                SoNames, consequenceTypeTemplate);
    }
}