List of usage examples for com.mongodb BasicDBObject get
public Object get(final String key)
From source file:org.opencb.cellbase.mongodb.db.ConservationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) { //TODO not finished yet List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); List<Integer> integerChunkIds; for (Region region : regions) { integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1);/*from w w w . j av a 2s .c o m*/ } if (region.getEnd() < 1) { region.setEnd(1); } /****/ QueryBuilder builder; int regionChunkStart = getChunkId(region.getStart(), this.chunkSize); int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize); if (regionChunkStart == regionChunkEnd) { builder = QueryBuilder.start("_chunkIds") .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize)); } else { // for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { // integerChunkIds.add(chunkId); // } // // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(hunkIds); // builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(integerChunkIds); builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } /****/ queries.add(builder.get()); ids.add(region.toString()); logger.info(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options); // List<QueryResult> queryResults = executeQueryList(ids, queries, options); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult(); Map<String, List<Float>> typeMap = new HashMap(); // int start = region.getStart(); for (int j = 0; j < list.size(); j++) { BasicDBObject chunk = (BasicDBObject) list.get(j); String type = 
chunk.getString("type"); List<Float> valuesList; if (!typeMap.containsKey(type)) { valuesList = new ArrayList<>(region.getEnd() - region.getStart() + 1); for (int val = 0; val < region.getEnd() - region.getStart() + 1; val++) { valuesList.add(null); } typeMap.put(type, valuesList); } else { valuesList = typeMap.get(type); } BasicDBList valuesChunk = (BasicDBList) chunk.get("values"); int pos = 0; if (region.getStart() > chunk.getInt("start")) { pos = region.getStart() - chunk.getInt("start"); } for (; pos < valuesChunk.size() && (pos + chunk.getInt("start") <= region.getEnd()); pos++) { valuesList.set(pos + chunk.getInt("start") - region.getStart(), new Float((Double) valuesChunk.get(pos))); } } BasicDBList resultList = new BasicDBList(); for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) { for (Float value : elem.getValue()) { resultList.add(value != null ? (new Score(new Double(value), elem.getKey())) : null); } } queryResult.setResult(resultList); } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java
License:Apache License
@Deprecated @Override// w w w. j av a 2 s . c om public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) { //TODO not finished yet List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); List<String> integerChunkIds; for (Region region : regions) { integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1); } if (region.getEnd() < 1) { region.setEnd(1); } // Max region size is 10000bp if (region.getEnd() - region.getStart() > 10000) { region.setEnd(region.getStart() + 10000); } QueryBuilder builder; int regionChunkStart = getChunkId(region.getStart(), this.chunkSize); int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize); if (regionChunkStart == regionChunkEnd) { builder = QueryBuilder.start("_chunkIds") .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), this.chunkSize)); } else { // for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { //// integerChunkIds.add(chunkId); // integerChunkIds.add(region.getChromosome() + "_" + chunkId + "_" + this.chunkSize/1000 + "k"); // } // builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(integerChunkIds); builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(hunkIds); /****/ queries.add(builder.get()); ids.add(region.toString()); logger.debug(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); // BasicDBList list = (BasicDBList) queryResult.getResult(); List list = queryResult.getResult(); Map<String, List<Float>> typeMap = new 
HashMap(); // int start = region.getStart(); for (int j = 0; j < list.size(); j++) { BasicDBObject chunk = (BasicDBObject) list.get(j); String type = chunk.getString("type"); List<Float> valuesList; if (!typeMap.containsKey(type)) { valuesList = new ArrayList<>(region.getEnd() - region.getStart() + 1); for (int val = 0; val < region.getEnd() - region.getStart() + 1; val++) { valuesList.add(null); } typeMap.put(type, valuesList); } else { valuesList = typeMap.get(type); } BasicDBList valuesChunk = (BasicDBList) chunk.get("values"); int pos = 0; if (region.getStart() > chunk.getInt("start")) { pos = region.getStart() - chunk.getInt("start"); } for (; pos < valuesChunk.size() && (pos + chunk.getInt("start") <= region.getEnd()); pos++) { // System.out.println("valuesList SIZE = " + valuesList.size()); // System.out.println("pos = " + pos); // System.out.println("DIV " + (chunk.getInt("start") - region.getStart())); // System.out.println("valuesChunk = " + valuesChunk.get(pos)); // System.out.println("indexFinal = " + (pos + chunk.getInt("start") - region.getStart())); valuesList.set(pos + chunk.getInt("start") - region.getStart(), new Float((Double) valuesChunk.get(pos))); } } // BasicDBList resultList = new BasicDBList(); ConservedRegionFeature conservedRegionChunk; for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) { conservedRegionChunk = new ConservedRegionFeature(region.getChromosome(), region.getStart(), region.getEnd(), elem.getKey(), elem.getValue()); resultList.add(conservedRegionChunk); } queryResult.setResult(resultList); } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.core.ConservationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllScoresByRegionList(List<Region> regions, QueryOptions options) { //TODO not finished yet List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); List<Integer> integerChunkIds; for (Region region : regions) { integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1);//from w ww .j av a 2 s.c o m } if (region.getEnd() < 1) { region.setEnd(1); } /****/ QueryBuilder builder; int regionChunkStart = getChunkId(region.getStart(), this.chunkSize); int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize); if (regionChunkStart == regionChunkEnd) { builder = QueryBuilder.start("_chunkIds") .is(getChunkIdPrefix(region.getChromosome(), region.getStart(), chunkSize)); } else { // for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { // integerChunkIds.add(chunkId); // } // // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(hunkIds); // builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("chunkId").in(integerChunkIds); builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } /****/ queries.add(builder.get()); ids.add(region.toString()); logger.debug(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options); // List<QueryResult> queryResults = executeQueryList(ids, queries, options); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); List<BasicDBObject> list = (List<BasicDBObject>) queryResult.getResult(); Map<String, List<Float>> typeMap = new HashMap(); // int start = region.getStart(); for (int j = 0; j < list.size(); j++) { BasicDBObject chunk = (BasicDBObject) list.get(j); String type = 
chunk.getString("type"); List<Float> valuesList; if (!typeMap.containsKey(type)) { valuesList = new ArrayList<>(region.getEnd() - region.getStart() + 1); for (int val = 0; val < region.getEnd() - region.getStart() + 1; val++) { valuesList.add(null); } typeMap.put(type, valuesList); } else { valuesList = typeMap.get(type); } BasicDBList valuesChunk = (BasicDBList) chunk.get("values"); int pos = 0; if (region.getStart() > chunk.getInt("start")) { pos = region.getStart() - chunk.getInt("start"); } for (; pos < valuesChunk.size() && (pos + chunk.getInt("start") <= region.getEnd()); pos++) { valuesList.set(pos + chunk.getInt("start") - region.getStart(), new Float((Double) valuesChunk.get(pos))); } } BasicDBList resultList = new BasicDBList(); for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) { for (Float value : elem.getValue()) { if (value != null) { resultList.add(new Score(new Double(value), elem.getKey())); } } } if (!resultList.isEmpty()) { queryResult.setResult(resultList); } else { queryResult.setResult(null); } } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.core.GenomeMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllSequencesByRegionList(List<Region> regions, QueryOptions options) { /****///w w w. j a v a 2 s. co m String chunkIdSuffix = this.chunkSize / 1000 + "k"; /****/ List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); List<String> chunkIds; List<Integer> integerChunkIds; for (Region region : regions) { chunkIds = new ArrayList<>(); integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1); } if (region.getEnd() < 1) { region.setEnd(1); } /****/ int regionChunkStart = getChunkId(region.getStart(), this.chunkSize); int regionChunkEnd = getChunkId(region.getEnd(), this.chunkSize); for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { String chunkIdStr = region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix; chunkIds.add(chunkIdStr); integerChunkIds.add(chunkId); } // QueryBuilder builder = QueryBuilder.start("sequenceName").is(region.getChromosome()).and("_chunkIds").in(chunkIds); QueryBuilder builder = QueryBuilder.start("_chunkIds").in(chunkIds); /****/ queries.add(builder.get()); ids.add(region.toString()); logger.info(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options, genomeSequenceCollection); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); List list = queryResult.getResult(); StringBuilder sb = new StringBuilder(); for (int j = 0; j < list.size(); j++) { BasicDBObject chunk = (BasicDBObject) list.get(j); sb.append(chunk.get("sequence")); } int startStr = getOffset(region.getStart()); int endStr = getOffset(region.getStart()) + (region.getEnd() - region.getStart()) + 1; String subStr = ""; if (getChunkId(region.getStart(), this.chunkSize) > 0) { if (sb.toString().length() > 0 && sb.toString().length() >= endStr) { subStr = sb.toString().substring(startStr, endStr); } } 
else { if (sb.toString().length() > 0 && sb.toString().length() + 1 >= endStr) { subStr = sb.toString().substring(startStr - 1, endStr - 1); } } logger.info("((BasicDBObject)list.get(0)).getString(\"sequenceType\") = {}", ((BasicDBObject) list.get(0)).getString("sequenceType")); logger.info("((BasicDBObject)list.get(0)).getString(\"assembly\") = {}", ((BasicDBObject) list.get(0)).getString("assembly")); GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(), region.getStart(), region.getEnd(), 1, ((BasicDBObject) list.get(0)).getString("sequenceType"), ((BasicDBObject) list.get(0)).getString("assembly"), subStr); // GenomeSequenceChunk genomeSequenceChunk = new GenomeSequenceChunk(region.getSequenceName(), region.getStart(), region.getEnd(), subStr); queryResult.setResult(Arrays.asList(genomeSequenceFeature)); } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.GenomeSequenceMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) { /****///ww w.ja va 2s. com String chunkIdSuffix = this.chunkSize / 1000 + "k"; /****/ List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); List<String> chunkIds; List<Integer> integerChunkIds; for (Region region : regions) { chunkIds = new ArrayList<>(); integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1); } if (region.getEnd() < 1) { region.setEnd(1); } /****/ int regionChunkStart = getChunk(region.getStart()); int regionChunkEnd = getChunk(region.getEnd()); for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { String chunkIdStr = region.getChromosome() + "_" + chunkId + "_" + chunkIdSuffix; chunkIds.add(chunkIdStr); integerChunkIds.add(chunkId); } // QueryBuilder builder = QueryBuilder.start("sequenceName").is(region.getChromosome()).and("_chunkIds").in(chunkIds); QueryBuilder builder = QueryBuilder.start("_chunkIds").in(chunkIds); /****/ queries.add(builder.get()); ids.add(region.toString()); logger.info(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); List list = queryResult.getResult(); StringBuilder sb = new StringBuilder(); for (int j = 0; j < list.size(); j++) { BasicDBObject chunk = (BasicDBObject) list.get(j); sb.append(chunk.get("sequence")); } int startStr = getOffset(region.getStart()); int endStr = getOffset(region.getStart()) + (region.getEnd() - region.getStart()) + 1; String subStr = ""; if (getChunk(region.getStart()) > 0) { if (sb.toString().length() > 0 && sb.toString().length() >= endStr) { subStr = sb.toString().substring(startStr, endStr); } } else { if (sb.toString().length() > 0 && sb.toString().length() + 1 >= endStr) { subStr = 
sb.toString().substring(startStr - 1, endStr - 1); } } GenomeSequenceFeature genomeSequenceFeature = new GenomeSequenceFeature(region.getChromosome(), region.getStart(), region.getEnd(), 1, ((BasicDBObject) list.get(0)).getString("sequenceType"), ((BasicDBObject) list.get(0)).getString("assembly"), subStr); // GenomeSequenceChunk genomeSequenceChunk = new GenomeSequenceChunk(region.getSequenceName(), region.getStart(), region.getEnd(), subStr); queryResult.setResult(Arrays.asList(genomeSequenceFeature)); } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.regulatory.TfbsMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllByTargetGeneIdList(List<String> targetGeneIdList, QueryOptions options) { // DBCollection coreMongoDBCollection = db.getCollection("gene"); List<DBObject[]> commandList = new ArrayList<>(); for (String targetGeneId : targetGeneIdList) { DBObject[] commands = new DBObject[3]; DBObject match = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.id", targetGeneId)); DBObject unwind = new BasicDBObject("$unwind", "$transcripts"); BasicDBObject projectObj = new BasicDBObject("_id", 0); projectObj.append("transcripts.id", 1); projectObj.append("transcripts.tfbs", 1); DBObject project = new BasicDBObject("$project", projectObj); commands[0] = match;//from w ww. j a va 2s .co m commands[1] = unwind; commands[2] = project; commandList.add(commands); } // List<QueryResult> queryResults = executeAggregationList(targetGeneIdList, commandList, options, coreMongoDBCollection); List<QueryResult> queryResults = new ArrayList<>(); for (int i = 0; i < targetGeneIdList.size(); i++) { String targetGeneId = targetGeneIdList.get(0); // QueryResult queryResult = queryResults.get(0); QueryResult queryResult = new QueryResult(); BasicDBList list = (BasicDBList) queryResult.getResult(); for (int j = 0; j < list.size(); j++) { BasicDBObject gene = (BasicDBObject) list.get(j); BasicDBObject transcript = (BasicDBObject) gene.get("transcripts"); String transcriptId = transcript.getString("id"); if (transcriptId.toUpperCase().equals(targetGeneId)) { BasicDBList tfbs = (BasicDBList) transcript.get("tfbs"); queryResult.setResult(tfbs); break; } } } return queryResults; }
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
/**
 * Adds the Sequence Ontology term names describing the coding effect of a variant to
 * {@code SoNames} and, for SNVs, fills amino-acid change, codon and protein substitution
 * scores on {@code consequenceTypeTemplate}.
 *
 * <p>NOTE(review): judging by the method name this variant handles transcripts on the
 * positive strand; confirm against the caller.
 *
 * <p>Three cases are distinguished:
 * <ul>
 *   <li>deletion: {@code variantAlt} equals {@code "-"};</li>
 *   <li>insertion: {@code variantRef} equals {@code "-"} and {@code cdnaVariantStart} is
 *       not null;</li>
 *   <li>anything else is handled by the SNV branch.</li>
 * </ul>
 * If none of the branches registered a coding annotation, the generic
 * {@code "coding_sequence_variant"} term is added at the end.
 *
 * @param splicing                whether the variant was flagged as affecting splicing;
 *                                when true, codon-level prediction is skipped
 * @param transcriptSequence      transcript sequence the codons are read from
 * @param transcriptEnd           compared with {@code genomicCodingEnd} to detect a
 *                                coding region running up to the transcript end
 * @param genomicCodingEnd        see {@code transcriptEnd}
 * @param cdnaCodingStart         cDNA position where the coding region starts (values
 *                                below 1 are possible, see inline comments)
 * @param cdnaCodingEnd           cDNA position where the coding region ends
 * @param cdnaVariantStart        cDNA position of the variant start; may be null
 * @param cdnaVariantEnd          cDNA position of the variant end; may be null
 * @param transcriptFlags         transcript flags checked for {@code "cds_start_NF"} and
 *                                {@code "cds_end_NF"}; may be null
 * @param variantRef              reference allele, {@code "-"} for insertions
 * @param variantAlt              alternate allele, {@code "-"} for deletions
 * @param SoNames                 output set receiving the SO term names
 * @param consequenceTypeTemplate consequence type updated in the SNV branch
 */
private void solvePositiveCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptEnd,
        Integer genomicCodingEnd, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {
    // Indicates whether the generic "coding_sequence_variant" annotation must be added at the end
    Boolean codingAnnotationAdded = false;
    if (variantAlt.equals("-")) { // Deletion
        // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3)
                && (transcriptFlags == null || cdnaCodingStart > 0
                || !transcriptFlags.contains("cds_start_NF"))) {
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            // Just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
            if (!splicing && cdnaVariantStart != null) {
                codingAnnotationAdded = true;
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation is solved by the call below.
                solveStopCodonPositiveDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        // Insertion. Be careful: insertion coordinates are special, alternative nts are pasted
        // between cdnaVariantStart and cdnaVariantEnd
        if (variantRef.equals("-") && (cdnaVariantStart != null)) {
            codingAnnotationAdded = true;
            // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) {
                SoNames.add("initiator_codon_variant");
            }
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            // Variant in the last codon of a transcript without stop codon.
            // finalNtPhase==2 if the cds length is multiple of 3.
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) {
                SoNames.add("incomplete_terminal_codon_variant");
            }
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            // Stop codon terms for the insertion are added by the helper. (An earlier inline
            // implementation that chose between "stop_retained_variant" and "stop_lost" here
            // was disabled; some transcripts have no STOP codon annotated in the ENSEMBL gtf.)
            solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt,
                    SoNames);
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    // Variant in the last codon of a transcript without stop codon.
                    // finalNtPhase==2 if the cds length is multiple of 3.
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) {
                        // In this case the reference/modified codons are not calculated
                        SoNames.add("incomplete_terminal_codon_variant");
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        // Offset (0..2) of the variant within its codon
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        // -1 and +2 because of base 0 String indexing
                        String referenceCodon = transcriptSequence.substring(modifiedCodonStart - 1,
                                modifiedCodonStart + 2);
                        char[] modifiedCodonArray = referenceCodon.toCharArray();
                        // Only the first character of the alternate allele is substituted
                        modifiedCodonArray[variantPhaseShift] = variantAlt.toCharArray()[0];
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(referenceCodon);
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));

                        if (isSynonymousCodon.get(referenceCodon).get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(referenceCodon)) {
                                SoNames.add("stop_retained_variant");
                            } else {
                                // Coding end may be not correctly annotated (incomplete_terminal_codon_variant),
                                // but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                // Gary - initiator codon SO terms not compatible with the terms below
                                SoNames.add("initiator_codon_variant");
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    // Gary - initiator codon SO terms not compatible with the terms below
                                    SoNames.add("stop_gained");
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            // Variant does not affect the last codon (probably stop codon). If the 3prime
                            // end is incompletely annotated and execution reaches this line, finalNtPhase
                            // can only be 2
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) {
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                // Scores are attached only when exactly one record is returned
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    // "ss" holds the score reported as "Sift"; "se" is the key into siftDescriptions
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift", siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    // "ps" holds the score reported as "Polyphen"; "pe" is the key into polyphenDescriptions
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Set consequenceTypeTemplate.codon leaving only the nt that changes in uppercase.
                        // Careful with upper/lower case letters
                        char[] referenceCodonArray = referenceCodon.toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
/**
 * Adds the Sequence Ontology term names describing the coding effect of a variant to
 * {@code SoNames} and, for SNVs, fills amino-acid change, codon and protein substitution
 * scores on {@code consequenceTypeTemplate}.
 *
 * <p>NOTE(review): judging by the method name this variant handles transcripts on the
 * negative strand; confirm against the caller. In the SNV branch the reference codon is
 * obtained by taking a substring counted from the end of {@code transcriptSequence},
 * reversing it and mapping every nucleotide through {@code complementaryNt}.
 *
 * <p>Three cases are distinguished:
 * <ul>
 *   <li>deletion: {@code variantAlt} equals {@code "-"};</li>
 *   <li>insertion: {@code variantRef} equals {@code "-"} and {@code cdnaVariantStart} is
 *       not null;</li>
 *   <li>anything else is handled by the SNV branch.</li>
 * </ul>
 * If none of the branches registered a coding annotation, the generic
 * {@code "coding_sequence_variant"} term is added at the end.
 *
 * @param splicing                whether the variant was flagged as affecting splicing;
 *                                when true, codon-level prediction is skipped
 * @param transcriptSequence      transcript sequence the codons are read from
 * @param transcriptStart         compared with {@code genomicCodingStart} to detect a
 *                                coding region running up to the transcript boundary
 * @param genomicCodingStart      see {@code transcriptStart}
 * @param cdnaCodingStart         cDNA position where the coding region starts (values
 *                                below 1 are possible, see inline comments)
 * @param cdnaCodingEnd           cDNA position where the coding region ends
 * @param cdnaVariantStart        cDNA position of the variant start; may be null
 * @param cdnaVariantEnd          cDNA position of the variant end; may be null
 * @param transcriptFlags         transcript flags checked for {@code "cds_start_NF"} and
 *                                {@code "cds_end_NF"}; may be null
 * @param variantRef              reference allele, {@code "-"} for insertions
 * @param variantAlt              alternate allele, {@code "-"} for deletions
 * @param SoNames                 output set receiving the SO term names
 * @param consequenceTypeTemplate consequence type updated in the SNV branch
 */
private void solveNegativeCodingEffect(Boolean splicing, String transcriptSequence, Integer transcriptStart,
        Integer genomicCodingStart, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart,
        Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt,
        HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) {
    // Indicates whether the generic "coding_sequence_variant" annotation must be added at the end
    Boolean codingAnnotationAdded = false;
    if (variantAlt.equals("-")) { // Deletion
        // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
        if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3)
                && (transcriptFlags == null || cdnaCodingStart > 0
                || !transcriptFlags.contains("cds_start_NF"))) {
            SoNames.add("initiator_codon_variant");
            codingAnnotationAdded = true;
        }
        if (cdnaVariantEnd != null) {
            int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3;
            Boolean stopToSolve = true;
            // Just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null
            if (!splicing && cdnaVariantStart != null) {
                codingAnnotationAdded = true;
                if (variantRef.length() % 3 == 0) {
                    SoNames.add("inframe_deletion");
                } else {
                    SoNames.add("frameshift_variant");
                }
                stopToSolve = false; // Stop codon annotation is solved by the call below.
                solveStopCodonNegativeDeletion(transcriptSequence, cdnaCodingStart, cdnaVariantStart,
                        cdnaVariantEnd, SoNames);
            }
            if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) {
                if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) {
                    if (finalNtPhase != 2) {
                        SoNames.add("incomplete_terminal_codon_variant");
                    }
                } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above
                    SoNames.add("stop_lost");
                }
            }
        }
    } else {
        // Insertion. TODO: I've seen insertions within Cellbase-mongo with a ref != -
        if (variantRef.equals("-") && (cdnaVariantStart != null)) {
            codingAnnotationAdded = true;
            // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0
            if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0
                    || !transcriptFlags.contains("cds_start_NF"))) {
                SoNames.add("initiator_codon_variant");
            }
            int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
            // Variant in the last codon of a transcript without stop codon.
            // finalNtPhase==2 if the cds length is multiple of 3.
            if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase))
                    && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) {
                SoNames.add("incomplete_terminal_codon_variant");
            }
            if (variantAlt.length() % 3 == 0) {
                SoNames.add("inframe_insertion");
            } else {
                SoNames.add("frameshift_variant");
            }
            // Be careful, cdnaVariantEnd is being used in this case!!!
            // (An earlier inline implementation that chose between "stop_retained_variant" and
            // "stop_lost" here was disabled; some transcripts have no STOP codon annotated in
            // the ENSEMBL gtf.)
            solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt,
                    SoNames);
        } else { // SNV
            if (cdnaVariantStart != null) {
                int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3;
                if (!splicing) {
                    // Variant in the last codon of a transcript without stop codon.
                    // finalNtPhase==2 if the cds length is multiple of 3.
                    if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase))
                            && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) {
                        // If that is the case and the variant occurs in the last complete/incomplete
                        // codon, no coding prediction is needed
                        SoNames.add("incomplete_terminal_codon_variant");
                    } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0
                        // Offset (0..2) of the variant within its codon
                        Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3;
                        int modifiedCodonStart = cdnaVariantStart - variantPhaseShift;
                        // Right limit of the substring sums +1 because substring does not include that position
                        String reverseCodon = new StringBuilder(transcriptSequence.substring(
                                transcriptSequence.length() - modifiedCodonStart - 2,
                                transcriptSequence.length() - modifiedCodonStart + 1)).reverse().toString();
                        // Complement each nucleotide of the reversed codon
                        char[] referenceCodon = reverseCodon.toCharArray();
                        referenceCodon[0] = complementaryNt.get(referenceCodon[0]);
                        referenceCodon[1] = complementaryNt.get(referenceCodon[1]);
                        referenceCodon[2] = complementaryNt.get(referenceCodon[2]);
                        char[] modifiedCodonArray = referenceCodon.clone();
                        // Only the first character of the alternate allele is substituted (complemented)
                        modifiedCodonArray[variantPhaseShift] = complementaryNt
                                .get(variantAlt.toCharArray()[0]);
                        codingAnnotationAdded = true;
                        String referenceA = codonToA.get(String.valueOf(referenceCodon));
                        String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray));

                        if (isSynonymousCodon.get(String.valueOf(referenceCodon))
                                .get(String.valueOf(modifiedCodonArray))) {
                            if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_retained_variant");
                            } else {
                                // Coding end may be not correctly annotated (incomplete_terminal_codon_variant),
                                // but if the length of the cds%3=0, annotation should be synonymous variant
                                SoNames.add("synonymous_variant");
                            }
                        } else {
                            if (cdnaVariantStart < (cdnaCodingStart + 3)) {
                                // Gary - initiator codon SO terms not compatible with the terms below
                                SoNames.add("initiator_codon_variant");
                                if (isStopCodon(String.valueOf(modifiedCodonArray))) {
                                    // Gary - initiator codon SO terms not compatible with the terms below
                                    SoNames.add("stop_gained");
                                }
                            } else if (isStopCodon(String.valueOf(referenceCodon))) {
                                SoNames.add("stop_lost");
                            } else {
                                SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained"
                                        : "missense_variant");
                            }
                            // Variant does not affect the last codon (probably stop codon). If the 3prime
                            // end is incompletely annotated and execution reaches this line, finalNtPhase
                            // can only be 2
                            if (cdnaVariantEnd < (cdnaCodingEnd - 2)) {
                                QueryResult proteinSubstitutionScoresQueryResult = proteinFunctionPredictorDBAdaptor
                                        .getByAaChange(consequenceTypeTemplate.getEnsemblTranscriptId(),
                                                consequenceTypeTemplate.getAaPosition(), alternativeA,
                                                new QueryOptions());
                                // Scores are attached only when exactly one record is returned
                                if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) {
                                    BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult
                                            .getResult().get(0);
                                    // "ss" holds the score reported as "Sift"; "se" is the key into siftDescriptions
                                    if (proteinSubstitutionScores.get("ss") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ss")),
                                                "Sift", siftDescriptions.get(proteinSubstitutionScores.get("se"))));
                                    }
                                    // "ps" holds the score reported as "Polyphen"; "pe" is the key into polyphenDescriptions
                                    if (proteinSubstitutionScores.get("ps") != null) {
                                        consequenceTypeTemplate.addProteinSubstitutionScore(new Score(
                                                Double.parseDouble("" + proteinSubstitutionScores.get("ps")),
                                                "Polyphen",
                                                polyphenDescriptions.get(proteinSubstitutionScores.get("pe"))));
                                    }
                                }
                            }
                        }
                        // Set consequenceTypeTemplate.aChange
                        consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA);
                        // Fill consequenceTypeTemplate.codon leaving only the nt that changes in uppercase.
                        // Careful with upper/lower case letters
                        char[] referenceCodonArray = String.valueOf(referenceCodon).toLowerCase().toCharArray();
                        referenceCodonArray[variantPhaseShift] = Character
                                .toUpperCase(referenceCodonArray[variantPhaseShift]);
                        modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray();
                        modifiedCodonArray[variantPhaseShift] = Character
                                .toUpperCase(modifiedCodonArray[variantPhaseShift]);
                        consequenceTypeTemplate.setCodon(
                                String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray));
                    }
                }
            }
        }
    }
    if (!codingAnnotationAdded) {
        SoNames.add("coding_sequence_variant");
    }
}
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
@Override public QueryResult getAllConsequenceTypesByVariant(GenomicVariant variant, QueryOptions options) { Logger logger = LoggerFactory.getLogger(this.getClass()); HashSet<String> SoNames = new HashSet<>(); List<ConsequenceType> consequenceTypeList = new ArrayList<>(); QueryResult queryResult = new QueryResult(); QueryBuilder builderGene = null;/*from w w w .ja v a 2s .co m*/ QueryBuilder builderRegulatory = null; BasicDBList transcriptInfoList = null; BasicDBList exonInfoList; BasicDBObject miRnaInfo; BasicDBObject transcriptInfo, exonInfo; BasicDBObject geneInfo; BasicDBObject regulatoryInfo; Integer geneStart, geneEnd, transcriptStart, transcriptEnd, exonStart, exonEnd, genomicCodingStart, genomicCodingEnd; Integer cdnaCodingStart, cdnaCodingEnd, cdnaExonStart, cdnaExonEnd, cdnaVariantStart, cdnaVariantEnd, prevSpliceSite; Integer regulatoryStart, regulatoryEnd, cdsLength; Integer variantStart; Integer variantEnd; String geneStrand, transcriptStrand, exonSequence, transcriptSequence; String regulatoryChromosome, regulatoryType; String nextCodonNucleotides = ""; String ensemblTranscriptId; String geneName; String ensemblGeneId; int transcriptBiotype; long dbTimeStart, dbTimeEnd; Boolean splicing, coding, exonsRemain, variantAhead, exonVariant, TFBSFound; int exonCounter, i; ConsequenceType consequenceTypeTemplate = new ConsequenceType(); variantEnd = variant.getPosition() + variant.getReference().length() - 1; //TODO: Check deletion input format to ensure that variantEnd is correctly calculated Boolean isInsertion = variant.getReference().equals("-"); if (isInsertion) { variantStart = variant.getPosition() - 1; } else { variantStart = variant.getPosition(); } // builderGene = QueryBuilder.start("chromosome").is(variant.getChromosome()).and("end") // .greaterThanEquals(variant.getPosition() - 5000).and("start").lessThanEquals(variantEnd + 5000); // variantEnd is used rather than variant.getPosition() to account for deletions which end falls within the 5kb left 
area of the gene // Get all regulatory regions surrounding the variant // String chunkId = getChunkIdPrefix(variant.getChromosome(), variant.getPosition(), regulatoryRegionChunkSize); // BasicDBList chunksId = new BasicDBList(); // chunksId.add(chunkId); // builderRegulatory = QueryBuilder.start("chunkIds").in(chunksId).and("start").lessThanEquals(variantEnd).and("end") // .greaterThanEquals(variant.getPosition()); // variantEnd is used rather than variant.getPosition() to account for deletions which end falls within the 5kb left area of the gene // Execute query and calculate time // mongoDBCollection = db.getCollection("gene"); dbTimeStart = System.currentTimeMillis(); // QueryResult geneQueryResult = executeQuery(variant.toString(), builderGene.get(), options); QueryOptions geneQueryOptions = new QueryOptions(); geneQueryOptions.add("include", "name,id,transcripts.id,transcripts.start,transcripts.end,transcripts.strand,transcripts.cdsLength,transcripts.annotationFlags,transcripts.biotype,transcripts.genomicCodingStart,transcripts.genomicCodingEnd,transcripts.cdnaCodingStart,transcripts.cdnaCodingEnd,transcripts.exons.start,transcripts.exons.end,transcripts.exons.sequence,transcripts.exons.phase,mirna.matures,mirna.sequence,mirna.matures.cdnaStart,mirna.matures.cdnaEnd"); QueryResult geneQueryResult = geneDBAdaptor.getAllByRegion( new Region(variant.getChromosome(), variantStart - 5000, variantEnd + 5000), geneQueryOptions); // mongoDBCollection = db.getCollection("regulatory_region"); // QueryResult regulatoryQueryResult = executeQuery(variant.toString(), builderRegulatory.get(), options); QueryResult regulatoryQueryResult = regulatoryRegionDBAdaptor .getAllByRegion(new Region(variant.getChromosome(), variantStart, variantEnd), options); dbTimeEnd = System.currentTimeMillis(); LinkedList geneInfoList = (LinkedList) geneQueryResult.getResult(); // BasicDBList geneInfoList = (BasicDBList) geneQueryResult.getResult(); for (Object geneInfoObject : geneInfoList) { 
geneInfo = (BasicDBObject) geneInfoObject; consequenceTypeTemplate.setGeneName((String) geneInfo.get("name")); consequenceTypeTemplate.setEnsemblGeneId((String) geneInfo.get("id")); transcriptInfoList = (BasicDBList) geneInfo.get("transcripts"); for (Object transcriptInfoObject : transcriptInfoList) { transcriptInfo = (BasicDBObject) transcriptInfoObject; ensemblTranscriptId = (String) transcriptInfo.get("id"); transcriptStart = (Integer) transcriptInfo.get("start"); transcriptEnd = (Integer) transcriptInfo.get("end"); transcriptStrand = (String) transcriptInfo.get("strand"); cdsLength = (Integer) transcriptInfo.get("cdsLength"); BasicDBList transcriptFlags = (BasicDBList) transcriptInfo.get("annotationFlags"); try { transcriptBiotype = biotypes.get((String) transcriptInfo.get("biotype")); } catch (NullPointerException e) { // logger.info("WARNING: biotype not found within the list of hardcoded biotypes - "+transcriptInfo.get("biotype")); // logger.info("WARNING: transcript: "+ensemblTranscriptId); // logger.info("WARNING: setting transcript biotype to non_coding "); transcriptBiotype = 45; } SoNames.clear(); consequenceTypeTemplate.setEnsemblTranscriptId(ensemblTranscriptId); consequenceTypeTemplate.setcDnaPosition(null); consequenceTypeTemplate.setCdsPosition(null); consequenceTypeTemplate.setAaPosition(null); consequenceTypeTemplate.setAaChange(null); consequenceTypeTemplate.setCodon(null); consequenceTypeTemplate.setStrand((String) geneInfo.get("strand")); consequenceTypeTemplate.setBiotype((String) transcriptInfo.get("biotype")); consequenceTypeTemplate.setProteinSubstitutionScores(null); miRnaInfo = null; if (transcriptStrand.equals("+")) { if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), 
consequenceTypeTemplate.getBiotype(), Collections.singletonList("transcript_ablation"))); } else { // Check variant overlaps transcript start/end coordinates if (regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd) && !(isInsertion && (variantEnd.equals(transcriptStart) || // Insertion just before the first transcript nt variantStart.equals(transcriptEnd)))) { // Insertion just after the last transcript nt if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion SoNames.add("feature_truncation"); } switch (transcriptBiotype) { /** * Coding biotypes */ case 30: SoNames.add("NMD_transcript_variant"); case 1: case 3: case 4: case 6: case 10: // TR_C_gene case 11: // TR_D_gene case 12: // TR_J_gene case 14: // TR_V_gene case 20: case 23: // protein_coding case 34: // non_stop_decay case 36: case 50: // translated_unprocessed_pseudogene case 51: // LRG_gene solveCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength, transcriptFlags, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), consequenceTypeTemplate.getCdsPosition(), consequenceTypeTemplate.getAaPosition(), consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(), consequenceTypeTemplate.getProteinSubstitutionScores(), new ArrayList<>(SoNames))); break; /** * pseudogenes, antisense should not be annotated as non-coding genes */ case 39: case 40: case 41: case 42: case 43: case 44: case 49: solveNonCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, null, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new 
ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames))); break; /** * Non-coding biotypes */ case 18: // miRNA miRnaInfo = (BasicDBObject) geneInfo.get("mirna"); case 2: // case 5: // case 7: // IG_V_pseudogene case 13: case 15: case 0: // 3prime_overlapping_ncrna case 16: // antisense TODO: move to coding? case 17: // lincRNA case 19: case 21: // processed_pseudogene case 22: // processed_transcript case 24: // pseudogene case 25: case 26: // sense_intronic case 27: // sense_overlapping case 28: case 29: case 31: // unprocessed_pseudogene case 32: // transcribed_unprocessed_pseudogene case 33: // retained_intron case 35: // unitary_pseudogene case 37: // transcribed_processed_pseudogene case 38: case 45: case 46: case 47: case 48: solveNonCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames))); break; } } else { solveTranscriptFlankingRegions(SoNames, transcriptStart, transcriptEnd, variantStart, variantEnd, "upstream_gene_variant", "downstream_gene_variant"); if (SoNames.size() > 0) { // Variant does not overlap gene region, just may have upstream/downstream annotations consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), 
consequenceTypeTemplate.getBiotype(), new ArrayList<>(SoNames))); } } } } else { if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), Collections.singletonList("transcript_ablation"))); } else { // Check overlaps transcript start/end coordinates if (regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd) && !(isInsertion && (variantEnd.equals(transcriptStart) || // Insertion just before the first transcript nt variantStart.equals(transcriptEnd)))) { // Insertion just after the last transcript nt if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion SoNames.add("feature_truncation"); } switch (transcriptBiotype) { /** * Coding biotypes */ case 30: SoNames.add("NMD_transcript_variant"); case 1: case 3: case 4: case 6: case 10: // TR_C_gene case 11: // TR_D_gene case 12: // TR_J_gene case 14: // TR_V_gene case 20: case 23: case 34: // non_stop_decay case 36: case 50: // translated_unprocessed_pseudogene case 51: // LRG_gene solveCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength, transcriptFlags, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), consequenceTypeTemplate.getCdsPosition(), consequenceTypeTemplate.getAaPosition(), consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(), consequenceTypeTemplate.getProteinSubstitutionScores(), new 
ArrayList<>(SoNames))); break; /** * pseudogenes, antisense should not be annotated as non-coding genes */ case 39: case 40: case 41: case 42: case 43: case 44: case 49: solveNonCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, null, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames))); break; /** * Non-coding biotypes */ case 18: // miRNA miRnaInfo = (BasicDBObject) geneInfo.get("mirna"); case 2: // case 5: // case 7: // IG_V_pseudogene case 13: case 15: case 0: // 3prime_overlapping_ncrna case 17: // lincRNA case 16: // antisense TODO: move to coding? case 19: case 21: // processed_pseudogene case 22: // processed_transcript case 24: // pseudogene case 25: case 26: // sense_intronic case 27: // sense_overlapping case 28: case 29: case 31: // unprocessed_pseudogene case 32: // transcribed_unprocessed_pseudogen case 33: // retained_intron case 35: // unitary_pseudogene case 37: // transcribed_processed_pseudogene case 38: case 45: case 46: case 47: case 48: solveNonCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames))); break; } } else { solveTranscriptFlankingRegions(SoNames, transcriptStart, transcriptEnd, variantStart, variantEnd, "downstream_gene_variant", 
"upstream_gene_variant"); if (SoNames.size() > 0) { // Variant does not overlap gene region, just has upstream/downstream annotations consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), new ArrayList<>(SoNames))); } } } } } } if (consequenceTypeList.size() == 0) { consequenceTypeList.add(new ConsequenceType("intergenic_variant")); } LinkedList regulatoryInfoList = (LinkedList) regulatoryQueryResult.getResult(); // BasicDBList regulatoryInfoList = (BasicDBList) regulatoryQueryResult.getResult(); if (!regulatoryInfoList.isEmpty()) { consequenceTypeList.add(new ConsequenceType("regulatory_region_variant")); i = 0; do { regulatoryInfo = (BasicDBObject) regulatoryInfoList.get(i); regulatoryType = (String) regulatoryInfo.get("featureType"); TFBSFound = regulatoryType.equals("TF_binding_site") || regulatoryType.equals("TF_binding_site_motif"); i++; } while (i < regulatoryInfoList.size() && !TFBSFound); if (TFBSFound) { consequenceTypeList.add(new ConsequenceType("TF_binding_site_variant")); } } else { int b; b = 1; } // if(transcriptInfoList == null) { // consequenceTypeList.add(new ConsequenceType("intergenic_variant")); // } // consequenceTypeList = filterConsequenceTypesBySoTerms(consequenceTypeList, options.getAsStringList("so")); // setting queryResult fields queryResult.setId(variant.toString()); queryResult.setDbTime(Long.valueOf(dbTimeEnd - dbTimeStart).intValue()); queryResult.setNumResults(consequenceTypeList.size()); queryResult.setResult(consequenceTypeList); return queryResult; }
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart, Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags, ConsequenceType consequenceTypeTemplate) { Integer genomicCodingStart;//from w w w . j a v a 2 s .c o m Integer genomicCodingEnd; Integer cdnaCodingStart; Integer cdnaCodingEnd; BasicDBList exonInfoList; BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; int firstCdsPhase = -1; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart"); genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd"); cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart"); cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd"); exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); cdnaVariantStart = null; cdnaVariantEnd = null; junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { firstCdsPhase = (int) exonInfo.get("phase"); } if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Variant start within the exon cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon 
----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); // } } // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded exonStart = (Integer) exonInfo.get("start"); prevSpliceSite = exonEnd + 1; exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = transcriptSequence + ((String) exonInfo.get("sequence")); if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { // Set firsCdsPhase only when the first coding exon is reached firstCdsPhase = (int) exonInfo.get("phase"); } solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames, "splice_donor_variant", "splice_acceptor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantStart >= exonStart) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantStart <= exonEnd) { // Variant start within the exon cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- 
cdnaExonEnd += (exonEnd - exonStart + 1); cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } // Is not intron variant (both ends fall within the same intron) if (!junctionSolution[1]) { if (isInsertion) { if (cdnaVariantStart == null && cdnaVariantEnd != null) { // To account for those insertions in the 3' end of an intron cdnaVariantStart = cdnaVariantEnd - 1; } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // To account for those insertions in the 5' end of an intron cdnaVariantEnd = cdnaVariantStart + 1; } } solveCodingPositiveTranscriptEffect(splicing, transcriptSequence, transcriptStart, transcriptEnd, genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd, cdnaVariantStart, cdnaVariantEnd, // Be careful, originalVariantStart is used here! cdsLength, transcriptFlags, firstCdsPhase, variant.getReference(), variant.getAlternative(), SoNames, consequenceTypeTemplate); } }