List of usage examples for com.mongodb BasicDBObject get
public Object get(final String key)
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveCodingNegativeTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart, Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags, ConsequenceType consequenceTypeTemplate) { Integer genomicCodingStart;/*from w ww. j a va 2 s. c om*/ Integer genomicCodingEnd; Integer cdnaCodingStart; Integer cdnaCodingEnd; BasicDBList exonInfoList; BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; int firstCdsPhase = -1; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart"); genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd"); cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart"); cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd"); exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); // cdnaExonEnd poinst to the same base than exonStart cdnaVariantStart = null; // cdnaVariantStart points to the same base than variantEnd cdnaVariantEnd = null; // cdnaVariantEnd points to the same base than variantStart junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (firstCdsPhase == -1 && genomicCodingEnd >= exonStart) { firstCdsPhase = (int) exonInfo.get("phase"); } if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (variantEnd - exonStart); // } } // Variant includes the whole exon. Variant end is located before the exon, variant start is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded prevSpliceSite = exonStart - 1; exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = ((String) exonInfo.get("sequence")) + transcriptSequence; if (firstCdsPhase == -1 && genomicCodingEnd >= exonStart) { // Set firsCdsPhase only when the first coding exon is reached firstCdsPhase = (int) exonInfo.get("phase"); } solveJunction(isInsertion, exonEnd + 1, prevSpliceSite, variantStart, variantEnd, SoNames, "splice_acceptor_variant", "splice_donor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantEnd <= exonEnd) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Only variant start within the exon ----||||||||||E||||---- cdnaExonEnd += (exonEnd - exonStart + 1); cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } // Is not intron variant (both ends fall within the same intron) if (!junctionSolution[1]) { if (isInsertion) { if (cdnaVariantStart == null && cdnaVariantEnd != null) { // To account for those insertions in the 3' end of an intron cdnaVariantStart = cdnaVariantEnd - 1; } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // To account for those insertions in the 5' end of an intron cdnaVariantEnd = cdnaVariantStart + 1; } } solveCodingNegativeTranscriptEffect(splicing, transcriptSequence, transcriptStart, transcriptEnd, genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd, cdnaVariantStart, cdnaVariantEnd, cdsLength, transcriptFlags, firstCdsPhase, variant.getReference(), variant.getAlternative(), SoNames, consequenceTypeTemplate); } }
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveNonCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, BasicDBObject miRnaInfo, Integer variantStart, Integer variantEnd, ConsequenceType consequenceTypeTemplate) { BasicDBList exonInfoList;/* www .j a v a2s .c om*/ BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); cdnaVariantStart = null; cdnaVariantEnd = null; junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Variant start within the exon. Set cdnaPosition in consequenceTypeTemplate cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); // } } // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded exonStart = (Integer) exonInfo.get("start"); prevSpliceSite = exonEnd + 1; exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = transcriptSequence + ((String) exonInfo.get("sequence")); solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames, "splice_donor_variant", "splice_acceptor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantStart >= exonStart) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantStart <= exonEnd) { // Variant start within the exon. Set cdnaPosition in consequenceTypeTemplate cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } if (miRnaInfo != null) { // miRNA with miRBase data BasicDBList matureMiRnaInfo = (BasicDBList) miRnaInfo.get("matures"); if (cdnaVariantStart == null) { // Probably deletion starting before the miRNA location cdnaVariantStart = 1; // Truncate to the first transcript position to avoid null exception } if (cdnaVariantEnd == null) { // Probably deletion ending after the miRNA location cdnaVariantEnd = ((String) miRnaInfo.get("sequence")).length(); // Truncate to the last transcript position to avoid null exception } int i = 0; while (i < matureMiRnaInfo.size() && !regionsOverlap((Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaStart"), (Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaEnd"), cdnaVariantStart, cdnaVariantEnd)) { i++; } if (i < matureMiRnaInfo.size()) { // Variant overlaps at least one mature miRNA SoNames.add("mature_miRNA_variant"); } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } }
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveNonCodingNegativeTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, BasicDBObject miRnaInfo, Integer variantStart, Integer variantEnd, ConsequenceType consequenceTypeTemplate) { BasicDBList exonInfoList;/*from w ww . ja v a 2 s . c o m*/ BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); // cdnaExonEnd poinst to the same base than exonStart cdnaVariantStart = null; // cdnaVariantStart points to the same base than variantEnd cdnaVariantEnd = null; // cdnaVariantEnd points to the same base than variantStart junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (variantEnd - exonStart); // } } // Variant includes the whole exon. Variant end is located before the exon, variant start is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded prevSpliceSite = exonStart - 1; exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = ((String) exonInfo.get("sequence")) + transcriptSequence; solveJunction(isInsertion, exonEnd + 1, prevSpliceSite, variantStart, variantEnd, SoNames, "splice_acceptor_variant", "splice_donor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantEnd <= exonEnd) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Only variant start within the exon ----||||||||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } if (miRnaInfo != null) { // miRNA with miRBase data BasicDBList matureMiRnaInfo = (BasicDBList) miRnaInfo.get("matures"); int i = 0; while (i < matureMiRnaInfo.size() && !regionsOverlap((Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaStart"), (Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaEnd"), cdnaVariantStart, cdnaVariantEnd)) { i++; } if (i < matureMiRnaInfo.size()) { // Variant overlaps at least one mature miRNA SoNames.add("mature_miRNA_variant"); } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } }
From source file:org.opencb.cellbase.mongodb.db.VariantAnnotationMongoDBAdaptor.java
License:Apache License
public List<QueryResult> getAnnotationByVariantList(List<GenomicVariant> variantList, QueryOptions queryOptions) {/*from w w w.j av a2 s . c o m*/ List<QueryResult> variationQueryResultList = variationDBAdaptor.getAllByVariantList(variantList, queryOptions); List<QueryResult> clinicalQueryResultList = clinicalDBAdaptor.getAllByGenomicVariantList(variantList, queryOptions); List<QueryResult> variationConsequenceTypeList = getAllConsequenceTypesByVariantList(variantList, queryOptions); List<QueryResult> conservedRegionQueryResultList = conservedRegionDBAdaptor .getAllScoresByRegionList(variantListToRegionList(variantList), queryOptions); VariantAnnotation variantAnnotation; Integer i = 0; for (QueryResult clinicalQueryResult : clinicalQueryResultList) { Map<String, Object> phenotype = new HashMap<>(); if (clinicalQueryResult.getResult() != null && clinicalQueryResult.getResult().size() > 0) { phenotype = (Map<String, Object>) clinicalQueryResult.getResult().get(0); } List<ConsequenceType> consequenceTypeList = (List<ConsequenceType>) variationConsequenceTypeList.get(i) .getResult(); // TODO: start & end are both being set to variantList.get(i).getPosition(), modify this for indels variantAnnotation = new VariantAnnotation(variantList.get(i).getChromosome(), variantList.get(i).getPosition(), variantList.get(i).getPosition(), variantList.get(i).getReference(), variantList.get(i).getAlternative()); variantAnnotation.setClinicalData(phenotype); variantAnnotation.setConsequenceTypes(consequenceTypeList); variantAnnotation .setConservedRegionScores((List<Score>) conservedRegionQueryResultList.get(i).getResult()); List<BasicDBObject> variationDBList = (List<BasicDBObject>) variationQueryResultList.get(i).getResult(); if (variationDBList != null && variationDBList.size() > 0) { String id = null; id = ((BasicDBObject) variationDBList.get(0)).get("id").toString(); variantAnnotation.setId(id); BasicDBList freqsDBList = null; if ((freqsDBList = (BasicDBList) ((BasicDBObject) variationDBList.get(0)) .get("populationFrequencies")) != null) { BasicDBObject freqDBObject; for (int j = 0; j < freqsDBList.size(); j++) { freqDBObject = ((BasicDBObject) freqsDBList.get(j)); variantAnnotation.addPopulationFrequency(new PopulationFrequency( freqDBObject.get("study").toString(), freqDBObject.get("pop").toString(), freqDBObject.get("superPop").toString(), freqDBObject.get("refAllele").toString(), freqDBObject.get("altAllele").toString(), Float.valueOf(freqDBObject.get("refAlleleFreq").toString()), Float.valueOf(freqDBObject.get("altAlleleFreq").toString()))); } } } List<VariantAnnotation> value = Collections.singletonList(variantAnnotation); clinicalQueryResult.setResult(value); i++; } return clinicalQueryResultList; }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solvePositiveCodingEffect(Boolean splicing, String transcriptSequence, String chromosome, Integer transcriptEnd, Integer genomicCodingEnd, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart, Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt, HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) { Boolean codingAnnotationAdded = false; // This will indicate wether it is needed to add the "coding_sequence_variant" annotation or not if (variantAlt.equals("-")) { // Deletion if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0 SoNames.add("initiator_codon_variant"); codingAnnotationAdded = true; }//from w w w . j ava2 s . co m if (cdnaVariantEnd != null) { int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3; Boolean stopToSolve = true; if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null codingAnnotationAdded = true; if (variantRef.length() % 3 == 0) { SoNames.add("inframe_deletion"); } else { SoNames.add("frameshift_variant"); } stopToSolve = false; // Stop codon annotation will be solved in the line below. solveStopCodonPositiveDeletion(transcriptSequence, chromosome, transcriptEnd, cdnaCodingStart, cdnaVariantStart, cdnaVariantEnd, SoNames); } if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) { if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) { if (finalNtPhase != 2) { SoNames.add("incomplete_terminal_codon_variant"); } } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above SoNames.add("stop_lost"); } } } } else { if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion. Be careful: insertion coordinates are special, alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd codingAnnotationAdded = true; if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0 SoNames.add("initiator_codon_variant"); } int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3; if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase)) && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { // Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3. SoNames.add("incomplete_terminal_codon_variant"); } if (variantAlt.length() % 3 == 0) { SoNames.add("inframe_insertion"); } else { SoNames.add("frameshift_variant"); } solveStopCodonPositiveInsertion(transcriptSequence, chromosome, transcriptEnd, cdnaCodingStart, cdnaVariantStart, variantAlt, SoNames); // if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0 // if (cdnaVariantStart != null && cdnaVariantStart > (cdnaCodingEnd - 3)) { // -3 because alternative nts are pasted between cdnaVariantStart and cdnaVariantEnd // char[] modifiedCodonArray = solveStopCodonPositiveInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantStart, variantAlt); // if(isStopCodon(String.valueOf(modifiedCodonArray))) { // SoNames.add("stop_retained_variant"); // } else { // SoNames.add("stop_lost"); // } // } // } else { // Be careful, strict > since this is a insertion, inserted nts are pasted on the left of cdnaVariantStart // } } else { // SNV if (cdnaVariantStart != null) { int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3; if (!splicing) { if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase)) && (transcriptEnd.equals(genomicCodingEnd)) && finalNtPhase != 2) { // Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3. SoNames.add("incomplete_terminal_codon_variant"); // If not, avoid calculating reference/modified codon } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0 Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3; int modifiedCodonStart = cdnaVariantStart - variantPhaseShift; String referenceCodon = transcriptSequence.substring(modifiedCodonStart - 1, modifiedCodonStart + 2); // -1 and +2 because of base 0 String indexing char[] modifiedCodonArray = referenceCodon.toCharArray(); modifiedCodonArray[variantPhaseShift] = variantAlt.toCharArray()[0]; codingAnnotationAdded = true; String referenceA = codonToA.get(referenceCodon); String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray)); if (isSynonymousCodon.get(referenceCodon).get(String.valueOf(modifiedCodonArray))) { if (isStopCodon(referenceCodon)) { SoNames.add("stop_retained_variant"); } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant SoNames.add("synonymous_variant"); } } else { if (cdnaVariantStart < (cdnaCodingStart + 3)) { SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below if (isStopCodon(String.valueOf(modifiedCodonArray))) { SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below } } else if (isStopCodon(String.valueOf(referenceCodon))) { SoNames.add("stop_lost"); } else { SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained" : "missense_variant"); } if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2 QueryResult proteinSubstitutionScoresQueryResult = proteinDBAdaptor .getFunctionPredictionByAaChange( consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getAaPosition(), alternativeA, new QueryOptions()); if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) { BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult .getResult().get(0); if (proteinSubstitutionScores.get("ss") != null) { consequenceTypeTemplate.addProteinSubstitutionScore(new Score( Double.parseDouble("" + proteinSubstitutionScores.get("ss")), "sift", siftDescriptions.get(proteinSubstitutionScores.get("se")))); } if (proteinSubstitutionScores.get("ps") != null) { consequenceTypeTemplate.addProteinSubstitutionScore(new Score( Double.parseDouble("" + proteinSubstitutionScores.get("ps")), "polyphen", polyphenDescriptions.get(proteinSubstitutionScores.get("pe")))); } } } } // Set consequenceTypeTemplate.aChange consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA); // Set consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters char[] referenceCodonArray = referenceCodon.toLowerCase().toCharArray(); referenceCodonArray[variantPhaseShift] = Character .toUpperCase(referenceCodonArray[variantPhaseShift]); modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray(); modifiedCodonArray[variantPhaseShift] = Character .toUpperCase(modifiedCodonArray[variantPhaseShift]); consequenceTypeTemplate.setCodon( String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray)); } } } } } if (!codingAnnotationAdded) { SoNames.add("coding_sequence_variant"); } }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveNegativeCodingEffect(Boolean splicing, String transcriptSequence, String chromosome, Integer transcriptStart, Integer genomicCodingStart, Integer cdnaCodingStart, Integer cdnaCodingEnd, Integer cdnaVariantStart, Integer cdnaVariantEnd, BasicDBList transcriptFlags, String variantRef, String variantAlt, HashSet<String> SoNames, ConsequenceType consequenceTypeTemplate) { Boolean codingAnnotationAdded = false; if (variantAlt.equals("-")) { // Deletion if (cdnaVariantStart != null && cdnaVariantStart < (cdnaCodingStart + 3) && (transcriptFlags == null || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0 SoNames.add("initiator_codon_variant"); codingAnnotationAdded = true; }/* w w w . j a va 2 s.c o m*/ if (cdnaVariantEnd != null) { int finalNtPhase = (cdnaCodingEnd - cdnaCodingStart) % 3; Boolean stopToSolve = true; if (!splicing && cdnaVariantStart != null) { // just checks cdnaVariantStart!=null because no splicing means cdnaVariantEnd is also != null codingAnnotationAdded = true; if (variantRef.length() % 3 == 0) { SoNames.add("inframe_deletion"); } else { SoNames.add("frameshift_variant"); } stopToSolve = false; // Stop codon annotation will be solved in the line below. solveStopCodonNegativeDeletion(transcriptSequence, chromosome, transcriptStart, cdnaCodingStart, cdnaVariantStart, cdnaVariantEnd, SoNames); } if (cdnaVariantEnd >= (cdnaCodingEnd - finalNtPhase)) { if (transcriptFlags != null && transcriptFlags.contains("cds_end_NF")) { if (finalNtPhase != 2) { SoNames.add("incomplete_terminal_codon_variant"); } } else if (stopToSolve) { // Only if stop codon annotation was not already solved in the if block above SoNames.add("stop_lost"); } } } } else { if (variantRef.equals("-") && (cdnaVariantStart != null)) { // Insertion TODO: I've seen insertions within Cellbase-mongo with a ref != - codingAnnotationAdded = true; if (cdnaVariantStart < (cdnaCodingStart + 2) && (transcriptFlags == null || cdnaCodingStart > 0 || !transcriptFlags.contains("cds_start_NF"))) { // cdnaVariantStart=null if variant is intronic. cdnaCodingStart<1 if cds_start_NF and phase!=0 SoNames.add("initiator_codon_variant"); } int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3; if ((cdnaVariantStart >= (transcriptSequence.length() - finalNtPhase)) && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { // Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3. SoNames.add("incomplete_terminal_codon_variant"); } if (variantAlt.length() % 3 == 0) { SoNames.add("inframe_insertion"); } else { SoNames.add("frameshift_variant"); } solveStopCodonNegativeInsertion(transcriptSequence, chromosome, transcriptStart, cdnaCodingStart, cdnaVariantEnd, variantAlt, SoNames); // Be careful, cdnaVariantEnd is being used in this case!!! // if(cdnaCodingEnd!=0) { // Some transcripts do not have a STOP codon annotated in the ENSEMBL gtf. This causes CellbaseBuilder to leave cdnaVariantEnd to 0 // if (cdnaVariantEnd != null && cdnaVariantEnd > (cdnaCodingEnd - 3)) { // -3 because alternative nts are pasted on the left of >>>genomic<<<VariantStart // char[] modifiedCodonArray = solveStopCodonNegativeInsertion(transcriptSequence, cdnaCodingStart, cdnaVariantEnd, variantAlt); // Be careful, cdnaVariantEnd is being used in this case!!! // if(isStopCodon(String.valueOf(modifiedCodonArray))) { // SoNames.add("stop_retained_variant"); // } else { // SoNames.add("stop_lost"); // } // } // } else { // } // if(cdnaVariantStart != null) { // if(!splicing && cdnaVariantStart != null) { // } } else { // SNV if (cdnaVariantStart != null) { int finalNtPhase = (transcriptSequence.length() - cdnaCodingStart) % 3; if (!splicing) { if ((cdnaVariantEnd >= (transcriptSequence.length() - finalNtPhase)) && (transcriptStart.equals(genomicCodingStart)) && finalNtPhase != 2) { // Variant in the last codon of a transcript without stop codon. finalNtPhase==2 if the cds length is multiple of 3. SoNames.add("incomplete_terminal_codon_variant"); // If that is the case and variant ocurs in the last complete/incomplete codon, no coding prediction is needed } else if (cdnaVariantStart > (cdnaCodingStart + 2) || cdnaCodingStart > 0) { // cdnaCodingStart<1 if cds_start_NF and phase!=0 Integer variantPhaseShift = (cdnaVariantStart - cdnaCodingStart) % 3; int modifiedCodonStart = cdnaVariantStart - variantPhaseShift; String reverseCodon = new StringBuilder(transcriptSequence.substring( transcriptSequence.length() - modifiedCodonStart - 2, transcriptSequence.length() - modifiedCodonStart + 1)).reverse().toString(); // Rigth limit of the substring sums +1 because substring does not include that position char[] referenceCodon = reverseCodon.toCharArray(); referenceCodon[0] = complementaryNt.get(referenceCodon[0]); referenceCodon[1] = complementaryNt.get(referenceCodon[1]); referenceCodon[2] = complementaryNt.get(referenceCodon[2]); char[] modifiedCodonArray = referenceCodon.clone(); modifiedCodonArray[variantPhaseShift] = complementaryNt .get(variantAlt.toCharArray()[0]); codingAnnotationAdded = true; String referenceA = codonToA.get(String.valueOf(referenceCodon)); String alternativeA = codonToA.get(String.valueOf(modifiedCodonArray)); if (isSynonymousCodon.get(String.valueOf(referenceCodon)) .get(String.valueOf(modifiedCodonArray))) { if (isStopCodon(String.valueOf(referenceCodon))) { SoNames.add("stop_retained_variant"); } else { // coding end may be not correctly annotated (incomplete_terminal_codon_variant), but if the length of the cds%3=0, annotation should be synonymous variant SoNames.add("synonymous_variant"); } } else { if (cdnaVariantStart < (cdnaCodingStart + 3)) { SoNames.add("initiator_codon_variant"); // Gary - initiator codon SO terms not compatible with the terms below if (isStopCodon(String.valueOf(modifiedCodonArray))) { SoNames.add("stop_gained"); // Gary - initiator codon SO terms not compatible with the terms below } } else if (isStopCodon(String.valueOf(referenceCodon))) { SoNames.add("stop_lost"); } else { SoNames.add(isStopCodon(String.valueOf(modifiedCodonArray)) ? "stop_gained" : "missense_variant"); } if (cdnaVariantEnd < (cdnaCodingEnd - 2)) { // Variant does not affect the last codon (probably stop codon). If the 3prime end is incompletely annotated and execution reaches this line, finalNtPhase can only be 2 QueryResult proteinSubstitutionScoresQueryResult = proteinDBAdaptor .getFunctionPredictionByAaChange( consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getAaPosition(), alternativeA, new QueryOptions()); if (proteinSubstitutionScoresQueryResult.getNumResults() == 1) { BasicDBObject proteinSubstitutionScores = (BasicDBObject) proteinSubstitutionScoresQueryResult .getResult().get(0); if (proteinSubstitutionScores.get("ss") != null) { consequenceTypeTemplate.addProteinSubstitutionScore(new Score( Double.parseDouble("" + proteinSubstitutionScores.get("ss")), "Sift", siftDescriptions.get(proteinSubstitutionScores.get("se")))); } if (proteinSubstitutionScores.get("ps") != null) { consequenceTypeTemplate.addProteinSubstitutionScore(new Score( Double.parseDouble("" + proteinSubstitutionScores.get("ps")), "Polyphen", polyphenDescriptions.get(proteinSubstitutionScores.get("pe")))); } } } } // Set consequenceTypeTemplate.aChange consequenceTypeTemplate.setAaChange(referenceA + "/" + alternativeA); // Fill consequenceTypeTemplate.codon leaving only the nt that changes in uppercase. Careful with upper/lower case letters char[] referenceCodonArray = String.valueOf(referenceCodon).toLowerCase().toCharArray(); referenceCodonArray[variantPhaseShift] = Character .toUpperCase(referenceCodonArray[variantPhaseShift]); modifiedCodonArray = String.valueOf(modifiedCodonArray).toLowerCase().toCharArray(); modifiedCodonArray[variantPhaseShift] = Character .toUpperCase(modifiedCodonArray[variantPhaseShift]); consequenceTypeTemplate.setCodon( String.valueOf(referenceCodonArray) + "/" + String.valueOf(modifiedCodonArray)); } } } } } if (!codingAnnotationAdded) { SoNames.add("coding_sequence_variant"); } }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
@Override public QueryResult getAllConsequenceTypesByVariant(GenomicVariant variant, QueryOptions options) { Logger logger = LoggerFactory.getLogger(this.getClass()); HashSet<String> SoNames = new HashSet<>(); List<ConsequenceType> consequenceTypeList = new ArrayList<>(); QueryResult queryResult = new QueryResult(); QueryBuilder builderGene = null;/*from w w w . j a v a 2 s. c om*/ QueryBuilder builderRegulatory = null; BasicDBList transcriptInfoList = null; BasicDBList exonInfoList; BasicDBObject miRnaInfo; BasicDBObject transcriptInfo, exonInfo; BasicDBObject geneInfo; BasicDBObject regulatoryInfo; Integer geneStart, geneEnd, transcriptStart, transcriptEnd, exonStart, exonEnd, genomicCodingStart, genomicCodingEnd; Integer cdnaCodingStart, cdnaCodingEnd, cdnaExonStart, cdnaExonEnd, cdnaVariantStart, cdnaVariantEnd, prevSpliceSite; Integer regulatoryStart, regulatoryEnd, cdsLength; Integer variantStart; Integer variantEnd; String geneStrand, transcriptStrand, exonSequence, transcriptSequence; String regulatoryChromosome, regulatoryType; String nextCodonNucleotides = ""; String ensemblTranscriptId; String geneName; String ensemblGeneId; int transcriptBiotype; long dbTimeStart, dbTimeEnd; Boolean splicing, coding, exonsRemain, variantAhead, exonVariant, TFBSFound; int exonCounter, i; ConsequenceType consequenceTypeTemplate = new ConsequenceType(); variantEnd = variant.getPosition() + variant.getReference().length() - 1; //TODO: Check deletion input format to ensure that variantEnd is correctly calculated Boolean isInsertion = variant.getReference().equals("-"); if (isInsertion) { variantStart = variant.getPosition() - 1; } else { variantStart = variant.getPosition(); } if (variant.getAlternative().equalsIgnoreCase("<INS>") || variant.getAlternative().equalsIgnoreCase("<DEL>")) { queryResult.setErrorMsg("INS and DEL are not yet implemented"); queryResult.setNumResults(1); queryResult.setResult(consequenceTypeList); return queryResult; } // Execute query and calculate time dbTimeStart = System.currentTimeMillis(); getAffectedGenesInfo(variant.getChromosome(), variantStart, variantEnd); QueryResult regulatoryQueryResult = regulatoryRegionDBAdaptor .getAllByRegion(new Region(variant.getChromosome(), variantStart, variantEnd), options); dbTimeEnd = System.currentTimeMillis(); for (Object geneInfoObject : geneInfoList) { geneInfo = (BasicDBObject) geneInfoObject; consequenceTypeTemplate.setGeneName((String) geneInfo.get("name")); consequenceTypeTemplate.setEnsemblGeneId((String) geneInfo.get("id")); consequenceTypeTemplate.setExpressionValues(new ArrayList<ExpressionValue>()); // if(geneInfo.get("expressionValues")!=null) { // ObjectMapper objectMapper = new ObjectMapper(); // for (Object expressionBasicDBObject : (BasicDBList) geneInfo.get("expressionValues")) { // consequenceTypeTemplate.getExpressionValues().add(objectMapper.convertValue(expressionBasicDBObject, ExpressionValue.class)); // } // } transcriptInfoList = (BasicDBList) geneInfo.get("transcripts"); for (Object transcriptInfoObject : transcriptInfoList) { transcriptInfo = (BasicDBObject) transcriptInfoObject; ensemblTranscriptId = (String) transcriptInfo.get("id"); transcriptStart = (Integer) transcriptInfo.get("start"); transcriptEnd = (Integer) transcriptInfo.get("end"); transcriptStrand = (String) transcriptInfo.get("strand"); cdsLength = (Integer) transcriptInfo.get("cdsLength"); BasicDBList transcriptFlags = (BasicDBList) transcriptInfo.get("annotationFlags"); try { transcriptBiotype = biotypes.get((String) transcriptInfo.get("biotype")); } catch (NullPointerException e) { // logger.info("WARNING: biotype not found within the list of hardcoded biotypes - "+transcriptInfo.get("biotype")); // logger.info("WARNING: transcript: "+ensemblTranscriptId); // logger.info("WARNING: setting transcript biotype to non_coding "); transcriptBiotype = 45; } SoNames.clear(); consequenceTypeTemplate.setEnsemblTranscriptId(ensemblTranscriptId); consequenceTypeTemplate.setcDnaPosition(null); consequenceTypeTemplate.setCdsPosition(null); consequenceTypeTemplate.setAaPosition(null); consequenceTypeTemplate.setAaChange(null); consequenceTypeTemplate.setCodon(null); consequenceTypeTemplate.setStrand(transcriptStrand); consequenceTypeTemplate.setBiotype((String) transcriptInfo.get("biotype")); consequenceTypeTemplate.setProteinSubstitutionScores(null); miRnaInfo = null; if (transcriptStrand.equals("+")) { if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), Collections.singletonList("transcript_ablation"), consequenceTypeTemplate.getExpressionValues())); } else { // Check variant overlaps transcript start/end coordinates if (regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd) && !(isInsertion && (variantEnd.equals(transcriptStart) || // Insertion just before the first transcript nt variantStart.equals(transcriptEnd)))) { // Insertion just after the last transcript nt if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion SoNames.add("feature_truncation"); } switch (transcriptBiotype) { /** * Coding biotypes */ case 30: SoNames.add("NMD_transcript_variant"); case 1: case 3: case 4: case 6: case 10: // TR_C_gene case 11: // TR_D_gene case 12: // TR_J_gene case 14: // TR_V_gene case 20: case 23: // protein_coding case 34: // non_stop_decay case 36: case 50: // translated_unprocessed_pseudogene case 51: // LRG_gene solveCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength, transcriptFlags, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), consequenceTypeTemplate.getCdsPosition(), consequenceTypeTemplate.getAaPosition(), consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(), consequenceTypeTemplate.getProteinSubstitutionScores(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; /** * pseudogenes, antisense should not be annotated as non-coding genes */ case 39: case 40: case 41: case 42: case 43: case 44: case 49: solveNonCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, null, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; /** * Non-coding biotypes */ case 18: // miRNA miRnaInfo = (BasicDBObject) geneInfo.get("mirna"); case 2: // case 5: // case 7: // IG_V_pseudogene case 13: case 15: case 0: // 3prime_overlapping_ncrna case 16: // antisense TODO: move to coding? case 17: // lincRNA case 19: case 21: // processed_pseudogene case 22: // processed_transcript case 24: // pseudogene case 25: case 26: // sense_intronic case 27: // sense_overlapping case 28: case 29: case 31: // unprocessed_pseudogene case 32: // transcribed_unprocessed_pseudogene case 33: // retained_intron case 35: // unitary_pseudogene case 37: // transcribed_processed_pseudogene case 38: case 45: case 46: case 47: case 48: solveNonCodingPositiveTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; } } else { solveTranscriptFlankingRegions(SoNames, transcriptStart, transcriptEnd, variantStart, variantEnd, "upstream_gene_variant", "downstream_gene_variant"); if (SoNames.size() > 0) { // Variant does not overlap gene region, just may have upstream/downstream annotations consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); } } } } else { if (variantStart <= transcriptStart && variantEnd >= transcriptEnd) { // Deletion - whole transcript removed consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), Collections.singletonList("transcript_ablation"), consequenceTypeTemplate.getExpressionValues())); } else { // Check overlaps transcript start/end coordinates if (regionsOverlap(transcriptStart, transcriptEnd, variantStart, variantEnd) && !(isInsertion && (variantEnd.equals(transcriptStart) || // Insertion just before the first transcript nt variantStart.equals(transcriptEnd)))) { // Insertion just after the last transcript nt if ((variantEnd - variantStart) > bigVariantSizeThreshold) { // Big deletion SoNames.add("feature_truncation"); } switch (transcriptBiotype) { /** * Coding biotypes */ case 30: SoNames.add("NMD_transcript_variant"); case 1: case 3: case 4: case 6: case 10: // TR_C_gene case 11: // TR_D_gene case 12: // TR_J_gene case 14: // TR_V_gene case 20: case 23: case 34: // non_stop_decay case 36: case 50: // translated_unprocessed_pseudogene case 51: // LRG_gene solveCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, variantStart, variantEnd, cdsLength, transcriptFlags, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), consequenceTypeTemplate.getCdsPosition(), consequenceTypeTemplate.getAaPosition(), consequenceTypeTemplate.getAaChange(), consequenceTypeTemplate.getCodon(), consequenceTypeTemplate.getProteinSubstitutionScores(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; /** * pseudogenes, antisense should not be annotated as non-coding genes */ case 39: case 40: case 41: case 42: case 43: case 44: case 49: solveNonCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, null, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; /** * Non-coding biotypes */ case 18: // miRNA miRnaInfo = (BasicDBObject) geneInfo.get("mirna"); case 2: // case 5: // case 7: // IG_V_pseudogene case 13: case 15: case 0: // 3prime_overlapping_ncrna case 17: // lincRNA case 16: // antisense TODO: move to coding? case 19: case 21: // processed_pseudogene case 22: // processed_transcript case 24: // pseudogene case 25: case 26: // sense_intronic case 27: // sense_overlapping case 28: case 29: case 31: // unprocessed_pseudogene case 32: // transcribed_unprocessed_pseudogen case 33: // retained_intron case 35: // unitary_pseudogene case 37: // transcribed_processed_pseudogene case 38: case 45: case 46: case 47: case 48: solveNonCodingNegativeTranscript(isInsertion, variant, SoNames, transcriptInfo, transcriptStart, transcriptEnd, miRnaInfo, variantStart, variantEnd, consequenceTypeTemplate); consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), consequenceTypeTemplate.getcDnaPosition(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); break; } } else { solveTranscriptFlankingRegions(SoNames, transcriptStart, transcriptEnd, variantStart, variantEnd, "downstream_gene_variant", "upstream_gene_variant"); if (SoNames.size() > 0) { // Variant does not overlap gene region, just has upstream/downstream annotations consequenceTypeList.add(new ConsequenceType(consequenceTypeTemplate.getGeneName(), consequenceTypeTemplate.getEnsemblGeneId(), consequenceTypeTemplate.getEnsemblTranscriptId(), consequenceTypeTemplate.getStrand(), consequenceTypeTemplate.getBiotype(), new ArrayList<>(SoNames), consequenceTypeTemplate.getExpressionValues())); } } } } } } if (consequenceTypeList.size() == 0) { consequenceTypeList.add(new ConsequenceType("intergenic_variant")); } LinkedList regulatoryInfoList = (LinkedList) regulatoryQueryResult.getResult(); // BasicDBList regulatoryInfoList = (BasicDBList) regulatoryQueryResult.getResult(); if (!regulatoryInfoList.isEmpty()) { consequenceTypeList.add(new ConsequenceType("regulatory_region_variant")); i = 0; do { regulatoryInfo = (BasicDBObject) regulatoryInfoList.get(i); regulatoryType = (String) regulatoryInfo.get("featureType"); TFBSFound = regulatoryType.equals("TF_binding_site") || regulatoryType.equals("TF_binding_site_motif"); i++; } while (i < regulatoryInfoList.size() && !TFBSFound); if (TFBSFound) { consequenceTypeList.add(new ConsequenceType("TF_binding_site_variant")); } } else { int b; b = 1; } // if(transcriptInfoList == null) { // consequenceTypeList.add(new ConsequenceType("intergenic_variant")); // } // consequenceTypeList = filterConsequenceTypesBySoTerms(consequenceTypeList, options.getAsStringList("so")); // setting queryResult fields queryResult.setId(variant.toString()); queryResult.setDbTime(Long.valueOf(dbTimeEnd - dbTimeStart).intValue()); queryResult.setNumResults(consequenceTypeList.size()); queryResult.setResult(consequenceTypeList); return queryResult; }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart, Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags, ConsequenceType consequenceTypeTemplate) { Integer genomicCodingStart;// w w w. j av a 2 s . c o m Integer genomicCodingEnd; Integer cdnaCodingStart; Integer cdnaCodingEnd; BasicDBList exonInfoList; BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; int firstCdsPhase = -1; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart"); genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd"); cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart"); cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd"); exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); cdnaVariantStart = null; cdnaVariantEnd = null; junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { firstCdsPhase = (int) exonInfo.get("phase"); } if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Variant start within the exon cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); // } } // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded exonStart = (Integer) exonInfo.get("start"); prevSpliceSite = exonEnd + 1; exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = transcriptSequence + ((String) exonInfo.get("sequence")); if (firstCdsPhase == -1 && genomicCodingStart <= exonEnd) { // Set firsCdsPhase only when the first coding exon is reached firstCdsPhase = (int) exonInfo.get("phase"); } solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames, "splice_donor_variant", "splice_acceptor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantStart >= exonStart) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantStart <= exonEnd) { // Variant start within the exon cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- cdnaExonEnd += (exonEnd - exonStart + 1); cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } // Is not intron variant (both ends fall within the same intron) if (!junctionSolution[1]) { if (isInsertion) { if (cdnaVariantStart == null && cdnaVariantEnd != null) { // To account for those insertions in the 3' end of an intron cdnaVariantStart = cdnaVariantEnd - 1; } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // To account for those insertions in the 5' end of an intron cdnaVariantEnd = cdnaVariantStart + 1; } } solveCodingPositiveTranscriptEffect(splicing, transcriptSequence, variant.getChromosome(), transcriptStart, transcriptEnd, genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd, cdnaVariantStart, cdnaVariantEnd, cdsLength, transcriptFlags, // Be careful, originalVariantStart is used here! firstCdsPhase, variant.getReference(), variant.getAlternative(), SoNames, consequenceTypeTemplate); } }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveCodingNegativeTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, Integer variantStart, Integer variantEnd, Integer cdsLength, BasicDBList transcriptFlags, ConsequenceType consequenceTypeTemplate) { Integer genomicCodingStart;//from ww w .jav a 2 s .c o m Integer genomicCodingEnd; Integer cdnaCodingStart; Integer cdnaCodingEnd; BasicDBList exonInfoList; BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; int firstCdsPhase = -1; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; genomicCodingStart = (Integer) transcriptInfo.get("genomicCodingStart"); genomicCodingEnd = (Integer) transcriptInfo.get("genomicCodingEnd"); cdnaCodingStart = (Integer) transcriptInfo.get("cdnaCodingStart"); cdnaCodingEnd = (Integer) transcriptInfo.get("cdnaCodingEnd"); exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); // cdnaExonEnd poinst to the same base than exonStart cdnaVariantStart = null; // cdnaVariantStart points to the same base than variantEnd cdnaVariantEnd = null; // cdnaVariantEnd points to the same base than variantStart junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (firstCdsPhase == -1 && genomicCodingEnd >= exonStart) { firstCdsPhase = (int) exonInfo.get("phase"); } if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (variantEnd - exonStart); // } } // Variant includes the whole exon. Variant end is located before the exon, variant start is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded prevSpliceSite = exonStart - 1; exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = ((String) exonInfo.get("sequence")) + transcriptSequence; if (firstCdsPhase == -1 && genomicCodingEnd >= exonStart) { // Set firsCdsPhase only when the first coding exon is reached firstCdsPhase = (int) exonInfo.get("phase"); } solveJunction(isInsertion, exonEnd + 1, prevSpliceSite, variantStart, variantEnd, SoNames, "splice_acceptor_variant", "splice_donor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantEnd <= exonEnd) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantEnd >= exonStart) { // Variant end within the exon cdnaVariantStart = cdnaExonEnd - (variantEnd - exonStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantStart >= exonStart) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } } } else { if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Only variant start within the exon ----||||||||||E||||---- cdnaExonEnd += (exonEnd - exonStart + 1); cdnaVariantEnd = cdnaExonEnd - (variantStart - exonStart); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } // Is not intron variant (both ends fall within the same intron) if (!junctionSolution[1]) { if (isInsertion) { if (cdnaVariantStart == null && cdnaVariantEnd != null) { // To account for those insertions in the 3' end of an intron cdnaVariantStart = cdnaVariantEnd - 1; } else if (cdnaVariantEnd == null && cdnaVariantStart != null) { // To account for those insertions in the 5' end of an intron cdnaVariantEnd = cdnaVariantStart + 1; } } solveCodingNegativeTranscriptEffect(splicing, transcriptSequence, variant.getChromosome(), transcriptStart, transcriptEnd, genomicCodingStart, genomicCodingEnd, variantStart, variantEnd, cdnaCodingStart, cdnaCodingEnd, cdnaVariantStart, cdnaVariantEnd, cdsLength, transcriptFlags, firstCdsPhase, variant.getReference(), variant.getAlternative(), SoNames, consequenceTypeTemplate); } }
From source file:org.opencb.cellbase.mongodb.db.variation.VariantAnnotationMongoDBAdaptor.java
License:Apache License
private void solveNonCodingPositiveTranscript(Boolean isInsertion, GenomicVariant variant, HashSet<String> SoNames, BasicDBObject transcriptInfo, Integer transcriptStart, Integer transcriptEnd, BasicDBObject miRnaInfo, Integer variantStart, Integer variantEnd, ConsequenceType consequenceTypeTemplate) { BasicDBList exonInfoList;/*from w w w .j a v a 2 s. c o m*/ BasicDBObject exonInfo; Integer exonStart; Integer exonEnd; String transcriptSequence; Boolean variantAhead; Integer cdnaExonEnd; Integer cdnaVariantStart; Integer cdnaVariantEnd; Boolean splicing; int exonCounter; Integer prevSpliceSite; Boolean[] junctionSolution = { false, false }; exonInfoList = (BasicDBList) transcriptInfo.get("exons"); exonInfo = (BasicDBObject) exonInfoList.get(0); exonStart = (Integer) exonInfo.get("start"); exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = (String) exonInfo.get("sequence"); variantAhead = true; // we need a first iteration within the while to ensure junction is solved in case needed cdnaExonEnd = (exonEnd - exonStart + 1); cdnaVariantStart = null; cdnaVariantEnd = null; junctionSolution[0] = false; junctionSolution[1] = false; splicing = false; if (variantStart >= exonStart) { if (variantStart <= exonEnd) { // Variant start within the exon. Set cdnaPosition in consequenceTypeTemplate cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { // if(variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- // We do not contemplate that variant end can be located before this exon since this is the first exon cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); // } } // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon } exonCounter = 1; while (exonCounter < exonInfoList.size() && variantAhead) { // This is not a do-while since we cannot call solveJunction until // while(exonCounter<exonInfoList.size() && !splicing && variantAhead) { // This is not a do-while since we cannot call solveJunction until exonInfo = (BasicDBObject) exonInfoList.get(exonCounter); // next exon has been loaded exonStart = (Integer) exonInfo.get("start"); prevSpliceSite = exonEnd + 1; exonEnd = (Integer) exonInfo.get("end"); transcriptSequence = transcriptSequence + ((String) exonInfo.get("sequence")); solveJunction(isInsertion, prevSpliceSite, exonStart - 1, variantStart, variantEnd, SoNames, "splice_donor_variant", "splice_acceptor_variant", junctionSolution); splicing = (splicing || junctionSolution[0]); if (variantStart >= exonStart) { cdnaExonEnd += (exonEnd - exonStart + 1); if (variantStart <= exonEnd) { // Variant start within the exon. Set cdnaPosition in consequenceTypeTemplate cdnaVariantStart = cdnaExonEnd - (exonEnd - variantStart); consequenceTypeTemplate.setcDnaPosition(cdnaVariantStart); if (variantEnd <= exonEnd) { // Both variant start and variant end within the exon ----||||S|||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } } } else { if (variantEnd <= exonEnd) { if (variantEnd >= exonStart) { // Only variant end within the exon ----||||||||||E||||---- cdnaVariantEnd = cdnaExonEnd - (exonEnd - variantEnd); } else { // Variant does not include this exon, variant is located before this exon variantAhead = false; } } else { // Variant includes the whole exon. Variant start is located before the exon, variant end is located after the exon cdnaExonEnd += (exonEnd - exonStart + 1); } } exonCounter++; } if (miRnaInfo != null) { // miRNA with miRBase data BasicDBList matureMiRnaInfo = (BasicDBList) miRnaInfo.get("matures"); if (cdnaVariantStart == null) { // Probably deletion starting before the miRNA location cdnaVariantStart = 1; // Truncate to the first transcript position to avoid null exception } if (cdnaVariantEnd == null) { // Probably deletion ending after the miRNA location cdnaVariantEnd = ((String) miRnaInfo.get("sequence")).length(); // Truncate to the last transcript position to avoid null exception } int i = 0; while (i < matureMiRnaInfo.size() && !regionsOverlap((Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaStart"), (Integer) ((BasicDBObject) matureMiRnaInfo.get(i)).get("cdnaEnd"), cdnaVariantStart == null ? 1 : cdnaVariantStart, cdnaVariantEnd == null ? cdnaExonEnd : cdnaVariantEnd)) { i++; } if (i < matureMiRnaInfo.size()) { // Variant overlaps at least one mature miRNA SoNames.add("mature_miRNA_variant"); } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } } else { if (!junctionSolution[1]) { // Exon variant SoNames.add("non_coding_transcript_exon_variant"); } SoNames.add("non_coding_transcript_variant"); } }