List of usage examples for the com.mongodb.BasicDBList constructor BasicDBList()
BasicDBList
From source file:org.opencb.cellbase.mongodb.db.variation.ClinicalMongoDBAdaptor.java
License:Apache License
@Deprecated private DBObject getClinvarRegionAggregationFilterDBObject(List<Region> regionList) { BasicDBList orDBList = new BasicDBList(); for (Region region : regionList) { BasicDBList andDBList = new BasicDBList(); andDBList.add(new BasicDBObject("chromosome", region.getChromosome())); andDBList.add(new BasicDBObject("end", new BasicDBObject("$gte", region.getStart()))); andDBList.add(new BasicDBObject("start", new BasicDBObject("$lte", region.getEnd()))); orDBList.add(new BasicDBObject("$and", andDBList)); }// w w w. ja v a 2s . c o m return new BasicDBObject("$match", new BasicDBObject("$or", orDBList)); }
From source file:org.opencb.cellbase.mongodb.db.variation.VariationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllPhenotypeByRegion(List<Region> regions, QueryOptions options) { QueryBuilder builder = null;/* ww w .j a v a 2s . c om*/ List<DBObject> queries = new ArrayList<>(); /** * If source is present in options is it parsed and prepare first, * otherwise ti will be done for each iteration of regions. */ List<Object> source = options.getList("source", null); BasicDBList sourceIds = new BasicDBList(); if (source != null && source.size() > 0) { sourceIds.addAll(source); } // List<Region> regions = Region.parseRegions(options.getString("region")); List<String> ids = new ArrayList<>(regions.size()); for (Region region : regions) { if (region != null && !region.equals("")) { // If regions is 1 position then query can be optimize using chunks if (region.getStart() == region.getEnd()) { String chunkId = getChunkIdPrefix(region.getChromosome(), region.getStart(), variationChunkSize); System.out.println(chunkId); builder = QueryBuilder.start("_chunkIds").is(chunkId).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } else { builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } if (sourceIds != null && sourceIds.size() > 0) { builder = builder.and("source").in(sourceIds); } queries.add(builder.get()); ids.add(region.toString()); } } return executeQueryList2(ids, queries, options, mongoVariationPhenotypeDBCollection2); }
From source file:org.opencb.cellbase.mongodb.db.variation.VariationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) { List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); String phenotype = options.getString("phenotype"); if (phenotype != null && !phenotype.equals("")) { for (Region region : regions) { QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start") .greaterThanEquals(region.getStart()).lessThanEquals(region.getEnd()); builder = builder.and("phenotype").is(phenotype); queries.add(builder.get());//from w ww .java 2 s . c o m ids.add(region.toString()); } return executeQueryList2(ids, queries, options, mongoVariationPhenotypeDBCollection2); } else { String consequenceTypes = options.getString("consequence_type", null); BasicDBList consequenceTypeDBList = new BasicDBList(); if (consequenceTypes != null && !consequenceTypes.equals("")) { for (String ct : consequenceTypes.split(",")) { consequenceTypeDBList.add(ct); } } for (Region region : regions) { // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getSequenceName()).and("end").greaterThan(region.getStart()).and("start").lessThan(region.getEnd()); QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start") .greaterThanEquals(region.getStart()).lessThanEquals(region.getEnd()); if (consequenceTypeDBList.size() > 0) { builder = builder.and("transcriptVariations.consequenceTypes").in(consequenceTypeDBList); } queries.add(builder.get()); ids.add(region.toString()); } return executeQueryList2(ids, queries, options); } }
From source file:org.opencb.cellbase.mongodb.db.VariationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllPhenotypeByRegion(List<Region> regions, QueryOptions options) { QueryBuilder builder = null;//w w w .j a va2s. co m List<DBObject> queries = new ArrayList<>(); /** * If source is present in options is it parsed and prepare first, * otherwise ti will be done for each iteration of regions. */ List<Object> source = options.getList("source", null); BasicDBList sourceIds = new BasicDBList(); if (source != null && source.size() > 0) { sourceIds.addAll(source); } // List<Region> regions = Region.parseRegions(options.getString("region")); List<String> ids = new ArrayList<>(regions.size()); for (Region region : regions) { if (region != null && !region.equals("")) { // If regions is 1 position then query can be optimize using chunks if (region.getStart() == region.getEnd()) { String chunkId = getChunkIdPrefix(region.getChromosome(), region.getStart(), variationChunkSize); System.out.println(chunkId); builder = QueryBuilder.start("_chunkIds").is(chunkId).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } else { builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } if (sourceIds != null && sourceIds.size() > 0) { builder = builder.and("source").in(sourceIds); } queries.add(builder.get()); ids.add(region.toString()); } } return executeQueryList(ids, queries, options, db.getCollection("variation_phenotype_annotation")); }
From source file:org.opencb.cellbase.mongodb.db.VariationMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllByRegionList(List<Region> regions, QueryOptions options) { List<DBObject> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regions.size()); String phenotype = options.getString("phenotype"); if (phenotype != null && !phenotype.equals("")) { for (Region region : regions) { QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start") .greaterThanEquals(region.getStart()).lessThanEquals(region.getEnd()); builder = builder.and("phenotype").is(phenotype); queries.add(builder.get());//from w ww . j ava 2 s.c om ids.add(region.toString()); } return executeQueryList(ids, queries, options, db.getCollection("variation_phenotype_annotation")); } else { String consequenceTypes = options.getString("consequence_type", null); BasicDBList consequenceTypeDBList = new BasicDBList(); if (consequenceTypes != null && !consequenceTypes.equals("")) { for (String ct : consequenceTypes.split(",")) { consequenceTypeDBList.add(ct); } } for (Region region : regions) { // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getSequenceName()).and("end").greaterThan(region.getStart()).and("start").lessThan(region.getEnd()); QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("start") .greaterThanEquals(region.getStart()).lessThanEquals(region.getEnd()); if (consequenceTypeDBList.size() > 0) { builder = builder.and("transcriptVariations.consequenceTypes").in(consequenceTypeDBList); } queries.add(builder.get()); ids.add(region.toString()); } return executeQueryList(ids, queries, options); } }
From source file:org.opencb.cellbase.mongodb.db.XRefsMongoDBAdaptor.java
License:Apache License
@Override public List<QueryResult> getAllByDBNameList(List<String> ids, QueryOptions options) { // Mel de romer //db.core.aggregate( //{$match: {"transcripts.xrefs.id": "ENST00000544455"}}, //{$unwind: "$transcripts"}, //{$unwind: "$transcripts.xrefs"}, //{$match: {"transcripts.xrefs.dbNameShort":{$in:["go"]}}}, //{$group:{_id:{id:"$transcripts.xrefs.id", dbNameShort:"$transcripts.xrefs.dbNameShort", description:"$transcripts.xrefs.description"}}}, //{$project:{"_id":0,"id":"$_id.id","dbNameShort":"$_id.dbNameShort","description":"$_id.description"}}) // Biotype if gene given: db.core.find({"transcripts.xrefs.id": "BRCA2"}, {"biotype":1}) // Biotype if protein/transcript given: db.core.aggregate({$match: {"transcripts.xrefs.id": "ENST00000470094"}}, {$unwind: "$transcripts"}, {$match: {"transcripts.xrefs.id": "ENST00000470094"}}, {$group:{_id:{biotype:"$transcripts.biotype"}}}, {$project:{"transcripts.biotype":1}}) List<DBObject[]> commandsList = new ArrayList<>(ids.size()); for (String id : ids) { List<DBObject> commands = new ArrayList<>(ids.size()); DBObject match = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.id", id)); DBObject unwind = new BasicDBObject("$unwind", "$transcripts"); DBObject unwind2 = new BasicDBObject("$unwind", "$transcripts.xrefs"); commands.add(match);// w ww. j a v a2s . 
c om commands.add(unwind); commands.add(match); commands.add(unwind2); //Check dbname option exists List<Object> list = options.getList("dbname", null); if (list != null && list.size() > 0) { BasicDBList dbnameDBList = new BasicDBList(); dbnameDBList.addAll(list); DBObject dbnameMatch = new BasicDBObject("$match", new BasicDBObject("transcripts.xrefs.dbName", new BasicDBObject("$in", dbnameDBList))); commands.add(dbnameMatch); } DBObject group = new BasicDBObject("$group", new BasicDBObject("_id", new BasicDBObject("id", "$transcripts.xrefs.id").append("dbName", "$transcripts.xrefs.dbName") .append("dbDisplayName", "$transcripts.xrefs.dbDisplayName") .append("description", "$transcripts.xrefs.description"))); commands.add(group); DBObject project = new BasicDBObject("$project", new BasicDBObject("_id", 0).append("id", "$_id.id").append("dbName", "$_id.dbName") .append("dbDisplayName", "$_id.dbDisplayName") .append("description", "$_id.description")); commands.add(project); //ArrayList to array DBObject[] commandsArray = commands.toArray(new DBObject[0]); commandsList.add(commandsArray); } return executeAggregationList(ids, commandsList, options); }
From source file:org.opencb.cellbase.mongodb.impl.ConservationMongoDBAdaptor.java
License:Apache License
@Override @Deprecated/* www.ja va 2 s. c o m*/ public List<QueryResult> getAllScoresByRegionList(List regionList, QueryOptions options) { //TODO not finished yet List<Document> queries = new ArrayList<>(); List<String> ids = new ArrayList<>(regionList.size()); List<Integer> integerChunkIds; List<Region> regions = regionList; for (Region region : regions) { integerChunkIds = new ArrayList<>(); // positions below 1 are not allowed if (region.getStart() < 1) { region.setStart(1); } if (region.getEnd() < 1) { region.setEnd(1); } /****/ QueryBuilder builder; int regionChunkStart = getChunkId(region.getStart(), MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE); int regionChunkEnd = getChunkId(region.getEnd(), MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE); if (regionChunkStart == regionChunkEnd) { builder = QueryBuilder.start("_chunkIds").is(getChunkIdPrefix(region.getChromosome(), region.getStart(), MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE)); } else { // for (int chunkId = regionChunkStart; chunkId <= regionChunkEnd; chunkId++) { // integerChunkIds.add(chunkId); // } // // QueryBuilder builder = QueryBuilder.start("chromosome").is(region.getChromosomeInfo()).and("chunkId").in(hunkIds); // builder = QueryBuilder.start("chromosome").is(region.getChromosomeInfo()).and("chunkId").in(integerChunkIds); builder = QueryBuilder.start("chromosome").is(region.getChromosome()).and("end") .greaterThanEquals(region.getStart()).and("start").lessThanEquals(region.getEnd()); } /****/ queries.add(new Document(builder.get().toMap())); ids.add(region.toString()); // logger.debug(builder.get().toString()); } List<QueryResult> queryResults = executeQueryList2(ids, queries, options); // List<QueryResult> queryResults = executeQueryList(ids, queries, options); for (int i = 0; i < regions.size(); i++) { Region region = regions.get(i); QueryResult queryResult = queryResults.get(i); List<Document> list = (List<Document>) queryResult.getResult(); Map<String, 
List<Float>> typeMap = new HashMap(); // int start = region.getStart(); for (int j = 0; j < list.size(); j++) { Document chunk = list.get(j); if (!chunk.isEmpty()) { // BasicDBList valuesChunk = (BasicDBList) chunk.get("values"); ArrayList valuesChunk = chunk.get("values", ArrayList.class); if (valuesChunk != null) { // TODO: temporary patch to skip empty chunks - remove as soon as conservation is reloaded String source = chunk.getString("source"); List<Float> valuesList; if (!typeMap.containsKey(source)) { valuesList = new ArrayList<>(region.getEnd() - region.getStart() + 1); for (int val = 0; val < region.getEnd() - region.getStart() + 1; val++) { valuesList.add(null); } typeMap.put(source, valuesList); } else { valuesList = typeMap.get(source); } // valuesChunk = (BasicDBList) chunk.get("values"); valuesChunk = chunk.get("values", ArrayList.class); int pos = 0; if (region.getStart() > chunk.getInteger("start")) { pos = region.getStart() - chunk.getInteger("start"); } for (; pos < valuesChunk.size() && (pos + chunk.getInteger("start") <= region.getEnd()); pos++) { valuesList.set(pos + chunk.getInteger("start") - region.getStart(), new Float((Double) valuesChunk.get(pos))); } } else { continue; } } BasicDBList resultList = new BasicDBList(); for (Map.Entry<String, List<Float>> elem : typeMap.entrySet()) { for (Float value : elem.getValue()) { if (value != null) { resultList.add(new Score(new Double(value), elem.getKey())); } } } if (!resultList.isEmpty()) { queryResult.setResult(resultList); } else { queryResult.setResult(null); } } } return queryResults; }
From source file:org.opencb.cellbase.mongodb.impl.ProteinMongoDBAdaptor.java
License:Apache License
@Override public QueryResult<ProteinVariantAnnotation> getVariantAnnotation(String ensemblTranscriptId, int position, String aaReference, String aaAlternate, QueryOptions options) { QueryResult<ProteinVariantAnnotation> queryResult = new QueryResult<>(); queryResult.setId(ensemblTranscriptId + "/" + position + "/" + aaAlternate); long dbTimeStart = System.currentTimeMillis(); ProteinVariantAnnotation proteinVariantAnnotation = new ProteinVariantAnnotation(); proteinVariantAnnotation.setPosition(position); proteinVariantAnnotation.setReference(aaReference); proteinVariantAnnotation.setAlternate(aaAlternate); // proteinVariantAnnotation.setSubstitutionScores(getProteinSubstitutionScores(ensemblTranscriptId, position, aaAlternate)); Query query = new Query("transcript", ensemblTranscriptId).append("position", position).append("aa", aaAlternate);// w ww. j a v a 2 s . c om // try { // if (ensemblTranscriptId.equals("ENST00000383037") || ensemblTranscriptId.equals("ENST00000428666")) { // int a = 1; // } proteinVariantAnnotation.setSubstitutionScores(getSubstitutionScores(query, null).getResult()); // } catch (Exception e) { // int a = 1; // } QueryResult proteinVariantData = null; String shortAlternativeAa = aaShortNameMap.get(aaAlternate); if (shortAlternativeAa != null) { List<Bson> pipeline = new ArrayList<>(); // BasicDBList andDBList1 = new BasicDBList(); // andDBList1.add(new Document("dbReference.id", ensemblTranscriptId)); // andDBList1.add(new Document("feature.location.position.position", position)); // andDBList1.add(new Document("feature.variation", shortAlternativeAa)); // pipeline.add(new Document("$match", new Document("$and", andDBList1))); pipeline.add(new Document("$match", new Document("dbReference.id", ensemblTranscriptId))); Document projection = new Document(); projection.put("accession", 1); projection.put("keyword", 1); projection.put("feature", 1); pipeline.add(new Document("$project", projection)); pipeline.add(new Document("$unwind", 
"$feature")); BasicDBList andDBList2 = new BasicDBList(); andDBList2.add(new Document("feature.location.position.position", position)); andDBList2.add(new Document("feature.variation", shortAlternativeAa)); Document firstOr = new Document("$and", andDBList2); BasicDBList andDBList3 = new BasicDBList(); andDBList3.add(new Document("feature.location.end.position", new Document("$gte", position))); andDBList3.add(new Document("feature.location.begin.position", new Document("$lte", position))); Document secondOr = new Document(); secondOr.put("$and", andDBList3); BasicDBList orList = new BasicDBList(); orList.add(firstOr); orList.add(secondOr); pipeline.add(new Document("$match", new Document("$or", orList))); // pipeline.add(new Document("$match", firstOr)); // Document groupFields = new Document(); groupFields.put("_id", "$accession"); groupFields.put("keyword", new Document("$addToSet", "$keyword")); groupFields.put("feature", new Document("$addToSet", "$feature")); pipeline.add(new Document("$group", groupFields)); //TODO:terminar el pipeline de agregacion // QueryBuilder builder = QueryBuilder.start("dbReference.id").is(ensemblTranscriptId) // .and("feature.location.position.position").is(position) // .and("feature.variation").is(shortAlternativeAa); // // Document firstOr = new Document(); // firstOr.put("location.position.position", position); // firstOr.put("variation", shortAlternativeAa); // // BasicDBList andList = new BasicDBList(); // andList.add(new Document("location.end.position", new Document("$gte", position))); // andList.add(new Document("location.begin.position", new Document("$lte", position))); // Document secondOr = new Document(); // secondOr.put("$and", andList); // // BasicDBList orList = new BasicDBList(); // orList.add(firstOr); // orList.add(secondOr); // // Document elemMatch = new Document(); // elemMatch.put("$elemMatch", new Document("$or", orList)); // // Document projection = new Document(); // projection.put("feature", elemMatch); 
// // QueryOptions localQueryOptions = new QueryOptions(); // localQueryOptions.put("elemMatch",projection); // localQueryOptions.put("include","accession,keyword,feature"); // proteinVariantData = executeQuery(ensemblTranscriptId + "_" + String.valueOf(position) + "_" // + aaAlternate, new Document(builder.get().toMap()), localQueryOptions); proteinVariantData = executeAggregation2( ensemblTranscriptId + "_" + String.valueOf(position) + "_" + aaAlternate, pipeline, new QueryOptions()); if (proteinVariantData.getNumResults() > 0) { proteinVariantAnnotation = processProteinVariantData(proteinVariantAnnotation, shortAlternativeAa, (Document) proteinVariantData.getResult().get(0)); } } long dbTimeEnd = System.currentTimeMillis(); queryResult.setDbTime(Long.valueOf(dbTimeEnd - dbTimeStart).intValue()); // if (proteinVariantAnnotation.getSubstitutionScores() != null || proteinVariantAnnotation.getUniprotAccession() != null) { queryResult.setNumResults(1); queryResult.setResult(Collections.singletonList(proteinVariantAnnotation)); // } return queryResult; }
From source file:org.opencb.cellbase.mongodb.loader.converters.VariantEffectConverter.java
License:Apache License
@Override public DBObject convertToStorageSchema(VariantAnnotation variantAnnotation) { BasicDBObject mongoDbSchema = new BasicDBObject("chr", variantAnnotation.getChromosome()) .append("start", variantAnnotation.getStart()).append("end", variantAnnotation.getEnd()) .append("ref", variantAnnotation.getReferenceAllele()); // All consequence types for the different ALT are stored // in an array. A compression using '*' is implemented. BasicDBList consequenceTypeSchemaList = new BasicDBList(); // 'keys' contains all the ALT alleles simulated Set<String> keys = variantAnnotation.getEffects().keySet(); Iterator<String> iterator = keys.iterator(); // Note://from w w w.ja v a2 s. com // During the simulation 2 types of variants are simulated: // - common: which means only one allele is simulated since no different results are produced by VEP. This is // the case of UPSTREAM, DOWNSTREAM and INTRONS variants // - all: the 3 possible SNV and '-' are simulated. This is done for EXONS and REGULATORY regions // System.out.println("num. alleles:\t"+keys.size()); // if(keys.size() == 3) { // System.out.println("==3: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n"); // } // if(keys.size() == 2 && !keys.contains("T") && !keys.contains("C")) { // System.out.println("==2: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n"); // } // If the different allele at this genomic position can produce different results the 4 possible SNV ALT alleles // are pre-computed: one of these A, C, G or T; and '-' // No more are possible can be found (in theory), but maybe some funny SNPs like AA/TTA exist so // if "size >= 4" then all possible values are stored. 
if (keys.size() >= 4) { // System.out.println(">=4: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n"); while (iterator.hasNext()) { String key = iterator.next(); List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key); BasicDBList consequenceTypeDBList = new BasicDBList(); for (VariantEffect consequenceType : consequenceTypes) { BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType); consequenceTypeDBList.add(consequenceTypeItemDBObject); } consequenceTypeDBObject.append("val", consequenceTypeDBList); // Add Consequence Type object of this ALT to the List consequenceTypeSchemaList.add(consequenceTypeDBObject); } // If less than the 4 ALT alleles are found means that either 1 ALT allele and the '-' allele were pre-computed // (to minimize the number of calls to VEP), or a few multiallelic SNPs were found at that position. // ALT allele must be stored as '*' allele when possible to save space. 
} else { boolean commonFound = false; while (iterator.hasNext()) { String key = iterator.next(); if (key.equals("-")) { List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key); BasicDBList consequenceTypeDBList = new BasicDBList(); for (VariantEffect consequenceType : consequenceTypes) { BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType); consequenceTypeDBList.add(consequenceTypeItemDBObject); } consequenceTypeDBObject.append("val", consequenceTypeDBList); // Add Consequence Type object of this ALT to the List consequenceTypeSchemaList.add(consequenceTypeDBObject); } else { // Only the first non '-' is stored using '*' if (!commonFound) { commonFound = true; List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", "*"); BasicDBList consequenceTypeDBList = new BasicDBList(); for (VariantEffect consequenceType : consequenceTypes) { // HGVS must be encoded with '*' if (consequenceType.getHgvsc() != null) { if (consequenceType.getFeatureStrand().equals("1")) { String s = consequenceType.getHgvsc().replace(">" + key, ">*"); consequenceType.setHgvsc(s); } else { String complement = ""; switch (key) { case "A": complement = "T"; break; case "T": complement = "A"; break; case "C": complement = "G"; break; case "G": complement = "C"; break; } String s = consequenceType.getHgvsc().replace(">" + complement, ">*"); consequenceType.setHgvsc(s); } } BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject( consequenceType); consequenceTypeDBList.add(consequenceTypeItemDBObject); } consequenceTypeDBObject.append("val", consequenceTypeDBList); // Add Consequence Type object of this ALT to the List consequenceTypeSchemaList.add(consequenceTypeDBObject); } } } // // If '-' is found then an allele independent position has been found and 1 allele 
and '-' are expected // if(keys.contains("-")) { // System.out.println("if: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n"); //// System.out.println(keys+" "+variantAnnotation.getStart()+"-"+variantAnnotation.getEnd()); // boolean commonFound = false; // while(iterator.hasNext()) { // String key = iterator.next(); // if(key.equals("-")) { // List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); // // BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key); // BasicDBList consequenceTypeDBList = new BasicDBList(); // for(VariantEffect consequenceType: consequenceTypes) { // BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType); // consequenceTypeDBList.add(consequenceTypeItemDBObject); // } // consequenceTypeDBObject.append("val", consequenceTypeDBList); // // // Add Consequence Type object of this ALT to the List // consequenceTypeSchemaList.add(consequenceTypeDBObject); // }else { // // Only the first non '-' is stored using '*' // if(!commonFound) { // commonFound = true; // List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); // // BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", "*"); // BasicDBList consequenceTypeDBList = new BasicDBList(); // for(VariantEffect consequenceType: consequenceTypes) { // // HGVS must be encoded with '*' // if(consequenceType.getHgvsc() != null) { // if(consequenceType.getFeatureStrand().equals("1")) { // String s = consequenceType.getHgvsc().replace(">"+key, ">*"); // consequenceType.setHgvsc(s); // }else { // String complement = ""; // switch (key) { // case "A": complement = "T"; break; // case "T": complement = "A"; break; // case "C": complement = "G"; break; // case "G": complement = "C"; break; // } // String s = consequenceType.getHgvsc().replace(">"+complement, ">*"); // consequenceType.setHgvsc(s); // } // } // BasicDBObject consequenceTypeItemDBObject = 
parseConsequenceTypeToDBObject(consequenceType); // consequenceTypeDBList.add(consequenceTypeItemDBObject); // } // consequenceTypeDBObject.append("val", consequenceTypeDBList); // // // Add Consequence Type object of this ALT to the List // consequenceTypeSchemaList.add(consequenceTypeDBObject); // } // } // } // } // // If no '-' is found we process all of them. // else { // // 1077844 // System.out.println("else: "+variantAnnotation+":"+variantAnnotation.getStart()+" alleles: "+keys+"\n"); // while(iterator.hasNext()) { // String key = iterator.next(); // List<VariantEffect> consequenceTypes = variantAnnotation.getEffects().get(key); // // BasicDBObject consequenceTypeDBObject = new BasicDBObject("alt", key); // BasicDBList consequenceTypeDBList = new BasicDBList(); // for(VariantEffect consequenceType: consequenceTypes) { // BasicDBObject consequenceTypeItemDBObject = parseConsequenceTypeToDBObject(consequenceType); // consequenceTypeDBList.add(consequenceTypeItemDBObject); // } // consequenceTypeDBObject.append("val", consequenceTypeDBList); // // // Add Consequence Type object of this ALT to the List // consequenceTypeSchemaList.add(consequenceTypeDBObject); // } // } } mongoDbSchema.append("eff", consequenceTypeSchemaList); // Parsing Frequencies from VEP BasicDBObject frequencyDBObject = parseFrequencies(variantAnnotation.getFrequencies()); if (frequencyDBObject.size() != 0) { mongoDbSchema.append("freqs", frequencyDBObject); } // Parsing ProteinSubstitutionScores from VEP BasicDBObject proteinSubstitutionScoresDBObject = parseProteinSubstituionScores( variantAnnotation.getProteinSubstitutionScores()); if (proteinSubstitutionScoresDBObject.size() != 0) { mongoDbSchema.append("pss", proteinSubstitutionScoresDBObject); } // Parsing ProteinSubstitutionScores from VEP BasicDBObject regulatoryEffectDBObject = parseRegulatoryEffect(variantAnnotation.getRegulatoryEffect()); if (regulatoryEffectDBObject.size() != 0) { mongoDbSchema.append("reg", 
regulatoryEffectDBObject); } // Parsing ProteinSubstitutionScores from VEP // BasicDBObject genesDBObject = parseGenes(variantAnnotation.getEffects()); // if(genesDBObject.size() != 0) { // mongoDbSchema.append("gn", genesDBObject); // } return mongoDbSchema; }
From source file:org.opencb.opencga.storage.mongodb.alignment.BamManager.java
License:Apache License
@Deprecated public String getByRegion(Path fullFilePath, String regionStr, Map<String, List<String>> params) throws IOException { long totalTime = System.currentTimeMillis(); Region region = Region.parseRegion(regionStr); String chr = region.getChromosome(); int start = region.getStart(); int end = region.getEnd(); logger.info("chr: " + chr + " start: " + start + " end: " + end); if (params.get("cellbasehost") != null) { cellbasehost = params.get("cellbasehost").get(0); if (cellbasehost.equals("")) { return "{'error':'cellbase host not valid'}"; }/*from w w w . j av a2 s.c o m*/ } if (params.get("species") != null) { species = params.get("species").get(0); if (species.equals("")) { return "{'error':'species not valid'}"; } } Boolean viewAsPairs = false; if (params.get("view_as_pairs") != null) { viewAsPairs = Boolean.parseBoolean(params.get("view_as_pairs").get(0)); } Boolean showSoftclipping = false; if (params.get("show_softclipping") != null) { showSoftclipping = Boolean.parseBoolean(params.get("show_softclipping").get(0)); } Boolean histogram = false; if (params.get("histogram") != null) { histogram = Boolean.parseBoolean(params.get("histogram").get(0)); } int interval = 200000; if (params.get("interval") != null) { interval = Integer.parseInt(params.get("interval").get(0)); } File inputBamFile = new File(fullFilePath.toString()); File inputBamIndexFile = new File(fullFilePath + ".bai"); if (inputBamIndexFile == null) { logger.info("BamManager: " + "creating bam index for: " + fullFilePath); // createIndex(inputBamFile, inputBamIndexFile); return "{error:'no index found'}"; } long t = System.currentTimeMillis(); SAMFileReader inputSam = new SAMFileReader(inputBamFile, inputBamIndexFile); System.out.println("new SamFileReader in " + (System.currentTimeMillis() - t) + "ms"); System.out.println("hasIndex " + inputSam.hasIndex()); t = System.currentTimeMillis(); SAMRecordIterator recordsFound = inputSam.query(chr, start, end, false); System.out.println("query 
SamFileReader in " + (System.currentTimeMillis() - t) + "ms"); /** * ARRAY LIST */ ArrayList<SAMRecord> records = new ArrayList<SAMRecord>(); t = System.currentTimeMillis(); while (recordsFound.hasNext()) { SAMRecord record = recordsFound.next(); records.add(record); } System.out.println(records.size() + " elements added in: " + (System.currentTimeMillis() - t) + "ms"); /** * Check histogram */ if (histogram) { int numIntervals = (region.getEnd() - region.getStart()) / interval + 1; System.out.println("numIntervals :" + numIntervals); int[] intervalCount = new int[numIntervals]; System.out.println(region.getChromosome()); System.out.println(region.getStart()); System.out.println(region.getEnd()); for (SAMRecord record : records) { // System.out.println("---*-*-*-*-" + numIntervals); // System.out.println("---*-*-*-*-" + record.getAlignmentStart()); // System.out.println("---*-*-*-*-" + interval); if (record.getAlignmentStart() >= region.getStart() && record.getAlignmentStart() <= region.getEnd()) { int intervalIndex = (record.getAlignmentStart() - region.getStart()) / interval; // truncate // System.out.print(intervalIndex + " "); intervalCount[intervalIndex]++; } } int intervalStart = region.getStart(); int intervalEnd = intervalStart + interval - 1; BasicDBList intervalList = new BasicDBList(); for (int i = 0; i < numIntervals; i++) { BasicDBObject intervalObj = new BasicDBObject(); intervalObj.put("start", intervalStart); intervalObj.put("end", intervalEnd); intervalObj.put("interval", i); intervalObj.put("defaultValue", intervalCount[i]); intervalList.add(intervalObj); intervalStart = intervalEnd + 1; intervalEnd = intervalStart + interval - 1; } System.out.println(region.getChromosome()); System.out.println(region.getStart()); System.out.println(region.getEnd()); return intervalList.toString(); } /** * GET GENOME SEQUENCE */ t = System.currentTimeMillis(); String forwardSequence = getSequence(chr, start, end); String reverseSequence = revcomp(forwardSequence); 
// System.out.println(forwardSequence); // System.out.println(reverseSequence); System.out.println("Get genome sequence in " + (System.currentTimeMillis() - t) + "ms"); /** * COVERAGE */ short[] coverageArray = new short[end - start + 1]; short[] aBaseArray = new short[end - start + 1]; short[] cBaseArray = new short[end - start + 1]; short[] gBaseArray = new short[end - start + 1]; short[] tBaseArray = new short[end - start + 1]; if (viewAsPairs) { t = System.currentTimeMillis(); Collections.sort(records, new Comparator<SAMRecord>() { @Override public int compare(SAMRecord o1, SAMRecord o2) { if (o1 != null && o1.getReadName() != null && o2 != null) { return o1.getReadName().compareTo(o2.getReadName()); } return -1; } }); System.out.println(records.size() + " elements sorted in: " + (System.currentTimeMillis() - t) + "ms"); } t = System.currentTimeMillis(); StringBuilder sb = new StringBuilder(); sb.append("{"); sb.append("\"reads\":["); StringBuilder attrString; String readStr; int readPos; // logger.info("Processing SAM records"); for (SAMRecord record : records) { // logger.info(record.getReadName()); Boolean condition = (!record.getReadUnmappedFlag()); if (condition) { attrString = new StringBuilder(); attrString.append("{"); for (SAMRecord.SAMTagAndValue attr : record.getAttributes()) { attrString.append("\"" + attr.tag + "\":\"" + attr.value.toString().replace("\\", "\\\\").replace("\"", "\\\"") + "\","); } // Remove last comma if (attrString.length() > 1) { attrString.replace(attrString.length() - 1, attrString.length(), ""); } attrString.append("}"); readStr = record.getReadString(); /***************************************************************************/ if (true) {// TEST // if(record.getReadNegativeStrandFlag() // ){ // if(record.getReadName().equals("SRR081241.8998181") || // record.getReadName().equals("SRR081241.645807") // ){ // 
System.out.println("#############################################################################################################################################"); // System.out.println("#############################################################################################################################################"); // System.out.println("Unclipped Start:"+(record.getUnclippedStart()-start)); // System.out.println("Unclipped End:"+(record.getUnclippedEnd()-start+1)); // System.out.println(record.getCigarString()+" Alig Length:"+(record.getAlignmentEnd()-record.getAlignmentStart()+1)+" Unclipped length:"+(record.getUnclippedEnd()-record.getUnclippedStart()+1)); String refStr = forwardSequence.substring((500 + record.getUnclippedStart() - start), (500 + record.getUnclippedEnd() - start + 1)); // System.out.println("refe:"+refStr+" refe.length:"+refStr.length()); // System.out.println("read:"+readStr+" readStr.length:"+readStr.length()+" getReadLength:"+record.getReadLength()); StringBuilder diffStr = new StringBuilder(); int index = 0; int indexRef = 0; // System.out.println(jsonObjectMapper.toJson(record.getCigar().getCigarElements())); // logger.info("checking cigar: " + record.getCigar().toString()); for (int i = 0; i < record.getCigar().getCigarElements().size(); i++) { CigarElement cigarEl = record.getCigar().getCigarElement(i); CigarOperator cigarOp = cigarEl.getOperator(); int cigarLen = cigarEl.getLength(); // logger.info(cigarOp + " found" + " index:" + index + " indexRef:" + indexRef + " cigarLen:" + cigarLen); if (cigarOp == CigarOperator.M || cigarOp == CigarOperator.EQ || cigarOp == CigarOperator.X) { String subref = refStr.substring(indexRef, indexRef + cigarLen); String subread = readStr.substring(index, index + cigarLen); diffStr.append(getDiff(subref, subread)); index = index + cigarLen; indexRef = indexRef + cigarLen; } if (cigarOp == CigarOperator.I) { diffStr.append(readStr.substring(index, index + cigarLen).toLowerCase()); index = 
index + cigarLen; // TODO save insertions } if (cigarOp == CigarOperator.D) { for (int bi = 0; bi < cigarLen; bi++) { diffStr.append("d"); } indexRef = indexRef + cigarLen; } if (cigarOp == CigarOperator.N) { for (int bi = 0; bi < cigarLen; bi++) { diffStr.append("n"); } indexRef = indexRef + cigarLen; } if (cigarOp == CigarOperator.S) { if (showSoftclipping) { String subread = readStr.substring(index, index + cigarLen); diffStr.append(subread); index = index + cigarLen; indexRef = indexRef + cigarLen; } else { for (int bi = 0; bi < cigarLen; bi++) { diffStr.append(" "); } index = index + cigarLen; indexRef = indexRef + cigarLen; } } if (cigarOp == CigarOperator.H) { for (int bi = 0; bi < cigarLen; bi++) { diffStr.append("h"); } indexRef = indexRef + cigarLen; } if (cigarOp == CigarOperator.P) { for (int bi = 0; bi < cigarLen; bi++) { diffStr.append("p"); } indexRef = indexRef + cigarLen; } // if(cigarOp == CigarOperator.EQ) { // // } // if(cigarOp == CigarOperator.X) { // // } } // System.out.println("diff:"+diffStr); String empty = diffStr.toString().replace(" ", ""); // System.out.println("diff:"+diffStr); /*************************************************************************/ sb.append("{"); sb.append("\"start\":" + record.getAlignmentStart() + ","); sb.append("\"end\":" + record.getAlignmentEnd() + ","); sb.append("\"unclippedStart\":" + record.getUnclippedStart() + ","); sb.append("\"unclippedEnd\":" + record.getUnclippedEnd() + ","); sb.append("\"chromosome\":\"" + chr + "\","); sb.append("\"flags\":\"" + record.getFlags() + "\",");// with // flags // the // strand // will // be // calculated sb.append("\"cigar\":\"" + record.getCigarString() + "\","); sb.append("\"name\":\"" + record.getReadName() + "\","); sb.append("\"blocks\":\"" + record.getAlignmentBlocks().get(0).getLength() + "\","); sb.append("\"attributes\":" + attrString.toString() + ","); // sb.append("\"readGroupId\":\""+record.getReadGroup().getId()+"\","); // 
sb.append("\"readGroupPlatform\":\""+record.getReadGroup().getPlatform()+"\","); // sb.append("\"readGroupLibrary\":\""+record.getReadGroup().getLibrary()+"\","); sb.append("\"referenceName\":\"" + record.getReferenceName() + "\","); sb.append("\"baseQualityString\":\"" + record.getBaseQualityString().replace("\\", "\\\\").replace("\"", "\\\"") + "\",");// the // " // char // unables // parse // from // javascript // sb.append("\"baseQualityString\":\""+jsonObjectMapper.toJson(baseQualityArray)+"\",");// // the " char unables parse from javascript sb.append("\"header\":\"" + record.getHeader().toString() + "\","); sb.append("\"readLength\":" + record.getReadLength() + ","); sb.append("\"mappingQuality\":" + record.getMappingQuality() + ","); sb.append("\"mateReferenceName\":\"" + record.getMateReferenceName() + "\","); sb.append("\"mateAlignmentStart\":" + record.getMateAlignmentStart() + ","); sb.append("\"inferredInsertSize\":" + record.getInferredInsertSize() + ","); if (!empty.isEmpty()) { sb.append("\"diff\":\"" + diffStr + "\","); } sb.append("\"read\":\"" + readStr + "\""); sb.append("},"); } // IF TEST BY READ NAME // logger.info("Creating coverage array"); // TODO cigar check for correct coverage calculation and int refgenomeOffset = 0; int readOffset = 0; int offset = record.getAlignmentStart() - start; for (int i = 0; i < record.getCigar().getCigarElements().size(); i++) { if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.M) { // logger.info("start: "+start); // logger.info("r a start: "+record.getAlignmentStart()); // logger.info("refgenomeOffset: "+refgenomeOffset); // logger.info("r c lenght: "+record.getCigar().getCigarElement(i).getLength()); // logger.info(record.getAlignmentStart() - start + refgenomeOffset); // logger.info("readStr: "+readStr.length()); // logger.info("readStr: "+readStr.length()); for (int j = record.getAlignmentStart() - start + refgenomeOffset, cont = 0; cont < record 
.getCigar().getCigarElement(i).getLength(); j++, cont++) { if (j >= 0 && j < coverageArray.length) { coverageArray[j]++; readPos = j - offset; // if(record.getAlignmentStart() == 32877696){ // System.out.println(i-(record.getAlignmentStart()-start)); // System.out.println(record.getAlignmentStart()-start); // } // System.out.print(" - "+(cont+readOffset)); // System.out.print("|"+readStr.length()); int total = cont + readOffset; // if(total < readStr.length()){ // logger.info(readStr.length()); switch (readStr.charAt(total)) { case 'A': aBaseArray[j]++; break; case 'C': cBaseArray[j]++; break; case 'G': gBaseArray[j]++; break; case 'T': tBaseArray[j]++; break; } // } } } } if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.I) { refgenomeOffset++; readOffset += record.getCigar().getCigarElement(i).getLength() - 1; } else if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.D) { refgenomeOffset += record.getCigar().getCigarElement(i).getLength() - 1; readOffset++; } else if (record.getCigar().getCigarElement(i).getOperator() == CigarOperator.H) { //Ignored Hardclipping and do not update offset pointers } else { refgenomeOffset += record.getCigar().getCigarElement(i).getLength() - 1; readOffset += record.getCigar().getCigarElement(i).getLength() - 1; } // if (record.getCigar().getCigarElement(i).getOperator() != CigarOperator.I) { // } else if(){ // } } // logger.info("coverage array created"); } // logger.info(" "); } // Remove last comma int sbLength = sb.length(); int sbLastPos = sbLength - 1; if (sbLength > 1 && sb.charAt(sbLastPos) == ',') { sb.replace(sbLastPos, sbLength, ""); } // //FIXME // sb.append("]"); // sb.append(",\"coverage\":"+jsonObjectMapper.toJson(coverageArray)); // sb.append("}"); // FIXME sb.append("]"); sb.append(",\"coverage\":{\"all\":" + jsonObjectWriter.writeValueAsString(coverageArray)); sb.append(",\"a\":" + jsonObjectWriter.writeValueAsString(aBaseArray)); sb.append(",\"c\":" + 
jsonObjectWriter.writeValueAsString(cBaseArray)); sb.append(",\"g\":" + jsonObjectWriter.writeValueAsString(gBaseArray)); sb.append(",\"t\":" + jsonObjectWriter.writeValueAsString(tBaseArray)); sb.append("}"); sb.append("}"); String json = sb.toString(); System.out.println("Result String created in " + (System.currentTimeMillis() - t) + "ms"); // IOUtils.write("/tmp/dqslastgetByRegionCall", json); inputSam.close(); System.out.println("TOTAL " + (System.currentTimeMillis() - totalTime) + "ms"); return json; }