List of usage examples for com.mongodb AggregationOptions builder
public static Builder builder()
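Creates a builder for AggregationOptions, the options object accepted by DBCollection.aggregate(pipeline, options) in the legacy driver. Before the project examples below, here is a minimal, self-contained sketch of the commonly used builder options; host, database, collection and field names are placeholders, and the maxTime(...) call assumes a driver version (2.12 or later) that provides it.

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

import com.mongodb.AggregationOptions;
import com.mongodb.BasicDBObject;
import com.mongodb.Cursor;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class BuilderSketch {
    public static void main(String[] args) throws Exception {
        MongoClient mongoClient = new MongoClient("localhost"); // placeholder host
        DBCollection coll = mongoClient.getDB("test").getCollection("items"); // placeholder names

        // a trivial pipeline: count documents per "type"
        List<DBObject> pipeline = Arrays.<DBObject>asList(new BasicDBObject("$group",
                new BasicDBObject("_id", "$type").append("count", new BasicDBObject("$sum", 1))));

        AggregationOptions options = AggregationOptions.builder()
                .batchSize(100)                                   // documents per getMore batch
                .allowDiskUse(true)                               // let the server spill large stages to disk
                .outputMode(AggregationOptions.OutputMode.CURSOR) // stream results instead of an inline reply
                .maxTime(30, TimeUnit.SECONDS)                    // assumed available in driver >= 2.12
                .build();

        Cursor cursor = coll.aggregate(pipeline, options);
        while (cursor.hasNext()) {
            System.out.println(cursor.next());
        }
        cursor.close();
        mongoClient.close();
    }
}

The examples that follow show the same builder in real projects.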
From source file:com.petpet.c3po.dao.mongo.MongoPersistenceLayer.java
License:Apache License
public List<BasicDBObject> aggregate(String property, Filter filter, Boolean getStats) {
    LOG.debug("Starting aggregation for the following property: {}", property);
    long start = System.currentTimeMillis();
    Property prop = getCache().getProperty(property);
    String propType = prop.getType();
    List<BasicDBObject> result = new ArrayList<BasicDBObject>();
    DBCollection collection = this.getCollection(Element.class);
    BasicDBList basicAggregationStages = new BasicDBList();
    if (propType.equals(PropertyType.STRING.toString())) {
        basicAggregationStages = getBasicAggregationStages(property, filter, "$sourcedValue.value");
    } else if (propType.equals(PropertyType.INTEGER.toString())
            || propType.equals(PropertyType.FLOAT.toString())) {
        // TODO: choose a better strategy to address this. Think of bins for numerical values.
        basicAggregationStages = getBasicAggregationStages(property, filter, "$sourcedValue.value");
    } else if (propType.equals(PropertyType.DATE.toString())) {
        /*
        BasicDBList cond = new BasicDBList();
        BasicDBList eq = new BasicDBList();
        eq.add("$sourcedValue.value");
        eq.add(0);
        cond.add(new BasicDBObject("$ifNull", eq));
        cond.add(new BasicDBObject("$year", "$sourcedValue.value"));
        cond.add(-1);
        */
        BasicDBObject conditionalValue = new BasicDBObject("$year", "$sourcedValue.value");
        basicAggregationStages = getBasicAggregationStages(property, filter, conditionalValue);
    } else if (propType.equals(PropertyType.BOOL.toString())) {
        basicAggregationStages = getBasicAggregationStages(property, filter, "$sourcedValue.value");
    }
    if (getStats) {
        basicAggregationStages.add(new BasicDBObject("$group",
                new BasicDBObject("_id", "$property")
                        .append("stdDev", new BasicDBObject("$stdDevPop", "$value"))
                        .append("min", new BasicDBObject("$min", "$value"))
                        .append("max", new BasicDBObject("$max", "$value"))
                        .append("avg", new BasicDBObject("$avg", "$value"))
                        .append("sum", new BasicDBObject("$sum", "$value"))
                        .append("count", new BasicDBObject("$sum", 1))));
    } else if (propType.equals(PropertyType.INTEGER.toString())) {
        basicAggregationStages.add(new BasicDBObject("$bucketAuto",
                new BasicDBObject("groupBy", "$value").append("buckets", 10)));
    }
    List<DBObject> pipeline = new ArrayList<DBObject>();
    for (Object basicAggregationStage : basicAggregationStages) {
        pipeline.add((DBObject) basicAggregationStage);
    }
    AggregationOptions options = AggregationOptions.builder().allowDiskUse(true).build();
    Cursor aggregate = collection.aggregate(pipeline, options);
    while (aggregate.hasNext()) { // drain the cursor, otherwise the method returns an empty list
        result.add((BasicDBObject) aggregate.next());
    }
    long end = System.currentTimeMillis();
    LOG.debug("The aggregation job took {} seconds", (end - start) / 1000);
    return result;
}
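Note that $stdDevPop requires MongoDB 3.2 or later and $bucketAuto requires 3.4 or later, so this method presumes a reasonably recent server. The stats $group stage collapses the pipeline into a single document; here is a minimal standalone sketch of that pattern (the collection and the numeric field name "value" are illustrative assumptions, not taken from the original project):

// a sketch of the single-document stats idiom, assuming "value" holds numbers
static DBObject numericFieldStats(DBCollection coll) {
    DBObject statsGroup = new BasicDBObject("$group", new BasicDBObject("_id", null)
            .append("min", new BasicDBObject("$min", "$value"))
            .append("max", new BasicDBObject("$max", "$value"))
            .append("avg", new BasicDBObject("$avg", "$value"))
            .append("count", new BasicDBObject("$sum", 1)));
    Cursor c = coll.aggregate(Arrays.<DBObject>asList(statsGroup),
            AggregationOptions.builder().allowDiskUse(true).build());
    return c.hasNext() ? c.next() : null; // grouping on a null _id yields at most one document
}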
From source file:datapreparation.MongoStatistics.java
public void usersOfUrl(String URL) {
    // TODO code application logic here
    int limit = 0;
    String filename = "Users_Of_Url_" + URL + ".txt";
    // Connect directly to a single MongoDB server (note that this will not auto-discover the
    // primary even if it's a member of a replica set)
    MongoClient mongoClient;
    try {
        mongoClient = new MongoClient("localhost");
        // use database
        DB db = mongoClient.getDB("users");
        // get collection
        DBCollection coll = db.getCollection("urls");

        // build the $project operation
        // DBObject fields = new BasicDBObject("user", 1);
        // fields.put("_id", 0);
        // BasicDBObject project = new BasicDBObject("$project", fields);

        // build the $match operation
        DBObject matchFields = new BasicDBObject("url", URL);
        DBObject match = new BasicDBObject("$match", matchFields);

        // now the $group operation
        DBObject groupFields = new BasicDBObject("_id", "$user");
        groupFields.put("count", new BasicDBObject("$sum", 1));
        DBObject group = new BasicDBObject("$group", groupFields);

        // finally the $sort operation
        BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

        // run aggregation
        List<DBObject> pipeline;
        if (limit == 0) { // without limit
            pipeline = Arrays.asList(match, group, sort);
        } else { // add a $limit stage that restricts the result to `limit` rows
            DBObject limitRes = new BasicDBObject("$limit", limit);
            pipeline = Arrays.asList(match, group, sort, limitRes);
        }
        AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)
                .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build();
        Cursor cursor = coll.aggregate(pipeline, aggregationOptions);
        writeToFile(cursor, filename, "User\t Count");
        cursor.close();
        mongoClient.close();
    } catch (IOException ex) {
        System.out.println("Something's Wrong! " + ex);
    }
}
From source file:datapreparation.MongoStatistics.java
public void topUrls() {
    // TODO code application logic here
    int limit = 0;
    String filename = "Top_Urls_More.txt";
    // Connect directly to a single MongoDB server (note that this will not auto-discover the
    // primary even if it's a member of a replica set)
    MongoClient mongoClient;
    try {
        mongoClient = new MongoClient("localhost");
        // use database
        DB db = mongoClient.getDB("users");
        // get collection
        DBCollection coll = db.getCollection("urls");

        // build the $project operation
        DBObject fields = new BasicDBObject("url", 1);
        fields.put("_id", 0);
        BasicDBObject project = new BasicDBObject("$project", fields);

        // now the $group operation
        DBObject groupFields = new BasicDBObject("_id", "$url");
        groupFields.put("count", new BasicDBObject("$sum", 1));
        DBObject group = new BasicDBObject("$group", groupFields);

        // finally the $sort operation
        BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

        // run aggregation
        List<DBObject> pipeline;
        if (limit == 0) { // without limit
            pipeline = Arrays.asList(project, group, sort);
        } else { // add a $limit stage that restricts the result to `limit` rows
            DBObject limitRes = new BasicDBObject("$limit", limit);
            pipeline = Arrays.asList(project, group, sort, limitRes);
        }
        AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)
                .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build();
        Cursor cursor = coll.aggregate(pipeline, aggregationOptions);
        writeToFile2(cursor, filename, "URL\t Count");
        cursor.close();
        mongoClient.close();
    } catch (IOException ex) {
        System.out.println("Something's Wrong! " + ex);
    }
}
From source file:datapreparation.MongoStatistics.java
public void timeIntervals() {
    // TODO code application logic here
    int limit = 0;
    String filename = "Times.txt";
    // Connect directly to a single MongoDB server (note that this will not auto-discover the
    // primary even if it's a member of a replica set)
    MongoClient mongoClient;
    try {
        mongoClient = new MongoClient("localhost");
        // use database
        DB db = mongoClient.getDB("users");
        // get collection
        DBCollection coll = db.getCollection("urls");

        // build the $project operation
        DBObject fields = new BasicDBObject("time", 1);
        fields.put("_id", 0);
        BasicDBObject project = new BasicDBObject("$project", fields);

        // now the $group operation
        DBObject groupFields = new BasicDBObject("_id", "$time");
        // groupFields.put("count", new BasicDBObject("$sum", 1));
        DBObject group = new BasicDBObject("$group", groupFields);

        // finally the $sort operation (disabled here)
        // BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

        // run aggregation
        List<DBObject> pipeline;
        if (limit == 0) { // without limit
            pipeline = Arrays.asList(project, group);
        } else { // add a $limit stage that restricts the result to `limit` rows
            DBObject limitRes = new BasicDBObject("$limit", limit);
            pipeline = Arrays.asList(project, group, limitRes);
        }
        AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)
                .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build();
        Cursor cursor = coll.aggregate(pipeline, aggregationOptions);
        writeToFile3(cursor, filename, "Times");
        cursor.close();
        mongoClient.close();
    } catch (IOException ex) {
        System.out.println("Something's Wrong! " + ex);
    }
}
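The three MongoStatistics methods above differ only in their pipeline stages and output file; the options and cursor handling are identical. A hypothetical refactoring sketch (runAggregation is not part of the original class) would consolidate the shared setup:

// hypothetical helper consolidating the shared cursor configuration
private Cursor runAggregation(DBCollection coll, List<DBObject> pipeline) {
    AggregationOptions options = AggregationOptions.builder()
            .batchSize(100)                                   // fetch results 100 at a time
            .outputMode(AggregationOptions.OutputMode.CURSOR) // stream rather than return inline
            .allowDiskUse(true)                               // tolerate large $group/$sort stages
            .build();
    return coll.aggregate(pipeline, options);
}

Each method would then only build its own $match/$project/$group/$sort stages and pass the returned cursor to its writeToFile variant.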
From source file:example.AggregationExample.java
License:Apache License
/**
 * Run this main method to see the output of this quick example.
 *
 * @param args takes no args
 * @throws UnknownHostException if it cannot connect to a MongoDB instance at localhost:27017
 */
public static void main(final String[] args) throws UnknownHostException {
    // connect to the local database server
    MongoClient mongoClient = new MongoClient();

    // get handle to "mydb"
    DB db = mongoClient.getDB("mydb");

    // Authenticate - optional
    // boolean auth = db.authenticate("foo", "bar");

    // Add some sample data
    DBCollection coll = db.getCollection("aggregationExample");
    coll.insert(new BasicDBObjectBuilder().add("employee", 1).add("department", "Sales").add("amount", 71)
            .add("type", "airfare").get());
    coll.insert(new BasicDBObjectBuilder().add("employee", 2).add("department", "Engineering").add("amount", 15)
            .add("type", "airfare").get());
    coll.insert(new BasicDBObjectBuilder().add("employee", 4).add("department", "Human Resources")
            .add("amount", 5).add("type", "airfare").get());
    coll.insert(new BasicDBObjectBuilder().add("employee", 42).add("department", "Sales").add("amount", 77)
            .add("type", "airfare").get());

    // create our pipeline operations, first with the $match
    DBObject match = new BasicDBObject("$match", new BasicDBObject("type", "airfare"));

    // build the $project operation
    DBObject fields = new BasicDBObject("department", 1);
    fields.put("amount", 1);
    fields.put("_id", 0);
    DBObject project = new BasicDBObject("$project", fields);

    // now the $group operation
    DBObject groupFields = new BasicDBObject("_id", "$department");
    groupFields.put("average", new BasicDBObject("$avg", "$amount"));
    DBObject group = new BasicDBObject("$group", groupFields);

    // finally the $sort operation
    DBObject sort = new BasicDBObject("$sort", new BasicDBObject("average", -1));

    // run aggregation inline
    List<DBObject> pipeline = Arrays.asList(match, project, group, sort);
    AggregationOutput output = coll.aggregate(pipeline);

    // output the results
    for (DBObject result : output.results()) {
        System.out.println(result);
    }

    // run the same aggregation, streaming the results through a cursor
    AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)
            .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build();
    Cursor cursor = coll.aggregate(pipeline, aggregationOptions);
    while (cursor.hasNext()) {
        System.out.println(cursor.next());
    }

    // clean up
    db.dropDatabase();
    mongoClient.close();
}
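This example intentionally shows both execution styles. The first call returns an AggregationOutput computed inline, so the whole result set must fit in a single 16MB server reply; the cursor form streams results in batches and has no such bound. A minimal sketch of the two option objects involved (same builder calls as in the example above):

// inline mode: the entire result set must fit in one 16MB server reply
AggregationOptions inlineOpts = AggregationOptions.builder()
        .outputMode(AggregationOptions.OutputMode.INLINE).build();
// cursor mode: results are streamed in batches, so result size is not bounded by one document
AggregationOptions cursorOpts = AggregationOptions.builder()
        .outputMode(AggregationOptions.OutputMode.CURSOR).batchSize(100).build();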
From source file:ezbake.data.mongo.HandlerForDriverFindCalls.java
License:Apache License
protected QueryResultIterator convertFindForDriver(String collectionName, DBObject jsonQuery,
        DBObject projection, String jsonSort, int skip, int limit, int batchSize, ReadPreference readPref,
        EzSecurityToken token, String operationType) throws Exception {
    appLog.info("convertFindForDriver() query: " + jsonQuery);

    AggregationOptions opts;
    if (batchSize > 0) {
        opts = AggregationOptions.builder().outputMode(AggregationOptions.OutputMode.CURSOR)
                .batchSize(batchSize).build();
    } else {
        opts = AggregationOptions.builder().build();
    }

    Object distinct = jsonQuery.get("distinct");
    Object key = null;
    if (distinct != null) {
        key = jsonQuery.get("key");
        Object q = jsonQuery.get("query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    jsonQuery = checkForQueryComment(jsonQuery);
    jsonQuery = checkForshowDiskLoc(jsonQuery);

    Object returnKey = jsonQuery.get("$returnKey");
    if (returnKey != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    Object snapshot = jsonQuery.get("$snapshot");
    if (snapshot != null) {
        Object ob = jsonQuery.get("$orderby");
        if (ob != null) {
            throw new MongoException("Do not use $snapshot with cursor.hint() and cursor.sort() methods");
        }
        Object hint = jsonQuery.get("$hint");
        if (hint != null) {
            throw new MongoException("Do not use $snapshot with cursor.hint() and cursor.sort() methods");
        }
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    Object explain = jsonQuery.get("$explain");
    if (explain != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    Object orderby = jsonQuery.get("$orderby");
    if (orderby != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
        jsonSort = orderby.toString();
    }

    Object maxScan = jsonQuery.get("$maxScan");
    if (maxScan != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
        limit = (Integer) maxScan;
    }

    Object min = jsonQuery.get("$min");
    if (min != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    Object max = jsonQuery.get("$max");
    if (max != null) {
        Object q = jsonQuery.get("$query");
        if (q != null) {
            jsonQuery = (DBObject) q;
        }
    }

    QueryResultIterator qri = null;
    DBObject query = null;
    if (jsonQuery != null && jsonQuery.keySet().size() > 0) {
        query = new BasicDBObject("$match", jsonQuery);
    }

    DBObject[] additionalOps = parent_handler.handler.getMongoFindHelper().getFindAggregationCommandsArray(skip,
            limit, (projection != null && projection.keySet().size() > 0) ? projection.toString() : "",
            jsonSort, token, operationType);

    List<DBObject> pipeline = new ArrayList<DBObject>();
    if (query != null) {
        pipeline.add(query);
    }
    Collections.addAll(pipeline, additionalOps);
    appLog.info("convertFindForDriver() final pipeline query: " + pipeline);

    Cursor cursor = null;
    if (distinct != null) {
        qri = handleDistinctCall(jsonQuery, readPref, token, opts, distinct, key, pipeline);
    } else if (max != null && min != null) {
        // TODO can max AND min be possible? investigate...
    } else if (max != null) {
        qri = handleMaxCall(collectionName, max, jsonQuery, readPref, token, opts, pipeline);
    } else if (min != null) {
        qri = handleMinCall(collectionName, min, jsonQuery, readPref, token, opts, pipeline);
    } else {
        cursor = parent_handler.handler.db.getCollection(collectionName).aggregate(pipeline, opts, readPref);
        if (cursor instanceof QueryResultIterator) {
            qri = (QueryResultIterator) cursor;
        } else {
            appLog.info("UNKNOWN CURSOR RETURNED: {}", cursor.toString());
            throw new Exception("Find converted to Aggregate pipeline did not return a QueryResultIterator: "
                    + cursor.toString());
        }
    }
    return qri;
}
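convertFindForDriver() unwraps each recognized query modifier by hand, repeating the same "$query" extraction for every case. A hypothetical refactoring sketch (this helper is not part of the EzBake source) shows the repeated step in isolation:

// hypothetical helper: replaces the repeated "$query" unwrapping above
private static DBObject unwrapDollarQuery(DBObject jsonQuery) {
    Object q = jsonQuery.get("$query");
    return q != null ? (DBObject) q : jsonQuery; // fall back to the original query document
}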
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/**
 * This method returns the number of variants that match provided parameters.
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param processID the process id
 * @return the long
 * @throws Exception the exception
 */
@RequestMapping(variantCountURL)
protected @ResponseBody long countVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") Integer genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") Integer readDepthThreshold,
        @RequestParam("missingData") Double missingData,
        @RequestParam(value = "minmaf", required = false) Float minmaf,
        @RequestParam(value = "maxmaf", required = false) Float maxmaf,
        @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition,
        @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName,
        @RequestParam("variantEffects") String variantEffects,
        @RequestParam("processID") final String processID) throws Exception {
    final ProgressIndicator progress = new ProgressIndicator(processID.substring(1 + processID.indexOf('|')),
            new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);

    DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), true /* empty it */);
    try {
        String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
                selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData,
                minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

        final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
        DBCollection cachedCountcollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
        DBCursor countCursor = cachedCountcollection.find(new BasicDBObject("_id", queryKey));
        Long count = null;
        if (countCursor.hasNext()) {
            count = 0l;
            for (Object aPartialCount : ((BasicDBList) countCursor.next()
                    .get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray())
                count += (Long) aPartialCount;
        }
        LOG.debug((count == null ? "new" : "existing") + " queryKey hash: " + queryKey);
        if (count == null) {
            long before = System.currentTimeMillis();
            progress.addStep("Counting matching variants");
            String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
                    .get(gtPattern);
            List<String> alleleCountList = alleleCount.length() == 0 ? null
                    : Arrays.asList(alleleCount.split(";"));

            GenotypingProject genotypingProject = mongoTemplate.findById(projId, GenotypingProject.class);
            if (genotypingProject.getAlleleCounts().size() != 1
                    || genotypingProject.getAlleleCounts().iterator().next() != 2) {
                // Project does not only have bi-allelic data: make sure we can apply MAF filter on selection
                boolean fExactlyOneNumberOfAllelesSelected = alleleCountList != null
                        && alleleCountList.size() == 1;
                boolean fBiAllelicSelected = fExactlyOneNumberOfAllelesSelected
                        && "2".equals(alleleCountList.get(0));
                boolean fMafRequested = (maxmaf != null && maxmaf < 50) || (minmaf != null && minmaf > 0);
                if (fMafRequested && !fBiAllelicSelected) {
                    progress.setError("MAF is only supported on biallelic data!");
                    return 0l;
                }
            }

            String actualSequenceSelection = selectedSequences;
            if (actualSequenceSelection.length() == 0) {
                ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
                if (externallySelectedSeqs != null)
                    actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
            }

            boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
                    sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData,
                    minmaf, maxmaf, geneName, variantEffects);
            BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
                    selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
                    actualSequenceSelection.length() == 0 ? null
                            : Arrays.asList(actualSequenceSelection.split(";")),
                    minposition, maxposition, alleleCountList);
            if (variantQueryDBList.isEmpty()) {
                if (!fNeedToFilterOnGenotypingData && mongoTemplate.count(null, GenotypingProject.class) == 1)
                    count = mongoTemplate.count(new Query(), VariantData.class); // no filter whatsoever
            } else {
                if (!fNeedToFilterOnGenotypingData) {
                    // filtering on variant features only: we just need a count
                    count = mongoTemplate.getCollection(mongoTemplate.getCollectionName(VariantData.class))
                            .count(new BasicDBObject("$and", variantQueryDBList));
                } else {
                    // filtering on variant features and genotyping data: we need a list of variant IDs to
                    // restrict the genotyping data search to
                    long beforeAggQuery = System.currentTimeMillis();
                    progress.setProgressDescription("Filtering variants for count...");

                    DBCollection variantColl = mongoTemplate
                            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
                    List<DBObject> pipeline = new ArrayList<DBObject>();
                    pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));

                    BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE);
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE);
                    projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
                    projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                            "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
                    pipeline.add(new BasicDBObject("$project", projectObject));
                    pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
                    variantColl.aggregate(pipeline);

                    mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
                    LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                            + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");
                    progress.setProgressDescription(null);
                    if (tmpVarColl.count() == 0)
                        count = 0l; // no need to search any further
                }
            }

            if (count != null) {
                BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, new Long[] { count });
                cachedCountcollection.save(dbo);
            } else {
                // now filter on genotyping data
                List<String> selectedIndividualList = selectedIndividuals.length() == 0 ? null
                        : Arrays.asList(selectedIndividuals.split(";"));
                if (selectedIndividualList == null)
                    selectedIndividualList = getIndividualsInDbOrder(sModule, projId);

                GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
                gsvr.setAlleleCount(alleleCount);
                if (minposition != null)
                    gsvr.setStart(minposition);
                if (maxposition != null)
                    gsvr.setEnd(maxposition);
                gsvr.setGeneName(geneName);
                gsvr.setReferenceName(selectedSequences);
                gsvr.setSelectedVariantTypes(selectedVariantTypes);
                gsvr.setVariantEffect(variantEffects);
                gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);
                gsvr.setMissingData(missingData);
                gsvr.setMinmaf(minmaf);
                gsvr.setMaxmaf(maxmaf);
                gsvr.setGtPattern(gtPattern);

                HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
                annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
                annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
                gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
                gsvr.setCallSetIds(selectedIndividualList);

                GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                        tmpVarColl);
                try {
                    final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
                    if (nChunkCount > 1)
                        LOG.debug("Query split into " + nChunkCount);

                    final Long[] partialCountArray = new Long[nChunkCount];
                    final Builder aggOpts = AggregationOptions.builder().allowDiskUse(false);
                    final ArrayList<Thread> threadsToWaitFor = new ArrayList<Thread>();
                    final AtomicInteger finishedThreadCount = new AtomicInteger(0);

                    ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
                    while (genotypingDataQueryBuilder.hasNext())
                        genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

                    ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
                    for (int i = 0; i < genotypingDataPipelines.size(); i++)
                        chunkIndices.add(i);
                    Collections.shuffle(chunkIndices);

                    for (int i = 0; i < chunkIndices.size(); i++) {
                        final List<DBObject> genotypingDataPipeline = genotypingDataPipelines
                                .get(chunkIndices.get(i));

                        // now the $group operation, used for counting
                        DBObject groupFields = new BasicDBObject("_id", null);
                        groupFields.put("count", new BasicDBObject("$sum", 1));
                        genotypingDataPipeline.add(new BasicDBObject("$group", groupFields));

                        if (i == 0 && tmpVarColl.count() <= 5)
                            LOG.debug(genotypingDataPipeline);

                        if (progress.hasAborted()) {
                            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                            return 0l;
                        }

                        final int chunkIndex = i;
                        Thread t = new Thread() {
                            public void run() {
                                Cursor it = mongoTemplate
                                        .getCollection(MongoTemplateManager
                                                .getMongoCollectionName(VariantRunData.class))
                                        .aggregate(genotypingDataPipeline, aggOpts.build());
                                partialCountArray[chunkIndex] = it.hasNext()
                                        ? ((Number) it.next().get("count")).longValue() : 0;
                                progress.setCurrentStepProgress(
                                        (short) (finishedThreadCount.incrementAndGet() * 100 / nChunkCount));
                                genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)
                            }
                        };
                        if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS
                                == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1)) {
                            t.run(); // run synchronously
                        } else {
                            threadsToWaitFor.add(t);
                            t.start(); // run asynchronously for better speed
                        }
                    }

                    for (Thread t : threadsToWaitFor) // wait for all threads before moving to next phase
                        t.join();
                    progress.setCurrentStepProgress(100);

                    count = 0l;
                    for (Long partialCount : partialCountArray)
                        count += partialCount;

                    BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                    dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, partialCountArray);
                    cachedCountcollection.save(dbo);
                } catch (Exception e) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    throw e;
                }
            }
            LOG.info("countVariants found " + count + " results in "
                    + (System.currentTimeMillis() - before) / 1000d + "s");
        }

        progress.markAsComplete();
        if (progress.hasAborted())
            return 0l;
        return count;
    } finally {
        // getTemporaryVariantCollection(sModule, progress.getProcessId(), true); // always empty it
    }
}
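The terminal stage each chunk thread appends is the standard aggregation counting idiom: group everything under a null _id and $sum 1. A standalone sketch of that idiom (the collection and pipeline are caller-supplied placeholders, not taken from Gigwa):

// counting via the aggregation framework: the terminal stage used by each chunk thread above
static long countViaAggregation(DBCollection coll, List<DBObject> pipeline) {
    pipeline.add(new BasicDBObject("$group",
            new BasicDBObject("_id", null).append("count", new BasicDBObject("$sum", 1))));
    Cursor c = coll.aggregate(pipeline, AggregationOptions.builder().allowDiskUse(false).build());
    return c.hasNext() ? ((Number) c.next().get("count")).longValue() : 0L; // empty cursor means zero matches
}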
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/**
 * Find variants.
 *
 * This method builds a list of variants in a temporary collection, that may be used later for browsing
 * or exporting results.
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param wantedFields the wanted fields
 * @param page the page
 * @param size the size
 * @param sortBy the sort by
 * @param sortDir the sort dir
 * @param processID the process id
 * @return true, if successful
 * @throws Exception the exception
 */
@RequestMapping(variantFindURL)
protected @ResponseBody boolean findVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") int readDepthThreshold,
        @RequestParam("missingData") double missingData, @RequestParam("minmaf") Float minmaf,
        @RequestParam("maxmaf") Float maxmaf, @RequestParam("minposition") Long minposition,
        @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount,
        @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects,
        @RequestParam("wantedFields") String wantedFields, @RequestParam("page") int page,
        @RequestParam("size") int size, @RequestParam("sortBy") String sortBy,
        @RequestParam("sortDir") String sortDir, @RequestParam("processID") String processID) throws Exception {
    long before = System.currentTimeMillis();
    String token = processID.substring(1 + processID.indexOf('|'));
    final ProgressIndicator progress = new ProgressIndicator(token, new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);
    progress.addStep("Loading results");

    String actualSequenceSelection = selectedSequences;
    if (actualSequenceSelection.length() == 0) {
        ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
        if (externallySelectedSeqs != null)
            actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
    }
    List<String> selectedSequenceList = actualSequenceSelection.length() == 0 ? null
            : Arrays.asList(actualSequenceSelection.split(";"));
    String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
            selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

    final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    DBCollection cachedCountCollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
    DBCursor countCursor = cachedCountCollection.find(new BasicDBObject("_id", queryKey));
    final DBCollection variantColl = mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
    final Object[] partialCountArray = !countCursor.hasNext() ? null
            : ((BasicDBList) countCursor.next().get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray();
    final DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), false);

    String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
            .get(gtPattern);
    boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
            sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, geneName, variantEffects);
    final BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
            selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
            selectedSequenceList, minposition, maxposition,
            alleleCount.length() == 0 ? null : Arrays.asList(alleleCount.split(";")));
    if (!variantQueryDBList.isEmpty()
            && tmpVarColl.count() == 0 /* otherwise we kept the preliminary list from the count procedure */) {
        // apply filter on variant features
        progress.setProgressDescription("Filtering variants for display...");
        long beforeAggQuery = System.currentTimeMillis();
        List<DBObject> pipeline = new ArrayList<DBObject>();
        pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));
        BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE);
        projectObject.put(
                VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE);
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE);
        projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
        projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
        pipeline.add(new BasicDBObject("$project", projectObject));
        pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
        variantColl.aggregate(pipeline);
        LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");
        progress.setProgressDescription(null);
    } else if (fNeedToFilterOnGenotypingData && tmpVarColl.count() > 0)
        LOG.debug("Re-using " + tmpVarColl.count()
                + " results from count procedure's variant preliminary query");

    if (progress.hasAborted())
        return false;

    if (fNeedToFilterOnGenotypingData) {
        // now filter on genotyping data
        final ConcurrentLinkedQueue<Thread> queryThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>(),
                removalThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>();
        final AtomicInteger finishedThreadCount = new AtomicInteger(0);
        final ConcurrentSkipListSet<Comparable> allVariantsThatPassRunFilter = new ConcurrentSkipListSet<Comparable>();

        GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
        gsvr.setAlleleCount(alleleCount);
        if (minposition != null)
            gsvr.setStart(minposition);
        if (maxposition != null)
            gsvr.setEnd(maxposition);
        gsvr.setGeneName(geneName);
        gsvr.setReferenceName(selectedSequences);
        gsvr.setSelectedVariantTypes(selectedVariantTypes);
        gsvr.setVariantEffect(variantEffects);
        gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);
        gsvr.setMissingData(missingData);
        gsvr.setMinmaf(minmaf);
        gsvr.setMaxmaf(maxmaf);
        gsvr.setGtPattern(gtPattern);
        HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
        annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
        annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
        gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
        gsvr.setCallSetIds(selectedIndividuals == null || selectedIndividuals.length() == 0
                ? getIndividualsInDbOrder(sModule, projId) : Arrays.asList(selectedIndividuals.split(";")));

        final GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                tmpVarColl);
        genotypingDataQueryBuilder.keepTrackOfPreFilters(!variantQueryDBList.isEmpty());
        try {
            final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
            if (nChunkCount != partialCountArray.length) {
                LOG.error("Different number of chunks between counting and listing variant rows!");
                progress.setError("Different number of chunks between counting and listing variant rows!");
                return false;
            }
            if (nChunkCount > 1)
                LOG.debug("Query split into " + nChunkCount);

            ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
            while (genotypingDataQueryBuilder.hasNext())
                genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

            ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
            for (int i = 0; i < genotypingDataPipelines.size(); i++)
                chunkIndices.add(i);
            Collections.shuffle(chunkIndices);

            for (int i = 0; i < chunkIndices.size(); i++) {
                final int chunkIndex = chunkIndices.get(i);
                final List<DBObject> genotypingDataPipeline = genotypingDataPipelines.get(chunkIndex);

                if (progress.hasAborted()) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    return false;
                }

                Thread t = new Thread() {
                    public void run() {
                        Cursor genotypingDataCursor = mongoTemplate
                                .getCollection(
                                        MongoTemplateManager.getMongoCollectionName(VariantRunData.class))
                                .aggregate(genotypingDataPipeline,
                                        AggregationOptions.builder().allowDiskUse(true).build());
                        final ArrayList<Comparable> variantsThatPassedRunFilter = new ArrayList<Comparable>();
                        while (genotypingDataCursor.hasNext())
                            variantsThatPassedRunFilter
                                    .add((Comparable) genotypingDataCursor.next().get("_id"));

                        if (variantQueryDBList.isEmpty()) // otherwise we won't need it
                            allVariantsThatPassRunFilter.addAll(variantsThatPassedRunFilter);
                        else { // mark the results we want to keep
                            final List<Comparable> lastUsedPreFilter = genotypingDataQueryBuilder
                                    .getPreFilteredIDsForChunk(chunkIndex);
                            Thread removalThread = new Thread() {
                                public void run() {
                                    genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)
                                    long beforeTempCollUpdate = System.currentTimeMillis();
                                    if (variantsThatPassedRunFilter.size() == lastUsedPreFilter.size())
                                        return; // none to remove
                                    Collection<Comparable> filteredOutVariants = variantsThatPassedRunFilter
                                            .size() == 0 ? lastUsedPreFilter
                                                    : CollectionUtils.subtract(lastUsedPreFilter,
                                                            variantsThatPassedRunFilter);
                                    BasicDBObject removalQuery = GenotypingDataQueryBuilder
                                            .tryAndShrinkIdList("_id", filteredOutVariants, 4);
                                    WriteResult wr = tmpVarColl.remove(removalQuery);
                                    LOG.debug("Chunk N." + (chunkIndex) + ": " + wr.getN()
                                            + " filtered-out temp records removed in "
                                            + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d
                                            + "s");
                                    progress.setCurrentStepProgress(
                                            (short) (finishedThreadCount.incrementAndGet() * 100
                                                    / nChunkCount));
                                }
                            };
                            removalThreadsToWaitFor.add(removalThread);
                            removalThread.start();
                        }
                    }
                };
                if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1))
                    t.run(); // sometimes run synchronously so that all queries are not sent at the same time (also helps smooth progress display)
                else {
                    queryThreadsToWaitFor.add(t);
                    t.start(); // run asynchronously for better speed
                }
            }

            // wait for all threads before moving to next phase
            for (Thread t : queryThreadsToWaitFor)
                t.join();
            for (Thread t : removalThreadsToWaitFor)
                t.join();
        } catch (Exception e) {
            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
            throw e;
        }

        if (progress.hasAborted())
            return false;

        progress.addStep("Updating temporary results");
        progress.moveToNextStep();
        final long beforeTempCollUpdate = System.currentTimeMillis();
        mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
        if (variantQueryDBList.isEmpty()) {
            // we filtered on runs only: keep track of the final dataset
            List<BasicDBObject> pipeline = new ArrayList<>();
            pipeline.add(new BasicDBObject("$match",
                    GenotypingDataQueryBuilder.tryAndShrinkIdList("_id", allVariantsThatPassRunFilter, 4)));
            BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_SEQUENCE);
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_START_SITE);
            projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
            projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                    "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
            projectObject.put(VariantData.FIELDNAME_VERSION, "$" + VariantData.FIELDNAME_VERSION);
            pipeline.add(new BasicDBObject("$project", projectObject));
            pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
            variantColl.aggregate(pipeline);
            LOG.debug(tmpVarColl.count() + " temp records created in "
                    + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d + "s");
        }
    }
    progress.markAsComplete();
    LOG.info("findVariants took " + (System.currentTimeMillis() - before) / 1000d + "s");
    return true;
}
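Both preliminary queries above end the pipeline with $out, which makes the server write the projected documents into the temporary collection instead of returning them to the client. A reduced, hypothetical sketch of that pattern (collection and target names are placeholders; $out must be the final stage):

// materialize pipeline results server-side into a named collection, then count them
static long materializeInto(DBCollection sourceColl, List<DBObject> pipeline, String targetName) {
    pipeline.add(new BasicDBObject("$out", targetName)); // $out must come last
    sourceColl.aggregate(pipeline, AggregationOptions.builder().allowDiskUse(true).build());
    return sourceColl.getDB().getCollection(targetName).count();
}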
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/**
 * Builds the variant rows.
 *
 * @param mongoTemplate the mongo template
 * @param variantsToBuildRowsFor the variants to build rows for
 * @param sortBy the sort by
 * @param sortDir the sort dir
 * @param page the page
 * @param size the size
 * @param variantFieldMap the variant field map
 * @param runDataFieldMap the run data field map
 * @return the array list
 */
private ArrayList<Object[]> buildVariantRows(MongoTemplate mongoTemplate, DBCursor variantsToBuildRowsFor,
        String sortBy, String sortDir, int page, int size, HashMap<Integer, String> variantFieldMap,
        Map<Integer, String> runDataFieldMap) {
    if (sortBy != null && sortBy.length() > 0) {
        String cleanSortField = sortBy.replaceFirst("%23", "");
        variantsToBuildRowsFor.sort(
                new BasicDBObject(cleanSortField, Integer.valueOf("DESC".equalsIgnoreCase(sortDir) ? -1 : 1)));
    }
    variantsToBuildRowsFor.skip(page * size).limit(size);

    ArrayList<Object[]> variantRows = new ArrayList<Object[]>();
    HashMap<Comparable, Object[]> variantIdToRowMap = new HashMap<Comparable, Object[]>();
    Collection<Comparable> currentVariants = new ArrayList<Comparable>();
    while (variantsToBuildRowsFor.hasNext()) {
        DBObject record = variantsToBuildRowsFor.next();
        Object[] aRow = new Object[variantFieldMap.size() + runDataFieldMap.size()];
        for (int i : variantFieldMap.keySet())
            aRow[i] = Helper.readPossiblyNestedField(record, variantFieldMap.get(i));
        variantRows.add(aRow);
        variantIdToRowMap.put((Comparable) aRow[0], aRow);
        currentVariants.add((Comparable) aRow[0]);
    }

    if (!runDataFieldMap.isEmpty()) {
        // Query on VariantRunData so we can fill run-related fields
        ArrayList<DBObject> genotypingDataAggregationParams2 = new ArrayList<DBObject>();
        genotypingDataAggregationParams2.add(new BasicDBObject("$match",
                new BasicDBObject("_id." + VariantRunDataId.FIELDNAME_VARIANT_ID,
                        new BasicDBObject("$in", currentVariants))));
        DBObject project = new BasicDBObject();
        for (String sField : runDataFieldMap.values())
            project.put(sField.replaceAll("\\.", ""), "$" + sField);
        genotypingDataAggregationParams2.add(new BasicDBObject("$project", project));

        Cursor genotypingDataCursor = mongoTemplate
                .getCollection(MongoTemplateManager.getMongoCollectionName(VariantRunData.class))
                .aggregate(genotypingDataAggregationParams2,
                        AggregationOptions.builder().allowDiskUse(true).build());
        while (genotypingDataCursor.hasNext()) {
            DBObject record = genotypingDataCursor.next();
            Object[] aRow = variantIdToRowMap.get(
                    Helper.readPossiblyNestedField(record, "_id." + VariantRunDataId.FIELDNAME_VARIANT_ID));
            for (int fieldIndex : runDataFieldMap.keySet())
                aRow[fieldIndex] = record.get(runDataFieldMap.get(fieldIndex).replaceAll("\\.", ""));
        }
    }
    return variantRows;
}
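This method performs a client-side join: it pages through the variant cursor, collects the page's ids, then restricts a VariantRunData aggregation to those ids with $in. The core of that pattern, as a hypothetical standalone helper (collection and id list are placeholders):

// restrict an aggregation on a second collection to the ids collected from the current page
static Cursor joinPageAgainst(DBCollection runDataColl, Collection<?> pageIds) {
    List<DBObject> pipeline = Arrays.<DBObject>asList(new BasicDBObject("$match",
            new BasicDBObject("_id", new BasicDBObject("$in", pageIds))));
    return runDataColl.aggregate(pipeline, AggregationOptions.builder().allowDiskUse(true).build());
}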
From source file:hulop.hokoukukan.utils.MongoAdapter.java
License:Open Source License
@Override
public JSONArray getLogStats(String event) {
    List<DBObject> pipeline = new ArrayList<DBObject>();
    if (event != null) {
        pipeline.add(new BasicDBObject("$match", new BasicDBObject("event", event)));
    }
    pipeline.add(new BasicDBObject("$project",
            new BasicDBObject("client", 1).append("timestamp", "$timestamp")));
    pipeline.add(new BasicDBObject("$group",
            new BasicDBObject().append("_id", new BasicDBObject("client", "$client"))
                    .append("count", new BasicDBObject("$sum", 1))
                    .append("min", new BasicDBObject("$min", "$timestamp"))
                    .append("max", new BasicDBObject("$max", "$timestamp"))));
    // inline alternative: Iterator<DBObject> it = logCol.aggregate(pipeline).results().iterator();
    Cursor it = logCol.aggregate(pipeline, AggregationOptions.builder().build());
    JSONArray result = new JSONArray();
    while (it.hasNext()) {
        try {
            DBObject elm = it.next();
            result.add(new JSONObject().put("clientId", ((DBObject) elm.get("_id")).get("client")).put("stats",
                    new JSONObject().put("count", elm.get("count")).put("min", elm.get("min")).put("max",
                            elm.get("max"))));
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
    return result;
}