List of usage examples for com.mongodb DBCollection aggregate
@Deprecated public AggregationOutput aggregate(final List<? extends DBObject> pipeline)
From source file:com.mobileman.moments.core.services.user.impl.UserStatsServiceImpl.java
License:Apache License
/** * @param userId//from w ww .ja va2 s . com */ private void computeNumberOfQuestions(final ObjectId userId) { mongoTemplate.execute(Question.class, new CollectionCallback<Void>() { @Override public Void doInCollection(DBCollection collection) throws MongoException, DataAccessException { final List<DBObject> pipeline = new ArrayList<DBObject>(); pipeline.add(new BasicDBObject("$match", new BasicDBObject("created_by._id", userId))); pipeline.add(new BasicDBObject("$group", new BasicDBObject("_id", null).append("count", new BasicDBObject("$sum", 1)))); AggregationOutput aggregationOutput = collection.aggregate(pipeline); long questionCount = 0; for (DBObject result : aggregationOutput.results()) { questionCount = Number.class.cast(result.get("count")).longValue(); break; } mongoTemplate.findAndModify(Query.query(Criteria.where("_id").is(userId)), new Update().set("userStats.numberOfQuestions", questionCount), User.class); return null; } }); }
From source file:com.petpet.c3po.dao.mongo.MongoPersistenceLayer.java
License:Apache License
public long countConflicts(Filter filter, List<String> properties) { LOG.info("Calculating conflicts count"); DBCollection collection = this.getCollection(Element.class); List<DBObject> list = new ArrayList<DBObject>(); list.add(new BasicDBObject("$match", this.getCachedFilter(filter))); list.add(new BasicDBObject("$unwind", "$metadata")); list.add(new BasicDBObject("$project", new BasicDBObject("status", "$metadata.status").append("uid", 1) .append("property", "$metadata.property"))); list.add(new BasicDBObject("$match", new BasicDBObject("property", new BasicDBObject("$in", properties)))); list.add(new BasicDBObject("$group", new BasicDBObject("_id", "$uid").append("statuses", new BasicDBObject("$addToSet", "$status")))); BasicDBList in = new BasicDBList(); in.add("CONFLICT"); list.add(new BasicDBObject("$match", new BasicDBObject("statuses", new BasicDBObject("$in", in)))); list.add(new BasicDBObject("$group", new BasicDBObject("_id", null).append("count", new BasicDBObject("$sum", 1)))); Iterable<DBObject> resultIterable = collection.aggregate(list).results(); BasicDBObject result = (BasicDBObject) resultIterable.iterator().next(); return result.getLong("count"); }
From source file:com.ricardolorenzo.identity.user.impl.UserIdentityManagerMongoDB.java
License:Open Source License
/** * All the scripts should have the following format: * * {/*from w w w.j a v a 2 s . c o m*/ * database.collection: { * operation: insert|update|find|aggregate|delete * query: {} * } * } * * For update operations, you should specify the following: * * query: { * find: {} * update: {} * } */ private List<DBObject> runQueryScript(final String scriptType, final Map<String, Object[]> attributes) throws IdentityException { List<DBObject> results = new ArrayList<>(); try { DB database = mongoClient.getDB(this.properties.getProperty("mongodb.database")); final ScriptCollection sc = getScriptCollection(); if (sc.hasScript(scriptType)) { final String scriptContent = sc.getScript(scriptType); String query = createQueryFromScript(scriptContent, attributes); DBObject collectionOperation = DBObject.class.cast(JSON.parse(query)); for (String collection : collectionOperation.keySet()) { if (!database.collectionExists(collection)) { throw new IdentityException("collection [" + collection + "] does not exists"); } DBObject dbObject = DBObject.class.cast(collectionOperation.get(collection)); if (!dbObject.containsField("operation")) { throw new IdentityException("operation field not specified"); } String dbOperation = String.class.cast(dbObject.get("operation")).toLowerCase(); if (!OPERATIONS.contains(dbOperation)) { throw new IdentityException("operation [" + dbOperation + "] not supported"); } DBObject dbQuery = DBObject.class.cast(dbObject.get("query")); if (dbQuery == null) { throw new IdentityException("query field not specified"); } DBCollection coll = database.getCollection(collection); switch (dbOperation) { case "insert": { coll.insert(dbQuery); } case "update": { if (!dbObject.containsField("find")) { throw new IdentityException("find field not found inside the update operation"); } if (!dbObject.containsField("update")) { throw new IdentityException("update field not found inside the update operation"); } DBObject dbUpdateFind = DBObject.class.cast(dbQuery.get("find")); DBObject dbUpdateFields = DBObject.class.cast(dbQuery.get("update")); coll.update(dbUpdateFind, dbUpdateFields, false, false); } case "delete": { coll.remove(dbQuery); } case "find": { DBCursor cursor = coll.find(dbQuery); while (cursor.hasNext()) { results.add(cursor.next()); } } case "aggregate": { List<DBObject> aggregate = new ArrayList<DBObject>(); aggregate.add(dbQuery); for (DBObject o : coll.aggregate(aggregate).results()) { results.add(o); } } } } return results; } } catch (final NoSuchAlgorithmException e) { throw new IdentityException(e.getMessage()); } finally { /** * TODO close cursors */ } return null; }
From source file:com.stratio.connector.mongodb.core.engine.query.AggregationLogicalWorkflowExecutor.java
License:Apache License
/** * Execute an aggregation query.//from w w w . j a v a 2 s . c om * * @param mongoClient * the MongoDB client. * @return the Crossdata ResultSet. * @throws ExecutionException * if the execution fails. */ @Override public ResultSet executeQuery(MongoClient mongoClient) throws ExecutionException { DB db = mongoClient.getDB(logicalWorkflowData.getProject().getCatalogName()); DBCollection collection = db.getCollection(logicalWorkflowData.getProject().getTableName().getName()); ResultSet resultSet = new ResultSet(); resultSet.setColumnMetadata( MetaResultUtils.createMetadata(logicalWorkflowData.getProject(), logicalWorkflowData.getSelect())); // AggregationOptions aggOptions = AggregationOptions.builder() // .allowDiskUse(true) // .batchSize(size) // pipeline,aggOptions => dbcursor try { int stage = 1; for (DBObject aggregationStage : query) { logger.debug("Aggregate framework stage (" + (stage++) + ") : " + aggregationStage); } AggregationOutput aggOutput = collection.aggregate(query); for (DBObject result : aggOutput.results()) { if (logger.isDebugEnabled()) { logger.debug("AggResult: " + result); } resultSet.add(MetaResultUtils.createRowWithAlias(result, logicalWorkflowData.getSelect())); } } catch (MongoException mongoException) { logger.error("Error executing an aggregation query:" + mongoException.getMessage()); throw new MongoExecutionException(mongoException.getMessage(), mongoException); } return resultSet; }
From source file:es.bsc.mongoal.QueryGenerator.java
/** * Sends a query to the MongoDB database * @param queryString MongoAL query// w w w .java2s . c o m * @return An iterable collection of MongoDB DBobjects with the results of the query */ public Iterable<DBObject> query(String queryString) { MongoALLexer lexer = new MongoALLexer(new org.antlr.v4.runtime.ANTLRInputStream(queryString)); CommonTokenStream tokens = new CommonTokenStream(lexer); MongoALParser parser = new MongoALParser(tokens); Object[] ret = (Object[]) queryVisitor.visitQuery(parser.query()); System.out.println(JSON.serialize(ret[1])); DBCollection events = database.getCollection((String) ret[0]); if (ret[1] == null) { return events.find(); } else { return events.aggregate((List<DBObject>) ret[1]).results(); } }
From source file:eu.eubrazilcc.lvl.storage.mongodb.MongoDBConnector.java
License:EUPL
private AggregationOutput execPipeline(final List<DBObject> pipeline, final String collection) { checkArgument(pipeline != null, "Uninitialized pipeline"); checkArgument(isNotBlank(collection), "Uninitialized or invalid collection"); final DB db = client().getDB(CONFIG_MANAGER.getDbName()); final DBCollection dbcol = db.getCollection(collection); return dbcol.aggregate(pipeline); }
From source file:example.AggregationExample.java
License:Apache License
/** * Run this main method to see the output of this quick example. * * @param args takes no args// w ww.ja v a2 s . c o m * @throws UnknownHostException if it cannot connect to a MongoDB instance at localhost:27017 */ public static void main(final String[] args) throws UnknownHostException { // connect to the local database server MongoClient mongoClient = new MongoClient(); // get handle to "mydb" DB db = mongoClient.getDB("mydb"); // Authenticate - optional // boolean auth = db.authenticate("foo", "bar"); // Add some sample data DBCollection coll = db.getCollection("aggregationExample"); coll.insert(new BasicDBObjectBuilder().add("employee", 1).add("department", "Sales").add("amount", 71) .add("type", "airfare").get()); coll.insert(new BasicDBObjectBuilder().add("employee", 2).add("department", "Engineering").add("amount", 15) .add("type", "airfare").get()); coll.insert(new BasicDBObjectBuilder().add("employee", 4).add("department", "Human Resources") .add("amount", 5).add("type", "airfare").get()); coll.insert(new BasicDBObjectBuilder().add("employee", 42).add("department", "Sales").add("amount", 77) .add("type", "airfare").get()); // create our pipeline operations, first with the $match DBObject match = new BasicDBObject("$match", new BasicDBObject("type", "airfare")); // build the $projection operation DBObject fields = new BasicDBObject("department", 1); fields.put("amount", 1); fields.put("_id", 0); DBObject project = new BasicDBObject("$project", fields); // Now the $group operation DBObject groupFields = new BasicDBObject("_id", "$department"); groupFields.put("average", new BasicDBObject("$avg", "$amount")); DBObject group = new BasicDBObject("$group", groupFields); // Finally the $sort operation DBObject sort = new BasicDBObject("$sort", new BasicDBObject("average", -1)); // run aggregation List<DBObject> pipeline = Arrays.asList(match, project, group, sort); AggregationOutput output = coll.aggregate(pipeline); // Output the results for (DBObject result : output.results()) { System.out.println(result); } // Aggregation Cursor AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100) .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build(); Cursor cursor = coll.aggregate(pipeline, aggregationOptions); while (cursor.hasNext()) { System.out.println(cursor.next()); } // clean up db.dropDatabase(); mongoClient.close(); }
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/** * This method returns the number of variants that match provided parameters. * * @param request the request/*from ww w.j a v a 2 s .c om*/ * @param sModule the module * @param projId the proj id * @param selectedVariantTypes the selected variant types * @param selectedSequences the selected sequences * @param selectedIndividuals the selected individuals * @param gtPattern the gt code * @param genotypeQualityThreshold the genotype quality threshold * @param readDepthThreshold the read depth threshold * @param missingData the missing data * @param minmaf the minmaf * @param maxmaf the maxmaf * @param minposition the minposition * @param maxposition the maxposition * @param alleleCount the allele count * @param geneName the gene name * @param variantEffects the variant effects * @param processID the process id * @return the long * @throws Exception the exception */ @RequestMapping(variantCountURL) protected @ResponseBody long countVariants(HttpServletRequest request, @RequestParam("module") String sModule, @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes, @RequestParam("sequences") String selectedSequences, @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern, @RequestParam("genotypeQualityThreshold") Integer genotypeQualityThreshold, @RequestParam("readDepthThreshold") Integer readDepthThreshold, @RequestParam("missingData") Double missingData, @RequestParam(value = "minmaf", required = false) Float minmaf, @RequestParam(value = "maxmaf", required = false) Float maxmaf, @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects, @RequestParam("processID") final String processID) throws Exception { final ProgressIndicator progress = new ProgressIndicator(processID.substring(1 + processID.indexOf('|')), new String[0]); ProgressIndicator.registerProgressIndicator(progress); DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), true /*empty it*/); try { String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences, selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects); final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule); DBCollection cachedCountcollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS); // cachedCountcollection.drop(); DBCursor countCursor = cachedCountcollection.find(new BasicDBObject("_id", queryKey)); Long count = null; if (countCursor.hasNext()) { count = 0l; for (Object aPartialCount : ((BasicDBList) countCursor.next() .get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray()) count += (Long) aPartialCount; } LOG.debug((count == null ? "new" : "existing") + " queryKey hash: " + queryKey); if (count == null) { long before = System.currentTimeMillis(); progress.addStep("Counting matching variants"); String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap() .get(gtPattern); List<String> alleleCountList = alleleCount.length() == 0 ? null : Arrays.asList(alleleCount.split(";")); GenotypingProject genotypingProject = mongoTemplate.findById(projId, GenotypingProject.class); if (genotypingProject.getAlleleCounts().size() != 1 || genotypingProject.getAlleleCounts().iterator().next() != 2) { // Project does not only have bi-allelic data: make sure we can apply MAF filter on selection boolean fExactlyOneNumberOfAllelesSelected = alleleCountList != null && alleleCountList.size() == 1; boolean fBiAllelicSelected = fExactlyOneNumberOfAllelesSelected && "2".equals(alleleCountList.get(0)); boolean fMafRequested = (maxmaf != null && maxmaf < 50) || (minmaf != null && minmaf > 0); if (fMafRequested && !fBiAllelicSelected) { progress.setError("MAF is only supported on biallelic data!"); return 0l; } } String actualSequenceSelection = selectedSequences; if (actualSequenceSelection.length() == 0) { ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule); if (externallySelectedSeqs != null) actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";"); } boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId, sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf, maxmaf, geneName, variantEffects); BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId, selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")), actualSequenceSelection.length() == 0 ? null : Arrays.asList(actualSequenceSelection.split(";")), minposition, maxposition, alleleCountList); if (variantQueryDBList.isEmpty()) { if (!fNeedToFilterOnGenotypingData && mongoTemplate.count(null, GenotypingProject.class) == 1) count = mongoTemplate.count(new Query(), VariantData.class); // no filter whatsoever } else { if (!fNeedToFilterOnGenotypingData) { // filtering on variant features only: we just need a count count = mongoTemplate.getCollection(mongoTemplate.getCollectionName(VariantData.class)) .count(new BasicDBObject("$and", variantQueryDBList)); } else { // filtering on variant features and genotyping data: we need a list of variant IDs to restrict the genotyping data search to long beforeAggQuery = System.currentTimeMillis(); progress.setProgressDescription("Filtering variants for count..."); DBCollection variantColl = mongoTemplate .getCollection(mongoTemplate.getCollectionName(VariantData.class)); List<DBObject> pipeline = new ArrayList<DBObject>(); pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList))); BasicDBObject projectObject = new BasicDBObject("_id", "$_id"); projectObject.put( VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE); projectObject.put( VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE); projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE); projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST, "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST); pipeline.add(new BasicDBObject("$project", projectObject)); pipeline.add(new BasicDBObject("$out", tmpVarColl.getName())); variantColl.aggregate(pipeline); mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED); LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in " + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s"); progress.setProgressDescription(null); if (tmpVarColl.count() == 0) count = 0l; // no need to search any further } } if (count != null) { BasicDBObject dbo = new BasicDBObject("_id", queryKey); dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, new Long[] { count }); cachedCountcollection.save(dbo); } else { // now filter on genotyping data List<String> selectedIndividualList = selectedIndividuals.length() == 0 ? null : Arrays.asList(selectedIndividuals.split(";")); if (selectedIndividualList == null) selectedIndividualList = getIndividualsInDbOrder(sModule, projId); GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest(); gsvr.setAlleleCount(alleleCount); if (minposition != null) gsvr.setStart(minposition); if (maxposition != null) gsvr.setEnd(maxposition); gsvr.setGeneName(geneName); gsvr.setReferenceName(selectedSequences); gsvr.setSelectedVariantTypes(selectedVariantTypes); gsvr.setVariantEffect(variantEffects); gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId); gsvr.setMissingData(missingData); gsvr.setMinmaf(minmaf); gsvr.setMaxmaf(maxmaf); gsvr.setGtPattern(gtPattern); HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>(); annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold); annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold); gsvr.setAnnotationFieldThresholds(annotationFieldThresholds); gsvr.setCallSetIds(selectedIndividualList); GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr, tmpVarColl); try { final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries(); if (nChunkCount > 1) LOG.debug("Query split into " + nChunkCount); final Long[] partialCountArray = new Long[nChunkCount]; final Builder aggOpts = AggregationOptions.builder().allowDiskUse(false); final ArrayList<Thread> threadsToWaitFor = new ArrayList<Thread>(); final AtomicInteger finishedThreadCount = new AtomicInteger(0); ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList(); while (genotypingDataQueryBuilder.hasNext()) genotypingDataPipelines.add(genotypingDataQueryBuilder.next()); ArrayList<Integer> chunkIndices = new ArrayList<Integer>(); for (int i = 0; i < genotypingDataPipelines.size(); i++) chunkIndices.add(i); Collections.shuffle(chunkIndices); for (int i = 0; i < chunkIndices.size()/*/2*/; i++) { final List<DBObject> genotypingDataPipeline = genotypingDataPipelines .get(chunkIndices.get(i)); // Now the $group operation, used for counting DBObject groupFields = new BasicDBObject("_id", null); groupFields.put("count", new BasicDBObject("$sum", 1)); genotypingDataPipeline.add(new BasicDBObject("$group", groupFields)); if (i == 0 && tmpVarColl.count() <= 5) LOG.debug(genotypingDataPipeline); if (progress.hasAborted()) { genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain return 0l; } final int chunkIndex = i; Thread t = new Thread() { public void run() { // long b4 = System.currentTimeMillis(); Cursor it = mongoTemplate .getCollection(MongoTemplateManager .getMongoCollectionName(VariantRunData.class)) .aggregate(genotypingDataPipeline, aggOpts.build()); partialCountArray[chunkIndex] = it.hasNext() ? ((Number) it.next().get("count")).longValue() : 0; progress.setCurrentStepProgress( (short) (finishedThreadCount.incrementAndGet() * 100 / nChunkCount)); // System.out.println("chunk " + chunkIndex + " took " + (System.currentTimeMillis() - b4)); genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT) } }; if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1)) { t.run(); // run synchronously } else { threadsToWaitFor.add(t); t.start(); // run asynchronously for better speed } } for (Thread t : threadsToWaitFor) // wait for all threads before moving to next phase t.join(); progress.setCurrentStepProgress(100); count = 0l; for (Long partialCount : partialCountArray) count += partialCount; BasicDBObject dbo = new BasicDBObject("_id", queryKey); dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, partialCountArray); cachedCountcollection.save(dbo); } catch (Exception e) { genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain throw e; } } LOG.info("countVariants found " + count + " results in " + (System.currentTimeMillis() - before) / 1000d + "s"); } progress.markAsComplete(); if (progress.hasAborted()) return 0l; return count; } finally { // getTemporaryVariantCollection(sModule, progress.getProcessId(), true); // always empty it } }
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/** * Find variants.// www.j a va2 s .com * * @param request the request * @param sModule the module * @param projId the proj id * @param selectedVariantTypes the selected variant types * @param selectedSequences the selected sequences * @param selectedIndividuals the selected individuals * @param gtPattern the gt code * @param genotypeQualityThreshold the genotype quality threshold * @param readDepthThreshold the read depth threshold * @param missingData the missing data * @param minmaf the minmaf * @param maxmaf the maxmaf * @param minposition the minposition * @param maxposition the maxposition * @param alleleCount the allele count * @param geneName the gene name * @param variantEffects the variant effects * @param wantedFields the wanted fields * @param page the page * @param size the size * @param sortBy the sort by * @param sortDir the sort dir * @param processID the process id * @return true, if successful * @throws Exception the exception */ @RequestMapping(variantFindURL) /** * This method build a list of variants in a temporary collection, that may be used later for browsing or exporting results */ protected @ResponseBody boolean findVariants(HttpServletRequest request, @RequestParam("module") String sModule, @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes, @RequestParam("sequences") String selectedSequences, @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern, @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold, @RequestParam("readDepthThreshold") int readDepthThreshold, @RequestParam("missingData") double missingData, @RequestParam("minmaf") Float minmaf, @RequestParam("maxmaf") Float maxmaf, @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects, @RequestParam("wantedFields") String wantedFields, @RequestParam("page") int page, @RequestParam("size") int size, @RequestParam("sortBy") String sortBy, @RequestParam("sortDir") String sortDir, @RequestParam("processID") String processID) throws Exception { long before = System.currentTimeMillis(); String token = processID.substring(1 + processID.indexOf('|')); final ProgressIndicator progress = new ProgressIndicator(token, new String[0]); ProgressIndicator.registerProgressIndicator(progress); progress.addStep("Loading results"); String actualSequenceSelection = selectedSequences; if (actualSequenceSelection.length() == 0) { ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule); if (externallySelectedSeqs != null) actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";"); } List<String> selectedSequenceList = actualSequenceSelection.length() == 0 ? null : Arrays.asList(actualSequenceSelection.split(";")); String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences, selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects); final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule); DBCollection cachedCountCollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS); DBCursor countCursor = cachedCountCollection.find(new BasicDBObject("_id", queryKey)); final DBCollection variantColl = mongoTemplate .getCollection(mongoTemplate.getCollectionName(VariantData.class)); final Object[] partialCountArray = !countCursor.hasNext() ? null : ((BasicDBList) countCursor.next().get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray(); final DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), false); String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap() .get(gtPattern); boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId, sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf, maxmaf, geneName, variantEffects); final BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId, selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")), selectedSequenceList, minposition, maxposition, alleleCount.length() == 0 ? null : Arrays.asList(alleleCount.split(";"))); if (!variantQueryDBList.isEmpty() && tmpVarColl.count() == 0 /* otherwise we kept the preliminary list from the count procedure */) { // apply filter on variant features progress.setProgressDescription("Filtering variants for display..."); long beforeAggQuery = System.currentTimeMillis(); List<DBObject> pipeline = new ArrayList<DBObject>(); pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList))); BasicDBObject projectObject = new BasicDBObject("_id", "$_id"); projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE); projectObject.put( VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE); projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE); projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE); projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST, "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST); pipeline.add(new BasicDBObject("$project", projectObject)); pipeline.add(new BasicDBObject("$out", tmpVarColl.getName())); variantColl.aggregate(pipeline); LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in " + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s"); progress.setProgressDescription(null); } else if (fNeedToFilterOnGenotypingData && tmpVarColl.count() > 0) LOG.debug( "Re-using " + tmpVarColl.count() + " results from count procedure's variant preliminary query"); if (progress.hasAborted()) return false; if (fNeedToFilterOnGenotypingData) { // now filter on genotyping data final ConcurrentLinkedQueue<Thread> queryThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>(), removalThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>(); final AtomicInteger finishedThreadCount = new AtomicInteger(0); final ConcurrentSkipListSet<Comparable> allVariantsThatPassRunFilter = new ConcurrentSkipListSet<Comparable>(); GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest(); gsvr.setAlleleCount(alleleCount); if (minposition != null) gsvr.setStart(minposition); if (maxposition != null) gsvr.setEnd(maxposition); gsvr.setGeneName(geneName); gsvr.setReferenceName(selectedSequences); gsvr.setSelectedVariantTypes(selectedVariantTypes); gsvr.setVariantEffect(variantEffects); gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId); gsvr.setMissingData(missingData); gsvr.setMinmaf(minmaf); gsvr.setMaxmaf(maxmaf); gsvr.setGtPattern(gtPattern); HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>(); annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold); annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold); gsvr.setAnnotationFieldThresholds(annotationFieldThresholds); gsvr.setCallSetIds(selectedIndividuals == null || selectedIndividuals.length() == 0 ? getIndividualsInDbOrder(sModule, projId) : Arrays.asList(selectedIndividuals.split(";"))); final GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr, tmpVarColl); genotypingDataQueryBuilder.keepTrackOfPreFilters(!variantQueryDBList.isEmpty()); try { final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries(); if (nChunkCount != partialCountArray.length) { LOG.error("Different number of chunks between counting and listing variant rows!"); progress.setError("Different number of chunks between counting and listing variant rows!"); return false; } if (nChunkCount > 1) LOG.debug("Query split into " + nChunkCount); ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList(); while (genotypingDataQueryBuilder.hasNext()) genotypingDataPipelines.add(genotypingDataQueryBuilder.next()); ArrayList<Integer> chunkIndices = new ArrayList<Integer>(); for (int i = 0; i < genotypingDataPipelines.size(); i++) chunkIndices.add(i); Collections.shuffle(chunkIndices); for (int i = 0; i < chunkIndices.size(); i++) { final int chunkIndex = chunkIndices.get(i); final List<DBObject> genotypingDataPipeline = genotypingDataPipelines.get(chunkIndex); if (progress.hasAborted()) { genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain return false; } Thread t = new Thread() { public void run() { Cursor genotypingDataCursor = mongoTemplate .getCollection( MongoTemplateManager.getMongoCollectionName(VariantRunData.class)) .aggregate(genotypingDataPipeline, AggregationOptions.builder().allowDiskUse(true).build()); final ArrayList<Comparable> variantsThatPassedRunFilter = new ArrayList<Comparable>(); while (genotypingDataCursor.hasNext()) variantsThatPassedRunFilter .add((Comparable) genotypingDataCursor.next().get("_id")); if (variantQueryDBList.isEmpty()) // otherwise we won't need it allVariantsThatPassRunFilter.addAll(variantsThatPassedRunFilter); else { // mark the results we want to keep final List<Comparable> lastUsedPreFilter = genotypingDataQueryBuilder .getPreFilteredIDsForChunk(chunkIndex); Thread removalThread = new Thread() { public void run() { genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT) long beforeTempCollUpdate = System.currentTimeMillis(); if (variantsThatPassedRunFilter.size() == lastUsedPreFilter.size()) return; // none to remove Collection<Comparable> filteredOutVariants = variantsThatPassedRunFilter .size() == 0 ? lastUsedPreFilter : CollectionUtils.subtract(lastUsedPreFilter, variantsThatPassedRunFilter); BasicDBObject removalQuery = GenotypingDataQueryBuilder .tryAndShrinkIdList("_id", filteredOutVariants, 4); WriteResult wr = tmpVarColl.remove(removalQuery); LOG.debug("Chunk N." + (chunkIndex) + ": " + wr.getN() + " filtered-out temp records removed in " + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d + "s"); progress.setCurrentStepProgress( (short) (finishedThreadCount.incrementAndGet() * 100 / nChunkCount)); } }; removalThreadsToWaitFor.add(removalThread); removalThread.start(); } } }; if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1)) t.run(); // sometimes run synchronously so that all queries are not sent at the same time (also helps smooth progress display) else { queryThreadsToWaitFor.add(t); t.start(); // run asynchronously for better speed } } // wait for all threads before moving to next phase for (Thread t : queryThreadsToWaitFor) t.join(); for (Thread t : removalThreadsToWaitFor) t.join(); } catch (Exception e) { genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain throw e; } if (progress.hasAborted()) return false; progress.addStep("Updating temporary results"); progress.moveToNextStep(); final long beforeTempCollUpdate = System.currentTimeMillis(); mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED); if (variantQueryDBList.isEmpty()) { // we filtered on runs only: keep track of the final dataset List<BasicDBObject> pipeline = new ArrayList<>(); pipeline.add(new BasicDBObject("$match", GenotypingDataQueryBuilder.tryAndShrinkIdList("_id", allVariantsThatPassRunFilter, 4))); BasicDBObject projectObject = new BasicDBObject("_id", "$_id"); projectObject.put( VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE); projectObject.put( VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE, "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE); projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE); projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST, "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST); projectObject.put(VariantData.FIELDNAME_VERSION, "$" + VariantData.FIELDNAME_VERSION); pipeline.add(new BasicDBObject("$project", projectObject)); pipeline.add(new BasicDBObject("$out", tmpVarColl.getName())); variantColl.aggregate(pipeline); LOG.debug(tmpVarColl.count() + " temp records created in " + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d + "s"); } } progress.markAsComplete(); LOG.info("findVariants took " + (System.currentTimeMillis() - before) / 1000d + "s"); return true; }
From source file:MDBInt.DBMongo.java
License:Apache License
private Iterable<DBObject> operate(String dbName, String collectionName, String campoMatch, String valoreMatch, String campoOperazione, String nomeOperazione, String operation) { DB database = this.getDB(dbName); DBCollection collezione; DBObject match, fields, project, groupFields, group, sort; AggregationOutput output;/*from www .ja v a 2 s . c o m*/ Iterable<DBObject> cursor; List<DBObject> pipeline; collezione = database.getCollection(collectionName); match = new BasicDBObject("$match", new BasicDBObject(campoMatch, valoreMatch)); fields = new BasicDBObject(campoOperazione, 1); fields.put("_id", 0); project = new BasicDBObject("$project", fields); groupFields = new BasicDBObject("_id", campoOperazione); groupFields.put(nomeOperazione, new BasicDBObject(operation, "$" + campoOperazione)); group = new BasicDBObject("$group", groupFields); sort = new BasicDBObject("$sort", new BasicDBObject(campoOperazione, -1)); pipeline = Arrays.asList(match, project, group, sort); output = collezione.aggregate(pipeline); cursor = output.results(); return cursor; }