Example usage for com.mongodb DBCollection aggregate

Introduction

On this page you can find example usages of com.mongodb DBCollection aggregate.

Prototype

@Deprecated
public AggregationOutput aggregate(final List<? extends DBObject> pipeline)

Source Link

Document

This method implements the aggregation framework.

Usage

From source file:com.mobileman.moments.core.services.user.impl.UserStatsServiceImpl.java

License:Apache License

/**
 * Counts the questions whose {@code created_by._id} equals the given id and writes
 * that number into {@code userStats.numberOfQuestions} of the user with the same id.
 *
 * @param userId id used both to match questions and to select the user to update
 */
private void computeNumberOfQuestions(final ObjectId userId) {
    mongoTemplate.execute(Question.class, new CollectionCallback<Void>() {

        @Override
        public Void doInCollection(DBCollection collection) throws MongoException, DataAccessException {
            // Stage 1: keep only the questions created by this user.
            final DBObject matchStage = new BasicDBObject("$match",
                    new BasicDBObject("created_by._id", userId));
            // Stage 2: collapse everything into a single document holding the count.
            final DBObject countStage = new BasicDBObject("$group",
                    new BasicDBObject("_id", null).append("count", new BasicDBObject("$sum", 1)));

            final List<DBObject> stages = new ArrayList<DBObject>();
            stages.add(matchStage);
            stages.add(countStage);

            // Only the first result document is read; no document at all means zero.
            final java.util.Iterator<DBObject> rows = collection.aggregate(stages).results().iterator();
            long total = 0;
            if (rows.hasNext()) {
                total = ((Number) rows.next().get("count")).longValue();
            }

            final Query byUserId = Query.query(Criteria.where("_id").is(userId));
            final Update storeTotal = new Update().set("userStats.numberOfQuestions", total);
            mongoTemplate.findAndModify(byUserId, storeTotal, User.class);

            return null;
        }
    });
}

From source file:com.petpet.c3po.dao.mongo.MongoPersistenceLayer.java

License:Apache License

/**
 * Counts the elements that have at least one metadata record with status
 * {@code CONFLICT} among the given properties.
 *
 * The pipeline filters the elements, unwinds their metadata, keeps only the records
 * of the requested properties, collects the distinct statuses per {@code uid}, keeps
 * the uids whose status set contains {@code CONFLICT} and finally counts them.
 *
 * @param filter     filter selecting the elements to inspect
 * @param properties property values a metadata record must have to be considered
 * @return the number of conflicting elements, or {@code 0} when the aggregation
 *         produces no result document (nothing matched)
 */
public long countConflicts(Filter filter, List<String> properties) {
    LOG.info("Calculating conflicts count");
    DBCollection collection = this.getCollection(Element.class);

    List<DBObject> pipeline = new ArrayList<DBObject>();
    pipeline.add(new BasicDBObject("$match", this.getCachedFilter(filter)));
    pipeline.add(new BasicDBObject("$unwind", "$metadata"));
    pipeline.add(new BasicDBObject("$project", new BasicDBObject("status", "$metadata.status").append("uid", 1)
            .append("property", "$metadata.property")));
    pipeline.add(new BasicDBObject("$match", new BasicDBObject("property", new BasicDBObject("$in", properties))));
    pipeline.add(new BasicDBObject("$group",
            new BasicDBObject("_id", "$uid").append("statuses", new BasicDBObject("$addToSet", "$status"))));
    BasicDBList conflictStatuses = new BasicDBList();
    conflictStatuses.add("CONFLICT");
    pipeline.add(new BasicDBObject("$match",
            new BasicDBObject("statuses", new BasicDBObject("$in", conflictStatuses))));
    pipeline.add(new BasicDBObject("$group",
            new BasicDBObject("_id", null).append("count", new BasicDBObject("$sum", 1))));

    // A $group over an empty input emits no document at all, so the result may be
    // empty; calling next() unconditionally would throw NoSuchElementException.
    java.util.Iterator<DBObject> results = collection.aggregate(pipeline).results().iterator();
    if (!results.hasNext()) {
        return 0L;
    }
    return ((Number) results.next().get("count")).longValue();
}

From source file:com.ricardolorenzo.identity.user.impl.UserIdentityManagerMongoDB.java

License:Open Source License

/**
 * All the scripts should have the following format:
 *
 * {/*from  w  w  w.j a  v a  2 s .  c o  m*/
 *    database.collection: {
 *        operation: insert|update|find|aggregate|delete
 *        query: {}
 *    }
 * }
 *
 * For update operations, you should specify the following:
 *
 * query: {
 *     find: {}
 *     update: {}
 * }
 */
private List<DBObject> runQueryScript(final String scriptType, final Map<String, Object[]> attributes)
        throws IdentityException {
    List<DBObject> results = new ArrayList<>();
    try {
        DB database = mongoClient.getDB(this.properties.getProperty("mongodb.database"));
        final ScriptCollection sc = getScriptCollection();
        if (sc.hasScript(scriptType)) {
            final String scriptContent = sc.getScript(scriptType);
            String query = createQueryFromScript(scriptContent, attributes);
            DBObject collectionOperation = DBObject.class.cast(JSON.parse(query));

            for (String collection : collectionOperation.keySet()) {
                if (!database.collectionExists(collection)) {
                    throw new IdentityException("collection [" + collection + "] does not exists");
                }

                DBObject dbObject = DBObject.class.cast(collectionOperation.get(collection));
                if (!dbObject.containsField("operation")) {
                    throw new IdentityException("operation field not specified");
                }

                String dbOperation = String.class.cast(dbObject.get("operation")).toLowerCase();
                if (!OPERATIONS.contains(dbOperation)) {
                    throw new IdentityException("operation [" + dbOperation + "] not supported");
                }

                DBObject dbQuery = DBObject.class.cast(dbObject.get("query"));
                if (dbQuery == null) {
                    throw new IdentityException("query field not specified");
                }

                DBCollection coll = database.getCollection(collection);
                switch (dbOperation) {
                case "insert": {
                    coll.insert(dbQuery);
                }
                case "update": {
                    if (!dbObject.containsField("find")) {
                        throw new IdentityException("find field not found inside the update operation");
                    }
                    if (!dbObject.containsField("update")) {
                        throw new IdentityException("update field not found inside the update operation");
                    }
                    DBObject dbUpdateFind = DBObject.class.cast(dbQuery.get("find"));
                    DBObject dbUpdateFields = DBObject.class.cast(dbQuery.get("update"));
                    coll.update(dbUpdateFind, dbUpdateFields, false, false);
                }
                case "delete": {
                    coll.remove(dbQuery);
                }
                case "find": {
                    DBCursor cursor = coll.find(dbQuery);
                    while (cursor.hasNext()) {
                        results.add(cursor.next());
                    }
                }
                case "aggregate": {
                    List<DBObject> aggregate = new ArrayList<DBObject>();
                    aggregate.add(dbQuery);
                    for (DBObject o : coll.aggregate(aggregate).results()) {
                        results.add(o);
                    }
                }
                }
            }
            return results;
        }
    } catch (final NoSuchAlgorithmException e) {
        throw new IdentityException(e.getMessage());
    } finally {
        /**
         * TODO close cursors
         */
    }
    return null;
}

From source file:com.stratio.connector.mongodb.core.engine.query.AggregationLogicalWorkflowExecutor.java

License:Apache License

/**
 * Runs the prepared aggregation pipeline and converts every result document into a row.
 *
 * @param mongoClient
 *            the MongoDB client.
 * @return the Crossdata ResultSet, with column metadata built from the project and select steps.
 * @throws ExecutionException
 *             if MongoDB reports an error while running the aggregation.
 */
@Override
public ResultSet executeQuery(MongoClient mongoClient) throws ExecutionException {

    final DB database = mongoClient.getDB(logicalWorkflowData.getProject().getCatalogName());
    final DBCollection target = database
            .getCollection(logicalWorkflowData.getProject().getTableName().getName());

    final ResultSet rows = new ResultSet();
    rows.setColumnMetadata(
            MetaResultUtils.createMetadata(logicalWorkflowData.getProject(), logicalWorkflowData.getSelect()));

    // Not used yet: AggregationOptions (allowDiskUse, batchSize) would make
    // aggregate(pipeline, options) return a cursor instead of an AggregationOutput.
    try {
        int stageNumber = 0;
        for (DBObject stageDefinition : query) {
            stageNumber++;
            logger.debug("Aggregate framework stage (" + stageNumber + ") : " + stageDefinition);
        }

        final AggregationOutput output = target.aggregate(query);
        for (DBObject document : output.results()) {
            if (logger.isDebugEnabled()) {
                logger.debug("AggResult: " + document);
            }
            rows.add(MetaResultUtils.createRowWithAlias(document, logicalWorkflowData.getSelect()));
        }
    } catch (MongoException failure) {
        logger.error("Error executing an aggregation query:" + failure.getMessage());
        throw new MongoExecutionException(failure.getMessage(), failure);
    }

    return rows;
}

From source file:es.bsc.mongoal.QueryGenerator.java

/**
 * Parses a MongoAL query, prints the generated pipeline as JSON to standard output and
 * runs it against the collection named by the query.
 *
 * @param queryString MongoAL query
 * @return all documents of the collection when the query produced no pipeline, otherwise
 *         the results of the aggregation
 */
public Iterable<DBObject> query(String queryString) {
    final MongoALParser parser = new MongoALParser(
            new CommonTokenStream(new MongoALLexer(new org.antlr.v4.runtime.ANTLRInputStream(queryString))));

    // The visitor answers with a pair: [0] = collection name, [1] = pipeline (or null).
    final Object[] parts = (Object[]) queryVisitor.visitQuery(parser.query());
    final Object pipeline = parts[1];
    System.out.println(JSON.serialize(pipeline));

    final DBCollection target = database.getCollection((String) parts[0]);
    if (pipeline == null) {
        return target.find();
    }

    @SuppressWarnings("unchecked")
    final List<DBObject> stages = (List<DBObject>) pipeline;
    return target.aggregate(stages).results();
}

From source file:eu.eubrazilcc.lvl.storage.mongodb.MongoDBConnector.java

License:EUPL

/**
 * Runs an aggregation pipeline on a collection of the configured database.
 *
 * @param pipeline   aggregation stages; must not be {@code null}
 * @param collection name of the collection to aggregate; must pass {@code isNotBlank}
 * @return the output of the aggregation
 */
private AggregationOutput execPipeline(final List<DBObject> pipeline, final String collection) {
    checkArgument(pipeline != null, "Uninitialized pipeline");
    checkArgument(isNotBlank(collection), "Uninitialized or invalid collection");
    return client().getDB(CONFIG_MANAGER.getDbName()).getCollection(collection).aggregate(pipeline);
}

From source file:example.AggregationExample.java

License:Apache License

/**
 * Run this main method to see the output of this quick example.
 *
 * It inserts four sample expense documents, averages the amounts per department with a
 * {@code $match}/{@code $project}/{@code $group}/{@code $sort} pipeline, prints the result
 * once from an {@code AggregationOutput} and once from a cursor, then drops the database.
 *
 * @param args takes no args
 * @throws UnknownHostException if it cannot connect to a MongoDB instance at localhost:27017
 */
public static void main(final String[] args) throws UnknownHostException {
    // connect to the local database server
    MongoClient mongoClient = new MongoClient();
    try {
        // get handle to "mydb"
        DB db = mongoClient.getDB("mydb");

        // Authenticate - optional
        // boolean auth = db.authenticate("foo", "bar");

        // Add some sample data
        DBCollection coll = db.getCollection("aggregationExample");
        coll.insert(new BasicDBObjectBuilder().add("employee", 1).add("department", "Sales").add("amount", 71)
                .add("type", "airfare").get());
        coll.insert(new BasicDBObjectBuilder().add("employee", 2).add("department", "Engineering")
                .add("amount", 15).add("type", "airfare").get());
        coll.insert(new BasicDBObjectBuilder().add("employee", 4).add("department", "Human Resources")
                .add("amount", 5).add("type", "airfare").get());
        coll.insert(new BasicDBObjectBuilder().add("employee", 42).add("department", "Sales").add("amount", 77)
                .add("type", "airfare").get());

        // create our pipeline operations, first with the $match
        DBObject match = new BasicDBObject("$match", new BasicDBObject("type", "airfare"));

        // build the $projection operation
        DBObject fields = new BasicDBObject("department", 1);
        fields.put("amount", 1);
        fields.put("_id", 0);
        DBObject project = new BasicDBObject("$project", fields);

        // Now the $group operation
        DBObject groupFields = new BasicDBObject("_id", "$department");
        groupFields.put("average", new BasicDBObject("$avg", "$amount"));
        DBObject group = new BasicDBObject("$group", groupFields);

        // Finally the $sort operation
        DBObject sort = new BasicDBObject("$sort", new BasicDBObject("average", -1));

        // run aggregation
        List<DBObject> pipeline = Arrays.asList(match, project, group, sort);
        AggregationOutput output = coll.aggregate(pipeline);

        // Output the results
        for (DBObject result : output.results()) {
            System.out.println(result);
        }

        // Aggregation Cursor
        AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)
                .outputMode(AggregationOptions.OutputMode.CURSOR).allowDiskUse(true).build();

        Cursor cursor = coll.aggregate(pipeline, aggregationOptions);
        try {
            while (cursor.hasNext()) {
                System.out.println(cursor.next());
            }
        } finally {
            // release the server-side cursor even if printing fails
            cursor.close();
        }

        // clean up
        db.dropDatabase();
    } finally {
        // always release the connection, also when one of the operations above throws
        mongoClient.close();
    }
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * This method returns the number of variants that match provided parameters.
 *
 * Counts are cached in the {@code MgdbDao.COLLECTION_NAME_CACHED_COUNTS} collection under a
 * key derived from the request parameters; a cached entry is an array of partial counts
 * that is summed up. When no cached entry exists the count is computed, either directly
 * on the variant collection or, when genotyping data must be filtered too, by running one
 * aggregation per query chunk and adding up the partial counts.
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types (';'-separated, may be empty)
 * @param selectedSequences the selected sequences (';'-separated, may be empty)
 * @param selectedIndividuals the selected individuals (';'-separated, may be empty)
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf (optional)
 * @param maxmaf the maxmaf (optional)
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count (';'-separated, may be empty)
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param processID the process id; the part after the first '|' identifies the progress indicator
 * @return the number of matching variants; 0 when the process was aborted or when a MAF
 *         filter was requested on data that is not restricted to bi-allelic variants
 * @throws Exception the exception
 */
@RequestMapping(variantCountURL)
protected @ResponseBody long countVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") Integer genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") Integer readDepthThreshold,
        @RequestParam("missingData") Double missingData,
        @RequestParam(value = "minmaf", required = false) Float minmaf,
        @RequestParam(value = "maxmaf", required = false) Float maxmaf,
        @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition,
        @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName,
        @RequestParam("variantEffects") String variantEffects,
        @RequestParam("processID") final String processID) throws Exception {
    // The progress indicator is keyed by whatever follows the first '|' in processID.
    final ProgressIndicator progress = new ProgressIndicator(processID.substring(1 + processID.indexOf('|')),
            new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);

    DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(),
            true /*empty it*/);
    try {
        String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
                selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData,
                minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

        final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
        DBCollection cachedCountcollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
        //         cachedCountcollection.drop();
        // Look for a cached result: the stored value is a list of partial counts to add up.
        DBCursor countCursor = cachedCountcollection.find(new BasicDBObject("_id", queryKey));
        Long count = null;
        if (countCursor.hasNext()) {
            count = 0l;
            // NOTE(review): the cast assumes every cached partial count is stored as a Long - confirm.
            for (Object aPartialCount : ((BasicDBList) countCursor.next()
                    .get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray())
                count += (Long) aPartialCount;
        }
        LOG.debug((count == null ? "new" : "existing") + " queryKey hash: " + queryKey);
        if (count == null) {
            // Nothing cached for this key: compute the count.
            long before = System.currentTimeMillis();

            progress.addStep("Counting matching variants");
            String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
                    .get(gtPattern);

            List<String> alleleCountList = alleleCount.length() == 0 ? null
                    : Arrays.asList(alleleCount.split(";"));

            GenotypingProject genotypingProject = mongoTemplate.findById(projId, GenotypingProject.class);
            if (genotypingProject.getAlleleCounts().size() != 1
                    || genotypingProject.getAlleleCounts().iterator().next() != 2) { // Project does not only have bi-allelic data: make sure we can apply MAF filter on selection
                boolean fExactlyOneNumberOfAllelesSelected = alleleCountList != null
                        && alleleCountList.size() == 1;
                boolean fBiAllelicSelected = fExactlyOneNumberOfAllelesSelected
                        && "2".equals(alleleCountList.get(0));
                boolean fMafRequested = (maxmaf != null && maxmaf < 50) || (minmaf != null && minmaf > 0);
                if (fMafRequested && !fBiAllelicSelected) {
                    // NOTE(review): this early return skips progress.markAsComplete() - confirm this is intended.
                    progress.setError("MAF is only supported on biallelic data!");
                    return 0l;
                }
            }

            // With no explicit sequence selection, fall back to the sequences returned by
            // getSequenceIDsBeingFilteredOn (if any).
            String actualSequenceSelection = selectedSequences;
            if (actualSequenceSelection.length() == 0) {
                ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
                if (externallySelectedSeqs != null)
                    actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
            }

            boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
                    sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData,
                    minmaf, maxmaf, geneName, variantEffects);

            BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
                    selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
                    actualSequenceSelection.length() == 0 ? null
                            : Arrays.asList(actualSequenceSelection.split(";")),
                    minposition, maxposition, alleleCountList);
            if (variantQueryDBList.isEmpty()) {
                // No variant-level criteria. count stays null (and the genotyping branch
                // below runs) unless this is the only project and no genotyping filter applies.
                if (!fNeedToFilterOnGenotypingData && mongoTemplate.count(null, GenotypingProject.class) == 1)
                    count = mongoTemplate.count(new Query(), VariantData.class); // no filter whatsoever
            } else {
                if (!fNeedToFilterOnGenotypingData) { // filtering on variant features only: we just need a count
                    count = mongoTemplate.getCollection(mongoTemplate.getCollectionName(VariantData.class))
                            .count(new BasicDBObject("$and", variantQueryDBList));
                } else { // filtering on variant features and genotyping data: we need a list of variant IDs to restrict the genotyping data search to
                    long beforeAggQuery = System.currentTimeMillis();
                    progress.setProgressDescription("Filtering variants for count...");

                    // Project the matching variants (id, sequence, start site, type, known
                    // alleles) and write them into the temporary collection through $out.
                    DBCollection variantColl = mongoTemplate
                            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
                    List<DBObject> pipeline = new ArrayList<DBObject>();
                    pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));
                    BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE);
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE);
                    projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
                    projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                            "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
                    pipeline.add(new BasicDBObject("$project", projectObject));
                    pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
                    variantColl.aggregate(pipeline);

                    // NOTE(review): presumably set so the $out result is visible to the counts below - confirm.
                    mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
                    LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                            + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");

                    progress.setProgressDescription(null);
                    if (tmpVarColl.count() == 0)
                        count = 0l; // no need to search any further
                }
            }

            if (count != null) {
                // The count is already known: cache it as a one-element array of partial counts.
                BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, new Long[] { count });
                cachedCountcollection.save(dbo);
            } else { // now filter on genotyping data
                List<String> selectedIndividualList = selectedIndividuals.length() == 0 ? null
                        : Arrays.asList(selectedIndividuals.split(";"));
                if (selectedIndividualList == null)
                    selectedIndividualList = getIndividualsInDbOrder(sModule, projId);

                // Repackage the request parameters for the genotyping query builder.
                GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
                gsvr.setAlleleCount(alleleCount);
                if (minposition != null)
                    gsvr.setStart(minposition);
                if (maxposition != null)
                    gsvr.setEnd(maxposition);
                gsvr.setGeneName(geneName);
                gsvr.setReferenceName(selectedSequences);
                gsvr.setSelectedVariantTypes(selectedVariantTypes);
                gsvr.setVariantEffect(variantEffects);
                gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);

                gsvr.setMissingData(missingData);
                gsvr.setMinmaf(minmaf);
                gsvr.setMaxmaf(maxmaf);
                gsvr.setGtPattern(gtPattern);

                HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
                annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
                annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
                gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
                gsvr.setCallSetIds(selectedIndividualList);

                GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                        tmpVarColl);
                try {
                    final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
                    if (nChunkCount > 1)
                        LOG.debug("Query split into " + nChunkCount);

                    // One slot per chunk; each slot is filled by the thread that runs that chunk.
                    final Long[] partialCountArray = new Long[nChunkCount];
                    final Builder aggOpts = AggregationOptions.builder().allowDiskUse(false);
                    final ArrayList<Thread> threadsToWaitFor = new ArrayList<Thread>();
                    final AtomicInteger finishedThreadCount = new AtomicInteger(0);

                    // Collect every pipeline from the builder before running any of them.
                    ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
                    while (genotypingDataQueryBuilder.hasNext())
                        genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

                    // Chunks are executed in random order.
                    ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
                    for (int i = 0; i < genotypingDataPipelines.size(); i++)
                        chunkIndices.add(i);
                    Collections.shuffle(chunkIndices);

                    for (int i = 0; i < chunkIndices.size()/*/2*/; i++) {
                        final List<DBObject> genotypingDataPipeline = genotypingDataPipelines
                                .get(chunkIndices.get(i));

                        // Now the $group operation, used for counting
                        DBObject groupFields = new BasicDBObject("_id", null);
                        groupFields.put("count", new BasicDBObject("$sum", 1));
                        genotypingDataPipeline.add(new BasicDBObject("$group", groupFields));

                        if (i == 0 && tmpVarColl.count() <= 5)
                            LOG.debug(genotypingDataPipeline);

                        if (progress.hasAborted()) {
                            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                            return 0l;
                        }

                        // The result slot is the loop position, not the shuffled chunk number.
                        final int chunkIndex = i;

                        Thread t = new Thread() {
                            public void run() {
                                //                               long b4 = System.currentTimeMillis();
                                Cursor it = mongoTemplate
                                        .getCollection(MongoTemplateManager
                                                .getMongoCollectionName(VariantRunData.class))
                                        .aggregate(genotypingDataPipeline, aggOpts.build());
                                // An empty aggregation result means no match for this chunk.
                                partialCountArray[chunkIndex] = it.hasNext()
                                        ? ((Number) it.next().get("count")).longValue()
                                        : 0;
                                progress.setCurrentStepProgress(
                                        (short) (finishedThreadCount.incrementAndGet() * 100 / nChunkCount));
                                //                           System.out.println("chunk " + chunkIndex + " took " + (System.currentTimeMillis() - b4));
                                genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)
                            }
                        };

                        // Every NUMBER_OF_SIMULTANEOUS_QUERY_THREADS-th chunk is executed with
                        // run(), i.e. on the calling thread, so the loop blocks until it is done.
                        if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS
                                - 1)) {
                            t.run(); // run synchronously
                        } else {
                            threadsToWaitFor.add(t);
                            t.start(); // run asynchronously for better speed
                        }
                    }

                    for (Thread t : threadsToWaitFor) // wait for all threads before moving to next phase
                        t.join();

                    progress.setCurrentStepProgress(100);

                    // NOTE(review): a slot stays null if its thread died with an exception, which
                    // would make the unboxing below throw a NullPointerException - confirm handling.
                    count = 0l;
                    for (Long partialCount : partialCountArray)
                        count += partialCount;

                    // Cache the partial counts under the query key for later requests.
                    BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                    dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, partialCountArray);
                    cachedCountcollection.save(dbo);
                } catch (Exception e) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    throw e;
                }
            }
            LOG.info("countVariants found " + count + " results in "
                    + (System.currentTimeMillis() - before) / 1000d + "s");
        }

        progress.markAsComplete();
        if (progress.hasAborted())
            return 0l;

        return count;
    } finally {
        // Intentionally empty: emptying the temporary collection here is disabled.
        //         getTemporaryVariantCollection(sModule, progress.getProcessId(), true);   // always empty it
    }
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * Find variants.
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param wantedFields the wanted fields
 * @param page the page
 * @param size the size
 * @param sortBy the sort by
 * @param sortDir the sort dir
 * @param processID the process id
 * @return true, if successful
 * @throws Exception the exception
 */
@RequestMapping(variantFindURL)
/**
 *  This method build a list of variants in a temporary collection, that may be used later for browsing or exporting results
 */
protected @ResponseBody boolean findVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") int readDepthThreshold,
        @RequestParam("missingData") double missingData, @RequestParam("minmaf") Float minmaf,
        @RequestParam("maxmaf") Float maxmaf, @RequestParam("minposition") Long minposition,
        @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount,
        @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects,
        @RequestParam("wantedFields") String wantedFields, @RequestParam("page") int page,
        @RequestParam("size") int size, @RequestParam("sortBy") String sortBy,
        @RequestParam("sortDir") String sortDir, @RequestParam("processID") String processID) throws Exception {
    long before = System.currentTimeMillis();

    String token = processID.substring(1 + processID.indexOf('|'));

    final ProgressIndicator progress = new ProgressIndicator(token, new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);
    progress.addStep("Loading results");

    String actualSequenceSelection = selectedSequences;
    if (actualSequenceSelection.length() == 0) {
        ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
        if (externallySelectedSeqs != null)
            actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
    }

    List<String> selectedSequenceList = actualSequenceSelection.length() == 0 ? null
            : Arrays.asList(actualSequenceSelection.split(";"));
    String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
            selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

    final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    DBCollection cachedCountCollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
    DBCursor countCursor = cachedCountCollection.find(new BasicDBObject("_id", queryKey));

    final DBCollection variantColl = mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
    final Object[] partialCountArray = !countCursor.hasNext() ? null
            : ((BasicDBList) countCursor.next().get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray();

    final DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), false);

    String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
            .get(gtPattern);
    boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
            sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, geneName, variantEffects);
    final BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
            selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
            selectedSequenceList, minposition, maxposition,
            alleleCount.length() == 0 ? null : Arrays.asList(alleleCount.split(";")));

    if (!variantQueryDBList.isEmpty()
            && tmpVarColl.count() == 0 /* otherwise we kept the preliminary list from the count procedure */) { // apply filter on variant features
        progress.setProgressDescription("Filtering variants for display...");
        long beforeAggQuery = System.currentTimeMillis();
        List<DBObject> pipeline = new ArrayList<DBObject>();
        pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));
        BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE);
        projectObject.put(
                VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE);
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE);
        projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
        projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
        pipeline.add(new BasicDBObject("$project", projectObject));

        pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
        variantColl.aggregate(pipeline);

        LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");
        progress.setProgressDescription(null);
    } else if (fNeedToFilterOnGenotypingData && tmpVarColl.count() > 0)
        LOG.debug(
                "Re-using " + tmpVarColl.count() + " results from count procedure's variant preliminary query");

    if (progress.hasAborted())
        return false;

    if (fNeedToFilterOnGenotypingData) { // now filter on genotyping data
        final ConcurrentLinkedQueue<Thread> queryThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>(),
                removalThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>();
        final AtomicInteger finishedThreadCount = new AtomicInteger(0);
        final ConcurrentSkipListSet<Comparable> allVariantsThatPassRunFilter = new ConcurrentSkipListSet<Comparable>();

        GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
        gsvr.setAlleleCount(alleleCount);
        if (minposition != null)
            gsvr.setStart(minposition);
        if (maxposition != null)
            gsvr.setEnd(maxposition);
        gsvr.setGeneName(geneName);
        gsvr.setReferenceName(selectedSequences);
        gsvr.setSelectedVariantTypes(selectedVariantTypes);
        gsvr.setVariantEffect(variantEffects);
        gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);

        gsvr.setMissingData(missingData);
        gsvr.setMinmaf(minmaf);
        gsvr.setMaxmaf(maxmaf);
        gsvr.setGtPattern(gtPattern);
        HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
        annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
        annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
        gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
        gsvr.setCallSetIds(selectedIndividuals == null || selectedIndividuals.length() == 0
                ? getIndividualsInDbOrder(sModule, projId)
                : Arrays.asList(selectedIndividuals.split(";")));

        final GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                tmpVarColl);
        genotypingDataQueryBuilder.keepTrackOfPreFilters(!variantQueryDBList.isEmpty());
        try {
            final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
            if (nChunkCount != partialCountArray.length) {
                LOG.error("Different number of chunks between counting and listing variant rows!");
                progress.setError("Different number of chunks between counting and listing variant rows!");
                return false;
            }
            if (nChunkCount > 1)
                LOG.debug("Query split into " + nChunkCount);

            ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
            while (genotypingDataQueryBuilder.hasNext())
                genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

            ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
            for (int i = 0; i < genotypingDataPipelines.size(); i++)
                chunkIndices.add(i);
            Collections.shuffle(chunkIndices);

            for (int i = 0; i < chunkIndices.size(); i++) {
                final int chunkIndex = chunkIndices.get(i);
                final List<DBObject> genotypingDataPipeline = genotypingDataPipelines.get(chunkIndex);

                if (progress.hasAborted()) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    return false;
                }

                Thread t = new Thread() {
                    public void run() {
                        Cursor genotypingDataCursor = mongoTemplate
                                .getCollection(
                                        MongoTemplateManager.getMongoCollectionName(VariantRunData.class))
                                .aggregate(genotypingDataPipeline,
                                        AggregationOptions.builder().allowDiskUse(true).build());
                        final ArrayList<Comparable> variantsThatPassedRunFilter = new ArrayList<Comparable>();
                        while (genotypingDataCursor.hasNext())
                            variantsThatPassedRunFilter
                                    .add((Comparable) genotypingDataCursor.next().get("_id"));

                        if (variantQueryDBList.isEmpty()) // otherwise we won't need it
                            allVariantsThatPassRunFilter.addAll(variantsThatPassedRunFilter);
                        else { // mark the results we want to keep
                            final List<Comparable> lastUsedPreFilter = genotypingDataQueryBuilder
                                    .getPreFilteredIDsForChunk(chunkIndex);

                            Thread removalThread = new Thread() {
                                public void run() {
                                    genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)

                                    long beforeTempCollUpdate = System.currentTimeMillis();
                                    if (variantsThatPassedRunFilter.size() == lastUsedPreFilter.size())
                                        return; // none to remove

                                    Collection<Comparable> filteredOutVariants = variantsThatPassedRunFilter
                                            .size() == 0 ? lastUsedPreFilter
                                                    : CollectionUtils.subtract(lastUsedPreFilter,
                                                            variantsThatPassedRunFilter);
                                    BasicDBObject removalQuery = GenotypingDataQueryBuilder
                                            .tryAndShrinkIdList("_id", filteredOutVariants, 4);
                                    WriteResult wr = tmpVarColl.remove(removalQuery);
                                    LOG.debug("Chunk N." + (chunkIndex) + ": " + wr.getN()
                                            + " filtered-out temp records removed in "
                                            + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d
                                            + "s");

                                    progress.setCurrentStepProgress(
                                            (short) (finishedThreadCount.incrementAndGet() * 100
                                                    / nChunkCount));
                                }
                            };
                            removalThreadsToWaitFor.add(removalThread);
                            removalThread.start();
                        }
                    }
                };

                if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1))
                    t.run(); // sometimes run synchronously so that all queries are not sent at the same time (also helps smooth progress display)
                else {
                    queryThreadsToWaitFor.add(t);
                    t.start(); // run asynchronously for better speed
                }
            }

            // wait for all threads before moving to next phase
            for (Thread t : queryThreadsToWaitFor)
                t.join();
            for (Thread t : removalThreadsToWaitFor)
                t.join();
        } catch (Exception e) {
            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
            throw e;
        }

        if (progress.hasAborted())
            return false;

        progress.addStep("Updating temporary results");
        progress.moveToNextStep();
        final long beforeTempCollUpdate = System.currentTimeMillis();
        mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
        if (variantQueryDBList.isEmpty()) { // we filtered on runs only: keep track of the final dataset
            List<BasicDBObject> pipeline = new ArrayList<>();
            pipeline.add(new BasicDBObject("$match",
                    GenotypingDataQueryBuilder.tryAndShrinkIdList("_id", allVariantsThatPassRunFilter, 4)));
            BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_SEQUENCE);
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_START_SITE);
            projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
            projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                    "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
            projectObject.put(VariantData.FIELDNAME_VERSION, "$" + VariantData.FIELDNAME_VERSION);
            pipeline.add(new BasicDBObject("$project", projectObject));
            pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
            variantColl.aggregate(pipeline);
            LOG.debug(tmpVarColl.count() + " temp records created in "
                    + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d + "s");
        }
    }

    progress.markAsComplete();
    LOG.info("findVariants took " + (System.currentTimeMillis() - before) / 1000d + "s");
    return true;
}

From source file:MDBInt.DBMongo.java

License:Apache License

/**
 * Runs a four-stage aggregation ($match, $project, $group, $sort) on the given collection and returns its
 * results.
 *
 * @param dbName name of the database holding the collection
 * @param collectionName name of the collection to aggregate on
 * @param campoMatch field the $match stage filters on
 * @param valoreMatch value that campoMatch must equal
 * @param campoOperazione field kept by the $project stage and fed to the accumulator
 * @param nomeOperazione name of the output field receiving the accumulator result
 * @param operation accumulator operator applied to campoOperazione in the $group stage
 * @return the documents produced by the aggregation
 */
private Iterable<DBObject> operate(String dbName, String collectionName, String campoMatch, String valoreMatch,
        String campoOperazione, String nomeOperazione, String operation) {

    final DBCollection targetCollection = this.getDB(dbName).getCollection(collectionName);

    // keep only the documents whose campoMatch equals valoreMatch
    final DBObject matchStage = new BasicDBObject("$match", new BasicDBObject(campoMatch, valoreMatch));

    // keep only the operated field, dropping _id
    final DBObject projectedFields = new BasicDBObject(campoOperazione, 1);
    projectedFields.put("_id", 0);
    final DBObject projectStage = new BasicDBObject("$project", projectedFields);

    // NOTE(review): the group key is the field name without a "$" prefix - presumably a constant key
    // putting every matched document in one group; confirm against the MongoDB $group documentation
    final DBObject groupSpec = new BasicDBObject("_id", campoOperazione);
    groupSpec.put(nomeOperazione, new BasicDBObject(operation, "$" + campoOperazione));
    final DBObject groupStage = new BasicDBObject("$group", groupSpec);

    final DBObject sortStage = new BasicDBObject("$sort", new BasicDBObject(campoOperazione, -1));

    final List<DBObject> stages = Arrays.asList(matchStage, projectStage, groupStage, sortStage);
    return targetCollection.aggregate(stages).results();
}