Example usage for com.mongodb DBCollection count

List of usage examples for com.mongodb DBCollection count

Introduction

In this page you can find the example usage for com.mongodb DBCollection count.

Prototype

public long count() 

Source Link

Document

Same as #getCount()

Usage

From source file:eu.delving.core.storage.impl.UserRepoImpl.java

License:EUPL

private DBCollection users() {
    PortalTheme portalTheme = ThemeFilter.getTheme();
    String collectionName = portalTheme == null ? USERS_COLLECTION
            : String.format("%s_%s", USERS_COLLECTION, portalTheme.getName());
    DBCollection collection = mongoFactory.getMongo().getDB(databaseName).getCollection(collectionName);
    if (collection.count() == 0) {
        DBObject object = mob(User.EMAIL, "supervisor@delving.eu", User.ENABLED, true, User.FIRST_NAME,
                "Delving", User.LAST_NAME, "Melvin", User.USER_NAME, "supervisor", User.ROLE,
                User.Role.ROLE_GOD.toString());
        UserImpl user = new UserImpl(object);
        user.setPassword("melvin");
        collection.save(object);/*from ww w  .j  a  va  2  s  .  c  om*/
    }
    return collection;
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * This method returns the number of variants that match provided parameters.
 *
 * @param request the request/*from   w ww  .jav  a 2 s  . c  o m*/
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param processID the process id
 * @return the long
 * @throws Exception the exception
 */
@RequestMapping(variantCountURL)
protected @ResponseBody long countVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") Integer genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") Integer readDepthThreshold,
        @RequestParam("missingData") Double missingData,
        @RequestParam(value = "minmaf", required = false) Float minmaf,
        @RequestParam(value = "maxmaf", required = false) Float maxmaf,
        @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition,
        @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName,
        @RequestParam("variantEffects") String variantEffects,
        @RequestParam("processID") final String processID) throws Exception {
    final ProgressIndicator progress = new ProgressIndicator(processID.substring(1 + processID.indexOf('|')),
            new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);

    DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(),
            true /*empty it*/);
    try {
        String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
                selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData,
                minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

        final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
        DBCollection cachedCountcollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
        //         cachedCountcollection.drop();
        DBCursor countCursor = cachedCountcollection.find(new BasicDBObject("_id", queryKey));
        Long count = null;
        if (countCursor.hasNext()) {
            count = 0l;
            for (Object aPartialCount : ((BasicDBList) countCursor.next()
                    .get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray())
                count += (Long) aPartialCount;
        }
        LOG.debug((count == null ? "new" : "existing") + " queryKey hash: " + queryKey);
        if (count == null) {
            long before = System.currentTimeMillis();

            progress.addStep("Counting matching variants");
            String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
                    .get(gtPattern);

            List<String> alleleCountList = alleleCount.length() == 0 ? null
                    : Arrays.asList(alleleCount.split(";"));

            GenotypingProject genotypingProject = mongoTemplate.findById(projId, GenotypingProject.class);
            if (genotypingProject.getAlleleCounts().size() != 1
                    || genotypingProject.getAlleleCounts().iterator().next() != 2) { // Project does not only have bi-allelic data: make sure we can apply MAF filter on selection
                boolean fExactlyOneNumberOfAllelesSelected = alleleCountList != null
                        && alleleCountList.size() == 1;
                boolean fBiAllelicSelected = fExactlyOneNumberOfAllelesSelected
                        && "2".equals(alleleCountList.get(0));
                boolean fMafRequested = (maxmaf != null && maxmaf < 50) || (minmaf != null && minmaf > 0);
                if (fMafRequested && !fBiAllelicSelected) {
                    progress.setError("MAF is only supported on biallelic data!");
                    return 0l;
                }
            }

            String actualSequenceSelection = selectedSequences;
            if (actualSequenceSelection.length() == 0) {
                ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
                if (externallySelectedSeqs != null)
                    actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
            }

            boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
                    sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData,
                    minmaf, maxmaf, geneName, variantEffects);

            BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
                    selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
                    actualSequenceSelection.length() == 0 ? null
                            : Arrays.asList(actualSequenceSelection.split(";")),
                    minposition, maxposition, alleleCountList);
            if (variantQueryDBList.isEmpty()) {
                if (!fNeedToFilterOnGenotypingData && mongoTemplate.count(null, GenotypingProject.class) == 1)
                    count = mongoTemplate.count(new Query(), VariantData.class); // no filter whatsoever
            } else {
                if (!fNeedToFilterOnGenotypingData) { // filtering on variant features only: we just need a count
                    count = mongoTemplate.getCollection(mongoTemplate.getCollectionName(VariantData.class))
                            .count(new BasicDBObject("$and", variantQueryDBList));
                } else { // filtering on variant features and genotyping data: we need a list of variant IDs to restrict the genotyping data search to
                    long beforeAggQuery = System.currentTimeMillis();
                    progress.setProgressDescription("Filtering variants for count...");

                    DBCollection variantColl = mongoTemplate
                            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
                    List<DBObject> pipeline = new ArrayList<DBObject>();
                    pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));
                    BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_SEQUENCE);
                    projectObject.put(
                            VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE,
                            "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                                    + ReferencePosition.FIELDNAME_START_SITE);
                    projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
                    projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                            "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
                    pipeline.add(new BasicDBObject("$project", projectObject));
                    pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
                    variantColl.aggregate(pipeline);

                    mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
                    LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                            + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");

                    progress.setProgressDescription(null);
                    if (tmpVarColl.count() == 0)
                        count = 0l; // no need to search any further
                }
            }

            if (count != null) {
                BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, new Long[] { count });
                cachedCountcollection.save(dbo);
            } else { // now filter on genotyping data
                List<String> selectedIndividualList = selectedIndividuals.length() == 0 ? null
                        : Arrays.asList(selectedIndividuals.split(";"));
                if (selectedIndividualList == null)
                    selectedIndividualList = getIndividualsInDbOrder(sModule, projId);

                GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
                gsvr.setAlleleCount(alleleCount);
                if (minposition != null)
                    gsvr.setStart(minposition);
                if (maxposition != null)
                    gsvr.setEnd(maxposition);
                gsvr.setGeneName(geneName);
                gsvr.setReferenceName(selectedSequences);
                gsvr.setSelectedVariantTypes(selectedVariantTypes);
                gsvr.setVariantEffect(variantEffects);
                gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);

                gsvr.setMissingData(missingData);
                gsvr.setMinmaf(minmaf);
                gsvr.setMaxmaf(maxmaf);
                gsvr.setGtPattern(gtPattern);

                HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
                annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
                annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
                gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
                gsvr.setCallSetIds(selectedIndividualList);

                GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                        tmpVarColl);
                try {
                    final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
                    if (nChunkCount > 1)
                        LOG.debug("Query split into " + nChunkCount);

                    final Long[] partialCountArray = new Long[nChunkCount];
                    final Builder aggOpts = AggregationOptions.builder().allowDiskUse(false);
                    final ArrayList<Thread> threadsToWaitFor = new ArrayList<Thread>();
                    final AtomicInteger finishedThreadCount = new AtomicInteger(0);

                    ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
                    while (genotypingDataQueryBuilder.hasNext())
                        genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

                    ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
                    for (int i = 0; i < genotypingDataPipelines.size(); i++)
                        chunkIndices.add(i);
                    Collections.shuffle(chunkIndices);

                    for (int i = 0; i < chunkIndices.size()/*/2*/; i++) {
                        final List<DBObject> genotypingDataPipeline = genotypingDataPipelines
                                .get(chunkIndices.get(i));

                        // Now the $group operation, used for counting
                        DBObject groupFields = new BasicDBObject("_id", null);
                        groupFields.put("count", new BasicDBObject("$sum", 1));
                        genotypingDataPipeline.add(new BasicDBObject("$group", groupFields));

                        if (i == 0 && tmpVarColl.count() <= 5)
                            LOG.debug(genotypingDataPipeline);

                        if (progress.hasAborted()) {
                            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                            return 0l;
                        }

                        final int chunkIndex = i;

                        Thread t = new Thread() {
                            public void run() {
                                //                               long b4 = System.currentTimeMillis();
                                Cursor it = mongoTemplate
                                        .getCollection(MongoTemplateManager
                                                .getMongoCollectionName(VariantRunData.class))
                                        .aggregate(genotypingDataPipeline, aggOpts.build());
                                partialCountArray[chunkIndex] = it.hasNext()
                                        ? ((Number) it.next().get("count")).longValue()
                                        : 0;
                                progress.setCurrentStepProgress(
                                        (short) (finishedThreadCount.incrementAndGet() * 100 / nChunkCount));
                                //                           System.out.println("chunk " + chunkIndex + " took " + (System.currentTimeMillis() - b4));
                                genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)
                            }
                        };

                        if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS
                                - 1)) {
                            t.run(); // run synchronously
                        } else {
                            threadsToWaitFor.add(t);
                            t.start(); // run asynchronously for better speed
                        }
                    }

                    for (Thread t : threadsToWaitFor) // wait for all threads before moving to next phase
                        t.join();

                    progress.setCurrentStepProgress(100);

                    count = 0l;
                    for (Long partialCount : partialCountArray)
                        count += partialCount;

                    BasicDBObject dbo = new BasicDBObject("_id", queryKey);
                    dbo.append(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE, partialCountArray);
                    cachedCountcollection.save(dbo);
                } catch (Exception e) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    throw e;
                }
            }
            LOG.info("countVariants found " + count + " results in "
                    + (System.currentTimeMillis() - before) / 1000d + "s");
        }

        progress.markAsComplete();
        if (progress.hasAborted())
            return 0l;

        return count;
    } finally {
        //         getTemporaryVariantCollection(sModule, progress.getProcessId(), true);   // always empty it
    }
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * Find variants./* w w w .j  a  va  2 s  . c o  m*/
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param wantedFields the wanted fields
 * @param page the page
 * @param size the size
 * @param sortBy the sort by
 * @param sortDir the sort dir
 * @param processID the process id
 * @return true, if successful
 * @throws Exception the exception
 */
@RequestMapping(variantFindURL)
/**
 *  This method build a list of variants in a temporary collection, that may be used later for browsing or exporting results
 */
protected @ResponseBody boolean findVariants(HttpServletRequest request, @RequestParam("module") String sModule,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") int readDepthThreshold,
        @RequestParam("missingData") double missingData, @RequestParam("minmaf") Float minmaf,
        @RequestParam("maxmaf") Float maxmaf, @RequestParam("minposition") Long minposition,
        @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount,
        @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects,
        @RequestParam("wantedFields") String wantedFields, @RequestParam("page") int page,
        @RequestParam("size") int size, @RequestParam("sortBy") String sortBy,
        @RequestParam("sortDir") String sortDir, @RequestParam("processID") String processID) throws Exception {
    long before = System.currentTimeMillis();

    String token = processID.substring(1 + processID.indexOf('|'));

    final ProgressIndicator progress = new ProgressIndicator(token, new String[0]);
    ProgressIndicator.registerProgressIndicator(progress);
    progress.addStep("Loading results");

    String actualSequenceSelection = selectedSequences;
    if (actualSequenceSelection.length() == 0) {
        ArrayList<String> externallySelectedSeqs = getSequenceIDsBeingFilteredOn(request, sModule);
        if (externallySelectedSeqs != null)
            actualSequenceSelection = StringUtils.join(externallySelectedSeqs, ";");
    }

    List<String> selectedSequenceList = actualSequenceSelection.length() == 0 ? null
            : Arrays.asList(actualSequenceSelection.split(";"));
    String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
            selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);

    final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    DBCollection cachedCountCollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
    DBCursor countCursor = cachedCountCollection.find(new BasicDBObject("_id", queryKey));

    final DBCollection variantColl = mongoTemplate
            .getCollection(mongoTemplate.getCollectionName(VariantData.class));
    final Object[] partialCountArray = !countCursor.hasNext() ? null
            : ((BasicDBList) countCursor.next().get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray();

    final DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, progress.getProcessId(), false);

    String sRegexOrAggregationOperator = GenotypingDataQueryBuilder.getGenotypePatternToQueryMap()
            .get(gtPattern);
    boolean fNeedToFilterOnGenotypingData = needToFilterOnGenotypingData(sModule, projId,
            sRegexOrAggregationOperator, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, geneName, variantEffects);
    final BasicDBList variantQueryDBList = buildVariantDataQuery(sModule, projId,
            selectedVariantTypes.length() == 0 ? null : Arrays.asList(selectedVariantTypes.split(";")),
            selectedSequenceList, minposition, maxposition,
            alleleCount.length() == 0 ? null : Arrays.asList(alleleCount.split(";")));

    if (!variantQueryDBList.isEmpty()
            && tmpVarColl.count() == 0 /* otherwise we kept the preliminary list from the count procedure */) { // apply filter on variant features
        progress.setProgressDescription("Filtering variants for display...");
        long beforeAggQuery = System.currentTimeMillis();
        List<DBObject> pipeline = new ArrayList<DBObject>();
        pipeline.add(new BasicDBObject("$match", new BasicDBObject("$and", variantQueryDBList)));
        BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE);
        projectObject.put(
                VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE);
        projectObject.put(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE,
                "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_END_SITE);
        projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
        projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
        pipeline.add(new BasicDBObject("$project", projectObject));

        pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
        variantColl.aggregate(pipeline);

        LOG.debug("Variant preliminary query found " + tmpVarColl.count() + " results in "
                + (System.currentTimeMillis() - beforeAggQuery) / 1000f + "s");
        progress.setProgressDescription(null);
    } else if (fNeedToFilterOnGenotypingData && tmpVarColl.count() > 0)
        LOG.debug(
                "Re-using " + tmpVarColl.count() + " results from count procedure's variant preliminary query");

    if (progress.hasAborted())
        return false;

    if (fNeedToFilterOnGenotypingData) { // now filter on genotyping data
        final ConcurrentLinkedQueue<Thread> queryThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>(),
                removalThreadsToWaitFor = new ConcurrentLinkedQueue<Thread>();
        final AtomicInteger finishedThreadCount = new AtomicInteger(0);
        final ConcurrentSkipListSet<Comparable> allVariantsThatPassRunFilter = new ConcurrentSkipListSet<Comparable>();

        GigwaSearchVariantsExportRequest gsvr = new GigwaSearchVariantsExportRequest();
        gsvr.setAlleleCount(alleleCount);
        if (minposition != null)
            gsvr.setStart(minposition);
        if (maxposition != null)
            gsvr.setEnd(maxposition);
        gsvr.setGeneName(geneName);
        gsvr.setReferenceName(selectedSequences);
        gsvr.setSelectedVariantTypes(selectedVariantTypes);
        gsvr.setVariantEffect(variantEffects);
        gsvr.setVariantSetId(sModule + ServiceInterface.ID_SEPARATOR + projId);

        gsvr.setMissingData(missingData);
        gsvr.setMinmaf(minmaf);
        gsvr.setMaxmaf(maxmaf);
        gsvr.setGtPattern(gtPattern);
        HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
        annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
        annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);
        gsvr.setAnnotationFieldThresholds(annotationFieldThresholds);
        gsvr.setCallSetIds(selectedIndividuals == null || selectedIndividuals.length() == 0
                ? getIndividualsInDbOrder(sModule, projId)
                : Arrays.asList(selectedIndividuals.split(";")));

        final GenotypingDataQueryBuilder genotypingDataQueryBuilder = new GenotypingDataQueryBuilder(gsvr,
                tmpVarColl);
        genotypingDataQueryBuilder.keepTrackOfPreFilters(!variantQueryDBList.isEmpty());
        try {
            final int nChunkCount = genotypingDataQueryBuilder.getNumberOfQueries();
            if (nChunkCount != partialCountArray.length) {
                LOG.error("Different number of chunks between counting and listing variant rows!");
                progress.setError("Different number of chunks between counting and listing variant rows!");
                return false;
            }
            if (nChunkCount > 1)
                LOG.debug("Query split into " + nChunkCount);

            ArrayList<List<DBObject>> genotypingDataPipelines = new ArrayList();
            while (genotypingDataQueryBuilder.hasNext())
                genotypingDataPipelines.add(genotypingDataQueryBuilder.next());

            ArrayList<Integer> chunkIndices = new ArrayList<Integer>();
            for (int i = 0; i < genotypingDataPipelines.size(); i++)
                chunkIndices.add(i);
            Collections.shuffle(chunkIndices);

            for (int i = 0; i < chunkIndices.size(); i++) {
                final int chunkIndex = chunkIndices.get(i);
                final List<DBObject> genotypingDataPipeline = genotypingDataPipelines.get(chunkIndex);

                if (progress.hasAborted()) {
                    genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
                    return false;
                }

                Thread t = new Thread() {
                    public void run() {
                        Cursor genotypingDataCursor = mongoTemplate
                                .getCollection(
                                        MongoTemplateManager.getMongoCollectionName(VariantRunData.class))
                                .aggregate(genotypingDataPipeline,
                                        AggregationOptions.builder().allowDiskUse(true).build());
                        final ArrayList<Comparable> variantsThatPassedRunFilter = new ArrayList<Comparable>();
                        while (genotypingDataCursor.hasNext())
                            variantsThatPassedRunFilter
                                    .add((Comparable) genotypingDataCursor.next().get("_id"));

                        if (variantQueryDBList.isEmpty()) // otherwise we won't need it
                            allVariantsThatPassRunFilter.addAll(variantsThatPassedRunFilter);
                        else { // mark the results we want to keep
                            final List<Comparable> lastUsedPreFilter = genotypingDataQueryBuilder
                                    .getPreFilteredIDsForChunk(chunkIndex);

                            Thread removalThread = new Thread() {
                                public void run() {
                                    genotypingDataPipeline.clear(); // release memory (VERY IMPORTANT)

                                    long beforeTempCollUpdate = System.currentTimeMillis();
                                    if (variantsThatPassedRunFilter.size() == lastUsedPreFilter.size())
                                        return; // none to remove

                                    Collection<Comparable> filteredOutVariants = variantsThatPassedRunFilter
                                            .size() == 0 ? lastUsedPreFilter
                                                    : CollectionUtils.subtract(lastUsedPreFilter,
                                                            variantsThatPassedRunFilter);
                                    BasicDBObject removalQuery = GenotypingDataQueryBuilder
                                            .tryAndShrinkIdList("_id", filteredOutVariants, 4);
                                    WriteResult wr = tmpVarColl.remove(removalQuery);
                                    LOG.debug("Chunk N." + (chunkIndex) + ": " + wr.getN()
                                            + " filtered-out temp records removed in "
                                            + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d
                                            + "s");

                                    progress.setCurrentStepProgress(
                                            (short) (finishedThreadCount.incrementAndGet() * 100
                                                    / nChunkCount));
                                }
                            };
                            removalThreadsToWaitFor.add(removalThread);
                            removalThread.start();
                        }
                    }
                };

                if (i % NUMBER_OF_SIMULTANEOUS_QUERY_THREADS == (NUMBER_OF_SIMULTANEOUS_QUERY_THREADS - 1))
                    t.run(); // sometimes run synchronously so that all queries are not sent at the same time (also helps smooth progress display)
                else {
                    queryThreadsToWaitFor.add(t);
                    t.start(); // run asynchronously for better speed
                }
            }

            // wait for all threads before moving to next phase
            for (Thread t : queryThreadsToWaitFor)
                t.join();
            for (Thread t : removalThreadsToWaitFor)
                t.join();
        } catch (Exception e) {
            genotypingDataQueryBuilder.cleanup(); // otherwise a pending db-cursor will remain
            throw e;
        }

        if (progress.hasAborted())
            return false;

        progress.addStep("Updating temporary results");
        progress.moveToNextStep();
        final long beforeTempCollUpdate = System.currentTimeMillis();
        mongoTemplate.getDb().setWriteConcern(WriteConcern.ACKNOWLEDGED);
        if (variantQueryDBList.isEmpty()) { // we filtered on runs only: keep track of the final dataset
            List<BasicDBObject> pipeline = new ArrayList<>();
            pipeline.add(new BasicDBObject("$match",
                    GenotypingDataQueryBuilder.tryAndShrinkIdList("_id", allVariantsThatPassRunFilter, 4)));
            BasicDBObject projectObject = new BasicDBObject("_id", "$_id");
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_SEQUENCE);
            projectObject.put(
                    VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE,
                    "$" + VariantData.FIELDNAME_REFERENCE_POSITION + "."
                            + ReferencePosition.FIELDNAME_START_SITE);
            projectObject.put(VariantData.FIELDNAME_TYPE, "$" + VariantData.FIELDNAME_TYPE);
            projectObject.put(VariantData.FIELDNAME_KNOWN_ALLELE_LIST,
                    "$" + VariantData.FIELDNAME_KNOWN_ALLELE_LIST);
            projectObject.put(VariantData.FIELDNAME_VERSION, "$" + VariantData.FIELDNAME_VERSION);
            pipeline.add(new BasicDBObject("$project", projectObject));
            pipeline.add(new BasicDBObject("$out", tmpVarColl.getName()));
            variantColl.aggregate(pipeline);
            LOG.debug(tmpVarColl.count() + " temp records created in "
                    + (System.currentTimeMillis() - beforeTempCollUpdate) / 1000d + "s");
        }
    }

    progress.markAsComplete();
    LOG.info("findVariants took " + (System.currentTimeMillis() - before) / 1000d + "s");
    return true;
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * List variants.//from  ww  w.j  a  v a 2  s .c  o  m
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @param wantedFields the wanted fields
 * @param page the page
 * @param size the size
 * @param sortBy the sort by
 * @param sortDir the sort dir
 * @param processID the process id
 * @return the array list
 * @throws Exception the exception
 */
@RequestMapping(variantListURL)
/**
 *  This method returns a list of variants from the current selection
 */
protected @ResponseBody ArrayList<Comparable[]> listVariants(HttpServletRequest request,
        @RequestParam("module") String sModule, @RequestParam("project") int projId,
        @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam("individuals") String selectedIndividuals, @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") int readDepthThreshold,
        @RequestParam("missingData") double missingData, @RequestParam("minmaf") Float minmaf,
        @RequestParam("maxmaf") Float maxmaf, @RequestParam("minposition") Long minposition,
        @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount,
        @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects,
        @RequestParam("wantedFields") String wantedFields, @RequestParam("page") int page,
        @RequestParam("size") int size, @RequestParam("sortBy") String sortBy,
        @RequestParam("sortDir") String sortDir, @RequestParam("processID") String processID) throws Exception {
    String[] usedFields = wantedFields.split(";");

    String token = processID.substring(1 + processID.indexOf('|'));
    String queryKey = getQueryKey(request, sModule, projId, selectedVariantTypes, selectedSequences,
            selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects);
    MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);
    DBCollection cachedCountcollection = mongoTemplate.getCollection(MgdbDao.COLLECTION_NAME_CACHED_COUNTS);
    //      cachedCountcollection.drop();
    DBCursor countCursor = cachedCountcollection.find(new BasicDBObject("_id", queryKey));
    Object[] partialCountArray = !countCursor.hasNext() ? null
            : ((BasicDBList) countCursor.next().get(MgdbDao.FIELD_NAME_CACHED_COUNT_VALUE)).toArray();

    HashMap<Integer, String> variantFieldMap = new HashMap<Integer, String>(),
            runDataFieldMap = new HashMap<Integer, String>();
    for (int i = 0; i < usedFields.length; i++)
        if (usedFields[i].startsWith("#"))
            variantFieldMap.put(i, usedFields[i].substring(1));
        else
            runDataFieldMap.put(i, usedFields[i]);

    long expectedCount = 0;
    for (Object aPartialCount : partialCountArray)
        expectedCount += (Long) aPartialCount;
    DBCollection tmpVarCollOrView = getTemporaryVariantCollection(sModule, token, false);
    boolean fGotTempData = tmpVarCollOrView.findOne() != null;

    ArrayList<Comparable[]> result = new ArrayList<Comparable[]>();
    DBCollection variantColl = mongoTemplate.getCollection(mongoTemplate.getCollectionName(VariantData.class));
    if (fGotTempData || expectedCount == variantColl.count()) // otherwise we return an empty list because there seems to be a problem (missing temp records)
    {
        boolean fProjectHasAnnotations = getProjectEffectAnnotations(sModule, projId).size() > 0;

        DBCollection varCollForBuildingRows = fGotTempData ? tmpVarCollOrView : variantColl;
        DBCursor variantsInFilterCursor = varCollForBuildingRows.find();

        ArrayList<Object[]> variantRows = buildVariantRows(mongoTemplate, variantsInFilterCursor, sortBy,
                sortDir, page, size, variantFieldMap, runDataFieldMap);
        for (Object[] aRow : variantRows) {
            List<Comparable> anOutputRow = new ArrayList<Comparable>();
            for (int i = 0; i < aRow.length; i++) {
                String val = null;
                if (!usedFields[i].startsWith(VariantRunData.SECTION_ADDITIONAL_INFO + "."))
                    val = aRow[i] == null ? "" : aRow[i].toString();
                else if (aRow[i] != null && fProjectHasAnnotations)
                    val = aRow[i].toString().replaceAll("[\\[\\]\"]", ""); // it's an annotation field: make its content look cleaner
                if (val != null)
                    anOutputRow.add(val);
            }
            anOutputRow.add(anOutputRow.get(0)); // for details link
            result.add(anOutputRow.toArray(new Comparable[anOutputRow.size()]));
        }
    }

    if (fGotTempData && page == 0 && tmpVarCollOrView.getIndexInfo().size() <= 1)
        new Thread() { // temp data needs to be indexed for faster browsing
            public void run() {
                long b4 = System.currentTimeMillis();
                tmpVarCollOrView.createIndex(VariantData.FIELDNAME_VERSION);
                tmpVarCollOrView.createIndex(
                        VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE);
                tmpVarCollOrView.createIndex(VariantData.FIELDNAME_REFERENCE_POSITION + "."
                        + ReferencePosition.FIELDNAME_START_SITE);
                tmpVarCollOrView.createIndex(VariantData.FIELDNAME_TYPE);
                LOG.debug("Indexing " + tmpVarCollOrView.count() + " temp variants took "
                        + (System.currentTimeMillis() - b4) / 1000f + "s");
            }
        }.start();
    return result;
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * Distinct sequences in selection.//from   ww  w .j av  a 2  s. c o m
 *
 * @param request the request
 * @param sModule the module
 * @param projId the proj id
 * @param processID the process id
 * @return the collection
 * @throws Exception the exception
 */
@RequestMapping(distinctSequencesInSelectionURL)
protected @ResponseBody Collection<String> distinctSequencesInSelection(HttpServletRequest request,
        @RequestParam("module") String sModule, @RequestParam("project") int projId,
        @RequestParam("processID") String processID) throws Exception {
    processID = URLDecoder.decode(processID, "UTF-8");
    String token = processID.substring(1 + processID.indexOf('|'));
    DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, token, false);
    if (tmpVarColl.count() == 0)
        return listSequences(request, sModule, projId); // working on full dataset

    List<String> distinctSequences = tmpVarColl
            .distinct(VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE);
    TreeSet<String> sortedResult = new TreeSet<String>(new AlphaNumericComparator());
    sortedResult.addAll(distinctSequences);
    return sortedResult;
}

From source file:javasensei.db.managments.RankingManager.java

protected String getRecommenders(DBCollection collection, AbstractRecommender recommender, int cantidad,
        boolean random) {
    String result = "{}";

    List<RecommendedItem> recommenders = new ArrayList<>();
    try {/*w ww  . j  a va  2  s . co m*/
        List<Long> array = new ArrayList();

        try {
            recommenders = getRecommendersItems(recommender, cantidad);//recommenderEjercicios.recommend(estudiante.getId(), cantidad); //5 Recomendaciones
        } catch (Exception ex) {
            System.out.println("El usuario no existe aun en el modelo de datos: " + estudiante.getId());
        }
        //Se agrega un item aleatorio...
        if (random && recommenders.size() < 1) { //RandomRecommender no funciona....

            double number = collection.count();

            DBObject object = collection.find().limit(1).skip((int) Math.floor(Math.random() * number)).next();

            array.add(Math.round(Double.parseDouble(object.get("idEjercicio").toString())));
        }

        //Las recomendaciones se transforman en json array
        for (RecommendedItem item : recommenders) {
            array.add(item.getItemID());
        }

        //Se crea un json array con los id obtenidos de los ejercicios
        result = ejercicios.find(QueryBuilder.start("id").in(array).get()).toArray().toString();
    } catch (Exception ex) {
        JavaException.printMessage(ex, System.out);
    }

    return result;
}

From source file:me.yyam.mongodbutils.MongoDbOperater.java

public long count(String dbname, String collName, Map queryMap) {
    DB db = mongoClient.getDB(dbname);//from w  ww .j  a v  a  2  s .  c o  m
    DBCollection coll = db.getCollection(collName);
    long count;
    if (queryMap == null) {
        count = coll.count();
    } else {
        BasicDBObject query = new BasicDBObject(queryMap);
        count = coll.count(query);
    }
    return count;
}

From source file:MongoDb.Main.java

public static void insertCategoryAndSubCategory() throws UnknownHostException, JSONException {

    long startTime = System.nanoTime();

    //Init connect mongodb
    MongoClient mongoClient = new MongoClient(MongoDb.Config.HOST, MongoDb.Config.defaultPort);
    DB db = mongoClient.getDB(MongoDb.Config.Schema);
    String colletioName = "Category";
    DBCollection collections = db.getCollection(colletioName);

    System.out.println(collections.count());
    DBCollection subCategoryCollection = db.getCollection("subCategory");

    List categoryList = Parser.getJSONCategory();
    for (int i = 0; i < categoryList.size(); i++) {

        //Insert Category
        JSONObject jsonCategoty = (JSONObject) categoryList.get(i);
        String categoryName = jsonCategoty.getString("categoryName");
        JSONObject jsonOfCatrgory = new JSONObject();
        jsonOfCatrgory.put("categoryName", categoryName);
        String _idCategory = MongoDBController.addDocumentFromJsonToJsonObject(jsonOfCatrgory, collections);
        System.out.println(_idCategory);
        if (_idCategory.length() > 5) {
            System.out.println("? insert category");
            JSONArray subCategoryArray = jsonCategoty.getJSONArray("subList");

            for (int j = 0; j < subCategoryArray.length(); j++) {
                JSONObject jsonSubCategory = subCategoryArray.getJSONObject(j);
                jsonSubCategory.put("categoryId", _idCategory);
                String _idSubCategory = MongoDBController.addDocumentFromJsonToJsonObject(jsonSubCategory,
                        subCategoryCollection);
                if (_idSubCategory.length() > 5) {
                    System.out.println("? insert subcategory");
                }/*from  ww w.  java2s .  com*/
            }
        }
    }

}

From source file:mx.org.cedn.avisosconagua.mongo.MongoInterface.java

License:Open Source License

/**
 * Gets the count of all generated advices in the Mongo database.
 * @return Count of generated advices.// w  w  w.  j  a  v a 2s  . c  o m
 */
public long countPublishedAdvices() {
    DBCollection col = mongoDB.getCollection(GENERATED_COL);
    return col.count();
}

From source file:mypackage.CollInformation.java

protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    try (PrintWriter out = response.getWriter()) {
        /* TODO output your page here. You may use following sample code. */
        try {//from  w  w  w  .java 2s.  c  o  m
            String getDBName = request.getParameter("dbname");
            String getCollName = request.getParameter("collname");
            MongoClient mongoClient = new MongoClient("localhost", 27017);
            DB mongoDatabase = mongoClient.getDB(getDBName);
            DBCollection coll = mongoDatabase.getCollection(getCollName);
            long totalDoc = coll.count();
            JSONObject jSONObject = new JSONObject();
            JSONArray jSONArray = new JSONArray();

            DBCursor cursor = coll.find();
            int i = 0;
            while (cursor.hasNext()) {
                jSONArray.put(cursor.next());
                i++;
            }
            jSONObject.put("db", jSONArray);
            jSONObject.put("counter", i);
            jSONObject.put("totalDoc", totalDoc);
            out.println(jSONObject);

        } catch (Exception e) {

        }
    }
}