Example usage for com.mongodb Bytes QUERYOPTION_NOTIMEOUT

List of usage examples for com.mongodb Bytes QUERYOPTION_NOTIMEOUT

Introduction

In this page you can find the example usage for com.mongodb Bytes QUERYOPTION_NOTIMEOUT.

Prototype

int QUERYOPTION_NOTIMEOUT

To view the source code for com.mongodb Bytes QUERYOPTION_NOTIMEOUT.

Click Source Link

Document

The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use.

Usage

From source file:essex.bigessexnew.OplogListener.java

public void listen(boolean processHistory, String... logFields) {
    BasicDBObject query = prepareOplogListenQuery(processHistory);
    BasicDBObject fields = prepareOplogListenFields(logFields);

    DBCursor cur = collection.find(query, fields).sort((BasicDBObjectBuilder.start("$natural", 1)).get())
            .addOption(Bytes.QUERYOPTION_TAILABLE | Bytes.QUERYOPTION_AWAITDATA | Bytes.QUERYOPTION_NOTIMEOUT);

    performListenTask(cur);/*from   w  w  w  .jav a2 s.  com*/
}

From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java

License:Open Source License

/**
 * Export variants.//from   w w  w.  j a va  2  s.c  o m
 *
 * @param request the request
 * @param response the response
 * @param sModule the module
 * @param fKeepExportOnServer whether or not to keep export on server
 * @param sExportFormat the export format
 * @param exportID the export id
 * @param projId the proj id
 * @param selectedVariantTypes the selected variant types
 * @param selectedSequences the selected sequences
 * @param selectedIndividuals the selected individuals
 * @param gtPattern the gt code
 * @param genotypeQualityThreshold the genotype quality threshold
 * @param readDepthThreshold the read depth threshold
 * @param missingData the missing data
 * @param minmaf the minmaf
 * @param maxmaf the maxmaf
 * @param minposition the minposition
 * @param maxposition the maxposition
 * @param alleleCount the allele count
 * @param geneName the gene name
 * @param variantEffects the variant effects
 * @throws Exception the exception
 */
@RequestMapping(variantExportDataURL)
protected void exportVariants(HttpServletRequest request, HttpServletResponse response,
        @RequestParam("module") String sModule, @RequestParam("keepExportOnServer") boolean fKeepExportOnServer,
        @RequestParam("exportFormat") String sExportFormat, @RequestParam("exportID") String exportID,
        @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes,
        @RequestParam("sequences") String selectedSequences,
        @RequestParam(value = "individuals", required = false) String selectedIndividuals,
        @RequestParam("gtPattern") String gtPattern,
        @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold,
        @RequestParam("readDepthThreshold") int readDepthThreshold,
        @RequestParam("missingData") double missingData,
        @RequestParam(value = "minmaf", required = false) Float minmaf,
        @RequestParam(value = "maxmaf", required = false) Float maxmaf,
        @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition,
        @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName,
        @RequestParam("variantEffects") String variantEffects) throws Exception {
    //      exportID = URLDecoder.decode(exportID, "UTF-8");
    String token = exportID.substring(1 + exportID.indexOf('|'));

    ProgressIndicator progress = ProgressIndicator.get(token);
    if (progress == null) {
        progress = new ProgressIndicator(token, new String[] { "Identifying matching variants" });
        ProgressIndicator.registerProgressIndicator(progress);
    }

    long before = System.currentTimeMillis();
    final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule);

    List<String> selectedIndividualList = selectedIndividuals.length() == 0
            ? getIndividualsInDbOrder(sModule, projId)
            /* no selection means all selected */ : Arrays.asList(selectedIndividuals.split(";"));

    long count = countVariants(request, sModule, projId, selectedVariantTypes, selectedSequences,
            selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf,
            maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects,
            "" /* if we pass exportID then the progress indicator is going to be replaced by another, and we don't need it for counting since we cache count values */);
    DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, token, false);
    long nTempVarCount = mongoTemplate.count(new Query(), tmpVarColl.getName());
    boolean fWorkingOnFullDataset = mongoTemplate.count(null, VariantData.class) == count;
    if (!fWorkingOnFullDataset && nTempVarCount == 0) {
        progress.setError(MESSAGE_TEMP_RECORDS_NOT_FOUND);
        return;
    }

    // use a cursor to avoid using too much memory
    DBObject query = count == nTempVarCount ? null
            : new BasicDBObject(VariantData.FIELDNAME_VERSION, new BasicDBObject("$exists", true));
    String sequenceField = VariantData.FIELDNAME_REFERENCE_POSITION + "."
            + ReferencePosition.FIELDNAME_SEQUENCE;
    String startField = VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE;
    BasicDBObject sort = new BasicDBObject("_id",
            1); /* necessary for MgdbDao.getSampleGenotypes to work properly */
    DBObject projection = new BasicDBObject();
    projection.put(sequenceField, 1);
    projection.put(startField, 1);

    DBCursor markerCursor = mongoTemplate.getCollection(
            !fWorkingOnFullDataset ? tmpVarColl.getName() : mongoTemplate.getCollectionName(VariantData.class))
            .find(query, projection).sort(sort);
    markerCursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    try {
        AbstractIndividualOrientedExportHandler individualOrientedExportHandler = AbstractIndividualOrientedExportHandler
                .getIndividualOrientedExportHandlers().get(sExportFormat);
        AbstractMarkerOrientedExportHandler markerOrientedExportHandler = AbstractMarkerOrientedExportHandler
                .getMarkerOrientedExportHandlers().get(sExportFormat);

        GenotypingProject project = mongoTemplate.findById(projId, GenotypingProject.class);
        String filename = sModule + "_" + project.getName() + "_"
                + new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + "_" + count + "variants_"
                + sExportFormat + "."
                + (individualOrientedExportHandler != null ? individualOrientedExportHandler
                        : markerOrientedExportHandler).getExportFileExtension();
        OutputStream os;
        LOG.info((fKeepExportOnServer ? "On-server" : "Direct-download") + " export requested: " + token);
        if (fKeepExportOnServer) {
            String relativeOutputFolder = File.separator + FRONTEND_URL + File.separator + TMP_OUTPUT_FOLDER
                    + File.separator + token.replaceAll("\\|", "_") + File.separator;
            File outputLocation = new File(
                    request.getSession().getServletContext().getRealPath(relativeOutputFolder));
            if (!outputLocation.exists() && !outputLocation.mkdirs())
                throw new Exception("Unable to create folder: " + outputLocation);
            os = new FileOutputStream(new File(outputLocation.getAbsolutePath() + File.separator + filename));
            response.setContentType("text/plain");
        } else {
            os = response.getOutputStream();
            response.setContentType("application/zip");
            response.setHeader("Content-disposition", "inline; filename=" + filename);
        }

        ArrayList<SampleId> sampleIDs = new ArrayList<SampleId>();
        for (String individual : selectedIndividualList)
            for (Integer individualSampleIndex : project.getIndividualSampleIndexes(individual))
                sampleIDs.add(new SampleId(projId, individualSampleIndex));

        if (fKeepExportOnServer) {
            String relativeOutputFolder = FRONTEND_URL + File.separator + TMP_OUTPUT_FOLDER + File.separator
                    + token.replaceAll("\\|", "_") + File.separator;
            String relativeOutputFolderUrl = request.getContextPath() + "/"
                    + relativeOutputFolder.replace(File.separator, "/");
            String exportURL = relativeOutputFolderUrl + filename;
            LOG.debug("On-server export file for export " + token + ": " + exportURL);
            response.getWriter().write(exportURL);
            response.flushBuffer();
        }
        //         else
        //         {
        //            // The two next lines are an ugly hack that makes the client believe transfer has started. Otherwise we may end-up with a client-side timeout (search for network.http.response.timeout for more details)
        //            response.getOutputStream().print(" ");
        //            response.getOutputStream().flush();
        //         }

        HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>();
        annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold);
        annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold);

        if (individualOrientedExportHandler != null) {
            progress.addStep("Reading and re-organizing genotypes"); // initial step will consist in organizing genotypes by individual rather than by marker
            progress.moveToNextStep(); // done with identifying variants
            TreeMap<String, File> exportFiles = individualOrientedExportHandler.createExportFiles(sModule,
                    markerCursor.copy(), sampleIDs, new ArrayList<SampleId>(), token, annotationFieldThresholds,
                    new HashMap<String, Integer>(), project.getSampleIdToIndividualMap(selectedIndividualList),
                    progress);
            if (!progress.hasAborted()) {
                for (String step : individualOrientedExportHandler.getStepList())
                    progress.addStep(step);
                progress.moveToNextStep();
                individualOrientedExportHandler.exportData(os, sModule, exportFiles.values(), true, progress,
                        markerCursor, null, null);
            }
        } else if (markerOrientedExportHandler != null) {
            for (String step : markerOrientedExportHandler.getStepList())
                progress.addStep(step);
            progress.moveToNextStep(); // done with identifying variants

            markerOrientedExportHandler.exportData(os, sModule, sampleIDs, new ArrayList<SampleId>(), progress,
                    markerCursor, null, annotationFieldThresholds, new HashMap<String, Integer>(),
                    project.getSampleIdToIndividualMap(selectedIndividualList), null);
            LOG.debug("done with exportData");
        } else
            throw new Exception("No export handler found for format " + sExportFormat);

        if (!progress.hasAborted()) {
            LOG.info("doVariantExport took " + (System.currentTimeMillis() - before) / 1000d + "s to process "
                    + count + " variants and " + selectedIndividualList.size() + " individuals");
            progress.markAsComplete();
        }
    } catch (Throwable t) {
        LOG.error("Error exporting data", t);
        progress.setError("Error exporting data: " + t.getClass().getSimpleName()
                + (t.getMessage() != null ? " - " + t.getMessage() : ""));
        return;
    } finally {
        markerCursor.close();
    }
}

From source file:org.apache.gora.mongodb.store.MongoStore.java

License:Apache License

/**
 * Execute the query and return the result.
 *///from   w w  w .  j a v a2  s  . c om
@Override
public Result<K, T> execute(final Query<K, T> query) {

    String[] fields = getFieldsToQuery(query.getFields());
    // Build the actual MongoDB query
    DBObject q = MongoDBQuery.toDBQuery(query);
    DBObject p = MongoDBQuery.toProjection(fields, mapping);

    if (query.getFilter() != null) {
        boolean succeeded = filterUtil.setFilter(q, query.getFilter(), this);
        if (succeeded) {
            // don't need local filter
            query.setLocalFilterEnabled(false);
        }
    }

    // Execute the query on the collection
    DBCursor cursor = mongoClientColl.find(q, p);
    if (query.getLimit() > 0)
        cursor = cursor.limit((int) query.getLimit());
    cursor.batchSize(100);
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    // Build the result
    MongoDBResult<K, T> mongoResult = new MongoDBResult<K, T>(this, query);
    mongoResult.setCursor(cursor);

    return mongoResult;
}

From source file:org.apache.hadoop.contrib.mongoreduce.MongoRecordReader.java

License:Apache License

private void connect(String location, Configuration conf) throws IOException {

    String[] parts = location.split(":");

    // default port for sharded server
    int port = 27018;
    if (parts.length > 1)
        port = Integer.parseInt(parts[1]);

    Mongo mongo = new Mongo(parts[0], port);

    // figure out if we can read from this server

    // allow reading from secondaries
    mongo.slaveOk();/*from w w  w  .  j  a  v a 2s .  c om*/

    String database = conf.get("mongo.input.database");
    String collection = conf.get("mongo.input.collection");
    String query = conf.get("mongo.input.query", "");
    String select = conf.get("mongo.input.select", "");

    if (!query.equals("")) {
        DBObject q = (DBObject) JSON.parse(query);

        if (!select.equals("")) {
            DBObject s = (DBObject) JSON.parse(select);
            cursor = mongo.getDB(database).getCollection(collection).find(q, s);
        } else {
            cursor = mongo.getDB(database).getCollection(collection).find(q);
        }
    } else {
        if (!select.equals("")) {
            DBObject s = (DBObject) JSON.parse(select);
            cursor = mongo.getDB(database).getCollection(collection).find(new BasicDBObject(), s);
        } else {
            cursor = mongo.getDB(database).getCollection(collection).find();
        }
    }

    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    // thanks mongo, for this handy method
    totalResults = cursor.count();
    resultsRead = 0.0f;

}

From source file:org.axonframework.eventstore.mongo.MongoEventStore.java

License:Apache License

@Override
public void visitEvents(Criteria criteria, EventVisitor visitor) {
    DBCursor cursor = storageStrategy.findEvents(mongoTemplate.domainEventCollection(),
            (MongoCriteria) criteria);//  www.  ja v a 2 s .  c o  m
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    CursorBackedDomainEventStream events = new CursorBackedDomainEventStream(cursor, null, null, true);
    try {
        while (events.hasNext()) {
            visitor.doWithEvent(events.next());
        }
    } finally {
        events.close();
    }
}

From source file:org.eclipse.birt.data.oda.mongodb.internal.impl.MDbOperation.java

License:Open Source License

/**
 * Applies data set query properties and hints on DBCursor, except
 * for cursor limit.//from   w w  w . jav  a2s  .  co m
 * @see #applyPropertiesToCursor(DBCursor,QueryProperties,boolean,boolean)
 */
static void applyPropertiesToCursor(DBCursor rowsCursor, QueryProperties queryProps, boolean includeSortExpr) {
    if (includeSortExpr) // normally done only when executing a query to get full result set
    {
        DBObject sortExprObj = null;
        try {
            sortExprObj = queryProps.getSortExprAsParsedObject();
        } catch (OdaException ex) {
            // log warning and ignore
            DriverUtil.getLogger().log(Level.WARNING,
                    Messages.bind("Unable to parse the user-defined Sort Expression: {0}", //$NON-NLS-1$
                            queryProps.getSortExpr()), ex);
        }

        if (sortExprObj != null)
            rowsCursor.sort(sortExprObj);
    }

    ReadPreference readPref = queryProps.getTaggableReadPreference();
    if (readPref != null)
        rowsCursor.setReadPreference(readPref);

    if (queryProps.getBatchSize() > 0)
        rowsCursor.batchSize(queryProps.getBatchSize());

    if (queryProps.getNumDocsToSkip() > 0)
        rowsCursor.skip(queryProps.getNumDocsToSkip());

    DBObject hintObj = queryProps.getIndexHintsAsParsedObject();
    if (hintObj != null)
        rowsCursor.hint(hintObj);
    else // try to pass the hint string value as is
    {
        String hintValue = queryProps.getIndexHints();
        if (!hintValue.isEmpty())
            rowsCursor.hint(hintValue);
    }

    if (queryProps.hasNoTimeOut())
        rowsCursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    if (queryProps.isPartialResultsOk())
        rowsCursor.addOption(Bytes.QUERYOPTION_PARTIAL);
}

From source file:org.geotools.data.mongodb.MongoLayer.java

License:LGPL

/**
 * Get mapping of field names and types, and counts for different types
 * /* w w  w.j  ava 2  s  .c  o m*/
 * @param collection where metadata from map-reduce job stored, in format: { "_id" : {
 *            "fieldname" : "geometry.type", "type" : "Point"}, "value" : 2 } { "_id" : {
 *            "fieldname" : "properties.ActivityDescription", "type" : "number" }, "value" : 1 }
 *            { "_id" : { "fieldname" : "properties.ActivityDescription", "type" : "string" },
 *            "value" : 3 } where value is number of occurrences for given type
 * @return mapping of field names to ClassCount holding type and count info
 */
private HashMap<String, ClassCount> getFieldMap(DBCollection metaResultsColl) {
    // cursor over collection
    BasicDBObject query = new BasicDBObject();
    DBCursor cursor = metaResultsColl.find(query);
    // avoid cursor timeout
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    // map to store fieldname and ClasCount object holding type and type-count info
    HashMap<String, ClassCount> fieldMap = new HashMap<String, ClassCount>();

    try {
        // iterate over each record
        while (cursor.hasNext()) {
            // check type found for current field
            DBObject currRec = cursor.next();
            DBObject currField = (DBObject) currRec.get("_id");
            String fieldName = (String) currField.get("fieldname");
            String fieldType = (String) currField.get("type");
            int typeCount = ((Double) currRec.get("value")).intValue();
            // if first occurrence of field name instantiate counter
            if (!fieldMap.containsKey(fieldName)) {
                fieldMap.put(fieldName, new ClassCount(fieldType, typeCount));
            }
            // else increment count for given type
            else {
                ClassCount currCount = fieldMap.get(fieldName);
                currCount.add(fieldType, typeCount);
                fieldMap.put(fieldName, currCount);
            }
        }
    } finally {
        // need to explicitly release cursor since notimeout option set
        cursor.close();
    }

    return fieldMap;
}

From source file:org.ossmeter.platform.mining.msr14.Extractor.java

License:Open Source License

public static void main(String[] args) throws Exception {
    long start = System.currentTimeMillis();

    Mongo msrMongo = new Mongo(new ServerAddress("localhost", 1234)); // GitHub challenge data
    Mongo bioMongo = new Mongo(new ServerAddress("localhost", 12345));// Extracted data

    // Create indexes
    Biodiversity bio = new Biodiversity(bioMongo.getDB("biodiversity"));
    bio.setClearPongoCacheOnSync(true);//  w w  w  .  j a  v a  2 s. c  o m
    bioMongo.getDB("biodiversity").getCollection("users").ensureIndex(new BasicDBObject("login", 1));

    BasicDBObject index = new BasicDBObject();
    index.put("name", 1);
    index.put("ownerName", 1);
    bioMongo.getDB("biodiversity").getCollection("projects").ensureIndex(index);

    index = new BasicDBObject();
    index.put("projectName", 1);
    index.put("projectOwner", 1);
    bioMongo.getDB("biodiversity").getCollection("projectMemberships").ensureIndex(index);

    index = new BasicDBObject();
    index.put("projectName", 1);
    index.put("userName", 1);
    bioMongo.getDB("biodiversity").getCollection("projectMemberships").ensureIndex(index);

    bioMongo.getDB("biodiversity").getCollection("projectMemberships")
            .ensureIndex(new BasicDBObject("userName", 1));

    DB msrDb = msrMongo.getDB("msr14");

    //      #1 User extraction
    System.out.println("Extracting users...");
    DBCursor cursor = msrDb.getCollection("users").find();
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    Iterator<DBObject> it = cursor.iterator();

    int count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         User user = new User();
    //         user.setGhId(obj.getString("id"));
    //         user.setLogin(obj.getString("login"));
    //         user.setLocation(obj.getString("location"));
    //         user.setPublicRepos(obj.getInt("public_repos", 0));
    //         user.setJoinedDate(obj.getString("created_at"));
    //         user.setFollowerCount(obj.getInt("followers", 0));
    //         user.setFollowingCount(obj.getInt("following", 0));
    //         user.setPublicGists(obj.getInt("public_gists", 0));
    //         
    //         bio.getUsers().add(user);
    //         
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      
    ////      #1.2 Project extraction
    //      System.out.println("Extracting projects...");
    //      cursor = msrDb.getCollection("repos").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         Project project = new Project();
    //         project.setName(obj.getString("name"));
    //         project.setGhId(obj.getString("id"));
    //         project.setCreatedAt(obj.getString("created_at"));
    //         project.setSize(obj.getInt("size", 0));
    //         project.setWatchersCount(obj.getInt("watchers",0));
    //         project.setWatchersCount2(obj.getInt("watchers_count",0));
    //         project.setLanguage(obj.getString("language"));
    //         project.setForks(obj.getInt("forks", 0));
    //         project.setForksCount(obj.getInt("forks_count", 0));
    //         project.setOpenIssues(obj.getInt("open_issues",0));
    //         project.setOpenIssuesCount(obj.getInt("open_issues_count",0));
    //         project.setOpenIssues(obj.getInt("open_issues",0));
    //         project.setNetworkCount(obj.getInt("network_count", 0));
    //         
    //         BasicDBObject ownerObj = (BasicDBObject) obj.get("owner");
    //         User owner = null;
    //         if (ownerObj != null) {
    //            owner = bio.getUsers().findOne(User.LOGIN.eq(ownerObj.getString("login")));
    //            if (owner !=null) {
    //               project.setOwner(owner);
    //               project.setOwnerName(owner.getLogin());
    //            }
    //         }
    //         bio.getProjects().add(project);
    //         
    //         if (owner != null) { // This comes here as to reference the project, we need to have added to the project list first
    //            ProjectMembership pm = getProjectMembership(bio, owner, project);
    //            pm.setOwner(true);
    //         }
    //         
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      
    //      System.exit(0);

    ////      #2 Follower/following extraction
    //      System.out.println("Extracting followers...");
    //      cursor = msrDb.getCollection("followers").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         String followerLogin = obj.getString("login");
    //         String followedLogin = obj.getString("follows");
    //         
    //         User follower = bio.getUsers().findOne(User.LOGIN.eq(followerLogin));
    //         User followed = bio.getUsers().findOne(User.LOGIN.eq(followedLogin));
    //         
    //         if (follower != null && followed != null) {
    //            follower.getFollowing().add(followed);
    //            followed.getFollowers().add(follower);
    //         } else{
    ////            System.err.println("Follower or followed is null. Follower: " +follower + ". followed: " + followed);
    //         }
    //         if (follower != null) follower.setFollowingCount(follower.getFollowingCount()+1);
    //         if (followed != null) followed.setFollowerCount(followed.getFollowerCount()+1);
    //         
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      
    //      System.exit(0);

    System.out.println("Clearing ProjectMembership commit data");

    for (ProjectMembership pm : bio.getProjectMemberships()) {
        pm.setCommitCount(0);
        pm.setCommitTotalChanges(0);
        pm.setCommitAdditions(0);
        pm.setCommitDeletions(0);
        pm.setCommitsAsAuthor(0);
        pm.setCommitsAsCommitter(0);
        pm.setCommitTotalFiles(0);
        pm.setAverageFilesPerCommit(0);
        pm.getCommitTimes().clear();
    }
    bio.sync();
    System.out.println("cleared.");

    //      #3 Commits
    System.out.println("Extracting commits...");
    cursor = msrDb.getCollection("commits").find();
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    it = cursor.iterator();

    count = 0;
    while (it.hasNext()) {
        BasicDBObject obj = (BasicDBObject) it.next();

        // Author and committer
        BasicDBObject commitAuthor = (BasicDBObject) obj.get("author");
        BasicDBObject commitCommitter = (BasicDBObject) obj.get("committer");

        String authorLogin = "";
        if (commitAuthor != null)
            authorLogin = commitAuthor.getString("login");
        String committerLogin = "";
        if (commitCommitter != null)
            committerLogin = commitCommitter.getString("login");

        // Stats
        BasicDBObject stats = (BasicDBObject) obj.get("stats");
        if (stats == null)
            stats = new BasicDBObject(); // Create a new one so we can get zeroed values
        int total = stats.getInt("total", 0);
        int additions = stats.getInt("additions", 0);
        int deletions = stats.getInt("deletions", 0);

        String commitDate = ((BasicDBObject) ((BasicDBObject) obj.get("commit")).get("author"))
                .getString("date");

        BasicDBList files = (BasicDBList) obj.get("files");
        String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url"));

        ProjectMembership authorPm = null;
        ProjectMembership committerPm = null;

        if (authorLogin != null) {
            authorPm = getProjectMembership(bio, authorLogin, url[1], url[0]);
            authorPm.setCommitCount(authorPm.getCommitCount() + 1);
            authorPm.setCommitTotalChanges(authorPm.getCommitTotalChanges() + total);
            authorPm.setCommitAdditions(authorPm.getCommitAdditions() + additions);
            authorPm.setCommitDeletions(authorPm.getCommitDeletions() + deletions);
            authorPm.setCommitsAsAuthor(authorPm.getCommitsAsAuthor() + 1);
            if (files != null)
                authorPm.setCommitTotalFiles(authorPm.getCommitTotalChanges() + files.size());
            authorPm.setAverageFilesPerCommit(authorPm.getCommitTotalFiles() / authorPm.getCommitCount());
            authorPm.getCommitTimes().add(commitDate);
        }

        if (authorLogin != null && !authorLogin.equals(committerLogin)) {
            committerPm = getProjectMembership(bio, committerLogin, url[1], url[0]);

            committerPm.setCommitCount(committerPm.getCommitCount() + 1);
            //            committerPm.setCommitTotalChanges(committerPm.getCommitTotalChanges()+total);
            //            committerPm.setCommitAdditions(committerPm.getCommitAdditions()+additions);
            //            committerPm.setCommitDeletions(committerPm.getCommitDeletions()+deletions);
            committerPm.setCommitsAsCommitter(committerPm.getCommitsAsCommitter() + 1);
            committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges() + files.size());
            committerPm.setAverageFilesPerCommit(committerPm.getCommitTotalFiles() / authorPm.getCommitCount());
            if (files != null)
                committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges() + files.size());
            committerPm
                    .setAverageFilesPerCommit(committerPm.getCommitTotalFiles() / committerPm.getCommitCount());
            committerPm.getCommitTimes().add(commitDate);
        }

        bio.sync();
        count++;
        if (count % 1000 == 0) {
            System.out.print(count + ", ");
            bio.sync();
        }
    }
    cursor.close();
    bio.sync();
    System.out.println();

    System.exit(0);

    //         if (author != null) {
    ////            if (author.getCommits() ==null) author.setCommits(new Commits());
    //            author.setCommitCount(author.getCommitCount()+1);
    //            author.setCommitTotalChanges(author.getCommitTotalChanges()+total);
    //            author.setCommitAdditions(author.getCommitAdditions()+additions);
    //            author.setCommitDeletions(author.getCommitDeletions()+deletions);
    //            author.setCommitsAsAuthor(author.getCommitsAsAuthor()+1);
    //            author.getCommitTimes().add(commitDate);
    //         }
    //         if (committer != null) {
    ////            if (committer.getCommits() ==null) committer.setCommits(new Commits());
    //            committer.setCommitCount(committer.getCommitCount()+1);
    //            committer.setCommitTotalChanges(committer.getCommitTotalChanges()+total);
    //            committer.setCommitAdditions(committer.getCommitAdditions()+additions);
    //            committer.setCommitDeletions(committer.getCommitDeletions()+deletions);
    //            committer.setCommitsAsCommitter(committer.getCommitsAsCommitter()+1);
    //            committer.getCommitTimes().add(commitDate);
    //         }
    //         
    //         ProjectMembership authorPm = null;
    //         ProjectMembership committerPm = null;
    //         
    ////          Only a very small number of commit comments actually reference the repo
    ////          Instead we're going to have to strip the string 
    //         String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url"));
    //         Project project = null;
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(url[1]), Project.OWNERNAME.eq(url[0])).iterator();
    //         if (repoIt.hasNext()) {
    //            project = repoIt.next();
    //            if (project != null) {
    //      
    //               project.setCommitCount(project.getCommitCount()+1);
    //               project.setCommitTotalChanges(project.getCommitTotalChanges()+total);
    //               project.setCommitAdditions(project.getCommitAdditions()+additions);
    //               project.setCommitDeletions(project.getCommitDeletions()+deletions);
    //               project.getCommitTimes().add(commitDate);
    //               
    //               if (author != null) {
    //                  authorPm = getProjectMembership(bio, author, project);
    //                  authorPm.setCommitCount(authorPm.getCommitCount()+1);
    //                  authorPm.setCommitTotalChanges(authorPm.getCommitTotalChanges()+total);
    //                  authorPm.setCommitAdditions(authorPm.getCommitAdditions()+additions);
    //                  authorPm.setCommitDeletions(authorPm.getCommitDeletions()+deletions);
    //                  authorPm.setCommitsAsAuthor(authorPm.getCommitsAsAuthor()+1);
    //                  
    //                  // Avoid duplicating information
    //                  if (committer != null && author.getLogin().equals(committer.getLogin())) {
    //                     authorPm.setCommitsAsCommitter(authorPm.getCommitsAsCommitter()+1);
    //                  }
    //                  
    //                  authorPm.getCommitTimes().add(commitDate);
    //               }
    //               if (committer != null && author != null && !author.getLogin().equals(committer.getLogin())) {
    //                  committerPm = getProjectMembership(bio, committer, project);
    //                  committerPm.setCommitCount(committerPm.getCommitCount()+1);
    //                  committerPm.setCommitTotalChanges(committerPm.getCommitTotalChanges()+total);
    //                  committerPm.setCommitAdditions(committerPm.getCommitAdditions()+additions);
    //                  committerPm.setCommitDeletions(committerPm.getCommitDeletions()+deletions);
    //                  committerPm.setCommitsAsCommitter(committerPm.getCommitsAsCommitter()+1);
    //                  
    //                  committerPm.getCommitTimes().add(commitDate);
    //               }
    //            } 
    //         }
    //         else {
    //            System.err.println("Didn't find project:" + url[0] + ":"+url[1] + ", prestrip: " + obj.getString("url"));
    //         }
    //         bio.getProjectMemberships().sync();
    //         bio.sync();
    //         
    //
    //         // Files
    //         BasicDBList files = (BasicDBList) obj.get("files");
    //         if (files != null) {
    //            for (Object f : files) {
    //               BasicDBObject file = (BasicDBObject)f;
    //               
    //               String filename = file.getString("filename");
    //               if (filename.lastIndexOf(".") != -1) { // If it has an extension, we want that. If not, use the entire filename
    //                  filename = filename.substring(filename.lastIndexOf("."));
    //                  filename = filename.toLowerCase(); // Ensure consistency
    //               }
    //         // FIXME: Should strip any /'s if there is no '.' - i.e. just the last one
    //               
    //               if (author != null) addArtefact(author, filename);
    //               if (committer != null) addArtefact(committer, filename);
    ////               if (project != null) addArtefact(project, filename);
    //            }
    //         }
    //         
    //         if (author != null && files !=null) {
    //            author.setCommitTotalFiles(author.getCommitTotalFiles()+files.size());
    //            author.setAverageFilesPerCommit(author.getCommitTotalFiles()/author.getCommitCount());
    //         }
    //         if (committer != null && files !=null && (author==null || !committer.getLogin().equals(author.getLogin()))) {
    //            committer.setCommitTotalFiles(committer.getCommitTotalFiles()+files.size());
    //            committer.setAverageFilesPerCommit(committer.getCommitTotalFiles()/committer.getCommitCount());
    //         }
    //         if (authorPm !=null && files != null) {
    //            authorPm.setCommitTotalFiles(authorPm.getCommitTotalChanges()+files.size());
    //            authorPm.setAverageFilesPerCommit(authorPm.getCommitTotalFiles()/authorPm.getCommitCount());
    //         }
    //         if (committerPm != null && files != null) {
    //            committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges()+files.size());
    //            committerPm.setAverageFilesPerCommit(committerPm.getCommitTotalFiles()/committerPm.getCommitCount());
    //         }
    //         
    //         if (project!=null && files != null) {
    //            project.setCommitTotalFiles(project.getCommitTotalChanges()+files.size());
    //            project.setAverageFilesPerCommit(project.getCommitTotalFiles()/project.getCommitCount());
    //         }

    //         bio.getProjectMemberships().sync();
    //         bio.sync();
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      cursor.close();
    //      bio.sync();
    //      System.out.println();
    //
    //      System.exit(0);

    ////      #4 Commit comments
    //      System.out.println("Extracting commit comments...");
    //      cursor = msrDb.getCollection("commit_comments").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //      
    //         String username = getUserLoginName(bio, "user", "login", obj);
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(username));
    //         if (user == null) {
    //            System.err.println("Found commit comment with unrecognised user: " + username);
    //            continue;
    //         }
    //         
    //         user.setNumberOfCommitComments(user.getNumberOfCommitComments()+1);
    //         
    ////         if (!user.getDbObject().containsField("commitCommentTimes")) {
    ////            user.getDbObject().put("commitCommentTimes", new BasicDBList());
    ////         }
    ////         user.getCommitCommentTimes().add(obj.getString("created_at"));
    //         
    //         // Only a very small number of commit comments actually reference the repo
    //         // Instead we're going to have to strip the string 
    //         String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url"));
    //         
    ////         System.out.println("Querying project " + url[1] + " and owner " + url[0]);
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(url[1]), Project.OWNERNAME.eq(url[0])).iterator();
    ////         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            if (project != null) {
    //               project.setNumberOfCommitComments(project.getNumberOfCommitComments()+1);
    //               
    //               if (!project.getDbObject().containsField("commitCommentTimes")) {
    //                  project.getDbObject().put("commitCommentTimes", new BasicDBList());
    //               }
    //               project.getCommitCommentTimes().add(obj.getString("created_at"));
    //               
    //               ProjectMembership pm = getProjectMembership(bio, user, project);
    //               pm.setNumberOfCommitComments(pm.getNumberOfCommitComments()+1);
    //               
    //               if (!pm.getDbObject().containsField("commitCommentTimes")) {
    //                  pm.getDbObject().put("commitCommentTimes", new BasicDBList());
    //               }
    //               pm.getCommitCommentTimes().add(obj.getString("created_at"));
    //            }
    ////         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      cursor.close();
    //      bio.sync();
    //      System.out.println();
    //      System.exit(0);

    ////      //FIXME: THIS IS CAUSING THE CPU TO HIS 350% AND THEN KILLS THE LAPTOP?!?!?!?!? 
    //      #5 Pull requests
    System.out.println("Extracting pull requests...");
    cursor = msrDb.getCollection("pull_requests").find();
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    it = cursor.iterator();

    System.out.println("Clearing previous data");
    for (User u : bio.getUsers()) {
        if (!u.getDbObject().containsField("pullRequestTimes")) {
            u.getDbObject().put("pullRequestTimes", new BasicDBList());
        }
        u.getPullRequestTimes().clear();
        u.setNumberOfPullRequests(0);
    }
    bio.sync();
    for (Project u : bio.getProjects()) {
        if (!u.getDbObject().containsField("pullRequestTimes")) {
            u.getDbObject().put("pullRequestTimes", new BasicDBList());
        }
        u.getPullRequestTimes().clear();
        u.setNumberOfPullRequests(0);
    }
    bio.sync();

    for (ProjectMembership u : bio.getProjectMemberships()) {
        if (!u.getDbObject().containsField("pullRequestTimes")) {
            u.getDbObject().put("pullRequestTimes", new BasicDBList());
        }
        u.getPullRequestTimes().clear();
        u.setNumberOfPullRequests(0);
    }
    bio.sync();

    System.out.println("Cleared!");

    count = 0;
    while (it.hasNext()) {

        BasicDBObject obj = (BasicDBObject) it.next();

        String username = getUserLoginName(bio, "user", "login", obj);
        User user = bio.getUsers().findOne(User.LOGIN.eq(username));
        if (user == null) {
            //            System.err.println("Found pull request with unrecognised user:" + username);
            continue;
        }

        if (!user.getDbObject().containsField("pullRequestTimes")) {
            user.getDbObject().put("pullRequestTimes", new BasicDBList());
        }
        user.getPullRequestTimes().add(obj.getString("created_at"));

        user.setNumberOfPullRequests(user.getNumberOfPullRequests() + 1);

        // Project
        System.out.println(obj.getString("repo") + " " + obj.getString("owner") + obj.getString("_id"));

        ProjectMembership pm = getProjectMembership(bio, user.getLogin(), obj.getString("repo"),
                obj.getString("owner"));
        pm.setNumberOfPullRequests(pm.getNumberOfPullRequests() + 1);

        if (!pm.getDbObject().containsField("pullRequestTimes")) {
            pm.getDbObject().put("pullRequestTimes", new BasicDBList());
        }
        pm.getPullRequestTimes().add(obj.getString("created_at"));

        //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
        //         if (repoIt.hasNext()) { // FIXME Causes it to run out of heap!
        //            Project project = repoIt.next();
        //            if (project != null) {
        //               project.setNumberOfPullRequests(project.getNumberOfPullRequests()+1);
        //               
        //               if (!project.getDbObject().containsField("pullRequestTimes")) {
        //                  project.getDbObject().put("pullRequestTimes", new BasicDBList());
        //               }
        //               project.getPullRequestTimes().add(obj.getString("created_at"));
        //               
        //            }
        //         } else {
        //            System.err.println("Didn't find project:" + obj.getString("repo") + ":"+obj.getString("owner"));
        //         }

        count++;
        if (count % 1000 == 0) {
            System.out.print(count + ", ");
            bio.sync();
            System.gc();
        }
    }
    bio.sync();
    System.out.println();
    System.exit(0);

    ////      #6 Pull request comments
    //      System.out.println("Extracting pull request comments...");
    //      cursor = msrDb.getCollection("pull_request_comments").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //      
    //         String username = getUserLoginName(bio, "user", "login", obj);
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(username));
    //         if (user == null) {
    ////            System.err.println("Found pull request comment with unrecognised user:" + username);
    //            continue;
    //         }
    //         
    //         if (!user.getDbObject().containsField("pullRequestCommentTimes")) {
    //            user.getDbObject().put("pullRequestCommentTimes", new BasicDBList());
    //         }
    //         user.getPullRequestCommentTimes().add(obj.getString("created_at"));
    //         user.setNumberOfPullRequestComments(user.getNumberOfPullRequestComments()+1);
    //         
    //         // Project
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
    ////         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            if (project != null) {
    //               project.setNumberOfPullRequestComments(project.getNumberOfPullRequestComments()+1);
    //               if (!project.getDbObject().containsField("pullRequestCommentTimes")) {
    //                  project.getDbObject().put("pullRequestCommentTimes", new BasicDBList());
    //               }
    //               project.getPullRequestCommentTimes().add(obj.getString("created_at"));
    //               
    //               ProjectMembership pm = getProjectMembership(bio, user, project);
    //               pm.setNumberOfPullRequestComments(pm.getNumberOfPullRequestComments()+1);
    //               
    //               if (!pm.getDbObject().containsField("pullRequestCommentTimes")) {
    //                  pm.getDbObject().put("pullRequestCommentTimes", new BasicDBList());
    //               }
    //               pm.getPullRequestCommentTimes().add(obj.getString("created_at"));
    //            }
    ////         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      System.exit(0);

    ////      #7 Issues
    //      System.out.println("Extracting issues...");
    //      cursor = msrDb.getCollection("issues").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //      
    //         String username = getUserLoginName(bio, "user", "login", obj);
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(username));
    //         if (user == null) {
    ////            System.err.println("Found issue with unrecognised user:" + username);
    //            continue;
    //         }
    //         
    //         if (!user.getDbObject().containsField("issueTimes")) {
    //            user.getDbObject().put("issueTimes", new BasicDBList());
    //         }
    //         user.getIssueTimes().add(obj.getString("created_at"));
    //         user.setNumberOfIssues(user.getNumberOfIssues()+1);
    //         
    //         // Project
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
    //         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            if (project != null) {
    //               project.setNumberOfIssues(project.getNumberOfIssues()+1);
    //               
    //               if (!project.getDbObject().containsField("issueTimes")) {
    //                  project.getDbObject().put("issueTimes", new BasicDBList());
    //               }
    //               project.getIssueTimes().add(obj.getString("created_at"));
    //               
    //               ProjectMembership pm = getProjectMembership(bio, user, project);
    //               pm.setNumberOfIssues(pm.getNumberOfIssues()+1);
    //               
    //               if (!pm.getDbObject().containsField("issueTimes")) {
    //                  pm.getDbObject().put("issueTimes", new BasicDBList());
    //               }
    //               pm.getIssueTimes().add(obj.getString("created_at"));
    //            }
    //         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      System.exit(0);

    ////      #8 Issue comments
    //      System.out.println("Extracting issue comments...");
    //      cursor = msrDb.getCollection("issue_comments").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //      
    //         String username = getUserLoginName(bio, "user", "login", obj);
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(username));
    //         if (user == null) {
    ////            System.err.println("Found issue comment with unrecognised user:" + username);
    //            continue;
    //         }
    //         
    //         if (!user.getDbObject().containsField("issueCommentTimes")) {
    //            user.getDbObject().put("issueCommentTimes", new BasicDBList());
    //         }
    //         user.getIssueCommentTimes().add(obj.getString("created_at"));
    //         user.setNumberOfIssueComments(user.getNumberOfIssueComments()+1);
    //         
    //         // Project
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
    //         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            if (project != null) {
    //               project.setNumberOfIssueComments(project.getNumberOfIssueComments()+1);
    //               
    //               if (!project.getDbObject().containsField("issueCommentTimes")) {
    //                  project.getDbObject().put("issueCommentTimes", new BasicDBList());
    //               }
    //               project.getIssueCommentTimes().add(obj.getString("created_at"));
    //               
    //               ProjectMembership pm = getProjectMembership(bio, user, project);
    //               pm.setNumberOfIssueComments(pm.getNumberOfIssueComments()+1);
    //               
    //               if (!pm.getDbObject().containsField("issueCommentTimes")) {
    //                  pm.getDbObject().put("issueCommentTimes", new BasicDBList());
    //               }
    //               pm.getIssueCommentTimes().add(obj.getString("created_at"));
    //            }
    //         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      System.exit(0);

    ////      #9 Issue events
    //      System.out.println("Extracting issue events...");
    //      cursor = msrDb.getCollection("issue_events").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //      
    //         String username = getUserLoginName(bio, "actor", "login", obj);
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(username));
    //         if (user == null) {
    ////            System.err.println("Found issue event with unrecognised user:" + username);
    //            continue;
    //         }
    //         
    //         String eventKind = obj.getString("event");
    //         IssueEventKind kind = null; //FIXME
    //         
    //         switch (eventKind) {
    //            case "closed": kind = IssueEventKind.CLOSED; break;
    //            case "assigned": kind = IssueEventKind.ASSIGNED; break;
    //            case "mentioned": kind = IssueEventKind.MENTIONED; break;
    //            case "merged": kind = IssueEventKind.MERGED; break;
    //            case "referenced": kind = IssueEventKind.REFERENCED; break;
    //            case "reopened": kind = IssueEventKind.REOPENED; break;
    //            case "subscribed": kind = IssueEventKind.SUBSCRIBED; break;
    //            case "head_ref_deleted" : kind = IssueEventKind.HEAD_REF_DELETED; break;
    //            case "head_ref_restored" : kind = IssueEventKind.HEAD_REF_RESTORED; break;
    //            case "head_ref_cleaned" : kind = IssueEventKind.HEAD_REF_CLEANED; break;
    //            case "unsubscribed" : kind = IssueEventKind.UNSUBSCRIBED; break;
    //            default:
    //               System.err.println("Unrecognised issue event kind: " + eventKind);
    //         }
    //         if (kind == null) continue;
    //
    //         boolean eventKindFound = false;
    //         
    //         if (!user.getDbObject().containsField("issueEvents")) {
    //            user.getDbObject().put("issueEvents", new BasicDBList());
    //         }
    //         
    //         for (IssueEvent ie : user.getIssueEvents()) {
    //            if (ie.getEventKind().equals(kind)) {
    //               ie.setCount(ie.getCount()+1);
    //               eventKindFound = true;
    //               break;
    //            }
    //         }
    //         if (!eventKindFound) {
    //            IssueEvent ie = new IssueEvent();
    //            ie.setEventKind(kind);
    //            ie.setCount(1);
    //            user.getIssueEvents().add(ie);
    //         }
    //         
    //         // Project
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
    //         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            
    //            if (!project.getDbObject().containsField("issueEvents")) {
    //               project.getDbObject().put("issueEvents", new BasicDBList());
    //            }
    //         
    //            eventKindFound = false;
    //            for (IssueEvent ie : project.getIssueEvents()) {
    //               if (ie.getEventKind().equals(kind)) {
    //                  ie.setCount(ie.getCount()+1);
    //                  eventKindFound = true;
    //                  break;
    //               }
    //            }
    //            if (!eventKindFound) {
    //               IssueEvent ie = new IssueEvent();
    //               ie.setEventKind(kind);
    //               ie.setCount(1);
    //               project.getIssueEvents().add(ie);
    //            }
    //            
    //            ProjectMembership pm = getProjectMembership(bio, user, project);
    //            
    //            if (!pm.getDbObject().containsField("issueEvents")) {
    //               pm.getDbObject().put("issueEvents", new BasicDBList());
    //            }
    //            
    //            eventKindFound = false;
    //            for (IssueEvent ie : pm.getIssueEvents()) {
    //               if (ie.getEventKind().equals(kind)) {
    //                  ie.setCount(ie.getCount()+1);
    //                  eventKindFound = true;
    //                  break;
    //               }
    //            }
    //            if (!eventKindFound) {
    //               IssueEvent ie = new IssueEvent();
    //               ie.setEventKind(kind);
    //               ie.setCount(1);
    //               pm.getIssueEvents().add(ie);
    //            }
    //         }
    //         
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();
    //      System.exit(0);
    //      
    ////      Watchers
    //      System.out.println("Extracting watchers...");
    //      cursor = msrDb.getCollection("watchers").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(obj.getString("login")));
    //         if (user == null) continue;
    //         
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator();
    //         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //            if (project != null && !project.getWatchers().contains(user)) project.getWatchers().add(user);
    //            if (!user.getWatches().contains(project)) user.getWatches().add(project);
    //         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      System.out.println();

    ////      Org members FIXME: INCOMPLETE: Cannot match the org name against ANYTHING....
    //      System.out.println("Extracting org members...");
    //      cursor = msrDb.getCollection("org_members").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         String login = obj.getString("login");
    //         String orgName = obj.getString("org");
    //         
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(login));
    //         
    //         User org = bio.getUsers().findOne(User.LOGIN.eq(orgName));
    //         
    //         if (org!=null){
    //            System.err.println("Found org! " + orgName);
    //            
    //         }
    //         
    ////         Project project = bio.getProjects().findOne(Project.OWNERNAME.eq("orgName"));
    ////         if (project==null) {
    ////            System.err.println("Didn't find project: " + orgName);
    ////            continue;
    ////         }
    ////         ProjectMembership pm = getProjectMembership(bio, user, project);
    ////         pm.setOrgMember(true);
    //      }
    //      bio.sync();
    //      System.out.println();

    //      Repo collaborators
    //      System.out.println("Extracting repo collaborators...");
    //      cursor = msrDb.getCollection("repo_collaborators").find();
    //      cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    //      it = cursor.iterator();
    //      
    //      count = 0;
    //      while(it.hasNext()){
    //         BasicDBObject obj = (BasicDBObject) it.next();
    //         
    //         String login = obj.getString("login");
    //         String projectName = obj.getString("repo");
    //         String ownerName = obj.getString("owner");
    //         
    //         User user = bio.getUsers().findOne(User.LOGIN.eq(login));
    //         Iterator<Project> repoIt = bio.getProjects().find(Project.OWNERNAME.eq(ownerName), Project.NAME.eq(projectName)).iterator();
    //         if (repoIt.hasNext()) {
    //            Project project = repoIt.next();
    //   
    //            ProjectMembership pm = getProjectMembership(bio, user, project);
    //            pm.setCollaborator(true);
    //         } else {
    //            System.err.println("Couldn't find repo. owner: " + ownerName + ", repo: " + projectName);
    //         }
    //         count++;
    //         if (count % 1000 == 0) {
    //            System.out.print(count + ", ");
    //            bio.sync();
    //         }
    //      }
    //      bio.sync();
    //      
    long end = System.currentTimeMillis();
    System.out.println("Finished at " + new Date());

    long duration = end - start;
    System.out.println("Duration: " + duration);

}

From source file:recommender.Recommender.java

public Map<String, Double> recommend() {
    DBCursor cursor = db.findMatrixRows().addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    results = new HashMap<DBObject, Double>();

    while (cursor.hasNext()) {
        DBObject obj = cursor.next();//from  ww  w  . j ava2  s.  c om

        results.put(obj, computeSimilarity(obj));
    }

    similarResults = getSimilarResults(results);

    String[] sources = { "twitter", "facebook" };

    for (DBObject matrixRow : similarResults) {
        for (int i = 0; i < Event.NAMES.length; i++) {
            for (String source : sources) {
                boolean hasAttribute = (matrixRow.get("days_" + source + "_" + Event.NAMES[i]) != null);
                if (hasAttribute) {
                    int attributeValue = (Integer) matrixRow.get("days_" + source + "_" + Event.NAMES[i]);

                    Double eventWeight = (Double) matrixRow.get("event_score_" + Event.NAMES[i]);
                    eventWeight = (eventWeight == null ? 0.5 : eventWeight);

                    System.out.println(eventWeight);

                    counters[i] += eventWeight;
                    sums[i] += eventWeight * attributeValue;
                }
            }
        }
    }

    HashMap<String, Double> finalResult = new HashMap<String, Double>();

    for (int i = 0; i < Event.NAMES.length; i++) {
        if (counters[i] > 0) {
            finalResult.put("days_" + Event.NAMES[i], ((double) sums[i]) / counters[i]);
        }
    }

    return sortByValue(finalResult, false);
}

From source file:Rpackage.MongoFixPostCode.java

public static void main(String[] args) {

    YelpAPI yelpApi = new YelpAPI();
    JSONParser parser = new JSONParser();
    try {//w  w w. ja v a  2  s . c o  m

        // Connect to mongodb
        MongoClient mongo = new MongoClient("10.33.2.142", 27017);

        // get database
        // if database doesn't exists, mongodb will create it for you
        DB db = mongo.getDB("test");

        // get collection
        // if collection doesn't exists, mongodb will create it for you
        DBCollection collection = db.getCollection("twitter_Stream");

        DBCursor cursor;
        BasicDBObject query;
        //------------------------------------
        // ( 1 ) collection.find() --> get all document

        DBObject clause1 = new BasicDBObject("Latitude", new BasicDBObject("$exists", true));
        DBObject clause2 = new BasicDBObject("tweet_Country_Code", new BasicDBObject("$exists", true));
        DBObject clause3 = new BasicDBObject("R_Post_Code", new BasicDBObject("$exists", false));
        /*{
          "$and":[
            {
              "tweet_Country_Code":{
                "$exists":true
              }
            },
            {
              "Latitude":{
                "$exists":false
              }
            },
            {
              "tweet_Country_Code":"AU"
            }
          ]
        }                
        */
        BasicDBList or = new BasicDBList();
        or.add(clause1);
        or.add(clause2);
        or.add(clause3);
        query = new BasicDBObject("$and", or);

        //{  "$and":[    {      "Latitude":{        "$exists":true      }    },    {      "R_Post_Code":{        "$exists":false      }    }  ]}            
        cursor = collection.find(query).addOption(Bytes.QUERYOPTION_NOTIMEOUT) //
                .addOption(Bytes.QUERYOPTION_AWAITDATA);

        System.out.println("( 1 ) .get the user id within  latitide");
        System.out.println("results --> " + cursor.count());
        FoursquareAPI_backup qui4squreApi = new FoursquareAPI_backup();
        try {
            while (cursor.hasNext()) {
                DBObject data = cursor.next();
                String v_user_Name = (String) data.get("user_name");
                Long v_twitte_id = (Long) data.get("tweet_ID");
                String v_twitte_text = (String) data.get("tweet_text");
                Long v_user_Id = (Long) data.get("user_Id");
                Double v_Latitude = (Double) data.get("Latitude");
                Double v_Longtitude = (Double) data.get("Longitude");
                String v_tweet_Place_Name = (String) data.get("tweet_Place_Name");
                String v_tweet_Country_Code = (String) data.get("tweet_Country_Code");

                if (v_user_Id == null) {
                    /*                        System.out.print("update:" +v_user_Name+"/status/"+ v_twitte_id);
                     try{
                     Status status = twitter.showStatus(v_twitte_id);
                     BasicDBObject jo = GetMongoRecord(status);
                     System.out.println( "-->" + status.getId() + " : " + jo.getString("Re_user_screenName") + ":" + jo.getString("tweet_text"));
                     collection.update(new BasicDBObject("tweet_ID", v_twitte_id), jo); // set the document in the DB to the new document for Jo                     
                     }catch (TwitterException ex){
                     if (ex.getStatusCode()==144) continue;
                     }
                     */
                    continue;
                }
                JSONObject businesses = yelpApi.search4Yelp("city", v_Latitude, v_Longtitude);//-27.497835,153.017472);                      
                boolean searchAgain = false;

                if (businesses == null)
                    searchAgain = true;
                else if (businesses.size() < 1)
                    searchAgain = true;

                if (searchAgain) {
                    System.out.println("La:" + v_Latitude + "\tLo:" + v_Longtitude);
                    businesses = qui4squreApi.search4Square("city", v_Latitude, v_Longtitude);
                    searchAgain = false;
                }
                if (businesses == null)
                    searchAgain = true;
                else if (businesses.size() < 1)
                    searchAgain = true;

                if (searchAgain) {
                    businesses = qui4squreApi.searchGoogleMap("city", v_Latitude, v_Longtitude);
                    if (businesses == null) {
                        System.out.println("\t" + v_tweet_Country_Code + "\t:" + v_tweet_Place_Name);
                        continue;
                    } else if (businesses.size() < 1) {
                        System.out.println("\t" + v_tweet_Country_Code + "\t:" + v_tweet_Place_Name);
                        continue;
                    }
                }

                String country_code = (String) businesses.get("country_code");
                String city_code = (String) businesses.get("city_name");
                String state_code = (String) businesses.get("state_code");
                String post_code = (String) businesses.get("post_code");

                BasicDBObject setNewFieldQuery = new BasicDBObject().append("$set",
                        new BasicDBObject().append("R_Country_Code", country_code)
                                .append("R_State_Code", state_code).append("R_City", city_code)
                                .append("R_Post_Code", post_code));

                collection.update(new BasicDBObject().append("tweet_ID", v_twitte_id), setNewFieldQuery); // set the document in the DB to the new document for Jo                                             

                setNewFieldQuery.clear();
                setNewFieldQuery = null;
            }
        } finally {
            cursor.close();
        }

        System.out.println("---------------------------------");

    } catch (UnknownHostException e) {
        e.printStackTrace();
    } catch (MongoException e) {
        e.printStackTrace();
    }

}