List of usage examples for com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT

Field: int QUERYOPTION_NOTIMEOUT

Each example below comes from the open-source file named in its heading and shows the constant being passed to DBCursor.addOption(...) so that the server does not close the cursor after its default idle timeout.
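Before the project-specific examples, here is a minimal, self-contained sketch of the call pattern they all share. It is not taken from any of the files below; the host, port, database, and collection names are placeholders chosen for illustration, and it uses the legacy DBCursor API.

import com.mongodb.Bytes;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class NoTimeoutCursorExample {
    public static void main(String[] args) throws Exception {
        // Placeholder connection details for the sketch
        MongoClient client = new MongoClient("localhost", 27017);
        DBCollection collection = client.getDB("exampledb").getCollection("examplecoll");

        // QUERYOPTION_NOTIMEOUT is a bit flag, so it can be OR-ed with other query options
        // (see the tailable oplog example below).
        DBCursor cursor = collection.find().addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        try {
            while (cursor.hasNext()) {
                DBObject doc = cursor.next();
                System.out.println(doc); // process the document
            }
        } finally {
            // With the no-timeout option set, the server will not reap the cursor on its own,
            // so closing it explicitly matters.
            cursor.close();
        }
        client.close();
    }
}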
From source file:essex.bigessexnew.OplogListener.java
public void listen(boolean processHistory, String... logFields) {
    BasicDBObject query = prepareOplogListenQuery(processHistory);
    BasicDBObject fields = prepareOplogListenFields(logFields);
    DBCursor cur = collection.find(query, fields)
            .sort((BasicDBObjectBuilder.start("$natural", 1)).get())
            .addOption(Bytes.QUERYOPTION_TAILABLE | Bytes.QUERYOPTION_AWAITDATA | Bytes.QUERYOPTION_NOTIMEOUT);
    performListenTask(cur);
}
From source file:fr.cirad.web.controller.gigwa.base.AbstractVariantController.java
License:Open Source License
/** * Export variants.//from w w w. j a va 2 s.c o m * * @param request the request * @param response the response * @param sModule the module * @param fKeepExportOnServer whether or not to keep export on server * @param sExportFormat the export format * @param exportID the export id * @param projId the proj id * @param selectedVariantTypes the selected variant types * @param selectedSequences the selected sequences * @param selectedIndividuals the selected individuals * @param gtPattern the gt code * @param genotypeQualityThreshold the genotype quality threshold * @param readDepthThreshold the read depth threshold * @param missingData the missing data * @param minmaf the minmaf * @param maxmaf the maxmaf * @param minposition the minposition * @param maxposition the maxposition * @param alleleCount the allele count * @param geneName the gene name * @param variantEffects the variant effects * @throws Exception the exception */ @RequestMapping(variantExportDataURL) protected void exportVariants(HttpServletRequest request, HttpServletResponse response, @RequestParam("module") String sModule, @RequestParam("keepExportOnServer") boolean fKeepExportOnServer, @RequestParam("exportFormat") String sExportFormat, @RequestParam("exportID") String exportID, @RequestParam("project") int projId, @RequestParam("variantTypes") String selectedVariantTypes, @RequestParam("sequences") String selectedSequences, @RequestParam(value = "individuals", required = false) String selectedIndividuals, @RequestParam("gtPattern") String gtPattern, @RequestParam("genotypeQualityThreshold") int genotypeQualityThreshold, @RequestParam("readDepthThreshold") int readDepthThreshold, @RequestParam("missingData") double missingData, @RequestParam(value = "minmaf", required = false) Float minmaf, @RequestParam(value = "maxmaf", required = false) Float maxmaf, @RequestParam("minposition") Long minposition, @RequestParam("maxposition") Long maxposition, @RequestParam("alleleCount") String alleleCount, @RequestParam("geneName") String geneName, @RequestParam("variantEffects") String variantEffects) throws Exception { // exportID = URLDecoder.decode(exportID, "UTF-8"); String token = exportID.substring(1 + exportID.indexOf('|')); ProgressIndicator progress = ProgressIndicator.get(token); if (progress == null) { progress = new ProgressIndicator(token, new String[] { "Identifying matching variants" }); ProgressIndicator.registerProgressIndicator(progress); } long before = System.currentTimeMillis(); final MongoTemplate mongoTemplate = MongoTemplateManager.get(sModule); List<String> selectedIndividualList = selectedIndividuals.length() == 0 ? 
getIndividualsInDbOrder(sModule, projId) /* no selection means all selected */ : Arrays.asList(selectedIndividuals.split(";")); long count = countVariants(request, sModule, projId, selectedVariantTypes, selectedSequences, selectedIndividuals, gtPattern, genotypeQualityThreshold, readDepthThreshold, missingData, minmaf, maxmaf, minposition, maxposition, alleleCount, geneName, variantEffects, "" /* if we pass exportID then the progress indicator is going to be replaced by another, and we don't need it for counting since we cache count values */); DBCollection tmpVarColl = getTemporaryVariantCollection(sModule, token, false); long nTempVarCount = mongoTemplate.count(new Query(), tmpVarColl.getName()); boolean fWorkingOnFullDataset = mongoTemplate.count(null, VariantData.class) == count; if (!fWorkingOnFullDataset && nTempVarCount == 0) { progress.setError(MESSAGE_TEMP_RECORDS_NOT_FOUND); return; } // use a cursor to avoid using too much memory DBObject query = count == nTempVarCount ? null : new BasicDBObject(VariantData.FIELDNAME_VERSION, new BasicDBObject("$exists", true)); String sequenceField = VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_SEQUENCE; String startField = VariantData.FIELDNAME_REFERENCE_POSITION + "." + ReferencePosition.FIELDNAME_START_SITE; BasicDBObject sort = new BasicDBObject("_id", 1); /* necessary for MgdbDao.getSampleGenotypes to work properly */ DBObject projection = new BasicDBObject(); projection.put(sequenceField, 1); projection.put(startField, 1); DBCursor markerCursor = mongoTemplate.getCollection( !fWorkingOnFullDataset ? tmpVarColl.getName() : mongoTemplate.getCollectionName(VariantData.class)) .find(query, projection).sort(sort); markerCursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); try { AbstractIndividualOrientedExportHandler individualOrientedExportHandler = AbstractIndividualOrientedExportHandler .getIndividualOrientedExportHandlers().get(sExportFormat); AbstractMarkerOrientedExportHandler markerOrientedExportHandler = AbstractMarkerOrientedExportHandler .getMarkerOrientedExportHandlers().get(sExportFormat); GenotypingProject project = mongoTemplate.findById(projId, GenotypingProject.class); String filename = sModule + "_" + project.getName() + "_" + new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + "_" + count + "variants_" + sExportFormat + "." + (individualOrientedExportHandler != null ? individualOrientedExportHandler : markerOrientedExportHandler).getExportFileExtension(); OutputStream os; LOG.info((fKeepExportOnServer ? 
"On-server" : "Direct-download") + " export requested: " + token); if (fKeepExportOnServer) { String relativeOutputFolder = File.separator + FRONTEND_URL + File.separator + TMP_OUTPUT_FOLDER + File.separator + token.replaceAll("\\|", "_") + File.separator; File outputLocation = new File( request.getSession().getServletContext().getRealPath(relativeOutputFolder)); if (!outputLocation.exists() && !outputLocation.mkdirs()) throw new Exception("Unable to create folder: " + outputLocation); os = new FileOutputStream(new File(outputLocation.getAbsolutePath() + File.separator + filename)); response.setContentType("text/plain"); } else { os = response.getOutputStream(); response.setContentType("application/zip"); response.setHeader("Content-disposition", "inline; filename=" + filename); } ArrayList<SampleId> sampleIDs = new ArrayList<SampleId>(); for (String individual : selectedIndividualList) for (Integer individualSampleIndex : project.getIndividualSampleIndexes(individual)) sampleIDs.add(new SampleId(projId, individualSampleIndex)); if (fKeepExportOnServer) { String relativeOutputFolder = FRONTEND_URL + File.separator + TMP_OUTPUT_FOLDER + File.separator + token.replaceAll("\\|", "_") + File.separator; String relativeOutputFolderUrl = request.getContextPath() + "/" + relativeOutputFolder.replace(File.separator, "/"); String exportURL = relativeOutputFolderUrl + filename; LOG.debug("On-server export file for export " + token + ": " + exportURL); response.getWriter().write(exportURL); response.flushBuffer(); } // else // { // // The two next lines are an ugly hack that makes the client believe transfer has started. Otherwise we may end-up with a client-side timeout (search for network.http.response.timeout for more details) // response.getOutputStream().print(" "); // response.getOutputStream().flush(); // } HashMap<String, Integer> annotationFieldThresholds = new HashMap<String, Integer>(); annotationFieldThresholds.put(VCFConstants.GENOTYPE_QUALITY_KEY, genotypeQualityThreshold); annotationFieldThresholds.put(VCFConstants.DEPTH_KEY, readDepthThreshold); if (individualOrientedExportHandler != null) { progress.addStep("Reading and re-organizing genotypes"); // initial step will consist in organizing genotypes by individual rather than by marker progress.moveToNextStep(); // done with identifying variants TreeMap<String, File> exportFiles = individualOrientedExportHandler.createExportFiles(sModule, markerCursor.copy(), sampleIDs, new ArrayList<SampleId>(), token, annotationFieldThresholds, new HashMap<String, Integer>(), project.getSampleIdToIndividualMap(selectedIndividualList), progress); if (!progress.hasAborted()) { for (String step : individualOrientedExportHandler.getStepList()) progress.addStep(step); progress.moveToNextStep(); individualOrientedExportHandler.exportData(os, sModule, exportFiles.values(), true, progress, markerCursor, null, null); } } else if (markerOrientedExportHandler != null) { for (String step : markerOrientedExportHandler.getStepList()) progress.addStep(step); progress.moveToNextStep(); // done with identifying variants markerOrientedExportHandler.exportData(os, sModule, sampleIDs, new ArrayList<SampleId>(), progress, markerCursor, null, annotationFieldThresholds, new HashMap<String, Integer>(), project.getSampleIdToIndividualMap(selectedIndividualList), null); LOG.debug("done with exportData"); } else throw new Exception("No export handler found for format " + sExportFormat); if (!progress.hasAborted()) { LOG.info("doVariantExport took " + (System.currentTimeMillis() 
- before) / 1000d + "s to process " + count + " variants and " + selectedIndividualList.size() + " individuals"); progress.markAsComplete(); } } catch (Throwable t) { LOG.error("Error exporting data", t); progress.setError("Error exporting data: " + t.getClass().getSimpleName() + (t.getMessage() != null ? " - " + t.getMessage() : "")); return; } finally { markerCursor.close(); } }
From source file:org.apache.gora.mongodb.store.MongoStore.java
License:Apache License
/**
 * Execute the query and return the result.
 */
@Override
public Result<K, T> execute(final Query<K, T> query) {
    String[] fields = getFieldsToQuery(query.getFields());
    // Build the actual MongoDB query
    DBObject q = MongoDBQuery.toDBQuery(query);
    DBObject p = MongoDBQuery.toProjection(fields, mapping);

    if (query.getFilter() != null) {
        boolean succeeded = filterUtil.setFilter(q, query.getFilter(), this);
        if (succeeded) {
            // don't need local filter
            query.setLocalFilterEnabled(false);
        }
    }

    // Execute the query on the collection
    DBCursor cursor = mongoClientColl.find(q, p);
    if (query.getLimit() > 0)
        cursor = cursor.limit((int) query.getLimit());
    cursor.batchSize(100);
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    // Build the result
    MongoDBResult<K, T> mongoResult = new MongoDBResult<K, T>(this, query);
    mongoResult.setCursor(cursor);
    return mongoResult;
}
From source file:org.apache.hadoop.contrib.mongoreduce.MongoRecordReader.java
License:Apache License
private void connect(String location, Configuration conf) throws IOException {
    String[] parts = location.split(":");

    // default port for sharded server
    int port = 27018;
    if (parts.length > 1)
        port = Integer.parseInt(parts[1]);

    Mongo mongo = new Mongo(parts[0], port);

    // figure out if we can read from this server
    // allow reading from secondaries
    mongo.slaveOk();

    String database = conf.get("mongo.input.database");
    String collection = conf.get("mongo.input.collection");
    String query = conf.get("mongo.input.query", "");
    String select = conf.get("mongo.input.select", "");

    if (!query.equals("")) {
        DBObject q = (DBObject) JSON.parse(query);

        if (!select.equals("")) {
            DBObject s = (DBObject) JSON.parse(select);
            cursor = mongo.getDB(database).getCollection(collection).find(q, s);
        } else {
            cursor = mongo.getDB(database).getCollection(collection).find(q);
        }
    } else {
        if (!select.equals("")) {
            DBObject s = (DBObject) JSON.parse(select);
            cursor = mongo.getDB(database).getCollection(collection).find(new BasicDBObject(), s);
        } else {
            cursor = mongo.getDB(database).getCollection(collection).find();
        }
    }

    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);

    // thanks mongo, for this handy method
    totalResults = cursor.count();
    resultsRead = 0.0f;
}
From source file:org.axonframework.eventstore.mongo.MongoEventStore.java
License:Apache License
@Override
public void visitEvents(Criteria criteria, EventVisitor visitor) {
    DBCursor cursor = storageStrategy.findEvents(mongoTemplate.domainEventCollection(),
            (MongoCriteria) criteria);
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    CursorBackedDomainEventStream events = new CursorBackedDomainEventStream(cursor, null, null, true);
    try {
        while (events.hasNext()) {
            visitor.doWithEvent(events.next());
        }
    } finally {
        events.close();
    }
}
From source file:org.eclipse.birt.data.oda.mongodb.internal.impl.MDbOperation.java
License:Open Source License
/**
 * Applies data set query properties and hints on DBCursor, except
 * for cursor limit.
 * @see #applyPropertiesToCursor(DBCursor,QueryProperties,boolean,boolean)
 */
static void applyPropertiesToCursor(DBCursor rowsCursor, QueryProperties queryProps,
        boolean includeSortExpr) {
    if (includeSortExpr) // normally done only when executing a query to get full result set
    {
        DBObject sortExprObj = null;
        try {
            sortExprObj = queryProps.getSortExprAsParsedObject();
        } catch (OdaException ex) {
            // log warning and ignore
            DriverUtil.getLogger().log(Level.WARNING,
                    Messages.bind("Unable to parse the user-defined Sort Expression: {0}", //$NON-NLS-1$
                            queryProps.getSortExpr()),
                    ex);
        }
        if (sortExprObj != null)
            rowsCursor.sort(sortExprObj);
    }

    ReadPreference readPref = queryProps.getTaggableReadPreference();
    if (readPref != null)
        rowsCursor.setReadPreference(readPref);

    if (queryProps.getBatchSize() > 0)
        rowsCursor.batchSize(queryProps.getBatchSize());

    if (queryProps.getNumDocsToSkip() > 0)
        rowsCursor.skip(queryProps.getNumDocsToSkip());

    DBObject hintObj = queryProps.getIndexHintsAsParsedObject();
    if (hintObj != null)
        rowsCursor.hint(hintObj);
    else // try to pass the hint string value as is
    {
        String hintValue = queryProps.getIndexHints();
        if (!hintValue.isEmpty())
            rowsCursor.hint(hintValue);
    }

    if (queryProps.hasNoTimeOut())
        rowsCursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    if (queryProps.isPartialResultsOk())
        rowsCursor.addOption(Bytes.QUERYOPTION_PARTIAL);
}
From source file:org.geotools.data.mongodb.MongoLayer.java
License:LGPL
/**
 * Get mapping of field names and types, and counts for different types
 *
 * @param metaResultsColl collection where metadata from map-reduce job stored, in format:
 *        { "_id" : { "fieldname" : "geometry.type", "type" : "Point" }, "value" : 2 }
 *        { "_id" : { "fieldname" : "properties.ActivityDescription", "type" : "number" }, "value" : 1 }
 *        { "_id" : { "fieldname" : "properties.ActivityDescription", "type" : "string" }, "value" : 3 }
 *        where value is number of occurrences for given type
 * @return mapping of field names to ClassCount holding type and count info
 */
private HashMap<String, ClassCount> getFieldMap(DBCollection metaResultsColl) {
    // cursor over collection
    BasicDBObject query = new BasicDBObject();
    DBCursor cursor = metaResultsColl.find(query);
    // avoid cursor timeout
    cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    // map to store fieldname and ClassCount object holding type and type-count info
    HashMap<String, ClassCount> fieldMap = new HashMap<String, ClassCount>();
    try {
        // iterate over each record
        while (cursor.hasNext()) {
            // check type found for current field
            DBObject currRec = cursor.next();
            DBObject currField = (DBObject) currRec.get("_id");
            String fieldName = (String) currField.get("fieldname");
            String fieldType = (String) currField.get("type");
            int typeCount = ((Double) currRec.get("value")).intValue();
            // if first occurrence of field name instantiate counter
            if (!fieldMap.containsKey(fieldName)) {
                fieldMap.put(fieldName, new ClassCount(fieldType, typeCount));
            }
            // else increment count for given type
            else {
                ClassCount currCount = fieldMap.get(fieldName);
                currCount.add(fieldType, typeCount);
                fieldMap.put(fieldName, currCount);
            }
        }
    } finally {
        // need to explicitly release cursor since notimeout option set
        cursor.close();
    }
    return fieldMap;
}
From source file:org.ossmeter.platform.mining.msr14.Extractor.java
License:Open Source License
public static void main(String[] args) throws Exception { long start = System.currentTimeMillis(); Mongo msrMongo = new Mongo(new ServerAddress("localhost", 1234)); // GitHub challenge data Mongo bioMongo = new Mongo(new ServerAddress("localhost", 12345));// Extracted data // Create indexes Biodiversity bio = new Biodiversity(bioMongo.getDB("biodiversity")); bio.setClearPongoCacheOnSync(true);// w w w . j a v a 2 s. c o m bioMongo.getDB("biodiversity").getCollection("users").ensureIndex(new BasicDBObject("login", 1)); BasicDBObject index = new BasicDBObject(); index.put("name", 1); index.put("ownerName", 1); bioMongo.getDB("biodiversity").getCollection("projects").ensureIndex(index); index = new BasicDBObject(); index.put("projectName", 1); index.put("projectOwner", 1); bioMongo.getDB("biodiversity").getCollection("projectMemberships").ensureIndex(index); index = new BasicDBObject(); index.put("projectName", 1); index.put("userName", 1); bioMongo.getDB("biodiversity").getCollection("projectMemberships").ensureIndex(index); bioMongo.getDB("biodiversity").getCollection("projectMemberships") .ensureIndex(new BasicDBObject("userName", 1)); DB msrDb = msrMongo.getDB("msr14"); // #1 User extraction System.out.println("Extracting users..."); DBCursor cursor = msrDb.getCollection("users").find(); cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); Iterator<DBObject> it = cursor.iterator(); int count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // User user = new User(); // user.setGhId(obj.getString("id")); // user.setLogin(obj.getString("login")); // user.setLocation(obj.getString("location")); // user.setPublicRepos(obj.getInt("public_repos", 0)); // user.setJoinedDate(obj.getString("created_at")); // user.setFollowerCount(obj.getInt("followers", 0)); // user.setFollowingCount(obj.getInt("following", 0)); // user.setPublicGists(obj.getInt("public_gists", 0)); // // bio.getUsers().add(user); // // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // //// #1.2 Project extraction // System.out.println("Extracting projects..."); // cursor = msrDb.getCollection("repos").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // Project project = new Project(); // project.setName(obj.getString("name")); // project.setGhId(obj.getString("id")); // project.setCreatedAt(obj.getString("created_at")); // project.setSize(obj.getInt("size", 0)); // project.setWatchersCount(obj.getInt("watchers",0)); // project.setWatchersCount2(obj.getInt("watchers_count",0)); // project.setLanguage(obj.getString("language")); // project.setForks(obj.getInt("forks", 0)); // project.setForksCount(obj.getInt("forks_count", 0)); // project.setOpenIssues(obj.getInt("open_issues",0)); // project.setOpenIssuesCount(obj.getInt("open_issues_count",0)); // project.setOpenIssues(obj.getInt("open_issues",0)); // project.setNetworkCount(obj.getInt("network_count", 0)); // // BasicDBObject ownerObj = (BasicDBObject) obj.get("owner"); // User owner = null; // if (ownerObj != null) { // owner = bio.getUsers().findOne(User.LOGIN.eq(ownerObj.getString("login"))); // if (owner !=null) { // project.setOwner(owner); // project.setOwnerName(owner.getLogin()); // } // } // bio.getProjects().add(project); // // if (owner != null) { // This comes here as to reference the project, we need to have added to the 
project list first // ProjectMembership pm = getProjectMembership(bio, owner, project); // pm.setOwner(true); // } // // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // // System.exit(0); //// #2 Follower/following extraction // System.out.println("Extracting followers..."); // cursor = msrDb.getCollection("followers").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String followerLogin = obj.getString("login"); // String followedLogin = obj.getString("follows"); // // User follower = bio.getUsers().findOne(User.LOGIN.eq(followerLogin)); // User followed = bio.getUsers().findOne(User.LOGIN.eq(followedLogin)); // // if (follower != null && followed != null) { // follower.getFollowing().add(followed); // followed.getFollowers().add(follower); // } else{ //// System.err.println("Follower or followed is null. Follower: " +follower + ". followed: " + followed); // } // if (follower != null) follower.setFollowingCount(follower.getFollowingCount()+1); // if (followed != null) followed.setFollowerCount(followed.getFollowerCount()+1); // // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // // System.exit(0); System.out.println("Clearing ProjectMembership commit data"); for (ProjectMembership pm : bio.getProjectMemberships()) { pm.setCommitCount(0); pm.setCommitTotalChanges(0); pm.setCommitAdditions(0); pm.setCommitDeletions(0); pm.setCommitsAsAuthor(0); pm.setCommitsAsCommitter(0); pm.setCommitTotalFiles(0); pm.setAverageFilesPerCommit(0); pm.getCommitTimes().clear(); } bio.sync(); System.out.println("cleared."); // #3 Commits System.out.println("Extracting commits..."); cursor = msrDb.getCollection("commits").find(); cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); it = cursor.iterator(); count = 0; while (it.hasNext()) { BasicDBObject obj = (BasicDBObject) it.next(); // Author and committer BasicDBObject commitAuthor = (BasicDBObject) obj.get("author"); BasicDBObject commitCommitter = (BasicDBObject) obj.get("committer"); String authorLogin = ""; if (commitAuthor != null) authorLogin = commitAuthor.getString("login"); String committerLogin = ""; if (commitCommitter != null) committerLogin = commitCommitter.getString("login"); // Stats BasicDBObject stats = (BasicDBObject) obj.get("stats"); if (stats == null) stats = new BasicDBObject(); // Create a new one so we can get zeroed values int total = stats.getInt("total", 0); int additions = stats.getInt("additions", 0); int deletions = stats.getInt("deletions", 0); String commitDate = ((BasicDBObject) ((BasicDBObject) obj.get("commit")).get("author")) .getString("date"); BasicDBList files = (BasicDBList) obj.get("files"); String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url")); ProjectMembership authorPm = null; ProjectMembership committerPm = null; if (authorLogin != null) { authorPm = getProjectMembership(bio, authorLogin, url[1], url[0]); authorPm.setCommitCount(authorPm.getCommitCount() + 1); authorPm.setCommitTotalChanges(authorPm.getCommitTotalChanges() + total); authorPm.setCommitAdditions(authorPm.getCommitAdditions() + additions); authorPm.setCommitDeletions(authorPm.getCommitDeletions() + deletions); authorPm.setCommitsAsAuthor(authorPm.getCommitsAsAuthor() + 1); if (files != null) 
authorPm.setCommitTotalFiles(authorPm.getCommitTotalChanges() + files.size()); authorPm.setAverageFilesPerCommit(authorPm.getCommitTotalFiles() / authorPm.getCommitCount()); authorPm.getCommitTimes().add(commitDate); } if (authorLogin != null && !authorLogin.equals(committerLogin)) { committerPm = getProjectMembership(bio, committerLogin, url[1], url[0]); committerPm.setCommitCount(committerPm.getCommitCount() + 1); // committerPm.setCommitTotalChanges(committerPm.getCommitTotalChanges()+total); // committerPm.setCommitAdditions(committerPm.getCommitAdditions()+additions); // committerPm.setCommitDeletions(committerPm.getCommitDeletions()+deletions); committerPm.setCommitsAsCommitter(committerPm.getCommitsAsCommitter() + 1); committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges() + files.size()); committerPm.setAverageFilesPerCommit(committerPm.getCommitTotalFiles() / authorPm.getCommitCount()); if (files != null) committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges() + files.size()); committerPm .setAverageFilesPerCommit(committerPm.getCommitTotalFiles() / committerPm.getCommitCount()); committerPm.getCommitTimes().add(commitDate); } bio.sync(); count++; if (count % 1000 == 0) { System.out.print(count + ", "); bio.sync(); } } cursor.close(); bio.sync(); System.out.println(); System.exit(0); // if (author != null) { //// if (author.getCommits() ==null) author.setCommits(new Commits()); // author.setCommitCount(author.getCommitCount()+1); // author.setCommitTotalChanges(author.getCommitTotalChanges()+total); // author.setCommitAdditions(author.getCommitAdditions()+additions); // author.setCommitDeletions(author.getCommitDeletions()+deletions); // author.setCommitsAsAuthor(author.getCommitsAsAuthor()+1); // author.getCommitTimes().add(commitDate); // } // if (committer != null) { //// if (committer.getCommits() ==null) committer.setCommits(new Commits()); // committer.setCommitCount(committer.getCommitCount()+1); // committer.setCommitTotalChanges(committer.getCommitTotalChanges()+total); // committer.setCommitAdditions(committer.getCommitAdditions()+additions); // committer.setCommitDeletions(committer.getCommitDeletions()+deletions); // committer.setCommitsAsCommitter(committer.getCommitsAsCommitter()+1); // committer.getCommitTimes().add(commitDate); // } // // ProjectMembership authorPm = null; // ProjectMembership committerPm = null; // //// Only a very small number of commit comments actually reference the repo //// Instead we're going to have to strip the string // String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url")); // Project project = null; // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(url[1]), Project.OWNERNAME.eq(url[0])).iterator(); // if (repoIt.hasNext()) { // project = repoIt.next(); // if (project != null) { // // project.setCommitCount(project.getCommitCount()+1); // project.setCommitTotalChanges(project.getCommitTotalChanges()+total); // project.setCommitAdditions(project.getCommitAdditions()+additions); // project.setCommitDeletions(project.getCommitDeletions()+deletions); // project.getCommitTimes().add(commitDate); // // if (author != null) { // authorPm = getProjectMembership(bio, author, project); // authorPm.setCommitCount(authorPm.getCommitCount()+1); // authorPm.setCommitTotalChanges(authorPm.getCommitTotalChanges()+total); // authorPm.setCommitAdditions(authorPm.getCommitAdditions()+additions); // authorPm.setCommitDeletions(authorPm.getCommitDeletions()+deletions); // 
authorPm.setCommitsAsAuthor(authorPm.getCommitsAsAuthor()+1); // // // Avoid duplicating information // if (committer != null && author.getLogin().equals(committer.getLogin())) { // authorPm.setCommitsAsCommitter(authorPm.getCommitsAsCommitter()+1); // } // // authorPm.getCommitTimes().add(commitDate); // } // if (committer != null && author != null && !author.getLogin().equals(committer.getLogin())) { // committerPm = getProjectMembership(bio, committer, project); // committerPm.setCommitCount(committerPm.getCommitCount()+1); // committerPm.setCommitTotalChanges(committerPm.getCommitTotalChanges()+total); // committerPm.setCommitAdditions(committerPm.getCommitAdditions()+additions); // committerPm.setCommitDeletions(committerPm.getCommitDeletions()+deletions); // committerPm.setCommitsAsCommitter(committerPm.getCommitsAsCommitter()+1); // // committerPm.getCommitTimes().add(commitDate); // } // } // } // else { // System.err.println("Didn't find project:" + url[0] + ":"+url[1] + ", prestrip: " + obj.getString("url")); // } // bio.getProjectMemberships().sync(); // bio.sync(); // // // // Files // BasicDBList files = (BasicDBList) obj.get("files"); // if (files != null) { // for (Object f : files) { // BasicDBObject file = (BasicDBObject)f; // // String filename = file.getString("filename"); // if (filename.lastIndexOf(".") != -1) { // If it has an extension, we want that. If not, use the entire filename // filename = filename.substring(filename.lastIndexOf(".")); // filename = filename.toLowerCase(); // Ensure consistency // } // // FIXME: Should strip any /'s if there is no '.' - i.e. just the last one // // if (author != null) addArtefact(author, filename); // if (committer != null) addArtefact(committer, filename); //// if (project != null) addArtefact(project, filename); // } // } // // if (author != null && files !=null) { // author.setCommitTotalFiles(author.getCommitTotalFiles()+files.size()); // author.setAverageFilesPerCommit(author.getCommitTotalFiles()/author.getCommitCount()); // } // if (committer != null && files !=null && (author==null || !committer.getLogin().equals(author.getLogin()))) { // committer.setCommitTotalFiles(committer.getCommitTotalFiles()+files.size()); // committer.setAverageFilesPerCommit(committer.getCommitTotalFiles()/committer.getCommitCount()); // } // if (authorPm !=null && files != null) { // authorPm.setCommitTotalFiles(authorPm.getCommitTotalChanges()+files.size()); // authorPm.setAverageFilesPerCommit(authorPm.getCommitTotalFiles()/authorPm.getCommitCount()); // } // if (committerPm != null && files != null) { // committerPm.setCommitTotalFiles(committerPm.getCommitTotalChanges()+files.size()); // committerPm.setAverageFilesPerCommit(committerPm.getCommitTotalFiles()/committerPm.getCommitCount()); // } // // if (project!=null && files != null) { // project.setCommitTotalFiles(project.getCommitTotalChanges()+files.size()); // project.setAverageFilesPerCommit(project.getCommitTotalFiles()/project.getCommitCount()); // } // bio.getProjectMemberships().sync(); // bio.sync(); // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // cursor.close(); // bio.sync(); // System.out.println(); // // System.exit(0); //// #4 Commit comments // System.out.println("Extracting commit comments..."); // cursor = msrDb.getCollection("commit_comments").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) 
it.next(); // // String username = getUserLoginName(bio, "user", "login", obj); // User user = bio.getUsers().findOne(User.LOGIN.eq(username)); // if (user == null) { // System.err.println("Found commit comment with unrecognised user: " + username); // continue; // } // // user.setNumberOfCommitComments(user.getNumberOfCommitComments()+1); // //// if (!user.getDbObject().containsField("commitCommentTimes")) { //// user.getDbObject().put("commitCommentTimes", new BasicDBList()); //// } //// user.getCommitCommentTimes().add(obj.getString("created_at")); // // // Only a very small number of commit comments actually reference the repo // // Instead we're going to have to strip the string // String[] url = convertUrlIntoProjectNameAndOwner(obj.getString("url")); // //// System.out.println("Querying project " + url[1] + " and owner " + url[0]); // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(url[1]), Project.OWNERNAME.eq(url[0])).iterator(); //// if (repoIt.hasNext()) { // Project project = repoIt.next(); // if (project != null) { // project.setNumberOfCommitComments(project.getNumberOfCommitComments()+1); // // if (!project.getDbObject().containsField("commitCommentTimes")) { // project.getDbObject().put("commitCommentTimes", new BasicDBList()); // } // project.getCommitCommentTimes().add(obj.getString("created_at")); // // ProjectMembership pm = getProjectMembership(bio, user, project); // pm.setNumberOfCommitComments(pm.getNumberOfCommitComments()+1); // // if (!pm.getDbObject().containsField("commitCommentTimes")) { // pm.getDbObject().put("commitCommentTimes", new BasicDBList()); // } // pm.getCommitCommentTimes().add(obj.getString("created_at")); // } //// } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // cursor.close(); // bio.sync(); // System.out.println(); // System.exit(0); //// //FIXME: THIS IS CAUSING THE CPU TO HIS 350% AND THEN KILLS THE LAPTOP?!?!?!?!? 
// #5 Pull requests System.out.println("Extracting pull requests..."); cursor = msrDb.getCollection("pull_requests").find(); cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); it = cursor.iterator(); System.out.println("Clearing previous data"); for (User u : bio.getUsers()) { if (!u.getDbObject().containsField("pullRequestTimes")) { u.getDbObject().put("pullRequestTimes", new BasicDBList()); } u.getPullRequestTimes().clear(); u.setNumberOfPullRequests(0); } bio.sync(); for (Project u : bio.getProjects()) { if (!u.getDbObject().containsField("pullRequestTimes")) { u.getDbObject().put("pullRequestTimes", new BasicDBList()); } u.getPullRequestTimes().clear(); u.setNumberOfPullRequests(0); } bio.sync(); for (ProjectMembership u : bio.getProjectMemberships()) { if (!u.getDbObject().containsField("pullRequestTimes")) { u.getDbObject().put("pullRequestTimes", new BasicDBList()); } u.getPullRequestTimes().clear(); u.setNumberOfPullRequests(0); } bio.sync(); System.out.println("Cleared!"); count = 0; while (it.hasNext()) { BasicDBObject obj = (BasicDBObject) it.next(); String username = getUserLoginName(bio, "user", "login", obj); User user = bio.getUsers().findOne(User.LOGIN.eq(username)); if (user == null) { // System.err.println("Found pull request with unrecognised user:" + username); continue; } if (!user.getDbObject().containsField("pullRequestTimes")) { user.getDbObject().put("pullRequestTimes", new BasicDBList()); } user.getPullRequestTimes().add(obj.getString("created_at")); user.setNumberOfPullRequests(user.getNumberOfPullRequests() + 1); // Project System.out.println(obj.getString("repo") + " " + obj.getString("owner") + obj.getString("_id")); ProjectMembership pm = getProjectMembership(bio, user.getLogin(), obj.getString("repo"), obj.getString("owner")); pm.setNumberOfPullRequests(pm.getNumberOfPullRequests() + 1); if (!pm.getDbObject().containsField("pullRequestTimes")) { pm.getDbObject().put("pullRequestTimes", new BasicDBList()); } pm.getPullRequestTimes().add(obj.getString("created_at")); // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); // if (repoIt.hasNext()) { // FIXME Causes it to run out of heap! 
// Project project = repoIt.next(); // if (project != null) { // project.setNumberOfPullRequests(project.getNumberOfPullRequests()+1); // // if (!project.getDbObject().containsField("pullRequestTimes")) { // project.getDbObject().put("pullRequestTimes", new BasicDBList()); // } // project.getPullRequestTimes().add(obj.getString("created_at")); // // } // } else { // System.err.println("Didn't find project:" + obj.getString("repo") + ":"+obj.getString("owner")); // } count++; if (count % 1000 == 0) { System.out.print(count + ", "); bio.sync(); System.gc(); } } bio.sync(); System.out.println(); System.exit(0); //// #6 Pull request comments // System.out.println("Extracting pull request comments..."); // cursor = msrDb.getCollection("pull_request_comments").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String username = getUserLoginName(bio, "user", "login", obj); // User user = bio.getUsers().findOne(User.LOGIN.eq(username)); // if (user == null) { //// System.err.println("Found pull request comment with unrecognised user:" + username); // continue; // } // // if (!user.getDbObject().containsField("pullRequestCommentTimes")) { // user.getDbObject().put("pullRequestCommentTimes", new BasicDBList()); // } // user.getPullRequestCommentTimes().add(obj.getString("created_at")); // user.setNumberOfPullRequestComments(user.getNumberOfPullRequestComments()+1); // // // Project // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); //// if (repoIt.hasNext()) { // Project project = repoIt.next(); // if (project != null) { // project.setNumberOfPullRequestComments(project.getNumberOfPullRequestComments()+1); // if (!project.getDbObject().containsField("pullRequestCommentTimes")) { // project.getDbObject().put("pullRequestCommentTimes", new BasicDBList()); // } // project.getPullRequestCommentTimes().add(obj.getString("created_at")); // // ProjectMembership pm = getProjectMembership(bio, user, project); // pm.setNumberOfPullRequestComments(pm.getNumberOfPullRequestComments()+1); // // if (!pm.getDbObject().containsField("pullRequestCommentTimes")) { // pm.getDbObject().put("pullRequestCommentTimes", new BasicDBList()); // } // pm.getPullRequestCommentTimes().add(obj.getString("created_at")); // } //// } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // System.exit(0); //// #7 Issues // System.out.println("Extracting issues..."); // cursor = msrDb.getCollection("issues").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String username = getUserLoginName(bio, "user", "login", obj); // User user = bio.getUsers().findOne(User.LOGIN.eq(username)); // if (user == null) { //// System.err.println("Found issue with unrecognised user:" + username); // continue; // } // // if (!user.getDbObject().containsField("issueTimes")) { // user.getDbObject().put("issueTimes", new BasicDBList()); // } // user.getIssueTimes().add(obj.getString("created_at")); // user.setNumberOfIssues(user.getNumberOfIssues()+1); // // // Project // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), 
Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); // if (repoIt.hasNext()) { // Project project = repoIt.next(); // if (project != null) { // project.setNumberOfIssues(project.getNumberOfIssues()+1); // // if (!project.getDbObject().containsField("issueTimes")) { // project.getDbObject().put("issueTimes", new BasicDBList()); // } // project.getIssueTimes().add(obj.getString("created_at")); // // ProjectMembership pm = getProjectMembership(bio, user, project); // pm.setNumberOfIssues(pm.getNumberOfIssues()+1); // // if (!pm.getDbObject().containsField("issueTimes")) { // pm.getDbObject().put("issueTimes", new BasicDBList()); // } // pm.getIssueTimes().add(obj.getString("created_at")); // } // } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // System.exit(0); //// #8 Issue comments // System.out.println("Extracting issue comments..."); // cursor = msrDb.getCollection("issue_comments").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String username = getUserLoginName(bio, "user", "login", obj); // User user = bio.getUsers().findOne(User.LOGIN.eq(username)); // if (user == null) { //// System.err.println("Found issue comment with unrecognised user:" + username); // continue; // } // // if (!user.getDbObject().containsField("issueCommentTimes")) { // user.getDbObject().put("issueCommentTimes", new BasicDBList()); // } // user.getIssueCommentTimes().add(obj.getString("created_at")); // user.setNumberOfIssueComments(user.getNumberOfIssueComments()+1); // // // Project // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); // if (repoIt.hasNext()) { // Project project = repoIt.next(); // if (project != null) { // project.setNumberOfIssueComments(project.getNumberOfIssueComments()+1); // // if (!project.getDbObject().containsField("issueCommentTimes")) { // project.getDbObject().put("issueCommentTimes", new BasicDBList()); // } // project.getIssueCommentTimes().add(obj.getString("created_at")); // // ProjectMembership pm = getProjectMembership(bio, user, project); // pm.setNumberOfIssueComments(pm.getNumberOfIssueComments()+1); // // if (!pm.getDbObject().containsField("issueCommentTimes")) { // pm.getDbObject().put("issueCommentTimes", new BasicDBList()); // } // pm.getIssueCommentTimes().add(obj.getString("created_at")); // } // } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // System.exit(0); //// #9 Issue events // System.out.println("Extracting issue events..."); // cursor = msrDb.getCollection("issue_events").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String username = getUserLoginName(bio, "actor", "login", obj); // User user = bio.getUsers().findOne(User.LOGIN.eq(username)); // if (user == null) { //// System.err.println("Found issue event with unrecognised user:" + username); // continue; // } // // String eventKind = obj.getString("event"); // IssueEventKind kind = null; //FIXME // // switch (eventKind) { // case "closed": kind = IssueEventKind.CLOSED; break; // case "assigned": kind = IssueEventKind.ASSIGNED; break; // case 
"mentioned": kind = IssueEventKind.MENTIONED; break; // case "merged": kind = IssueEventKind.MERGED; break; // case "referenced": kind = IssueEventKind.REFERENCED; break; // case "reopened": kind = IssueEventKind.REOPENED; break; // case "subscribed": kind = IssueEventKind.SUBSCRIBED; break; // case "head_ref_deleted" : kind = IssueEventKind.HEAD_REF_DELETED; break; // case "head_ref_restored" : kind = IssueEventKind.HEAD_REF_RESTORED; break; // case "head_ref_cleaned" : kind = IssueEventKind.HEAD_REF_CLEANED; break; // case "unsubscribed" : kind = IssueEventKind.UNSUBSCRIBED; break; // default: // System.err.println("Unrecognised issue event kind: " + eventKind); // } // if (kind == null) continue; // // boolean eventKindFound = false; // // if (!user.getDbObject().containsField("issueEvents")) { // user.getDbObject().put("issueEvents", new BasicDBList()); // } // // for (IssueEvent ie : user.getIssueEvents()) { // if (ie.getEventKind().equals(kind)) { // ie.setCount(ie.getCount()+1); // eventKindFound = true; // break; // } // } // if (!eventKindFound) { // IssueEvent ie = new IssueEvent(); // ie.setEventKind(kind); // ie.setCount(1); // user.getIssueEvents().add(ie); // } // // // Project // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); // if (repoIt.hasNext()) { // Project project = repoIt.next(); // // if (!project.getDbObject().containsField("issueEvents")) { // project.getDbObject().put("issueEvents", new BasicDBList()); // } // // eventKindFound = false; // for (IssueEvent ie : project.getIssueEvents()) { // if (ie.getEventKind().equals(kind)) { // ie.setCount(ie.getCount()+1); // eventKindFound = true; // break; // } // } // if (!eventKindFound) { // IssueEvent ie = new IssueEvent(); // ie.setEventKind(kind); // ie.setCount(1); // project.getIssueEvents().add(ie); // } // // ProjectMembership pm = getProjectMembership(bio, user, project); // // if (!pm.getDbObject().containsField("issueEvents")) { // pm.getDbObject().put("issueEvents", new BasicDBList()); // } // // eventKindFound = false; // for (IssueEvent ie : pm.getIssueEvents()) { // if (ie.getEventKind().equals(kind)) { // ie.setCount(ie.getCount()+1); // eventKindFound = true; // break; // } // } // if (!eventKindFound) { // IssueEvent ie = new IssueEvent(); // ie.setEventKind(kind); // ie.setCount(1); // pm.getIssueEvents().add(ie); // } // } // // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); // System.exit(0); // //// Watchers // System.out.println("Extracting watchers..."); // cursor = msrDb.getCollection("watchers").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // User user = bio.getUsers().findOne(User.LOGIN.eq(obj.getString("login"))); // if (user == null) continue; // // Iterator<Project> repoIt = bio.getProjects().find(Project.NAME.eq(obj.getString("repo")), Project.OWNERNAME.eq(obj.getString("owner"))).iterator(); // if (repoIt.hasNext()) { // Project project = repoIt.next(); // if (project != null && !project.getWatchers().contains(user)) project.getWatchers().add(user); // if (!user.getWatches().contains(project)) user.getWatches().add(project); // } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // System.out.println(); //// 
Org members FIXME: INCOMPLETE: Cannot match the org name against ANYTHING.... // System.out.println("Extracting org members..."); // cursor = msrDb.getCollection("org_members").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String login = obj.getString("login"); // String orgName = obj.getString("org"); // // User user = bio.getUsers().findOne(User.LOGIN.eq(login)); // // User org = bio.getUsers().findOne(User.LOGIN.eq(orgName)); // // if (org!=null){ // System.err.println("Found org! " + orgName); // // } // //// Project project = bio.getProjects().findOne(Project.OWNERNAME.eq("orgName")); //// if (project==null) { //// System.err.println("Didn't find project: " + orgName); //// continue; //// } //// ProjectMembership pm = getProjectMembership(bio, user, project); //// pm.setOrgMember(true); // } // bio.sync(); // System.out.println(); // Repo collaborators // System.out.println("Extracting repo collaborators..."); // cursor = msrDb.getCollection("repo_collaborators").find(); // cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); // it = cursor.iterator(); // // count = 0; // while(it.hasNext()){ // BasicDBObject obj = (BasicDBObject) it.next(); // // String login = obj.getString("login"); // String projectName = obj.getString("repo"); // String ownerName = obj.getString("owner"); // // User user = bio.getUsers().findOne(User.LOGIN.eq(login)); // Iterator<Project> repoIt = bio.getProjects().find(Project.OWNERNAME.eq(ownerName), Project.NAME.eq(projectName)).iterator(); // if (repoIt.hasNext()) { // Project project = repoIt.next(); // // ProjectMembership pm = getProjectMembership(bio, user, project); // pm.setCollaborator(true); // } else { // System.err.println("Couldn't find repo. owner: " + ownerName + ", repo: " + projectName); // } // count++; // if (count % 1000 == 0) { // System.out.print(count + ", "); // bio.sync(); // } // } // bio.sync(); // long end = System.currentTimeMillis(); System.out.println("Finished at " + new Date()); long duration = end - start; System.out.println("Duration: " + duration); }
From source file:recommender.Recommender.java
public Map<String, Double> recommend() {
    DBCursor cursor = db.findMatrixRows().addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    results = new HashMap<DBObject, Double>();
    while (cursor.hasNext()) {
        DBObject obj = cursor.next();
        results.put(obj, computeSimilarity(obj));
    }
    similarResults = getSimilarResults(results);
    String[] sources = { "twitter", "facebook" };
    for (DBObject matrixRow : similarResults) {
        for (int i = 0; i < Event.NAMES.length; i++) {
            for (String source : sources) {
                boolean hasAttribute = (matrixRow.get("days_" + source + "_" + Event.NAMES[i]) != null);
                if (hasAttribute) {
                    int attributeValue = (Integer) matrixRow.get("days_" + source + "_" + Event.NAMES[i]);
                    Double eventWeight = (Double) matrixRow.get("event_score_" + Event.NAMES[i]);
                    eventWeight = (eventWeight == null ? 0.5 : eventWeight);
                    System.out.println(eventWeight);
                    counters[i] += eventWeight;
                    sums[i] += eventWeight * attributeValue;
                }
            }
        }
    }
    HashMap<String, Double> finalResult = new HashMap<String, Double>();
    for (int i = 0; i < Event.NAMES.length; i++) {
        if (counters[i] > 0) {
            finalResult.put("days_" + Event.NAMES[i], ((double) sums[i]) / counters[i]);
        }
    }
    return sortByValue(finalResult, false);
}
From source file:Rpackage.MongoFixPostCode.java
public static void main(String[] args) {
    YelpAPI yelpApi = new YelpAPI();
    JSONParser parser = new JSONParser();
    try {
        // Connect to mongodb
        MongoClient mongo = new MongoClient("10.33.2.142", 27017);

        // get database
        // if database doesn't exists, mongodb will create it for you
        DB db = mongo.getDB("test");

        // get collection
        // if collection doesn't exists, mongodb will create it for you
        DBCollection collection = db.getCollection("twitter_Stream");

        DBCursor cursor;
        BasicDBObject query;

        //------------------------------------
        // ( 1 ) collection.find() --> get all document
        DBObject clause1 = new BasicDBObject("Latitude", new BasicDBObject("$exists", true));
        DBObject clause2 = new BasicDBObject("tweet_Country_Code", new BasicDBObject("$exists", true));
        DBObject clause3 = new BasicDBObject("R_Post_Code", new BasicDBObject("$exists", false));

        /*{ "$and":[ { "tweet_Country_Code":{ "$exists":true } },
                     { "Latitude":{ "$exists":false } },
                     { "tweet_Country_Code":"AU" } ] } */
        BasicDBList or = new BasicDBList();
        or.add(clause1);
        or.add(clause2);
        or.add(clause3);
        query = new BasicDBObject("$and", or);

        //{ "$and":[ { "Latitude":{ "$exists":true } }, { "R_Post_Code":{ "$exists":false } } ]}
        cursor = collection.find(query).addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        //        .addOption(Bytes.QUERYOPTION_AWAITDATA);

        System.out.println("( 1 ) .get the user id within latitide");
        System.out.println("results --> " + cursor.count());

        FoursquareAPI_backup qui4squreApi = new FoursquareAPI_backup();
        try {
            while (cursor.hasNext()) {
                DBObject data = cursor.next();
                String v_user_Name = (String) data.get("user_name");
                Long v_twitte_id = (Long) data.get("tweet_ID");
                String v_twitte_text = (String) data.get("tweet_text");
                Long v_user_Id = (Long) data.get("user_Id");
                Double v_Latitude = (Double) data.get("Latitude");
                Double v_Longtitude = (Double) data.get("Longitude");
                String v_tweet_Place_Name = (String) data.get("tweet_Place_Name");
                String v_tweet_Country_Code = (String) data.get("tweet_Country_Code");

                if (v_user_Id == null) {
                    /*
                    System.out.print("update:" + v_user_Name + "/status/" + v_twitte_id);
                    try {
                        Status status = twitter.showStatus(v_twitte_id);
                        BasicDBObject jo = GetMongoRecord(status);
                        System.out.println("-->" + status.getId() + " : " + jo.getString("Re_user_screenName")
                                + ":" + jo.getString("tweet_text"));
                        collection.update(new BasicDBObject("tweet_ID", v_twitte_id), jo); // set the document in the DB to the new document for Jo
                    } catch (TwitterException ex) {
                        if (ex.getStatusCode() == 144)
                            continue;
                    }
                    */
                    continue;
                }

                JSONObject businesses = yelpApi.search4Yelp("city", v_Latitude, v_Longtitude); //-27.497835,153.017472);

                boolean searchAgain = false;
                if (businesses == null)
                    searchAgain = true;
                else if (businesses.size() < 1)
                    searchAgain = true;

                if (searchAgain) {
                    System.out.println("La:" + v_Latitude + "\tLo:" + v_Longtitude);
                    businesses = qui4squreApi.search4Square("city", v_Latitude, v_Longtitude);
                    searchAgain = false;
                }

                if (businesses == null)
                    searchAgain = true;
                else if (businesses.size() < 1)
                    searchAgain = true;

                if (searchAgain) {
                    businesses = qui4squreApi.searchGoogleMap("city", v_Latitude, v_Longtitude);
                    if (businesses == null) {
                        System.out.println("\t" + v_tweet_Country_Code + "\t:" + v_tweet_Place_Name);
                        continue;
                    } else if (businesses.size() < 1) {
                        System.out.println("\t" + v_tweet_Country_Code + "\t:" + v_tweet_Place_Name);
                        continue;
                    }
                }

                String country_code = (String) businesses.get("country_code");
                String city_code = (String) businesses.get("city_name");
                String state_code = (String) businesses.get("state_code");
                String post_code = (String) businesses.get("post_code");

                BasicDBObject setNewFieldQuery = new BasicDBObject().append("$set",
                        new BasicDBObject().append("R_Country_Code", country_code)
                                .append("R_State_Code", state_code).append("R_City", city_code)
                                .append("R_Post_Code", post_code));
                collection.update(new BasicDBObject().append("tweet_ID", v_twitte_id), setNewFieldQuery); // set the document in the DB to the new document for Jo
                setNewFieldQuery.clear();
                setNewFieldQuery = null;
            }
        } finally {
            cursor.close();
        }
        System.out.println("---------------------------------");

    } catch (UnknownHostException e) {
        e.printStackTrace();
    } catch (MongoException e) {
        e.printStackTrace();
    }
}