List of usage examples for com.mongodb BasicDBObject copy
public Object copy()
From source file: fr.eolya.crawler.queue.mongodb.MongoDBSourceQueue.java
License: Apache License
public Map<String, Object> pop() { DBCursor cur = null;/*from w ww .j ava 2s. c om*/ String query = getQuery(test, interactiveOnly, suspiciousOnly, accountId, sourceId, engineId); query = String.format( "{\"$and\": [{\"$or\": [{\"_poped\": { \"$exists\": false }},{\"_poped\": false}]}, %1$s]}", query); BasicDBObject docsearch = MongoDBHelper.JSON2BasicDBObject(query); synchronized (collMonitor) { // TODO : sort by priority and next crawl date cur = coll.getColl().find(docsearch) .sort(new BasicDBObject("crawl_priority", -1).append("crawl_nexttime", 1)); if (cur.hasNext()) { BasicDBObject doc = (BasicDBObject) cur.next(); BasicDBObject doc2 = (BasicDBObject) doc.copy(); doc2.put("_poped", true); coll.update(doc, doc2); //return doc.toMap(); return MongoDBHelper.BasicDBObject2Map(doc); //return doc.toString(); } } return null; }
From source file: fr.eolya.crawler.queue.mongodb.MongoDBSourceQueue.java
License: Apache License
/**
 * Clears the popped flag on the first document whose {@code id} field equals the
 * given value.
 *
 * <p>A copy of the matching document with {@code _poped = false} is written back
 * through {@code coll.update(...)}. If no document matches, nothing is changed.
 * The lookup and the update both run while holding {@code collMonitor}.
 *
 * @param id value of the {@code id} field of the document to release
 */
public void unpop(int id) {
    String query = String.format("{\"id\": %1$s}", id);
    BasicDBObject docsearch = MongoDBHelper.JSON2BasicDBObject(query);

    synchronized (collMonitor) {
        DBCursor cur = coll.getColl().find(docsearch);
        try {
            if (cur.hasNext()) {
                BasicDBObject doc = (BasicDBObject) cur.next();
                // Update a copy so that the original can still serve as the match criteria.
                BasicDBObject doc2 = (BasicDBObject) doc.copy();
                doc2.put("_poped", false);
                coll.update(doc, doc2);
            }
        } finally {
            // Only the first result is consumed; release the cursor explicitly.
            cur.close();
        }
    }
}
From source file: uk.ac.soton.itinnovation.sad.plugins.basicstats.Main.java
License: Open Source License
public void run(String[] args) { long startTime = System.currentTimeMillis(); // Always get Plugins helper first PluginsHelper ph = new PluginsHelper(args); // Retrieve argument "num_posts" String num_posts = ph.getArgumentValue("num_posts"); if (num_posts == null) { ph.dealWithException("ERROR: Failed to get argument value for 'num_posts'", null); }//from w w w.j av a 2s. c o m logger.debug("Running with: num_posts='" + num_posts); // Get Twitter input data ArrayList<JSONObject> twitterInputData = ph.getInputData("twitter-static-search-raw", num_posts); // Report the number of tweets submitted to ECC ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_TWEETS_ANALYSED, Integer.toString(twitterInputData.size())); // Check requested output types ArrayList<String> requestedOutputTypes = ph.getRequestedOutputTypes(); if (twitterInputData.isEmpty()) { logger.debug("No Twitter input data found (of type 'twitter-static-search-raw')"); } else { // Do Twitter analysis int[] analysisResults = doTwitterAnalysis(twitterInputData); // Save data as requested JSONObject dataAsJson; Timestamp whenCreated = new Timestamp(System.currentTimeMillis()); if (requestedOutputTypes.contains("twitter-basic-stats")) { logger.debug("Saving data of type 'twitter-basic-stats'"); dataAsJson = new JSONObject(); dataAsJson.put("unique_tweets", analysisResults[0]); dataAsJson.put("unique_users", analysisResults[1]); dataAsJson.put("unique_languages", analysisResults[2]); dataAsJson.put("unique_sources", analysisResults[3]); ph.saveData("twitter-basic-stats", dataAsJson, whenCreated); } if (requestedOutputTypes.contains("twitter-static-search-raw")) { logger.debug("Saving data of type 'twitter-static-search-raw'"); whenCreated = new Timestamp(System.currentTimeMillis()); for (JSONObject tweet : twitterInputData) { if (tweetIdsHolder.contains(tweet.getString("id"))) { ph.saveData("twitter-static-search-raw", tweet, whenCreated); } } } } // Get Facebook input data 
ArrayList<JSONObject> facebookInputData = ph.getInputData("facebook-posts-raw", num_posts); // Report the number of Facebook messages submitted to ECC ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_MESSAGES_ANALYSED, Integer.toString(facebookInputData.size())); if (facebookInputData.isEmpty()) { logger.debug("No Facebook input data found (of type 'facebook-posts-raw')"); } else { // Do Facebook analysis int[] analysisResults = doFacebookAnalysis(facebookInputData); // Save data as requested JSONObject dataAsJson; Timestamp whenCreated = new Timestamp(System.currentTimeMillis()); if (requestedOutputTypes.contains("facebook-basic-stats")) { logger.debug("Saving data of type 'facebook-basic-stats'"); dataAsJson = new JSONObject(); dataAsJson.put("unique_posts", analysisResults[0]); dataAsJson.put("unique_users", analysisResults[1]); ph.saveData("facebook-basic-stats", dataAsJson, whenCreated); } if (requestedOutputTypes.contains("facebook-posts-raw")) { logger.debug("Saving data of type 'facebook-posts-raw'"); whenCreated = new Timestamp(System.currentTimeMillis()); for (JSONObject post : facebookInputData) { if (fbPostsIdsHolder.contains(post.getString("id"))) { ph.saveData("facebook-posts-raw", post, whenCreated); } } } } // Searching database for various things DBCollection collection = ph.getDataCollection(); // database collection with all the data BasicDBObject inputDataQuery = ph.getInputDataQuery(); // query that limits data to input jobs/plugins logger.debug("Total posts in input: " + collection.find(inputDataQuery).count()); // Count number of tweets in input data BasicDBObject tweetsQuery = (BasicDBObject) inputDataQuery.copy(); tweetsQuery.append("dataType", "twitter-static-search-raw"); // initial query expanded to limit data by dataType logger.debug("Tweets: " + collection.find(tweetsQuery).count()); // Count number of Facebook posts in input data BasicDBObject fbPostsQuery = (BasicDBObject) inputDataQuery.copy(); 
fbPostsQuery.append("dataType", "facebook-posts-raw"); logger.debug("FB posts: " + collection.find(fbPostsQuery).count()); // Count number of tweets with 'lang' = 'en' (Facebook posts don't have that field) in input data BasicDBObject englishTweetsQuery = (BasicDBObject) inputDataQuery.copy(); englishTweetsQuery.append("jsonData.lang", "en"); logger.debug("Tweets in English: " + collection.find(englishTweetsQuery).count()); // Find media files in tweets and save URLs with text tweetsQuery.append("jsonData.entities.media", new BasicDBObject("$exists", true)); BasicDBObject keysToReturn = new BasicDBObject(); keysToReturn.append("jsonData.text", 1); keysToReturn.append("jsonData.entities.media.media_url", 1); int mlFilesCounter = 0; DBCursor cursor = collection.find(tweetsQuery, keysToReturn); JSONObject next; JSONObject dataAsJson; String text, media_url; Timestamp whenCreated = new Timestamp(System.currentTimeMillis()); while (cursor.hasNext()) { next = JSONObject.fromObject(cursor.next().toString()); text = next.getJSONObject("jsonData").getString("text"); media_url = next.getJSONObject("jsonData").getJSONObject("entities").getJSONArray("media") .getJSONObject(0).getString("media_url"); logger.debug(media_url + " (" + text + ")"); if (requestedOutputTypes.contains("media-links-with-descriptions")) { logger.debug("Saving data of type 'media-links-with-descriptions'"); dataAsJson = new JSONObject(); dataAsJson.put("text", text); dataAsJson.put("media_url", media_url); ph.saveData("media-links-with-descriptions", dataAsJson, whenCreated); mlFilesCounter++; } } // Find media files in Facebook posts and save URLs with text fbPostsQuery.append("jsonData.picture", new BasicDBObject("$exists", true)); keysToReturn = new BasicDBObject(); keysToReturn.append("jsonData.message", 1); keysToReturn.append("jsonData.story", 1); keysToReturn.append("jsonData.name", 1); keysToReturn.append("jsonData.picture", 1); whenCreated = new Timestamp(System.currentTimeMillis()); cursor = 
collection.find(fbPostsQuery, keysToReturn); while (cursor.hasNext()) { next = JSONObject.fromObject(cursor.next().toString()); if (next.getJSONObject("jsonData").containsKey("message")) { text = next.getJSONObject("jsonData").getString("message"); } else { if (next.getJSONObject("jsonData").containsKey("name")) { text = next.getJSONObject("jsonData").getString("name"); } else { if (next.getJSONObject("jsonData").containsKey("story")) { text = next.getJSONObject("jsonData").getString("story"); } else { text = "no description"; } } } media_url = next.getJSONObject("jsonData").getString("picture"); logger.debug(media_url + " (" + text + ")"); if (requestedOutputTypes.contains("media-links-with-descriptions")) { logger.debug("Saving data of type 'media-links-with-descriptions'"); dataAsJson = new JSONObject(); dataAsJson.put("text", text); dataAsJson.put("media_url", media_url); ph.saveData("media-links-with-descriptions", dataAsJson, whenCreated); mlFilesCounter++; } } // Report number of files with media links ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_MEDIA_LINKS, Integer.toString(mlFilesCounter)); // Create new attribute to record requested output data types, report to ECC // If the attribute already exists, it will not be created String newAttribute = "Requested output data types"; ph.addAttribute(PluginEccClient.ENTITY_PLUGINS, newAttribute, "Output data types", "nominal", "Data type"); for (String dataType : requestedOutputTypes) { ph.sendMetric(PluginEccClient.ENTITY_PLUGINS, newAttribute, dataType); } // Report duration ph.sendMetric(PluginEccClient.ENTITY_PLUGINS, PluginEccClient.ATTRIBUTE_EXECUTION_DURATION, Long.toString(System.currentTimeMillis() - startTime)); // Report success - failure reported automatically ph.reportExecutionSuccess(); System.exit(0); }