Example usage for com.mongodb BasicDBObject copy

List of usage examples for com.mongodb BasicDBObject copy

Introduction

On this page you can find example usages of the copy() method of com.mongodb.BasicDBObject.

Prototype

public Object copy() 

Source Link

Document

Creates a new instance which is a copy of this BasicDBObject.

Usage

From source file:fr.eolya.crawler.queue.mongodb.MongoDBSourceQueue.java

License:Apache License

/**
 * Pops the next available source document from the queue collection.
 *
 * <p>Looks for a document that satisfies the filter built by {@code getQuery(...)} and whose
 * {@code _poped} flag is either missing or {@code false}. Candidates are ordered by
 * {@code crawl_priority} descending, then {@code crawl_nexttime} ascending. The first match
 * is flagged by passing a copy with {@code _poped = true} to {@code coll.update(...)}.
 * The lookup and the update both happen while holding {@code collMonitor}.
 *
 * @return the matched document converted to a map; the map is built from the document as it
 *         was read, not from the copy carrying the {@code _poped} flag. Returns {@code null}
 *         when no document matches.
 */
public Map<String, Object> pop() {
    String query = getQuery(test, interactiveOnly, suspiciousOnly, accountId, sourceId, engineId);
    query = String.format(
            "{\"$and\": [{\"$or\": [{\"_poped\": { \"$exists\": false }},{\"_poped\": false}]}, %1$s]}", query);

    BasicDBObject docsearch = MongoDBHelper.JSON2BasicDBObject(query);

    synchronized (collMonitor) {
        // Highest crawl_priority first, then earliest crawl_nexttime.
        DBCursor cur = coll.getColl().find(docsearch)
                .sort(new BasicDBObject("crawl_priority", -1).append("crawl_nexttime", 1));
        try {
            if (cur.hasNext()) {
                BasicDBObject doc = (BasicDBObject) cur.next();
                // Update through a copy so the original can still serve as the match criteria.
                BasicDBObject doc2 = (BasicDBObject) doc.copy();
                doc2.put("_poped", true);
                coll.update(doc, doc2);
                return MongoDBHelper.BasicDBObject2Map(doc);
            }
        } finally {
            // Always release the cursor, including on the early return above.
            cur.close();
        }
    }
    return null;
}

From source file:fr.eolya.crawler.queue.mongodb.MongoDBSourceQueue.java

License:Apache License

/**
 * Clears the {@code _poped} flag on the document whose {@code id} field equals the given id.
 *
 * <p>Only the first document returned by the query is updated: a copy with
 * {@code _poped = false} is passed to {@code coll.update(...)}. Nothing happens when no
 * document matches. The lookup and the update both happen while holding {@code collMonitor}.
 *
 * @param id value matched against the document's {@code id} field
 */
public void unpop(int id) {
    String query = String.format("{\"id\": %1$s}", id);
    BasicDBObject docsearch = MongoDBHelper.JSON2BasicDBObject(query);

    synchronized (collMonitor) {
        DBCursor cur = coll.getColl().find(docsearch);
        try {
            if (cur.hasNext()) {
                BasicDBObject doc = (BasicDBObject) cur.next();
                // Update through a copy so the original can still serve as the match criteria.
                BasicDBObject doc2 = (BasicDBObject) doc.copy();
                doc2.put("_poped", false);
                coll.update(doc, doc2);
            }
        } finally {
            // Always release the cursor.
            cur.close();
        }
    }
}

From source file:uk.ac.soton.itinnovation.sad.plugins.basicstats.Main.java

License:Open Source License

/**
 * Executes the basic-stats plugin run from start to finish.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Reads the {@code num_posts} argument through {@code PluginsHelper}.</li>
 *   <li>Loads Twitter input data ({@code twitter-static-search-raw}), reports its size as a
 *       metric, runs {@code doTwitterAnalysis} and saves the requested output types.</li>
 *   <li>Does the same for Facebook input data ({@code facebook-posts-raw}) using
 *       {@code doFacebookAnalysis}.</li>
 *   <li>Logs several counts obtained by querying the data collection directly.</li>
 *   <li>Collects media URLs with a description from tweets and Facebook posts and, when
 *       {@code media-links-with-descriptions} was requested, saves each pair.</li>
 *   <li>Reports the media-link count, the requested output types and the execution duration
 *       as metrics, reports success, and terminates the JVM with {@code System.exit(0)}.</li>
 * </ol>
 *
 * @param args arguments handed unchanged to the {@code PluginsHelper} constructor
 */
public void run(String[] args) {

    long startTime = System.currentTimeMillis();

    // Always get Plugins helper first
    PluginsHelper ph = new PluginsHelper(args);

    // Retrieve argument "num_posts"
    String num_posts = ph.getArgumentValue("num_posts");
    if (num_posts == null) {
        // NOTE(review): execution continues with a null num_posts unless dealWithException
        // terminates the run - confirm against PluginsHelper.
        ph.dealWithException("ERROR: Failed to get argument value for 'num_posts'", null);
    }

    logger.debug("Running with: num_posts='" + num_posts + "'");

    // Get Twitter input data
    ArrayList<JSONObject> twitterInputData = ph.getInputData("twitter-static-search-raw", num_posts);

    // Report the number of tweets submitted to ECC
    ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_TWEETS_ANALYSED,
            Integer.toString(twitterInputData.size()));

    // Check requested output types
    ArrayList<String> requestedOutputTypes = ph.getRequestedOutputTypes();

    if (twitterInputData.isEmpty()) {
        logger.debug("No Twitter input data found (of type 'twitter-static-search-raw')");
    } else {

        // Do Twitter analysis
        int[] analysisResults = doTwitterAnalysis(twitterInputData);

        // Save data as requested
        JSONObject dataAsJson;
        Timestamp whenCreated = new Timestamp(System.currentTimeMillis());
        if (requestedOutputTypes.contains("twitter-basic-stats")) {
            logger.debug("Saving data of type 'twitter-basic-stats'");
            dataAsJson = new JSONObject();
            dataAsJson.put("unique_tweets", analysisResults[0]);
            dataAsJson.put("unique_users", analysisResults[1]);
            dataAsJson.put("unique_languages", analysisResults[2]);
            dataAsJson.put("unique_sources", analysisResults[3]);
            ph.saveData("twitter-basic-stats", dataAsJson, whenCreated);
        }
        if (requestedOutputTypes.contains("twitter-static-search-raw")) {
            logger.debug("Saving data of type 'twitter-static-search-raw'");
            whenCreated = new Timestamp(System.currentTimeMillis());
            for (JSONObject tweet : twitterInputData) {
                // Only tweets whose id is held in tweetIdsHolder are saved back.
                if (tweetIdsHolder.contains(tweet.getString("id"))) {
                    ph.saveData("twitter-static-search-raw", tweet, whenCreated);
                }
            }
        }
    }

    // Get Facebook input data
    ArrayList<JSONObject> facebookInputData = ph.getInputData("facebook-posts-raw", num_posts);

    // Report the number of Facebook messages submitted to ECC
    ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_MESSAGES_ANALYSED,
            Integer.toString(facebookInputData.size()));

    if (facebookInputData.isEmpty()) {
        logger.debug("No Facebook input data found (of type 'facebook-posts-raw')");
    } else {

        // Do Facebook analysis
        int[] analysisResults = doFacebookAnalysis(facebookInputData);

        // Save data as requested
        JSONObject dataAsJson;
        Timestamp whenCreated = new Timestamp(System.currentTimeMillis());
        if (requestedOutputTypes.contains("facebook-basic-stats")) {
            logger.debug("Saving data of type 'facebook-basic-stats'");
            dataAsJson = new JSONObject();
            dataAsJson.put("unique_posts", analysisResults[0]);
            dataAsJson.put("unique_users", analysisResults[1]);
            ph.saveData("facebook-basic-stats", dataAsJson, whenCreated);
        }
        if (requestedOutputTypes.contains("facebook-posts-raw")) {
            logger.debug("Saving data of type 'facebook-posts-raw'");
            whenCreated = new Timestamp(System.currentTimeMillis());
            for (JSONObject post : facebookInputData) {
                // Only posts whose id is held in fbPostsIdsHolder are saved back.
                if (fbPostsIdsHolder.contains(post.getString("id"))) {
                    ph.saveData("facebook-posts-raw", post, whenCreated);
                }
            }
        }
    }

    // Searching database for various things
    DBCollection collection = ph.getDataCollection(); // database collection with all the data
    BasicDBObject inputDataQuery = ph.getInputDataQuery(); // query that limits data to input jobs/plugins
    logger.debug("Total posts in input: " + collection.find(inputDataQuery).count());

    // Each specialised query below starts from a copy so inputDataQuery itself stays untouched.

    // Count number of tweets in input data
    BasicDBObject tweetsQuery = (BasicDBObject) inputDataQuery.copy();
    tweetsQuery.append("dataType", "twitter-static-search-raw"); // initial query expanded to limit data by dataType
    logger.debug("Tweets: " + collection.find(tweetsQuery).count());

    // Count number of Facebook posts in input data
    BasicDBObject fbPostsQuery = (BasicDBObject) inputDataQuery.copy();
    fbPostsQuery.append("dataType", "facebook-posts-raw");
    logger.debug("FB posts: " + collection.find(fbPostsQuery).count());

    // Count number of tweets with 'lang' = 'en' (Facebook posts don't have that field) in input data
    BasicDBObject englishTweetsQuery = (BasicDBObject) inputDataQuery.copy();
    englishTweetsQuery.append("jsonData.lang", "en");
    logger.debug("Tweets in English: " + collection.find(englishTweetsQuery).count());

    // Find media files in tweets and save URLs with text
    tweetsQuery.append("jsonData.entities.media", new BasicDBObject("$exists", true));
    BasicDBObject keysToReturn = new BasicDBObject();
    keysToReturn.append("jsonData.text", 1);
    keysToReturn.append("jsonData.entities.media.media_url", 1);

    int mlFilesCounter = 0;
    DBCursor cursor = collection.find(tweetsQuery, keysToReturn);
    JSONObject next;
    JSONObject jsonData;
    JSONObject dataAsJson;
    String text, media_url;
    Timestamp whenCreated = new Timestamp(System.currentTimeMillis());
    try {
        while (cursor.hasNext()) {
            next = JSONObject.fromObject(cursor.next().toString());
            jsonData = next.getJSONObject("jsonData");
            text = jsonData.getString("text");
            // Only the first media entry of a tweet is used.
            media_url = jsonData.getJSONObject("entities").getJSONArray("media")
                    .getJSONObject(0).getString("media_url");
            logger.debug(media_url + " (" + text + ")");
            if (requestedOutputTypes.contains("media-links-with-descriptions")) {
                logger.debug("Saving data of type 'media-links-with-descriptions'");
                dataAsJson = new JSONObject();
                dataAsJson.put("text", text);
                dataAsJson.put("media_url", media_url);
                ph.saveData("media-links-with-descriptions", dataAsJson, whenCreated);
                mlFilesCounter++;
            }
        }
    } finally {
        // Release the tweets cursor before the variable is reused for the Facebook query.
        cursor.close();
    }

    // Find media files in Facebook posts and save URLs with text
    fbPostsQuery.append("jsonData.picture", new BasicDBObject("$exists", true));
    keysToReturn = new BasicDBObject();
    keysToReturn.append("jsonData.message", 1);
    keysToReturn.append("jsonData.story", 1);
    keysToReturn.append("jsonData.name", 1);
    keysToReturn.append("jsonData.picture", 1);

    whenCreated = new Timestamp(System.currentTimeMillis());
    cursor = collection.find(fbPostsQuery, keysToReturn);
    try {
        while (cursor.hasNext()) {
            next = JSONObject.fromObject(cursor.next().toString());
            jsonData = next.getJSONObject("jsonData");
            // Description preference: message, then name, then story, else a fixed placeholder.
            if (jsonData.containsKey("message")) {
                text = jsonData.getString("message");
            } else if (jsonData.containsKey("name")) {
                text = jsonData.getString("name");
            } else if (jsonData.containsKey("story")) {
                text = jsonData.getString("story");
            } else {
                text = "no description";
            }
            media_url = jsonData.getString("picture");
            logger.debug(media_url + " (" + text + ")");
            if (requestedOutputTypes.contains("media-links-with-descriptions")) {
                logger.debug("Saving data of type 'media-links-with-descriptions'");
                dataAsJson = new JSONObject();
                dataAsJson.put("text", text);
                dataAsJson.put("media_url", media_url);
                ph.saveData("media-links-with-descriptions", dataAsJson, whenCreated);
                mlFilesCounter++;
            }
        }
    } finally {
        cursor.close();
    }

    // Report number of files with media links
    ph.sendMetric(PluginEccClient.ENTITY_SocialNetworksDataGroup, PluginEccClient.ATTRIBUTE_MEDIA_LINKS,
            Integer.toString(mlFilesCounter));

    // Create new attribute to record requested output data types, report to ECC
    // If the attribute already exists, it will not be created
    String newAttribute = "Requested output data types";
    ph.addAttribute(PluginEccClient.ENTITY_PLUGINS, newAttribute, "Output data types", "nominal", "Data type");
    for (String dataType : requestedOutputTypes) {
        ph.sendMetric(PluginEccClient.ENTITY_PLUGINS, newAttribute, dataType);
    }

    // Report duration
    ph.sendMetric(PluginEccClient.ENTITY_PLUGINS, PluginEccClient.ATTRIBUTE_EXECUTION_DURATION,
            Long.toString(System.currentTimeMillis() - startTime));

    // Report success - failure reported automatically
    ph.reportExecutionSuccess();
    System.exit(0);
}