Source code

Java tutorial


Here is the source code for datapreparation/MongoStatistics.java


/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package datapreparation;

import com.mongodb.AggregationOptions;
import com.mongodb.BasicDBObject;
import com.mongodb.Cursor;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/*
 * @author Gregory
 */
public class MongoStatistics {
    public void usersOfUrl(String URL) {
        // TODO code application logic here

        // TODO code application logic here
        int limit = 0;
        String filename = "Users_Of_Url_" + URL + ".txt";

        // To directly connect to a single MongoDB server (note that this will not auto-discover the primary even
        MongoClient mongoClient;

        try {
            mongoClient = new MongoClient("localhost");

            //use database
            DB db = mongoClient.getDB("users");

            //get collection
            DBCollection coll = db.getCollection("urls");

            // build the $projection operation
            //            DBObject fields = new BasicDBObject("user", 1);
            //            fields.put("_id", 0);
            //            BasicDBObject project = new BasicDBObject("$project", fields);

            //build the match operation
            DBObject matchFields = new BasicDBObject("url", URL);
            DBObject match = new BasicDBObject("$match", matchFields);

            // Now the $group operation
            DBObject groupFields = new BasicDBObject("_id", "$user");
            groupFields.put("count", new BasicDBObject("$sum", 1));
            DBObject group = new BasicDBObject("$group", groupFields);

            // Finally the $sort operation
            BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

            // run aggregation
            List<DBObject> pipeline;
            if (limit == 0) {// without limits!
                pipeline = Arrays.asList(match, group, sort);
            } else {
                // create new BasicDBObject that limit query result in only 100 rows
                DBObject limitRes = new BasicDBObject("$limit", limit);
                pipeline = Arrays.asList(match, group, sort, limitRes);
            AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)

            Cursor cursor = coll.aggregate(pipeline, aggregationOptions);

            writeToFile(cursor, filename, "User\t Count");


        } catch (IOException ex) {
            System.out.println("Something's Wrong! " + ex);

    public void topUrls() {
        // TODO code application logic here
        int limit = 0;
        String filename = "Top_Urls_More.txt";
        // To directly connect to a single MongoDB server (note that this will not auto-discover the primary even
        MongoClient mongoClient;

        try {
            mongoClient = new MongoClient("localhost");

            //use database
            DB db = mongoClient.getDB("users");

            //get collection
            DBCollection coll = db.getCollection("urls");

            // build the $projection operation
            DBObject fields = new BasicDBObject("url", 1);
            fields.put("_id", 0);
            BasicDBObject project = new BasicDBObject("$project", fields);

            // Now the $group operation
            DBObject groupFields = new BasicDBObject("_id", "$url");
            groupFields.put("count", new BasicDBObject("$sum", 1));
            DBObject group = new BasicDBObject("$group", groupFields);

            // Finally the $sort operation
            BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

            // run aggregation
            List<DBObject> pipeline;
            if (limit == 0) {// without limits!
                pipeline = Arrays.asList(project, group, sort);
            } else {
                // create new BasicDBObject that limit query result in only 100 rows
                DBObject limitRes = new BasicDBObject("$limit", limit);
                pipeline = Arrays.asList(project, group, sort, limitRes);
            AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)

            Cursor cursor = coll.aggregate(pipeline, aggregationOptions);

            writeToFile2(cursor, filename, "URL\t Count");


        } catch (IOException ex) {
            System.out.println("Something's Wrong! " + ex);

    public void timeIntervals() {
        // TODO code application logic here
        int limit = 0;
        String filename = "Times.txt";
        // To directly connect to a single MongoDB server (note that this will not auto-discover the primary even
        MongoClient mongoClient;

        try {
            mongoClient = new MongoClient("localhost");

            //use database
            DB db = mongoClient.getDB("users");

            //get collection
            DBCollection coll = db.getCollection("urls");

            // build the $projection operation
            DBObject fields = new BasicDBObject("time", 1);
            fields.put("_id", 0);
            BasicDBObject project = new BasicDBObject("$project", fields);

            // Now the $group operation
            DBObject groupFields = new BasicDBObject("_id", "$time");
            //groupFields.put("count", new BasicDBObject("$sum", 1));
            DBObject group = new BasicDBObject("$group", groupFields);

            // Finally the $sort operation
            //BasicDBObject sort = new BasicDBObject("$sort", new BasicDBObject("count", -1));

            // run aggregation
            List<DBObject> pipeline;
            if (limit == 0) {// without limits!
                pipeline = Arrays.asList(project, group);
            } else {
                // create new BasicDBObject that limit query result in only 100 rows
                DBObject limitRes = new BasicDBObject("$limit", limit);
                pipeline = Arrays.asList(project, group, limitRes);
            AggregationOptions aggregationOptions = AggregationOptions.builder().batchSize(100)

            Cursor cursor = coll.aggregate(pipeline, aggregationOptions);

            writeToFile3(cursor, filename, "Times");


        } catch (IOException ex) {
            System.out.println("Something's Wrong! " + ex);

    public void usersToUrls() {
        // To directly connect to a single MongoDB server (note that this will not auto-discover the primary even
        MongoClient mongoClient;

        try {
            mongoClient = new MongoClient("localhost");

            //use database
            DB db = mongoClient.getDB("users");

            //get collection
            DBCollection coll = db.getCollection("urls");

            //iterate with a cursor
            BasicDBObject query = new BasicDBObject("source", new BasicDBObject("$exists", true));

            DBCursor cursor = coll.find(query);

            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(new FileWriter("Users_To_Urls.txt"));

                while (cursor.hasNext()) {
                    BasicDBObject tweet = (BasicDBObject);

                    String time = tweet.get("created_at").toString();
                    String user = tweet.get("from_user").toString();
                    String urls[] = tweet.get("source").toString().replaceAll("&quot", "").split(";");

                    for (String url : urls) {
                        if (url.matches("http.*")) {
                            //The user posted one link, write it in the file!
                            writer.write(url + "," + user + "," + time + "\n");
            } finally {
        } catch (IOException ex) {
            System.out.println("Something's Wrong! " + ex);

    private static void writeToFile(Cursor cursor, String filename, String titleline) {
        PrintWriter writer;
        try {
            writer = new PrintWriter(filename);
            while (cursor.hasNext()) {
                DBObject old_temp =;
                String user = old_temp.get("_id").toString();
                String count = old_temp.get("count").toString();
                writer.println(user + "\t" + count);
            System.out.println("File created successfully!");
        } catch (FileNotFoundException ex) {
            System.out.println("Could not open file! " + ex);

    private static void writeToFile2(Cursor cursor, String filename, String titleline) {
        PrintWriter writer;
        try {
            writer = new PrintWriter(filename);
            while (cursor.hasNext()) {
                DBObject old_temp =;
                String url = old_temp.get("_id").toString();
                String count = old_temp.get("count").toString();
                writer.println(url + "\t" + count);
            System.out.println("File created successfully!");
        } catch (FileNotFoundException ex) {
            System.out.println("Could not open file! " + ex);

    private static void writeToFile3(Cursor cursor, String filename, String titleline) {
        PrintWriter writer;
        try {
            writer = new PrintWriter(filename);
            while (cursor.hasNext()) {
                DBObject old_temp =;
                String time = old_temp.get("_id").toString();
                //String count = old_temp.get("count").toString();
            System.out.println("File created successfully!");
        } catch (FileNotFoundException ex) {
            System.out.println("Could not open file! " + ex);