connector.DBConnector.java Source code

Java tutorial

Introduction

Here is the source code for connector.DBConnector.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package connector;

import com.mongodb.BasicDBObject;
import com.mongodb.Block;
import com.mongodb.MongoClient;
import com.mongodb.client.AggregateIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Accumulators;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Indexes;
import com.mongodb.client.model.Projections;
import com.mongodb.client.model.Sorts;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.bson.BsonValue;
import org.bson.Document;

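/**
 * Static helper that connects to a MongoDB server and runs queries over the
 * "tweets" collection of the "test" database.
 *
 * @author abj
 */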
public class DBConnector {

    // Host name of the MongoDB server that StartConnection() connects to.
    private static String host = "localhost";
    // TCP port of the MongoDB server that StartConnection() connects to.
    private static int port = 27017;
    // Database handle used by every query method; assigned by StartConnection() and null before that.
    private static MongoDatabase database;

    /**
     * Connects to the MongoDB server at {@code host:port}, selects the {@code test} database and
     * requests a text index on the {@code text} field of the {@code tweets} collection.
     *
     * <p>The call is idempotent: once the shared {@code database} handle has been set, later calls
     * return immediately. Previously every call created a fresh {@link MongoClient} and dropped
     * the reference to the old one without closing it.
     *
     * <p>Failures are reported on {@code System.err} and are not rethrown. If the failure happens
     * after the database handle was obtained (for example while creating the index), the handle
     * stays set so the query methods can still be used.
     */
    public static void StartConnection() {
        if (database != null) {
            // Already initialised; a second client would only orphan the first one.
            return;
        }
        try {
            // The client is deliberately left open: the database handle derived from it is used
            // by all query methods of this class for the lifetime of the program.
            MongoClient mongoClient = new MongoClient(host, port);
            database = mongoClient.getDatabase("test");
            MongoCollection<Document> coll = database.getCollection("tweets");
            // Text index on the tweet body.
            coll.createIndex(Indexes.text("text"));
            System.out.println("Connect to database successfully : " + database.getName());
        } catch (Exception e) {
            System.err.println(e.getClass().getName() + ": " + e.getMessage());
        }
    }

    /**
     * Counts the distinct values of the {@code user} field in the {@code tweets} collection.
     *
     * <p>The distinct values are decoded as {@link BsonValue} instead of {@code String}; decoding
     * as {@code String} fails when one of the distinct values is {@code null}. As a side effect the
     * total collection size and the number of distinct users are printed to standard output.
     *
     * @return the number of distinct {@code user} values
     */
    public static int getNumberOfTwitterUsers() {
        MongoCollection<Document> tweets = database.getCollection("tweets");

        // Materialise all distinct values; only the size of the list is used afterwards.
        List<BsonValue> users = new ArrayList<>();
        tweets.distinct("user", BsonValue.class).into(users);
        int uniqueUsers = users.size();

        System.out.println("Collection size: " + tweets.count());
        System.out.println("Unique users: " + uniqueUsers);

        return uniqueUsers;
    }

    //Which Twitter users link the most to other Twitter users? (Provide the top ten.)
    /**
     * Ranks users by the number of their tweets whose text contains an {@code @} character and
     * returns the top ten.
     *
     * @return up to ten strings of the form {@code "<user>, <tweet_count>"}, highest count first
     */
    public static List<Object> tUsers() {
        MongoCollection<Document> tweets = database.getCollection("tweets");

        // Keep only tweets whose text contains '@'.
        Document matchAtSign = new Document("$match",
                new Document("text", new Document("$regex", ".*@.*")));
        // One group per (user, tweet id) pair, so each tweet id is counted once per user.
        Document groupByUserAndTweet = new Document("$group",
                new Document("_id", new Document("user", "$user").append("tweet_id", "$id")));
        // Number of such pairs per user.
        Document countPerUser = new Document("$group",
                new Document("_id", "$_id.user").append("tweet_count", new Document("$sum", 1)));
        // Expose the group key as "user" and drop "_id".
        Document reshape = new Document("$project",
                new Document("_id", 0).append("user", "$_id").append("tweet_count", 1));
        Document highestCountFirst = new Document("$sort", new Document("tweet_count", -1));
        Document topTen = new Document("$limit", 10);

        AggregateIterable<Document> ranked = tweets
                .aggregate(Arrays.asList(matchAtSign, groupByUserAndTweet, countPerUser, reshape,
                        highestCountFirst, topTen))
                .allowDiskUse(true);

        List<Object> result = new ArrayList<>();
        for (Document row : ranked) {
            String line = row.get("user") + ", " + row.get("tweet_count");
            result.add(line);
        }
        return result;
    }

    //Who are the most mentioned Twitter users? (Provide the top five.)

    //String pattern = "/@\\w+\\/";

    //        /////// Frebz, Danny, Stabil -------
    //        
    /**
     * Finds the five users that are mentioned (as {@code @<user>}) in the most tweets.
     *
     * <p>For every distinct string value of the {@code user} field, the tweets whose {@code text}
     * contains {@code @<user>} are counted on the server. The user name is regex-quoted and must
     * not be followed by another word character, so {@code @ann} does not count mentions of
     * {@code @anna}. Distinct values that are {@code null}, not strings, or empty are skipped.
     *
     * <p>If a database error occurs while scanning, it is reported on {@code System.err} and the
     * ranking is built from the users counted so far.
     *
     * @return up to five single-entry maps (user name to mention count as a decimal string),
     *         ordered from most to least mentioned; fewer than five entries when fewer users
     *         were counted
     */
    public static List<Map<String, String>> mentionedTwitterUsers() {
        Map<String, Long> mentionCounts = new HashMap<>();
        MongoCollection<Document> coll = database.getCollection("tweets");
        try (MongoCursor<BsonValue> cursor = coll.distinct("user", BsonValue.class).iterator()) {
            while (cursor.hasNext()) {
                BsonValue value = cursor.next();
                // Read the name through the BSON API instead of parsing toString() output,
                // which threw on null / non-string values and aborted the whole scan.
                if (value == null || !value.isString()) {
                    continue;
                }
                String user = value.asString().getValue();
                if (user.isEmpty()) {
                    continue;
                }
                // Quote the name and forbid a trailing word character to avoid prefix matches.
                String mentionRegex = "@" + Pattern.quote(user) + "(?!\\w)";
                // Count on the server rather than streaming every matching tweet to the client.
                long count = coll.count(new Document("text", new Document("$regex", mentionRegex)));
                mentionCounts.put(user, count);
            }
        } catch (Exception e) {
            System.err.println(e.getClass().getName() + ": " + e.getMessage());
        }

        // Highest count first. Long.compare returns 0 for ties, which keeps the comparator
        // consistent (the previous one never returned 0 and could make List.sort throw).
        List<Map.Entry<String, Long>> ranked = new ArrayList<>(mentionCounts.entrySet());
        ranked.sort((a, b) -> Long.compare(b.getValue(), a.getValue()));

        List<Map<String, String>> resultList = new ArrayList<>();
        // Never read past the end when fewer than five users were counted.
        int limit = Math.min(5, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Long> entry = ranked.get(i);
            Map<String, String> m = new HashMap<>();
            m.put(entry.getKey(), String.valueOf(entry.getValue()));
            resultList.add(m);
        }

        return resultList;
    }

    //Who are the most active Twitter users (top ten)?

    /**
     * Prints the ten users with the most tweets to standard output, one aggregation result
     * document per line, highest tweet count first.
     */
    public static void mostActiveTwitterUsers() {
        MongoCollection<Document> tweets = database.getCollection("tweets");

        // Number of tweets per user.
        Document countPerUser = new Document("$group",
                new Document("_id", "$user").append("count", new Document("$sum", 1)));
        Document highestCountFirst = new Document("$sort", new Document("count", -1));
        Document topTen = new Document("$limit", 10);

        AggregateIterable<Document> ranked =
                tweets.aggregate(Arrays.asList(countPerUser, highestCountFirst, topTen));
        for (Document row : ranked) {
            System.out.println(row);
        }
    }

    /*
    Who are the five most grumpy (most negative tweets) and the most happy (most positive tweets)? (Provide five users for each group)
     */
    /*
      Regex : \b\w{4}\b
        */
    /**
     * Finds the five users with the lowest average tweet polarity.
     *
     * <p>Per user, the {@code polarity} values are summed and the tweets are counted; the average
     * is the polarity sum divided by the tweet count. (The previous code divided the tweet count
     * by the polarity sum, which is not an average.) Users are ordered by ascending average
     * polarity, ties broken by descending tweet count.
     *
     * <p>Each result document is also printed to standard output.
     *
     * @return up to five strings of the form {@code "<user>, <tweet_count>, <polarity sum>"},
     *         grumpiest user first
     */
    public static List<Object> mostGrumpy() {
        MongoCollection<Document> coll = database.getCollection("tweets");

        // avg_polarity = polarity sum / tweet count. Every group contains at least one tweet,
        // so tweet_count is never 0 and no division-by-zero guard is required.
        Document avgPolarity = new Document("$divide", Arrays.asList("$polarity", "$tweet_count"));

        AggregateIterable<Document> output = coll.aggregate(Arrays.asList(
                new Document("$group",
                        new Document("_id", "$user").append("polarity", new Document("$sum", "$polarity"))
                                .append("tweet_count", new Document("$sum", 1))),
                new Document("$project",
                        new Document("_id", 0).append("user", "$_id")
                                .append("avg_polarity", avgPolarity).append("polarity", 1)
                                .append("tweet_count", 1)),
                // Lowest average first; among equal averages prefer the user with more tweets.
                new Document("$sort", new Document("avg_polarity", 1).append("tweet_count", -1)),
                new Document("$limit", 5))).allowDiskUse(Boolean.TRUE);

        List<Object> mostGrumpy = new ArrayList<>();

        for (Document dbObject : output) {
            System.out.println(dbObject);

            mostGrumpy.add(
                    dbObject.get("user") + ", " + dbObject.get("tweet_count") + ", " + dbObject.get("polarity"));
        }

        return mostGrumpy;
    }

    /**
     * Finds the five users with the highest average tweet polarity.
     *
     * <p>Per user, the {@code polarity} values are summed and the tweets are counted; the average
     * is the polarity sum divided by the tweet count. (The previous code divided the tweet count
     * by the polarity sum, which ranked users with many tweets and a small polarity sum highest.)
     * Users are ordered by descending average polarity, ties broken by descending tweet count.
     *
     * <p>Each result document is also printed to standard output.
     *
     * @return up to five strings of the form {@code "<user>, <tweet_count>, <polarity sum>"},
     *         happiest user first
     */
    public static List<Object> mostHappy() {
        MongoCollection<Document> coll = database.getCollection("tweets");

        // avg_polarity = polarity sum / tweet count. Every group contains at least one tweet,
        // so tweet_count is never 0 and no division-by-zero guard is required.
        Document avgPolarity = new Document("$divide", Arrays.asList("$polarity", "$tweet_count"));

        AggregateIterable<Document> output = coll.aggregate(Arrays.asList(
                new Document("$group",
                        new Document("_id", "$user").append("polarity", new Document("$sum", "$polarity"))
                                .append("tweet_count", new Document("$sum", 1))),
                new Document("$project",
                        new Document("_id", 0).append("user", "$_id")
                                .append("avg_polarity", avgPolarity).append("polarity", 1)
                                .append("tweet_count", 1)),
                // Highest average first; among equal averages prefer the user with more tweets.
                new Document("$sort", new Document("avg_polarity", -1).append("tweet_count", -1)),
                new Document("$limit", 5))).allowDiskUse(Boolean.TRUE);

        List<Object> mostHappy = new ArrayList<>();

        for (Document dbObject : output) {
            System.out.println(dbObject);

            mostHappy.add(
                    dbObject.get("user") + ", " + dbObject.get("tweet_count") + ", " + dbObject.get("polarity"));
        }

        return mostHappy;
    }

}