// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package connector; import com.mongodb.BasicDBObject; import com.mongodb.Block; import com.mongodb.MongoClient; import com.mongodb.client.AggregateIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; import com.mongodb.client.model.Accumulators; import com.mongodb.client.model.Aggregates; import com.mongodb.client.model.Filters; import com.mongodb.client.model.Indexes; import com.mongodb.client.model.Projections; import com.mongodb.client.model.Sorts; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.bson.BsonValue; import org.bson.Document; /** * * @author abj */ public class DBConnector { private static String host = "localhost"; private static int port = 27017; private static MongoDatabase database; public static void StartConnection() { try { // To connect to mongodb server MongoClient mongoClient = new MongoClient(host, port); database = mongoClient.getDatabase("test"); MongoCollection<Document> coll = database.getCollection("tweets"); coll.createIndex(Indexes.text("text")); System.out.println("Connect to database successfully : " + database.getName()); } catch (Exception e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } } public static int getNumberOfTwitterUsers() { MongoCollection<Document> coll = database.getCollection("tweets"); /* Well, the root cause of the error here is because you have a String type as expected output and one of the distinct values is actually null. 
*/ //Or accept the results as BsonValue and handle those: ArrayList<BsonValue> distinctUsers = coll.distinct("user", BsonValue.class).into(new ArrayList<>()); /* But in the latter case, you still need to handle the types returned. There are methods on BsonValue to allow you to code for this, but it is also a fair bit of overkill for just getting a list of distinct values. */ System.out.println("Collection size: " + coll.count()); System.out.println("Unique users: " + distinctUsers.size()); return distinctUsers.size(); } //Which Twitter users link the most to other Twitter users? (Provide the top ten.) public static List<Object> tUsers() { MongoCollection<Document> coll = database.getCollection("tweets"); AggregateIterable<Document> output = coll.aggregate(Arrays.asList( new Document("$match", new Document("text", new Document("$regex", ".*@.*"))), new Document("$group", new Document("_id", new Document("user", "$user").append("tweet_id", "$id"))), new Document("$group", new Document("_id", "$_id.user").append("tweet_count", new Document("$sum", 1))), new Document("$project", new Document("_id", 0).append("user", "$_id").append("tweet_count", 1)), new Document("$sort", new Document("tweet_count", -1)), new Document("$limit", 10))) .allowDiskUse(Boolean.TRUE); List<Object> topUsers = new ArrayList<>(); for (Document dbObject : output) { topUsers.add(dbObject.get("user") + ", " + dbObject.get("tweet_count")); } return topUsers; } //Who is are the most mentioned Twitter users? (Provide the top five.) 
//String pattern = "/@\\w+\\/"; // /////// Frebz, Danny, Stabil ------- // public static List<Map<String, String>> mentionedTwitterUsers() { List<Map<String, Integer>> countList = new ArrayList(); List<Map<String, String>> resultList = new ArrayList(); MongoCollection<Document> coll = database.getCollection("tweets"); try (MongoCursor<BsonValue> cursor = coll.distinct("user", BsonValue.class).iterator()) { while (cursor.hasNext()) { String tempUser = cursor.next().toString().split("'")[1]; try (MongoCursor<Document> cursor2 = coll .aggregate(Arrays.asList(new BasicDBObject("$match", new BasicDBObject("text", new BasicDBObject("$regex", "@" + tempUser))))) .iterator()) { int count = 0; while (cursor2.hasNext()) { cursor2.next(); count++; } Map<String, Integer> m = new HashMap(); m.put(tempUser, count); countList.add(m); } } } catch (Exception e) { System.out.println(e.getMessage()); } countList.sort((Map<String, Integer> o1, Map<String, Integer> o2) -> { if (o1.values().iterator().next() < o2.values().iterator().next()) { return 1; } else { return -1; } }); for (int i = 0; i < 5; i++) { Map<String, String> m = new HashMap(); m.put(countList.get(i).keySet().iterator().next(), countList.get(i).values().iterator().next() + ""); resultList.add(m); } return resultList; } //Who are the most active Twitter users (top ten)? public static void mostActiveTwitterUsers() { MongoCollection<Document> coll = database.getCollection("tweets"); AggregateIterable<Document> output = coll.aggregate(Arrays.asList( new Document("$group", new Document("_id", "$user").append("count", new Document("$sum", 1))), new Document("$sort", new Document("count", -1)), new Document("$limit", 10))); for (Document dbObject : output) { System.out.println(dbObject); } } /* Who are the five most grumpy (most negative tweets) and the most happy (most positive tweets)? 
(Provide five users for each group) */ /* Regex : \b\w{4}\b */ public static List<Object> mostGrumpy() { MongoCollection<Document> coll = database.getCollection("tweets"); List<Object> condArray = new ArrayList<>(); List<Object> eqArray = new ArrayList<>(); List<Object> divideArray = new ArrayList<>(); eqArray.add("$polarity"); eqArray.add(0); divideArray.add("$tweet_count"); divideArray.add("$polarity"); condArray.add(new Document("$eq", eqArray)); condArray.add(0); condArray.add(new Document("$divide", divideArray)); AggregateIterable<Document> output = coll.aggregate(Arrays.asList( new Document("$group", new Document("_id", "$user").append("polarity", new Document("$sum", "$polarity")) .append("tweet_count", new Document("$sum", 1))), new Document("$project", new Document("_id", 0).append("user", "$_id") .append("avg_polarity", new Document("$cond", condArray)).append("polarity", 1) .append("tweet_count", 1)), new Document("$sort", new Document("avg_polarity", 1).append("tweet_count", -1)), new Document("$limit", 5))).allowDiskUse(Boolean.TRUE); List<Object> mostGrumpy = new ArrayList<>(); for (Document dbObject : output) { System.out.println(dbObject); mostGrumpy.add( dbObject.get("user") + ", " + dbObject.get("tweet_count") + ", " + dbObject.get("polarity")); } return mostGrumpy; } public static List<Object> mostHappy() { MongoCollection<Document> coll = database.getCollection("tweets"); List<Object> condArray = new ArrayList<>(); List<Object> eqArray = new ArrayList<>(); List<Object> divideArray = new ArrayList<>(); eqArray.add("$polarity"); eqArray.add(0); divideArray.add("$tweet_count"); divideArray.add("$polarity"); condArray.add(new Document("$eq", eqArray)); condArray.add(0); condArray.add(new Document("$divide", divideArray)); AggregateIterable<Document> output; output = coll.aggregate(Arrays.asList( new Document("$group", new Document("_id", "$user").append("polarity", new Document("$sum", "$polarity")) .append("tweet_count", new Document("$sum", 1))), 
new Document("$project", new Document("_id", 0).append("user", "$_id") // Results are added to the avg_polarity variable in the document. .append("avg_polarity", new Document("$cond", condArray)).append("polarity", 1) .append("tweet_count", 1)), new Document("$sort", new Document("avg_polarity", -1).append("tweet_count", -1)), new Document("$limit", 5))).allowDiskUse(Boolean.TRUE); List<Object> mostHappy = new ArrayList<>(); for (Document dbObject : output) { System.out.println(dbObject); mostHappy.add( dbObject.get("user") + ", " + dbObject.get("tweet_count") + ", " + dbObject.get("polarity")); } return mostHappy; } }