us.daveread.education.mongo.honeypot.BasicStatistics.java Source code

Java tutorial

Introduction

Here is the source code for us.daveread.education.mongo.honeypot.BasicStatistics.java

Source

package us.daveread.education.mongo.honeypot;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;

import org.bson.Document;

import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import com.mongodb.ServerAddress;
import com.mongodb.client.AggregateIterable;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;

/**
 * Calculate basic statistics from the honeypot data. This class demonstrates
 * basic Java-MongoDB interactions including retrieving documents directly as
 * well as using the aggregation framework to use MongoDB's summarization
 * capabilities.
 * <p>
 * This program is intended to demonstrate the use of MongoDB within a Java
 * program.
 * <p>
 * Copyright (C) 2016 David S. Read
 * <p>
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Affero General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option) any
 * later version.
 * <p>
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
 * details.
 * <p>
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/
 * <p>
 * For information on MongoDB: https://www.mongodb.com/
 * @author readda
 * @version 01.00.00
 */
public class BasicStatistics {
    /**
     * The logger
     */
    private static final Logger LOG = Logger.getLogger(BasicStatistics.class);

    /**
     * The MongoDB server
     */
    private static final String MONGO_DB_IP = "localhost";

    /**
     * The MongoDB port
     */
    private static final int MONGO_DB_PORT = 27017;

    /**
     * The database containing the honeypot-related collections.
     */
    private static final String HONEYPOT_DATABASE = "infuzitDemo";

    /**
     * The collection containing the raw anonymized honeypot data.
     */
    private static final String HONEYPOT_COLLECTION = "honeypotData";

    /**
     * The maximum number of documents to display when reporting results.
     */
    private static final int NUMBER_OF_ITEMS_TO_DISPLAY = 10;

    /**
     * The Mongo client instance - our connection to the server.
     */
    private MongoClient mongoClient;

    /**
     * The Mongo database instance - our access to a specific database.
     */

    private MongoDatabase mongoDatabase;

    /**
     * The total number of attacks in the collection. Each document represents
     * one attack.
     */
    private long totalAttacks;

    /**
     * The total number of countries represented in the attack data.
     */
    private int totalAttackingCountries;

    /**
     * Create the instance. This will populate the aggregate count of documents
     * (attacks) and countries.
     * @see #totalAttacks
     * @see #computeAttackCountryCount()
     */
    public BasicStatistics() {
        LOG.info("Connecting to server: " + MONGO_DB_IP + ":" + MONGO_DB_PORT + " and database: "
                + HONEYPOT_DATABASE);

        /**
         * Set a short timeout for connecting so that we don't wait the default 30
         * seconds to detect a problem.
         */
        MongoClientOptions.Builder optionsBuilder = new MongoClientOptions.Builder();
        optionsBuilder.serverSelectionTimeout(2000);

        MongoClientOptions options = optionsBuilder.build();

        /**
         * Create the MongoClient instance.
         */
        mongoClient = new MongoClient(new ServerAddress(MONGO_DB_IP, MONGO_DB_PORT), options);

        /**
         * Create the MongoDatabase instance.
         */
        mongoDatabase = mongoClient.getDatabase(HONEYPOT_DATABASE);

        /**
         * Get the MongoCollection instance which provides read and write access
         * (bed on permissions) to a specific collection in the database.
         */
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);

        /**
         * Get the count of documents in the collection. If the collection cannot be
         * accessed an exception will the thrown.
         */
        try {
            totalAttacks = collection.count();
        } catch (Throwable throwable) {
            LOG.fatal("Unable to connect to the MongoDB instance at " + MONGO_DB_IP + ":" + MONGO_DB_PORT,
                    throwable);
            throw new IllegalStateException("Unable to connect to the MongoDB instance at " + MONGO_DB_IP + ":"
                    + MONGO_DB_PORT + ". Are you sure it is running?", throwable);
        }

        /**
         * If no documents were retrieved then apparently the demo collection was
         * not loaded into the database.
         */
        if (totalAttacks == 0) {
            throw new IllegalStateException("Unable to load documents from the collection " + HONEYPOT_COLLECTION
                    + " in the database " + HONEYPOT_DATABASE + ". Are you sure it was loaded?");
        }

        computeAttackCountryCount();
    }

    /**
     * Get a Mongo collection instance.
     * @param collection
     *          The name of the collection
     * @return The Mongo collection instance
     */
    private MongoCollection<Document> accessCollection(String collection) {
        return mongoDatabase.getCollection(collection);
    }

    /**
     * Calculate the number of countries found in the honeypot attack data and
     * set the attribute.
     * @see #totalAttackingCountries
     */
    private void computeAttackCountryCount() {
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);

        Map<String, String> attackCountries = new HashMap<>();
        List<Document> attacks = collection.find()
                .projection(new Document("_id", 0).append("client_country_code", 1))
                .into(new ArrayList<Document>());
        for (Document attack : attacks) {
            String countryCode = attack.getString("client_country_code");
            if (attackCountries.get(countryCode) == null) {
                attackCountries.put(countryCode, countryCode);
            }
        }
        totalAttackingCountries = attackCountries.size();

    }

    /**
     * Report the overall statistics on the console.
     * @see #totalAttacks
     * @see #totalAttackingCountries
     */
    private void overallStats() {
        printHeader("Overall Statistics");
        System.out.println("Total Attacks: " + totalAttacks);
        System.out.println("Total Attacking Countries: " + totalAttackingCountries);
    }

    /**
     * Summarize the count of attacks by country. This method queries the
     * collection for all attack documents and then uses a Map to aggregate the
     * data. The resulting Map is then used to populate an array of CountryCode
     * instances which is then sorted in order to find the top attacking
     * countries.
     * <p>
     * Compare this to the countryBreakdownAggregation which produces the same
     * report but uses MongoDB's aggregation framework.
     * @see #countryBreakdownAggregation()
     */
    private void countryBreakdownCoded() {
        Map<String, Integer> attacksByCountry = new HashMap<>();
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);

        /**
         * Retrieve the client_country_code from each document. The find()
         * method returns an iterable which will iterate through all the
         * documents. The projection() is then used to limit the retrieved
         * fields to just the one with the client system's country code.
         */
        FindIterable<Document> attacks = collection.find()
                .projection(new Document("_id", 0).append("client_country_code", 1));

        /**
         * Iterate through all the documents and keep a count of matches by
         * country code.
         */
        for (Document attack : attacks) {
            String countryCode = attack.getString("client_country_code");
            Integer attackCount = attacksByCountry.get(countryCode);
            if (attackCount == null) {
                attackCount = 0;
            }
            attackCount++;
            attacksByCountry.put(countryCode, attackCount);
        }

        /**
         * Create an array to house the resulting country codes and counts.
         */
        List<CountryCount> countryAttackCount = new ArrayList<>();

        /**
         * Populate the array with country codes and counts from the map.
         */
        for (String countryCode : attacksByCountry.keySet()) {
            countryAttackCount.add(new CountryCount(countryCode, attacksByCountry.get(countryCode)));
        }

        /**
         * Sort the resulting array (note that CountryCount implements
         * Comparable). The sort is then reversed to put the largest number
         * first.
         */
        Collections.sort(countryAttackCount);
        Collections.reverse(countryAttackCount);

        /**
         * Report the top country codes with their associated attack counts.
         */
        int limit = Math.min(totalAttackingCountries, NUMBER_OF_ITEMS_TO_DISPLAY);
        printHeader("Top " + limit + " Attack Countries (using Java coded aggregation)");
        for (int index = 0; index < limit; ++index) {
            int numAttacks = countryAttackCount.get(index).getAttackCount();
            System.out.println("  " + countryAttackCount.get(index).getCountryCode() + ": " + numAttacks + " ("
                    + (int) ((numAttacks * 100) / (double) totalAttacks) + "%)");
        }
    }

    /**
     * Summarize the number of attacks recorded by each honeypot server and
     * channel (sensor). This method queries the collection for all attack
     * documents and then uses a Map to aggregate the data. The resulting Map is
     * then used to populate an array of ServerChannelCount instances which is
     * then sorted in order to present the servers and channels in order.
     * <p>
     * Compare this to the honeypotBreakdownAggregation which produces the same
     * report but uses MongoDB's aggregation framework.
     * @see #honeypotBreakdownAggregation()
     */
    private void honeypotBreakdownCoded() {
        Map<String, Integer> attacksByServerAndChannel = new HashMap<>();
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);

        /**
         * Retrieve the server_ip_mask (in the payload subdocument) and channel
         * from each document. The find() method returns an iterable which will
         * iterate through all the documents. The projection() is then used to
         * limit the retrieved fields to just the server_ip_mask and channel.
         */
        FindIterable<Document> attacks = collection.find()
                .projection(new Document("_id", 0).append("payload.server_ip_mask", 1).append("channel", 1));

        /**
         * Iterate through all the documents and keep a count of matches by
         * server and channel.
         */
        for (Document attack : attacks) {
            Document payload = (Document) attack.get("payload");
            String serverAndChannel = payload.getInteger("server_ip_mask") + "," + attack.getString("channel");
            Integer attackCount = attacksByServerAndChannel.get(serverAndChannel);
            if (attackCount == null) {
                attackCount = 0;
            }
            attackCount++;
            attacksByServerAndChannel.put(serverAndChannel, attackCount);
        }

        /**
         * Create an array to house the resulting servers, channels and counts.
         */
        List<ServerChannelCount> serverChannelAttackCount = new ArrayList<>();

        /**
         * Populate the array with server and channel as well as the counts from
         * the map.
         */
        for (String countryCode : attacksByServerAndChannel.keySet()) {
            serverChannelAttackCount
                    .add(new ServerChannelCount(countryCode, attacksByServerAndChannel.get(countryCode)));
        }

        /**
         * Sort the resulting array (note that ServerChannelCount implements
         * Comparable).
         */
        Collections.sort(serverChannelAttackCount);

        /**
         * Report the attack counts for each server and channel.
         */
        printHeader("Attack Counts for Servers and Channels (using Java coded aggregation)");
        for (ServerChannelCount serverChannel : serverChannelAttackCount) {
            int numAttacks = serverChannel.getAttackCount();
            System.out.println("  Server:" + serverChannel.getServerIpMask() + " Channel:"
                    + serverChannel.getChannel() + " Attack Count:" + numAttacks + " ("
                    + (int) ((numAttacks * 100) / (double) totalAttacks) + "%)");
        }
    }

    /**
     * Summarize the count of attacks by country. This method uses MongoDB's
     * aggregation framework to summarize the data. It then retrieves the
     * resulting documents and displays them.
     * <p>
     * Compare this to the countryBreakdownCoded which produces the same report
     * but reads the raw documents and then uses Java code to produce the
     * summary.
     * <p>
     * Note that this method creates a aggregation pipeline matching the
     * following JSON (which can be used in the Mongo client directly):
     * 
     * <pre>
     * # summarize country 
     * db.honeypotData.aggregate( [ 
     *  { "$group": { "_id": "$client_country_code" , "hits": { "$sum": 1 } } }, 
     *  { "$sort": { "hits":-1} } 
     * ])
     * </pre>
     * 
     * @see #countryBreakdownCoded()
     */
    private void countryBreakdownAggregation() {
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);
        List<Document> aggregationPipeline = new ArrayList<>();
        Document operation;

        /**
         * Group the data on country code. Count the number of documents in each
         * group.
         */
        operation = new Document("$group",
                new Document("_id", "$client_country_code").append("attacks", new Document("$sum", 1)));
        aggregationPipeline.add(operation);

        /**
         * Sort the data on count of attacks, descending.
         */
        operation = new Document("$sort", new Document("attacks", -1));
        aggregationPipeline.add(operation);

        /**
         * Get the iterable for the pipeline result.
         */
        AggregateIterable<Document> attacks = collection.aggregate(aggregationPipeline);

        /**
         * Report the top country codes with their associated attack counts.
         */
        int limit = Math.min(totalAttackingCountries, NUMBER_OF_ITEMS_TO_DISPLAY);
        printHeader("Top " + limit + " Attack Countries (using aggregation pipeline)");
        int count = 0;
        for (Document attack : attacks) {
            int numAttacks = attack.getInteger("attacks");
            System.out.println("  " + attack.get("_id") + ": " + numAttacks + " ("
                    + (int) ((numAttacks * 100) / (double) totalAttacks) + "%)");
            ++count;
            if (count >= limit) {
                break;
            }
        }
    }

    /**
     * Summarize the number of attacks recorded by each honeypot server and
     * channel (sensor). This method uses MongoDB's aggregation framework to
     * summarize the data. It then retrieves the resulting documents and
     * displays them.
     * <p>
     * Compare this to the honeypotBreakdownCoded which produces the same report
     * but reads the raw documents and then uses Java code to produce the
     * summary.
     * <p>
     * Note that this method creates a aggregation pipeline matching the
     * following JSON (which can be used in the Mongo client directly):
     * 
     * <pre>
     * # summarize server and channel 
     * db.honeypotData.aggregate( [ 
     *  { "$group": { "_id": { "server_ip_mask" : "$payload.server_ip_mask" , 
     *   "channel" : "$channel" }, "attacks": { "$sum": 1 } } }, 
     *  { "$project": { "server_ip_mask" : "$_id.server_ip_mask", 
     *   "channel" : "$_id.channel", "attacks" : "$attacks" } }, 
     *  { "$sort": { "_id.server_ip_mask":1, "_id.channel":1} } 
     * ])
     * </pre>
     * 
     * @see #honeypotBreakdownAggregation()
     */
    private void honeypotBreakdownAggregation() {
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);
        List<Document> aggregationPipeline = new ArrayList<>();
        Document operation;

        /**
         * Group by server IP mask value and channel. Count the number of
         * documents in each group.
         */
        operation = new Document("$group",
                new Document("_id",
                        new Document("server_ip_mask", "$payload.server_ip_mask").append("channel", "$channel"))
                                .append("attacks", new Document("$sum", 1)));
        aggregationPipeline.add(operation);

        /**
         * Project the attributes to extract them from the _id.
         */
        operation = new Document("$project", new Document("server_ip_mask", "$_id.server_ip_mask")
                .append("channel", "$_id.channel").append("attacks", 1));
        aggregationPipeline.add(operation);

        /**
         * Sort the data based on server IP mask value and channel.
         */
        operation = new Document("$sort", new Document("_id.server_ip_mask", 1).append("_id.channel", 1));
        aggregationPipeline.add(operation);

        /**
         * Get the iterable for the pipeline result.
         */
        AggregateIterable<Document> attacks = collection.aggregate(aggregationPipeline);

        /**
         * Report the attack counts for each server and channel.
         */
        printHeader("Attack Counts for Servers and Channels (using aggregation pipeline)");
        for (Document attack : attacks) {
            int numAttacks = attack.getInteger("attacks");
            System.out.println("  Server:" + attack.getInteger("server_ip_mask") + " Channel:"
                    + attack.getString("channel") + " Attack Count:" + numAttacks + " ("
                    + (int) ((numAttacks * 100) / (double) totalAttacks) + "%)");
        }
    }

    /**
     * Summarize the number of attacks from each client. This method uses
     * MongoDB's aggregation framework to summarize the data. It then retrieves
     * the resulting documents and displays them.
     * <p>
     * Note that this method creates a aggregation pipeline matching the
     * following JSON (which can be used in the Mongo client directly):
     * 
     * <pre>
     * # summarize most active IPs and their countries
     * db.honeypotData.aggregate( [ 
     *  { "$group": { "_id": { "client_ip_mask" : "$payload.client_ip_mask" , 
     *   "client_country_code" : "$client_country_code" }, "attacks": { "$sum": 1 } } }, 
     *  { "$project": { "client_ip_mask" : "$_id.client_ip_mask", 
     *   "client_country_code" : "$_id.client_country_code", "attacks" : "$attacks" } }, 
     *  { "$sort": {"attacks":-1} } 
     * ])
     * </pre>
     */
    private void mostActiveIps() {
        MongoCollection<Document> collection = accessCollection(HONEYPOT_COLLECTION);
        List<Document> aggregationPipeline = new ArrayList<>();
        Document operation;

        /**
         * Group by client IP mask value and country code (expect a given IP to
         * always map the the same country). Count the number of documents in
         * each group.
         */
        operation = new Document("$group",
                new Document("_id", new Document("client_ip_mask", "$payload.client_ip_mask")
                        .append("client_country_code", "$client_country_code")).append("attacks",
                                new Document("$sum", 1)));
        aggregationPipeline.add(operation);

        /**
         * Project the attributes to extract them from the _id.
         */
        operation = new Document("$project", new Document("client_ip_mask", "$_id.client_ip_mask")
                .append("client_country_code", "$_id.client_country_code").append("attacks", "$attacks"));
        aggregationPipeline.add(operation);

        /**
         * Sort the data based on number of attacks, descending.
         */
        operation = new Document("$sort", new Document("attacks", -1));
        aggregationPipeline.add(operation);

        /**
         * Get the iterable for the pipeline result.
         */
        AggregateIterable<Document> attacks = collection.aggregate(aggregationPipeline);

        /**
         * Report the attack counts for top attacking clients.
         */
        int limit = Math.min(totalAttackingCountries, NUMBER_OF_ITEMS_TO_DISPLAY);
        printHeader("Top " + limit + " Attacking Client IPs (using aggregation pipeline)");
        int count = 0;
        for (Document attack : attacks) {
            int numAttacks = attack.getInteger("attacks");
            System.out.println("  Client:" + attack.getInteger("client_ip_mask") + " Country:"
                    + attack.getString("client_country_code") + " Attack Count:" + numAttacks + " ("
                    + (int) ((numAttacks * 100) / (double) totalAttacks) + "%)");
            ++count;
            if (count >= limit) {
                break;
            }
        }
    }

    /**
     * Print a message on the console underlined with equal signs.
     * @param message
     *          The text the print on the console
     */
    private void printHeader(String message) {
        System.out.println(message);
        for (int index = 0; index < message.length(); ++index) {
            System.out.print("=");
        }
        System.out.println();
    }

    /**
     * Create the instance and call the different summarization methods.
     * @param args
     *          Command line arguments - not used
     */
    public static void main(String[] args) {
        BasicStatistics attackStats = new BasicStatistics();
        attackStats.overallStats();
        System.out.println();
        attackStats.countryBreakdownCoded();
        System.out.println();
        attackStats.countryBreakdownAggregation();
        System.out.println();
        attackStats.honeypotBreakdownCoded();
        System.out.println();
        attackStats.honeypotBreakdownAggregation();
        System.out.println();
        attackStats.mostActiveIps();
    }
}