com.wordnik.system.mongodb.Analyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.wordnik.system.mongodb.Analyzer.java

Source

// Copyright (C) 2012  Wordnik, Inc.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at your 
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser 
// General Public License for more details.  You should have received a copy 
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.

package com.wordnik.system.mongodb;

import java.io.BufferedInputStream;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;

import org.bson.BSONDecoder;
import com.mongodb.DefaultDBDecoder;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BSONTimestamp;

import com.mongodb.BasicDBObject;
import com.wordnik.util.PrintFormat;

/**
 * Command-line tool that reads every <code>.bson</code> (optionally gzipped)
 * file in a directory, decodes the documents it contains and counts them per
 * operation type ("op" field) and namespace ("ns" field).
 *
 * <p>A summary line is printed to standard out every {@link #REPORT_INTERVAL}
 * milliseconds and once more when all files are done; the per-key counts are
 * (re)written to <code>stats.txt</code> in the working directory on every
 * report.
 *
 * <p>Configuration is held in static fields populated by
 * {@link #parseArgs(String...)}; the class is intended to be driven from
 * {@link #main(String...)} on a single thread.
 */
public class Analyzer extends MongoUtil {
    //  values of the "op" field that feed the insert/update/delete totals
    //  NOTE(review): these are the MongoDB oplog operation codes - confirm the
    //  input files are oplog dumps
    private static final String OP_INSERT = "i";
    private static final String OP_UPDATE = "u";
    private static final String OP_DELETE = "d";

    //  format accepted by the -a and -b options
    private static final String TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss";

    protected static String INPUT_DIR;
    protected static String COLLECTION_STRING;
    protected static String COLLECTION_MAPPING_STRING;
    protected static String DATABASE_MAPPING_STRING;
    protected static Map<String, String> COLLECTION_MAPPING = new HashMap<String, String>();
    protected static Map<String, String> DATABASE_MAPPING = new HashMap<String, String>();
    protected static Set<String> COLLECTIONS_TO_SKIP = new HashSet<String>();
    protected static Set<String> COLLECTIONS_TO_ADD = new HashSet<String>();
    protected static BSONTimestamp AFTER_TIMESTAMP = null;
    protected static BSONTimestamp BEFORE_TIMESTAMP = null;
    //  true until at least one collection is explicitly included via -c
    protected static boolean ONLY_COLLECTION_EXCLUSIONS = true;
    //  cache of namespace -> collection name, filled lazily while reading
    protected static Map<String, String> NAMESPACE_COLLECTION_MAP = new HashMap<String, String>();

    protected static String DEST_DATABASE_NAME = "test";
    protected static String DEST_DATABASE_USER_NAME = null;
    protected static String DEST_DATABASE_PASSWORD = null;
    protected static String DEST_DATABASE_HOST = "localhost";

    //  minimum number of milliseconds between two progress reports
    protected static long REPORT_INTERVAL = 10000;

    //  running totals shown in the progress line
    private long inserts;
    private long updates;
    private long deletes;
    private long skips;

    //  number of records handed to processRecord so far
    private long operationsRead;
    //  wall-clock time of the most recent progress report
    private long lastOutput;

    //  count per "<op> - <namespace>" key, written to stats.txt by report()
    Map<String, Long> counters = new HashMap<String, Long>();

    /**
     * Entry point: parses the arguments and runs the analysis, or prints the
     * usage text when the arguments are invalid or no input directory is given.
     *
     * @param args command-line arguments, see {@link #usage()}
     */
    public static void main(String... args) {
        if (!parseArgs(args) || INPUT_DIR == null) {
            usage();
            return;
        }
        new Analyzer().run();
    }

    /**
     * Splits {@link #COLLECTION_STRING} on commas and sorts the names into
     * {@link #COLLECTIONS_TO_SKIP} (names prefixed with <code>!</code>) and
     * {@link #COLLECTIONS_TO_ADD} (all others). Blank entries are ignored so
     * that a stray comma does not register an empty collection name.
     */
    protected static void selectCollections() {
        if (COLLECTION_STRING == null) {
            return;
        }
        for (String rawName : COLLECTION_STRING.split(",")) {
            String collectionName = rawName.trim();
            if (collectionName.startsWith("!")) {
                String excluded = collectionName.substring(1).trim();
                if (excluded.length() > 0) {
                    COLLECTIONS_TO_SKIP.add(excluded);
                }
            } else if (collectionName.length() > 0) {
                //  an explicit inclusion switches the filter to whitelist mode
                ONLY_COLLECTION_EXCLUSIONS = false;
                COLLECTIONS_TO_ADD.add(collectionName);
            }
        }
    }

    /**
     * Parses the comma-separated <code>SOURCE=TARGET</code> mapping strings
     * into the supplied maps.
     *
     * @param databaseMappingString   database mappings, may be null
     * @param collectionMappingString collection mappings, may be null
     * @param databaseMappings        receives the parsed database mappings
     * @param collectionMappings      receives the parsed collection mappings
     * @throws IllegalArgumentException if an entry is not of the form
     *                                  <code>SOURCE=TARGET</code>
     */
    protected static void createMappings(String databaseMappingString, String collectionMappingString,
            Map<String, String> databaseMappings, Map<String, String> collectionMappings) {
        parseMappings(databaseMappingString, databaseMappings);
        parseMappings(collectionMappingString, collectionMappings);
    }

    /** Adds every <code>SOURCE=TARGET</code> entry of mappingString to target. */
    private static void parseMappings(String mappingString, Map<String, String> target) {
        if (mappingString == null) {
            return;
        }
        StringTokenizer tk = new StringTokenizer(mappingString, ",");
        while (tk.hasMoreTokens()) {
            String token = tk.nextToken();
            String[] split = token.split("=");
            if (split.length < 2) {
                throw new IllegalArgumentException(
                        "invalid mapping '" + token + "', expected {SOURCE}={TARGET}");
            }
            target.put(split[0], split[1]);
        }
    }

    /**
     * Analyzes every regular file in {@link #INPUT_DIR} whose name contains
     * <code>.bson</code>, then emits a final report. A failure in one file is
     * printed and does not stop the remaining files from being processed.
     */
    protected void run() {
        long startTime = System.currentTimeMillis();
        //  decide what collections to process
        selectCollections();

        //  create any re-mappings (parsed for validation; not used by the analysis)
        Map<String, String> collectionMappings = new HashMap<String, String>();
        Map<String, String> databaseMappings = new HashMap<String, String>();
        createMappings(DATABASE_MAPPING_STRING, COLLECTION_MAPPING_STRING, databaseMappings, collectionMappings);

        lastOutput = System.currentTimeMillis();
        try {
            File[] files = new File(INPUT_DIR).listFiles();
            if (files == null) {
                System.err.println("unable to list input directory " + INPUT_DIR);
            } else {
                List<File> filesToProcess = new ArrayList<File>();
                for (File file : files) {
                    if (file.isFile() && file.getName().indexOf(".bson") > 0) {
                        filesToProcess.add(file);
                    }
                }
                for (File file : filesToProcess) {
                    System.out.println("analyzing file " + file.getName());
                    try {
                        analyzeFile(file, startTime);
                    } catch (Exception ex) {
                        //  best effort: report the failure and move on to the next file
                        System.err.println("failed to analyze file " + file.getName());
                        ex.printStackTrace();
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        report(operationsRead, System.currentTimeMillis() - startTime);
    }

    /**
     * Decodes all documents of one file, counting those accepted by
     * {@link #shouldProcessRecord(String, BSONTimestamp)} and tallying the
     * rest as skipped. The stream is always closed before returning.
     */
    private void analyzeFile(File file, long startTime) throws IOException {
        BufferedInputStream inputStream = openDumpStream(file);
        try {
            BSONDecoder decoder = new DefaultDBDecoder();
            while (true) {
                //  probe one byte for end-of-stream; available() is only an
                //  estimate and is not a reliable EOF test (notably for gzip)
                inputStream.mark(1);
                if (inputStream.read() < 0) {
                    break;
                }
                inputStream.reset();

                BSONObject obj = decoder.readObject(inputStream);
                if (obj == null) {
                    break;
                }
                BasicDBObject dbo = new BasicDBObject((BasicBSONObject) obj);

                Object ts = dbo.get("ts");
                BSONTimestamp operationTimestamp = (ts instanceof BSONTimestamp) ? (BSONTimestamp) ts : null;
                String namespace = dbo.getString("ns");

                if (shouldProcessRecord(collectionOf(namespace), operationTimestamp)) {
                    processRecord(dbo);
                    operationsRead++;
                } else {
                    skips++;
                }

                long now = System.currentTimeMillis();
                if (now - lastOutput > REPORT_INTERVAL) {
                    report(operationsRead, now - startTime);
                    lastOutput = System.currentTimeMillis();
                }
            }
        } finally {
            try {
                inputStream.close();
            } catch (IOException closeFailure) {
                System.err.println("failed to close " + file.getName() + ": " + closeFailure);
            }
        }
    }

    /**
     * Opens a buffered stream on the file, transparently un-gzipping files
     * whose name ends in <code>.gz</code>. The file handle is released if the
     * gzip header cannot be read.
     */
    private static BufferedInputStream openDumpStream(File file) throws IOException {
        InputStream raw = new FileInputStream(file);
        try {
            if (file.getName().endsWith(".gz")) {
                raw = new GZIPInputStream(raw);
            }
            return new BufferedInputStream(raw);
        } catch (IOException e) {
            try {
                raw.close();
            } catch (IOException ignored) {
                //  the original failure is the one worth reporting
            }
            throw e;
        }
    }

    /**
     * Returns the collection part of a namespace, i.e. everything after the
     * first dot, caching the result in {@link #NAMESPACE_COLLECTION_MAP}.
     * A namespace without a dot is returned unchanged; null yields null.
     */
    private static String collectionOf(String namespace) {
        //  NOTE(review): assumes namespaces look like "<database>.<collection>" - confirm
        if (namespace == null) {
            return null;
        }
        String collection = NAMESPACE_COLLECTION_MAP.get(namespace);
        if (collection == null) {
            int dot = namespace.indexOf('.');
            collection = dot < 0 ? namespace : namespace.substring(dot + 1);
            NAMESPACE_COLLECTION_MAP.put(namespace, collection);
        }
        return collection;
    }

    /**
     * Counts one record under the key <code>"&lt;op&gt; - &lt;ns&gt;"</code>
     * and updates the insert/update/delete totals shown by
     * {@link #report(long, long)}.
     *
     * @param dbo the decoded record; its "op" and "ns" fields are read
     */
    protected void processRecord(BasicDBObject dbo) {
        String operationType = dbo.getString("op");
        String namespace = dbo.getString("ns");

        if (OP_INSERT.equals(operationType)) {
            inserts++;
        } else if (OP_UPDATE.equals(operationType)) {
            updates++;
        } else if (OP_DELETE.equals(operationType)) {
            deletes++;
        }

        String key = operationType + " - " + namespace;
        Long previous = counters.get(key);
        counters.put(key, Long.valueOf(previous == null ? 1L : previous.longValue() + 1L));
    }

    /**
     * Decides whether a record passes the collection and time filters.
     *
     * <p>A collection passes when it is not in {@link #COLLECTIONS_TO_SKIP}
     * and either no explicit inclusions were given or it is listed in
     * {@link #COLLECTIONS_TO_ADD}. When a time bound is configured the
     * timestamp must satisfy <code>AFTER &lt;= ts &lt; BEFORE</code>; a record
     * without a timestamp cannot be placed in the window and is rejected.
     *
     * @param collection collection name, may be null
     * @param timestamp  operation timestamp, may be null
     * @return true if the record should be counted
     */
    protected boolean shouldProcessRecord(String collection, BSONTimestamp timestamp) {
        boolean shouldProcess = false;

        if (COLLECTIONS_TO_ADD.contains(collection)) {
            shouldProcess = true;
        }
        if (COLLECTIONS_TO_SKIP.contains(collection)) {
            shouldProcess = false;
        } else if (ONLY_COLLECTION_EXCLUSIONS) {
            shouldProcess = true;
        }

        if (AFTER_TIMESTAMP != null || BEFORE_TIMESTAMP != null) {
            if (timestamp == null) {
                return false;
            }
            if (AFTER_TIMESTAMP != null && timestamp.getTime() < AFTER_TIMESTAMP.getTime()) {
                shouldProcess = false;
            }
            if (BEFORE_TIMESTAMP != null && timestamp.getTime() >= BEFORE_TIMESTAMP.getTime()) {
                shouldProcess = false;
            }
        }
        return shouldProcess;
    }

    /**
     * Parses <code>-x value</code> style arguments into the static
     * configuration fields.
     *
     * @param args the command-line arguments
     * @return false if a flag is unknown, malformed or lacks its value;
     *         true otherwise
     * @throws RuntimeException if the value of -a or -b is not a valid
     *                          <code>yyyy-MM-dd HH:mm:ss</code> date
     */
    public static boolean parseArgs(String... args) {
        for (int i = 0; i < args.length; i++) {
            String flag = args[i];
            //  every option needs a flag character and a following value
            if (flag == null || flag.length() < 2 || i + 1 >= args.length) {
                return false;
            }
            char option = flag.charAt(1);
            switch (option) {
            case 'i':
                INPUT_DIR = args[++i];
                break;
            case 'c':
                COLLECTION_STRING = args[++i];
                break;
            case 'R':
                DATABASE_MAPPING_STRING = args[++i];
                break;
            case 'r':
                COLLECTION_MAPPING_STRING = args[++i];
                break;
            case 'a':
                AFTER_TIMESTAMP = parseTimestampArg(args[++i]);
                break;
            case 'b':
                BEFORE_TIMESTAMP = parseTimestampArg(args[++i]);
                break;
            case 'u':
                DEST_DATABASE_USER_NAME = args[++i];
                break;
            case 'p':
                DEST_DATABASE_PASSWORD = args[++i];
                break;
            case 'h':
                DEST_DATABASE_HOST = args[++i];
                break;
            default:
                return false;
            }
        }
        return true;
    }

    /** Converts a <code>yyyy-MM-dd HH:mm:ss</code> string to a second-resolution BSON timestamp. */
    private static BSONTimestamp parseTimestampArg(String value) {
        try {
            //  SimpleDateFormat is not thread-safe, so a fresh instance is used per call
            SimpleDateFormat sdf = new SimpleDateFormat(TIMESTAMP_FORMAT);
            Date date = sdf.parse(value);
            return new BSONTimestamp((int) (date.getTime() / 1000), 0);
        } catch (java.text.ParseException e) {
            throw new RuntimeException(
                    "invalid date supplied: '" + value + "' (expected " + TIMESTAMP_FORMAT + ")", e);
        }
    }

    /**
     * Prints the running totals with the average throughput and rewrites
     * <code>stats.txt</code> with one <code>key|count</code> line per counter.
     * Failure to write the file is reported but does not abort the analysis.
     *
     * @param totalCount number of records processed so far
     * @param duration   elapsed time in milliseconds
     */
    void report(long totalCount, long duration) {
        //  avoid NaN/Infinity when no measurable time has elapsed
        double brate = duration > 0 ? (double) totalCount / (duration / 1000.0) : 0.0;
        System.out.println("inserts: " + PrintFormat.LONG_FORMAT.format(inserts) + ", updates: "
                + PrintFormat.LONG_FORMAT.format(updates) + ", deletes: " + PrintFormat.LONG_FORMAT.format(deletes)
                + ", skips: " + PrintFormat.LONG_FORMAT.format(skips) + " (" + PrintFormat.LONG_FORMAT.format(brate)
                + " req/sec)");

        OutputStream out = null;
        try {
            out = new FileOutputStream(new File("stats.txt"));
            Writer writer = new OutputStreamWriter(out, "UTF-8");
            for (Map.Entry<String, Long> entry : counters.entrySet()) {
                writer.write(entry.getKey() + "|" + entry.getValue().toString() + "\n");
            }
            //  push buffered characters to the stream before it is closed below
            writer.flush();
        } catch (IOException e) {
            System.err.println("unable to write stats.txt");
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException closeFailure) {
                    System.err.println("failed to close stats.txt: " + closeFailure);
                }
            }
        }
    }

    /** Prints the command-line help to standard out. */
    public static void usage() {
        System.out.println("usage: Analyzer");
        System.out.println(" -i : input directory");
        System.out.println(" -c : CSV collection string (prefix with ! to exclude)");
        System.out.println(" -r : collection re-targeting (format: {SOURCE}={TARGET})");
        System.out.println(" -R : database re-targeting (format: {SOURCE}={TARGET})");
        System.out.println(" -a : only process entries after this timestamp (" + TIMESTAMP_FORMAT + ")");
        System.out.println(" -b : only process entries before this timestamp (" + TIMESTAMP_FORMAT + ")");
        System.out.println(" -h : destination hostname");
        System.out.println(" [-u : username]");
        System.out.println(" [-p : password]");
    }
}