com.justgiving.raven.kissmetrics.utils.KissmetricsLocalSchemaExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.justgiving.raven.kissmetrics.utils.KissmetricsLocalSchemaExtractor.java

Source

/*
 * Copyright (c) 2014-2015 Giving.com, trading as JustGiving or its affiliates. All Rights Reserved. 
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"). 
 * You may not use this file except in compliance with the License. 
 * A copy of the License is located in the "license" file accompanying this file.
 * 
 * This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for 
 * the specific language governing permissions and limitations under the License.
 * 
 * @author Richard Freeman
 * 
 */

package com.justgiving.raven.kissmetrics.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

/**
 * Command-line utility that scans files of JSON records (one JSON object per line) and reports,
 * for every top-level key, the statistics accumulated in a {@link KeyValueCounter}: the entry of
 * {@code keyCounter} and the entry of {@code valueLength} for that key.
 *
 * <p>The report is a tab-separated text file with one line per key, sorted by key:
 * {@code key <TAB> keyCounter value <TAB> valueLength value}.
 *
 * <p>Usage: {@code KissmetricsLocalSchemaExtractor <inputFolderOrFile> <outputFile>}. When the
 * argument count is not exactly two, the built-in default paths are used.
 */
public class KissmetricsLocalSchemaExtractor {

    static final Logger logger = Logger.getLogger(KissmetricsLocalSchemaExtractor.class);

    /** Input location used when the command line does not supply exactly two arguments. */
    private static final String DEFAULT_INPUT = "D:\\datasets\\kinesis\\stg\\input2\\";

    /** Output file used when the command line does not supply exactly two arguments. */
    private static final String DEFAULT_OUTPUT = "D:\\datasets\\kinesis\\stg\\output2\\schema-kinesis.txt";

    /** Key whose null value is expected and therefore not reported as an error. */
    private static final String NULLABLE_KEY = "user_agent";

    /**
     * Parses every regular file directly inside {@code inputFolder} as a JSON records file, merges
     * the per-file statistics and writes the sorted result to {@code outputFile}.
     * Sub-directories are not traversed; a warning is logged for each one.
     *
     * @param inputFolder directory containing the JSON record files
     * @param outputFile  path of the report file to create (parent directories are created)
     * @throws IOException if the folder cannot be listed or the report cannot be written
     */
    private static void countKeysInJsonRecordsFolder(String inputFolder, String outputFile) throws IOException {
        File folder = new File(inputFolder);
        // listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            throw new FileNotFoundException("Input folder does not exist or cannot be listed: " + inputFolder);
        }

        KeyValueCounter totalKeyValueCounter = new KeyValueCounter();
        for (File currentFile : listOfFiles) {
            if (currentFile.isFile()) {
                logger.info("Processing file: " + currentFile.getName());
                KeyValueCounter currentKeyValueCounter = countKeysInJsonRecordsFile(currentFile.getPath());
                totalKeyValueCounter = deepMergeKeyValueCounter(totalKeyValueCounter, currentKeyValueCounter);
            } else if (currentFile.isDirectory()) {
                logger.warn("Sub-directory folders are currently ignored");
            }
        }
        writeSchema(totalKeyValueCounter, outputFile);
    }

    /**
     * Combines two counters into a new one without modifying either argument.
     *
     * <p>The result starts as a copy of {@code originalMap}'s {@code keyCounter} and
     * {@code valueLength} maps; {@code newMapToAdd}'s maps are then folded in through
     * {@code deepMergeHashMapsAddition} and {@code deepMergeHashMapsMaxium} respectively
     * (presumably summing counts and keeping the larger length - confirm in KeyValueCounter).
     *
     * @param originalMap the counter accumulated so far
     * @param newMapToAdd the counter to merge into it
     * @return a new counter holding the merged statistics
     */
    public static KeyValueCounter deepMergeKeyValueCounter(KeyValueCounter originalMap,
            KeyValueCounter newMapToAdd) {
        KeyValueCounter outputValueCounter = new KeyValueCounter();
        outputValueCounter.keyCounter.putAll(originalMap.keyCounter);
        outputValueCounter.deepMergeHashMapsAddition(newMapToAdd.keyCounter);
        outputValueCounter.valueLength.putAll(originalMap.valueLength);
        outputValueCounter.deepMergeHashMapsMaxium(newMapToAdd.valueLength);
        return outputValueCounter;
    }

    /**
     * Collects key statistics from one JSON records file, read as UTF-8 with one JSON object per
     * line.
     *
     * <p>Processing is best-effort and never throws: a line that is not a valid JSON object is
     * logged and skipped, and a file that cannot be read contributes whatever was counted before
     * the failure.
     *
     * @param input_filename path to a JSON records file
     * @return the statistics gathered from the file (empty if nothing could be read)
     */
    private static KeyValueCounter countKeysInJsonRecordsFile(String input_filename) {
        JSONParser jsonParser = new JSONParser();
        KeyValueCounter keyValueCounter = new KeyValueCounter();

        // try-with-resources guarantees both streams are closed even when a line fails.
        try (InputStream fis = new FileInputStream(input_filename);
                BufferedReader bufferedReader = new BufferedReader(
                        new InputStreamReader(fis, Charset.forName("UTF-8")))) {
            String line;
            int lineNumber = 0;
            while ((line = bufferedReader.readLine()) != null) {
                lineNumber++;
                try {
                    Object parsed = jsonParser.parse(line);
                    if (parsed instanceof JSONObject) {
                        countKeysInJsonRecord((JSONObject) parsed, keyValueCounter);
                    } else {
                        logger.error("Skipping line " + lineNumber + " of " + input_filename
                                + ": not a JSON object");
                    }
                } catch (ParseException e) {
                    // One bad record must not discard the rest of the file.
                    logger.error("Skipping line " + lineNumber + " of " + input_filename
                            + ": malformed JSON", e);
                }
            }
        } catch (IOException | RuntimeException e) {
            // Deliberately best-effort: keep the counts gathered so far and let the caller continue.
            logger.error("Failed to process file " + input_filename, e);
        }
        return keyValueCounter;
    }

    /**
     * Records every top-level key of one JSON object in {@code keyValueCounter}: the key's counter
     * is incremented and the length of the value's string form is passed to
     * {@code putValueLength}. Keys whose value is JSON {@code null} are not counted; they are
     * logged as errors unless the key is {@code user_agent}.
     *
     * @param jsonObject      the parsed record
     * @param keyValueCounter the accumulator to update
     */
    private static void countKeysInJsonRecord(JSONObject jsonObject, KeyValueCounter keyValueCounter) {
        for (Object rawKey : jsonObject.keySet()) {
            String jsonkey = (String) rawKey; // same cast the typed for-each performed before
            Object jsonValue = jsonObject.get(jsonkey);
            if (jsonValue != null) {
                int lenValue = jsonValue.toString().length();
                keyValueCounter.incrementKeyCounter(jsonkey);
                keyValueCounter.putValueLength(jsonkey, lenValue);
            } else if (!NULLABLE_KEY.equals(jsonkey)) {
                logger.error("Errot typing to get jsonkey " + jsonkey);
            }
        }
    }

    /**
     * Logs the sorted report and writes it to {@code outputFile} as UTF-8 (the same charset the
     * input is read with), creating missing parent directories first.
     *
     * @param keyValueCounter the statistics to report
     * @param outputFile      path of the report file to create or overwrite
     * @throws IOException if the parent directory cannot be created or the file cannot be written
     */
    private static void writeSchema(KeyValueCounter keyValueCounter, String outputFile) throws IOException {
        String schema = sortOutputByKey(keyValueCounter);
        logger.info("---------------");
        logger.info(schema);
        logger.info("saving output to file: " + outputFile);

        File target = new File(outputFile);
        // getParentFile() is null for a bare file name such as "schema.txt".
        File parent = target.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Unable to create output directory: " + parent);
        }
        try (PrintWriter out = new PrintWriter(target, "UTF-8")) {
            out.print(schema);
            // PrintWriter swallows IOExceptions; checkError() flushes and exposes them.
            if (out.checkError()) {
                throw new IOException("Failed writing output file: " + outputFile);
            }
        }
    }

    /**
     * Renders the counter as text, one line per key in ascending key order. Each line holds the
     * key, its {@code keyCounter} entry and its {@code valueLength} entry, separated by tabs and
     * terminated by {@code \n}.
     *
     * @param outputKetValueCounter the statistics to render
     * @return the tab-separated report (empty string when there are no keys)
     */
    private static String sortOutputByKey(KeyValueCounter outputKetValueCounter) {
        Set<String> set = outputKetValueCounter.keyCounter.keySet();
        ArrayList<String> list = new ArrayList<String>(set);
        Collections.sort(list);

        StringBuilder sb = new StringBuilder();
        for (String key : list) {
            sb.append(key).append("\t").append(outputKetValueCounter.keyCounter.get(key)).append("\t")
                    .append(outputKetValueCounter.valueLength.get(key)).append("\n");
        }
        return sb.toString();
    }

    /**
     * Entry point. Expects {@code <inputFolderOrFile> <outputFile>}; with any other argument count
     * the default paths are used. log4j is configured from {@code log4j.properties} in the current
     * working directory. The report is written for both folder and single-file input.
     *
     * @param args command-line arguments
     * @throws FileNotFoundException if the input folder cannot be listed or the output cannot be opened
     * @throws IOException           if the report cannot be written
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        for (String s : args) {
            System.out.println(s);
        }

        String inputFolder = DEFAULT_INPUT;
        String outputFile = DEFAULT_OUTPUT;

        if (args.length != 2) {
            System.out.println("No arguments provided, using default values");
            System.out.println("InputFolder/File: " + inputFolder);
            System.out.println("OutputFile: " + outputFile);
        } else {
            inputFolder = args[0];
            outputFile = args[1];
        }

        if ((new File(outputFile)).isDirectory()) {
            System.err.println("Error output file cannot be a directory");
            return;
        }

        String logConfigPath = Paths.get(System.getProperty("user.dir"), "log4j.properties").toString();
        System.out.println("log config file used: " + logConfigPath);
        PropertyConfigurator.configure(logConfigPath);
        logger.info("log config file used: " + logConfigPath);

        // Ask the file system instead of relying only on a trailing Windows separator; the
        // trailing "\\" test is kept so every path that used to be treated as a folder still is.
        boolean isFolder = new File(inputFolder).isDirectory() || inputFolder.endsWith("\\");
        if (isFolder) {
            logger.info("Detected source folder");
            countKeysInJsonRecordsFolder(inputFolder, outputFile);
        } else {
            logger.info("Detected source file");
            // The single-file result used to be discarded; write it like the folder result.
            writeSchema(countKeysInJsonRecordsFile(inputFolder), outputFile);
        }
    }
}