qa.qcri.nadeef.console.Console.java Source code

Java tutorial

Introduction

Here is the source code for qa.qcri.nadeef.console.Console.java

Source

/*
 * QCRI, NADEEF LICENSE
 * NADEEF is an extensible, generalized and easy-to-deploy data cleaning platform built at QCRI.
 * NADEEF means "Clean" in Arabic
 *
 * Copyright (c) 2011-2013, Qatar Foundation for Education, Science and Community Development (on
 * behalf of Qatar Computing Research Institute) having its principle place of business in Doha,
 * Qatar with the registered address P.O box 5825 Doha, Qatar (hereinafter referred to as "QCRI")
 *
 * NADEEF has patent pending nevertheless the following is granted.
 * NADEEF is released under the terms of the MIT License, (http://opensource.org/licenses/MIT).
 */

package qa.qcri.nadeef.console;

import com.google.common.base.Stopwatch;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import jline.console.ConsoleReader;
import jline.console.completer.*;
import qa.qcri.nadeef.core.datamodel.CleanPlan;
import qa.qcri.nadeef.core.datamodel.NadeefConfiguration;
import qa.qcri.nadeef.core.datamodel.Rule;
import qa.qcri.nadeef.core.pipeline.CleanExecutor;
import qa.qcri.nadeef.core.pipeline.UpdateExecutor;
import qa.qcri.nadeef.core.util.Bootstrap;
import qa.qcri.nadeef.core.util.CSVTools;
import qa.qcri.nadeef.core.util.sql.DBInstaller;
import qa.qcri.nadeef.core.util.sql.SQLDialectBase;
import qa.qcri.nadeef.core.util.sql.SQLDialectFactory;
import qa.qcri.nadeef.tools.CommonTools;
import qa.qcri.nadeef.tools.DBConfig;
import qa.qcri.nadeef.tools.PerfReport;
import qa.qcri.nadeef.tools.Tracer;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
 * User interactive console.
 *
 */
public class Console {

    //<editor-fold desc="Private fields">
    /** Banner printed once when the console starts. */
    private static final String logo = "   _  __        __        _____\n" + "  / |/ /__ ____/ /__ ___ /   _/\n"
            + " /    / _ `/ _  / -_) -_)   _/\n" + "/_/|_/\\_,_/\\_,_/\\__/\\__/ __/\n"
            + "Data Cleaning solution (Build " + System.getenv("BuildVersion") + ", using Java "
            + System.getProperty("java.version") + ").\n"
            + "Copyright (C) Qatar Computing Research Institute, 2013 - Present (http://da.qcri.org).";

    /** Hint printed below the banner. */
    private static final String helpInfo = "Type 'help' to see what commands we have.";

    /** Prompt shown in front of every input line. */
    private static final String prompt = ":> ";

    /**
     * Command names offered by the tab completer. "list" is included because the main loop
     * dispatches it like every other command.
     */
    private static final String[] commands = { "load", "list", "run", "repair", "detect", "help", "set", "exit",
            "append" };

    /** Console used for all user interaction; assigned in {@code main}. */
    private static ConsoleReader console;

    /** Clean plans of the most recent load, or {@code null} when nothing has been loaded. */
    private static List<CleanPlan> cleanPlans;

    /** One executor per loaded clean plan, in the same order as {@code cleanPlans}. */
    private static List<CleanExecutor> executors = Lists.newArrayList();

    private static Tracer tracer = Tracer.getTracer(Console.class);

    /** Index of the executor used by the last single-rule detection, or -1 when there is none. */
    private static int lastExecutorIndex = -1;

    //</editor-fold>

    //<editor-fold desc="Detect Thread class">
    /**
     * Runnable which invokes the detection step of one executor, so that detection can be
     * started on its own thread.
     */
    private static class DetectRunnable implements Runnable {
        /** Executor whose {@code detect()} is invoked by {@link #run()}. */
        private final CleanExecutor target;

        public DetectRunnable(CleanExecutor cleanExecutor) {
            target = cleanExecutor;
        }

        /** Calls {@code detect()} on the wrapped executor. */
        @Override
        public void run() {
            target.detect();
        }
    }
    //</editor-fold>

    //<editor-fold desc="Repair Thread class">

    /**
     * Runnable which invokes the repair step of one executor, so that repair can be
     * started on its own thread.
     */
    private static class RepairRunnable implements Runnable {
        /** Executor whose {@code repair()} is invoked by {@link #run()}. */
        private final CleanExecutor target;

        public RepairRunnable(CleanExecutor executor) {
            target = executor;
        }

        /** Calls {@code repair()} on the wrapped executor. */
        @Override
        public void run() {
            target.repair();
        }
    }

    //</editor-fold>

    //<editor-fold desc="Clean Thread class">

    /**
     * Runnable which performs a full clean on one executor: detection first, then repair.
     */
    private static class CleanRunnable implements Runnable {
        /** Executor which is detected and then repaired by {@link #run()}. */
        private final CleanExecutor target;

        public CleanRunnable(CleanExecutor executor) {
            target = executor;
        }

        /** Calls {@code detect()} followed by {@code repair()} on the wrapped executor. */
        @Override
        public void run() {
            target.detect();
            target.repair();
        }
    }

    //</editor-fold>
    /**
     * Entry point of the interactive console.
     *
     * Bootstraps NADEEF, sets up the console with command / file name completion, prints the
     * banner and then reads commands line by line until "exit" is typed or the input ends.
     * Bootstrap is always shut down before the JVM exits with status 0.
     *
     * @param args user input.
     */
    public static void main(String[] args) {
        try {
            // bootstrap Nadeef and measure how long the startup takes.
            Stopwatch bootTimer = Stopwatch.createStarted();
            Bootstrap.start();

            console = new ConsoleReader();
            Tracer.setConsole(new ConsoleReaderAdaptor(console));
            List<Completer> completers = Arrays.asList(new StringsCompleter(commands), new FileNameCompleter(),
                    new NullCompleter());
            console.addCompleter(new ArgumentCompleter(completers));

            // greeting screen
            console.clearScreen();
            console.println(logo);
            console.println();
            console.println(helpInfo);
            console.println();
            console.drawLine();
            console.setPrompt(prompt);
            console.println("Your NADEEF started in " + bootTimer.elapsed(TimeUnit.MILLISECONDS) + " ms.");

            for (String input = console.readLine(); input != null; input = console.readLine()) {
                String line = input.trim();
                String[] tokens = line.split(" ");

                if (tokens.length == 0) {
                    continue;
                }

                // clear the statistics for every run.
                PerfReport.clear();
                try {
                    boolean keepRunning = dispatchCommand(tokens[0], line);
                    if (!keepRunning) {
                        break;
                    }
                } catch (Exception ex) {
                    console.println("Oops, something is wrong. Please check the log in the output dir.");

                    tracer.err("", ex);
                }
            }
        } catch (Exception ex) {
            try {
                tracer.err("Bootstrap failed", ex);
            } catch (Exception ignore) {
            }
        } finally {
            Bootstrap.shutdown();
        }

        System.exit(0);
    }

    /**
     * Routes one console line to the matching command handler. Command names are matched
     * case-insensitively.
     *
     * @param command first token of the line.
     * @param line the complete, trimmed input line which is handed to the handler.
     * @return {@code false} when the command is "exit", {@code true} otherwise.
     * @throws Exception whatever the invoked handler throws.
     */
    private static boolean dispatchCommand(String command, String line) throws Exception {
        if (command.equalsIgnoreCase("exit")) {
            return false;
        }

        if (command.equalsIgnoreCase("load")) {
            load(line);
        } else if (command.equalsIgnoreCase("list")) {
            list();
        } else if (command.equalsIgnoreCase("help")) {
            printHelp();
        } else if (command.equalsIgnoreCase("detect")) {
            detect(line);
        } else if (command.equalsIgnoreCase("repair")) {
            repair(line);
        } else if (command.equalsIgnoreCase("run")) {
            run(line);
        } else if (command.equalsIgnoreCase("append")) {
            append(line);
        } else if (command.equalsIgnoreCase("set")) {
            set(line);
        } else if (!Strings.isNullOrEmpty(command)) {
            // an empty line is silently ignored, anything else is unknown.
            console.println("I don't know this command.");
        }
        return true;
    }

    /**
     * Handles the "load" command: replaces the currently loaded clean plans with the ones
     * described by the given file and creates one executor per plan.
     *
     * Existing executors are shut down first. State which refers to the previous plans
     * ({@code cleanPlans}, {@code lastExecutorIndex}) is reset, so a failed load leaves the
     * console in the "nothing loaded" state instead of a mix of old plans and no executors.
     *
     * @param cmdLine the complete command line, expected as {@code load <file>}.
     * @throws IOException when writing to the console or closing the file fails.
     */
    private static void load(String cmdLine) throws IOException {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // split on runs of whitespace so that repeated blanks do not create empty tokens.
        String[] splits = cmdLine.trim().split("\\s+");
        if (splits.length != 2) {
            console.println("Invalid load command. Run load <Nadeef config file>.");
            return;
        }
        String fileName = splits[1];
        File file = CommonTools.getFile(fileName);

        // shutdown existing executors
        for (CleanExecutor executor : executors) {
            executor.shutdown();
        }
        executors.clear();

        // the old plans and the last-detection marker refer to executors which are gone now.
        cleanPlans = null;
        lastExecutorIndex = -1;

        FileReader reader = null;
        try {
            reader = new FileReader(file);
            DBConfig dbConfig = NadeefConfiguration.getDbConfig();
            List<CleanPlan> loadedPlans = CleanPlan.create(reader, dbConfig);
            for (CleanPlan cleanPlan : loadedPlans) {
                executors.add(new CleanExecutor(cleanPlan, dbConfig));
            }
            // publish the plans only once every executor has been created.
            cleanPlans = loadedPlans;
        } catch (Exception ex) {
            tracer.err("Loading CleanPlan failed.", ex);
            // do not keep executors of a partially loaded plan around.
            for (CleanExecutor executor : executors) {
                executor.shutdown();
            }
            executors.clear();
            return;
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        console.println(
                cleanPlans.size() + " rules loaded in " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms.");
        stopwatch.stop();
    }

    /**
     * Handles the "append" command: appends the rows of a data file to one of the tables of
     * the rule used by the last single-rule detection and hands the new tuples to that
     * rule's executor via {@code incrementalAppend}.
     *
     * @param cmdLine the complete command line, expected as
     *                {@code append <new data file> [table index]}; the table index defaults to 0.
     * @throws Exception when appending the data or the incremental step fails.
     */
    private static void append(String cmdLine) throws Exception {
        String[] splits = cmdLine.trim().split("\\s+");
        if (splits.length != 2 && splits.length != 3) {
            console.println("Invalid append command. Run append <new data file> [table index].");
            return;
        }

        // also reject a marker which no longer matches the loaded plans / executors.
        if (lastExecutorIndex == -1 || cleanPlans == null || lastExecutorIndex >= cleanPlans.size()
                || lastExecutorIndex >= executors.size()) {
            console.println("There is no detection just executed.");
            return;
        }

        int tableIndex = 0;
        if (splits.length == 3) {
            try {
                tableIndex = Integer.parseInt(splits[2]);
            } catch (NumberFormatException ex) {
                console.println("Invalid append command. Run append <new data file> [table index].");
                return;
            }
        }

        String fileName = splits[1];
        File file = CommonTools.getFile(fileName);
        CleanPlan cleanPlan = cleanPlans.get(lastExecutorIndex);
        DBConfig dbConfig = cleanPlan.getSourceDBConfig();
        Rule rule = cleanPlan.getRule();
        if (tableIndex < 0 || tableIndex >= rule.getTableNames().size()) {
            console.println("Out of index.");
            return;
        }
        String tableName = (String) rule.getTableNames().get(tableIndex);
        SQLDialectBase dialectManager = SQLDialectFactory.getDialectManagerInstance(dbConfig.getDialect());

        HashSet<Integer> newTuples = CSVTools.append(dbConfig, dialectManager, tableName, file);
        executors.get(lastExecutorIndex).incrementalAppend(tableName, newTuples);
    }

    /**
     * Handles the "list" command: prints the number of loaded rules followed by one line per
     * rule with its index and rule name.
     *
     * @throws IOException when writing to the console fails.
     */
    private static void list() throws IOException {
        if (cleanPlans == null) {
            console.println("There is no rule loaded.");
            return;
        }

        int ruleCount = cleanPlans.size();
        console.println("There are " + ruleCount + " rules loaded.");

        // the printed position is the id accepted by the detect / repair / run commands.
        int position = 0;
        for (CleanPlan plan : cleanPlans) {
            String ruleName = plan.getRule().getRuleName();
            console.println("\t" + position + ": " + ruleName);
            position++;
        }
    }

    /**
     * Handles the "detect" command: runs violation detection for one rule (when an id is
     * given) or for every loaded rule, printing a progress bar while each detection runs.
     *
     * Remembers the executor index in {@code lastExecutorIndex} when a single rule was
     * selected or only one rule is loaded; otherwise the marker is reset to -1.
     *
     * @param cmd the complete command line, expected as {@code detect [id number]}.
     * @throws IOException when writing to the console fails.
     * @throws InterruptedException when the thread is interrupted while waiting for progress.
     */
    private static void detect(String cmd) throws IOException, InterruptedException {
        // split on runs of whitespace so that repeated blanks do not create empty tokens.
        String[] tokens = cmd.trim().split("\\s+");
        if (tokens.length > 2) {
            console.println("Wrong detect command. Run detect [id number] instead.");
            return;
        }

        if (executors == null || executors.size() == 0) {
            console.println("There is no rule loaded.");
            return;
        }

        int index = -1;
        lastExecutorIndex = -1;
        if (tokens.length == 2) {
            try {
                index = Integer.parseInt(tokens[1]);
            } catch (NumberFormatException ex) {
                // a non-numeric id is a usage error, not an internal failure.
                console.println("Wrong detect command. Run detect [id number] instead.");
                return;
            }
            // validate against the list which is actually indexed below.
            if (index < 0 || index >= executors.size()) {
                console.println("Out of index.");
                return;
            }
            lastExecutorIndex = index;
        }

        if (executors.size() == 1) {
            lastExecutorIndex = 0;
        }

        for (int i = 0; i < executors.size(); i++) {
            if (index != -1 && i != index) {
                continue;
            }

            CleanExecutor executor = executors.get(i);
            Thread thread = new Thread(new DetectRunnable(executor));
            thread.start();

            // poll the progress once per second until the detection thread ends.
            do {
                Thread.sleep(1000);
                double percentage = executor.getDetectProgress();
                printProgress(percentage, "DETECT");
            } while (thread.isAlive());

            // print out the final result.
            String ruleName = executor.getCleanPlan().getRule().getRuleName();
            double percentage = executor.getDetectProgress();
            printProgress(percentage, "DETECT");
            console.println();
            console.flush();
            tracer.info(PerfReport.generateDetectSummary(ruleName));
        }
    }

    /**
     * Handles the "repair" command: runs the repair step for one rule (when an id is given)
     * or for every loaded rule, printing a progress bar while each repair runs.
     *
     * @param cmd the complete command line, expected as {@code repair [id number]}.
     * @throws IOException when writing to the console fails.
     * @throws InterruptedException when the thread is interrupted while waiting for progress.
     */
    private static void repair(String cmd) throws IOException, InterruptedException {
        // split on runs of whitespace so that repeated blanks do not create empty tokens.
        String[] tokens = cmd.trim().split("\\s+");
        if (tokens.length > 2) {
            console.println("Wrong repair command. Run repair [id number] instead.");
            // stop here, a malformed command must not start a repair.
            return;
        }

        if (executors == null || executors.size() == 0) {
            console.println("There is no rule loaded.");
            return;
        }

        int index = -1;
        if (tokens.length == 2) {
            try {
                index = Integer.parseInt(tokens[1]);
            } catch (NumberFormatException ex) {
                // a non-numeric id is a usage error, not an internal failure.
                console.println("Wrong repair command. Run repair [id number] instead.");
                return;
            }
            // either bound being violated makes the id invalid.
            if (index < 0 || index >= executors.size()) {
                console.println("Out of index.");
                return;
            }
        }

        for (int i = 0; i < executors.size(); i++) {
            if (index != -1 && index != i) {
                continue;
            }

            CleanExecutor executor = executors.get(i);
            Thread thread = new Thread(new RepairRunnable(executor));
            thread.start();

            // poll the progress once per second until the repair thread ends.
            do {
                Thread.sleep(1000);
                double percentage = executor.getRepairProgress();
                printProgress(percentage, "REPAIR");
            } while (thread.isAlive());

            // print out the final result.
            String ruleName = executor.getCleanPlan().getRule().getRuleName();
            double percentage = executor.getRepairProgress();
            printProgress(percentage, "REPAIR");
            console.println();
            console.flush();
            tracer.info(PerfReport.generateRepairSummary(ruleName));
        }
    }

    /**
     * Handles the "run" command: repeatedly runs detect + repair for one rule (when an id is
     * given) or for every loaded rule, followed by an update step, until the update step
     * changes no cell or the configured iteration bound is passed. Prints a progress bar
     * while cleaning and the detect / repair / update summaries at the end.
     *
     * @param cmd the complete command line, expected as {@code run [id number]}.
     * @throws IOException when writing to the console fails.
     * @throws InterruptedException when the thread is interrupted while waiting for progress.
     */
    private static void run(String cmd) throws IOException, InterruptedException {
        // split on runs of whitespace so that repeated blanks do not create empty tokens.
        String[] tokens = cmd.trim().split("\\s+");
        if (tokens.length > 2) {
            console.println("Wrong run command. Run run [id number] instead.");
            // stop here, a malformed command must not start a cleaning run.
            return;
        }

        if (executors == null || executors.size() == 0) {
            console.println("There is no rule loaded.");
            return;
        }

        int index = -1;
        if (tokens.length == 2) {
            try {
                index = Integer.parseInt(tokens[1]);
            } catch (NumberFormatException ex) {
                // a non-numeric id is a usage error, not an internal failure.
                console.println("Wrong run command. Run run [id number] instead.");
                return;
            }
            // either bound being violated makes the id invalid.
            if (index < 0 || index >= executors.size()) {
                console.println("Out of index.");
                return;
            }
        }

        // TODO: Here the updater only has one source connection, it is wrong since
        // a update can be in multiple sources from different DB. Think about a pattern
        // to fix it.
        UpdateExecutor updateExecutor = new UpdateExecutor(cleanPlans.get(0), NadeefConfiguration.getDbConfig());
        int updatedCell = 0;
        int maxIterationNumber = 0;

        do {
            try {
                DBInstaller.cleanExecutionDB();
            } catch (Exception ex) {
                tracer.err("Cleaning database failed.", ex);
            }
            for (int i = 0; i < executors.size(); i++) {
                if (index != -1 && index != i) {
                    continue;
                }

                CleanExecutor executor = executors.get(i);
                Thread thread = new Thread(new CleanRunnable(executor));
                thread.start();

                // poll the progress once per second until the clean thread ends.
                do {
                    Thread.sleep(1000);
                    double percentage = executor.getRepairProgress();
                    printProgress(percentage, "CLEAN");
                } while (thread.isAlive());

                // print out the final result.
                double percentage = executor.getRepairProgress();
                printProgress(percentage, "CLEAN");
                console.println();
                console.flush();
            }

            // do the final holistic update
            updateExecutor.run();
            updatedCell = updateExecutor.getUpdateCellCount();
            maxIterationNumber++;
            // NOTE(review): with "<=" the loop can run getMaxIterationNumber() + 1 times;
            // confirm whether the configured value is meant to be inclusive.
        } while (updatedCell != 0 && maxIterationNumber <= NadeefConfiguration.getMaxIterationNumber());

        // Print overall statistics
        for (int i = 0; i < executors.size(); i++) {
            if (index != -1 && index != i) {
                continue;
            }
            CleanExecutor executor = executors.get(i);
            String ruleName = executor.getCleanPlan().getRule().getRuleName();
            tracer.info(PerfReport.generateDetectSummary(ruleName));
            tracer.info(PerfReport.generateRepairSummary(ruleName));
        }
        console.println();
        tracer.info(PerfReport.generateUpdateSummary());
    }

    /**
     * Handles the "set" command: toggles one of the options "verbose", "info" or
     * "alwaysCompile" (matched case-insensitively) and prints the new state.
     *
     * @param cmd the complete command line, expected as {@code set <option>}.
     * @throws IOException when writing to the console fails.
     */
    private static void set(String cmd) throws IOException {
        String[] tokens = cmd.trim().split("\\s+");
        // a bare "set" has no option token to look at.
        if (tokens.length < 2) {
            console.println("Invalid set command. Run set <verbose|info|alwaysCompile>.");
            return;
        }

        String option = tokens[1];
        if (option.equalsIgnoreCase("verbose")) {
            boolean mode = !Tracer.isVerboseOn();
            console.println("set verbose " + (mode ? "on" : "off"));
            Tracer.setVerbose(mode);
        } else if (option.equalsIgnoreCase("info")) {
            boolean mode = !Tracer.isInfoOn();
            console.println("set info " + (mode ? "on" : "off"));
            Tracer.setInfo(mode);
        } else if (option.equalsIgnoreCase("alwaysCompile")) {
            boolean mode = !NadeefConfiguration.getAlwaysCompile();
            console.println("set alwaysCompile " + (mode ? "on" : "off"));
            NadeefConfiguration.setAlwaysCompile(mode);
        } else {
            // tell the user instead of silently ignoring a mistyped option.
            console.println("Unknown option '" + option + "'. Run set <verbose|info|alwaysCompile>.");
        }
    }

    /**
     * Handles the "help" command: prints the usage of every console command, including
     * the "set" command which toggles console options.
     *
     * @throws IOException when writing to the console fails.
     */
    private static void printHelp() throws IOException {
        final String help = " |NADEEF console usage:\n" + " |----------------------------------\n"
                + " |help : Print out this help information.\n" + " |\n" + " |load <input CleanPlan file> :\n"
                + " |    load a NADEEF clean plan.\n" + " |\n" + " |detect [rule id] :\n"
                + " |    start the violation detection with a given rule id number.\n" + " |\n" + " |list : \n"
                + " |    list available rules.\n" + " |\n" + " |repair [rule id] :\n"
                + " |    repair the data source with a given rule id number.\n" + " |\n" + " |run [rule id]:\n"
                + " |    run both detect and repair with a given rule id number. \n" + " |\n"
                + " |append <new data file> [table index]:\n"
                + " |    appending new data into the source from the last detection. \n" + " |\n"
                + " |set <verbose|info|alwaysCompile> :\n"
                + " |    toggle the given option on or off.\n" + " |\n" + " |exit :\n"
                + " |    exit the console.\n";
        console.println(help);
    }

    //<editor-fold desc="Private helpers">
    /**
     * Redraws the current console line as a textual progress bar of the form
     * {@code [TITLE][=====>     ]NN %}.
     *
     * @param percentage progress as a fraction; it is scaled by 50 for the bar and by 100
     *                   for the printed number.
     * @param title label printed in front of the bar.
     * @throws IOException when writing to the console fails.
     */
    private static void printProgress(double percentage, String title) throws IOException {
        console.redrawLine();

        final int barWidth = 50;
        int filled = (int) Math.round(percentage * 50.f);

        StringBuilder bar = new StringBuilder(512);
        bar.append('[');
        bar.append(title);
        bar.append("][");

        // completed part of the bar.
        int drawn = 0;
        while (drawn < filled) {
            bar.append("=");
            drawn++;
        }

        // arrow head plus padding, only while the bar is not yet full.
        if (filled < barWidth) {
            bar.append(">");
            for (int padding = barWidth - filled; padding > 0; padding--) {
                bar.append(" ");
            }
        }

        bar.append("]").append(Math.round(percentage * 100)).append(" %");

        console.print(bar.toString());
        console.flush();
    }
    //</editor-fold>
}