List of usage examples for weka.classifiers.trees.J48
From source file:c4.pkg5crossv.Classifier.java
public static void C45() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); //Ustawienie atrybutu decyzyjnego (ostatni atrybut) data.setClassIndex(data.numAttributes() - 1); //OPCJE:/* w w w. j a va 2s . com*/ //-U -> budowa drzewa bez przycinania (ostre liscie) //-C -> <wspolczynnik dokladnosci> - ustawienie wspolczynnika dokladnosci dla lisci (default 0.25) //-M -> ustawienie minimalnej liczby obiektow w lisciu dla ktorej lisc nie jest dzielony (default 2) //Ustalenie opcji String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options); //Ustawienie opcji tree.buildClassifier(data); // Tworzenie klasyfikatora (drzewa) System.out.println(tree.toString()); //Wypisanie drzewa w formie tekstowej System.out.println("TRAIN&TEST"); trainAndTest(); }
From source file:c4.pkg5crossv.Classifier.java
public static void trainAndTest() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); data.setClassIndex(data.numAttributes() - 1); //Losowy podzial tablicy data.randomize(new Random()); double percent = 60.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options);// www. j av a 2 s . c o m tree.buildClassifier(trainData); Evaluation eval2 = new Evaluation(trainData); eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation }
From source file:ca.uottawa.balie.SentenceBoundariesRecognition.java
License:Open Source License
/**
 * Trains the sentence-boundary-detection model.
 *
 * Reads the training corpus, converts each sentence into token lists,
 * declares the Weka attribute layout (token-category features around each
 * candidate boundary plus spacing features), collects one training instance
 * per token transition, and finally trains a J48 decision tree.
 *
 * @return the trained {@code WekaLearner} wrapping a J48 decision tree
 * @throws Error if the training corpus cannot be read (unrecoverable here)
 */
private WekaLearner TrainModel() {
    // Load the raw training sentences; a missing corpus is fatal.
    ArrayList<String> alSentence = null;
    try {
        alSentence = ReadCorpus(Balie.SBR_TRAINING_CORPUS_PC);
    } catch (Exception e) {
        throw new Error("SBD Training corpus was not found");
    }
    ArrayList<TokenList> alTokenLists = GetTokenLists(alSentence);

    // Prepare the classifier: nominal value set shared by the token-category
    // attributes below.
    WekaAttribute[] wekaAttributes = new WekaAttribute[NUM_FEATURES];
    FastVector attrVal = new FastVector(NUM_ATTRIBUTES);
    attrVal.addElement(VAL_PERIOD);
    attrVal.addElement(VAL_PERIOD_LIKE);
    attrVal.addElement(VAL_OPEN_BRACKET);
    attrVal.addElement(VAL_CLOSE_BRACKET);
    attrVal.addElement(VAL_QUOTE);
    attrVal.addElement(VAL_PUNCT);
    attrVal.addElement(VAL_NEW_LINE);
    attrVal.addElement(VAL_LINE_FEED);
    attrVal.addElement(VAL_LF_IN_CAP);
    attrVal.addElement(VAL_NL_IN_CAP);
    attrVal.addElement(VAL_CAPITAL);
    attrVal.addElement(VAL_DIGIT);
    attrVal.addElement(VAL_ABBREVIATION);
    attrVal.addElement(VAL_OTHER);
    attrVal.addElement(VAL_NULL);

    // Feature layout: attributes 2 and 4 are built without a value set —
    // presumably numeric spacing features; confirm in WekaAttribute.
    wekaAttributes[0] = new WekaAttribute("SentenceBeginning", attrVal);
    wekaAttributes[1] = new WekaAttribute("LastToken", attrVal);
    wekaAttributes[2] = new WekaAttribute("Last2CurrentSpace");
    wekaAttributes[3] = new WekaAttribute("CurrentToken", attrVal);
    wekaAttributes[4] = new WekaAttribute("Current2NextSpace");
    wekaAttributes[5] = new WekaAttribute("NextToken", attrVal);

    // Binary class: boundary vs. not-a-boundary.
    String[] strClass = new String[] { IS_SENTENCE_BOUNDARY, IS_NOT_SENTENCE_BOUNDARY };
    WekaLearner wl = new WekaLearner(wekaAttributes, strClass);

    // Create a training instance for each token transition.
    for (int i = 0; i != alTokenLists.size(); ++i) {
        TokenList alCurrentTokenList = (TokenList) alTokenLists.get(i);
        // The following token list (if any) provides look-ahead context.
        TokenList alNextTokenList = null;
        if (i != alTokenLists.size() - 1) {
            alNextTokenList = (TokenList) alTokenLists.get(i + 1);
        }
        // Describe each token of the current list as one candidate instance.
        for (int j = 0; j != alCurrentTokenList.Size(); ++j) {
            Object[] strInstance = new Object[NUM_FEATURES];
            boolean bTrivialInstance = DescribeTrainTestInstance(alCurrentTokenList, alNextTokenList, j,
                    strInstance);
            // Each sentence's last token is, by construction of the corpus,
            // a sentence boundary.
            String curClass = IS_NOT_SENTENCE_BOUNDARY;
            if (j == alCurrentTokenList.Size() - 1) {
                curClass = IS_SENTENCE_BOUNDARY;
                // NOTE(review): presumably updates internal sentence-start
                // state for the next instance — confirm.
                YieldSentenceBeginning();
            }
            // Do not add trivial examples.
            if (!bTrivialInstance) {
                wl.AddTrainInstance(strInstance, curClass);
            }
        }
    }

    // Train a J48 decision tree over the collected instances.
    J48 j48 = new J48();
    wl.CreateModel(j48);
    return wl;
}
From source file:clasificador.Perceptron.java
public void J48() { try {/*from w w w.j a v a2s . c o m*/ //INSTANCIAS PARA ENTRENAMIENTO DEL CLASIFICADOR ConverterUtils.DataSource converU = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro.arff"); Instances instancias = converU.getDataSet(); instancias.setClassIndex(instancias.numAttributes() - 1); //INSTANCIAS PARA TEST DEL MODELO ConverterUtils.DataSource convertest = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro5.arff"); Instances testInstance = convertest.getDataSet(); testInstance.setClassIndex(testInstance.numAttributes() - 1); //INSTANCIAS PARA PREDICCIN ConverterUtils.DataSource converPredict = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro1.arff"); Instances predictInstance = converPredict.getDataSet(); predictInstance.setClassIndex(predictInstance.numAttributes() - 1); //CONTRUCCIN DEL CLASIFICADOR J48 perceptron = new J48(); perceptron.buildClassifier(instancias); //Evaluar las instancias Evaluation ev = new Evaluation(instancias); //EVALUAR MODELO DE ENTRENAMIENTO ev.evaluateModel(perceptron, instancias); //System.out.println(instancias); System.out.println("\n\nENTRENAMIENTO DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(ev.toSummaryString("_____RESULTADO_____", true)); System.out.println(ev.toMatrixString("_____Matriz confusion___")); //PREDECIR CON EL MODELO Evaluation evPredict = new Evaluation(instancias); evPredict.evaluateModel(perceptron, predictInstance); //System.out.println(instancias); System.out.println("\n\nPREDICCIN DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(evPredict.toSummaryString("_____RESULTADO_____", false)); System.out.println(evPredict.toMatrixString("_____Matriz confusion___")); //MOSTRAR VALORES for (int i = 0; i < evPredict.evaluateModel(perceptron, predictInstance).length; i++) { resultado = evPredict.evaluateModel(perceptron, predictInstance)[i]; polaridad += polaridad(resultado) + "\n"; 
//System.out.println("Se clasifica como: "+resultado + "que es: " + polaridad(resultado)); } archivoResultados(polaridad); //TEST DEL MODELO CON LOS DATOS DEL CLASIFICADOR Evaluation evtesting = new Evaluation(instancias); evtesting.evaluateModel(perceptron, testInstance); //System.out.println(instancias); System.out.println("\n\nTEST DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(evtesting.toSummaryString("_____RESULTADO_____", false)); System.out.println(evtesting.toMatrixString("_____Matriz confusion___")); } catch (Exception ex) { Logger.getLogger(Perceptron.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:classif.ExperimentsLauncher.java
License:Open Source License
public void launchJ48() { try {//from w w w .j a v a 2s.co m String algo = "J48"; System.out.println(algo); double testError = 0.0; J48 dt = new J48(); dt.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.evaluateModel(dt, test); testError = eval.errorRate(); System.out.println("TestError:" + testError + "\n"); System.out.println(dt.toSummaryString()); System.out.println(dt.graph()); System.out.println(eval.toSummaryString()); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception { String folder = args[0];//from w w w .j av a2 s.c o m String trainPath = folder + "MNISTtrain.arff"; String testPath = folder + "MNISTtest.arff"; System.out.println("Weka Timings"); Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath)))); mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1); Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath)))); mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1); //normalize range like into [0, 1] Normalize normalizeFilter = new Normalize(); normalizeFilter.setInputFormat(mnistTrainWeka); mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter); mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter); long start, end; System.out.println("RBF SVM (Full Cache)"); SMO smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("RBF SVM (No Cache)"); smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("Decision Tree C45"); J48 wekaC45 = new J48(); wekaC45.setUseLaplace(false); wekaC45.setCollapseTree(false); wekaC45.setUnpruned(true); wekaC45.setMinNumObj(2); wekaC45.setUseMDLcorrection(true); evalModel(wekaC45, mnistTrainWeka, mnistTestWeka); System.out.println("Random Forest 50 trees"); int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way RandomForest wekaRF = new RandomForest(); wekaRF.setNumExecutionSlots(1); wekaRF.setMaxDepth(0/*0 for unlimited*/); wekaRF.setNumFeatures(featuresToUse); wekaRF.setNumTrees(50); evalModel(wekaRF, mnistTrainWeka, 
mnistTestWeka); System.out.println("1-NN (brute)"); IBk wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Ball Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Cover Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("Logistic Regression LBFGS lambda = 1e-4"); Logistic logisticLBFGS = new Logistic(); logisticLBFGS.setRidge(1e-4); logisticLBFGS.setMaxIts(500); evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka); System.out.println("k-means (Loyd)"); int origClassIndex = mnistTrainWeka.classIndex(); mnistTrainWeka.setClassIndex(-1); mnistTrainWeka.deleteAttributeAt(origClassIndex); { long totalTime = 0; for (int i = 0; i < 10; i++) { SimpleKMeans wekaKMeans = new SimpleKMeans(); wekaKMeans.setNumClusters(10); wekaKMeans.setNumExecutionSlots(1); wekaKMeans.setFastDistanceCalc(true); start = System.currentTimeMillis(); wekaKMeans.buildClusterer(mnistTrainWeka); end = System.currentTimeMillis(); totalTime += (end - start); } System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average"); } }
From source file:com.github.fracpete.multisearch.optimize.J48ConfidenceFactor.java
License:Open Source License
/** * The first parameter must be dataset,//w ww .j a v a2 s . c o m * the (optional) second the class index (1-based, 'first' and 'last' * also supported). * * @param args the commandline options * @throws Exception if optimization fails for some reason */ public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("\nUsage: J48ConfidenceFactor <dataset> [classindex]\n"); System.exit(1); } // load data Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null); // configure classifier we want to optimize J48 j48 = new J48(); // configure multisearch MathParameter conf = new MathParameter(); conf.setProperty("confidenceFactor"); conf.setBase(10); conf.setMin(0.05); conf.setMax(0.75); conf.setStep(0.05); conf.setExpression("I"); MultiSearch multi = new MultiSearch(); multi.setClassifier(j48); multi.setSearchParameters(new AbstractParameter[] { conf }); SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_AUC, new DefaultEvaluationMetrics().getTags()); multi.setEvaluation(tag); // output configuration System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi)); // optimize System.out.println("\nOptimizing...\n"); multi.buildClassifier(data); System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier())); System.out.println("Best parameter: " + multi.getGenerator().evaluate(multi.getBestValues())); }
From source file:com.github.fracpete.multisearch.setupgenerator.J48ConfidenceFactor.java
License:Open Source License
/** * Outputs the commandlines.//w ww . j a va 2s .c o m * * @param args the commandline options * @throws Exception if setup generator fails for some reason */ public static void main(String[] args) throws Exception { // configure classifier we want to generate setups for J48 j48 = new J48(); // configure generator MathParameter conf = new MathParameter(); conf.setProperty("confidenceFactor"); conf.setBase(10); conf.setMin(0.05); conf.setMax(0.75); conf.setStep(0.05); conf.setExpression("I"); MultiSearch multi = new MultiSearch(); multi.setClassifier(j48); SetupGenerator generator = new SetupGenerator(); generator.setBaseObject(j48); generator.setParameters(new AbstractParameter[] { conf }); // output configuration System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator)); // output commandlines System.out.println("\nCommandlines:\n"); Enumeration<Serializable> enm = generator.setups(); while (enm.hasMoreElements()) System.out.println(Utils.toCommandLine(enm.nextElement())); }
From source file:com.relationalcloud.main.Explanation.java
License:Open Source License
/**
 * Entry point: loads configuration, connects to the database, extracts the
 * transaction workload, and for each referenced table trains a J48
 * classifier that "explains" the partitioning, optionally writing the
 * explanation/justification columns back to the database.
 *
 * @param args unused; configuration comes from the property file named by
 *             the {@code prop} system property
 */
public static void main(String[] args) {
    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register the JDBC driver.
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");
    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler.
    ExplanationHandler jh = new ExplanationHandler(ini);
    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {
        // Open DB connections: one to the application schema, one to
        // information_schema (used to load the schema metadata).
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed),
                    conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }

            // INSTANTIATE THE CLASSIFIER.
            // NOTE(review): "-P" is not an obviously documented J48 flag and
            // "-C" reads the pruning confidence from the property file —
            // confirm the option set against the Weka version in use.
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;

            // ATTRIBUTE FILTERING: reduce attributes via CFS subset evaluation
            // with a backwards greedy search (alternatives left commented out).
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();
                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            // Collect surviving attribute names, both for the report line and
            // for populating the explanation columns later.
            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2); // drop trailing ", "

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            // Release the unfiltered copy before training.
            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    // A single leaf means the tree sends every tuple to the
                    // same partition.
                    if (classifier.measureNumLeaves() == 1) {
                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }
                    }
                    // POPULATING THE justifiedpartition column with the result
                    // of this classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }
                } else {
                    // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");
        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/**
 * Single-attribute explanation analysis (deprecated in favor of the
 * {@code Explanation} entry point).
 *
 * For every table/column pair seen in the transaction trace, measures the
 * Pearson correlation between hashed column values and hashed partition
 * labels; when the absolute correlation exceeds the configured threshold,
 * trains a J48 tree on the (value, partition) pairs to explain the
 * partitioning.
 *
 * @param args unused; configuration comes from the property file named by
 *             the {@code prop} system property
 */
@Deprecated
public static void main(String[] args) {
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Load configuration from the property file; the partition-label column
    // depends on the partitioning method in use.
    String schemaname = ini.getProperty("schemaname");
    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }
    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register the JDBC driver.
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);
        Statement stmt = conn.createStatement();

        // NOTE: the paramenter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine the which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        // Parse every statement in the trace, timing the throughput.
        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }
        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);

            for (SimpleCount sc : wa.getFeatures(str)) {
                // Collect paired samples: hashed column value vs. hashed
                // partition label, joined via the access log.
                // NOTE(review): the SQL is built by string concatenation from
                // config/trace-derived names — acceptable only if those are
                // trusted; confirm.
                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();
                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";
                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);
                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];
                    // "unary" means every sampled row carries the same
                    // partition label.
                    boolean unary = true;
                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }
                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {
                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);
                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));
                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");
                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                // J48 needs a nominal class: convert the label
                                // column from numeric or string as appropriate.
                                Instances newData;
                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil
                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil
                                }

                                // NOTE(review): "-P" is not an obviously
                                // documented J48 flag — confirm against the
                                // Weka version in use.
                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options
                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE: "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }
                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }
                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}