Example usage for weka.classifiers.trees J48 J48

List of usage examples for weka.classifiers.trees J48 J48

Introduction

In this page you can find the example usage for weka.classifiers.trees J48 J48.

Prototype

J48

Source Link

Usage

From source file:c4.pkg5crossv.Classifier.java

public static void C45() throws FileNotFoundException, IOException, Exception {
    Instances data = DataLoad.loadData("./src/data/irysy.arff");

    //Ustawienie atrybutu decyzyjnego (ostatni atrybut)
    data.setClassIndex(data.numAttributes() - 1);

    //OPCJE:/* w w  w. j a va  2s  .  com*/
    //-U -> budowa drzewa bez przycinania (ostre liscie)
    //-C -> <wspolczynnik dokladnosci> - ustawienie wspolczynnika dokladnosci dla lisci (default 0.25)
    //-M -> ustawienie minimalnej liczby obiektow w lisciu dla ktorej lisc nie jest dzielony (default 2)

    //Ustalenie opcji
    String[] options = Utils.splitOptions("-U -M 10");

    J48 tree = new J48();
    tree.setOptions(options); //Ustawienie opcji
    tree.buildClassifier(data); // Tworzenie klasyfikatora (drzewa)

    System.out.println(tree.toString()); //Wypisanie drzewa w formie tekstowej

    System.out.println("TRAIN&TEST");
    trainAndTest();
}

From source file:c4.pkg5crossv.Classifier.java

public static void trainAndTest() throws FileNotFoundException, IOException, Exception {

    Instances data = DataLoad.loadData("./src/data/irysy.arff");
    data.setClassIndex(data.numAttributes() - 1);

    //Losowy podzial tablicy
    data.randomize(new Random());
    double percent = 60.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    String[] options = Utils.splitOptions("-U -M 10");
    J48 tree = new J48();
    tree.setOptions(options);//  www.  j av a  2  s .  c  o  m
    tree.buildClassifier(trainData);

    Evaluation eval2 = new Evaluation(trainData);
    eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold
    System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation
}

From source file:ca.uottawa.balie.SentenceBoundariesRecognition.java

License:Open Source License

private WekaLearner TrainModel() {
    ArrayList<String> alSentence = null;

    try {//from   w  w w. j  ava 2s  .c o  m
        alSentence = ReadCorpus(Balie.SBR_TRAINING_CORPUS_PC);
    } catch (Exception e) {
        throw new Error("SBD Training corpus was not found");
    }

    ArrayList<TokenList> alTokenLists = GetTokenLists(alSentence);

    // Let's prepare the classifier
    WekaAttribute[] wekaAttributes = new WekaAttribute[NUM_FEATURES];
    FastVector attrVal = new FastVector(NUM_ATTRIBUTES);
    attrVal.addElement(VAL_PERIOD);
    attrVal.addElement(VAL_PERIOD_LIKE);
    attrVal.addElement(VAL_OPEN_BRACKET);
    attrVal.addElement(VAL_CLOSE_BRACKET);
    attrVal.addElement(VAL_QUOTE);
    attrVal.addElement(VAL_PUNCT);
    attrVal.addElement(VAL_NEW_LINE);
    attrVal.addElement(VAL_LINE_FEED);
    attrVal.addElement(VAL_LF_IN_CAP);
    attrVal.addElement(VAL_NL_IN_CAP);
    attrVal.addElement(VAL_CAPITAL);
    attrVal.addElement(VAL_DIGIT);
    attrVal.addElement(VAL_ABBREVIATION);
    attrVal.addElement(VAL_OTHER);
    attrVal.addElement(VAL_NULL);

    wekaAttributes[0] = new WekaAttribute("SentenceBeginning", attrVal);
    wekaAttributes[1] = new WekaAttribute("LastToken", attrVal);
    wekaAttributes[2] = new WekaAttribute("Last2CurrentSpace");
    wekaAttributes[3] = new WekaAttribute("CurrentToken", attrVal);
    wekaAttributes[4] = new WekaAttribute("Current2NextSpace");
    wekaAttributes[5] = new WekaAttribute("NextToken", attrVal);
    String[] strClass = new String[] { IS_SENTENCE_BOUNDARY, IS_NOT_SENTENCE_BOUNDARY };
    WekaLearner wl = new WekaLearner(wekaAttributes, strClass);

    // Let's create an attribute for each token transition.
    for (int i = 0; i != alTokenLists.size(); ++i) {
        TokenList alCurrentTokenList = (TokenList) alTokenLists.get(i);
        TokenList alNextTokenList = null;
        if (i != alTokenLists.size() - 1) {
            alNextTokenList = (TokenList) alTokenLists.get(i + 1);
        }

        // Describe current attribute
        for (int j = 0; j != alCurrentTokenList.Size(); ++j) {
            Object[] strInstance = new Object[NUM_FEATURES];

            boolean bTrivialInstance = DescribeTrainTestInstance(alCurrentTokenList, alNextTokenList, j,
                    strInstance);

            String curClass = IS_NOT_SENTENCE_BOUNDARY;
            if (j == alCurrentTokenList.Size() - 1) {
                curClass = IS_SENTENCE_BOUNDARY;
                YieldSentenceBeginning();
            }

            // Do not add trivial examples
            if (!bTrivialInstance) {
                wl.AddTrainInstance(strInstance, curClass);
            }
        }
    }
    J48 j48 = new J48();
    wl.CreateModel(j48);

    return wl;
}

From source file:clasificador.Perceptron.java

public void J48() {
    try {/*from w  w w.j  a v  a2s . c  o m*/
        //INSTANCIAS PARA ENTRENAMIENTO DEL CLASIFICADOR
        ConverterUtils.DataSource converU = new ConverterUtils.DataSource(
                "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro.arff");
        Instances instancias = converU.getDataSet();
        instancias.setClassIndex(instancias.numAttributes() - 1);

        //INSTANCIAS PARA TEST DEL MODELO 
        ConverterUtils.DataSource convertest = new ConverterUtils.DataSource(
                "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro5.arff");
        Instances testInstance = convertest.getDataSet();
        testInstance.setClassIndex(testInstance.numAttributes() - 1);
        //INSTANCIAS PARA PREDICCIN
        ConverterUtils.DataSource converPredict = new ConverterUtils.DataSource(
                "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro1.arff");
        Instances predictInstance = converPredict.getDataSet();
        predictInstance.setClassIndex(predictInstance.numAttributes() - 1);
        //CONTRUCCIN DEL CLASIFICADOR
        J48 perceptron = new J48();
        perceptron.buildClassifier(instancias);
        //Evaluar las instancias
        Evaluation ev = new Evaluation(instancias);
        //EVALUAR MODELO DE ENTRENAMIENTO
        ev.evaluateModel(perceptron, instancias);
        //System.out.println(instancias);
        System.out.println("\n\nENTRENAMIENTO DEL MODELO ?RBOL DE DECISIN J48\n\n");
        System.out.println(ev.toSummaryString("_____RESULTADO_____", true));
        System.out.println(ev.toMatrixString("_____Matriz confusion___"));

        //PREDECIR CON EL MODELO
        Evaluation evPredict = new Evaluation(instancias);
        evPredict.evaluateModel(perceptron, predictInstance);

        //System.out.println(instancias);
        System.out.println("\n\nPREDICCIN DEL MODELO ?RBOL DE DECISIN J48\n\n");
        System.out.println(evPredict.toSummaryString("_____RESULTADO_____", false));
        System.out.println(evPredict.toMatrixString("_____Matriz confusion___"));

        //MOSTRAR VALORES 
        for (int i = 0; i < evPredict.evaluateModel(perceptron, predictInstance).length; i++) {
            resultado = evPredict.evaluateModel(perceptron, predictInstance)[i];
            polaridad += polaridad(resultado) + "\n";
            //System.out.println("Se clasifica como:  "+resultado + "que es: " + polaridad(resultado));                
        }
        archivoResultados(polaridad);

        //TEST DEL MODELO CON LOS DATOS DEL CLASIFICADOR
        Evaluation evtesting = new Evaluation(instancias);
        evtesting.evaluateModel(perceptron, testInstance);

        //System.out.println(instancias);
        System.out.println("\n\nTEST DEL MODELO ?RBOL DE DECISIN J48\n\n");
        System.out.println(evtesting.toSummaryString("_____RESULTADO_____", false));
        System.out.println(evtesting.toMatrixString("_____Matriz confusion___"));
    }

    catch (Exception ex) {
        Logger.getLogger(Perceptron.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:classif.ExperimentsLauncher.java

License:Open Source License

public void launchJ48() {
    try {//from   w  w  w  .j  a v a  2s.co  m
        String algo = "J48";
        System.out.println(algo);

        double testError = 0.0;
        J48 dt = new J48();
        dt.buildClassifier(train);
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(dt, test);
        testError = eval.errorRate();
        System.out.println("TestError:" + testError + "\n");
        System.out.println(dt.toSummaryString());
        System.out.println(dt.graph());
        System.out.println(eval.toSummaryString());

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.edwardraff.WekaMNIST.java

License:Open Source License

public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];//from w w  w  .j  av a2  s.c  o m
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    //normalize range like into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);

    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);

    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way

    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0/*0 for unlimited*/);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);

    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);

    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);

            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}

From source file:com.github.fracpete.multisearch.optimize.J48ConfidenceFactor.java

License:Open Source License

/**
 * The first parameter must be dataset,//w ww .j a v  a2  s .  c  o m
 * the (optional) second the class index (1-based, 'first' and 'last'
 * also supported).
 *
 * @param args   the commandline options
 * @throws Exception   if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: J48ConfidenceFactor <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    J48 j48 = new J48();

    // configure multisearch
    MathParameter conf = new MathParameter();
    conf.setProperty("confidenceFactor");
    conf.setBase(10);
    conf.setMin(0.05);
    conf.setMax(0.75);
    conf.setStep(0.05);
    conf.setExpression("I");
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(j48);
    multi.setSearchParameters(new AbstractParameter[] { conf });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_AUC,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameter: " + multi.getGenerator().evaluate(multi.getBestValues()));
}

From source file:com.github.fracpete.multisearch.setupgenerator.J48ConfidenceFactor.java

License:Open Source License

/**
 * Outputs the commandlines.//w  ww  . j a va 2s  .c  o  m
 *
 * @param args   the commandline options
 * @throws Exception   if setup generator fails for some reason
 */
public static void main(String[] args) throws Exception {
    // configure classifier we want to generate setups for
    J48 j48 = new J48();

    // configure generator
    MathParameter conf = new MathParameter();
    conf.setProperty("confidenceFactor");
    conf.setBase(10);
    conf.setMin(0.05);
    conf.setMax(0.75);
    conf.setStep(0.05);
    conf.setExpression("I");
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(j48);
    SetupGenerator generator = new SetupGenerator();
    generator.setBaseObject(j48);
    generator.setParameters(new AbstractParameter[] { conf });

    // output configuration
    System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator));

    // output commandlines
    System.out.println("\nCommandlines:\n");
    Enumeration<Serializable> enm = generator.setups();
    while (enm.hasMoreElements())
        System.out.println(Utils.toCommandLine(enm.nextElement()));
}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * @param args//from   w  ww .  java  2  s .co  m
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {

        // CREATE A DB CONNEctioN
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;
            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {

                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.main.ExplanationSingleAttribute.java

License:Open Source License

/**
 * @param args/*from  w ww .j  a v a2 s  . c  o  m*/
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");

    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        Statement stmt = conn.createStatement();

        // NOTE: the paramenter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine the which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set

        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");

        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";

                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();

                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE:  "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}