Example usage for the weka.classifiers.trees.J48 constructor J48()

Introduction

On this page you can find example usages of the J48() constructor of weka.classifiers.trees.J48.

Prototype

J48

Source Link

Usage

From source file:org.vimarsha.classifier.impl.WholeProgramClassifier.java

License:Open Source License

/**
 * Trains a J48 tree on {@code trainSet} (with the first attribute filtered out) and
 * classifies the first instance of {@code testSet}.
 *
 * <p>As side effects the textual form of the learnt tree is stored in
 * {@code treeModel} and the predicted label in {@code classificationResult}.
 *
 * @return the predicted class label, taken from the class attribute of {@code testSet}
 * @throws ClassificationFailedException if building the classifier or classifying the
 *         instance throws any exception
 */
@Override
public Object classify() throws ClassificationFailedException {
    // Filter that removes attribute 1 before the data reaches the tree.
    Remove dropFirstAttribute = new Remove();
    dropFirstAttribute.setAttributeIndices("1");

    J48 decisionTree = new J48();
    FilteredClassifier pipeline = new FilteredClassifier();
    pipeline.setFilter(dropFirstAttribute);
    pipeline.setClassifier(decisionTree);

    try {
        pipeline.buildClassifier(trainSet);
        this.treeModel = decisionTree.toString();

        // The prediction is the index of the label within the class attribute.
        int predictedIndex = (int) pipeline.classifyInstance(testSet.instance(0));
        String label = testSet.classAttribute().value(predictedIndex);
        classificationResult = label;
        return label;
    } catch (Exception ex) {
        throw new ClassificationFailedException();
    }
}

From source file:personality_prediction.Classifier.java

/**
 * Interactively trains an unpruned J48 tree for one personality trait and labels the
 * matching test set.
 *
 * <p>Prompts on standard output for a trait number (1-5), reads the choice from
 * standard input, loads that trait's training ARFF file, builds the tree, classifies
 * every instance of the trait's testing ARFF file and writes the labelled copy to a
 * separate {@code Testing_data_<trait>_labelled.arff} file, leaving the unlabelled
 * input untouched. Failures are reported on standard output; nothing is thrown.
 */
void run_classifier() {
    final String baseDir = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\";
    // File-name fragment for each menu entry, in menu order.
    final String[] traitSuffixes = { "open", "neur", "agr", "con", "extr" };

    try {
        System.out.println("Enter the class for which you want to classify");
        System.out.println("1..Openness");
        System.out.println("2..Neuroticism");
        System.out.println("3..Agreeableness");
        System.out.println("4..Conscientiousness");
        System.out.println("5..Extraversion");
        System.out.println();

        // The scanner is deliberately not closed: closing it would close System.in.
        Scanner sc = new Scanner(System.in);
        int choice = sc.nextInt();
        if (choice < 1 || choice > traitSuffixes.length) {
            System.out.println("Invalid choice: " + choice);
            return;
        }

        String trait = traitSuffixes[choice - 1];
        String trainingFile = baseDir + "Training dataset\\Training_data_" + trait + ".arff";
        String testingFile = baseDir + "Labelling\\Testing_data_" + trait + ".arff";
        String labelledFile = baseDir + "Labelling\\Testing_data_" + trait + "_labelled.arff";

        // Load the training data; the last attribute is the class.
        Instances data;
        BufferedReader trainReader = new BufferedReader(new FileReader(trainingFile));
        try {
            data = new Instances(trainReader);
        } finally {
            trainReader.close();
        }
        data.setClassIndex(data.numAttributes() - 1);

        // Build the classifier.
        String[] options = new String[1];
        options[0] = "-U"; // unpruned tree
        J48 tree = new J48();
        tree.setOptions(options);
        tree.buildClassifier(data);

        // Load the data to be labelled; again the last attribute is the class.
        Instances unlabeled;
        BufferedReader testReader = new BufferedReader(new FileReader(testingFile));
        try {
            unlabeled = new Instances(testReader);
        } finally {
            testReader.close();
        }
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        // Label a copy so the loaded instances themselves stay unmodified.
        Instances labeled = new Instances(unlabeled);
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            double clsLabel = tree.classifyInstance(unlabeled.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // Save the labelled data to its own file for every trait.
        BufferedWriter writer = new BufferedWriter(new FileWriter(labelledFile));
        try {
            writer.write(labeled.toString());
            writer.newLine();
            writer.flush();
        } finally {
            writer.close();
        }
    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}

From source file:personality_prediction.Evaluation_Result.java

/**
 * Trains a J48 tree on the neuroticism training CSV and prints a summary of its
 * evaluation on the neuroticism testing CSV.
 *
 * <p>The last attribute of each data set is used as the class attribute. Failures are
 * reported on standard output; nothing is thrown.
 */
void eval_result() {
    try {
        DataSource source_train = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\training_data_neur.csv");
        Instances train = source_train.getDataSet();
        DataSource source_test = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Testing dataset\\Testing_data_neur.csv");
        Instances test = source_test.getDataSet();

        train.setClassIndex(train.numAttributes() - 1);
        // Each set's class index comes from its own attribute count, not the other set's.
        test.setClassIndex(test.numAttributes() - 1);

        // Train the classifier on the training set only.
        Classifier cls = new J48();
        cls.buildClassifier(train);

        // Evaluate on the separate test set; the Evaluation is initialised from the training data.
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(cls, test);
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));

    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}

From source file:PointAnalyser.Main.java

/**
 * Builds an unpruned C4.5 (J48) tree on the shared {@code data} set and prints a
 * 10-fold cross-validation report for it.
 *
 * <p>If {@code data} has no class index yet, the last attribute is used. All numeric
 * attributes are converted to nominal before training, and {@code data} is replaced by
 * the converted set. The trained tree is stored in {@code tree}.
 *
 * @throws Exception if filtering, option parsing, training or evaluation fails
 */
public static void trainC45Classifier() throws Exception {

    // Set the class attribute if the data format does not provide this information.
    // For example, the XRFF format saves the class attribute information as well.
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    NumericToNominal nmf = new NumericToNominal();
    nmf.setInputFormat(data);
    data = Filter.useFilter(data, nmf);

    // Build a C4.5 classifier. Weka expects one array element per token; a single
    // "-C 0.25 -M 2 -U" string is never recognised as any of the three options.
    // The confidence factor (-C) is omitted because it only applies to pruned trees
    // and J48 rejects it in combination with -U.
    String[] options = { "-U", "-M", "2" }; // unpruned tree, min. 2 instances per leaf
    tree = new J48();
    tree.setOptions(options);
    tree.buildClassifier(data);

    // Evaluate with 10-fold cross-validation using a fixed seed for repeatability.
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(tree, data, 10, new Random(1));

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());

}

From source file:qa.qcri.nadeef.core.utils.classification.J48Classifier.java

License:Open Source License

/**
 * Creates a classifier backed by a fresh Weka {@code J48} decision tree.
 *
 * <p>All four arguments are handed unchanged to the superclass constructor.
 *
 * @throws NadeefDatabaseException propagated from the superclass constructor
 */
public J48Classifier(
        ExecutionContext executionContext,
        Schema databaseSchema,
        List<String> permittedAttributes,
        Column newValueColumn) throws NadeefDatabaseException {
    super(executionContext, databaseSchema, permittedAttributes, newValueColumn);

    // The underlying model starts out as an untrained J48 tree.
    classifier = new J48();
}

From source file:rdfsystem.data.DataMining.java

/**
 * Builds a J48 tree over the data obtained from {@code transformData(manager, true)},
 * using the attribute named "year" as the class, and returns the tree's graph.
 *
 * @param manager source handed to {@code transformData}
 * @return the value of {@code J48.graph()} for the trained tree
 * @throws Exception if the transformation, training or graph generation fails
 */
public static String classify(RdfManager manager) throws Exception {
    Instances dataset = transformData(manager, true);

    // The attribute called "year" is the prediction target.
    int yearIndex = dataset.attribute("year").index();
    dataset.setClassIndex(yearIndex);

    J48 decisionTree = new J48();
    decisionTree.buildClassifier(dataset);

    String treeGraph = decisionTree.graph();
    return treeGraph;
}

From source file:sentinets.TrainModel.java

License:Open Source License

/**
 * Runs {@code trainModel} once for each of the three classifiers under comparison:
 * SMO (labelled "SVM"), J48 and Naive Bayes, in that order.
 */
public void runExps() {
    // All classifiers are created up front, then trained in declaration order.
    Classifier[] candidates = { new SMO(), new J48(), new NaiveBayes() };
    String[] labels = { "SVM", "J48", "Naive Bayes" };

    for (int idx = 0; idx < candidates.length; idx++) {
        trainModel(candidates[idx], labels[idx]);
    }
}

From source file:statistics.BinaryStatisticsEvaluator.java

/**
 * Trains the requested classifier on the training set, evaluates it on the testing
 * set, prints the evaluation reports and returns the confusion matrix.
 *
 * <p>Supported classifier codes: {@code "NB"} (NaiveBayes), {@code "DT"} (J48),
 * {@code "SVM"} (SMO) and {@code "KNN"} (IBk).
 *
 * @param Training_Instances data used to build the model and to initialise the evaluation
 * @param Testing_Instances data the model is evaluated on
 * @param classifier one of the classifier codes listed above
 * @return the confusion matrix, or {@code null} if the code is unknown or if training
 *         or evaluation fails (the failure is logged at SEVERE level)
 */
@Override
public double[][] getConfusionMatrix(Instances Training_Instances, Instances Testing_Instances,
        String classifier) {

    // Select the model; the literal-first equals keeps a null code from throwing.
    Classifier cModel;
    if ("NB".equals(classifier)) {
        cModel = new NaiveBayes();
    } else if ("DT".equals(classifier)) {
        cModel = new J48();
    } else if ("SVM".equals(classifier)) {
        cModel = new SMO();
    } else if ("KNN".equals(classifier)) {
        cModel = new IBk();
    } else {
        Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE,
                "Unknown classifier code: {0}", classifier);
        return null;
    }

    try {
        // Train once; a failed build skips evaluation instead of testing an unbuilt model.
        cModel.buildClassifier(Training_Instances);

        // Test the model.
        Evaluation eTest = new Evaluation(Training_Instances);
        eTest.evaluateModel(cModel, Testing_Instances);

        // Print the result.
        System.out.println(eTest.toSummaryString());
        System.out.println(eTest.toMatrixString());
        System.out.println(eTest.toClassDetailsString());

        // Get the confusion matrix.
        return eTest.confusionMatrix();
    } catch (Exception ex) {
        Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:tclass.ABClassifier.java

License:Open Source License

/**
 * Runs the whole experiment: reads the train and test stream data, computes global
 * attributes, extracts events, clusters the training events separately for each class,
 * attributes both data sets against every class's clusters, trains one AdaBoostM1
 * learner (with J48 as base classifier) per class, prints each learner's accuracy on
 * the test data and finally the accuracy of a simple vote across all learners.
 *
 * @param args command-line arguments, parsed by {@code ExpAB_TC2.parseArgs}
 * @throws Exception if any stage of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpAB_TC2 thisExp = new ExpAB_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString());

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers,
    // one per class.
    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    // NOTE(review): eventDescVec is only referenced by the commented-out "old way" below.
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    // And now, initialise.
    for (int i = 0; i < numClasses; i++) {
        // The new way:
        eventClusterers[i] = settings.getEventClusterer();
        // The old way:
        // eventClusterers[i] = new EventClusterer(new
        //    StreamTokenizer(
        //                    new FileReader(thisExp.evClusterDesc)),
        //                   domDesc,
        //                   eventDescVec);

        // System.out.println(eventClusterers[i]);
    }

    // Segment the data: one (initially empty) container of training streams per class.

    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up: each training stream goes into the container of its real class.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }

    // Cluster the events of each class with that class's own clusterer.
    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more.
    // The first thing was only useful for clustering.
    // Now attribution. We want to attribute all the data. So we are going
    // to have one dataset for each learner.
    // First set up the attributors.

    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());

        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];

    // Every attributor is applied to the complete train and test data, not just its own class.
    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one.

    Combiner c = new Combiner();

    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];

    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);

        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Now we have to do some garbage collection: drop the references that are
    // no longer needed and request a collection before learning starts.

    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor.
    // And now, we can construct a learner for each case.
    // Well, for now, I'm going to do something completely crazy.
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme
    // is possible!! This is a strange form of ensemble
    // classifier.
    // NOTE(review): the original comment here mentioned naive Bayes; the learners
    // built below are AdaBoostM1 instances using J48 as the base classifier.

    Debug.setDebugLevel(Debug.PROGRESS);

    AdaBoostM1[] dtLearners = new AdaBoostM1[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new AdaBoostM1();
        dtLearners[i].setClassifier(new J48());
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }

    // Wrap each trained learner in an ABClassifier.
    ABClassifier[] dtClassifiers = new ABClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new ABClassifier(dtLearners[i]);
        // System.out.println(nbClassifiers[i].toString());
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    // Now test on training data (each one)
    // NOTE(review): the disabled block below refers to nbClassifiers, which is not
    // declared in this method; it would need adapting before being re-enabled.
    /*
      for(int i=0; i < numClasses; i++){
      String className =
      domDesc.getClassDescVec().getClassLabel(i); 
      ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
      StreamAttValVecI savvi =
      trainAttsByClass[i].getStreamAttValVec(); 
            
      for(int j=0; j < trainAttsByClass[i].size(); j++){
      nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
      }
      System.out.println(">>> Learner for class " + className); 
      int numCorrect = 0; 
      for(int j=0; j < classvi.size(); j++){
      System.out.print(classvi.elAt(j).toString()); 
      if(classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()){
      numCorrect++; 
      }
            
      }
      System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of " + numTrainStreams + " (" + 
      numCorrect*100.0/numTrainStreams + "%)"); 
            
            
      }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        // Work on a clone of the class vector; it is passed to classify() below and
        // read back afterwards for the predicted classes.
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        // NOTE(review): savvi is not used in the rest of this loop.
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }

        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");

    }

    // Now do voting. This is a hack solution: every learner casts one vote for the
    // class it predicted, and the class with the most votes wins.
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        // The real class is read from the first learner's classification vector.
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();

            // if(thisPrediction == j){
            //     votes[thisPrediction] += 2; 
            // }
            //else {
            votes[thisPrediction]++;
            //}

        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        // The strict '>' means that on a tie the lowest class index wins.
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }

    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}

From source file:tclass.DTClassifier.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpDT_TC2 thisExp = new ExpDT_TC2();
    thisExp.parseArgs(args);//from w  w  w . ja  v  a 2s  .  co m
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model; so we
    // might as well calculated now
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events. 

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString()); 

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers, 
    // one per class. 
    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    // And now, initialise. 
    for (int i = 0; i < numClasses; i++) {
        // The new way: 
        eventClusterers[i] = settings.getEventClusterer();
        // The old way: 
        // eventClusterers[i] = new EventClusterer(new
        //    StreamTokenizer(
        //                    new FileReader(thisExp.evClusterDesc)), 
        //                   domDesc,
        //                   eventDescVec); 

        // System.out.println(eventClusterers[i]); 
    }

    // Segment the data. 

    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    //And now load it up. 
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }

    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more. 
    // The first thing was only useful for clustering. 
    // Now attribution. We want to attribute all the data. So we are going 
    // to have one dataset for each learner. 
    // First set up the attributors. 

    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());

        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one. 

    Combiner c = new Combiner();

    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];

    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);

        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Now we have to do some garbage collection. 

    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor. 
    // And now, we can construct a learner for each case. 
    // Well, for now, I'm going to do something completely crazy. 
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme 
    // is possible!! This is a strange form of ensemble
    // classifier. 
    // Each naive bayes algorithm only gets one 

    Debug.setDebugLevel(Debug.PROGRESS);
    int[][] selectedIndices = new int[numClasses][];
    J48[] dtLearners = new J48[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new J48();
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);

        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        if (thisExp.featureSel) {
            Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
            BestFirst bfs = new BestFirst();
            CfsSubsetEval cfs = new CfsSubsetEval();
            cfs.buildEvaluator(data);
            selectedIndices[i] = bfs.search(cfs, data);
            // Now extract the features. 
            System.out.print("Selected features for class " + i + ": ");
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += ("last");
            System.out.println(featureString);
            // Now apply the filter. 
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }

        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }

    DTClassifier[] dtClassifiers = new DTClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new DTClassifier(dtLearners[i]);
        // System.out.println(nbClassifiers[i].toString()); 
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

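    // Testing on the training data (each learner in turn) is currently disabled;
    // the old code is kept in the block comment below for reference.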
    /*
      for(int i=0; i < numClasses; i++){
      String className =
      domDesc.getClassDescVec().getClassLabel(i); 
      ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
      StreamAttValVecI savvi =
      trainAttsByClass[i].getStreamAttValVec(); 
            
      for(int j=0; j < trainAttsByClass[i].size(); j++){
      nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
      }
      System.out.println(">>> Learner for class " + className); 
      int numCorrect = 0; 
      for(int j=0; j < classvi.size(); j++){
      System.out.print(classvi.elAt(j).toString()); 
      if(classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()){
      numCorrect++; 
      }
            
      }
      System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of " + numTrainStreams + " (" + 
      numCorrect*100.0/numTrainStreams + "%)"); 
            
            
      }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers. 
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        if (thisExp.featureSel) {
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += "last";
            // Now apply the filter. 
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }

        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");

    }

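    // Now do voting. This is a hack solution: each per-class learner casts one
    // vote for the class it predicted, and the class with the most votes wins
    // (ties go to the lowest class index).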
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();

            // if(thisPrediction == j){
            //     votes[thisPrediction] += 2; 
            // }
            //else {
            votes[thisPrediction]++;
            //}

        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result: 
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }

    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}