Example usage for weka.classifiers.trees J48 J48

List of usage examples for weka.classifiers.trees J48 J48

Introduction

On this page you can find example usages of the weka.classifiers.trees.J48 constructor J48().

Prototype

J48

Source Link

Usage

From source file:org.vimarsha.classifier.impl.WholeProgramClassifier.java

License:Open Source License

/**
 * Classifies the first whole-program test instance with a J48 decision tree.
 *
 * <p>The tree is wrapped in a {@link FilteredClassifier} whose {@link Remove} filter is
 * configured with attribute index {@code "1"}, and is trained on {@code trainSet}. The textual
 * form of the tree is stored in {@code treeModel} and the predicted class label in
 * {@code classificationResult}.
 *
 * @return String containing the classification result of the evaluated program's dataset.
 * @throws ClassificationFailedException if training or prediction fails; the underlying
 *         exception is attached as the cause so the root failure is not lost
 */
@Override
public Object classify() throws ClassificationFailedException {
    J48 j48 = new J48();
    Remove rm = new Remove();
    rm.setAttributeIndices("1");
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(j48);
    try {
        fc.buildClassifier(trainSet);
        this.treeModel = j48.toString();
        // Only the first test instance is classified.
        double pred = fc.classifyInstance(testSet.instance(0));
        // The prediction is the index of the class label within the class attribute.
        String output = testSet.classAttribute().value((int) pred);
        classificationResult = output;
        return output;
    } catch (Exception ex) {
        // Keep the original failure reachable through getCause() instead of discarding it.
        ClassificationFailedException failure = new ClassificationFailedException();
        failure.initCause(ex);
        throw failure;
    }
}

From source file:personality_prediction.Classifier.java

/**
 * Trains an unpruned J48 tree for one personality trait and labels that trait's test set.
 *
 * <p>The trait is chosen interactively (1-5 on standard input). The trait's training ARFF file
 * is loaded with its last attribute as the class, a J48 tree is built with the {@code -U}
 * option, and every instance of the trait's test ARFF file receives the predicted class value.
 * The labelled copy is written to {@code Testing_data_<trait>_labelled.arff}, a file separate
 * from the unlabelled input, so the input is never overwritten.
 *
 * <p>Any failure (non-numeric input, missing file, WEKA error) is reported on standard output.
 */
void run_classifier() {
    final String datasetRoot = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\";
    try {
        System.out.println("Enter the class for which you want to classify");
        System.out.println("1..Openness");
        System.out.println("2..Neuroticism");
        System.out.println("3..Agreeableness");
        System.out.println("4..Conscientiousness");
        System.out.println("5..Extraversion");
        System.out.println();
        // The Scanner is intentionally not closed: closing it would also close System.in.
        Scanner sc = new Scanner(System.in);
        int choice = sc.nextInt();

        // Map the menu entry to the suffix used in the data file names.
        final String trait;
        switch (choice) {
        case 1:
            trait = "open";
            break;
        case 2:
            trait = "neur";
            break;
        case 3:
            trait = "agr";
            break;
        case 4:
            trait = "con";
            break;
        case 5:
            trait = "extr";
            break;
        default:
            System.out.println("Invalid choice: " + choice + " (expected a number from 1 to 5)");
            return;
        }

        final String trainingFile = datasetRoot + "Training dataset\\Training_data_" + trait + ".arff";
        final String testingFile = datasetRoot + "Labelling\\Testing_data_" + trait + ".arff";
        final String labelledFile = datasetRoot + "Labelling\\Testing_data_" + trait + "_labelled.arff";

        // Load the training data; the last attribute is the class.
        final Instances data;
        try (BufferedReader reader = new BufferedReader(new FileReader(trainingFile))) {
            data = new Instances(reader);
        }
        data.setClassIndex(data.numAttributes() - 1);

        // Build the classifier.
        String[] options = { "-U" }; // unpruned tree
        J48 tree = new J48();
        tree.setOptions(options);
        tree.buildClassifier(data);

        // Load the instances to label; the last attribute is the class.
        final Instances unlabeled;
        try (BufferedReader reader = new BufferedReader(new FileReader(testingFile))) {
            unlabeled = new Instances(reader);
        }
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        // Predictions are written into a copy so the loaded data stays untouched.
        Instances labeled = new Instances(unlabeled);
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            double clsLabel = tree.classifyInstance(unlabeled.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // Save the labelled data; closing the writer flushes it.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(labelledFile))) {
            writer.write(labeled.toString());
            writer.newLine();
        }
    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}

From source file:personality_prediction.Evaluation_Result.java

/**
 * Trains a J48 tree on the neuroticism training CSV and prints how it performs on the
 * neuroticism testing CSV.
 *
 * <p>In both data sets the last attribute is used as the class. Any failure while loading,
 * training or evaluating is reported on standard output.
 */
void eval_result() {
    try {
        DataSource source_train = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\training_data_neur.csv");
        Instances train = source_train.getDataSet();
        DataSource source_test = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Testing dataset\\Testing_data_neur.csv");
        Instances test = source_test.getDataSet();
        train.setClassIndex(train.numAttributes() - 1);
        // Each data set derives the class index from its own attribute count.
        test.setClassIndex(test.numAttributes() - 1);
        // train classifier
        Classifier cls = new J48();
        cls.buildClassifier(train);
        // evaluate the trained tree on the held-out test set
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(cls, test);
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));

    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}

From source file:PointAnalyser.Main.java

/**
 * Builds an unpruned C4.5 (J48) tree from the shared {@code data} set and prints a 10-fold
 * cross-validation report.
 *
 * <p>If {@code data} has no class index yet, its last attribute becomes the class. The data is
 * then passed through a {@link NumericToNominal} filter and the filtered set replaces
 * {@code data}. The trained tree is stored in {@code tree}. The summary, the confusion matrix
 * and the per-class details of the cross-validation are written to standard output.
 *
 * @throws Exception if filtering, option parsing, training or evaluation fails
 */
public static void trainC45Classifier() throws Exception {

    // setting class attribute if the data format does not provide this information
    // For example, the XRFF format saves the class attribute information as well
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    NumericToNominal nmf = new NumericToNominal();
    nmf.setInputFormat(data);
    data = Filter.useFilter(data, nmf);

    // build a c4.5 classifier
    // Every option token must be its own array element, otherwise the option parser does not
    // recognise any of them. "-C" (pruning confidence) is left out because J48 rejects it in
    // combination with "-U" (unpruned tree).
    String[] options = { "-U", "-M", "2" };
    tree = new J48(); // new instance of tree
    tree.setOptions(options); // set the options
    tree.buildClassifier(data); // build classifier
    /*
             RemoveMisclassified rm = new RemoveMisclassified();
             rm.setInputFormat(data);
             rm.setClassifier(tree);
             rm.setNumFolds(10);
             rm.setThreshold(0.1);
             rm.setMaxIterations(0);
             data = Filter.useFilter(data, rm);

             tree = new J48();         // new instance of tree
             tree.setOptions(options);     // set the options
             tree.buildClassifier(data);   // build classifier
             */
    // eval: 10-fold cross-validation with a fixed seed so runs are repeatable
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(tree, data, 10, new Random(1));

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());

}

From source file:qa.qcri.nadeef.core.utils.classification.J48Classifier.java

License:Open Source License

/**
 * Creates a classifier whose underlying model is a WEKA {@link J48} decision tree.
 *
 * <p>All four arguments are handed unchanged to the superclass constructor.
 *
 * @param executionContext    passed through to the superclass
 * @param databaseSchema      passed through to the superclass
 * @param permittedAttributes passed through to the superclass
 * @param newValueColumn      passed through to the superclass
 * @throws NadeefDatabaseException presumably raised by the superclass constructor; confirm
 *         against the base class
 */
public J48Classifier(
        ExecutionContext executionContext,
        Schema databaseSchema,
        List<String> permittedAttributes,
        Column newValueColumn) throws NadeefDatabaseException {
    super(executionContext, databaseSchema, permittedAttributes, newValueColumn);

    // Start with a fresh tree in its default configuration.
    this.classifier = new J48();
}

From source file:rdfsystem.data.DataMining.java

/**
 * Builds a J48 decision tree that predicts the {@code year} attribute and returns the graph
 * description produced by {@link J48#graph()}.
 *
 * <p>NOTE(review): this assumes {@code transformData(manager, true)} always yields an attribute
 * named {@code year}; confirm, since a missing attribute would fail on the {@code index()} call.
 *
 * @param manager source of the data handed to {@code transformData}
 * @return the graph representation of the trained tree
 * @throws Exception if the data cannot be transformed or the tree cannot be built
 */
public static String classify(RdfManager manager) throws Exception {
    final Instances dataset = transformData(manager, true);

    // The attribute to predict is looked up by name rather than by position.
    final int yearIndex = dataset.attribute("year").index();
    dataset.setClassIndex(yearIndex);

    final J48 decisionTree = new J48();
    decisionTree.buildClassifier(dataset);
    return decisionTree.graph();
}

From source file:sentinets.TrainModel.java

License:Open Source License

/**
 * Runs {@code trainModel} once for each of three classifiers, in this order: SMO (labelled
 * "SVM"), J48 (labelled "J48") and NaiveBayes (labelled "Naive Bayes").
 */
public void runExps() {
    // All classifiers are created up front, then trained one after another.
    final Classifier[] models = { new SMO(), new J48(), new NaiveBayes() };
    final String[] labels = { "SVM", "J48", "Naive Bayes" };

    for (int idx = 0; idx < models.length; idx++) {
        trainModel(models[idx], labels[idx]);
    }
}

From source file:statistics.BinaryStatisticsEvaluator.java

/**
 * Trains the requested classifier on the training set, evaluates it on the testing set, prints
 * the evaluation report to standard output and returns the confusion matrix.
 *
 * <p>Supported classifier codes are {@code "NB"} (NaiveBayes), {@code "DT"} (J48),
 * {@code "SVM"} (SMO) and {@code "KNN"} (IBk).
 *
 * @param Training_Instances data used to build the classifier and to initialise the evaluation
 * @param Testing_Instances  data the trained classifier is evaluated on
 * @param classifier         one of the classifier codes listed above
 * @return the confusion matrix of the evaluation, or {@code null} if the code is not supported
 *         or if training or evaluation fails (the failure is logged at SEVERE level)
 */
@Override
public double[][] getConfusionMatrix(Instances Training_Instances, Instances Testing_Instances,
        String classifier) {
    final Logger logger = Logger.getLogger(BinaryStatisticsEvaluator.class.getName());

    // Pick the learner; an unsupported code is reported here rather than surfacing later as a
    // NullPointerException during evaluation.
    final Classifier cModel;
    if ("NB".equals(classifier)) {
        cModel = new NaiveBayes();
    } else if ("DT".equals(classifier)) {
        cModel = new J48();
    } else if ("SVM".equals(classifier)) {
        cModel = new SMO();
    } else if ("KNN".equals(classifier)) {
        cModel = new IBk();
    } else {
        logger.log(Level.SEVERE, "Unsupported classifier code: {0}", classifier);
        return null;
    }

    try {
        // Train first; a model that failed to build is never evaluated.
        cModel.buildClassifier(Training_Instances);

        //Test the model
        Evaluation eTest = new Evaluation(Training_Instances);
        eTest.evaluateModel(cModel, Testing_Instances);

        //Print the result
        System.out.println(eTest.toSummaryString());
        System.out.println(eTest.toMatrixString());
        System.out.println(eTest.toClassDetailsString());

        //Get the confusion matrix
        return eTest.confusionMatrix();
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:tclass.ABClassifier.java

License:Open Source License

/**
 * Runs the whole experiment: trains one AdaBoostM1-boosted J48 learner per class and reports
 * per-learner and voted accuracy on the test data.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>parse the arguments and read the domain description, training and test streams;</li>
 *   <li>compute global attributes and extract events for both data sets;</li>
 *   <li>group the training event streams by their real class and cluster each group with its
 *       own event clusterer;</li>
 *   <li>build one attributor per class and attribute both data sets with each of them;</li>
 *   <li>combine the global attributes with each per-class event attribute set;</li>
 *   <li>train one AdaBoostM1 learner (base classifier J48) per class;</li>
 *   <li>classify the test data with each learner and print its accuracy;</li>
 *   <li>let the learners vote per test stream and print the voted accuracy.</li>
 * </ol>
 *
 * @param args command line arguments, handed to {@code ExpAB_TC2.parseArgs}
 * @throws Exception if any stage of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpAB_TC2 thisExp = new ExpAB_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString());

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers,
    // one per class.
    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    // NOTE(review): eventDescVec is only referenced by the commented-out "old way" below.
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    // And now, initialise.
    for (int i = 0; i < numClasses; i++) {
        // The new way:
        eventClusterers[i] = settings.getEventClusterer();
        // The old way:
        // eventClusterers[i] = new EventClusterer(new
        //    StreamTokenizer(
        //                    new FileReader(thisExp.evClusterDesc)),
        //                   domDesc,
        //                   eventDescVec);

        // System.out.println(eventClusterers[i]);
    }

    // Segment the data: one initially empty event-stream container per class.

    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up: each training stream goes into the container of its real class.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        // The real class is used directly as the array index.
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }

    // Cluster the events of each class with that class's own clusterer.
    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more.
    // The first thing was only useful for clustering.
    // Now attribution. We want to attribute all the data. So we are going
    // to have one dataset for each learner.
    // First set up the attributors.

    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());

        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];

    // Every attributor is applied to the complete training and test data, not only to the
    // streams of its own class.
    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one.

    Combiner c = new Combiner();

    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];

    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);

        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Now we have to do some garbage collection: drop the references that are no longer
    // needed before requesting a collection.

    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor.
    // And now, we can construct a learner for each case.
    // Well, for now, I'm going to do something completely crazy.
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme
    // is possible!! This is a strange form of ensemble
    // classifier.
    // NOTE(review): the original comment ended with an unfinished sentence about
    // "each naive bayes algorithm"; the learners built below are AdaBoostM1 over J48.

    Debug.setDebugLevel(Debug.PROGRESS);

    // One boosted J48 learner per class, each trained on that class's combined attribute set.
    AdaBoostM1[] dtLearners = new AdaBoostM1[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new AdaBoostM1();
        dtLearners[i].setClassifier(new J48());
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }

    // Wrap each trained learner in an ABClassifier.
    ABClassifier[] dtClassifiers = new ABClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new ABClassifier(dtLearners[i]);
        // System.out.println(nbClassifiers[i].toString());
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    // Now test on training data (each one) -- currently disabled.
    /*
      for(int i=0; i < numClasses; i++){
      String className =
      domDesc.getClassDescVec().getClassLabel(i);
      ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
      StreamAttValVecI savvi =
      trainAttsByClass[i].getStreamAttValVec();

      for(int j=0; j < trainAttsByClass[i].size(); j++){
      nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
      }
      System.out.println(">>> Learner for class " + className);
      int numCorrect = 0;
      for(int j=0; j < classvi.size(); j++){
      System.out.print(classvi.elAt(j).toString());
      if(classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()){
      numCorrect++;
      }

      }
      System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of " + numTrainStreams + " (" +
      numCorrect*100.0/numTrainStreams + "%)");


      }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    // classns[i] holds learner i's classifications of all test streams; it is reused for the
    // voting stage below.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        // Work on a clone so the class vector of the test data is left as it was.
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        // NOTE(review): savvi is not used in this loop.
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }

        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");

    }

    // Now do voting. This is a hack solution: every learner casts one vote for the class it
    // predicted, and the class with the most votes wins.
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        // The real class is read from the first learner's classification vector.
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();

            // Disabled alternative: give a learner's vote for its own class double weight.
            // if(thisPrediction == j){
            //     votes[thisPrediction] += 2;
            // }
            //else {
            votes[thisPrediction]++;
            //}

        }
        // Find the winner; the strict comparison means the lowest class index wins a tie.
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }

    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}

From source file:tclass.DTClassifier.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpDT_TC2 thisExp = new ExpDT_TC2();
    thisExp.parseArgs(args);//from w  w  w . ja  v  a 2s  .  co m
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model; so we
    // might as well calculated now
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events. 

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString()); 

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers, 
    // one per class. 
    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    // And now, initialise. 
    for (int i = 0; i < numClasses; i++) {
        // The new way: 
        eventClusterers[i] = settings.getEventClusterer();
        // The old way: 
        // eventClusterers[i] = new EventClusterer(new
        //    StreamTokenizer(
        //                    new FileReader(thisExp.evClusterDesc)), 
        //                   domDesc,
        //                   eventDescVec); 

        // System.out.println(eventClusterers[i]); 
    }

    // Segment the data. 

    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    //And now load it up. 
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }

    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more. 
    // The first thing was only useful for clustering. 
    // Now attribution. We want to attribute all the data. So we are going 
    // to have one dataset for each learner. 
    // First set up the attributors. 

    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());

        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");

    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one. 

    Combiner c = new Combiner();

    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];

    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);

        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Now we have to do some garbage collection. 

    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor. 
    // And now, we can construct a learner for each case. 
    // Well, for now, I'm going to do something completely crazy. 
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme 
    // is possible!! This is a strange form of ensemble
    // classifier. 
    // Each naive bayes algorithm only gets one 

    Debug.setDebugLevel(Debug.PROGRESS);
    int[][] selectedIndices = new int[numClasses][];
    J48[] dtLearners = new J48[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new J48();
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);

        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        if (thisExp.featureSel) {
            Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
            BestFirst bfs = new BestFirst();
            CfsSubsetEval cfs = new CfsSubsetEval();
            cfs.buildEvaluator(data);
            selectedIndices[i] = bfs.search(cfs, data);
            // Now extract the features. 
            System.out.print("Selected features for class " + i + ": ");
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += ("last");
            System.out.println(featureString);
            // Now apply the filter. 
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }

        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }

    DTClassifier[] dtClassifiers = new DTClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new DTClassifier(dtLearners[i]);
        // System.out.println(nbClassifiers[i].toString()); 
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    // Testing each learner on its own training data is currently disabled (the block below is commented out).
    /*
      for(int i=0; i < numClasses; i++){
      String className =
      domDesc.getClassDescVec().getClassLabel(i); 
      ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
      StreamAttValVecI savvi =
      trainAttsByClass[i].getStreamAttValVec(); 
            
      for(int j=0; j < trainAttsByClass[i].size(); j++){
      nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
      }
      System.out.println(">>> Learner for class " + className); 
      int numCorrect = 0; 
      for(int j=0; j < classvi.size(); j++){
      System.out.print(classvi.elAt(j).toString()); 
      if(classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()){
      numCorrect++; 
      }
            
      }
      System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of " + numTrainStreams + " (" + 
      numCorrect*100.0/numTrainStreams + "%)"); 
            
            
      }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers. 
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        if (thisExp.featureSel) {
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += "last";
            // Now apply the filter. 
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }

        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");

    }

    // Now do voting. This is a hack solution. 
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();

            // if(thisPrediction == j){
            //     votes[thisPrediction] += 2; 
            // }
            //else {
            votes[thisPrediction]++;
            //}

        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result: 
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }

    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}