List of usage examples for weka.classifiers.trees.J48
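Before the project-specific examples, here is a minimal end-to-end sketch of the usual J48 workflow: load an ARFF file, mark the class attribute, train, and cross-validate. The dataset path iris.arff is a placeholder; any ARFF file with a nominal class attribute will do.

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class J48Minimal {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1); // last attribute is the class by convention
        }
        J48 tree = new J48(); // C4.5 decision tree with default settings
        tree.buildClassifier(data);
        System.out.println(tree); // pretty-prints the learned tree
        // 10-fold cross-validation on an untrained copy of the classifier
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}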
From source file:org.vimarsha.classifier.impl.WholeProgramClassifier.java
License:Open Source License
/**
 * Classifies whole-program test instances.
 *
 * @return String containing the classification result of the evaluated program's dataset.
 * @throws ClassificationFailedException
 */
@Override
public Object classify() throws ClassificationFailedException {
    J48 j48 = new J48();
    Remove rm = new Remove();
    String output = null;
    rm.setAttributeIndices("1"); // drop the first attribute before training
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(j48);
    try {
        fc.buildClassifier(trainSet);
        this.treeModel = j48.toString(); // FilteredClassifier trains the wrapped J48 in place
        double pred = fc.classifyInstance(testSet.instance(0));
        output = testSet.classAttribute().value((int) pred);
        classificationResult = output;
    } catch (Exception ex) {
        throw new ClassificationFailedException();
    }
    return output;
}
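A note on the pattern above: because J48 is wrapped in a FilteredClassifier, the Remove filter configured on attribute 1 is applied consistently at training and at classification time, so test instances never need to be stripped by hand. Calling j48.toString() after fc.buildClassifier(trainSet) works because FilteredClassifier trains the very classifier instance it was handed.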
From source file:personality_prediction.Classifier.java
/** Trains an unpruned J48 on the chosen Big Five trait and labels the corresponding test set. */
void run_classifier() {
    try {
        // csv2arff();
        System.out.println("Enter the class for which you want to classify");
        System.out.println("1..Openness");
        System.out.println("2..Neuroticism");
        System.out.println("3..Agreeableness");
        System.out.println("4..Conscientiousness");
        System.out.println("5..Extraversion");
        System.out.println();
        Scanner sc = new Scanner(System.in);
        int choice = sc.nextInt();

        // Map the menu choice to the file-name suffix shared by all of the datasets.
        String[] traits = { "open", "neur", "agr", "con", "extr" };
        if (choice < 1 || choice > 5) {
            System.out.println("Invalid choice");
            return;
        }
        String trait = traits[choice - 1];
        String base = "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\";

        // ********* load the training data *********
        BufferedReader reader = new BufferedReader(
                new FileReader(base + "Training dataset\\Training_data_" + trait + ".arff"));
        Instances data = new Instances(reader);
        reader.close();
        // set the class attribute (last attribute by convention)
        data.setClassIndex(data.numAttributes() - 1);

        // ********* build the classifier *********
        String[] options = new String[1];
        options[0] = "-U"; // unpruned tree
        J48 tree = new J48(); // new instance of tree
        tree.setOptions(options); // set the options
        tree.buildClassifier(data); // build classifier

        // ********* label the unlabeled test data *********
        BufferedReader br = new BufferedReader(
                new FileReader(base + "Labelling\\Testing_data_" + trait + ".arff"));
        Instances unlabeled = new Instances(br);
        br.close();
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        // create a copy and fill in the predicted class values
        Instances labeled = new Instances(unlabeled);
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            double clsLabel = tree.classifyInstance(unlabeled.instance(i));
            labeled.instance(i).setClassValue(clsLabel);
        }

        // save the labeled data; in the original code only choice 1 set this path,
        // so choices 2-5 would have overwritten the unlabeled input file
        BufferedWriter writer = new BufferedWriter(
                new FileWriter(base + "Labelling\\Testing_data_" + trait + "_labelled.arff"));
        writer.write(labeled.toString());
        writer.newLine();
        writer.flush();
        writer.close();
    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}
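One possible extension, not in the original source: persisting the trained tree with Weka's SerializationHelper, so each trait model is built only once. A minimal sketch reusing tree, trait and unlabeled from the method above; the .model path is a placeholder.

// Save the trained model to disk ("j48_" + trait + ".model" is a placeholder path).
weka.core.SerializationHelper.write("j48_" + trait + ".model", tree);

// Later, or in another program: reload it and classify without retraining.
J48 restored = (J48) weka.core.SerializationHelper.read("j48_" + trait + ".model");
double label = restored.classifyInstance(unlabeled.instance(0));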
From source file:personality_prediction.Evaluation_Result.java
void eval_result() {
    try {
        DataSource source_train = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Training dataset\\training_data_neur.csv");
        Instances train = source_train.getDataSet();
        DataSource source_test = new DataSource(
                "C:\\Users\\divya\\Desktop\\Personality Mining\\WEKA_DataSet\\Testing dataset\\Testing_data_neur.csv");
        Instances test = source_test.getDataSet();
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1); // the original passed train.numAttributes() here
        // train the classifier and evaluate it on the held-out test set
        Classifier cls = new J48();
        cls.buildClassifier(train);
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(cls, test);
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    } catch (Exception e) {
        System.out.println(e.getLocalizedMessage());
    }
}
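Because these datasets are CSV rather than ARFF, the class column may be loaded as numeric, and J48 only accepts a nominal class. A hedged fix, inserted before buildClassifier and reusing train and test from above (imports assumed: weka.filters.Filter and weka.filters.unsupervised.attribute.NumericToNominal):

NumericToNominal toNominal = new NumericToNominal();
toNominal.setAttributeIndices("last"); // convert only the class column
toNominal.setInputFormat(train);
train = Filter.useFilter(train, toNominal);
test = Filter.useFilter(test, toNominal); // same filter instance, so the label sets stay aligned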
From source file:PointAnalyser.Main.java
public static void trainC45Classifier() throws Exception {
    // setting class attribute if the data format does not provide this information
    // For example, the XRFF format saves the class attribute information as well
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    NumericToNominal nmf = new NumericToNominal();
    nmf.setInputFormat(data);
    data = Filter.useFilter(data, nmf);

    // Build a C4.5 classifier. The original packed "-C 0.25 -M 2 -U" into a single
    // array element, which setOptions() cannot parse; splitOptions() tokenizes the
    // string properly. -U and -C also conflict (J48 rejects a confidence factor for
    // an unpruned tree), so only the unpruned and min-instances options are kept.
    tree = new J48(); // new instance of tree
    tree.setOptions(weka.core.Utils.splitOptions("-U -M 2")); // unpruned, >= 2 instances per leaf
    tree.buildClassifier(data); // build classifier

    /*
    RemoveMisclassified rm = new RemoveMisclassified();
    rm.setInputFormat(data);
    rm.setClassifier(tree);
    rm.setNumFolds(10);
    rm.setThreshold(0.1);
    rm.setMaxIterations(0);
    data = Filter.useFilter(data, rm);
    tree = new J48(); // new instance of tree
    tree.setOptions(weka.core.Utils.splitOptions("-U -M 2"));
    tree.buildClassifier(data);
    */

    // evaluate with 10-fold cross-validation
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(tree, data, 10, new Random(1));
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());
}
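The same configuration can also be written with J48's typed setters, which avoid option-string parsing altogether; this is equivalent to -U -M 2:

J48 tree = new J48();
tree.setUnpruned(true); // -U
tree.setMinNumObj(2);   // -M 2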
From source file:qa.qcri.nadeef.core.utils.classification.J48Classifier.java
License:Open Source License
public J48Classifier(ExecutionContext executionContext, Schema databaseSchema,
        List<String> permittedAttributes, Column newValueColumn) throws NadeefDatabaseException {
    super(executionContext, databaseSchema, permittedAttributes, newValueColumn);
    // initialize the model
    this.classifier = new J48();
}
From source file:rdfsystem.data.DataMining.java
public static String classify(RdfManager manager) throws Exception {
    Instances ins = transformData(manager, true);
    ins.setClassIndex(ins.attribute("year").index());
    J48 tree = new J48();
    tree.buildClassifier(ins);
    return tree.graph(); // GraphViz "dot" source for the learned tree
}
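tree.graph() returns GraphViz "dot" source, so the returned string can be written to disk and rendered, e.g. with dot -Tpng tree.dot -o tree.png. A small sketch (the file name tree.dot is a placeholder):

try (java.io.PrintWriter out = new java.io.PrintWriter("tree.dot")) {
    out.print(tree.graph()); // same string the method above returns
}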
From source file:sentinets.TrainModel.java
License:Open Source License
public void runExps() {
    Classifier c1 = new SMO();
    Classifier c2 = new J48();
    Classifier c3 = new NaiveBayes();
    trainModel(c1, "SVM");
    trainModel(c2, "J48");
    trainModel(c3, "Naive Bayes");
}
From source file:statistics.BinaryStatisticsEvaluator.java
@Override
public double[][] getConfusionMatrix(Instances Training_Instances, Instances Testing_Instances,
        String classifier) {
    // pick the classifier by its short name
    Classifier cModel = null;
    if ("NB".equals(classifier)) {
        cModel = new NaiveBayes();
    } else if ("DT".equals(classifier)) {
        cModel = new J48();
    } else if ("SVM".equals(classifier)) {
        cModel = new SMO();
    } else if ("KNN".equals(classifier)) {
        cModel = new IBk();
    }
    try {
        cModel.buildClassifier(Training_Instances);
        // test the model
        Evaluation eTest = new Evaluation(Training_Instances);
        eTest.evaluateModel(cModel, Testing_Instances);
        // print the results
        System.out.println(eTest.toSummaryString());
        System.out.println(eTest.toMatrixString());
        System.out.println(eTest.toClassDetailsString());
        // get the confusion matrix
        return eTest.confusionMatrix();
    } catch (Exception ex) {
        Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
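In Weka's Evaluation, confusionMatrix() is indexed [actual class][predicted class]. A hypothetical caller (the names evaluator, train and test are assumptions, not from the original source):

double[][] cm = evaluator.getConfusionMatrix(train, test, "DT"); // "DT" selects the J48 branch
for (double[] row : cm) {
    System.out.println(java.util.Arrays.toString(row)); // one row per actual class
}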
From source file:tclass.ABClassifier.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpAB_TC2 thisExp = new ExpAB_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();

    // Global data is likely to be included in every model, so we might as well
    // calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    // We want the clustering algorithms to cluster only instances of each class,
    // so make an array of clusterers, one per class.
    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    for (int i = 0; i < numClasses; i++) {
        eventClusterers[i] = settings.getEventClusterer();
    }

    // Segment the data by class.
    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }
    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete.");

    // The clusters were only useful for clustering; now attribution. We want to
    // attribute all the data, so there is one dataset for each learner.
    // First set up the attributors.
    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());
        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }
    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);
        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Release intermediate data so it can be garbage-collected.
    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;
    System.gc();

    // The raw data is now in the correct form for each attributor, so construct one
    // learner per class. Each boosted tree is run over the whole data and a voting
    // scheme over the per-class classifiers is applied below, a simple form of
    // ensemble classifier.
    Debug.setDebugLevel(Debug.PROGRESS);
    AdaBoostM1[] dtLearners = new AdaBoostM1[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new AdaBoostM1();
        dtLearners[i].setClassifier(new J48());
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }
    ABClassifier[] dtClassifiers = new ABClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new ABClassifier(dtLearners[i]);
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete.");

    /*
    // Now test on training data (each one):
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = trainAttsByClass[i].getStreamAttValVec();
        for (int j = 0; j < trainAttsByClass[i].size(); j++) {
            nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < classvi.size(); j++) {
            System.out.print(classvi.elAt(j).toString());
            if (classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Train accuracy for " + className + " classifier: " + numCorrect
                + " of " + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect
                + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");
    }

    // Now do voting. This is a hack solution.
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();
            votes[thisPrediction]++;
        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with "
                    + maxVotes + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as "
                    + predictedClassName + " with " + maxVotes + " votes. Votes: " + voteRes);
        }
    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams
            + " (" + numCorrect * 100.0 / numTestStreams + "%)");
}
From source file:tclass.DTClassifier.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpDT_TC2 thisExp = new ExpDT_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();

    // Global data is likely to be included in every model, so we might as well
    // calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    // We want the clustering algorithms to cluster only instances of each class,
    // so make an array of clusterers, one per class.
    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    for (int i = 0; i < numClasses; i++) {
        eventClusterers[i] = settings.getEventClusterer();
    }

    // Segment the data by class.
    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }
    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete.");

    // The clusters were only useful for clustering; now attribution. We want to
    // attribute all the data, so there is one dataset for each learner.
    // First set up the attributors.
    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());
        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }
    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);
        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Release intermediate data so it can be garbage-collected.
    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;
    System.gc();

    // The raw data is now in the correct form for each attributor, so construct one
    // learner per class, optionally with feature selection; a voting scheme over the
    // per-class classifiers is applied below.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[][] selectedIndices = new int[numClasses][];
    J48[] dtLearners = new J48[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new J48();
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        if (thisExp.featureSel) {
            Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
            BestFirst bfs = new BestFirst();
            CfsSubsetEval cfs = new CfsSubsetEval();
            cfs.buildEvaluator(data);
            selectedIndices[i] = bfs.search(cfs, data);
            // Now extract the features.
            System.out.print("Selected features for class " + i + ": ");
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += "last";
            System.out.println(featureString);
            // Now apply the filter.
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }
        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }
    DTClassifier[] dtClassifiers = new DTClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new DTClassifier(dtLearners[i]);
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete.");

    /*
    // Now test on training data (each one):
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = trainAttsByClass[i].getStreamAttValVec();
        for (int j = 0; j < trainAttsByClass[i].size(); j++) {
            nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < classvi.size(); j++) {
            System.out.print(classvi.elAt(j).toString());
            if (classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Train accuracy for " + className + " classifier: " + numCorrect
                + " of " + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    */

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        if (thisExp.featureSel) {
            String featureString = new String();
            for (int j = 0; j < selectedIndices[i].length; j++) {
                featureString += (selectedIndices[i][j] + 1) + ",";
            }
            featureString += "last";
            // Now apply the filter.
            Remove af = new Remove();
            af.setInvertSelection(true);
            af.setAttributeIndices(featureString);
            af.setInputFormat(data);
            data = Filter.useFilter(data, af);
        }
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect
                + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");
    }

    // Now do voting. This is a hack solution.
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            int thisPrediction = classns[j].elAt(i).getPredictedClass();
            votes[thisPrediction]++;
        }
        int maxIndex = -1;
        int maxVotes = 0;
        String voteRes = "[ ";
        for (int j = 0; j < numClasses; j++) {
            voteRes += votes[j] + " ";
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes += "]";
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with "
                    + maxVotes + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as "
                    + predictedClassName + " with " + maxVotes + " votes. Votes: " + voteRes);
        }
    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams
            + " (" + numCorrect * 100.0 / numTestStreams + "%)");
}