List of usage examples for weka.attributeSelection BestFirst search
@Override public int[] search(ASEvaluation ASEval, Instances data) throws Exception
From source file:tclass.DTClassifier.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpDT_TC2 thisExp = new ExpDT_TC2(); thisExp.parseArgs(args);//from w w w . ja va2 s . c om DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer[] eventClusterers = new EventClusterer[numClasses]; // And now, initialise. for (int i = 0; i < numClasses; i++) { // The new way: eventClusterers[i] = settings.getEventClusterer(); // The old way: // eventClusterers[i] = new EventClusterer(new // StreamTokenizer( // new FileReader(thisExp.evClusterDesc)), // domDesc, // eventDescVec); // System.out.println(eventClusterers[i]); } // Segment the data. ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses]; for (int i = 0; i < numClasses; i++) { trainStreamsByClass[i] = new ClassStreamEventsVec(); trainStreamsByClass[i].setClassVec(new ClassificationVec()); trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); for (int i = 0; i < numTrainStreams; i++) { int currentClass = trainEventCV.elAt(i).getRealClass(); trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i)); } ClusterVecI[] clustersByClass = new ClusterVecI[numClasses]; for (int i = 0; i < numClasses; i++) { clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete"); Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:"); Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor[] attribsByClass = new Attributor[numClasses]; for (int i = 0; i < numClasses; i++) { attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete."); } ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses]; ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses]; for (int i = 0; i < numClasses; i++) { trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData); testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete."); } Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses]; ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses]; for (int i = 0; i < numClasses; i++) { trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]); testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]); } // Now we have to do some garbage collection. trainStreamData = null; testStreamData = null; eventClusterers = null; trainEventSEV = null; trainEventCV = null; clustersByClass = null; attribsByClass = null; System.gc(); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[][] selectedIndices = new int[numClasses][]; J48[] dtLearners = new J48[numClasses]; for (int i = 0; i < numClasses; i++) { dtLearners[i] = new J48(); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i); Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices[i] = bfs.search(cfs, data); // Now extract the features. System.out.print("Selected features for class " + i + ": "); String featureString = new String(); for (int j = 0; j < selectedIndices[i].length; j++) { featureString += (selectedIndices[i][j] + 1) + ","; } featureString += ("last"); System.out.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } dtLearners[i].buildClassifier(data); Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString()); } DTClassifier[] dtClassifiers = new DTClassifier[numClasses]; for (int i = 0; i < numClasses; i++) { dtClassifiers[i] = new DTClassifier(dtLearners[i]); // System.out.println(nbClassifiers[i].toString()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); // Now test on training data (each one) /* for(int i=0; i < numClasses; i++){ String className = domDesc.getClassDescVec().getClassLabel(i); ClassificationVecI classvi = (ClassificationVecI) trainAttsByClass[i].getClassVec().clone(); StreamAttValVecI savvi = trainAttsByClass[i].getStreamAttValVec(); for(int j=0; j < trainAttsByClass[i].size(); j++){ nbClassifiers[i].classify(savvi.elAt(j), classvi.elAt(j)); } System.out.println(">>> Learner for class " + className); int numCorrect = 0; for(int j=0; j < classvi.size(); j++){ System.out.print(classvi.elAt(j).toString()); if(classvi.elAt(j).getRealClass() == classvi.elAt(j).getPredictedClass()){ numCorrect++; } } System.out.println("Train accuracy for " + className + " classifier: " + numCorrect + " of " + numTrainStreams + " (" + numCorrect*100.0/numTrainStreams + "%)"); } */ System.out.println(">>> Testing stage <<<"); // First, print the results of using the straight testers. ClassificationVecI[] classns = new ClassificationVecI[numClasses]; for (int i = 0; i < numClasses; i++) { String className = domDesc.getClassDescVec().getClassLabel(i); classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone(); StreamAttValVecI savvi = testAttsByClass[i].getStreamAttValVec(); Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i); if (thisExp.featureSel) { String featureString = new String(); for (int j = 0; j < selectedIndices[i].length; j++) { featureString += (selectedIndices[i][j] + 1) + ","; } featureString += "last"; // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } for (int j = 0; j < numTestStreams; j++) { dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j)); } System.out.println(">>> Learner for class " + className); int numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { System.out.print(classns[i].elAt(j).toString()); if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) { numCorrect++; } } System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); } // Now do voting. This is a hack solution. int numCorrect = 0; for (int i = 0; i < numTestStreams; i++) { int[] votes = new int[numClasses]; int realClass = classns[0].elAt(i).getRealClass(); String realClassName = domDesc.getClassDescVec().getClassLabel(realClass); for (int j = 0; j < numClasses; j++) { int thisPrediction = classns[j].elAt(i).getPredictedClass(); // if(thisPrediction == j){ // votes[thisPrediction] += 2; // } //else { votes[thisPrediction]++; //} } int maxIndex = -1; int maxVotes = 0; String voteRes = "[ "; for (int j = 0; j < numClasses; j++) { voteRes += votes[j] + " "; if (votes[j] > maxVotes) { maxIndex = j; maxVotes = votes[j]; } } voteRes += "]"; // Now print the result: String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex); if (maxIndex == realClass) { System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes + " votes. Votes: " + voteRes); numCorrect++; } else { System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + " with " + maxVotes + " votes. Votes: " + voteRes); } } System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); }
From source file:tclass.ExpNB_Single.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpSingle thisExp = new ExpSingle(); thisExp.parseArgs(args);/* w w w . ja v a2 s . c o m*/ DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData); ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts); trainStreamData = null; testStreamData = null; eventClusterer = null; trainEventSEV = null; trainEventCV = null; clusters = null; attribs = null; System.gc(); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += selectedIndices[j] + ","; } featureString += ("last"); System.err.println(featureString); // Now cut from trainAtts. // trainAtts.selectFeatures(selectedIndices); } Debug.dp(Debug.PROGRESS, "Learning with Naive Bayes now ..."); NaiveBayes nbLearner = new NaiveBayes(); nbLearner.setDomDesc(domDesc); nbLearner.setAttDescVec(trainAtts.getStreamAttValVec().getDescription()); ClassifierI nbClassifier = nbLearner.learn(trainAtts); Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); System.out.println(">>> Testing stage <<<"); // First, print the results of using the straight testers. ClassificationVecI classns; classns = (ClassificationVecI) testAtts.getClassVec().clone(); StreamAttValVecI savvi = testAtts.getStreamAttValVec(); /* if(thisExp.featureSel){ String featureString = new String(); for(int j=0; j < selectedIndices.length; j++){ featureString += (selectedIndices[j]+1) + ","; } featureString += "last"; // Now apply the filter. AttributeFilter af = new AttributeFilter(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.inputFormat(data); data = af.useFilter(data, af); } */ for (int j = 0; j < numTestStreams; j++) { nbClassifier.classify(savvi.elAt(j), classns.elAt(j)); } System.out.println(">>> Learner <<<"); int numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.out.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.out.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.out.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); }
From source file:tclass.ExpSeg.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpSeg thisExp = new ExpSeg(); thisExp.parseArgs(args);/*from w ww . ja v a2 s .com*/ DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testGlobalData.size(); int numClasses = domDesc.getClassDescVec().size(); TimeDivision td = new TimeDivision(domDesc, thisExp.numDivs); ClassStreamAttValVecI trainDivData = td.timeDivide(trainStreamData); ClassStreamAttValVecI testDivData = td.timeDivide(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Segmentation performed"); Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainDivData); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testDivData); trainStreamData = null; testStreamData = null; System.gc(); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff); if (classifierSpec.length == 0) { throw new Exception("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += ("last"); System.err.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } learner.buildClassifier(data); Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString()); WekaClassifier wekaClassifier; wekaClassifier = new WekaClassifier(learner); Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); System.err.println(">>> Testing stage <<<"); // First, print the results of using the straight testers. ClassificationVecI classns; classns = (ClassificationVecI) testAtts.getClassVec().clone(); StreamAttValVecI savvi = testAtts.getStreamAttValVec(); data = WekaBridge.makeInstances(testAtts, "Test "); if (thisExp.featureSel) { String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += "last"; // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } for (int j = 0; j < numTestStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } System.err.println(">>> Learner <<<"); int numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); }
From source file:tclass.ExpSingle.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpSingle thisExp = new ExpSingle(); thisExp.parseArgs(args);//from www. j a v a2 s .c o m mem("PARSE"); DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); mem("DATAIN"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); mem("GLOBAL"); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); mem("EVENTEXTRACT"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); mem("REARRANGE"); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); mem("CLUSTER"); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); mem("MAKEATTRIBUTOR"); ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData); ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); mem("ATTRIBUTION"); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts); mem("COMBINATION"); trainStreamData = null; testStreamData = null; trainEventSEV = null; trainEventCV = null; if (!thisExp.makeDesc) { clusters = null; eventClusterer = null; } attribs = null; System.gc(); mem("GARBAGECOLLECT"); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff); if (classifierSpec.length == 0) { throw new Exception("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); mem("ATTCONVERSION"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += ("last"); System.err.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } learner.buildClassifier(data); mem("POSTLEARNER"); Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString()); WekaClassifier wekaClassifier; wekaClassifier = new WekaClassifier(learner); if (thisExp.makeDesc) { // Section for making description more readable. Assumes that // learner.toString() returns a string with things that look like // feature names. String concept = learner.toString(); StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true); int evId = 1; String evIndex = ""; while (st.hasMoreTokens()) { boolean appendColon = false; String curTok = st.nextToken(); GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok); if (clust != null) { // Skip the spaces st.nextToken(); // Get a < or > String cmp = st.nextToken(); String qual = ""; if (cmp.equals("<=")) { qual = " HAS NO "; } else { qual = " HAS "; } // skip spaces st.nextToken(); // Get the number. String conf = st.nextToken(); if (conf.endsWith(":")) { conf = conf.substring(0, conf.length() - 1); appendColon = true; } float minconf = Float.valueOf(conf).floatValue(); EventI[] res = clust.getBounds(minconf); String name = clust.getName(); int dashPos = name.indexOf('-'); int undPos = name.indexOf('_'); String chan = name.substring(0, dashPos); String evType = name.substring(dashPos + 1, undPos); EventDescI edi = clust.eventDesc(); if (qual == " HAS NO " && thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName())) { System.out.print("OTHERWISE"); } else { System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")"); int numParams = edi.numParams(); evIndex += "*" + evId + ": " + evType + "\n"; for (int i = 0; i < numParams; i++) { evIndex += " " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i) + "," + res[1].valOf(i) + "]\n"; } evId++; } evIndex += "\n"; if (appendColon) { System.out.print(" THEN"); } } else { System.out.print(curTok); } } System.out.println("\nEvent index"); System.out.println("-----------"); System.out.print(evIndex); mem("POSTDESC"); // Now this is going to be messy as fuck. Really. What do we needs? Well, // we need to read in the data; look up some info, that we // assume came from a GainClusterer ... // Sanity check. // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0"); // System.out.println("INSANE!: " + clust.getDescription()); // EventI[] res = clust.getBounds(1); // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); int numCorrect = 0; ClassificationVecI classns; if (thisExp.trainResults) { System.err.println(">>> Training performance <<<"); classns = (ClassificationVecI) trainAtts.getClassVec().clone(); for (int j = 0; j < numTrainStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } for (int j = 0; j < numTrainStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)"); } mem("POSTTRAIN"); System.err.println(">>> Testing stage <<<"); // First, print the results of using the straight testers. classns = (ClassificationVecI) testAtts.getClassVec().clone(); StreamAttValVecI savvi = testAtts.getStreamAttValVec(); data = WekaBridge.makeInstances(testAtts, "Test "); if (thisExp.featureSel) { String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += "last"; // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } for (int j = 0; j < numTestStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } System.err.println(">>> Learner <<<"); numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); mem("POSTTEST"); }
From source file:tclass.ExpSingleLM.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpSingleLM thisExp = new ExpSingleLM(); thisExp.parseArgs(args);//from w ww .j ava 2s . c o m mem("PARSE"); DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Training data read in"); mem("TRAINDATAIN"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Training data globals calculated."); mem("TRAINGLOBAL"); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Training events extracted"); mem("EVENTEXTRACT"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); mem("REARRANGE"); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); mem("CLUSTER"); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); mem("MAKEATTRIBUTOR"); ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Training data Attribution complete."); mem("TRAINATTRIBUTION"); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); mem("TRAINCOMBINATION"); trainStreamData = null; trainEventSEV = null; trainEventCV = null; System.gc(); mem("TRAINGC"); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff); if (classifierSpec.length == 0) { throw new Exception("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); mem("ATTCONVERSION"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += ("last"); System.err.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } learner.buildClassifier(data); mem("POSTLEARNER"); Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString()); WekaClassifier wekaClassifier; wekaClassifier = new WekaClassifier(learner); if (thisExp.makeDesc) { // Section for making description more readable. Assumes that // learner.toString() returns a string with things that look like // feature names. String concept = learner.toString(); StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true); int evId = 1; String evIndex = ""; while (st.hasMoreTokens()) { boolean appendColon = false; String curTok = st.nextToken(); GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok); if (clust != null) { // Skip the spaces st.nextToken(); // Get a < or > String cmp = st.nextToken(); String qual = ""; if (cmp.equals("<=")) { qual = " HAS NO "; } else { qual = " HAS "; } // skip spaces st.nextToken(); // Get the number. String conf = st.nextToken(); if (conf.endsWith(":")) { conf = conf.substring(0, conf.length() - 1); appendColon = true; } float minconf = Float.valueOf(conf).floatValue(); EventI[] res = clust.getBounds(minconf); String name = clust.getName(); int dashPos = name.indexOf('-'); int undPos = name.indexOf('_'); String chan = name.substring(0, dashPos); String evType = name.substring(dashPos + 1, undPos); EventDescI edi = clust.eventDesc(); if (qual == " HAS NO " && thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName())) { System.out.print("OTHERWISE"); } else { System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")"); int numParams = edi.numParams(); evIndex += "*" + evId + ": " + evType + "\n"; for (int i = 0; i < numParams; i++) { evIndex += " " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i) + "," + res[1].valOf(i) + "]\n"; } evId++; } evIndex += "\n"; if (appendColon) { System.out.print(" THEN"); } } else { System.out.print(curTok); } } System.out.println("\nEvent index"); System.out.println("-----------"); System.out.print(evIndex); mem("POSTDESC"); // Now this is going to be messy as fuck. Really. What do we needs? Well, // we need to read in the data; look up some info, that we // assume came from a GainClusterer ... // Sanity check. // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0"); // System.out.println("INSANE!: " + clust.getDescription()); // EventI[] res = clust.getBounds(1); // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); int numCorrect = 0; ClassificationVecI classns; if (thisExp.trainResults) { System.err.println(">>> Training performance <<<"); classns = (ClassificationVecI) trainAtts.getClassVec().clone(); for (int j = 0; j < numTrainStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } for (int j = 0; j < numTrainStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)"); } mem("POSTTRAIN"); System.err.println(">>> Testing stage <<<"); // Stick testing stuff here. mem("TESTBEGIN"); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Test data read in"); mem("TESTREAD"); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Test data globals calculated"); mem("TESTGLOBALS"); Debug.dp(Debug.PROGRESS, "Test data: " + testGlobalData.size()); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Test events extracted"); mem("TESTEVENTS"); int numTestStreams = testEventData.size(); ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData); mem("TESTATTRIBUTES"); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts); mem("TESTCOMBINE"); testStreamData = null; System.gc(); // Do garbage collection. mem("TESTGC"); if (!thisExp.makeDesc) { clusters = null; eventClusterer = null; } attribs = null; // First, print the results of using the straight testers. classns = (ClassificationVecI) testAtts.getClassVec().clone(); StreamAttValVecI savvi = testAtts.getStreamAttValVec(); data = WekaBridge.makeInstances(testAtts, "Test "); if (thisExp.featureSel) { String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += "last"; // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } for (int j = 0; j < numTestStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } System.err.println(">>> Learner <<<"); numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); mem("POSTTEST"); }
From source file:tclass.TClass.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); TClass thisExp = new TClass(); thisExp.parseArgs(args);/* ww w .j av a 2 s. c om*/ DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData); ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts); trainStreamData = null; testStreamData = null; trainEventSEV = null; trainEventCV = null; if (!thisExp.makeDesc) { clusters = null; eventClusterer = null; } attribs = null; System.gc(); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff); if (classifierSpec.length == 0) { throw new Exception("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += ("last"); System.err.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } learner.buildClassifier(data); Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString()); WekaClassifier wekaClassifier; wekaClassifier = new WekaClassifier(learner); if (thisExp.makeDesc) { // Section for making description more readable. Assumes that // learner.toString() returns a string with things that look like // feature names. String concept = learner.toString(); StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true); while (st.hasMoreTokens()) { boolean appendColon = false; String curTok = st.nextToken(); GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok); if (clust != null) { // Skip the spaces st.nextToken(); // Get a < or > String cmp = st.nextToken(); String qual = ""; if (cmp.equals("<=")) { qual = " HAS NO "; } else { qual = " HAS "; } // skip spaces st.nextToken(); // Get the number. String conf = st.nextToken(); if (conf.endsWith(":")) { conf = conf.substring(0, conf.length() - 1); appendColon = true; } float minconf = Float.valueOf(conf).floatValue(); EventI[] res = clust.getBounds(minconf); String name = clust.getName(); int dashPos = name.indexOf('-'); int undPos = name.indexOf('_'); String chan = name.substring(0, dashPos); String evType = name.substring(dashPos + 1, undPos); EventDescI edi = clust.eventDesc(); System.out.print("Channel " + chan + qual + evType + " "); int numParams = edi.numParams(); for (int i = 0; i < numParams; i++) { System.out .print(edi.paramName(i) + " in [" + res[0].valOf(i) + "," + res[1].valOf(i) + "] "); } if (appendColon) { System.out.print(":"); } } else { System.out.print(curTok); } } // Now this is going to be messy as fuck. Really. What do we needs? Well, // we need to read in the data; look up some info, that we // assume came from a GainClusterer ... // Sanity check. // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0"); // System.out.println("INSANE!: " + clust.getDescription()); // EventI[] res = clust.getBounds(1); // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. "); int numCorrect = 0; ClassificationVecI classns; if (thisExp.trainResults) { System.err.println(">>> Training performance <<<"); classns = (ClassificationVecI) trainAtts.getClassVec().clone(); for (int j = 0; j < numTrainStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } for (int j = 0; j < numTrainStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " (" + numCorrect * 100.0 / numTrainStreams + "%)"); } System.err.println(">>> Testing stage <<<"); // First, print the results of using the straight testers. classns = (ClassificationVecI) testAtts.getClassVec().clone(); StreamAttValVecI savvi = testAtts.getStreamAttValVec(); data = WekaBridge.makeInstances(testAtts, "Test "); if (thisExp.featureSel) { String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += "last"; // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } for (int j = 0; j < numTestStreams; j++) { wekaClassifier.classify(data.instance(j), classns.elAt(j)); } System.err.println(">>> Learner <<<"); numCorrect = 0; for (int j = 0; j < numTestStreams; j++) { // System.out.print(classns.elAt(j).toString()); if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) { numCorrect++; String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); System.err.println("Class " + realClassName + " CORRECTLY classified."); } else { String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass()); String predictedClassName = domDesc.getClassDescVec() .getClassLabel(classns.elAt(j).getPredictedClass()); System.err.println( "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + "."); } } System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)"); }
From source file:tclass.ToArff.java
License:Open Source License
public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ToArff thisExp = new ToArff(); thisExp.parseArgs(args);//from ww w . j av a 2s . c o m DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.inFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); trainStreamData = null; trainEventSEV = null; trainEventCV = null; if (!thisExp.makeDesc) { clusters = null; eventClusterer = null; } attribs = null; System.gc(); // So now we have the raw data in the correct form for each // attributor. // And now, we can construct a learner for each case. // Well, for now, I'm going to do something completely crazy. // Let's run each classifier nonetheless over the whole data // ... and see what the hell happens. Maybe some voting scheme // is possible!! This is a strange form of ensemble // classifier. // Each naive bayes algorithm only gets one Debug.setDebugLevel(Debug.PROGRESS); int[] selectedIndices = null; String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff); if (classifierSpec.length == 0) { throw new Exception("Invalid classifier specification string"); } String classifierName = classifierSpec[0]; classifierSpec[0] = ""; Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec); Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class "); Instances data = WekaBridge.makeInstances(trainAtts, "Train "); Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning"); if (thisExp.featureSel) { Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection"); BestFirst bfs = new BestFirst(); CfsSubsetEval cfs = new CfsSubsetEval(); cfs.buildEvaluator(data); selectedIndices = bfs.search(cfs, data); // Now extract the features. System.err.print("Selected features: "); String featureString = new String(); for (int j = 0; j < selectedIndices.length; j++) { featureString += (selectedIndices[j] + 1) + ","; } featureString += ("last"); System.err.println(featureString); // Now apply the filter. Remove af = new Remove(); af.setInvertSelection(true); af.setAttributeIndices(featureString); af.setInputFormat(data); data = Filter.useFilter(data, af); } try { FileWriter fw = new FileWriter(thisExp.outFile); fw.write(data.toString()); fw.close(); } catch (Exception e) { throw new Exception("Could not write to output file. "); } }