Example usage for weka.attributeSelection BestFirst search

List of usage examples for weka.attributeSelection BestFirst search

Introduction

On this page you can find example usages of the weka.attributeSelection BestFirst search method.

Prototype

@Override
public int[] search(ASEvaluation ASEval, Instances data) throws Exception 

Source Link

Document

Searches the attribute subset space by best first search

Usage

From source file:tclass.DTClassifier.java

License:Open Source License

/**
 * Runs the per-class decision-tree experiment from start to finish.
 *
 * <p>Stages, in order: load the train/test streams, compute global
 * attributes, extract events, cluster the training events separately for
 * every class, attribute both data sets against each per-class clustering,
 * combine globals with event attributes, optionally pick attributes with
 * {@code CfsSubsetEval} + {@code BestFirst}, train one {@code J48} per class,
 * print each tree's test accuracy, and finally print a majority vote over
 * all trees.
 *
 * @param args command-line arguments, passed to {@code ExpDT_TC2.parseArgs}
 * @throws Exception propagated from any stage of the pipeline
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpDT_TC2 experiment = new ExpDT_TC2();
    experiment.parseArgs(args);
    DomDesc domain = new DomDesc(experiment.domDescFile);
    ClassStreamVecI trainStreams = new ClassStreamVec(experiment.trainDataFile, domain);
    ClassStreamVecI testStreams = new ClassStreamVec(experiment.testDataFile, domain);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");

    Settings config = new Settings(experiment.settingsFile, domain);
    EventExtractor extractor = config.getEventExtractor();

    // Globals go into every model, so compute them once up front.
    GlobalCalc globals = config.getGlobalCalc();
    ClassStreamAttValVecI trainGlobals = globals.applyGlobals(trainStreams);
    ClassStreamAttValVecI testGlobals = globals.applyGlobals(testStreams);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobals.size() + " Test: " + testGlobals.size());

    // Event extraction for both data sets.
    ClassStreamEventsVecI trainEvents = extractor.extractEvents(trainStreams);
    ClassStreamEventsVecI testEvents = extractor.extractEvents(testStreams);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    // Each clusterer must only see the instances of a single class, so
    // one clusterer is created per class.
    int testCount = testEvents.size();
    int classCount = domain.getClassDescVec().size();
    EventDescVecI eventDescription = extractor.getDescription();
    EventClusterer[] clusterers = new EventClusterer[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        clusterers[cls] = config.getEventClusterer();
    }

    // One empty training bucket per class.
    ClassStreamEventsVec[] trainEventsPerClass = new ClassStreamEventsVec[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        ClassStreamEventsVec bucket = new ClassStreamEventsVec();
        bucket.setClassVec(new ClassificationVec());
        bucket.setStreamEventsVec(new StreamEventsVec());
        trainEventsPerClass[cls] = bucket;
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // Route every training stream into the bucket of its real class.
    StreamEventsVecI trainEventStreams = trainEvents.getStreamEventsVec();
    ClassificationVecI trainLabels = trainEvents.getClassVec();
    int trainCount = trainLabels.size();
    for (int s = 0; s < trainCount; s++) {
        int label = trainLabels.elAt(s).getRealClass();
        trainEventsPerClass[label].add(trainEventStreams.elAt(s), trainLabels.elAt(s));
    }

    // Cluster each class's events independently.
    ClusterVecI[] clusters = new ClusterVecI[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        clusters[cls] = clusterers[cls].clusterEvents(trainEventsPerClass[cls]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + cls + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domain.getClassDescVec().getClassLabel(cls) + " are:");
        Debug.dp(Debug.PROGRESS, clusterers[cls].getMapping());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // The per-class split was only for clustering. Attribution runs over
    // ALL the data, once per class-specific clustering, which yields one
    // data set per learner.
    Attributor[] attributors = new Attributor[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        attributors[cls] = new Attributor(domain, clusters[cls], clusterers[cls].getDescription());
        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + cls + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAttrs = new ClassStreamAttValVec[classCount];
    ClassStreamAttValVecI[] testEventAttrs = new ClassStreamAttValVec[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        trainEventAttrs[cls] = attributors[cls].attribute(trainStreams, trainEvents);
        testEventAttrs[cls] = attributors[cls].attribute(testStreams, testEvents);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + cls + " complete.");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Merge the global attributes into every per-class data set.
    Combiner combiner = new Combiner();
    ClassStreamAttValVecI[] trainPerClass = new ClassStreamAttValVec[classCount];
    ClassStreamAttValVecI[] testPerClass = new ClassStreamAttValVec[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        trainPerClass[cls] = combiner.combine(trainGlobals, trainEventAttrs[cls]);
        testPerClass[cls] = combiner.combine(testGlobals, testEventAttrs[cls]);
    }

    // Drop references to the intermediate structures and ask for a GC
    // before the learning stage.
    trainStreams = null;
    testStreams = null;
    clusterers = null;
    trainEventStreams = null;
    trainLabels = null;
    clusters = null;
    attributors = null;
    System.gc();

    // Train one J48 per class-specific data set, each over the whole
    // training data; the trees are later combined by voting.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[][] chosen = new int[classCount][];
    J48[] trees = new J48[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        trees[cls] = new J48();
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + cls);
        Instances trainSet = WekaBridge.makeInstances(trainPerClass[cls], "Train " + cls);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

        if (experiment.featureSel) {
            Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
            BestFirst searcher = new BestFirst();
            CfsSubsetEval evaluator = new CfsSubsetEval();
            evaluator.buildEvaluator(trainSet);
            chosen[cls] = searcher.search(evaluator, trainSet);

            // Build the attribute range: selected indices shifted by one,
            // followed by "last".
            System.out.print("Selected features for class " + cls + ": ");
            StringBuilder range = new StringBuilder();
            for (int k = 0; k < chosen[cls].length; k++) {
                range.append(chosen[cls][k] + 1).append(",");
            }
            range.append("last");
            String rangeText = range.toString();
            System.out.println(rangeText);

            // Inverted Remove: the listed attributes are the ones kept.
            Remove keepSelected = new Remove();
            keepSelected.setInvertSelection(true);
            keepSelected.setAttributeIndices(rangeText);
            keepSelected.setInputFormat(trainSet);
            trainSet = Filter.useFilter(trainSet, keepSelected);
        }

        trees[cls].buildClassifier(trainSet);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + trees[cls].toString());
    }

    // Wrap every trained tree in the project's classifier adapter.
    DTClassifier[] wrapped = new DTClassifier[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        wrapped[cls] = new DTClassifier(trees[cls]);
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    // (An evaluation pass over the training data used to live here as
    // commented-out code; it is intentionally not executed.)

    System.out.println(">>> Testing stage <<<");
    // First report each per-class tree on its own.
    ClassificationVecI[] predictions = new ClassificationVecI[classCount];
    for (int cls = 0; cls < classCount; cls++) {
        String label = domain.getClassDescVec().getClassLabel(cls);
        predictions[cls] = (ClassificationVecI) testPerClass[cls].getClassVec().clone();
        StreamAttValVecI testAttVals = testPerClass[cls].getStreamAttValVec();
        Instances testSet = WekaBridge.makeInstances(testPerClass[cls], "Test " + cls);

        if (experiment.featureSel) {
            // Same attribute range as used for training this class.
            StringBuilder range = new StringBuilder();
            for (int k = 0; k < chosen[cls].length; k++) {
                range.append(chosen[cls][k] + 1).append(",");
            }
            range.append("last");

            Remove keepSelected = new Remove();
            keepSelected.setInvertSelection(true);
            keepSelected.setAttributeIndices(range.toString());
            keepSelected.setInputFormat(testSet);
            testSet = Filter.useFilter(testSet, keepSelected);
        }

        for (int s = 0; s < testCount; s++) {
            wrapped[cls].classify(testSet.instance(s), predictions[cls].elAt(s));
        }

        System.out.println(">>> Learner for class " + label);
        int hits = 0;
        for (int s = 0; s < testCount; s++) {
            System.out.print(predictions[cls].elAt(s).toString());
            int actual = predictions[cls].elAt(s).getRealClass();
            int predicted = predictions[cls].elAt(s).getPredictedClass();
            if (actual == predicted) {
                hits++;
            }
        }
        System.out.println("Test accuracy for " + label + " classifier: " + hits + " of "
                + testCount + " (" + hits * 100.0 / testCount + "%)");
    }

    // Majority vote across all per-class trees (acknowledged hack).
    int votedHits = 0;
    for (int s = 0; s < testCount; s++) {
        int[] ballot = new int[classCount];
        int actual = predictions[0].elAt(s).getRealClass();
        String actualName = domain.getClassDescVec().getClassLabel(actual);
        for (int cls = 0; cls < classCount; cls++) {
            int vote = predictions[cls].elAt(s).getPredictedClass();
            ballot[vote]++;
        }

        // Strict '>' means the lowest class index wins a tie.
        int winner = -1;
        int winnerVotes = 0;
        StringBuilder tally = new StringBuilder("[ ");
        for (int cls = 0; cls < classCount; cls++) {
            tally.append(ballot[cls]).append(" ");
            if (ballot[cls] > winnerVotes) {
                winner = cls;
                winnerVotes = ballot[cls];
            }
        }
        String tallyText = tally.append("]").toString();

        String winnerName = domain.getClassDescVec().getClassLabel(winner);
        if (winner != actual) {
            System.out.println("Class " + actualName + " INCORRECTLY classified as " + winnerName
                    + " with " + winnerVotes + " votes. Votes: " + tallyText);
        } else {
            System.out.println("Class " + actualName + " CORRECTLY classified with " + winnerVotes
                    + " votes. Votes: " + tallyText);
            votedHits++;
        }
    }
    System.out.println("Final voted accuracy: " + votedHits + " of " + testCount + " ("
            + votedHits * 100.0 / testCount + "%)");
}

From source file:tclass.ExpNB_Single.java

License:Open Source License

/**
 * Runs the single-clusterer naive Bayes experiment.
 *
 * <p>Stages, in order: load the train/test streams, compute global
 * attributes, extract events, cluster all training events with a single
 * clusterer, attribute both data sets, combine globals with event
 * attributes, optionally run {@code CfsSubsetEval} + {@code BestFirst}
 * feature selection (the result is only reported, see the note below), train
 * the project's {@code NaiveBayes} learner and print per-stream results and
 * the overall test accuracy.
 *
 * @param args command-line arguments, passed to {@code ExpSingle.parseArgs}
 * @throws Exception propagated from any stage of the pipeline
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingle thisExp = new ExpSingle();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so it is
    // calculated once up front.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    // Extract the events from both data sets.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    // Unlike the per-class experiments, a single clusterer is used for
    // the events of all classes.
    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // Clustering only needed the training events; attribution is applied
    // to both the training and the test data.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources: globals plus event attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);

    // Drop references to the intermediate structures and ask for a GC
    // before the learning stage.
    trainStreamData = null;
    testStreamData = null;
    eventClusterer = null;
    trainEventSEV = null;
    trainEventCV = null;
    clusters = null;
    attribs = null;

    System.gc();

    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);

        // Report the selection as a Weka attribute range. The search
        // returns 0-based indices while range strings (the ones that
        // accept "last") are 1-based, hence the +1. Previously the raw
        // 0-based indices were printed, which disagreed with the sibling
        // experiments and produced an invalid range such as "0,3,last".
        System.err.print("Selected features: ");
        StringBuilder featureString = new StringBuilder();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString.append(selectedIndices[j] + 1).append(",");
        }
        featureString.append("last");
        System.err.println(featureString);

        // NOTE(review): the selection is only reported here. Neither
        // trainAtts nor testAtts is reduced to the selected features
        // (the selectFeatures call and the test-side filter were
        // disabled in the original), so the learner below sees every
        // attribute.
        // trainAtts.selectFeatures(selectedIndices);
    }

    Debug.dp(Debug.PROGRESS, "Learning with Naive Bayes now ...");
    NaiveBayes nbLearner = new NaiveBayes();
    nbLearner.setDomDesc(domDesc);
    nbLearner.setAttDescVec(trainAtts.getStreamAttValVec().getDescription());
    ClassifierI nbClassifier = nbLearner.learn(trainAtts);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    System.out.println(">>> Testing stage <<<");
    // Classify every test stream into a clone of the test classification
    // vector, so the predictions are recorded on the copy.
    ClassificationVecI classns;
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    for (int j = 0; j < numTestStreams; j++) {
        nbClassifier.classify(savvi.elAt(j), classns.elAt(j));
    }

    System.out.println(">>> Learner <<<");
    int numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.out.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());

            System.out.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.out.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");

}

From source file:tclass.ExpSeg.java

License:Open Source License

/**
 * Runs the time-segmentation experiment with a configurable Weka learner.
 *
 * <p>Stages, in order: load the train/test streams, compute global
 * attributes, divide each stream with {@code TimeDivision}, combine globals
 * with the segment attributes, build the classifier named in
 * {@code learnerStuff}, optionally reduce the attributes with
 * {@code CfsSubsetEval} + {@code BestFirst}, train, then classify the test
 * data and print per-stream results and the overall accuracy to stderr.
 *
 * @param args command-line arguments, passed to {@code ExpSeg.parseArgs}
 * @throws Exception if the classifier specification is empty, or propagated
 *         from any stage of the pipeline
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSeg experiment = new ExpSeg();
    experiment.parseArgs(args);
    DomDesc domain = new DomDesc(experiment.domDescFile);
    ClassStreamVecI trainStreams = new ClassStreamVec(experiment.trainDataFile, domain);
    ClassStreamVecI testStreams = new ClassStreamVec(experiment.testDataFile, domain);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings config = new Settings(experiment.settingsFile, domain);

    EventExtractor extractor = config.getEventExtractor();

    // Globals are part of every model, so compute them once up front.
    GlobalCalc globals = config.getGlobalCalc();
    ClassStreamAttValVecI trainGlobals = globals.applyGlobals(trainStreams);
    ClassStreamAttValVecI testGlobals = globals.applyGlobals(testStreams);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobals.size() + " Test: " + testGlobals.size());

    // Segment both data sets into a fixed number of time divisions.
    int testCount = testGlobals.size();
    int classCount = domain.getClassDescVec().size();
    TimeDivision divider = new TimeDivision(domain, experiment.numDivs);
    ClassStreamAttValVecI trainSegments = divider.timeDivide(trainStreams);
    ClassStreamAttValVecI testSegments = divider.timeDivide(testStreams);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Segmentation performed");

    // Merge the global attributes with the segment attributes.
    Combiner combiner = new Combiner();
    ClassStreamAttValVecI trainAtts = combiner.combine(trainGlobals, trainSegments);
    ClassStreamAttValVecI testAtts = combiner.combine(testGlobals, testSegments);

    // Drop the raw streams and ask for a GC before the learning stage.
    trainStreams = null;
    testStreams = null;
    System.gc();

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] chosen = null;

    // The first token of the specification is the classifier class name;
    // it is blanked out so the remaining tokens are passed on as options.
    String[] learnerSpec = Utils.splitOptions(experiment.learnerStuff);
    if (learnerSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String learnerName = learnerSpec[0];
    learnerSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(learnerName, learnerSpec);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances dataSet = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (experiment.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst searcher = new BestFirst();
        CfsSubsetEval evaluator = new CfsSubsetEval();
        evaluator.buildEvaluator(dataSet);
        chosen = searcher.search(evaluator, dataSet);

        // Build the attribute range: selected indices shifted by one,
        // followed by "last".
        System.err.print("Selected features: ");
        StringBuilder range = new StringBuilder();
        for (int k = 0; k < chosen.length; k++) {
            range.append(chosen[k] + 1).append(",");
        }
        range.append("last");
        String rangeText = range.toString();
        System.err.println(rangeText);

        // Inverted Remove: the listed attributes are the ones kept.
        Remove keepSelected = new Remove();
        keepSelected.setInvertSelection(true);
        keepSelected.setAttributeIndices(rangeText);
        keepSelected.setInputFormat(dataSet);
        dataSet = Filter.useFilter(dataSet, keepSelected);
    }
    learner.buildClassifier(dataSet);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier adapter = new WekaClassifier(learner);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    System.err.println(">>> Testing stage <<<");
    // Predictions are recorded on a clone of the test classification vector.
    ClassificationVecI predictions = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI testAttVals = testAtts.getStreamAttValVec();
    dataSet = WekaBridge.makeInstances(testAtts, "Test ");
    if (experiment.featureSel) {
        // Same attribute range as used for training.
        StringBuilder range = new StringBuilder();
        for (int k = 0; k < chosen.length; k++) {
            range.append(chosen[k] + 1).append(",");
        }
        range.append("last");

        Remove keepSelected = new Remove();
        keepSelected.setInvertSelection(true);
        keepSelected.setAttributeIndices(range.toString());
        keepSelected.setInputFormat(dataSet);
        dataSet = Filter.useFilter(dataSet, keepSelected);
    }
    for (int s = 0; s < testCount; s++) {
        adapter.classify(dataSet.instance(s), predictions.elAt(s));
    }

    System.err.println(">>> Learner <<<");
    int hits = 0;
    for (int s = 0; s < testCount; s++) {
        int actual = predictions.elAt(s).getRealClass();
        int predicted = predictions.elAt(s).getPredictedClass();
        String actualName = domain.getClassDescVec().getClassLabel(actual);
        if (actual != predicted) {
            String predictedName = domain.getClassDescVec().getClassLabel(predicted);
            System.err.println(
                    "Class " + actualName + " INCORRECTLY classified as " + predictedName + ".");
        } else {
            hits++;
            System.err.println("Class " + actualName + " CORRECTLY classified.");
        }
    }
    System.err.println("Test accuracy for classifier: " + hits + " of " + testCount + " ("
            + hits * 100.0 / testCount + "%)");
}

From source file:tclass.ExpSingle.java

License:Open Source License

/**
 * Runs one complete train/test experiment.
 *
 * <p>The stages, in order: read the training and test streams, compute global
 * attributes, extract events, cluster the training events, attribute both data
 * sets against the clusters, combine global and event attributes, optionally
 * run CFS/best-first feature selection, build the Weka learner named by
 * {@code thisExp.learnerStuff}, optionally print a more readable version of
 * the learnt concept, and finally report per-stream results and accuracy on
 * the training data (if requested) and on the test data. A {@code mem(...)}
 * checkpoint is recorded after each stage.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws Exception if the classifier specification is empty, or if any stage
 *         of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingle thisExp = new ExpSingle();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    mem("DATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so it is
    // calculated up front.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    mem("GLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    // Extract the events from both data sets.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    mem("EVENTEXTRACT");

    int numTestStreams = testEventData.size();

    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");

    // Cluster the training events only.
    int numTrainStreams = trainEventData.getClassVec().size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");

    // The clusters are only an intermediate product: now attribute all of
    // the data (training and test) against them.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");
    mem("ATTRIBUTION");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("COMBINATION");

    // Drop references that are no longer needed before measuring memory.
    // The clusters are kept only when a readable description is requested,
    // because that section looks clusters up by name.
    trainStreamData = null;
    testStreamData = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;
    System.gc();
    mem("GARBAGECOLLECT");

    // The data is now in attribute-value form; build the learner over it.
    Debug.setDebugLevel(Debug.PROGRESS);
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");

    // Range of attributes chosen on the training data; reused unchanged for
    // the test data so both sets keep exactly the same columns.
    String featureRange = null;
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        int[] selectedIndices = bfs.search(cfs, data);
        featureRange = attributeRange(selectedIndices);
        System.err.print("Selected features: ");
        System.err.println(featureRange);
        data = keepAttributes(data, featureRange);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        // Make the description more readable. Assumes that
        // learner.toString() returns a string containing cluster names
        // followed by "<space> comparator <space> number".
        String concept = learner.toString();
        // Delimiters are returned as tokens so the original layout can be
        // echoed back unchanged.
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        boolean learnerIsJ48 = thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName());
        int evId = 1;
        StringBuilder evIndex = new StringBuilder();
        while (st.hasMoreTokens()) {
            String curTok = st.nextToken();
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust == null) {
                // Not a cluster name: echo the token as-is.
                System.out.print(curTok);
                continue;
            }
            // Skip the space after the cluster name.
            st.nextToken();
            // Comparator token; "<=" is read as "the event is absent".
            String cmp = st.nextToken();
            boolean absent = cmp.equals("<=");
            String qual = absent ? " HAS NO " : " HAS ";
            // Skip the space after the comparator.
            st.nextToken();
            // The threshold, possibly with a trailing colon.
            String conf = st.nextToken();
            boolean appendColon = false;
            if (conf.endsWith(":")) {
                conf = conf.substring(0, conf.length() - 1);
                appendColon = true;
            }
            float minconf = Float.valueOf(conf).floatValue();
            EventI[] res = clust.getBounds(minconf);
            // The cluster name is split as "<channel>-<eventType>_<suffix>".
            String name = clust.getName();
            int dashPos = name.indexOf('-');
            int undPos = name.indexOf('_');
            String chan = name.substring(0, dashPos);
            String evType = name.substring(dashPos + 1, undPos);
            EventDescI edi = clust.eventDesc();
            if (absent && learnerIsJ48) {
                System.out.print("OTHERWISE");
            } else {
                System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
                int numParams = edi.numParams();
                evIndex.append("*" + evId + ": " + evType + "\n");
                // res[2] is printed as the value, res[0]..res[1] as its range.
                for (int i = 0; i < numParams; i++) {
                    evIndex.append("   " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                            + "," + res[1].valOf(i) + "]\n");
                }
                evId++;
            }
            evIndex.append("\n");
            if (appendColon) {
                System.out.print(" THEN");
            }
        }
        System.out.println("\nEvent index");
        System.out.println("-----------");
        System.out.print(evIndex);
        mem("POSTDESC");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        ClassificationVecI trainClassns = (ClassificationVecI) trainAtts.getClassVec().clone();
        classifyAll(wekaClassifier, data, trainClassns, numTrainStreams);
        int trainCorrect = reportClassifications(trainClassns, numTrainStreams, domDesc);
        System.err.println("Training results for classifier: " + trainCorrect + " of " + numTrainStreams + " ("
                + trainCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");

    System.err.println(">>> Testing stage <<<");
    ClassificationVecI testClassns = (ClassificationVecI) testAtts.getClassVec().clone();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        data = keepAttributes(data, featureRange);
    }
    classifyAll(wekaClassifier, data, testClassns, numTestStreams);
    System.err.println(">>> Learner <<<");
    int testCorrect = reportClassifications(testClassns, numTestStreams, domDesc);
    System.err.println("Test accuracy for classifier: " + testCorrect + " of " + numTestStreams + " ("
            + testCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");

}

/** Builds the 1-based, comma-separated attribute range for the selected indices, ending in "last". */
private static String attributeRange(int[] selectedIndices) {
    StringBuilder range = new StringBuilder();
    for (int j = 0; j < selectedIndices.length; j++) {
        range.append(selectedIndices[j] + 1).append(',');
    }
    return range.append("last").toString();
}

/** Returns a copy of {@code data} containing only the attributes named in {@code range}. */
private static Instances keepAttributes(Instances data, String range) throws Exception {
    Remove af = new Remove();
    // Inverted selection: the listed attributes are kept, the rest removed.
    af.setInvertSelection(true);
    af.setAttributeIndices(range);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

/** Classifies the first {@code numStreams} instances, storing each prediction in {@code classns}. */
private static void classifyAll(WekaClassifier wekaClassifier, Instances data, ClassificationVecI classns,
        int numStreams) throws Exception {
    for (int j = 0; j < numStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
}

/** Prints one result line per stream to stderr and returns how many were classified correctly. */
private static int reportClassifications(ClassificationVecI classns, int numStreams, DomDesc domDesc)
        throws Exception {
    int numCorrect = 0;
    for (int j = 0; j < numStreams; j++) {
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    return numCorrect;
}

From source file:tclass.ExpSingleLM.java

License:Open Source License

/**
 * Runs one complete train/test experiment, reading the test data only after
 * the learner has been built and the training results reported.
 *
 * <p>Training stages, in order: read the training streams, compute global
 * attributes, extract events, cluster the events, attribute the training data
 * against the clusters, combine global and event attributes, optionally run
 * CFS/best-first feature selection, build the Weka learner named by
 * {@code thisExp.learnerStuff}, optionally print a more readable version of
 * the learnt concept, and optionally report training accuracy. The test data
 * is then read, pushed through the same globals / events / attribution /
 * combination steps, and classified. A {@code mem(...)} checkpoint is
 * recorded after each stage.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws Exception if the classifier specification is empty, or if any stage
 *         of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingleLM thisExp = new ExpSingleLM();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data read in");
    mem("TRAINDATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so it is
    // calculated up front.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data globals calculated.");
    mem("TRAINGLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    // Extract the events from the training data.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training events extracted");
    mem("EVENTEXTRACT");

    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");

    // Cluster the training events.
    int numTrainStreams = trainEventData.getClassVec().size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");

    // The clusters are only an intermediate product: now attribute the
    // training data against them. The attributor is kept for the test data.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data Attribution complete.");
    mem("TRAINATTRIBUTION");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    mem("TRAINCOMBINATION");
    // Drop the raw training streams before measuring memory.
    trainStreamData = null;
    System.gc();
    mem("TRAINGC");

    // The data is now in attribute-value form; build the learner over it.
    Debug.setDebugLevel(Debug.PROGRESS);
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");

    // Range of attributes chosen on the training data; reused unchanged for
    // the test data so both sets keep exactly the same columns.
    String featureRange = null;
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        int[] selectedIndices = bfs.search(cfs, data);
        featureRange = attributeRange(selectedIndices);
        System.err.print("Selected features: ");
        System.err.println(featureRange);
        data = keepAttributes(data, featureRange);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        // Make the description more readable. Assumes that
        // learner.toString() returns a string containing cluster names
        // followed by "<space> comparator <space> number".
        String concept = learner.toString();
        // Delimiters are returned as tokens so the original layout can be
        // echoed back unchanged.
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        boolean learnerIsJ48 = thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName());
        int evId = 1;
        StringBuilder evIndex = new StringBuilder();
        while (st.hasMoreTokens()) {
            String curTok = st.nextToken();
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust == null) {
                // Not a cluster name: echo the token as-is.
                System.out.print(curTok);
                continue;
            }
            // Skip the space after the cluster name.
            st.nextToken();
            // Comparator token; "<=" is read as "the event is absent".
            String cmp = st.nextToken();
            boolean absent = cmp.equals("<=");
            String qual = absent ? " HAS NO " : " HAS ";
            // Skip the space after the comparator.
            st.nextToken();
            // The threshold, possibly with a trailing colon.
            String conf = st.nextToken();
            boolean appendColon = false;
            if (conf.endsWith(":")) {
                conf = conf.substring(0, conf.length() - 1);
                appendColon = true;
            }
            float minconf = Float.valueOf(conf).floatValue();
            EventI[] res = clust.getBounds(minconf);
            // The cluster name is split as "<channel>-<eventType>_<suffix>".
            String name = clust.getName();
            int dashPos = name.indexOf('-');
            int undPos = name.indexOf('_');
            String chan = name.substring(0, dashPos);
            String evType = name.substring(dashPos + 1, undPos);
            EventDescI edi = clust.eventDesc();
            if (absent && learnerIsJ48) {
                System.out.print("OTHERWISE");
            } else {
                System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
                int numParams = edi.numParams();
                evIndex.append("*" + evId + ": " + evType + "\n");
                // res[2] is printed as the value, res[0]..res[1] as its range.
                for (int i = 0; i < numParams; i++) {
                    evIndex.append("   " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                            + "," + res[1].valOf(i) + "]\n");
                }
                evId++;
            }
            evIndex.append("\n");
            if (appendColon) {
                System.out.print(" THEN");
            }
        }
        System.out.println("\nEvent index");
        System.out.println("-----------");
        System.out.print(evIndex);
        mem("POSTDESC");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        ClassificationVecI trainClassns = (ClassificationVecI) trainAtts.getClassVec().clone();
        classifyAll(wekaClassifier, data, trainClassns, numTrainStreams);
        int trainCorrect = reportClassifications(trainClassns, numTrainStreams, domDesc);
        System.err.println("Training results for classifier: " + trainCorrect + " of " + numTrainStreams + " ("
                + trainCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");

    System.err.println(">>> Testing stage <<<");

    // The test data is only read now, after learning has finished.
    mem("TESTBEGIN");
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data read in");
    mem("TESTREAD");
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data globals calculated");
    mem("TESTGLOBALS");
    Debug.dp(Debug.PROGRESS, "Test data: " + testGlobalData.size());
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test events extracted");
    mem("TESTEVENTS");
    int numTestStreams = testEventData.size();
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    mem("TESTATTRIBUTES");
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("TESTCOMBINE");
    testStreamData = null;
    System.gc(); // Do garbage collection.
    mem("TESTGC");
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    ClassificationVecI testClassns = (ClassificationVecI) testAtts.getClassVec().clone();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        data = keepAttributes(data, featureRange);
    }
    classifyAll(wekaClassifier, data, testClassns, numTestStreams);
    System.err.println(">>> Learner <<<");
    int testCorrect = reportClassifications(testClassns, numTestStreams, domDesc);
    System.err.println("Test accuracy for classifier: " + testCorrect + " of " + numTestStreams + " ("
            + testCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");

}

/** Builds the 1-based, comma-separated attribute range for the selected indices, ending in "last". */
private static String attributeRange(int[] selectedIndices) {
    StringBuilder range = new StringBuilder();
    for (int j = 0; j < selectedIndices.length; j++) {
        range.append(selectedIndices[j] + 1).append(',');
    }
    return range.append("last").toString();
}

/** Returns a copy of {@code data} containing only the attributes named in {@code range}. */
private static Instances keepAttributes(Instances data, String range) throws Exception {
    Remove af = new Remove();
    // Inverted selection: the listed attributes are kept, the rest removed.
    af.setInvertSelection(true);
    af.setAttributeIndices(range);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

/** Classifies the first {@code numStreams} instances, storing each prediction in {@code classns}. */
private static void classifyAll(WekaClassifier wekaClassifier, Instances data, ClassificationVecI classns,
        int numStreams) throws Exception {
    for (int j = 0; j < numStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
}

/** Prints one result line per stream to stderr and returns how many were classified correctly. */
private static int reportClassifications(ClassificationVecI classns, int numStreams, DomDesc domDesc)
        throws Exception {
    int numCorrect = 0;
    for (int j = 0; j < numStreams; j++) {
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    return numCorrect;
}

From source file:tclass.TClass.java

License:Open Source License

/**
 * Command-line entry point: builds a Weka classifier from attributes derived from the training
 * streams and reports how it classifies the test streams.
 *
 * <p>Steps, in order: read the domain description and the training/test streams; apply the
 * global calculator and event extractor obtained from the settings file; cluster the training
 * events; attribute both data sets against those clusters and combine the result with the
 * globals; optionally ({@code featureSel}) run CfsSubsetEval/BestFirst feature selection;
 * build the learner named in {@code learnerStuff}; optionally ({@code makeDesc}) print a more
 * readable version of the learnt concept to standard output; optionally ({@code trainResults})
 * report per-stream results on the training data; finally report per-stream results on the
 * test data. Per-stream results and accuracy summaries go to standard error.
 *
 * <p>NOTE(review): {@code numClasses}, {@code eventDescVec}, {@code trainEventSEV} and
 * {@code savvi} are never read. They are kept because their initialisers call project code
 * whose side effects are not visible from here -- confirm and remove.
 *
 * @param args command-line arguments, passed to {@code parseArgs}
 * @throws Exception if the classifier specification string yields no tokens, or if any
 *     pipeline step propagates a failure
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    TClass thisExp = new TClass();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global attributes are computed once, up front; they are later combined with the
    // event-based attributes.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    // Extract the events from both data sets.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    int numTestStreams = testEventData.size();

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // Cluster the training events; the clusters drive the attribution step below.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // Attribution: turn both data sets into attribute vectors using the clusters.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine the global attributes with the event-based attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);

    // Drop references that are no longer needed before the learning phase. The clusters and
    // the clusterer are kept only when the concept description below will need them.
    trainStreamData = null;
    testStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    Debug.setDebugLevel(Debug.PROGRESS);
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    // The first token is the classifier class name; it is blanked so that the array passed
    // to forName carries only the remaining tokens as options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    // Attribute range chosen by feature selection; stays null when selection is disabled.
    // It is computed once here and reused for the test data so both sets are filtered
    // identically.
    String featureString = null;
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        int[] selectedIndices = bfs.search(cfs, data);
        System.err.print("Selected features: ");
        featureString = toAttributeRange(selectedIndices);
        System.err.println(featureString);
        data = keepAttributes(data, featureString);
    }
    learner.buildClassifier(data);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        printReadableConcept(learner.toString(), clusters);
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    int numCorrect = 0;
    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        classifyAll(wekaClassifier, data, classns, numTrainStreams);
        numCorrect = reportClassifications(classns, numTrainStreams, domDesc);
        System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " ("
                + numCorrect * 100.0 / numTrainStreams + "%)");
    }

    System.err.println(">>> Testing stage <<<");
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        data = keepAttributes(data, featureString);
    }
    classifyAll(wekaClassifier, data, classns, numTestStreams);
    System.err.println(">>> Learner <<<");
    numCorrect = reportClassifications(classns, numTestStreams, domDesc);
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");

}

/**
 * Builds the attribute range string handed to the Remove filter: every selected index
 * incremented by one, each followed by a comma, then the literal {@code last}.
 */
private static String toAttributeRange(int[] selectedIndices) {
    StringBuilder range = new StringBuilder();
    for (int j = 0; j < selectedIndices.length; j++) {
        range.append(selectedIndices[j] + 1).append(',');
    }
    return range.append("last").toString();
}

/**
 * Runs {@code data} through a Remove filter with inverted selection configured with
 * {@code attributeRange}, and returns the filtered data set.
 */
private static Instances keepAttributes(Instances data, String attributeRange) throws Exception {
    Remove keepFilter = new Remove();
    keepFilter.setInvertSelection(true);
    keepFilter.setAttributeIndices(attributeRange);
    keepFilter.setInputFormat(data);
    return Filter.useFilter(data, keepFilter);
}

/** Classifies the first {@code count} instances of {@code data} into the matching entries of {@code classns}. */
private static void classifyAll(WekaClassifier wekaClassifier, Instances data, ClassificationVecI classns,
        int count) throws Exception {
    for (int j = 0; j < count; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
}

/**
 * Prints one CORRECTLY/INCORRECTLY line to standard error for each of the first
 * {@code count} classifications and returns how many had matching real and predicted classes.
 */
private static int reportClassifications(ClassificationVecI classns, int count, DomDesc domDesc)
        throws Exception {
    int numCorrect = 0;
    for (int j = 0; j < count; j++) {
        String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    return numCorrect;
}

/**
 * Echoes {@code concept} to standard output token by token, replacing each
 * "cluster-name comparator threshold" run with a description of the cluster's bounds.
 *
 * <p>The text is split on space, tab, CR and LF, with the delimiters returned as tokens so
 * the original layout is reproduced. A token that names a cluster is expected to be followed
 * by exactly: one delimiter, a comparator, one delimiter, a number (optionally ending in a
 * colon). The cluster name is expected to contain a {@code '-'} followed later by a
 * {@code '_'}. NOTE(review): input that breaks either expectation is not handled and will
 * raise a runtime exception.
 */
private static void printReadableConcept(String concept, ClusterVecI clusters) throws Exception {
    StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
    while (st.hasMoreTokens()) {
        String curTok = st.nextToken();
        GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
        if (clust == null) {
            // Not a cluster name: pass the token through untouched.
            System.out.print(curTok);
            continue;
        }
        // Skip the delimiter, then read the comparator.
        st.nextToken();
        String cmp = st.nextToken();
        String qual = cmp.equals("<=") ? " HAS NO " : " HAS ";
        // Skip the delimiter, then read the threshold.
        st.nextToken();
        String conf = st.nextToken();
        boolean appendColon = conf.endsWith(":");
        if (appendColon) {
            // Strip the trailing colon for parsing; it is re-emitted after the description.
            conf = conf.substring(0, conf.length() - 1);
        }
        float minconf = Float.parseFloat(conf);
        EventI[] res = clust.getBounds(minconf);
        String name = clust.getName();
        int dashPos = name.indexOf('-');
        int undPos = name.indexOf('_');
        String chan = name.substring(0, dashPos);
        String evType = name.substring(dashPos + 1, undPos);
        EventDescI edi = clust.eventDesc();
        System.out.print("Channel " + chan + qual + evType + " ");
        int numParams = edi.numParams();
        for (int i = 0; i < numParams; i++) {
            System.out.print(edi.paramName(i) + " in [" + res[0].valOf(i) + "," + res[1].valOf(i) + "] ");
        }
        if (appendColon) {
            System.out.print(":");
        }
    }
}

From source file:tclass.ToArff.java

License:Open Source License

/**
 * Command-line entry point: runs the input streams through the TClass attribute pipeline and
 * writes the resulting Weka data set, as text, to the configured output file.
 *
 * <p>Steps, in order: read the domain description and the input streams; apply the global
 * calculator and event extractor obtained from the settings file; cluster the events;
 * attribute the data against those clusters and combine the result with the globals; convert
 * to Weka instances; optionally ({@code featureSel}) run CfsSubsetEval/BestFirst feature
 * selection and filter the data down to the selected attributes; write
 * {@code data.toString()} to {@code outFile}.
 *
 * <p>NOTE(review): {@code numClasses}, {@code eventDescVec}, {@code trainEventSEV},
 * {@code numTrainStreams} and {@code learner} are never read. They are kept because their
 * initialisers call project or Weka code whose side effects (including rejecting an invalid
 * classifier name) are not visible from here -- confirm and remove.
 *
 * @param args command-line arguments, passed to {@code parseArgs}
 * @throws Exception if the classifier specification string yields no tokens, if the output
 *     file cannot be written (the underlying failure is attached as the cause), or if any
 *     pipeline step propagates a failure
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ToArff thisExp = new ToArff();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.inFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global attributes are computed once, up front; they are later combined with the
    // event-based attributes.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // Cluster the events; the clusters drive the attribution step below.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // Attribution: turn the data into attribute vectors using the clusters.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine the global attributes with the event-based attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    // Drop references that are no longer needed before the conversion phase.
    trainStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    // The first token is the classifier class name; it is blanked so that the array passed
    // to forName carries only the remaining tokens as options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        System.err.print("Selected features: ");
        // Range string for the Remove filter: each selected index plus one, comma-separated,
        // followed by "last".
        StringBuilder featureRange = new StringBuilder();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureRange.append(selectedIndices[j] + 1).append(',');
        }
        featureRange.append("last");
        String featureString = featureRange.toString();
        System.err.println(featureString);
        // Inverted selection: the listed attributes are the ones the filter is told not to remove.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }

    // try-with-resources closes the writer even when write() fails; the original failure is
    // chained as the cause so it is not lost behind the generic message.
    try (FileWriter fw = new FileWriter(thisExp.outFile)) {
        fw.write(data.toString());
    } catch (Exception e) {
        throw new Exception("Could not write to output file. ", e);
    }
}