Example usage for weka.attributeSelection BestFirst BestFirst

List of usage examples for weka.attributeSelection BestFirst BestFirst

Introduction

On this page you can find example usages of the weka.attributeSelection.BestFirst constructor, BestFirst().

Prototype

public BestFirst() 

Source Link

Document

Constructor

Usage

From source file:RunBestFirstSearch.java

License:Open Source License

/**
 * Runs correlation-based (CFS) attribute selection driven by a best-first
 * search and prints the indices of the selected attributes to standard output.
 *
 * @param data the instances to select attributes from
 * @param n    the lookup cache size handed to the best-first search
 * @throws Exception if Weka fails to configure or run the selection
 */
protected static void runAttributeSelection(Instances data, int n) throws Exception {
    AttributeSelection attsel = new AttributeSelection();
    CfsSubsetEval evaluator = new CfsSubsetEval(); // CFS cost function.
    BestFirst search = new BestFirst(); // BFS algorithm.

    // BFS with forward direction and terminating search after five
    // non-improving nodes. Every flag and every value has to be its own
    // array element: a fused token such as "-D 1" is not matched as the
    // "-D" flag and would be ignored.
    String[] parameters = { "-D", "1", "-N", "5" };
    search.setOptions(parameters);

    // Set the cache size only after setOptions(). NOTE(review): setOptions()
    // is understood to reset every option it is not given (the cache size
    // included), which would discard a value set earlier - confirm against
    // the Weka version in use.
    search.setLookupCacheSize(n);

    evaluator.setLocallyPredictive(false);

    attsel.setEvaluator(evaluator);
    attsel.setSearch(search);

    // No explicit buildEvaluator() call is needed: SelectAttributes() builds
    // the evaluator on the data before searching.
    attsel.SelectAttributes(data);

    int[] indices = attsel.selectedAttributes();

    System.out.println("Selected features:\n" + Utils.arrayToString(indices));
}

From source file:tclass.DTClassifier.java

License:Open Source License

/**
 * Trains one J48 decision tree per class, each on the training streams as
 * attributed by that class's event clusters, prints every tree's accuracy on
 * the test streams, and finally combines the per-class predictions with a
 * simple majority vote.
 *
 * @param args command-line arguments, handed to {@code ExpDT_TC2.parseArgs}
 * @throws Exception if reading the data, clustering, feature selection,
 *                   learning or classification fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpDT_TC2 thisExp = new ExpDT_TC2();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    // And we might as well extract the events.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();

    // The clustering algorithms should only cluster instances of a single
    // class, so create one clusterer per class.
    EventClusterer[] eventClusterers = new EventClusterer[numClasses];
    for (int i = 0; i < numClasses; i++) {
        eventClusterers[i] = settings.getEventClusterer();
    }

    // Segment the training data: one (initially empty) container per class.
    ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses];
    for (int i = 0; i < numClasses; i++) {
        trainStreamsByClass[i] = new ClassStreamEventsVec();
        trainStreamsByClass[i].setClassVec(new ClassificationVec());
        trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec());
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // Route every training stream into the container of its real class.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    for (int i = 0; i < numTrainStreams; i++) {
        int currentClass = trainEventCV.elAt(i).getRealClass();
        trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i));
    }

    ClusterVecI[] clustersByClass = new ClusterVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete");
        Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:");
        Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping());
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // The per-class split was only needed for clustering. Attribution is
    // applied to ALL the data, once per class-specific attributor, so that
    // every learner gets its own view of the complete data set.
    Attributor[] attribsByClass = new Attributor[numClasses];
    for (int i = 0; i < numClasses; i++) {
        attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription());

        Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete.");
    }

    ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData);
        testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
        Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution of " + i + " complete.");
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();

    ClassStreamAttValVecI[] trainAttsByClass = new ClassStreamAttValVec[numClasses];
    ClassStreamAttValVecI[] testAttsByClass = new ClassStreamAttValVec[numClasses];

    for (int i = 0; i < numClasses; i++) {
        trainAttsByClass[i] = c.combine(trainGlobalData, trainEventAtts[i]);
        testAttsByClass[i] = c.combine(testGlobalData, testEventAtts[i]);
    }

    // Drop the references to the intermediate data so it can be reclaimed
    // before learning starts.
    trainStreamData = null;
    testStreamData = null;
    eventClusterers = null;
    trainEventSEV = null;
    trainEventCV = null;
    clustersByClass = null;
    attribsByClass = null;

    System.gc();

    // Train one decision tree per class, each over the whole training set
    // as seen through that class's attributor. The trees are combined by
    // voting further down, which makes this an unusual kind of ensemble.
    Debug.setDebugLevel(Debug.PROGRESS);
    // Attribute range kept for each class; only filled when feature
    // selection is enabled, and reused for the matching test data.
    String[] keepRanges = new String[numClasses];
    J48[] dtLearners = new J48[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtLearners[i] = new J48();
        Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class " + i);
        Instances data = WekaBridge.makeInstances(trainAttsByClass[i], "Train " + i);

        Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
        if (thisExp.featureSel) {
            Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
            BestFirst bfs = new BestFirst();
            CfsSubsetEval cfs = new CfsSubsetEval();
            cfs.buildEvaluator(data);
            keepRanges[i] = toKeepRange(bfs.search(cfs, data));
            System.out.print("Selected features for class " + i + ": ");
            System.out.println(keepRanges[i]);
            data = keepAttributes(data, keepRanges[i]);
        }

        dtLearners[i].buildClassifier(data);
        Debug.dp(Debug.PROGRESS, "Learnt tree: \n" + dtLearners[i].toString());
    }

    DTClassifier[] dtClassifiers = new DTClassifier[numClasses];
    for (int i = 0; i < numClasses; i++) {
        dtClassifiers[i] = new DTClassifier(dtLearners[i]);
    }

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    System.out.println(">>> Testing stage <<<");
    // First, print the results of using each per-class tree on its own.
    ClassificationVecI[] classns = new ClassificationVecI[numClasses];
    for (int i = 0; i < numClasses; i++) {
        String className = domDesc.getClassDescVec().getClassLabel(i);
        // Work on a copy so the predictions do not touch the test data itself.
        classns[i] = (ClassificationVecI) testAttsByClass[i].getClassVec().clone();
        Instances data = WekaBridge.makeInstances(testAttsByClass[i], "Test " + i);
        if (thisExp.featureSel) {
            // Same attribute subset as was used to train tree i.
            data = keepAttributes(data, keepRanges[i]);
        }
        for (int j = 0; j < numTestStreams; j++) {
            dtClassifiers[i].classify(data.instance(j), classns[i].elAt(j));
        }
        System.out.println(">>> Learner for class " + className);
        int numCorrect = 0;
        for (int j = 0; j < numTestStreams; j++) {
            System.out.print(classns[i].elAt(j).toString());
            if (classns[i].elAt(j).getRealClass() == classns[i].elAt(j).getPredictedClass()) {
                numCorrect++;
            }
        }
        System.out.println("Test accuracy for " + className + " classifier: " + numCorrect + " of "
                + numTestStreams + " (" + numCorrect * 100.0 / numTestStreams + "%)");
    }

    // Now do voting. This is a hack solution: every tree casts one vote for
    // the class it predicted and the class with the most votes wins (the
    // lowest class index wins ties).
    int numCorrect = 0;
    for (int i = 0; i < numTestStreams; i++) {
        int[] votes = new int[numClasses];
        int realClass = classns[0].elAt(i).getRealClass();
        String realClassName = domDesc.getClassDescVec().getClassLabel(realClass);
        for (int j = 0; j < numClasses; j++) {
            votes[classns[j].elAt(i).getPredictedClass()]++;
        }
        int maxIndex = -1;
        int maxVotes = 0;
        StringBuilder voteRes = new StringBuilder("[ ");
        for (int j = 0; j < numClasses; j++) {
            voteRes.append(votes[j]).append(' ');
            if (votes[j] > maxVotes) {
                maxIndex = j;
                maxVotes = votes[j];
            }
        }
        voteRes.append(']');
        // Now print the result:
        String predictedClassName = domDesc.getClassDescVec().getClassLabel(maxIndex);
        if (maxIndex == realClass) {
            System.out.println("Class " + realClassName + " CORRECTLY classified with " + maxVotes
                    + " votes. Votes: " + voteRes);
            numCorrect++;
        } else {
            System.out.println("Class " + realClassName + " INCORRECTLY classified as " + predictedClassName
                    + " with " + maxVotes + " votes. Votes: " + voteRes);
        }
    }
    System.out.println("Final voted accuracy: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}

// Turns 0-based attribute indices into a 1-based, comma-separated range string that also keeps "last".
private static String toKeepRange(int[] selected) {
    StringBuilder range = new StringBuilder();
    for (int index : selected) {
        range.append(index + 1).append(',');
    }
    return range.append("last").toString();
}

// Returns the data reduced to the attributes named in keepRange (inverted Remove filter).
private static Instances keepAttributes(Instances data, String keepRange) throws Exception {
    Remove af = new Remove();
    af.setInvertSelection(true);
    af.setAttributeIndices(keepRange);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

From source file:tclass.ExpNB_Single.java

License:Open Source License

/**
 * Runs a single-learner experiment: extracts and clusters events over the
 * whole training set, attributes the train and test streams against those
 * clusters, learns one naive Bayes classifier and prints its accuracy on the
 * test streams.
 *
 * @param args command-line arguments, handed to {@code ExpSingle.parseArgs}
 * @throws Exception if reading the data, clustering, feature selection,
 *                   learning or classification fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingle thisExp = new ExpSingle();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    // And we might as well extract the events.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    int numTestStreams = testEventData.size();

    // A single clusterer is used for the events of all classes together.
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // Clustering is done; now attribute all the data (train and test)
    // against the clusters that were found.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources: globals plus event attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);

    // Drop the references to the intermediate data so it can be reclaimed
    // before learning starts.
    trainStreamData = null;
    testStreamData = null;
    eventClusterer = null;
    clusters = null;
    attribs = null;

    System.gc();

    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    Debug.setDebugLevel(Debug.PROGRESS);
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        int[] selectedIndices = bfs.search(cfs, data);
        // Report the selection as a 1-based, comma-separated range ending
        // in "last", the same form the sibling experiments print.
        // NOTE: the selection is only reported here; trainAtts is not
        // reduced, so the learner below still sees every attribute.
        System.err.print("Selected features: ");
        StringBuilder featureString = new StringBuilder();
        for (int index : selectedIndices) {
            featureString.append(index + 1).append(',');
        }
        featureString.append("last");
        System.err.println(featureString);
    }

    Debug.dp(Debug.PROGRESS, "Learning with Naive Bayes now ...");
    NaiveBayes nbLearner = new NaiveBayes();
    nbLearner.setDomDesc(domDesc);
    nbLearner.setAttDescVec(trainAtts.getStreamAttValVec().getDescription());
    ClassifierI nbClassifier = nbLearner.learn(trainAtts);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    System.out.println(">>> Testing stage <<<");
    // Classify into a copy of the class vector so the test data itself
    // is left untouched.
    ClassificationVecI classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    for (int j = 0; j < numTestStreams; j++) {
        nbClassifier.classify(savvi.elAt(j), classns.elAt(j));
    }
    System.out.println(">>> Learner <<<");
    int numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        System.out.print(classns.elAt(j).toString());
        String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            System.out.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.out.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.out.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");

}

From source file:tclass.ExpSeg.java

License:Open Source License

/**
 * Runs the segmentation experiment: divides every stream into a fixed number
 * of time segments, combines the segment attributes with the global
 * attributes, trains the Weka classifier named on the command line and
 * reports its accuracy on the test streams to standard error.
 *
 * @param args command-line arguments, handed to {@code ExpSeg.parseArgs}
 * @throws IllegalArgumentException if the classifier specification is empty
 * @throws Exception if reading the data, feature selection, learning or
 *                   classification fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSeg thisExp = new ExpSeg();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    // NOTE(review): not used below; the call is retained in case
    // getEventExtractor() has side effects - confirm and remove.
    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    int numTestStreams = testGlobalData.size();

    // Divide every stream into thisExp.numDivs time segments.
    TimeDivision td = new TimeDivision(domDesc, thisExp.numDivs);
    ClassStreamAttValVecI trainDivData = td.timeDivide(trainStreamData);
    ClassStreamAttValVecI testDivData = td.timeDivide(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Segmentation performed");

    // Combine the data sources: globals plus segment attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainDivData);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testDivData);

    // Drop the references to the raw streams so they can be reclaimed
    // before learning starts.
    trainStreamData = null;
    testStreamData = null;

    System.gc();

    Debug.setDebugLevel(Debug.PROGRESS);

    // The first token of the specification is the classifier class name;
    // the remaining tokens are its options.
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new IllegalArgumentException("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    // Attribute range kept by feature selection; stays null when feature
    // selection is disabled and is reused for the test data.
    String keepRange = null;
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        keepRange = toKeepRange(bfs.search(cfs, data));
        System.err.print("Selected features: ");
        System.err.println(keepRange);
        data = keepAttributes(data, keepRange);
    }
    learner.buildClassifier(data);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    System.err.println(">>> Testing stage <<<");
    // Classify into a copy of the class vector so the test data itself
    // is left untouched.
    ClassificationVecI classns = (ClassificationVecI) testAtts.getClassVec().clone();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        // Same attribute subset as was used for training.
        data = keepAttributes(data, keepRange);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    int numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");

}

// Turns 0-based attribute indices into a 1-based, comma-separated range string that also keeps "last".
private static String toKeepRange(int[] selected) {
    StringBuilder range = new StringBuilder();
    for (int index : selected) {
        range.append(index + 1).append(',');
    }
    return range.append("last").toString();
}

// Returns the data reduced to the attributes named in keepRange (inverted Remove filter).
private static Instances keepAttributes(Instances data, String keepRange) throws Exception {
    Remove af = new Remove();
    af.setInvertSelection(true);
    af.setAttributeIndices(keepRange);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

From source file:tclass.ExpSingle.java

License:Open Source License

/**
 * Runs one complete train/test experiment.
 *
 * <p>Steps, in order: parse the command line, read the domain description and
 * the training and test streams, compute global attributes, extract events,
 * cluster the training events, attribute both data sets against those
 * clusters, combine global and event attributes, optionally run CFS/best-first
 * feature selection, build the configured Weka learner, optionally print a
 * more readable description of the learnt concept, and finally report
 * per-stream and overall accuracy on stderr (training accuracy only when
 * {@code thisExp.trainResults} is set).
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws IllegalArgumentException if the learner specification is empty
 * @throws Exception if any stage of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingle thisExp = new ExpSingle();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    mem("DATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global attributes are part of every model, so compute them up front.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    mem("GLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    mem("EVENTEXTRACT");

    int numTestStreams = testEventData.size();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");

    // Cluster the training events; the clusters define the event attributes.
    int numTrainStreams = trainEventData.getClassVec().size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");

    // Attribute both data sets against the clusters found on the training data.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");
    mem("ATTRIBUTION");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("COMBINATION");

    // Drop references that are no longer needed before the memory-hungry
    // learning stage. The clusters are kept only if a description is wanted.
    trainStreamData = null;
    testStreamData = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;
    System.gc();
    mem("GARBAGECOLLECT");

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new IllegalArgumentException("Invalid classifier specification string");
    }
    // First token is the classifier class name; the rest are its options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        System.err.print("Selected features: ");
        String featureString = attributeRange(selectedIndices);
        System.err.println(featureString);
        data = keepAttributes(data, featureString);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        boolean learnerIsJ48 = thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName());
        printReadableConcept(learner.toString(), clusters, learnerIsJ48);
        mem("POSTDESC");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        int trainCorrect = reportClassifications(classns, domDesc, numTrainStreams);
        System.err.println("Training results for classifier: " + trainCorrect + " of " + numTrainStreams + " ("
                + trainCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");

    System.err.println(">>> Testing stage <<<");
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        // The test data must go through the same attribute filter as the training data.
        data = keepAttributes(data, attributeRange(selectedIndices));
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    int testCorrect = reportClassifications(classns, domDesc, numTestStreams);
    System.err.println("Test accuracy for classifier: " + testCorrect + " of " + numTestStreams + " ("
            + testCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");

}

/**
 * Builds the attribute range string passed to the Remove filter: every
 * selected index shifted by one (the search result is used as zero-based,
 * the range string as one-based), followed by "last".
 */
private static String attributeRange(int[] selectedIndices) {
    StringBuilder range = new StringBuilder();
    for (int index : selectedIndices) {
        range.append(index + 1).append(',');
    }
    // "last" keeps the final attribute -- presumably the class attribute
    // appended by WekaBridge; confirm against WekaBridge.makeInstances.
    range.append("last");
    return range.toString();
}

/** Returns a copy of {@code data} containing only the attributes named in {@code range}. */
private static Instances keepAttributes(Instances data, String range) throws Exception {
    Remove af = new Remove();
    // Inverted selection: the listed attributes are kept rather than removed.
    af.setInvertSelection(true);
    af.setAttributeIndices(range);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

/**
 * Prints one line per stream on stderr saying whether it was classified
 * correctly, and returns the number of correctly classified streams.
 */
private static int reportClassifications(ClassificationVecI classns, DomDesc domDesc, int numStreams)
        throws Exception {
    int numCorrect = 0;
    for (int j = 0; j < numStreams; j++) {
        String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    return numCorrect;
}

/**
 * Rewrites the learner's textual concept on stdout, replacing every token
 * that names a cluster (followed by a comparison and a threshold) with an
 * "IF channel HAS/HAS NO event" phrase, then prints an index of the events
 * referred to. Assumes the concept text contains tokens that look like
 * cluster names, each followed by whitespace, a comparator, whitespace and
 * a number.
 */
private static void printReadableConcept(String concept, ClusterVecI clusters, boolean learnerIsJ48)
        throws Exception {
    // Delimiters are returned as tokens so the original layout is echoed unchanged.
    StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
    int evId = 1;
    StringBuilder evIndex = new StringBuilder();
    while (st.hasMoreTokens()) {
        String curTok = st.nextToken();
        GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
        if (clust == null) {
            System.out.print(curTok);
            continue;
        }
        // Skip the whitespace token, then read the comparator.
        st.nextToken();
        boolean negated = "<=".equals(st.nextToken());
        String qual = negated ? " HAS NO " : " HAS ";
        // Skip the whitespace token, then read the threshold.
        st.nextToken();
        String conf = st.nextToken();
        boolean appendColon = conf.endsWith(":");
        if (appendColon) {
            conf = conf.substring(0, conf.length() - 1);
        }
        float minconf = Float.parseFloat(conf);
        EventI[] res = clust.getBounds(minconf);
        // Cluster names are split as <channel>-<eventType>_<suffix>.
        String name = clust.getName();
        int dashPos = name.indexOf('-');
        int undPos = name.indexOf('_');
        String chan = name.substring(0, dashPos);
        String evType = name.substring(dashPos + 1, undPos);
        EventDescI edi = clust.eventDesc();
        if (negated && learnerIsJ48) {
            System.out.print("OTHERWISE");
        } else {
            System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
            int numParams = edi.numParams();
            evIndex.append("*" + evId + ": " + evType + "\n");
            for (int i = 0; i < numParams; i++) {
                evIndex.append("   " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                        + "," + res[1].valOf(i) + "]\n");
            }
            evId++;
        }
        evIndex.append("\n");
        if (appendColon) {
            System.out.print(" THEN");
        }
    }
    System.out.println("\nEvent index");
    System.out.println("-----------");
    System.out.print(evIndex);
}

From source file:tclass.ExpSingleLM.java

License:Open Source License

/**
 * Runs one complete train/test experiment, loading the test data only
 * after training has finished.
 *
 * <p>Training stage: parse the command line, read the domain description
 * and the training streams, compute global attributes, extract and cluster
 * events, attribute and combine the training data, optionally run
 * CFS/best-first feature selection, build the configured Weka learner,
 * optionally print a more readable description of the learnt concept, and
 * (when {@code thisExp.trainResults} is set) report training accuracy.
 * Testing stage: read the test streams, push them through the same
 * globals/events/attribution/combination steps, apply the same attribute
 * filter, and report per-stream and overall accuracy on stderr.
 *
 * @param args command-line arguments, handed to {@code parseArgs}
 * @throws IllegalArgumentException if the learner specification is empty
 * @throws Exception if any stage of the pipeline fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingleLM thisExp = new ExpSingleLM();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data read in");
    mem("TRAINDATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global attributes are part of every model, so compute them up front.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data globals calculated.");
    mem("TRAINGLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training events extracted");
    mem("EVENTEXTRACT");

    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");

    // Cluster the training events; the clusters define the event attributes.
    int numTrainStreams = trainEventData.getClassVec().size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");

    // The attributor is kept alive until the test data has been attributed.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data Attribution complete.");
    mem("TRAINATTRIBUTION");

    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    mem("TRAINCOMBINATION");
    // Drop the raw training streams before the memory-hungry learning stage.
    trainStreamData = null;
    System.gc();
    mem("TRAINGC");

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new IllegalArgumentException("Invalid classifier specification string");
    }
    // First token is the classifier class name; the rest are its options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        System.err.print("Selected features: ");
        String featureString = attributeRange(selectedIndices);
        System.err.println(featureString);
        data = keepAttributes(data, featureString);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        boolean learnerIsJ48 = thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName());
        printReadableConcept(learner.toString(), clusters, learnerIsJ48);
        mem("POSTDESC");
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");

    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        int trainCorrect = reportClassifications(classns, domDesc, numTrainStreams);
        System.err.println("Training results for classifier: " + trainCorrect + " of " + numTrainStreams + " ("
                + trainCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");

    System.err.println(">>> Testing stage <<<");

    // The test data is read only now, after training has finished.
    mem("TESTBEGIN");
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data read in");
    mem("TESTREAD");
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data globals calculated");
    mem("TESTGLOBALS");
    Debug.dp(Debug.PROGRESS, "Test data: " + testGlobalData.size());
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test events extracted");
    mem("TESTEVENTS");
    int numTestStreams = testEventData.size();
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    mem("TESTATTRIBUTES");
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("TESTCOMBINE");
    testStreamData = null;
    System.gc(); // Do garbage collection.
    mem("TESTGC");
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        // The test data must go through the same attribute filter as the training data.
        data = keepAttributes(data, attributeRange(selectedIndices));
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    int testCorrect = reportClassifications(classns, domDesc, numTestStreams);
    System.err.println("Test accuracy for classifier: " + testCorrect + " of " + numTestStreams + " ("
            + testCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");

}

/**
 * Builds the attribute range string passed to the Remove filter: every
 * selected index shifted by one (the search result is used as zero-based,
 * the range string as one-based), followed by "last".
 */
private static String attributeRange(int[] selectedIndices) {
    StringBuilder range = new StringBuilder();
    for (int index : selectedIndices) {
        range.append(index + 1).append(',');
    }
    // "last" keeps the final attribute -- presumably the class attribute
    // appended by WekaBridge; confirm against WekaBridge.makeInstances.
    range.append("last");
    return range.toString();
}

/** Returns a copy of {@code data} containing only the attributes named in {@code range}. */
private static Instances keepAttributes(Instances data, String range) throws Exception {
    Remove af = new Remove();
    // Inverted selection: the listed attributes are kept rather than removed.
    af.setInvertSelection(true);
    af.setAttributeIndices(range);
    af.setInputFormat(data);
    return Filter.useFilter(data, af);
}

/**
 * Prints one line per stream on stderr saying whether it was classified
 * correctly, and returns the number of correctly classified streams.
 */
private static int reportClassifications(ClassificationVecI classns, DomDesc domDesc, int numStreams)
        throws Exception {
    int numCorrect = 0;
    for (int j = 0; j < numStreams; j++) {
        String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    return numCorrect;
}

/**
 * Rewrites the learner's textual concept on stdout, replacing every token
 * that names a cluster (followed by a comparison and a threshold) with an
 * "IF channel HAS/HAS NO event" phrase, then prints an index of the events
 * referred to. Assumes the concept text contains tokens that look like
 * cluster names, each followed by whitespace, a comparator, whitespace and
 * a number.
 */
private static void printReadableConcept(String concept, ClusterVecI clusters, boolean learnerIsJ48)
        throws Exception {
    // Delimiters are returned as tokens so the original layout is echoed unchanged.
    StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
    int evId = 1;
    StringBuilder evIndex = new StringBuilder();
    while (st.hasMoreTokens()) {
        String curTok = st.nextToken();
        GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
        if (clust == null) {
            System.out.print(curTok);
            continue;
        }
        // Skip the whitespace token, then read the comparator.
        st.nextToken();
        boolean negated = "<=".equals(st.nextToken());
        String qual = negated ? " HAS NO " : " HAS ";
        // Skip the whitespace token, then read the threshold.
        st.nextToken();
        String conf = st.nextToken();
        boolean appendColon = conf.endsWith(":");
        if (appendColon) {
            conf = conf.substring(0, conf.length() - 1);
        }
        float minconf = Float.parseFloat(conf);
        EventI[] res = clust.getBounds(minconf);
        // Cluster names are split as <channel>-<eventType>_<suffix>.
        String name = clust.getName();
        int dashPos = name.indexOf('-');
        int undPos = name.indexOf('_');
        String chan = name.substring(0, dashPos);
        String evType = name.substring(dashPos + 1, undPos);
        EventDescI edi = clust.eventDesc();
        if (negated && learnerIsJ48) {
            System.out.print("OTHERWISE");
        } else {
            System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
            int numParams = edi.numParams();
            evIndex.append("*" + evId + ": " + evType + "\n");
            for (int i = 0; i < numParams; i++) {
                evIndex.append("   " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                        + "," + res[1].valOf(i) + "]\n");
            }
            evId++;
        }
        evIndex.append("\n");
        if (appendColon) {
            System.out.print(" THEN");
        }
    }
    System.out.println("\nEvent index");
    System.out.println("-----------");
    System.out.print(evIndex);
}

From source file:tclass.TClass.java

License:Open Source License

/**
 * Runs one train/test experiment from the command line: loads the training
 * and test streams, computes global attributes, extracts and clusters
 * events, attributes and combines the data, optionally performs feature
 * selection, builds the Weka classifier named in the learner specification,
 * and prints per-stream and overall results to System.err.
 *
 * @param args command-line arguments, handed unchanged to parseArgs
 * @throws Exception if the learner specification splits into no tokens;
 *         exceptions raised by any of the invoked stages also propagate,
 *         since nothing is caught here
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    TClass thisExp = new TClass();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString());

    // Original intent (per the author): cluster instances of each class
    // separately, one clusterer per class. The code below uses the single
    // clusterer supplied by the settings.

    // Number of test streams; used as the loop bound in the testing stage.
    int numTestStreams = testEventData.size();

    // NOTE(review): numClasses and eventDescVec are never read again in
    // this method.
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    // And now load it up.
    // NOTE(review): trainEventSEV is only assigned and later nulled, never read.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    // Number of training streams; used as the loop bound when reporting
    // training performance.
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // The clustering above was only the first step. Next comes
    // attribution: both the training and the test data are attributed
    // against the clusters found on the training data.
    // First set up the attributor.

    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources: the global attributes are merged with
    // the event attributes for both data sets.

    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);

    // Drop references to intermediate data that is not read again, ahead
    // of the System.gc() call below. The clusters are kept when makeDesc
    // is set because the description section further down looks clusters
    // up by name.
    trainStreamData = null;
    testStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    // The combined data is now in attribute-value form, so a learner can
    // be built on it. (The author's original notes here talk about
    // running several classifiers as an ensemble; the code below builds
    // exactly one classifier.)

    Debug.setDebugLevel(Debug.PROGRESS);
    // Stays null unless feature selection is enabled; the testing stage
    // reuses it to apply the same attribute subset to the test data.
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    // The first token is the classifier name; it is blanked so that the
    // remaining entries are what forName receives as options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Build the index list for the Remove filter: each selected index
        // is shifted by one (the filter's index string is 1-based), and
        // "last" is appended -- presumably the class attribute; confirm
        // against WekaBridge.makeInstances.
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter. Selection is inverted so the listed
        // attributes are the ones kept rather than removed.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    learner.buildClassifier(data);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());

    WekaClassifier wekaClassifier;
    wekaClassifier = new WekaClassifier(learner);

    if (thisExp.makeDesc) {
        // Section for making the description more readable. Assumes that
        // learner.toString() returns a string with things that look like
        // feature names.
        String concept = learner.toString();
        // Delimiters are returned as tokens too (third argument is true),
        // so whitespace in the concept text is echoed through unchanged
        // by the else-branch below.
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        while (st.hasMoreTokens()) {
            boolean appendColon = false;
            String curTok = st.nextToken();
            // A token that names a cluster is rewritten; anything else is
            // printed as-is.
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust != null) {
                // Skip the delimiter token (assumes exactly one delimiter
                // separates the name from the comparator).
                st.nextToken();
                // Get the comparator: "<=" is reported as " HAS NO ",
                // anything else as " HAS ".
                String cmp = st.nextToken();
                String qual = "";
                if (cmp.equals("<=")) {
                    qual = " HAS NO ";
                } else {
                    qual = " HAS ";
                }
                // Skip the delimiter token before the number.
                st.nextToken();
                // Get the number. A trailing ':' is stripped before
                // parsing and re-printed after the rewritten text.
                String conf = st.nextToken();
                if (conf.endsWith(":")) {
                    conf = conf.substring(0, conf.length() - 1);
                    appendColon = true;
                }
                float minconf = Float.valueOf(conf).floatValue();
                // res[0] and res[1] are printed as the lower and upper
                // end of each parameter's interval below.
                EventI[] res = clust.getBounds(minconf);
                // The name is split as <channel>-<eventType>_<rest>; this
                // assumes it contains a '-' followed later by a '_'
                // (e.g. "alpha-inc_0"), otherwise substring will throw.
                String name = clust.getName();
                int dashPos = name.indexOf('-');
                int undPos = name.indexOf('_');
                String chan = name.substring(0, dashPos);
                String evType = name.substring(dashPos + 1, undPos);
                EventDescI edi = clust.eventDesc();
                System.out.print("Channel " + chan + qual + evType + " ");
                int numParams = edi.numParams();
                for (int i = 0; i < numParams; i++) {
                    System.out
                            .print(edi.paramName(i) + " in [" + res[0].valOf(i) + "," + res[1].valOf(i) + "] ");
                }
                if (appendColon) {
                    System.out.print(":");
                }
            } else {
                System.out.print(curTok);
            }
        }

        // Author's note: this section is messy. It relies on the cluster
        // information having come from a GainClusterer.
        // Disabled sanity check, kept for reference:
        //            GClust clust =  (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0");
        // System.out.println("INSANE!: " + clust.getDescription());
        // EventI[] res = clust.getBounds(1);
        // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");
    int numCorrect = 0;
    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        // Work on a clone of the training class vector. Each entry is
        // handed to classify together with its instance, and the
        // predicted class is read back from the entry in the next loop.
        // At this point data still holds the (possibly filtered)
        // training instances.
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        for (int j = 0; j < numTrainStreams; j++) {
            // System.out.print(classns.elAt(j).toString());
            if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
                numCorrect++;
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                System.err.println("Class " + realClassName + " CORRECTLY classified.");

            } else {

                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                String predictedClassName = domDesc.getClassDescVec()
                        .getClassLabel(classns.elAt(j).getPredictedClass());
                System.err.println(
                        "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");

            }
        }
        // 100.0 forces floating-point arithmetic for the percentage.
        System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " ("
                + numCorrect * 100.0 / numTrainStreams + "%)");
    }

    System.err.println(">>> Testing stage <<<");
    // Classify the test streams with the learnt classifier and report.
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    // NOTE(review): savvi is never read in this method.
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        // Rebuild the same index list from the indices selected on the
        // training data, so the test instances are reduced to the same
        // attribute subset.
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += "last";
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    // Reset the counter: it may still hold the training-set count.
    numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        // System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");

        } else {

            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());

            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");

        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");

}

From source file:tclass.ToArff.java

License:Open Source License

/**
 * Converts one stream data set into attribute-value form and writes the
 * textual form of the resulting Weka Instances (data.toString()) to the
 * configured output file.
 *
 * The pipeline loads the streams, computes global attributes, extracts and
 * clusters events, attributes the data against the clusters, combines the
 * global and event attributes, and optionally reduces the attributes with
 * CfsSubsetEval/BestFirst feature selection before writing.
 *
 * @param args command-line arguments, handed unchanged to parseArgs
 * @throws Exception if the learner specification splits into no tokens, if
 *         the output file cannot be written (the underlying failure is
 *         attached as the cause), or if any invoked stage fails
 */
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ToArff thisExp = new ToArff();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.inFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we
    // might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    // Extract the events from the raw streams.
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");

    // NOTE(review): numClasses, eventDescVec, trainEventSEV and
    // numTrainStreams are never read in this method. The statements are
    // kept because the called accessors are not visible from here.
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // Attribute the data against the clusters that were just found.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine the global attributes with the event attributes.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    // Drop references to intermediate data that is not read again, ahead
    // of the System.gc() call below.
    trainStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    Debug.setDebugLevel(Debug.PROGRESS);
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    // The first token is the classifier name; it is blanked so that the
    // remaining entries are what forName receives as options.
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    // The classifier itself is not used for the conversion; the call is
    // kept so that an invalid specification still fails here as before.
    AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        int[] selectedIndices = bfs.search(cfs, data);
        // Build the index list for the Remove filter: each selected index
        // is shifted by one (the filter's index string is 1-based), and
        // "last" is appended -- presumably the class attribute; confirm
        // against WekaBridge.makeInstances.
        System.err.print("Selected features: ");
        StringBuilder featureString = new StringBuilder();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString.append(selectedIndices[j] + 1).append(',');
        }
        featureString.append("last");
        System.err.println(featureString);
        // Selection is inverted so the listed attributes are the ones
        // kept rather than removed.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString.toString());
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }

    // try-with-resources closes the writer even when write() fails; a
    // failure while closing is still reported through the catch below.
    try (FileWriter fw = new FileWriter(thisExp.outFile)) {
        fw.write(data.toString());
    } catch (Exception e) {
        // Same message as before, but keep the underlying failure as the
        // cause so the real reason is not lost.
        throw new Exception("Could not write to output file. ", e);
    }
}

From source file:trainableSegmentation.WekaSegmentation.java

License:GNU General Public License

/**
 * Reduce the number of attributes per instance of a dataset by running
 * an attribute selection filter that pairs a CfsSubsetEval evaluator
 * with a BestFirst search (default search settings).
 *
 * @param data input set of instances
 * @return the instances produced by the attribute selection filter, or
 *         null if the filter threw an exception (the error is logged)
 */
public static Instances selectAttributes(Instances data) {
    // Evaluator, with the missing-separate flag switched on
    final CfsSubsetEval subsetEvaluator = new CfsSubsetEval();
    subsetEvaluator.setMissingSeparate(true);

    // Filter wired with the evaluator and a default best-first search
    final AttributeSelection selectionFilter = new AttributeSelection();
    selectionFilter.setEvaluator(subsetEvaluator);
    selectionFilter.setSearch(new BestFirst());

    try {
        selectionFilter.setInputFormat(data);
        return Filter.useFilter(data, selectionFilter);
    } catch (Exception e) {
        IJ.log("Error when resampling input data with selected attributes!");
        e.printStackTrace();
        // Failure is signalled to the caller with a null result
        return null;
    }
}