Example usage for weka.attributeSelection CfsSubsetEval buildEvaluator

List of usage examples for weka.attributeSelection CfsSubsetEval buildEvaluator

Introduction

In this page you can find the example usage for weka.attributeSelection CfsSubsetEval buildEvaluator.

Prototype

@Override
public void buildEvaluator(Instances data) throws Exception 

Source Link

Document

Generates a attribute evaluator.

Usage

From source file:tclass.ToArff.java

License:Open Source License

public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ToArff thisExp = new ToArff();
    thisExp.parseArgs(args);//  ww w . j ava  2 s .c om
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.inFile, domDesc);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);

    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model; so we
    // might as well calculated now
    GlobalCalc globalCalc = settings.getGlobalCalc();

    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    // And we might as well extract the events. 

    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());

    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);

    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString()); 

    // Now we want the clustering algorithms only to cluster
    // instances of each class. Make an array of clusterers, 
    // one per class. 

    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");

    //And now load it up. 
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");

    // But wait! There's more! There is always more. 
    // The first thing was only useful for clustering. 
    // Now attribution. We want to attribute all the data. So we are going 
    // to have one dataset for each learner. 
    // First set up the attributors. 

    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");

    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");

    // Combine all data sources. For now, globals go in every
    // one. 

    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);

    trainStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;

    System.gc();

    // So now we have the raw data in the correct form for each
    // attributor. 
    // And now, we can construct a learner for each case. 
    // Well, for now, I'm going to do something completely crazy. 
    // Let's run each classifier nonetheless over the whole data
    // ... and see what the hell happens. Maybe some voting scheme 
    // is possible!! This is a strange form of ensemble
    // classifier. 
    // Each naive bayes algorithm only gets one 

    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");

    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features. 
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter. 
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);

    }
    try {
        FileWriter fw = new FileWriter(thisExp.outFile);
        fw.write(data.toString());
        fw.close();
    } catch (Exception e) {
        throw new Exception("Could not write to output file. ");
    }
}