List of usage examples for weka.classifiers.Classifier.buildClassifier(Instances)
public abstract void buildClassifier(Instances data) throws Exception;
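Before the harvested examples, a minimal self-contained sketch of the call: load an ARFF file, mark the class attribute, and train. The file name "train.arff" is an assumption, and J48 stands in for any concrete Classifier.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.Classifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class BuildClassifierExample {
    public static void main(String[] args) throws Exception {
        // Assumes train.arff exists and that its last attribute is the class.
        Instances data = new Instances(new BufferedReader(new FileReader("train.arff")));
        data.setClassIndex(data.numAttributes() - 1);
        Classifier model = new J48(); // any weka.classifiers.Classifier works here
        model.buildClassifier(data);  // throws Exception if the data cannot be handled
        System.out.println(model);
    }
}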
From source file:sirius.trainer.step4.RunClassifierWithNoLocationIndex.java
License:Open Source License
public static Object jackKnifeClassifierOneWithNoLocationIndex(JInternalFrame parent,
        ApplicationData applicationData, JTextArea classifierOneDisplayTextArea,
        GenericObjectEditor m_ClassifierEditor, double ratio, GraphPane myGraph,
        ClassifierResults classifierResults, int range, double threshold, boolean outputClassifier,
        String classifierName, String[] classifierOptions, boolean returnClassifier,
        int randomNumberForClassifier) {
    try {
        StatusPane statusPane = applicationData.getStatusPane();
        long totalTimeStart = System.currentTimeMillis(), totalTimeElapsed;
        Classifier tempClassifier;
        if (m_ClassifierEditor != null)
            tempClassifier = (Classifier) m_ClassifierEditor.getValue();
        else
            tempClassifier = Classifier.forName(classifierName, classifierOptions);
        // Assume that the class attribute is the last attribute - this should be the case
        // for all Sirius-produced ARFF files.
        // Split the instances into positive and negative.
        Instances posInst = new Instances(applicationData.getDataset1Instances());
        posInst.setClassIndex(posInst.numAttributes() - 1);
        for (int x = 0; x < posInst.numInstances();)
            if (posInst.instance(x).stringValue(posInst.numAttributes() - 1).equalsIgnoreCase("pos"))
                x++;
            else
                posInst.delete(x);
        posInst.deleteAttributeType(Attribute.STRING);
        Instances negInst = new Instances(applicationData.getDataset1Instances());
        negInst.setClassIndex(negInst.numAttributes() - 1);
        for (int x = 0; x < negInst.numInstances();)
            if (negInst.instance(x).stringValue(negInst.numAttributes() - 1).equalsIgnoreCase("neg"))
                x++;
            else
                negInst.delete(x);
        negInst.deleteAttributeType(Attribute.STRING);
        // Train classifier one with the full dataset first, then do cross-validation
        // to gauge its accuracy.
        long trainTimeStart = 0, trainTimeElapsed = 0;
        if (statusPane != null)
            statusPane.setText("Training Classifier One... May take a while... Please wait...");
        // Record start time.
        trainTimeStart = System.currentTimeMillis();
        Instances fullInst = new Instances(applicationData.getDataset1Instances());
        fullInst.setClassIndex(fullInst.numAttributes() - 1);
        Classifier classifierOne;
        if (m_ClassifierEditor != null)
            classifierOne = (Classifier) m_ClassifierEditor.getValue();
        else
            classifierOne = Classifier.forName(classifierName, classifierOptions);
        if (outputClassifier)
            classifierOne.buildClassifier(fullInst);
        // Record the total time used to build classifier one.
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        // Training done.
        String tclassifierName;
        if (m_ClassifierEditor != null)
            tclassifierName = m_ClassifierEditor.getValue().getClass().getName();
        else
            tclassifierName = classifierName;
        if (classifierResults != null) {
            classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                    tclassifierName);
            classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                    " Jack Knife Validation");
            classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ",
                    Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
        }
        String classifierOneFilename = applicationData.getWorkingDirectory() + File.separator
                + "ClassifierOne_" + randomNumberForClassifier + ".scores";
        BufferedWriter outputCrossValidation = new BufferedWriter(new FileWriter(classifierOneFilename));
        //Instances foldTrainingInstance;
        //Instances foldTestingInstance;
        int positiveDataset1FromInt = applicationData.getPositiveDataset1FromField();
        int positiveDataset1ToInt = applicationData.getPositiveDataset1ToField();
        int negativeDataset1FromInt = applicationData.getNegativeDataset1FromField();
        int negativeDataset1ToInt = applicationData.getNegativeDataset1ToField();
        Step1TableModel positiveStep1TableModel = applicationData.getPositiveStep1TableModel();
        Step1TableModel negativeStep1TableModel = applicationData.getNegativeStep1TableModel();
        FastaFileManipulation fastaFile = new FastaFileManipulation(positiveStep1TableModel,
                negativeStep1TableModel, positiveDataset1FromInt, positiveDataset1ToInt,
                negativeDataset1FromInt, negativeDataset1ToInt, applicationData.getWorkingDirectory());
        FastaFormat fastaFormat;
        String header[] = new String[fullInst.numInstances()];
        String data[] = new String[fullInst.numInstances()];
        int counter = 0;
        while ((fastaFormat = fastaFile.nextSequence("pos")) != null) {
            header[counter] = fastaFormat.getHeader();
            data[counter] = fastaFormat.getSequence();
            counter++;
        }
        while ((fastaFormat = fastaFile.nextSequence("neg")) != null) {
            header[counter] = fastaFormat.getHeader();
            data[counter] = fastaFormat.getSequence();
            counter++;
        }
        // Run jack knife validation.
        for (int x = 0; x < fullInst.numInstances(); x++) {
            if (applicationData.terminateThread == true) {
                if (statusPane != null)
                    statusPane.setText("Interrupted - Classifier One Training Completed");
                outputCrossValidation.close();
                return classifierOne;
            }
            if (statusPane != null)
                statusPane.setText("Running " + (x + 1) + " / " + fullInst.numInstances());
            Instances trainPosInst = new Instances(posInst);
            Instances trainNegInst = new Instances(negInst);
            Instance testInst;
            // Split data into training and testing.
            if (x < trainPosInst.numInstances()) {
                testInst = posInst.instance(x);
                trainPosInst.delete(x);
            } else {
                testInst = negInst.instance(x - posInst.numInstances());
                trainNegInst.delete(x - posInst.numInstances());
            }
            Instances trainInstances;
            if (trainPosInst.numInstances() < trainNegInst.numInstances()) {
                trainInstances = new Instances(trainPosInst);
                int max = (int) (ratio * trainPosInst.numInstances());
                if (ratio == -1)
                    max = trainNegInst.numInstances();
                Random rand = new Random(1);
                for (int y = 0; y < trainNegInst.numInstances() && y < max; y++) {
                    int index = rand.nextInt(trainNegInst.numInstances());
                    trainInstances.add(trainNegInst.instance(index));
                    trainNegInst.delete(index);
                }
            } else {
                trainInstances = new Instances(trainNegInst);
                int max = (int) (ratio * trainNegInst.numInstances());
                if (ratio == -1)
                    max = trainPosInst.numInstances();
                Random rand = new Random(1);
                for (int y = 0; y < trainPosInst.numInstances() && y < max; y++) {
                    int index = rand.nextInt(trainPosInst.numInstances());
                    trainInstances.add(trainPosInst.instance(index));
                    trainPosInst.delete(index);
                }
            }
            Classifier foldClassifier = tempClassifier;
            foldClassifier.buildClassifier(trainInstances);
            double[] results = foldClassifier.distributionForInstance(testInst);
            int classIndex = testInst.classIndex();
            String classValue = testInst.toString(classIndex);
            outputCrossValidation.write(header[x]);
            outputCrossValidation.newLine();
            outputCrossValidation.write(data[x]);
            outputCrossValidation.newLine();
            if (classValue.equals("pos"))
                outputCrossValidation.write("pos,0=" + results[0]);
            else if (classValue.equals("neg"))
                outputCrossValidation.write("neg,0=" + results[0]);
            else {
                outputCrossValidation.close();
                throw new Error("Invalid Class Type!");
            }
            outputCrossValidation.newLine();
            outputCrossValidation.flush();
        }
        outputCrossValidation.close();
        PredictionStats classifierOneStatsOnJackKnife = new PredictionStats(classifierOneFilename, range,
                threshold);
        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
        if (classifierResults != null)
            classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                    Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                            + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
        //if (classifierOneDisplayTextArea != null)
        classifierOneStatsOnJackKnife.updateDisplay(classifierResults, classifierOneDisplayTextArea, true);
        applicationData.setClassifierOneStats(classifierOneStatsOnJackKnife);
        if (myGraph != null)
            myGraph.setMyStats(classifierOneStatsOnJackKnife);
        if (statusPane != null)
            statusPane.setText("Done!");
        if (returnClassifier)
            return classifierOne;
        else
            return classifierOneStatsOnJackKnife;
    } catch (Exception e) {
        e.printStackTrace();
        JOptionPane.showMessageDialog(parent, e.getMessage(), "ERROR", JOptionPane.ERROR_MESSAGE);
        return null;
    }
}
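The method above hand-rolls leave-one-out ("jack knife") validation so that each training fold can be rebalanced by class before buildClassifier is called. When no per-fold resampling is needed, the same protocol is available from Weka's Evaluation by setting the fold count to the instance count. A minimal sketch, reusing fullInst and tempClassifier from the example (requires weka.classifiers.Evaluation and java.util.Random):

    // Sketch only: built-in leave-one-out cross-validation, without per-fold rebalancing.
    Evaluation eval = new Evaluation(fullInst);
    eval.crossValidateModel(tempClassifier, fullInst, fullInst.numInstances(), new Random(1));
    System.out.println(eval.toSummaryString());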
From source file:statechum.analysis.learning.experiments.PairSelection.PairQualityLearner.java
License:Open Source License
@SuppressWarnings("null")
public static void runExperiment() throws Exception {
    DrawGraphs gr = new DrawGraphs();
    Configuration config = Configuration.getDefaultConfiguration().copy();
    config.setAskQuestions(false);
    config.setDebugMode(false);
    config.setGdLowToHighRatio(0.7);
    config.setRandomPathAttemptFudgeThreshold(1000);
    config.setTransitionMatrixImplType(STATETREE.STATETREE_LINKEDHASH);
    ConvertALabel converter = new Transform.InternStringLabel();
    //gr_NewToOrig.setLimit(7000);
    GlobalConfiguration.getConfiguration().setProperty(G_PROPERTIES.LINEARWARNINGS, "false");
    final int ThreadNumber = ExperimentRunner.getCpuNumber();
    ExecutorService executorService = Executors.newFixedThreadPool(ThreadNumber);
    final int minStateNumber = 20;
    final int samplesPerFSM = 4;
    final int rangeOfStateNumbers = 4;
    final int stateNumberIncrement = 4;
    final double trainingDataMultiplier = 2;
    // Stores tasks to complete.
    CompletionService<ThreadResult> runner = new ExecutorCompletionService<ThreadResult>(executorService);
    for (final int lengthMultiplier : new int[] { 50 })
        for (final int ifDepth : new int[] { 1 })
            for (final boolean onlyPositives : new boolean[] { true }) {
                final int traceQuantity = 1;
                for (final boolean useUnique : new boolean[] { false }) {
                    String selection = "TRUNK;TRAINING;" + "ifDepth=" + ifDepth + ";onlyPositives="
                            + onlyPositives + ";useUnique=" + useUnique + ";traceQuantity=" + traceQuantity
                            + ";lengthMultiplier=" + lengthMultiplier + ";trainingDataMultiplier="
                            + trainingDataMultiplier + ";";
                    WekaDataCollector dataCollector = createDataCollector(ifDepth);
                    List<SampleData> samples = new LinkedList<SampleData>();
                    try {
                        int numberOfTasks = 0;
                        for (int states = minStateNumber; states < minStateNumber
                                + rangeOfStateNumbers; states += stateNumberIncrement)
                            for (int sample = 0; sample < Math
                                    .round(samplesPerFSM * trainingDataMultiplier); ++sample) {
                                LearnerRunner learnerRunner = new LearnerRunner(dataCollector, states, sample,
                                        1 + numberOfTasks, traceQuantity, config, converter) {
                                    @Override
                                    public LearnerThatCanClassifyPairs createLearner(
                                            LearnerEvaluationConfiguration evalCnf,
                                            LearnerGraph argReferenceGraph,
                                            WekaDataCollector argDataCollector, LearnerGraph argInitialPTA) {
                                        return new LearnerThatUpdatesWekaResults(evalCnf, argReferenceGraph,
                                                argDataCollector, argInitialPTA);
                                    }
                                };
                                learnerRunner.setPickUniqueFromInitial(useUnique);
                                learnerRunner.setOnlyUsePositives(onlyPositives);
                                learnerRunner.setIfdepth(ifDepth);
                                learnerRunner.setLengthMultiplier(lengthMultiplier);
                                learnerRunner.setSelectionID(selection + "_states" + states + "_sample" + sample);
                                runner.submit(learnerRunner);
                                ++numberOfTasks;
                            }
                        ProgressIndicator progress = new ProgressIndicator(
                                "running " + numberOfTasks + " tasks for " + selection, numberOfTasks);
                        for (int count = 0; count < numberOfTasks; ++count) {
                            // take().get() will throw an exception if any of the tasks failed.
                            ThreadResult result = runner.take().get();
                            samples.addAll(result.samples);
                            progress.next();
                        }
                    } catch (Exception ex) {
                        IllegalArgumentException e = new IllegalArgumentException(
                                "failed to compute, the problem is: " + ex);
                        e.initCause(ex);
                        if (executorService != null) {
                            executorService.shutdown();
                            executorService = null;
                        }
                        throw e;
                    }
                    int nonZeroes = 0;
                    long numberOfValues = 0;
                    System.out.println("number of instances: " + dataCollector.trainingData.numInstances());
                    int freqData[] = new int[dataCollector.attributesOfAnInstance.length];
                    for (int i = 0; i < dataCollector.trainingData.numInstances(); ++i)
                        for (int attrNum = 0; attrNum < dataCollector.attributesOfAnInstance.length; ++attrNum) {
                            assert dataCollector.attributesOfAnInstance[attrNum].index() == attrNum;
                            if (dataCollector.trainingData.instance(i)
                                    .stringValue(attrNum) != WekaDataCollector.ZERO) {
                                ++freqData[attrNum];
                                ++numberOfValues;
                            }
                        }
                    for (int attrNum = 0; attrNum < dataCollector.attributesOfAnInstance.length; ++attrNum)
                        if (freqData[attrNum] > 0)
                            ++nonZeroes;
                    System.out.println("Total instances: " + dataCollector.trainingData.numInstances()
                            + " with " + dataCollector.attributesOfAnInstance.length
                            + " attributes, non-zeroes are " + nonZeroes + " with average of "
                            + ((double) numberOfValues) / nonZeroes);
                    Arrays.sort(freqData);
                    int numOfcolumns = 20;
                    int stepWidth = dataCollector.attributesOfAnInstance.length / numOfcolumns;
                    final RBoxPlot<Long> gr_HistogramOfAttributeValues = new RBoxPlot<Long>("Attributes",
                            "Number of values", new File("attributes_use" + selection + ".pdf"));
                    for (int i = 0; i < numOfcolumns; ++i) {
                        int columnData = 0;
                        for (int j = i * stepWidth; j < (i + 1) * stepWidth; ++j)
                            if (j < dataCollector.attributesOfAnInstance.length)
                                columnData += freqData[j];
                        gr_HistogramOfAttributeValues.add(new Long(numOfcolumns - i),
                                new Double(columnData > 0 ? Math.log10(columnData) : 0));
                    }
                    //gr_HistogramOfAttributeValues.drawInteractive(gr);
                    gr_HistogramOfAttributeValues.drawPdf(gr);
                    /*
                    // write arff
                    FileWriter wekaInstances = null;
                    String whereToWrite = "qualityLearner_" + selection + ".arff";
                    try {
                        wekaInstances = new FileWriter(whereToWrite);
                        // This chunk is almost verbatim from Weka's Instances.toString()
                        wekaInstances.append(Instances.ARFF_RELATION).append(" ")
                                .append(Utils.quote(dataCollector.trainingData.relationName())).append("\n\n");
                        for (int i = 0; i < dataCollector.trainingData.numAttributes(); i++) {
                            wekaInstances.append(dataCollector.trainingData.attribute(i).toString()).append("\n");
                        }
                        wekaInstances.append("\n").append(Instances.ARFF_DATA).append("\n");
                        for (int i = 0; i < dataCollector.trainingData.numInstances(); i++) {
                            wekaInstances.append(dataCollector.trainingData.instance(i).toString());
                            if (i < dataCollector.trainingData.numInstances() - 1) {
                                wekaInstances.append('\n');
                            }
                        }
                    } catch (Exception ex) {
                        Helper.throwUnchecked("failed to create a file with training data for " + whereToWrite, ex);
                    } finally {
                        if (wekaInstances != null)
                            try {
                                wekaInstances.close();
                            } catch (IOException e) {
                                // ignore this, we are not proceeding anyway due to an earlier exception,
                                // so whether the file was actually written does not matter
                            }
                    }
                    */
                    // Run the evaluation.
                    final weka.classifiers.trees.REPTree repTree = new weka.classifiers.trees.REPTree();
                    repTree.setMaxDepth(4);
                    //repTree.setNoPruning(true);
                    // Since we only use the tree as a classifier (as a conservative extension of what is
                    // currently done) and do not actually look at it, elimination of pruning is not a problem.
                    // As part of learning, we also prune some of the nodes where the ratio of
                    // correctly-classified pairs to those incorrectly classified is comparable.
                    // The significant advantage of not pruning is that the result is no longer sensitive
                    // to the order of elements in the tree and hence does not depend on the order in which
                    // elements have been obtained by concurrent threads.
                    //final weka.classifiers.lazy.IB1 ib1 = new weka.classifiers.lazy.IB1();
                    //final weka.classifiers.trees.J48 classifier = new weka.classifiers.trees.J48();
                    final Classifier classifier = repTree;
                    classifier.buildClassifier(dataCollector.trainingData);
                    System.out.println("Entries in the classifier: " + dataCollector.trainingData.numInstances());
                    System.out.println(classifier);
                    dataCollector = null; // throw all the training data away
                    { // serialise the classifier, this is the only way to store it
                        OutputStream os = new FileOutputStream(selection + ".ser");
                        ObjectOutputStream oo = new ObjectOutputStream(os);
                        oo.writeObject(classifier);
                        os.close();
                    }
                    for (final boolean selectingRed : new boolean[] { false })
                        for (final boolean classifierToBlockAllMergers : new boolean[] { true })
                            //for (final boolean zeroScoringAsRed : (classifierToBlockAllMergers
                            //        ? new boolean[] { true, false } : new boolean[] { false }))
                            // Where we are not using the classifier to rule out all mergers proposed by pair
                            // selection, it does not make sense to use two values configuring this classifier.
                            for (final double threshold : new double[] { 1 }) {
                                final boolean zeroScoringAsRed = false;
                                selection = "TRUNK;EVALUATION;" + "ifDepth=" + ifDepth + ";threshold=" + threshold
                                        // + ";useUnique=" + useUnique + ";onlyPositives=" + onlyPositives
                                        + ";selectingRed=" + selectingRed + ";classifierToBlockAllMergers="
                                        + classifierToBlockAllMergers + ";zeroScoringAsRed=" + zeroScoringAsRed
                                        + ";traceQuantity=" + traceQuantity + ";lengthMultiplier="
                                        + lengthMultiplier + ";trainingDataMultiplier=" + trainingDataMultiplier
                                        + ";";
                                final int totalTaskNumber = traceQuantity;
                                final RBoxPlot<Long> gr_PairQuality = new RBoxPlot<Long>("Correct v.s. wrong",
                                        "%%", new File("percentage_score" + selection + ".pdf"));
                                final RBoxPlot<String> gr_QualityForNumberOfTraces = new RBoxPlot<String>(
                                        "traces", "%%", new File("quality_traces" + selection + ".pdf"));
                                SquareBagPlot gr_NewToOrig = new SquareBagPlot("orig score",
                                        "score with learnt selection",
                                        new File("new_to_orig" + selection + ".pdf"), 0, 1, true);
                                final Map<Long, TrueFalseCounter> pairQualityCounter = new TreeMap<Long, TrueFalseCounter>();
                                try {
                                    int numberOfTasks = 0;
                                    for (int states = minStateNumber; states < minStateNumber
                                            + rangeOfStateNumbers; states += stateNumberIncrement)
                                        for (int sample = 0; sample < samplesPerFSM; ++sample) {
                                            LearnerRunner learnerRunner = new LearnerRunner(dataCollector,
                                                    states, sample, totalTaskNumber + numberOfTasks,
                                                    traceQuantity, config, converter) {
                                                @Override
                                                public LearnerThatCanClassifyPairs createLearner(
                                                        LearnerEvaluationConfiguration evalCnf,
                                                        LearnerGraph argReferenceGraph,
                                                        @SuppressWarnings("unused") WekaDataCollector argDataCollector,
                                                        LearnerGraph argInitialPTA) {
                                                    LearnerThatUsesWekaResults l = new LearnerThatUsesWekaResults(
                                                            ifDepth, evalCnf, argReferenceGraph, classifier,
                                                            argInitialPTA);
                                                    if (gr_PairQuality != null)
                                                        l.setPairQualityCounter(pairQualityCounter);
                                                    l.setUseClassifierForRed(selectingRed);
                                                    l.setUseClassifierToChooseNextRed(classifierToBlockAllMergers);
                                                    l.setBlacklistZeroScoringPairs(zeroScoringAsRed);
                                                    l.setThreshold(threshold);
                                                    return l;
                                                }
                                            };
                                            learnerRunner.setPickUniqueFromInitial(useUnique);
                                            learnerRunner.setEvaluateAlsoUsingReferenceLearner(true);
                                            learnerRunner.setOnlyUsePositives(onlyPositives);
                                            learnerRunner.setIfdepth(ifDepth);
                                            learnerRunner.setLengthMultiplier(lengthMultiplier);
                                            learnerRunner.setSelectionID(
                                                    selection + "_states" + states + "_sample" + sample);
                                            runner.submit(learnerRunner);
                                            ++numberOfTasks;
                                        }
                                    ProgressIndicator progress = new ProgressIndicator(new Date()
                                            + " evaluating " + numberOfTasks + " tasks for " + selection,
                                            numberOfTasks);
                                    for (int count = 0; count < numberOfTasks; ++count) {
                                        // take().get() will throw an exception if any of the tasks failed.
                                        ThreadResult result = runner.take().get();
                                        if (gr_NewToOrig != null) {
                                            for (SampleData sample : result.samples)
                                                gr_NewToOrig.add(sample.referenceLearner.getValue(),
                                                        sample.actualLearner.getValue());
                                        }
                                        for (SampleData sample : result.samples)
                                            if (sample.referenceLearner.getValue() > 0)
                                                gr_QualityForNumberOfTraces.add(traceQuantity + "",
                                                        sample.actualLearner.getValue()
                                                                / sample.referenceLearner.getValue());
                                        progress.next();
                                    }
                                    if (gr_PairQuality != null) {
                                        synchronized (pairQualityCounter) {
                                            updateGraph(gr_PairQuality, pairQualityCounter);
                                            //gr_PairQuality.drawInteractive(gr);
                                            //gr_NewToOrig.drawInteractive(gr);
                                            //if (gr_QualityForNumberOfTraces.size() > 0)
                                            //    gr_QualityForNumberOfTraces.drawInteractive(gr);
                                        }
                                    }
                                    if (gr_PairQuality != null)
                                        gr_PairQuality.drawPdf(gr);
                                } catch (Exception ex) {
                                    IllegalArgumentException e = new IllegalArgumentException(
                                            "failed to compute, the problem is: " + ex);
                                    e.initCause(ex);
                                    if (executorService != null) {
                                        executorService.shutdownNow();
                                        executorService = null;
                                    }
                                    throw e;
                                }
                                if (gr_NewToOrig != null)
                                    gr_NewToOrig.drawPdf(gr);
                                if (gr_QualityForNumberOfTraces != null)
                                    gr_QualityForNumberOfTraces.drawPdf(gr);
                            }
                }
            }
    if (executorService != null) {
        executorService.shutdown();
        executorService = null;
    }
}
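The training phase above stores the fitted classifier with writeObject, noting that serialisation is the only way to persist it. A matching reload sketch, assuming the same selection + ".ser" file name (requires java.io.ObjectInputStream and java.io.FileInputStream):

    // Sketch: reload the serialised classifier written during training.
    ObjectInputStream in = new ObjectInputStream(new FileInputStream(selection + ".ser"));
    Classifier classifier = (Classifier) in.readObject();
    in.close();
    // Equivalently, weka.core.SerializationHelper.read(selection + ".ser") returns an
    // Object that can be cast to Classifier.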
From source file:statechum.analysis.learning.experiments.PaperUAS.java
License:Open Source License
/** Used to load the classifier from a full PTA by comparing metrics on pairs considered by QSM
 * and checking them against the reference solution. */
protected void trainClassifierFromArff(Classifier classifier, String arffWithTrainingData) {
    Reader arffReader = null;
    try {
        arffReader = new FileReader(arffWithTrainingData);
        Instances trainingData = new Instances(arffReader);
        if (!"class".equals(trainingData.attribute(trainingData.numAttributes() - 1).name()))
            throw new IllegalArgumentException("last element is not a class");
        trainingData.setClassIndex(trainingData.numAttributes() - 1);
        classifier.buildClassifier(trainingData);
    } catch (Exception ex) {
        // We cannot proceed if this happens because every classifier should be able to both
        // learn and deliver. Throw the exception.
        Helper.throwUnchecked("failed to train classifier " + classifier.getClass(), ex);
    } finally {
        if (arffReader != null)
            try {
                arffReader.close();
            } catch (IOException e) {
                // Ignore this; we opened the file for reading, so there is not much cleanup
                // we can do beyond the close itself.
            }
    }
}
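The same load-and-train pattern can be written against weka.core.converters.ConverterUtils.DataSource, which newer Weka releases recommend over building Instances from a raw Reader. A sketch under the same assumptions about the ARFF layout:

    // Sketch: DataSource reads ARFF (and other formats) and manages the stream itself.
    Instances trainingData = weka.core.converters.ConverterUtils.DataSource.read(arffWithTrainingData);
    trainingData.setClassIndex(trainingData.numAttributes() - 1);
    classifier.buildClassifier(trainingData);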
From source file:statistics.BinaryStatisticsEvaluator.java
@Override
public double[][] getConfusionMatrix(Instances Training_Instances, Instances Testing_Instances,
        String classifier) {
    Classifier cModel = null;
    if ("NB".equals(classifier)) {
        cModel = (Classifier) new NaiveBayes();
        try {
            cModel.buildClassifier(Training_Instances);
        } catch (Exception ex) {
            Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else if ("DT".equals(classifier)) {
        cModel = (Classifier) new J48();
        try {
            cModel.buildClassifier(Training_Instances);
        } catch (Exception ex) {
            Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else if ("SVM".equals(classifier)) {
        cModel = (Classifier) new SMO();
        try {
            cModel.buildClassifier(Training_Instances);
        } catch (Exception ex) {
            Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else if ("KNN".equals(classifier)) {
        cModel = (Classifier) new IBk();
        try {
            cModel.buildClassifier(Training_Instances);
        } catch (Exception ex) {
            Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    // Test the model.
    Evaluation eTest;
    try {
        eTest = new Evaluation(Training_Instances);
        eTest.evaluateModel(cModel, Testing_Instances);
        // Print the result.
        String strSummary = eTest.toSummaryString();
        System.out.println(strSummary);
        String strSummary1 = eTest.toMatrixString();
        System.out.println(strSummary1);
        String strSummary2 = eTest.toClassDetailsString();
        System.out.println(strSummary2);
        // Get the confusion matrix.
        double[][] cmMatrix = eTest.confusionMatrix();
        return cmMatrix;
    } catch (Exception ex) {
        Logger.getLogger(BinaryStatisticsEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
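A hypothetical call site for the method above, printing the returned matrix row by row (the evaluator, train and test names are assumptions, not part of the source):

    double[][] cm = evaluator.getConfusionMatrix(train, test, "NB");
    if (cm != null)
        for (double[] row : cm)
            System.out.println(java.util.Arrays.toString(row));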
From source file:tclass.ExpSeg.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSeg thisExp = new ExpSeg();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    // System.out.println(trainEventData.toString());
    // Now we want the clustering algorithms only to cluster instances of each class.
    // Make an array of clusterers, one per class.
    int numTestStreams = testGlobalData.size();
    int numClasses = domDesc.getClassDescVec().size();
    TimeDivision td = new TimeDivision(domDesc, thisExp.numDivs);
    ClassStreamAttValVecI trainDivData = td.timeDivide(trainStreamData);
    ClassStreamAttValVecI testDivData = td.timeDivide(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Segmentation performed");
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainDivData);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testDivData);
    trainStreamData = null;
    testStreamData = null;
    System.gc();
    // So now we have the raw data in the correct form for each attributor,
    // and we can construct a learner for each case. For now, run each classifier
    // over the whole data and see what happens; maybe some voting scheme is
    // possible - a strange form of ensemble classifier.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features.
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    learner.buildClassifier(data);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());
    WekaClassifier wekaClassifier;
    wekaClassifier = new WekaClassifier(learner);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");
    System.err.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    ClassificationVecI classns;
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += "last";
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    int numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        // System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}
From source file:tclass.ExpSingle.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingle thisExp = new ExpSingle();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    mem("DATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    mem("GLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    mem("EVENTEXTRACT");
    // System.out.println(trainEventData.toString());
    // Now we want the clustering algorithms only to cluster instances of each class.
    // Make an array of clusterers, one per class.
    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");
    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");
    // The first stage was only useful for clustering. Now attribution: we want to
    // attribute all the data, so there will be one dataset per learner.
    // First set up the attributors.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");
    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");
    mem("ATTRIBUTION");
    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("COMBINATION");
    trainStreamData = null;
    testStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;
    System.gc();
    mem("GARBAGECOLLECT");
    // So now we have the raw data in the correct form for each attributor,
    // and we can construct a learner for each case. For now, run each classifier
    // over the whole data and see what happens; maybe some voting scheme is
    // possible - a strange form of ensemble classifier.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features.
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());
    WekaClassifier wekaClassifier;
    wekaClassifier = new WekaClassifier(learner);
    if (thisExp.makeDesc) {
        // Section for making the description more readable. Assumes that learner.toString()
        // returns a string with things that look like feature names.
        String concept = learner.toString();
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        int evId = 1;
        String evIndex = "";
        while (st.hasMoreTokens()) {
            boolean appendColon = false;
            String curTok = st.nextToken();
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust != null) {
                // Skip the spaces.
                st.nextToken();
                // Get a < or >.
                String cmp = st.nextToken();
                String qual = "";
                if (cmp.equals("<=")) {
                    qual = " HAS NO ";
                } else {
                    qual = " HAS ";
                }
                // Skip spaces.
                st.nextToken();
                // Get the number.
                String conf = st.nextToken();
                if (conf.endsWith(":")) {
                    conf = conf.substring(0, conf.length() - 1);
                    appendColon = true;
                }
                float minconf = Float.valueOf(conf).floatValue();
                EventI[] res = clust.getBounds(minconf);
                String name = clust.getName();
                int dashPos = name.indexOf('-');
                int undPos = name.indexOf('_');
                String chan = name.substring(0, dashPos);
                String evType = name.substring(dashPos + 1, undPos);
                EventDescI edi = clust.eventDesc();
                if (qual == " HAS NO "
                        && thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName())) {
                    System.out.print("OTHERWISE");
                } else {
                    System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
                    int numParams = edi.numParams();
                    evIndex += "*" + evId + ": " + evType + "\n";
                    for (int i = 0; i < numParams; i++) {
                        evIndex += " " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                                + "," + res[1].valOf(i) + "]\n";
                    }
                    evId++;
                }
                evIndex += "\n";
                if (appendColon) {
                    System.out.print(" THEN");
                }
            } else {
                System.out.print(curTok);
            }
        }
        System.out.println("\nEvent index");
        System.out.println("-----------");
        System.out.print(evIndex);
        mem("POSTDESC");
        // Sanity check: read the data back and look up info that is assumed to have
        // come from a GainClusterer.
        // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0");
        // System.out.println("INSANE!: " + clust.getDescription());
        // EventI[] res = clust.getBounds(1);
        // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");
    int numCorrect = 0;
    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        for (int j = 0; j < numTrainStreams; j++) {
            // System.out.print(classns.elAt(j).toString());
            if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
                numCorrect++;
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                System.err.println("Class " + realClassName + " CORRECTLY classified.");
            } else {
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                String predictedClassName = domDesc.getClassDescVec()
                        .getClassLabel(classns.elAt(j).getPredictedClass());
                System.err.println(
                        "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
            }
        }
        System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " ("
                + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");
    System.err.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += "last";
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        // System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");
}
From source file:tclass.ExpSingleLM.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    ExpSingleLM thisExp = new ExpSingleLM();
    thisExp.parseArgs(args);
    mem("PARSE");
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data read in");
    mem("TRAINDATAIN");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data globals calculated.");
    mem("TRAINGLOBAL");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training events extracted");
    mem("EVENTEXTRACT");
    // System.out.println(trainEventData.toString());
    // Now we want the clustering algorithms only to cluster instances of each class.
    // Make an array of clusterers, one per class.
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    mem("REARRANGE");
    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    mem("CLUSTER");
    // The first stage was only useful for clustering. Now attribution: we want to
    // attribute all the data, so there will be one dataset per learner.
    // First set up the attributors.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    mem("MAKEATTRIBUTOR");
    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Training data Attribution complete.");
    mem("TRAINATTRIBUTION");
    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    mem("TRAINCOMBINATION");
    trainStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    System.gc();
    mem("TRAINGC");
    // So now we have the raw data in the correct form for each attributor,
    // and we can construct a learner for each case. For now, run each classifier
    // over the whole data and see what happens; maybe some voting scheme is
    // possible - a strange form of ensemble classifier.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    mem("ATTCONVERSION");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features.
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    learner.buildClassifier(data);
    mem("POSTLEARNER");
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());
    WekaClassifier wekaClassifier;
    wekaClassifier = new WekaClassifier(learner);
    if (thisExp.makeDesc) {
        // Section for making the description more readable. Assumes that learner.toString()
        // returns a string with things that look like feature names.
        String concept = learner.toString();
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        int evId = 1;
        String evIndex = "";
        while (st.hasMoreTokens()) {
            boolean appendColon = false;
            String curTok = st.nextToken();
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust != null) {
                // Skip the spaces.
                st.nextToken();
                // Get a < or >.
                String cmp = st.nextToken();
                String qual = "";
                if (cmp.equals("<=")) {
                    qual = " HAS NO ";
                } else {
                    qual = " HAS ";
                }
                // Skip spaces.
                st.nextToken();
                // Get the number.
                String conf = st.nextToken();
                if (conf.endsWith(":")) {
                    conf = conf.substring(0, conf.length() - 1);
                    appendColon = true;
                }
                float minconf = Float.valueOf(conf).floatValue();
                EventI[] res = clust.getBounds(minconf);
                String name = clust.getName();
                int dashPos = name.indexOf('-');
                int undPos = name.indexOf('_');
                String chan = name.substring(0, dashPos);
                String evType = name.substring(dashPos + 1, undPos);
                EventDescI edi = clust.eventDesc();
                if (qual == " HAS NO "
                        && thisExp.learnerStuff.startsWith(weka.classifiers.trees.J48.class.getName())) {
                    System.out.print("OTHERWISE");
                } else {
                    System.out.print("IF " + chan + qual + res[2] + " (*" + evId + ")");
                    int numParams = edi.numParams();
                    evIndex += "*" + evId + ": " + evType + "\n";
                    for (int i = 0; i < numParams; i++) {
                        evIndex += " " + edi.paramName(i) + "=" + res[2].valOf(i) + " r=[" + res[0].valOf(i)
                                + "," + res[1].valOf(i) + "]\n";
                    }
                    evId++;
                }
                evIndex += "\n";
                if (appendColon) {
                    System.out.print(" THEN");
                }
            } else {
                System.out.print(curTok);
            }
        }
        System.out.println("\nEvent index");
        System.out.println("-----------");
        System.out.print(evIndex);
        mem("POSTDESC");
        // Sanity check: read the data back and look up info that is assumed to have
        // come from a GainClusterer.
        // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0");
        // System.out.println("INSANE!: " + clust.getDescription());
        // EventI[] res = clust.getBounds(1);
        // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");
    int numCorrect = 0;
    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        for (int j = 0; j < numTrainStreams; j++) {
            // System.out.print(classns.elAt(j).toString());
            if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
                numCorrect++;
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                System.err.println("Class " + realClassName + " CORRECTLY classified.");
            } else {
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                String predictedClassName = domDesc.getClassDescVec()
                        .getClassLabel(classns.elAt(j).getPredictedClass());
                System.err.println(
                        "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
            }
        }
        System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " ("
                + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    mem("POSTTRAIN");
    System.err.println(">>> Testing stage <<<");
    // Testing data is loaded lazily, only at this point.
    mem("TESTBEGIN");
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data read in");
    mem("TESTREAD");
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test data globals calculated");
    mem("TESTGLOBALS");
    Debug.dp(Debug.PROGRESS, "Test data: " + testGlobalData.size());
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Test events extracted");
    mem("TESTEVENTS");
    int numTestStreams = testEventData.size();
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    mem("TESTATTRIBUTES");
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    mem("TESTCOMBINE");
    testStreamData = null;
    System.gc(); // do garbage collection
    mem("TESTGC");
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;
    // First, print the results of using the straight testers.
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += "last";
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        // System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
    mem("POSTTEST");
}
From source file:tclass.TClass.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Debug.setDebugLevel(Debug.PROGRESS);
    TClass thisExp = new TClass();
    thisExp.parseArgs(args);
    DomDesc domDesc = new DomDesc(thisExp.domDescFile);
    ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc);
    ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in");
    Settings settings = new Settings(thisExp.settingsFile, domDesc);
    EventExtractor evExtractor = settings.getEventExtractor();
    // Global data is likely to be included in every model, so we might as well calculate it now.
    GlobalCalc globalCalc = settings.getGlobalCalc();
    ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData);
    ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData);
    // And we might as well extract the events.
    Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated.");
    Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size());
    ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData);
    ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted");
    // System.out.println(trainEventData.toString());
    // Now we want the clustering algorithms only to cluster instances of each class.
    // Make an array of clusterers, one per class.
    int numTestStreams = testEventData.size();
    int numClasses = domDesc.getClassDescVec().size();
    EventDescVecI eventDescVec = evExtractor.getDescription();
    EventClusterer eventClusterer = settings.getEventClusterer();
    Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged.");
    // And now load it up.
    StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec();
    ClassificationVecI trainEventCV = trainEventData.getClassVec();
    int numTrainStreams = trainEventCV.size();
    ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete");
    Debug.dp(Debug.PROGRESS, "Clusters are:");
    Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping());
    Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. ");
    // The first stage was only useful for clustering. Now attribution: we want to
    // attribute all the data, so there will be one dataset per learner.
    // First set up the attributors.
    Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription());
    Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete.");
    ClassStreamAttValVecI trainEventAtts = attribs.attribute(trainStreamData, trainEventData);
    ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete.");
    // Combine all data sources. For now, globals go in every one.
    Combiner c = new Combiner();
    ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts);
    ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts);
    trainStreamData = null;
    testStreamData = null;
    trainEventSEV = null;
    trainEventCV = null;
    if (!thisExp.makeDesc) {
        clusters = null;
        eventClusterer = null;
    }
    attribs = null;
    System.gc();
    // So now we have the raw data in the correct form for each attributor,
    // and we can construct a learner for each case. For now, run each classifier
    // over the whole data and see what happens; maybe some voting scheme is
    // possible - a strange form of ensemble classifier.
    Debug.setDebugLevel(Debug.PROGRESS);
    int[] selectedIndices = null;
    String[] classifierSpec = Utils.splitOptions(thisExp.learnerStuff);
    if (classifierSpec.length == 0) {
        throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    Classifier learner = AbstractClassifier.forName(classifierName, classifierSpec);
    Debug.dp(Debug.PROGRESS, "PROGRESS: Beginning format conversion for class ");
    Instances data = WekaBridge.makeInstances(trainAtts, "Train ");
    Debug.dp(Debug.PROGRESS, "PROGRESS: Conversion complete. Starting learning");
    if (thisExp.featureSel) {
        Debug.dp(Debug.PROGRESS, "PROGRESS: Doing feature selection");
        BestFirst bfs = new BestFirst();
        CfsSubsetEval cfs = new CfsSubsetEval();
        cfs.buildEvaluator(data);
        selectedIndices = bfs.search(cfs, data);
        // Now extract the features.
        System.err.print("Selected features: ");
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += ("last");
        System.err.println(featureString);
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    learner.buildClassifier(data);
    Debug.dp(Debug.PROGRESS, "Learnt classifier: \n" + learner.toString());
    WekaClassifier wekaClassifier;
    wekaClassifier = new WekaClassifier(learner);
    if (thisExp.makeDesc) {
        // Section for making the description more readable. Assumes that learner.toString()
        // returns a string with things that look like feature names.
        String concept = learner.toString();
        StringTokenizer st = new StringTokenizer(concept, " \t\r\n", true);
        while (st.hasMoreTokens()) {
            boolean appendColon = false;
            String curTok = st.nextToken();
            GClust clust = (GClust) ((ClusterVec) clusters).elCalled(curTok);
            if (clust != null) {
                // Skip the spaces.
                st.nextToken();
                // Get a < or >.
                String cmp = st.nextToken();
                String qual = "";
                if (cmp.equals("<=")) {
                    qual = " HAS NO ";
                } else {
                    qual = " HAS ";
                }
                // Skip spaces.
                st.nextToken();
                // Get the number.
                String conf = st.nextToken();
                if (conf.endsWith(":")) {
                    conf = conf.substring(0, conf.length() - 1);
                    appendColon = true;
                }
                float minconf = Float.valueOf(conf).floatValue();
                EventI[] res = clust.getBounds(minconf);
                String name = clust.getName();
                int dashPos = name.indexOf('-');
                int undPos = name.indexOf('_');
                String chan = name.substring(0, dashPos);
                String evType = name.substring(dashPos + 1, undPos);
                EventDescI edi = clust.eventDesc();
                System.out.print("Channel " + chan + qual + evType + " ");
                int numParams = edi.numParams();
                for (int i = 0; i < numParams; i++) {
                    System.out.print(edi.paramName(i) + " in [" + res[0].valOf(i) + "," + res[1].valOf(i)
                            + "] ");
                }
                if (appendColon) {
                    System.out.print(":");
                }
            } else {
                System.out.print(curTok);
            }
        }
        // Sanity check: read the data back and look up info that is assumed to have
        // come from a GainClusterer.
        // GClust clust = (GClust) ((ClusterVec) clusters).elCalled("alpha-inc_0");
        // System.out.println("INSANE!: " + clust.getDescription());
        // EventI[] res = clust.getBounds(1);
        // System.out.println("For clust settings: min event = " + res[0].toString() + " and max event = " + res[1].toString());
    }
    Debug.dp(Debug.PROGRESS, "PROGRESS: Learning complete. ");
    int numCorrect = 0;
    ClassificationVecI classns;
    if (thisExp.trainResults) {
        System.err.println(">>> Training performance <<<");
        classns = (ClassificationVecI) trainAtts.getClassVec().clone();
        for (int j = 0; j < numTrainStreams; j++) {
            wekaClassifier.classify(data.instance(j), classns.elAt(j));
        }
        for (int j = 0; j < numTrainStreams; j++) {
            // System.out.print(classns.elAt(j).toString());
            if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
                numCorrect++;
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                System.err.println("Class " + realClassName + " CORRECTLY classified.");
            } else {
                String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
                String predictedClassName = domDesc.getClassDescVec()
                        .getClassLabel(classns.elAt(j).getPredictedClass());
                System.err.println(
                        "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
            }
        }
        System.err.println("Training results for classifier: " + numCorrect + " of " + numTrainStreams + " ("
                + numCorrect * 100.0 / numTrainStreams + "%)");
    }
    System.err.println(">>> Testing stage <<<");
    // First, print the results of using the straight testers.
    classns = (ClassificationVecI) testAtts.getClassVec().clone();
    StreamAttValVecI savvi = testAtts.getStreamAttValVec();
    data = WekaBridge.makeInstances(testAtts, "Test ");
    if (thisExp.featureSel) {
        String featureString = new String();
        for (int j = 0; j < selectedIndices.length; j++) {
            featureString += (selectedIndices[j] + 1) + ",";
        }
        featureString += "last";
        // Now apply the filter.
        Remove af = new Remove();
        af.setInvertSelection(true);
        af.setAttributeIndices(featureString);
        af.setInputFormat(data);
        data = Filter.useFilter(data, af);
    }
    for (int j = 0; j < numTestStreams; j++) {
        wekaClassifier.classify(data.instance(j), classns.elAt(j));
    }
    System.err.println(">>> Learner <<<");
    numCorrect = 0;
    for (int j = 0; j < numTestStreams; j++) {
        // System.out.print(classns.elAt(j).toString());
        if (classns.elAt(j).getRealClass() == classns.elAt(j).getPredictedClass()) {
            numCorrect++;
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            System.err.println("Class " + realClassName + " CORRECTLY classified.");
        } else {
            String realClassName = domDesc.getClassDescVec().getClassLabel(classns.elAt(j).getRealClass());
            String predictedClassName = domDesc.getClassDescVec()
                    .getClassLabel(classns.elAt(j).getPredictedClass());
            System.err.println(
                    "Class " + realClassName + " INCORRECTLY classified as " + predictedClassName + ".");
        }
    }
    System.err.println("Test accuracy for classifier: " + numCorrect + " of " + numTestStreams + " ("
            + numCorrect * 100.0 / numTestStreams + "%)");
}
From source file:textmining.TextMining.java
/**
 * Decision Table.
 *
 * @param instances
 * @return string
 * @throws Exception
 */
private static String C_DecisionTable(Instances instances) throws Exception {
    Classifier decisionTable = (Classifier) new DecisionTable();
    String[] options = weka.core.Utils
            .splitOptions("-X 1 -S \"weka.attributeSelection.BestFirst -D 1 -N 5\"");
    decisionTable.setOptions(options);
    decisionTable.buildClassifier(instances);
    Evaluation eval = new Evaluation(instances);
    // eval.evaluateModel(decisionTable, instances);
    eval.crossValidateModel(decisionTable, instances, 5, new Random(1));
    String resume = eval.toSummaryString();
    return eval.toMatrixString(resume);
}
From source file:textmining.TextMining.java
private static String setOptions(Classifier classifier, Instances instances, String[] options)
        throws Exception {
    classifier.setOptions(options);
    classifier.buildClassifier(instances);
    Evaluation eval = new Evaluation(instances);
    eval.crossValidateModel(classifier, instances, 5, new Random(1));
    // Note: evaluating on the training set afterwards would pool resubstitution predictions
    // into the same Evaluation and skew the cross-validation figures, so the call is left
    // commented out here (C_DecisionTable above omits it for the same reason).
    // eval.evaluateModel(classifier, instances);
    String resume = eval.toSummaryString();
    return eval.toMatrixString(resume);
}