Example usage for weka.core Instances classAttribute

List of usage examples for weka.core Instances classAttribute

Introduction

On this page you can find example usages of weka.core.Instances.classAttribute().

Prototype


public Attribute classAttribute()

Source Link

Document

Returns the class attribute.

Usage

From source file:ann.MyANN.java

/**
 * Evaluates the model against a test set and returns a confusion matrix.
 * {@code buildClassifier} must have been called beforehand.
 *
 * @param testSet the instances used to test the model
 * @return the confusion matrix. For a nominal class it is an NxN square matrix,
 *         N being the number of class values, indexed as [actual][predicted].
 *         For a numeric class it is a 1x2 matrix whose first element is the
 *         number of correct predictions and whose second element is the number
 *         of wrong predictions. Instances whose prediction throws are logged
 *         and not counted.
 */
public int[][] evaluate(Instances testSet) {
    final boolean nominalClass = testSet.classAttribute().isNominal();

    final int[][] confusionMatrix;
    if (nominalClass) {
        int numClasses = testSet.classAttribute().numValues();
        confusionMatrix = new int[numClasses][numClasses];
    } else {
        confusionMatrix = new int[1][2];
    }

    for (int i = 0; i < testSet.numInstances(); i++) {
        try {
            double[] prob = distributionForInstance(testSet.instance(i));
            if (nominalClass) {
                int idx = predictClassIndex(prob);
                confusionMatrix[(int) testSet.instance(i).classValue()][idx]++;
            } else if (Math.abs(prob[0] - testSet.instance(i).classValue()) <= 0.001) {
                // a numeric prediction counts as correct when it is within 0.001 of the actual value
                confusionMatrix[0][0]++;
            } else {
                confusionMatrix[0][1]++;
            }
        } catch (Exception ex) {
            Logger.getLogger(MyANN.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return confusionMatrix;
}

From source file:ann.MyANN.java

/**
 * Mengevaluasi model dengan membagi instances menjadi trainSet dan testSet sebanyak numFold
 * @param instances data yang akan diuji
 * @param numFold/*from ww  w  .  ja  v a  2  s . c  o m*/
 * @param rand 
 * @return confusion matrix
 */
public int[][] crossValidation(Instances instances, int numFold, Random rand) {
    int[][] totalResult = null;
    instances = new Instances(instances);
    instances.randomize(rand);
    if (instances.classAttribute().isNominal()) {
        instances.stratify(numFold);
    }
    for (int i = 0; i < numFold; i++) {
        try {
            // membagi instance berdasarkan jumlah fold
            Instances train = instances.trainCV(numFold, i, rand);
            Instances test = instances.testCV(numFold, i);
            MyANN cc = new MyANN(this);
            cc.buildClassifier(train);
            int[][] result = cc.evaluate(test);
            if (i == 0) {
                totalResult = cc.evaluate(test);
            } else {
                result = cc.evaluate(test);
                for (int j = 0; j < totalResult.length; j++) {
                    for (int k = 0; k < totalResult[0].length; k++) {
                        totalResult[j][k] += result[j][k];
                    }
                }
            }
        } catch (Exception ex) {
            Logger.getLogger(MyANN.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return totalResult;
}

From source file:asap.CrossValidation.java

/**
 *
 * @param dataInput/*from   w ww . ja v a2 s . c  om*/
 * @param classIndex
 * @param removeIndices
 * @param cls
 * @param seed
 * @param folds
 * @param modelOutputFile
 * @return
 * @throws Exception
 */
public static String performCrossValidation(String dataInput, String classIndex, String removeIndices,
        AbstractClassifier cls, int seed, int folds, String modelOutputFile) throws Exception {

    PerformanceCounters.startTimer("cross-validation ST");

    PerformanceCounters.startTimer("cross-validation init ST");

    // loads data and set class index
    Instances data = DataSource.read(dataInput);
    String clsIndex = classIndex;

    switch (clsIndex) {
    case "first":
        data.setClassIndex(0);
        break;
    case "last":
        data.setClassIndex(data.numAttributes() - 1);
        break;
    default:
        try {
            data.setClassIndex(Integer.parseInt(clsIndex) - 1);
        } catch (NumberFormatException e) {
            data.setClassIndex(data.attribute(clsIndex).index());
        }
        break;
    }

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(removeIndices);
    removeFilter.setInputFormat(data);
    data = Filter.useFilter(data, removeFilter);

    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(data);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Evaluation eval = new Evaluation(randData);
    Instances trainSets[] = new Instances[folds];
    Instances testSets[] = new Instances[folds];
    Classifier foldCls[] = new Classifier[folds];

    for (int n = 0; n < folds; n++) {
        trainSets[n] = randData.trainCV(folds, n);
        testSets[n] = randData.testCV(folds, n);
        foldCls[n] = AbstractClassifier.makeCopy(cls);
    }

    PerformanceCounters.stopTimer("cross-validation init ST");
    PerformanceCounters.startTimer("cross-validation folds+train ST");
    //paralelize!!:--------------------------------------------------------------
    for (int n = 0; n < folds; n++) {
        Instances train = trainSets[n];
        Instances test = testSets[n];

        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);
        // build and evaluate classifier
        Classifier clsCopy = foldCls[n];
        clsCopy.buildClassifier(train);
        eval.evaluateModel(clsCopy, test);
    }

    cls.buildClassifier(data);
    //until here!-----------------------------------------------------------------

    PerformanceCounters.stopTimer("cross-validation folds+train ST");
    PerformanceCounters.startTimer("cross-validation post ST");
    // output evaluation
    String out = "\n" + "=== Setup ===\n" + "Classifier: " + cls.getClass().getName() + " "
            + Utils.joinOptions(cls.getOptions()) + "\n" + "Dataset: " + data.relationName() + "\n" + "Folds: "
            + folds + "\n" + "Seed: " + seed + "\n" + "\n"
            + eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false) + "\n";

    if (!modelOutputFile.isEmpty()) {
        SerializationHelper.write(modelOutputFile, cls);
    }

    PerformanceCounters.stopTimer("cross-validation post ST");
    PerformanceCounters.stopTimer("cross-validation ST");

    return out;
}

From source file:asap.CrossValidation.java

/**
 * Multi-threaded variant of the cross-validation: the fold work is handed to
 * {@code CrossValidationFoldThread} workers while the calling thread trains
 * {@code cls} on the complete (filtered) data set. With a configured thread
 * count of one or less, the fold work runs on the calling thread instead.
 *
 * @param dataInput location of the data set, passed to {@code DataSource.read}
 * @param classIndex class attribute selector: {@code "first"}, {@code "last"},
 *        a 1-based attribute index, or an attribute name
 * @param removeIndices attribute indices handed to the {@code Remove} filter
 * @param cls classifier to evaluate; a copy is created for every fold and
 *        {@code cls} itself is trained on the whole filtered data set
 * @param seed seed of the random generator used to shuffle the data
 * @param folds number of cross-validation folds
 * @param modelOutputFile file the trained {@code cls} is serialized to;
 *        nothing is written when it is {@code null} or empty
 * @return a textual report with the setup and the evaluation summary
 * @throws IllegalArgumentException if {@code classIndex} names no attribute
 * @throws Exception if loading, filtering, training or serializing fails, or
 *         if the calling thread is interrupted while waiting for the workers
 */
public static String performCrossValidationMT(String dataInput, String classIndex, String removeIndices,
        AbstractClassifier cls, int seed, int folds, String modelOutputFile) throws Exception {

    PerformanceCounters.startTimer("cross-validation MT");

    PerformanceCounters.startTimer("cross-validation init MT");

    // load data and set the class index
    Instances data = DataSource.read(dataInput);

    switch (classIndex) {
    case "first":
        data.setClassIndex(0);
        break;
    case "last":
        data.setClassIndex(data.numAttributes() - 1);
        break;
    default:
        try {
            // numeric selectors are 1-based
            data.setClassIndex(Integer.parseInt(classIndex) - 1);
        } catch (NumberFormatException e) {
            // not a number: treat the selector as an attribute name
            if (data.attribute(classIndex) == null) {
                throw new IllegalArgumentException(
                        "No attribute named '" + classIndex + "' in data set " + data.relationName(), e);
            }
            data.setClassIndex(data.attribute(classIndex).index());
        }
        break;
    }

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(removeIndices);
    removeFilter.setInputFormat(data);
    data = Filter.useFilter(data, removeFilter);

    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(data);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // read the configured thread count once so thread creation and thread
    // start below are decided on the same value
    final int numThreads = Config.getNumThreads();

    // perform cross-validation and add predictions
    Evaluation eval = new Evaluation(randData);
    List<Thread> foldThreads = Collections.synchronizedList(new LinkedList<Thread>());
    List<FoldSet> foldSets = Collections.synchronizedList(new LinkedList<FoldSet>());

    for (int n = 0; n < folds; n++) {
        foldSets.add(new FoldSet(randData.trainCV(folds, n), randData.testCV(folds, n),
                AbstractClassifier.makeCopy(cls)));

        // one worker fewer than the configured thread count: the calling
        // thread is busy training cls on the full data set
        if (n < numThreads - 1) {
            Thread foldThread = new Thread(new CrossValidationFoldThread(n, foldSets, eval));
            foldThreads.add(foldThread);
        }
    }

    PerformanceCounters.stopTimer("cross-validation init MT");
    PerformanceCounters.startTimer("cross-validation folds+train MT");

    if (numThreads > 1) {
        for (Thread foldThread : foldThreads) {
            foldThread.start();
        }
    } else {
        // no worker threads were created: run the fold work on the current thread
        new CrossValidationFoldThread(0, foldSets, eval).run();
    }

    // final model: trained on the complete data set while the workers run
    cls.buildClassifier(data);

    for (Thread foldThread : foldThreads) {
        foldThread.join();
    }

    PerformanceCounters.stopTimer("cross-validation folds+train MT");
    PerformanceCounters.startTimer("cross-validation post MT");
    // evaluation for output:
    String out = "\n" + "=== Setup ===\n" + "Classifier: " + cls.getClass().getName() + " "
            + Utils.joinOptions(cls.getOptions()) + "\n" + "Dataset: " + data.relationName() + "\n" + "Folds: "
            + folds + "\n" + "Seed: " + seed + "\n" + "\n"
            + eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false) + "\n";

    if (modelOutputFile != null && !modelOutputFile.isEmpty()) {
        SerializationHelper.write(modelOutputFile, cls);
    }

    PerformanceCounters.stopTimer("cross-validation post MT");
    PerformanceCounters.stopTimer("cross-validation MT");
    return out;
}

From source file:asap.CrossValidation.java

/**
 * Multi-threaded cross-validation on an already loaded data set. The fold
 * work is handed to {@code CrossValidationFoldThread} workers while the
 * calling thread trains {@code cls} on the complete data set. Failures are
 * logged rather than thrown.
 *
 * @param data the data set; its class attribute must already be set
 * @param cls classifier to evaluate; a copy is created for every fold and
 *        {@code cls} itself is trained on {@code data}
 * @param seed seed of the random generator used to shuffle the data
 * @param folds number of cross-validation folds
 * @param modelOutputFile file the trained {@code cls} is serialized to;
 *        nothing is written when it is {@code null} or empty
 * @return a textual report with the setup and the evaluation summary, or an
 *         error message if the evaluation object could not be created
 */
static String performCrossValidationMT(Instances data, AbstractClassifier cls, int seed, int folds,
        String modelOutputFile) {

    PerformanceCounters.startTimer("cross-validation MT");

    PerformanceCounters.startTimer("cross-validation init MT");

    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(data);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Evaluation eval;
    try {
        eval = new Evaluation(randData);
    } catch (Exception ex) {
        Logger.getLogger(CrossValidation.class.getName()).log(Level.SEVERE, null, ex);
        return "Error creating evaluation instance for given data!";
    }

    // read the configured thread count once so thread creation and thread
    // start below are decided on the same value
    final int numThreads = Config.getNumThreads();

    List<Thread> foldThreads = Collections.synchronizedList(new LinkedList<Thread>());
    List<FoldSet> foldSets = Collections.synchronizedList(new LinkedList<FoldSet>());

    for (int n = 0; n < folds; n++) {
        try {
            foldSets.add(new FoldSet(randData.trainCV(folds, n), randData.testCV(folds, n),
                    AbstractClassifier.makeCopy(cls)));
        } catch (Exception ex) {
            // a fold whose setup fails is logged and left out of foldSets
            Logger.getLogger(CrossValidation.class.getName()).log(Level.SEVERE, null, ex);
        }

        // one worker fewer than the configured thread count: the calling
        // thread is busy training cls on the full data set
        if (n < numThreads - 1) {
            Thread foldThread = new Thread(new CrossValidationFoldThread(n, foldSets, eval));
            foldThreads.add(foldThread);
        }
    }

    PerformanceCounters.stopTimer("cross-validation init MT");
    PerformanceCounters.startTimer("cross-validation folds+train MT");

    if (numThreads > 1) {
        for (Thread foldThread : foldThreads) {
            foldThread.start();
        }
    } else {
        // no worker threads were created: run the fold work on the current thread
        new CrossValidationFoldThread(0, foldSets, eval).run();
    }

    try {
        cls.buildClassifier(data);
    } catch (Exception ex) {
        Logger.getLogger(CrossValidation.class.getName()).log(Level.SEVERE, null, ex);
    }

    // Wait until every worker has really finished before reading eval; an
    // interrupt must not make us skip a still-running worker. The interrupt
    // status is restored afterwards so callers can still observe it.
    boolean interrupted = false;
    for (Thread foldThread : foldThreads) {
        while (foldThread.isAlive()) {
            try {
                foldThread.join();
            } catch (InterruptedException ex) {
                Logger.getLogger(CrossValidation.class.getName()).log(Level.SEVERE, null, ex);
                interrupted = true;
            }
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }

    PerformanceCounters.stopTimer("cross-validation folds+train MT");
    PerformanceCounters.startTimer("cross-validation post MT");
    // evaluation for output:
    String out = "\n" + "=== Setup ===\n" + "Classifier: " + cls.getClass().getName() + " "
            + Utils.joinOptions(cls.getOptions()) + "\n" + "Dataset: " + data.relationName() + "\n" + "Folds: "
            + folds + "\n" + "Seed: " + seed + "\n" + "\n"
            + eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false) + "\n";

    if (modelOutputFile != null && !modelOutputFile.isEmpty()) {
        try {
            SerializationHelper.write(modelOutputFile, cls);
        } catch (Exception ex) {
            Logger.getLogger(CrossValidation.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    PerformanceCounters.stopTimer("cross-validation post MT");
    PerformanceCounters.stopTimer("cross-validation MT");
    return out;
}

From source file:asap.NLPSystem.java

/**
 * Cross-validates the configured classifier on the training set, handing the
 * fold work to {@code CrossValidationFoldThread} workers, and records the
 * resulting correlation coefficient. Failures are logged rather than thrown.
 *
 * @param seed seed of the random generator used to shuffle the data
 * @param folds number of cross-validation folds
 * @param modelOutputFile file the classifier is serialized to; nothing is
 *        written when it is {@code null} or empty
 * @return a textual report with the setup and the evaluation summary, or an
 *         error message if the evaluation object could not be created
 */
private String crossValidate(int seed, int folds, String modelOutputFile) {

    PerformanceCounters.startTimer("cross-validation");
    PerformanceCounters.startTimer("cross-validation init");

    AbstractClassifier abstractClassifier = (AbstractClassifier) classifier;
    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(trainingSet);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Evaluation eval;
    try {
        eval = new Evaluation(randData);
    } catch (Exception ex) {
        Logger.getLogger(NLPSystem.class.getName()).log(Level.SEVERE, null, ex);
        return "Error creating evaluation instance for given data!";
    }

    // read the configured thread count once so thread creation and thread
    // start below are decided on the same value
    final int numThreads = Config.getNumThreads();

    List<Thread> foldThreads = Collections.synchronizedList(new LinkedList<Thread>());
    List<FoldSet> foldSets = Collections.synchronizedList(new LinkedList<FoldSet>());

    for (int n = 0; n < folds; n++) {
        try {
            foldSets.add(new FoldSet(randData.trainCV(folds, n), randData.testCV(folds, n),
                    AbstractClassifier.makeCopy(abstractClassifier)));
        } catch (Exception ex) {
            // a fold whose setup fails is logged and left out of foldSets
            Logger.getLogger(NLPSystem.class.getName()).log(Level.SEVERE, null, ex);
        }

        if (n < numThreads - 1) {
            Thread foldThread = new Thread(new CrossValidationFoldThread(n, foldSets, eval));
            foldThreads.add(foldThread);
        }
    }

    PerformanceCounters.stopTimer("cross-validation init");
    PerformanceCounters.startTimer("cross-validation folds+train");

    if (numThreads > 1) {
        for (Thread foldThread : foldThreads) {
            foldThread.start();
        }
    } else {
        // no worker threads were created: run the fold work on the current thread
        new CrossValidationFoldThread(0, foldSets, eval).run();
    }

    // Wait until every worker has finished before reading eval. An interrupt
    // does not abort the wait, but the interrupt status is restored afterwards
    // so callers can still observe it.
    boolean interrupted = false;
    for (Thread foldThread : foldThreads) {
        while (foldThread.isAlive()) {
            try {
                foldThread.join();
            } catch (InterruptedException ex) {
                Logger.getLogger(NLPSystem.class.getName()).log(Level.SEVERE, null, ex);
                interrupted = true;
            }
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }

    PerformanceCounters.stopTimer("cross-validation folds+train");
    PerformanceCounters.startTimer("cross-validation post");
    // evaluation for output:
    String out = String.format(
            "\n=== Setup ===\nClassifier: %s %s\n" + "Dataset: %s\nFolds: %s\nSeed: %s\n\n%s\n",
            abstractClassifier.getClass().getName(), Utils.joinOptions(abstractClassifier.getOptions()),
            trainingSet.relationName(), folds, seed,
            eval.toSummaryString(String.format("=== %s-fold Cross-validation ===", folds), false));

    // Weka's Evaluation rejects correlationCoefficient() for a nominal class,
    // so only ask for it otherwise instead of logging a guaranteed failure.
    // NOTE(review): assumes Evaluation here is Weka's implementation - confirm.
    if (!randData.classAttribute().isNominal()) {
        try {
            crossValidationPearsonsCorrelation = eval.correlationCoefficient();
        } catch (Exception ex) {
            Logger.getLogger(NLPSystem.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    // NOTE(review): unlike the CrossValidation helpers, this method never calls
    // buildClassifier on the full training set before serializing - confirm the
    // classifier is trained elsewhere.
    if (modelOutputFile != null && !modelOutputFile.isEmpty()) {
        try {
            SerializationHelper.write(modelOutputFile, abstractClassifier);
        } catch (Exception ex) {
            Logger.getLogger(NLPSystem.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    classifierBuiltWithCrossValidation = true;
    PerformanceCounters.stopTimer("cross-validation post");
    PerformanceCounters.stopTimer("cross-validation");
    return out;
}

From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java

/**
 * Trains a Naive Bayes model, stores it as the current trained model,
 * serializes it to {@code modelDir + algorithm + ".model"} and prints an
 * evaluation summary.
 *
 * The loaded data is split with {@code RemovePercentage}: the first 10% is
 * held out as test data and the remaining 90% is used for training. If the
 * split fails, the model is trained on all data and the evaluation is skipped.
 * If training fails, nothing is stored or written.
 */
public void trainModel() {
    Instances allData = loadTrainingData();

    System.out.println("Class attribute: " + allData.classAttribute().toString());

    // Partition dataset into training and test sets
    RemovePercentage filter = new RemovePercentage();
    filter.setPercentage(10);

    Instances trainingData = allData;
    Instances testData = null;

    try {
        // without inversion the filter drops the first 10% -> training split
        filter.setInputFormat(allData);
        Instances keptForTraining = Filter.useFilter(allData, filter);

        // with inversion the filter keeps exactly that 10% -> held-out test split
        filter.setInvertSelection(true);
        filter.setInputFormat(allData);
        testData = Filter.useFilter(allData, filter);

        // only switch to the reduced training set once both splits exist
        trainingData = keptForTraining;
    } catch (Exception ex) {
        //Logger.getLogger(Trainer.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Error getting testData: " + ex.toString());
        testData = null;
    }

    // Train the classifier before it is stored or serialized
    Classifier model = (Classifier) new NaiveBayes();
    try {
        model.buildClassifier(trainingData);
    } catch (Exception ex) {
        //Logger.getLogger(Trainer.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Error training model: " + ex.toString());
        return;
    }

    // Set the local model
    this.trainedModel = model;

    try {
        // Save the trained model to file
        weka.core.SerializationHelper.write(modelDir + algorithm + ".model", model);
    } catch (Exception ex) {
        Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
    }

    if (testData == null) {
        System.out.println("Error evaluating model: no test data available");
        return;
    }

    try {
        // Evaluate model on the held-out data
        Evaluation test = new Evaluation(trainingData);
        test.evaluateModel(model, testData);

        System.out.println(test.toSummaryString());

    } catch (Exception ex) {
        //Logger.getLogger(Trainer.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Error evaluating model: " + ex.toString());
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Sets up every counter used by the evaluation, optionally together with a
 * cost matrix. Call <code>useNoPriors()</code> when the given dataset is the
 * test set and the priors cannot be taken from the training set through
 * <code>setPriors(Instances)</code>.
 *
 * @param data training instances supplying header information and the prior
 *          class distribution
 * @param costMatrix the cost matrix; default costs are used when it is null
 * @throws Exception if the cost matrix does not fit the data, the class is
 *           not defined or the class is numeric
 * @see #useNoPriors()
 * @see #setPriors(Instances)
 */
public Evaluation(Instances data, CostMatrix costMatrix) throws Exception {

    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().isNominal();

    if (m_ClassIsNominal) {
        // nominal class: square confusion matrix plus one label per class value
        m_ConfusionMatrix = new double[m_NumClasses][m_NumClasses];
        m_ClassNames = new String[m_NumClasses];
        for (int classIdx = 0; classIdx < m_NumClasses; classIdx++) {
            m_ClassNames[classIdx] = data.classAttribute().value(classIdx);
        }
    }

    // a cost matrix is only meaningful for a nominal class of matching size
    if (costMatrix != null && !m_ClassIsNominal) {
        throw new Exception("Class has to be nominal if cost matrix given!");
    }
    if (costMatrix != null && costMatrix.size() != m_NumClasses) {
        throw new Exception("Cost matrix not compatible with data!");
    }
    m_CostMatrix = costMatrix;

    m_ClassPriors = new double[m_NumClasses];
    setPriors(data);
    m_MarginCounts = new double[k_MarginResolution + 1];
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Prints the header for the predictions output into a supplied StringBuffer
 * /*from  ww w  .j a  v  a2 s .c o  m*/
 * @param test structure of the test set to print predictions for
 * @param attributesToOutput indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          attributes, not just the predicted value
 * @param text the StringBuffer to print to
 */
protected static void printClassificationsHeader(Instances test, Range attributesToOutput,
        boolean printDistribution, StringBuffer text) {
    // print header
    if (test.classAttribute().isNominal()) {
        if (printDistribution) {
            text.append(" inst#     actual  predicted error distribution");
        } else {
            text.append(" inst#     actual  predicted error prediction");
        }
    } else {
        text.append(" inst#     actual  predicted      error");
    }
    if (attributesToOutput != null) {
        attributesToOutput.setUpper(test.numAttributes() - 1);
        text.append(" (");
        boolean first = true;
        for (int i = 0; i < test.numAttributes(); i++) {
            if (i == test.classIndex()) {
                continue;
            }

            if (attributesToOutput.isInRange(i)) {
                if (!first) {
                    text.append(",");
                }
                text.append(test.attribute(i).name());
                first = false;
            }
        }
        text.append(")");
    }
    text.append("\n");
}

From source file:boostingPL.boosting.InstancesHelper.java

License:Open Source License

/**
 * Builds a dense instance from one comma-separated line. Every field but the
 * last is parsed as a numeric attribute value; the last field is looked up as
 * a label of the nominal class attribute of {@code insts}.
 *
 * @param text the comma-separated line
 * @param insts the data set the new instance is attached to; provides the
 *        class attribute used to resolve the label
 * @return the new instance with weight 1 and {@code insts} as its data set
 * @throws NumberFormatException if a numeric field cannot be parsed
 * @throws IllegalArgumentException if the last field is not a known class label
 */
public static Instance createInstance(String text, Instances insts) {
    String[] items = text.split(",");
    double[] ds = new double[items.length];
    int classPos = items.length - 1;

    // numeric attributes
    for (int i = 0; i < classPos; i++) {
        ds[i] = Double.parseDouble(items[i]);
    }

    // nominal class attribute: try the label as given, then without
    // surrounding whitespace (e.g. "1.0, yes")
    String label = items[classPos];
    int labelIndex = insts.classAttribute().indexOfValue(label);
    if (labelIndex < 0) {
        labelIndex = insts.classAttribute().indexOfValue(label.trim());
    }
    if (labelIndex < 0) {
        // storing -1 as the class value would only fail later and far from the cause
        throw new IllegalArgumentException("Unknown class label '" + label + "' in line: " + text);
    }
    ds[classPos] = labelIndex;

    Instance inst = new DenseInstance(1, ds);
    inst.setDataset(insts);
    return inst;
}