Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

On this page you can find example usages of weka.core.Instances.numInstances().

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:DetectorCreation.DetectorCreator.java

/**
 * Prints, for each instance of the test set, the label assigned by the trained classifier
 * followed by the first two entries of the class distribution it reports.
 *
 * @param trainedClassifier classifier wrapper queried for label, label index and distribution
 * @param testset           instances to classify; rows are reported 1-based
 */
private static void ClassifyInstances(WekaTrainedClassifier trainedClassifier, Instances testset) {
    int row = 0;
    while (row < testset.numInstances()) {
        final Instance current = testset.instance(row);
        final int displayRow = row + 1;

        // Predicted label for this row.
        final String label = trainedClassifier.GetClassification(current);
        // The index is not printed; the call is retained because it was part of the original sequence.
        final double labelIndex = trainedClassifier.GetClassificationIndex(current);
        Console.PrintLine(String.format("Instance %s classification: %s", displayRow, label));

        // Only the first two distribution entries are reported.
        final double[] distribution = trainedClassifier.GetDistribution(current);
        Console.PrintLine(String.format("Instance %s distribution: %s , %s", displayRow, distribution[0],
                distribution[1]));
        Console.PrintLine("");

        row++;
    }
}

From source file:development.CrossValidateShapelets.java

/**
 * Prepares the cross-validation inputs on the cluster file system.
 *
 * <p>First, for every problem listed in {@code incomplete}, the existing {@code ShapeletCV/}
 * directory is deleted and recreated. Then, for every problem listed in {@code missing}, the
 * training data is loaded, normalised (best effort), randomised, and split into ten train/test
 * folds. The randomisation mapping is written to {@code InstancePositions.csv} both in the
 * problem directory and in its {@code ShapeletCV/} sub-directory, and each fold is written as
 * {@code <problem>_TRAIN<k>.arff} / {@code <problem>_TEST<k>.arff} with k running from 1 to 10.
 */
public static void formCV() {
    final String root = "/gpfs/sys/ajb/TSC Problems/";

    // Wipe and recreate the ShapeletCV directory of every incomplete problem.
    for (int i = 0; i < incomplete.length; i++) {
        File f = new File(root + DataSets.fileNames[incomplete[i]] + "/ShapeletCV/");
        if (f.exists()) {
            try {
                delete(f);
            } catch (IOException e) {
                System.err.println(" Unable to delete directory ShapeletCV/  Continuing  ");
            }
        }
        // mkdirs also creates missing parents; a failure is reported instead of silently ignored.
        if (!f.exists() && !f.mkdirs()) {
            System.err.println(" Unable to create directory " + f + "  Continuing  ");
        }
    }

    for (int i = 0; i < missing.length; i++) {
        final String problem = DataSets.fileNames[missing[i]];
        final String path = root + problem + "/";

        Instances train = ClassifierTools.loadData(path + problem + "_TRAIN");
        System.out.println("Processing : " + problem);
        NormalizeCase nc = new NormalizeCase();
        try {
            train = nc.process(train);
        } catch (Exception e) {
            // Best effort: carry on with the un-normalised data.
            System.out.println(" Unable to normalise for some unknown reason " + e + "  but continuing...");
        }

        // Randomise the data and record where each instance originally came from.
        int[] positions = new int[train.numInstances()];
        train = randomise(train, positions);

        // The mapping is written into ShapeletCV/, so make sure that directory exists first.
        File shapeletDir = new File(path + "ShapeletCV/");
        if (!shapeletDir.exists() && !shapeletDir.mkdirs()) {
            System.err.println(" Unable to create directory " + shapeletDir + "  Continuing  ");
        }
        // NOTE(review): OutFile instances are never explicitly closed here; confirm whether
        // OutFile flushes on write or needs an explicit close.
        OutFile of = new OutFile(path + "ShapeletCV/InstancePositions.csv");
        for (int j = 0; j < positions.length; j++) {
            of.writeLine(positions[j] + ",");
        }
        of = new OutFile(path + "InstancePositions.csv");
        for (int j = 0; j < positions.length; j++) {
            of.writeLine(positions[j] + ",");
        }

        // Split into folds and save each one; file names are 1-based.
        int folds = 10;
        Instances[] trainFolds = new Instances[folds];
        Instances[] testFolds = new Instances[folds];
        splitTrainData(train, trainFolds, testFolds, folds);
        for (int j = 1; j <= folds; j++) {
            OutFile of1 = new OutFile(path + problem + "_TRAIN" + (j) + ".arff");
            OutFile of2 = new OutFile(path + problem + "_TEST" + (j) + ".arff");
            of1.writeLine(trainFolds[j - 1].toString());
            of2.writeLine(testFolds[j - 1].toString());
        }
    }
}

From source file:development.CrossValidateShapelets.java

/**
 * Splits {@code train} into {@code folds} contiguous test blocks and the matching training
 * complements, preserving instance order.
 *
 * <p>Every fold starts with {@code size / folds} test instances; any remainder is first put in
 * the last fold and then handed out one at a time to the earlier folds until the last fold is no
 * larger than the fold that just received an instance.
 *
 * @param train      source data; it is read but not modified
 * @param trainFolds output array, entry {@code i} receives everything outside test block {@code i}
 * @param testFolds  output array, entry {@code i} receives test block {@code i}
 * @param folds      number of folds, must be positive and no larger than either output array
 * @throws IllegalArgumentException if {@code folds} is not positive or an output array is too short
 */
public static void splitTrainData(Instances train, Instances[] trainFolds, Instances[] testFolds, int folds) {
    if (folds <= 0) {
        throw new IllegalArgumentException("folds must be positive but was " + folds);
    }
    if (trainFolds.length < folds || testFolds.length < folds) {
        throw new IllegalArgumentException("fold arrays must hold at least " + folds + " entries");
    }

    int size = train.numInstances();
    int foldSize = size / folds;
    int[] foldCV = new int[folds];
    for (int i = 0; i < foldCV.length; i++) {
        foldCV[i] = foldSize;
    }
    if (size % folds != 0) {
        // Park the whole remainder in the last fold for now.
        foldCV[folds - 1] = size - foldSize * (folds - 1);
    }
    int diff = foldCV[folds - 1] - foldSize;
    int c = 0;
    // Hand surplus instances from the last fold to the earlier folds, round-robin.
    // diff can only be positive when folds > 1, so the modulus below is never zero.
    while (diff > 0) {
        foldCV[c % (folds - 1)]++;
        foldCV[folds - 1]--;
        diff = foldCV[folds - 1] - foldCV[c % (folds - 1)];
        c++;
    }

    int start = 0;
    for (int i = 0; i < folds; i++) {
        int end = start + foldCV[i];
        // Header-only copies of train, presized; no deep copy of the data is needed.
        trainFolds[i] = new Instances(train, size - foldCV[i]);
        testFolds[i] = new Instances(train, foldCV[i]);
        for (int j = 0; j < size; j++) {
            if (j < start || j >= end) {
                trainFolds[i].add(train.instance(j));
            } else {
                testFolds[i].add(train.instance(j));
            }
        }
        start = end;
    }
}

From source file:development.CrossValidateShapelets.java

/**
 * Runs the shapelet cross-validation for one problem, one thread per fold.
 *
 * <p>If the {@code ShapeletCV/} directory already holds all ten {@code _TRAIN}/{@code _TEST}
 * fold files the method returns immediately. Otherwise the training data is loaded, normalised
 * (best effort), randomised (the mapping is written to {@code InstancePositions.csv}), split
 * into folds, and one {@code CrossValidateShapelets} thread per fold is started and joined.
 *
 * @param file problem name; used as directory name and as file-name prefix
 */
public static void singleRunThreaded(String file) {
    final int folds = 10;

    String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + file + "/";
    String desktopPath = "C:/Users/ajb/Dropbox/TSC Problems/" + file + "/";
    String path = useCluster ? clusterPath : desktopPath;
    String filePath = path + "ShapeletCV/";

    File dir = new File(filePath);
    if (!dir.exists()) {
        // mkdirs also creates missing parents; a failure is reported instead of silently ignored.
        if (!dir.mkdirs()) {
            System.err.println(" Unable to create directory " + dir + "  Continuing  ");
        }
    } else { // Remove this branch to allow overwriting existing results
        boolean allPresent = true;
        for (int i = 1; i <= folds && allPresent; i++) {
            File cv = new File(filePath + file + "_TRAIN" + i + ".arff");
            File cv2 = new File(filePath + file + "_TEST" + i + ".arff");
            allPresent = cv.exists() && cv2.exists();
        }
        if (allPresent) {
            // Every fold file is already there: nothing to do.
            return;
        }
    }

    CrossValidateShapelets.fileName = file;
    Instances train = ClassifierTools.loadData(path + file + "_TRAIN");
    NormalizeCase nc = new NormalizeCase();
    try {
        train = nc.process(train);
    } catch (Exception e) {
        // Best effort: carry on with the un-normalised data.
        System.out.println(" Unable to normalise for some unknown reason " + e + "  but continuing...");
    }

    // Randomise the data and record where each instance originally came from.
    int[] positions = new int[train.numInstances()];
    train = randomise(train, positions);
    OutFile of = new OutFile(filePath + "InstancePositions.csv");
    for (int i = 0; i < positions.length; i++) {
        of.writeLine(positions[i] + ",");
    }

    Instances[] trainFolds = new Instances[folds];
    Instances[] testFolds = new Instances[folds];
    splitTrainData(train, trainFolds, testFolds, folds);

    CrossValidateShapelets[] ct = new CrossValidateShapelets[folds];
    for (int i = 0; i < folds; i++) {
        ct[i] = new CrossValidateShapelets(trainFolds[i], testFolds[i], i, filePath);
    }
    for (int i = 0; i < folds; i++) {
        ct[i].start();
    }
    try {
        for (int i = 0; i < folds; i++) {
            ct[i].join();
        }
    } catch (InterruptedException ex) {
        Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
    }
}

From source file:development.CrossValidateShapelets.java

/**
 * Trains the heterogeneous ensemble on one transformed training fold, classifies the matching
 * test fold and writes the ensemble weights plus actual/predicted labels to
 * {@code <file>Predictions<fold>.csv} in the {@code ShapeletCV/} directory.
 *
 * @param file problem name; used as directory name and as file-name prefix
 * @param fold fold number as it appears in the fold file names
 * @return a 2 x n array where row 0 holds the actual class values and row 1 the predicted ones,
 *         or {@code null} if the fold files are missing, the predictions file already exists,
 *         or building/classifying failed
 */
public static int[][] classifyFold(String file, int fold) {

    String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + file + "/";
    String desktopPath = "C:/Users/ajb/Dropbox/TSC Problems/" + file + "/";
    String path = useCluster ? clusterPath : desktopPath;
    String filePath = path + "ShapeletCV/";

    // Both fold files must exist, otherwise there is nothing to classify.
    File tr = new File(filePath + file + "_TRAIN" + fold + ".arff");
    File ts = new File(filePath + file + "_TEST" + fold + ".arff");
    if (!tr.exists() || !ts.exists()) {
        System.err.println(" ERROR CLASSIFYING " + file + " fold " + fold + " file does not exist");
        return null;
    }
    // Skip the fold if its predictions file has already been written.
    File r = new File(filePath + file + "Predictions" + fold + ".csv");
    if (r.exists()) {
        System.err.println(file + " fold " + fold + " Classificastion already done");
        return null;
    }

    Instances train = ClassifierTools.loadData(filePath + file + "_TRAIN" + fold);
    Instances test = ClassifierTools.loadData(filePath + file + "_TEST" + fold);
    ArrayList<String> names = new ArrayList<>();
    ArrayList<Classifier> c = setSingleClassifiers(names);
    HeterogeneousEnsemble hc = new HeterogeneousEnsemble(c);
    hc.useCVWeighting(true);

    int[][] preds = new int[2][test.numInstances()];
    try {
        hc.buildClassifier(train);
        for (int i = 0; i < test.numInstances(); i++) {
            preds[0][i] = (int) test.instance(i).classValue();
            preds[1][i] = (int) hc.classifyInstance(test.instance(i));
        }
    } catch (Exception ex) {
        Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
        // Do not write a predictions file of partial/zero values: its mere existence would
        // make later runs treat this fold as already classified.
        return null;
    }

    // First line: ensemble weights; then a header and one actual,predicted pair per line.
    double[] cvAccs = hc.getWeights();
    OutFile results = new OutFile(filePath + file + "Predictions" + fold + ".csv");
    for (int i = 0; i < cvAccs.length; i++) {
        results.writeString(cvAccs[i] + ",");
    }
    results.writeString("\n Actual,Predicted\n");
    int correct = 0;
    for (int i = 0; i < preds[0].length; i++) {
        results.writeString(preds[0][i] + "," + preds[1][i] + "\n");
        if (preds[0][i] == preds[1][i]) {
            correct++;
        }
    }
    System.out.println(
            " Fold =" + fold + " correct =" + correct + " acc = " + ((double) correct) / preds[0].length);
    return preds;
}

From source file:development.CrossValidateShapelets.java

/**
 * Applies the shapelet transform to one train/test fold of one problem and writes the
 * transformed folds into the problem's {@code ShapeletCV/} directory. Does nothing, apart from
 * printing a message, when both output files already exist.
 *
 * @param problemNum index into {@code DataSets.fileNames}
 * @param foldNum    zero-based fold number; file names use {@code foldNum + 1}
 */
public static void doSingleTransform(int problemNum, int foldNum) {
    final String problem = DataSets.fileNames[problemNum];
    final String problemDir = "/gpfs/sys/ajb/TSC Problems/" + problem + "/";
    final String shapeletDir = problemDir + "ShapeletCV/";
    final String trainOut = shapeletDir + problem + "_TRAIN" + (foldNum + 1) + ".arff";
    final String testOut = shapeletDir + problem + "_TEST" + (foldNum + 1) + ".arff";

    // Guard clause: skip folds whose transformed files are both present.
    if (new File(trainOut).exists() && new File(testOut).exists()) {
        System.out.println(" Transform " + foldNum + " problem " + problem + " already exists");
        return;
    }

    final Instances rawTrain = ClassifierTools.loadData(problemDir + problem + "_TRAIN" + (foldNum + 1));
    final Instances rawTest = ClassifierTools.loadData(problemDir + problem + "_TEST" + (foldNum + 1));

    final FullShapeletTransform transform = new ShapeletTransformDistCaching();
    transform.supressOutput();
    // Number of shapelets is the larger of the attribute count and the instance count.
    final int shapeletCount = Math.max(rawTrain.numAttributes(), rawTrain.numInstances());
    transform.setNumberOfShapelets(shapeletCount);

    try {
        final Instances transformedTrain = transform.process(rawTrain);
        final Instances transformedTest = transform.process(rawTest);
        final OutFile trainWriter = new OutFile(trainOut);
        final OutFile testWriter = new OutFile(testOut);
        trainWriter.writeLine(transformedTrain.toString());
        testWriter.writeLine(transformedTest.toString());
    } catch (Exception ex) {
        Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:development.GoodHonoursPrediction.java

/**
 * Loads the good-honours data set and prints the first cross-validation statistic
 * (labelled as LOOCV accuracy) for four classifiers: random forest, C4.5, k-NN and naive Bayes.
 * The number of folds passed to each run equals the number of instances.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final Instances data = ClassifierTools.loadData("C:\\Admin\\Perfomance Analysis\\GoodHonsClassification");

    final RandomForest forest = new RandomForest();
    final double[][] forestStats = ClassifierTools.crossValidationWithStats(forest, data, data.numInstances());
    System.out.println(" Random forest LOOCV accuracy =" + forestStats[0][0]);

    final J48 c45 = new J48();
    final double[][] c45Stats = ClassifierTools.crossValidationWithStats(c45, data, data.numInstances());
    System.out.println(" C4.5 LOOCV accuracy =" + c45Stats[0][0]);

    // k-NN is created with 11 and told to cross-validate before being evaluated.
    final IBk nearestNeighbour = new IBk(11);
    nearestNeighbour.setCrossValidate(true);
    final double[][] knnStats =
            ClassifierTools.crossValidationWithStats(nearestNeighbour, data, data.numInstances());
    System.out.println(" KNN LOOCV accuracy =" + knnStats[0][0]);

    final NaiveBayes bayes = new NaiveBayes();
    final double[][] bayesStats = ClassifierTools.crossValidationWithStats(bayes, data, data.numInstances());
    System.out.println(" Naive Bayes LOOCV accuracy =" + bayesStats[0][0]);
}

From source file:dewaweebtreeclassifier.Sujeong.java

/**
 * Computes the information gain obtained by splitting {@code data} on {@code attr}: the entropy
 * of the whole set minus the size-weighted entropy of each subset produced by the split.
 *
 * @param data instances to evaluate
 * @param attr attribute to split on
 * @return the information gain of the split
 */
public double computeGain(Instances data, Attribute attr) {
    double gain = computeEntropy(data);
    for (Instances subset : splitInstancesOnAttribute(data, attr)) {
        // Weight of a subset is its share of the full data set.
        final double weight = (double) subset.numInstances() / (double) data.numInstances();
        final double subsetEntropy = computeEntropy(subset);
        gain -= weight * subsetEntropy;
    }
    return gain;
}

From source file:dewaweebtreeclassifier.Sujeong.java

/**
 * Computes the entropy (base 2) of the class distribution of {@code data}.
 *
 * @param data instances whose class values are counted
 * @return the sum over all occurring classes of {@code -p * log2(p)}, where {@code p} is the
 *         fraction of instances carrying that class
 */
public double computeEntropy(Instances data) {
    final double[] classCounts = new double[data.numClasses()];

    // Tally how many instances fall into each class.
    for (int row = 0; row < data.numInstances(); row++) {
        final Instance current = data.instance(row);
        classCounts[(int) current.classValue()]++;
    }

    double result = 0.0;
    final int classTotal = data.numClasses();
    for (int cls = 0; cls < classTotal; cls++) {
        if (classCounts[cls] <= 0) {
            // Classes that never occur contribute nothing.
            continue;
        }
        final double share = classCounts[cls] / data.numInstances();
        result -= (share * Utils.log2(share));
    }
    return result;
}

From source file:dewaweebtreeclassifier.Sujeong.java

/**
 * Builds the decision tree rooted at this node from {@code instances}.
 *
 * <p>With a single attribute the node becomes a leaf whose value is the mean/mode of the class
 * attribute. Otherwise the non-class attribute with the largest strictly positive information
 * gain is chosen, the data is split on it, and one child is built per split; an empty split
 * becomes a leaf carrying the parent's class mean/mode. If no attribute has positive gain the
 * node becomes a leaf.
 *
 * @param instances training data for this node
 * @throws java.lang.Exception if {@code instances} has no attributes, or if building a child fails
 */
public void buildTree(Instances instances) throws java.lang.Exception {
    if (instances.numAttributes() < 1) {
        throw new Exception("Data instances need to have minimum of 1 attribute.");
    }

    // Clear any state left by an earlier build so a rebuilt node cannot reuse a stale split.
    bestAttr = null;
    children = null;

    if (instances.numAttributes() == 1) {
        this.value = instances.meanOrMode(instances.classIndex());
        return;
    }

    // Pick the attribute with the highest strictly positive gain; the class attribute is skipped.
    double informationGain = 0.0;
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (i == instances.classIndex()) {
            continue;
        }
        Attribute attr = instances.attribute(i);
        double tmpGain = computeGain(instances, attr);
        if (tmpGain > informationGain) {
            bestAttr = attr;
            informationGain = tmpGain;
        }
    }

    double mode = instances.meanOrMode(instances.classIndex());
    if (bestAttr == null) {
        // No attribute improves on the current entropy: this node is a leaf.
        this.value = mode;
        return;
    }

    Instances[] chunks = splitInstancesOnAttribute(instances, bestAttr);
    children = new Sujeong[chunks.length];
    for (int i = 0; i < chunks.length; ++i) {
        Instances chunk = chunks[i];
        Sujeong child = new Sujeong();
        children[i] = child;
        if (chunk.numInstances() > 0) {
            child.buildTree(chunk);
        } else {
            // Empty split: fall back to the parent's class mean/mode.
            child.value = mode;
        }
    }
}