List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:DetectorCreation.DetectorCreator.java
private static void ClassifyInstances(WekaTrainedClassifier trainedClassifier, Instances testset) { String classification;//from w w w . ja v a 2 s. co m double classificationIndex; double[] classificationDist; Instance instance; for (int i = 0; i < testset.numInstances(); i++) { instance = testset.instance(i); //Print classification classification = trainedClassifier.GetClassification(instance); classificationIndex = trainedClassifier.GetClassificationIndex(instance); Console.PrintLine(String.format("Instance %s classification: %s", i + 1, classification)); //Print classification distribution classificationDist = trainedClassifier.GetDistribution(instance); Console.PrintLine(String.format("Instance %s distribution: %s , %s", i + 1, classificationDist[0], classificationDist[1])); Console.PrintLine(""); } }
From source file:development.CrossValidateShapelets.java
public static void formCV() { //Delete any existing shapelet files for the incomplete for (int i = 0; i < incomplete.length; i++) { File f = new File("/gpfs/sys/ajb/TSC Problems/" + DataSets.fileNames[incomplete[i]] + "/ShapeletCV/"); //Delete everything there if (f.exists()) { try { delete(f);/* ww w .jav a 2 s .c o m*/ } catch (IOException e) { System.err.println(" Unable to delete directory ShapeletCV/ Continuing "); } } //Recreate the directory if (!f.exists()) { f.mkdir(); } } for (int i = 0; i < missing.length; i++) { String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + DataSets.fileNames[missing[i]] + "/"; String dropboxPath = "C:/Users/ajb/Dropbox/TSC Problems/" + DataSets.fileNames[missing[i]] + "/"; // String path=dropboxPath; String path = clusterPath; Instances train = ClassifierTools.loadData(path + DataSets.fileNames[missing[i]] + "_TRAIN"); System.out.println("Processing : " + DataSets.fileNames[missing[i]]); NormalizeCase nc = new NormalizeCase(); try { train = nc.process(train); } catch (Exception e) { System.out.println(" Unable to normalise for some unknown reason " + e + " but continuing..."); } //Randomize the data. Need to save the mapping somewhere. 
int[] positions = new int[train.numInstances()]; train = randomise(train, positions); OutFile of = new OutFile(path + "ShapeletCV/InstancePositions.csv"); for (int j = 0; j < positions.length; j++) of.writeLine(positions[j] + ","); of = new OutFile(path + "InstancePositions.csv"); for (int j = 0; j < positions.length; j++) of.writeLine(positions[j] + ","); //Split into time domain folds int folds = 10; Instances[] trainFolds = new Instances[folds]; Instances[] testFolds = new Instances[folds]; splitTrainData(train, trainFolds, testFolds, folds); //Save folds to file for (int j = 1; j <= folds; j++) { OutFile of1 = new OutFile(path + DataSets.fileNames[missing[i]] + "_TRAIN" + (j) + ".arff"); OutFile of2 = new OutFile(path + DataSets.fileNames[missing[i]] + "_TEST" + (j) + ".arff"); of1.writeLine(trainFolds[j - 1].toString()); of2.writeLine(testFolds[j - 1].toString()); } } }
From source file:development.CrossValidateShapelets.java
public static void splitTrainData(Instances train, Instances[] trainFolds, Instances[] testFolds, int folds) { int size = train.numInstances(); int foldSize = size / folds; int[] foldCV = new int[folds]; for (int i = 0; i < foldCV.length; i++) foldCV[i] = foldSize;/*from w w w . j a v a 2 s . c o m*/ if (size % folds != 0) //Adjust the last fold size accordingly foldCV[folds - 1] = size - foldSize * (folds - 1); int diff = foldCV[folds - 1] - foldSize; int c = 0; while (diff > 0) { //Reassign elements to other folds foldCV[c % (folds - 1)]++; foldCV[folds - 1]--; diff = foldCV[folds - 1] - foldCV[c % (folds - 1)]; c++; } Instances copy = new Instances(train); int start = 0; for (int i = 0; i < folds; i++) { trainFolds[i] = new Instances(copy, 0); testFolds[i] = new Instances(copy, 0); for (int j = 0; j < train.numInstances(); j++) { if (j < start || j >= start + foldCV[i]) trainFolds[i].add(train.instance(j)); else testFolds[i].add(train.instance(j)); } start += foldCV[i]; } }
From source file:development.CrossValidateShapelets.java
public static void singleRunThreaded(String file) { // String file ="ItalyPowerDemand"; String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + file + "/"; String desktopPath = "C:/Users/ajb/Dropbox/TSC Problems/" + file + "/"; String path = desktopPath;/* w ww . ja v a2 s . com*/ if (useCluster) path = clusterPath; String filePath = path + "ShapeletCV/"; int count = 0; //Create directory if it isn't there already File dir = new File(filePath); if (!dir.exists()) { dir.mkdir(); } else { //Comment out to allow overwriting boolean present = true; for (int i = 1; i <= 10 && present; i++) { File cv = new File(filePath + file + "_TRAIN" + i + ".arff"); File cv2 = new File(filePath + file + "_TEST" + i + ".arff"); if (cv.exists() && cv2.exists()) { //CV files already there count++; } else present = false; } if (count == 10)//Exit now return; } CrossValidateShapelets.fileName = file; Instances train = ClassifierTools.loadData(path + file + "_TRAIN"); NormalizeCase nc = new NormalizeCase(); try { train = nc.process(train); } catch (Exception e) { System.out.println(" Unable to normalise for some unknown reason " + e + " but continuing..."); } //Randomize the data. Need to save the mapping somewhere. 
int[] positions = new int[train.numInstances()]; train = randomise(train, positions); OutFile of = new OutFile(filePath + "InstancePositions.csv"); for (int i = 0; i < positions.length; i++) of.writeLine(positions[i] + ","); //Split data into folds int folds = 10; Instances[] trainFolds = new Instances[folds]; Instances[] testFolds = new Instances[folds]; splitTrainData(train, trainFolds, testFolds, folds); CrossValidateShapelets[] ct = new CrossValidateShapelets[folds]; for (int i = 0; i < folds; i++) { ct[i] = new CrossValidateShapelets(trainFolds[i], testFolds[i], i, filePath); } for (int i = 0; i < folds; i++) { //Only start the threads where file is not their ct[i].start(); } try { for (int i = 0; i < folds; i++) ct[i].join(); } catch (InterruptedException ex) { Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:development.CrossValidateShapelets.java
public static int[][] classifyFold(String file, int fold) { String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + file + "/"; String desktopPath = "C:/Users/ajb/Dropbox/TSC Problems/" + file + "/"; String path = desktopPath;// w ww. j a va 2 s. com if (useCluster) path = clusterPath; String filePath = path + "ShapeletCV/"; //Check training and test files exist, terminate if not File tr = new File(filePath + file + "_TRAIN" + fold + ".arff"); File ts = new File(filePath + file + "_TEST" + fold + ".arff"); if (!tr.exists() || !ts.exists()) { System.err.println(" ERROR CLASSIFYING " + file + " fold " + fold + " file does not exist"); return null; } //Check whether predictions exist, terminate if not. File r = new File(filePath + file + "Predictions" + fold + ".csv"); if (r.exists()) { System.err.println(file + " fold " + fold + " Classificastion already done"); return null; } Instances train = ClassifierTools.loadData(filePath + file + "_TRAIN" + fold); Instances test = ClassifierTools.loadData(filePath + file + "_TEST" + fold); ArrayList<String> names = new ArrayList<>(); ArrayList<Classifier> c = setSingleClassifiers(names); HeterogeneousEnsemble hc = new HeterogeneousEnsemble(c); hc.useCVWeighting(true); int[][] preds = new int[2][test.numInstances()]; try { hc.buildClassifier(train); for (int i = 0; i < test.numInstances(); i++) { preds[0][i] = (int) test.instance(i).classValue(); preds[1][i] = (int) hc.classifyInstance(test.instance(i)); } } catch (Exception ex) { Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex); } //Save results to the appropriate file double[] cvAccs = hc.getWeights(); OutFile results = new OutFile(filePath + file + "Predictions" + fold + ".csv"); for (int i = 0; i < cvAccs.length; i++) results.writeString(cvAccs[i] + ","); results.writeString("\n Actual,Predicted\n"); int correct = 0; for (int i = 0; i < preds[0].length; i++) { results.writeString(preds[0][i] + "," + preds[1][i] + "\n"); if (preds[0][i] == 
preds[1][i]) correct++; } System.out.println( " Fold =" + fold + " correct =" + correct + " acc = " + ((double) correct) / preds[0].length); return preds; }
From source file:development.CrossValidateShapelets.java
public static void doSingleTransform(int problemNum, int foldNum) { String fileName = DataSets.fileNames[problemNum]; String clusterPath = "/gpfs/sys/ajb/TSC Problems/" + fileName + "/"; String path = clusterPath;/*w ww . j av a 2 s . c o m*/ String shapeletPath = path + "ShapeletCV/"; File f1 = new File(shapeletPath + fileName + "_TRAIN" + (foldNum + 1) + ".arff"); File f2 = new File(shapeletPath + fileName + "_TEST" + (foldNum + 1) + ".arff"); if (f1.exists() && f2.exists()) { System.out.println(" Transform " + foldNum + " problem " + fileName + " already exists"); return; } Instances train = ClassifierTools.loadData(clusterPath + fileName + "_TRAIN" + (foldNum + 1)); Instances test = ClassifierTools.loadData(clusterPath + fileName + "_TEST" + (foldNum + 1)); FullShapeletTransform st = new ShapeletTransformDistCaching(); // if(train.numInstances()>=500 || train.numAttributes()>500) // st = new ShapeletTransform(); st.supressOutput(); st.setNumberOfShapelets(Math.max(train.numAttributes(), train.numInstances())); try { Instances sTrain = st.process(train); Instances sTest = st.process(test); OutFile of1 = new OutFile(shapeletPath + fileName + "_TRAIN" + (foldNum + 1) + ".arff"); OutFile of2 = new OutFile(shapeletPath + fileName + "_TEST" + (foldNum + 1) + ".arff"); of1.writeLine(sTrain.toString()); of2.writeLine(sTest.toString()); } catch (Exception ex) { Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:development.GoodHonoursPrediction.java
/**
 * Loads the "GoodHonsClassification" data set and prints the accuracy figure
 * reported by {@code ClassifierTools.crossValidationWithStats} for four
 * classifiers in turn: random forest, C4.5 (J48), k-NN (IBk created with 11
 * and cross-validation enabled) and naive Bayes.
 *
 * <p>Each call passes the number of instances as the third argument; the
 * printed labels describe the result as leave-one-out cross-validation.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Instances data = ClassifierTools.loadData("C:\\Admin\\Perfomance Analysis\\GoodHonsClassification");

    RandomForest forest = new RandomForest();
    double[][] forestStats = ClassifierTools.crossValidationWithStats(forest, data, data.numInstances());
    System.out.println(" Random forest LOOCV accuracy =" + forestStats[0][0]);

    J48 tree = new J48();
    double[][] treeStats = ClassifierTools.crossValidationWithStats(tree, data, data.numInstances());
    System.out.println(" C4.5 LOOCV accuracy =" + treeStats[0][0]);

    IBk nearestNeighbour = new IBk(11);
    nearestNeighbour.setCrossValidate(true);
    double[][] knnStats = ClassifierTools.crossValidationWithStats(nearestNeighbour, data, data.numInstances());
    System.out.println(" KNN LOOCV accuracy =" + knnStats[0][0]);

    NaiveBayes bayes = new NaiveBayes();
    double[][] bayesStats = ClassifierTools.crossValidationWithStats(bayes, data, data.numInstances());
    System.out.println(" Naive Bayes LOOCV accuracy =" + bayesStats[0][0]);

    // An earlier, disabled experiment built the J48 tree on the full data,
    // printed it, and ran Weka's Evaluation.crossValidateModel with
    // data.numInstances() folds and a Random seeded with 1.
}
From source file:dewaweebtreeclassifier.Sujeong.java
/**
 * Computes the information gain obtained by splitting {@code data} on
 * {@code attr}: the entropy of {@code data} minus the size-weighted entropies
 * of the subsets returned by {@code splitInstancesOnAttribute}.
 *
 * @param data instances to be split
 * @param attr attribute to split on
 * @return the information gain of the split
 */
public double computeGain(Instances data, Attribute attr) {
    double gain = computeEntropy(data);
    final Instances[] partitions = splitInstancesOnAttribute(data, attr);

    for (int p = 0; p < partitions.length; p++) {
        final Instances subset = partitions[p];
        // Fraction of the parent's instances that fell into this subset.
        final double weight = (double) subset.numInstances() / (double) data.numInstances();
        gain -= weight * computeEntropy(subset);
    }
    return gain;
}
From source file:dewaweebtreeclassifier.Sujeong.java
public double computeEntropy(Instances data) { double[] nClass = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); nClass[(int) instance.classValue()]++; }//from w w w . jav a2 s . c o m double entropy = 0.0; for (int i = 0; i < data.numClasses(); i++) { if (nClass[i] > 0) { double ratio = nClass[i] / data.numInstances(); entropy -= (ratio * Utils.log2(ratio)); } } return entropy; }
From source file:dewaweebtreeclassifier.Sujeong.java
public void buildTree(Instances instances) throws java.lang.Exception { if (instances.numAttributes() < 1) { throw new Exception("Data instances need to have minimum of 1 attribute."); } else if (instances.numAttributes() == 1) { this.value = instances.meanOrMode(instances.classIndex()); } else {//from ww w . j a v a 2 s . c o m Enumeration attrs = instances.enumerateAttributes(); double informationGain = 0.0; while (attrs.hasMoreElements()) { Attribute attr = (Attribute) attrs.nextElement(); double tmpGain = computeGain(instances, attr); if (tmpGain > informationGain) { bestAttr = attr; informationGain = tmpGain; } } if (bestAttr != null) { double mode = instances.meanOrMode(instances.classIndex()); Instances[] chunks = splitInstancesOnAttribute(instances, bestAttr); children = new Sujeong[chunks.length]; for (int i = 0; i < chunks.length; ++i) { Instances chunk = chunks[i]; Sujeong child = new Sujeong(); children[i] = child; if (chunk.numInstances() > 0) child.buildTree(chunk); else child.value = mode; } } else { this.value = instances.meanOrMode(instances.classIndex()); } } }