List of usage examples for weka.core Instances remove
@Override public Instance remove(int index)
From source file:machinelearning_cw.MachineLearning_CW.java
/**
 * Tests the accuracy of a classifier against a collection of datasets
 * by resampling.
 *
 * <p>For every dataset the train and test parts are merged, and then
 * {@code t} times re-split at random: half of the merged instances (rounded
 * down) are drawn without replacement into the train set, all remaining
 * instances become the test set. The classifier is rebuilt on each train set
 * and its accuracy on the matching test set is recorded. The average accuracy
 * of the {@code t} runs is printed to standard output, one line per dataset.
 *
 * <p>Note that the {@code Instances} objects held by both lists are modified
 * in place: they are cleared and refilled on every resampling round.
 *
 * @param classifier The classifier to be tested
 * @param trainingDatasets A collection of Instances objects containing
 * the training data for different datasets.
 * @param testDatasets A collection of Instances objects containing
 * the test data for different datasets.
 * @param t The number of times the data should be sampled
 * @throws Exception if building or evaluating the classifier fails
 */
public static void performClassifierAccuracyTests(Classifier classifier, ArrayList<Instances> trainingDatasets,
        ArrayList<Instances> testDatasets, int t) throws Exception {
    Random randomGenerator = new Random();

    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);

        // Collected per dataset, so the average printed below only reflects
        // the resampling runs of the current dataset.
        ArrayList<Double> accuracies = new ArrayList<Double>();

        for (int j = 0; j < t; j++) {
            /* Test by Resampling. First, merge train and test data.
             * Assumes mergeDataSets returns a container independent of
             * train and test, since both are cleared right after. */
            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();

            /* Randomly sample n instances from the merged dataset
             * (without replacement) to form the train set */
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }

            /* Reserve ALL remaining data as test data. Draining from the
             * end avoids the skip-every-other-element effect of removing
             * at an increasing index while the list shrinks. */
            while (mergedDataSet.size() > 0) {
                test.add(mergedDataSet.remove(mergedDataSet.size() - 1));
            }

            /* Train classifier. Recalculates k */
            classifier.buildClassifier(train);

            /* Measure and record the accuracy of the classifier on
             * the test set */
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }

        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }
}
From source file:meka.classifiers.multilabel.cc.CNode.java
License:Open Source License
/** * Main - run some tests./*from www . ja v a 2 s . c o m*/ */ public static void main(String args[]) throws Exception { Instances D = new Instances(new FileReader(args[0])); Instance x = D.lastInstance(); D.remove(D.numInstances() - 1); int L = Integer.parseInt(args[1]); D.setClassIndex(L); double y[] = new double[L]; Random r = new Random(); int s[] = new int[] { 1, 0, 2 }; int PA_J[][] = new int[][] { {}, {}, { 0, 1 }, }; //MLUtils.randomize(s,r); // MUST GO IN TREE ORDER !! for (int j : s) { int pa_j[] = PA_J[j]; System.out.println("PARENTS = " + Arrays.toString(pa_j)); //MLUtils.randomize(pa_j,r); System.out.println("**** TRAINING ***"); CNode n = new CNode(j, null, pa_j); n.build(D, new SMO()); /* */ //Instances D_ = n.transform(D); //n.T = D_; System.out.println("============== D_" + j + " / class = " + n.T.classIndex() + " ="); System.out.println("" + n.T); System.out.println("**** TESTING ****"); /* Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_); for(int pa : pa_j) { //System.out.println(""+map[pa]); x_.setValue(n.map[pa],y[pa]); } //x_.setDataset(T); x_.setClassMissing(); */ //n.T = D_; Instance x_ = n.transform(x, y); System.out.println("" + x_); y[j] = 1; } }
From source file:net.sf.jclal.sampling.supervised.Resample.java
License:Open Source License
/** * creates the subsample with replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize the size to generate * @param actualClasses The actual classes * @param classIndices The indexes of the classes *//*from ww w . ja va 2 s. c om*/ public void createSubsampleWithReplacement(WekaDataset dataSet, int sampleSize, int actualClasses, int[] classIndices) { int originalSize = dataSet.getNumInstances(); Set<Integer> indexes = new HashSet<Integer>(); Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); for (int i = 0; i < sampleSize; i++) { int index = 0; if (getRandgen().uniform(0, 1) < biasToUniformClass) { // Pick a random class (of those classes that actually appear) int cIndex = getRandgen().choose(0, actualClasses); for (int j = 0, k = 0; j < classIndices.length - 1; j++) { if ((classIndices[j] != classIndices[j + 1]) && (k++ >= cIndex)) { // Pick a random instance of the designated class index = classIndices[j] + getRandgen().choose(0, classIndices[j + 1] - classIndices[j]); break; } } } else { index = getRandgen().choose(0, originalSize); } labeledInstances.add((Instance) dataSet.instance(index).copy()); indexes.add(index); } setLabeledData(new WekaDataset(labeledInstances)); ArrayList<Container> indexesArray = new ArrayList<Container>(); for (Integer i : indexes) { indexesArray.add(new Container(i, i)); } //The array is ordered in descendent order OrderUtils.mergeSort(indexesArray, true); //Copy the entire dataset into unlabeled set Instances unlabeledInstances = new Instances(dataSet.getDataset()); //remove the instances that have been selected previously for (Container pair : indexesArray) { unlabeledInstances.remove(Integer.parseInt(pair.getValue().toString())); } setUnlabeledData(new WekaDataset(unlabeledInstances)); //clean up labeledInstances.clear(); unlabeledInstances.clear(); indexes.clear(); indexesArray.clear(); labeledInstances = null; unlabeledInstances = null; indexes = null; 
indexesArray = null; }
From source file:net.sf.jclal.sampling.unsupervised.Resample.java
License:Open Source License
/** * creates the subsample with replacement * * @param dataSet The dataset to extract a percent of instances * @param sampleSize the size to generate *///w ww . j a v a 2s.c o m public void createSubsampleWithReplacement(IDataset dataSet, int sampleSize) { int origSize = dataSet.getNumInstances(); Set<Integer> indexes = new HashSet<Integer>(); Instances labeledInstances = new Instances(dataSet.getDataset(), sampleSize); //Fill the labeled set for (int i = 0; i < sampleSize; i++) { int index = getRandgen().choose(0, origSize); labeledInstances.add((Instance) dataSet.instance(index).copy()); indexes.add(index); } if (dataSet instanceof WekaDataset) { setLabeledData(new WekaDataset(labeledInstances)); } if (dataSet instanceof MulanDataset) { setLabeledData(new MulanDataset(labeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } ArrayList<Container> indexesArray = new ArrayList<Container>(); for (Integer i : indexes) { indexesArray.add(new Container(i, i)); } //The array is ordered in descendent order OrderUtils.mergeSort(indexesArray, true); //Copy the entire dataset into unlabeled set Instances unlabeledInstances = new Instances(dataSet.getDataset()); //remove the instances that have been selected previously for (Container pair : indexesArray) { unlabeledInstances.remove(Integer.parseInt(pair.getValue().toString())); } if (dataSet instanceof WekaDataset) { setUnlabeledData(new WekaDataset(unlabeledInstances)); } if (dataSet instanceof MulanDataset) { setUnlabeledData(new MulanDataset(unlabeledInstances, ((MulanDataset) dataSet).getLabelsMetaData())); } // clean up unlabeledInstances.clear(); labeledInstances.clear(); unlabeledInstances = null; labeledInstances = null; indexes.clear(); indexesArray.clear(); indexes = null; indexesArray = null; }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public Instances clusteredInstances(Instances data) { if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); }// w w w. ja va 2 s . c o m Instances sampled_data = data; for (int i = 0; i < sampled_data.numInstances(); i++) { sampled_data.remove(i); } SimpleKMeans sKmeans = new SimpleKMeans(); data.setClassIndex(data.numAttributes() - 1); Remove filter = new Remove(); filter.setAttributeIndices("" + (data.classIndex() + 1)); List assignments = new ArrayList(); try { filter.setInputFormat(data); Instances dataClusterer = Filter.useFilter(data, filter); String[] options = new String[3]; options[0] = "-I"; // max. iterations options[1] = "500"; options[2] = "-O"; sKmeans.setNumClusters(data.numClasses()); sKmeans.setOptions(options); sKmeans.buildClusterer(dataClusterer); System.out.println("Kmeans\n:" + sKmeans); System.out.println(Arrays.toString(sKmeans.getAssignments())); assignments = Arrays.asList(sKmeans.getAssignments()); } catch (Exception e) { e.printStackTrace(); } System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(sKmeans); try { eval.evaluateClusterer(data); } catch (Exception e) { e.printStackTrace(); } int classesToClustersMap[] = eval.getClassesToClusters(); for (int i = 0; i < classesToClustersMap.length; i++) { if (assignments.get(i).equals(((Integer) classesToClustersMap[(int) data.get(i).classValue()]))) { ((Instances) sampled_data).add(data.get(i)); } } return ((Instances) sampled_data); }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public static Instances clusterInstances(Instances data) { XMeans xmeans = new XMeans(); Remove filter = new Remove(); Instances dataClusterer = null;/*from w w w .j av a 2s. com*/ if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); } //Get the attributes from the data for creating the sampled_data object ArrayList<Attribute> attrList = new ArrayList<Attribute>(); Enumeration attributes = data.enumerateAttributes(); while (attributes.hasMoreElements()) { attrList.add((Attribute) attributes.nextElement()); } Instances sampled_data = new Instances(data.relationName(), attrList, 0); data.setClassIndex(data.numAttributes() - 1); sampled_data.setClassIndex(data.numAttributes() - 1); filter.setAttributeIndices("" + (data.classIndex() + 1)); data.remove(0);//In Wavelet Stream of MOA always the first element comes without class try { filter.setInputFormat(data); dataClusterer = Filter.useFilter(data, filter); String[] options = new String[4]; options[0] = "-L"; // max. 
iterations options[1] = Integer.toString(noOfClassesInPool - 1); if (noOfClassesInPool > 2) { options[1] = Integer.toString(noOfClassesInPool - 1); xmeans.setMinNumClusters(noOfClassesInPool - 1); } else { options[1] = Integer.toString(noOfClassesInPool); xmeans.setMinNumClusters(noOfClassesInPool); } xmeans.setMaxNumClusters(data.numClasses() + 1); System.out.println("No of classes in the pool: " + noOfClassesInPool); xmeans.setUseKDTree(true); //xmeans.setOptions(options); xmeans.buildClusterer(dataClusterer); System.out.println("Xmeans\n:" + xmeans); } catch (Exception e) { e.printStackTrace(); } //System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(xmeans); try { eval.evaluateClusterer(data); int classesToClustersMap[] = eval.getClassesToClusters(); //check the classes to cluster map int clusterNo = 0; for (int i = 0; i < data.size(); i++) { clusterNo = xmeans.clusterInstance(dataClusterer.get(i)); //Check if the class value of instance and class value of cluster matches if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) { sampled_data.add(data.get(i)); } } } catch (Exception e) { e.printStackTrace(); } return ((Instances) sampled_data); }