List of usage examples for weka.core Instances instance
public Instance instance(int index)
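A minimal sketch of the call itself, for orientation before the longer examples below; the ARFF path and the class-index choice are illustrative assumptions, not taken from any of the source files listed here.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessSketch {
    public static void main(String[] args) throws Exception {
        // Load an ARFF file (placeholder path) and pick a class attribute.
        Instances data = DataSource.read("data/example.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // instance(i) returns a reference to the i-th Instance (0-based) without copying it.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance x = data.instance(i);
            System.out.println(i + ": " + x);
        }
    }
}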
From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java
License:Open Source License
/**
 * EvaluateModelBatchWindow - Evaluate a multi-label data-stream model over windows.
 * @param h Multilabel Classifier
 * @param D stream
 * @param numWindows number of windows
 * @param rLabeled labelled-ness (1.0 by default)
 * @param Top threshold option
 * @param Vop verbosity option
 * @return The Result on the final window (but it contains samples of all the other evaluated windows).
 *         The window is sampled every N/numWindows instances, for a total of numWindows windows.
 */
public static Result evaluateModelBatchWindow(MultiLabelClassifier h, Instances D, int numWindows,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out.println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int N = D.numInstances();
    int L = D.classIndex();

    // the Result to use
    Result result = null;
    // the samples of all windows
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    long train_time = 0;
    long test_time = 0;

    int windowSize = (int) Math.floor(D.numInstances() / (double) numWindows);
    if (rLabeled * windowSize < 1.)
        throw new Exception(
                "[Error] The ratio of labelled instances (" + rLabeled + ") is too small given the window size!");

    double nth = 1. / rLabeled; // label every nth example

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;
    if (h.getDebug()) {
        System.out.println("Done (in " + (train_time / 1000.0) + " s)");
    }

    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    double t[] = new double[L];
    Arrays.fill(t, 0.5);

    int V = MLUtils.getIntegerOption(Vop, 3);

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
        System.out.print("#" + Utils.padLeft("w", 6) + " " + Utils.padLeft("n", 6));
        for (String m : measures) {
            System.out.print(" ");
            System.out.print(Utils.padLeft(m, 12));
        }
        System.out.println("");
        System.out.println("--------------------------------------------------------------------------------");
    }

    int i = 0;
    for (int w = 0; w < numWindows - 1; w++) { // For each evaluation window ...

        result = new Result(L);
        result.setInfo("Supervision", String.valueOf(rLabeled));
        result.setInfo("Type", "MLi");

        int n = 0;
        test_time = 0;
        train_time = 0;
        for (int c = 0; i < (w * windowSize) + windowSize; i++) { // For each instance in the evaluation window ...

            Instance x = D.instance(i);
            AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy
            // (we can't clear the class values because certain classifiers need to know how well
            //  they're doing -- just trust that there's no cheating!)
            //for(int j = 0; j < L; j++)
            //    x_.setValue(j,0.0);

            if (rLabeled < 0.5 && (i % (int) (1 / rLabeled) == 0)
                    || (rLabeled >= 0.5 && (i % (int) (1. / (1. - rLabeled)) != 0))) {
                // LABELLED - Test & record prediction
                long before_test = System.currentTimeMillis();
                double y[] = h.distributionForInstance(x_);
                long after_test = System.currentTimeMillis();
                test_time += (after_test - before_test); // was +=
                result.addResult(y, x);
                n++;
            } else {
                // UNLABELLED
                x = MLUtils.setLabelsMissing(x, L);
            }

            // Update the classifier. (The classifier will have to decide if it wants to deal with unlabelled instances.)
            long before = System.currentTimeMillis();
            ((UpdateableClassifier) h).updateClassifier(x);
            long after = System.currentTimeMillis();
            train_time += (after - before); // was +=
        }

        // calculate results
        result.setInfo("Threshold", Arrays.toString(t));
        result.output = Result.getStats(result, Vop);
        result.setMeasurement("Test time", (test_time) / 1000.0);
        result.setMeasurement("Build time", (train_time) / 1000.0);
        result.setMeasurement("Total time", (test_time + train_time) / 1000.0);
        result.setMeasurement("Threshold", (double) t[0]);
        result.setMeasurement("Instances", (double) i);
        result.setMeasurement("Samples", (double) (samples.size() + 1));
        samples.add(result.output);

        // Display results (to CLI)
        if (h.getDebug()) {
            System.out.print("#" + Utils.doubleToString((double) w + 1, 6, 0) + " "
                    + Utils.doubleToString((double) n, 6, 0));
            n = 0;
            for (String m : measures) {
                System.out.print(" ");
                System.out.print(Utils.doubleToString((Double) result.getMeasurement(m), 12, 4));
            }
            System.out.println("");
        }

        // Calibrate threshold for next window
        if (Top.equals("PCutL")) {
            t = ThresholdUtils.calibrateThresholds(result.predictions, MLUtils.labelCardinalities(result.actuals));
        } else {
            Arrays.fill(t, ThresholdUtils.calibrateThreshold(result.predictions,
                    MLUtils.labelCardinality(result.allTrueValues())));
        }
    }

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
    }

    // This is the last Result; prepare it for evaluation output.
    result.setInfo("Classifier", h.getClass().getName());
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);
    result.vals.put("Total instances tested", (double) i);
    result.vals.put("Initial instances for training", (double) windowSize);
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));

    return result;
}
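A hypothetical driver for the method above, sketched only to show how it is wired together; the dataset path, the choice of BRUpdateable as the updateable classifier, the MLUtils.prepareData() call, and the option strings "PCut1"/"3" are assumptions, not part of the original source file.

import meka.classifiers.multilabel.MultiLabelClassifier;
import meka.classifiers.multilabel.incremental.BRUpdateable;
import meka.classifiers.multilabel.incremental.IncrementalEvaluation;
import meka.core.MLUtils;
import meka.core.Result;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class BatchWindowDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder path to a MEKA-format ARFF; prepareData() (assumed MEKA helper)
        // reads the "-C" option from the relation name and sets the class index accordingly.
        Instances D = DataSource.read("data/Music.arff");
        MLUtils.prepareData(D);

        // The classifier must also implement weka.classifiers.UpdateableClassifier,
        // since evaluateModelBatchWindow() calls updateClassifier() on every instance.
        MultiLabelClassifier h = new BRUpdateable();

        // 20 windows over the stream, fully labelled (rLabeled = 1.0),
        // threshold option "PCut1", verbosity option "3".
        Result r = IncrementalEvaluation.evaluateModelBatchWindow(h, D, 20, 1.0, "PCut1", "3");
        System.out.println(r);
    }
}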
From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java
License:Open Source License
/**
 * Prequential Evaluation - Accuracy since the start of evaluation.
 * @param h Multilabel Classifier
 * @param D stream
 * @param windowSize sampling frequency (of evaluation statistics)
 * @param rLabeled labelled-ness (1.0 by default)
 * @param Top threshold option
 * @param Vop verbosity option
 * Evaluation statistics are sampled every windowSize instances.
 */
public static Result evaluateModelPrequentialBasic(MultiLabelClassifier h, Instances D, int windowSize,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out.println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    int L = D.classIndex();
    Result result = new Result();

    long train_time = 0;
    long test_time = 0;

    double nth = 1. / rLabeled; // label every nth example
    result.setInfo("Supervision", String.valueOf(rLabeled));

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window (of size " + windowSize + ") ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;

    D = new Instances(D, windowSize, D.numInstances() - windowSize); // the rest (after the initial window)

    if (h.getDebug()) {
        System.out.println(
                "Proceeding to Test/Label/Update cycle on remaining (" + D.numInstances() + ") instances ...");
    }

    result.setInfo("Classifier", h.getClass().getName());
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.setInfo("Verbosity", Vop);

    if (h instanceof MultiTargetClassifier || Evaluation.isMT(D)) {
        result.setInfo("Type", "MT");
    } else {
        result.setInfo("Type", "ML");
        double t = 0.5;
        try {
            t = Double.parseDouble(Top);
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Only a single threshold can be chosen for this kind of evaluation; Using " + t);
        }
        result.setInfo("Threshold", String.valueOf(t));
    }

    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    for (int i = 0; i < D.numInstances(); i++) {

        Instance x = D.instance(i);
        AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy

        /*
         * TEST
         */
        long before_test = System.currentTimeMillis();
        double y[] = h.distributionForInstance(x_);
        long after_test = System.currentTimeMillis();
        test_time += (after_test - before_test);
        result.addResult(y, x);

        /*
         * LABEL BECOMES AVAILABLE ?
         */
        if (rLabeled >= 0.5) {
            x = MLUtils.setLabelsMissing(x, L);
        }

        /*
         * UPDATE
         * (The classifier will have to decide if it wants to deal with unlabelled instances.)
         */
        long before = System.currentTimeMillis();
        ((UpdateableClassifier) h).updateClassifier(x);
        long after = System.currentTimeMillis();
        train_time += (after - before);

        /*
         * RECORD MEASUREMENT
         */
        if (i % windowSize == (windowSize - 1)) {
            HashMap<String, Object> eval_sample = Result.getStats(result, Vop);
            eval_sample.put("Test time", (test_time) / 1000.0);
            eval_sample.put("Build time", (train_time) / 1000.0);
            eval_sample.put("Total time", (test_time + train_time) / 1000.0);
            eval_sample.put("Instances", (double) i);
            eval_sample.put("Samples", (double) (samples.size() + 1));
            samples.add(eval_sample);
            System.out.println("Sample (#" + samples.size() + ") of performance at " + i + "/"
                    + D.numInstances() + " instances.");
        }
    }

    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);

    return result;
}
From source file:meka.classifiers.multilabel.MajorityLabelset.java
License:Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    int L = D.classIndex();
    this.prediction = new double[L];

    for (int i = 0; i < D.numInstances(); i++) {
        updateCount(D.instance(i), L);
    }
}
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());
    tmpInst.delete();
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances pseudoLabels = new Instances(this.compressedTemplateInst);
    Instance tmpin = pseudoLabels.instance(0);
    pseudoLabels.delete();
    pseudoLabels.add(tmpin);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        pseudoLabels.instance(0).setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(pseudoLabels.numAttributes());

    return newDataSet.instance(0);
}
From source file:meka.classifiers.multilabel.meta.BaggingML.java
License:Open Source License
@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);

    if (getDebug())
        System.out.print("-: Models: ");

    train = new Instances(train);
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    for (int i = 0; i < m_NumIterations; i++) {
        Random r = new Random(m_Seed + i);
        Instances bag = new Instances(train, 0);
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.print("" + i + " ");

        int ixs[] = new int[train.numInstances()];
        for (int j = 0; j < ixs.length; j++) {
            ixs[r.nextInt(ixs.length)]++;
        }
        for (int j = 0; j < ixs.length; j++) {
            if (ixs[j] > 0) {
                Instance instance = train.instance(j);
                instance.setWeight(ixs[j]);
                bag.add(instance);
            }
        }

        m_Classifiers[i].buildClassifier(bag);
    }

    if (getDebug())
        System.out.println(":-");
}
From source file:meka.classifiers.multilabel.meta.BaggingMLdup.java
License:Open Source License
@Override
public void buildClassifier(Instances train) throws Exception {
    testCapabilities(train);

    if (getDebug())
        System.out.print("-: Models: ");

    //m_Classifiers = (MultilabelClassifier[]) AbstractClassifier.makeCopies(m_Classifier, m_NumIterations);
    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    for (int i = 0; i < m_NumIterations; i++) {
        Random r = new Random(m_Seed + i);
        Instances bag = new Instances(train, 0);
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.print("" + i + " ");

        int bag_no = (m_BagSizePercent * train.numInstances() / 100);
        //System.out.println(" bag no: "+bag_no);
        while (bag.numInstances() < bag_no) {
            bag.add(train.instance(r.nextInt(train.numInstances())));
        }

        m_Classifiers[i].buildClassifier(bag);
    }

    if (getDebug())
        System.out.println(":-");
}
From source file:meka.classifiers.multilabel.meta.MBR.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);

    int c = data.classIndex();

    // Base BR
    if (getDebug())
        System.out.println("Build BR Base (" + c + " models)");
    m_BASE = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_BASE.buildClassifier(data);

    // Meta BR
    if (getDebug())
        System.out.println("Prepare Meta data ");
    Instances meta_data = new Instances(data);

    FastVector BinaryClass = new FastVector(c);
    BinaryClass.addElement("0");
    BinaryClass.addElement("1");

    for (int i = 0; i < c; i++) {
        meta_data.insertAttributeAt(new Attribute("metaclass" + i, BinaryClass), c);
    }

    for (int i = 0; i < data.numInstances(); i++) {
        double cfn[] = m_BASE.distributionForInstance(data.instance(i));
        for (int a = 0; a < cfn.length; a++) {
            meta_data.instance(i).setValue(a + c, cfn[a]);
        }
    }

    meta_data.setClassIndex(c);
    m_InstancesTemplate = new Instances(meta_data, 0);

    if (getDebug())
        System.out.println("Build BR Meta (" + c + " models)");
    m_META = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(),
            ((AbstractClassifier) getClassifier()).getOptions());
    m_META.buildClassifier(meta_data);
}
From source file:meka.classifiers.multilabel.meta.RandomSubspaceML.java
License:Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    m_InstancesTemplates = new Instances[m_NumIterations];
    m_InstanceTemplates = new Instance[m_NumIterations];

    if (getDebug())
        System.out.println("-: Models: ");

    m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier,
            m_NumIterations);

    Random r = new Random(m_Seed);

    int N_sub = (D.numInstances() * m_BagSizePercent / 100);
    int L = D.classIndex();
    int d = D.numAttributes() - L;
    int d_new = d * m_AttSizePercent / 100;
    m_IndicesCut = new int[m_NumIterations][];

    for (int i = 0; i < m_NumIterations; i++) {

        // Downsize the instance space (exactly like in EnsembleML.java)
        if (getDebug())
            System.out.print("\t" + (i + 1) + ": ");
        D.randomize(r);
        Instances D_cut = new Instances(D, 0, N_sub);
        if (getDebug())
            System.out.print("N=" + D.numInstances() + " -> N'=" + D_cut.numInstances() + ", ");

        // Downsize attribute space
        D_cut.setClassIndex(-1);
        int indices_a[] = A.make_sequence(L, d + L);
        A.shuffle(indices_a, r);
        indices_a = Arrays.copyOfRange(indices_a, 0, d - d_new);
        Arrays.sort(indices_a);
        m_IndicesCut[i] = A.invert(indices_a, D.numAttributes());
        D_cut = F.remove(D_cut, indices_a, false);
        D_cut.setClassIndex(L);
        if (getDebug())
            System.out.print(" A:=" + (D.numAttributes() - L) + " -> A'=" + (D_cut.numAttributes() - L) + " ("
                    + m_IndicesCut[i][L] + ",...," + m_IndicesCut[i][m_IndicesCut[i].length - 1] + ")");

        // Train multi-label classifier
        if (m_Classifiers[i] instanceof Randomizable)
            ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i);
        if (getDebug())
            System.out.println(".");
        m_Classifiers[i].buildClassifier(D_cut);

        m_InstanceTemplates[i] = D_cut.instance(1);
        m_InstancesTemplates[i] = new Instances(D_cut, 0);
    }

    if (getDebug())
        System.out.println(":-");
}
From source file:meka.classifiers.multilabel.meta.SubsetMapper.java
License:Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    for (int i = 0; i < D.numInstances(); i++) {
        m_Count.put(MLUtils.toBitString(D.instance(i), D.classIndex()), 0);
    }

    m_Classifier.buildClassifier(D);
}
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
@Override
public Instance transformInstance(Instance x) throws Exception {
    Instances tmpInst = new Instances(x.dataset());
    tmpInst.delete();
    tmpInst.add(x);

    Instances features = this.extractPart(tmpInst, false);

    Instances pseudoLabels = new Instances(this.compressedMatrix);
    Instance tmpin = pseudoLabels.instance(0);
    pseudoLabels.delete();
    pseudoLabels.add(tmpin);
    for (int i = 0; i < pseudoLabels.classIndex(); i++) {
        pseudoLabels.instance(0).setMissing(i);
    }

    Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
    newDataSet.setClassIndex(this.size);

    return newDataSet.instance(0);
}