List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Return a set of all the combinations of attributes at 'indices' in 'D', pruned by 'p'; AND THEIR COUNTS, e.g., {(00:3),(01:8),(11:3))}. *///from ww w . j a v a 2s . c o m public static HashMap<String, Integer> getCounts(Instances D, int indices[], int p) { HashMap<String, Integer> count = new HashMap<String, Integer>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i), indices); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); } MLUtils.pruneCountHashMap(count, p); return count; }
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes. * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'. * @see PSUtils.PSTransformation/*from ww w. ja v a 2s .co m*/ * @param indices m by k: m super variables, each relating to k original variables * @param D either multi-label or multi-target dataset * @param p pruning value * @param n subset relpacement value * @return a multi-target dataset */ public static Instances SLTransformation(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ // F.removeLabels(D_,L); for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); 
//System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! return D_; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Turns each row of the given dataset back into an {@code EvaluationStatistics} object.
 * <p>
 * The classifier command line and the relation name are read from the attributes named
 * {@code EvaluationStatistics.KEY_CLASSIFIER} and {@code EvaluationStatistics.KEY_RELATION};
 * if either attribute is absent, a message is logged and an empty list is returned.
 * Every numeric, non-missing value of a row is stored under its attribute name.
 * Rows that fail to convert are reported via {@code handleException} and skipped.
 *
 * @param data the data to convert
 * @return the generated statistics
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    List<EvaluationStatistics> stats = new ArrayList<>();

    if (data.attribute(EvaluationStatistics.KEY_CLASSIFIER) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return stats;
    }
    if (data.attribute(EvaluationStatistics.KEY_RELATION) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return stats;
    }

    for (int row = 0; row < data.numInstances(); row++) {
        Instance current = data.instance(row);
        try {
            String commandLine = current.stringValue(data.attribute(EvaluationStatistics.KEY_CLASSIFIER));
            MultiLabelClassifier classifier = OptionUtils.fromCommandLine(MultiLabelClassifier.class, commandLine);
            String relation = current.stringValue(data.attribute(EvaluationStatistics.KEY_RELATION));
            EvaluationStatistics entry = new EvaluationStatistics(classifier, relation, null);
            for (int col = 0; col < current.numAttributes(); col++) {
                if (!current.attribute(col).isNumeric() || current.isMissing(col)) {
                    continue;
                }
                entry.put(current.attribute(col).name(), current.value(col));
            }
            stats.add(entry);
        } catch (Exception e) {
            handleException("Failed to process instance: " + current, e);
        }
    }

    return stats;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'. * @param D assume attributes in D labeled by original index * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1 *//* w ww . ja va 2 s . c o m*/ public static Instances mergeLabels(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! D = null; return D_; }
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels.//from w ww. j a va 2 s. co m * * @param j index 1 (assume that <code>j < k</code>) * @param k index 2 (assume that <code>j < k</code>) * @param D iInstances, with attributes in labeled by original index * @return Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */ public static Instances mergeLabels(Instances D, int j, int k, int p) { int L = D.classIndex(); HashMap<String, Integer> count = new HashMap<String, Integer>(); Set<String> values = new HashSet<String>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)); String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k); //System.out.println("w = "+w); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k))); } //System.out.println("("+j+","+k+")"+values); System.out.print("pruned from " + count.size() + " to "); MLUtils.pruneCountHashMap(count, p); String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future System.out.println("" + count.size() + " with p = " + p); System.out.println("" + count); values = count.keySet(); // Create and insert the new attribute D.insertAttributeAt( new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L); // Set values for the new attribute for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); String y_jk = encodeValue(x.stringValue(j), x.stringValue(k)); try { x.setValue(L, y_jk); // y_jk = } catch (Exception e) { //x.setMissing(L); //D.delete(i); //i--; String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG //System.out.println("OK, that value ("+y_jk+") didn't exist ... 
set the closests ones ...: "+Arrays.toString(y_close)); int max_c = 0; for (String y_ : y_close) { int c = count.get(y_); if (c > max_c) { max_c = c; y_max = y_; } } //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ..."); x.setValue(L, y_max); // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one) } } // Delete separate attributes D.deleteAttributeAt(k > j ? k : j); D.deleteAttributeAt(k > j ? j : k); // Set class index D.setClassIndex(L - 1); return D; }
From source file:meka.gui.explorer.classify.PredictionsOnTestset.java
License:Open Source License
/** * Returns the action lister to use in the menu. * * @param history the current history/* w ww. j a v a2 s .c om*/ * @param index the selected history item * @return the listener */ @Override public ActionListener getActionListener(final ResultHistoryList history, final int index) { final MultiLabelClassifier classifier = (MultiLabelClassifier) getClassifier(history, index); final Instances header = getHeader(history, index); return new ActionListener() { @Override public void actionPerformed(ActionEvent e) { Runnable run = new Runnable() { @Override public void run() { ClassifyTab owner = (ClassifyTab) getOwner(); Instances test; owner.startBusy("Predictions on test..."); try { MLUtils.prepareData(owner.getTestData()); test = new Instances(owner.getTestData()); test.setClassIndex(owner.getTestData().classIndex()); String msg = header.equalHeadersMsg(test); if (msg != null) throw new IllegalArgumentException( "Model's training set and current test set are not compatible:\n" + msg); // collect predictions Instances predicted = new Instances(test, 0); for (int i = 0; i < test.numInstances(); i++) { double pred[] = classifier.distributionForInstance(test.instance(i)); // Cut off any [no-longer-needed] probabalistic information from MT classifiers. 
if (classifier instanceof MultiTargetClassifier) pred = Arrays.copyOf(pred, test.classIndex()); Instance predInst = (Instance) test.instance(i).copy(); for (int j = 0; j < pred.length; j++) predInst.setValue(j, pred[j]); predicted.add(predInst); if ((i + 1) % 100 == 0) owner.showStatus( "Predictions on test (" + (i + 1) + "/" + test.numInstances() + ")..."); } owner.finishBusy(); // display predictions DataViewerDialog dialog = new DataViewerDialog(GUIHelper.getParentFrame(owner), ModalityType.MODELESS); dialog.setDefaultCloseOperation(DataViewerDialog.DISPOSE_ON_CLOSE); dialog.setInstances(predicted); dialog.setSize(800, 600); dialog.setLocationRelativeTo(owner); dialog.setVisible(true); } catch (Exception e) { owner.handleException("Predictions failed on test set:", e); owner.finishBusy("Predictions failed: " + e); JOptionPane.showMessageDialog(owner, "Predictions failed:\n" + e, "Error", JOptionPane.ERROR_MESSAGE); } } }; ((ClassifyTab) getOwner()).start(run); } }; }
From source file:milk.classifiers.MDD.java
License:Open Source License
/** * Builds the classifier//from w ww. jav a 2s . com * * @param train the training data to be used for generating the * boosted classifier. * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Exemplars train) throws Exception { if (train.classAttribute().type() != Attribute.NOMINAL) { throw new Exception("Class attribute must be nominal."); } if (train.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } /*train = new Instances(train); train.deleteWithMissingClass(); if (train.numInstances() == 0) { throw new Exception("No train instances without missing class value!"); } m_ReplaceMissingValues = new ReplaceMissingValuesFilter(); m_ReplaceMissingValues.setInputFormat(train); train = Filter.useFilter(train, m_ReplaceMissingValues); m_NominalToBinary = new NominalToBinaryFilter(); m_NominalToBinary.setInputFormat(train); train = Filter.useFilter(train, m_NominalToBinary);*/ m_ClassIndex = train.classIndex(); m_IdIndex = train.idIndex(); m_NumClasses = train.numClasses(); int nK = 1; // Only K-1 class labels needed int nR = train.numAttributes() - 2; int nC = train.numExemplars(); m_Data = new double[nC][nR][]; // Data values m_Classes = new int[nC]; // Class values m_Attributes = new Instances(train.exemplar(0).getInstances(), 0); double sY1 = 0, sY0 = 0; // Number of classes if (m_Debug) { System.out.println("Extracting data..."); } FastVector maxSzIdx = new FastVector(); int maxSz = 0; for (int h = 0; h < m_Data.length; h++) { Exemplar current = train.exemplar(h); m_Classes[h] = (int) current.classValue(); // Class value starts from 0 Instances currInsts = current.getInstances(); int nI = currInsts.numInstances(); if (m_Classes[h] == 1) { if (nI > maxSz) { maxSz = nI; maxSzIdx = new FastVector(1); maxSzIdx.addElement(new Integer(h)); } else if (nI == maxSz) maxSzIdx.addElement(new Integer(h)); } int idx = 0; for (int i = 0; i < train.numAttributes(); i++) { if ((i == 
m_ClassIndex) || (i == m_IdIndex)) continue; // initialize m_data[][][] m_Data[h][idx] = new double[nI]; for (int k = 0; k < nI; k++) m_Data[h][idx][k] = currInsts.instance(k).value(i); idx++; } // Class count if (m_Classes[h] == 1) sY1++; else sY0++; } if (m_Debug) { System.out.println("\nIteration History..."); } double[] x = new double[nR * 2], tmp = new double[x.length]; double[][] b = new double[2][x.length]; OptEng opt; double nll, bestnll = Double.MAX_VALUE; for (int t = 0; t < x.length; t++) { b[0][t] = Double.NaN; b[1][t] = Double.NaN; } //for(int s=0; s<nC; s++){ // Multiple starts // if(m_Classes[s] != 1) // continue; // Largest positive exemplar for (int s = 0; s < maxSzIdx.size(); s++) { int exIdx = ((Integer) maxSzIdx.elementAt(s)).intValue(); for (int p = 0; p < m_Data[exIdx][0].length; p++) { for (int q = 0; q < nR; q++) { x[2 * q] = m_Data[exIdx][q][p]; // pick one instance x[2 * q + 1] = 1.0; } opt = new OptEng(); //opt.setDebug(m_Debug); tmp = opt.findArgmin(x, b); while (tmp == null) { tmp = opt.getVarbValues(); if (m_Debug) System.out.println("200 iterations finished, not enough!"); tmp = opt.findArgmin(tmp, b); } nll = opt.getMinFunction(); if (nll < bestnll) { bestnll = nll; m_Par = tmp; if (m_Debug) System.out.println("!!!!!!!!!!!!!!!!Smaller NLL found: " + nll); } if (m_Debug) System.out.println(exIdx + ": -------------<Converged>--------------"); } } }
From source file:milk.classifiers.MDD.java
License:Open Source License
/** * Computes the distribution for a given exemplar * * @param exmp the exemplar for which distribution is computed * @return the distribution/*from w w w .ja v a 2 s . com*/ * @exception Exception if the distribution can't be computed successfully */ public double[] distributionForExemplar(Exemplar exmp) throws Exception { // Extract the data Instances ins = exmp.getInstances(); int nI = ins.numInstances(), nA = ins.numAttributes() - 2; double[][] dat = new double[nI][nA]; for (int j = 0; j < nI; j++) { int idx = 0; for (int k = 0; k < nA + 2; k++) { if ((k == m_ClassIndex) || (k == m_IdIndex)) continue; dat[j][idx] = ins.instance(j).value(k); idx++; } } // Compute the probability of the bag double[] distribution = new double[2]; distribution[1] = 0.0; // Prob. for class 1 for (int i = 0; i < nI; i++) { double exp = 0.0; for (int r = 0; r < nA; r++) exp += (m_Par[r * 2] - dat[i][r]) * (m_Par[r * 2] - dat[i][r]) / ((m_Par[r * 2 + 1]) * (m_Par[r * 2 + 1])); exp = Math.exp(-exp); // Prob. updated for one instance distribution[1] += exp / (double) nI; distribution[0] += (1.0 - exp) / (double) nI; } return distribution; }
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/** * Builds the classifier/* w w w. j av a 2 s. c o m*/ * * @param train the training data to be used for generating the * boosted classifier. * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Exemplars exps) throws Exception { Exemplars train = new Exemplars(exps); if (train.classAttribute().type() != Attribute.NOMINAL) { throw new Exception("Class attribute must be nominal."); } if (train.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } m_ClassIndex = train.classIndex(); m_IdIndex = train.idIndex(); m_NumClasses = train.numClasses(); m_NumIterations = m_MaxIterations; if (m_NumClasses > 2) { throw new Exception("Not yet prepared to deal with multiple classes!"); } if (m_Classifier == null) throw new Exception("A base classifier has not been specified!"); if (!(m_Classifier instanceof WeightedInstancesHandler)) throw new Exception("Base classifier cannot handle weighted instances!"); m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations()); if (m_Debug) System.err.println("Base classifier: " + m_Classifier.getClass().getName()); m_Beta = new double[m_NumIterations]; m_Attributes = new Instances(train.exemplar(0).getInstances(), 0); double N = (double) train.numExemplars(), sumNi = 0; Instances data = new Instances(m_Attributes, 0);// Data to learn a model data.deleteAttributeAt(m_IdIndex);// ID attribute useless Instances dataset = new Instances(data, 0); // Initialize weights for (int i = 0; i < N; i++) sumNi += train.exemplar(i).getInstances().numInstances(); for (int i = 0; i < N; i++) { Exemplar exi = train.exemplar(i); exi.setWeight(sumNi / N); Instances insts = exi.getInstances(); double ni = (double) insts.numInstances(); for (int j = 0; j < ni; j++) { Instance ins = new Instance(insts.instance(j));// Copy //insts.instance(j).setWeight(1.0); ins.deleteAttributeAt(m_IdIndex); ins.setDataset(dataset); ins.setWeight(exi.weight() / ni); data.add(ins); } } // 
Assume the order of the instances are preserved in the Discretize filter if (m_DiscretizeBin > 0) { m_Filter = new Discretize(); m_Filter.setInputFormat(new Instances(data, 0)); m_Filter.setBins(m_DiscretizeBin); data = Filter.useFilter(data, m_Filter); } // Main algorithm int dataIdx; iterations: for (int m = 0; m < m_MaxIterations; m++) { if (m_Debug) System.err.println("\nIteration " + m); // Build a model m_Models[m].buildClassifier(data); // Prediction of each bag double[] err = new double[(int) N], weights = new double[(int) N]; boolean perfect = true, tooWrong = true; dataIdx = 0; for (int n = 0; n < N; n++) { Exemplar exn = train.exemplar(n); // Prediction of each instance and the predicted class distribution // of the bag double nn = (double) exn.getInstances().numInstances(); for (int p = 0; p < nn; p++) { Instance testIns = data.instance(dataIdx++); if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue()) // Weighted instance-wise 0-1 errors err[n]++; } weights[n] = exn.weight(); err[n] /= nn; if (err[n] > 0.5) perfect = false; if (err[n] < 0.5) tooWrong = false; } if (perfect || tooWrong) { // No or 100% classification error, cannot find beta if (m == 0) m_Beta[m] = 1.0; else m_Beta[m] = 0; m_NumIterations = m + 1; if (m_Debug) System.err.println("No errors"); break iterations; } double[] x = new double[1]; x[0] = 0; double[][] b = new double[2][x.length]; b[0][0] = Double.NaN; b[1][0] = Double.NaN; OptEng opt = new OptEng(); opt.setWeights(weights); opt.setErrs(err); //opt.setDebug(m_Debug); if (m_Debug) System.out.println("Start searching for c... 
"); x = opt.findArgmin(x, b); while (x == null) { x = opt.getVarbValues(); if (m_Debug) System.out.println("200 iterations finished, not enough!"); x = opt.findArgmin(x, b); } if (m_Debug) System.out.println("Finished."); m_Beta[m] = x[0]; if (m_Debug) System.err.println("c = " + m_Beta[m]); // Stop if error too small or error too big and ignore this model if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) { if (m == 0) m_Beta[m] = 1.0; else m_Beta[m] = 0; m_NumIterations = m + 1; if (m_Debug) System.err.println("Errors out of range!"); break iterations; } // Update weights of data and class label of wfData dataIdx = 0; double totWeights = 0; for (int r = 0; r < N; r++) { Exemplar exr = train.exemplar(r); exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0))); totWeights += exr.weight(); } if (m_Debug) System.err.println("Total weights = " + totWeights); for (int r = 0; r < N; r++) { Exemplar exr = train.exemplar(r); double num = (double) exr.getInstances().numInstances(); exr.setWeight(sumNi * exr.weight() / totWeights); //if(m_Debug) // System.err.print("\nExemplar "+r+"="+exr.weight()+": \t"); for (int s = 0; s < num; s++) { Instance inss = data.instance(dataIdx); inss.setWeight(exr.weight() / num); // if(m_Debug) // System.err.print("instance "+s+"="+inss.weight()+ // "|ew*iw*sumNi="+data.instance(dataIdx).weight()+"\t"); if (Double.isNaN(inss.weight())) throw new Exception("instance " + s + " in bag " + r + " has weight NaN!"); dataIdx++; } //if(m_Debug) // System.err.println(); } } }
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/** * Computes the distribution for a given exemplar */* www. ja v a 2 s. co m*/ * @param exmp the exemplar for which distribution is computed * @return the classification * @exception Exception if the distribution can't be computed successfully */ public double[] distributionForExemplar(Exemplar exmp) throws Exception { double[] rt = new double[m_NumClasses]; Instances insts = new Instances(exmp.getInstances()); double n = (double) insts.numInstances(); insts.deleteAttributeAt(m_IdIndex);// ID attribute useless if (m_DiscretizeBin > 0) insts = Filter.useFilter(insts, m_Filter); for (int y = 0; y < n; y++) { Instance ins = insts.instance(y); for (int x = 0; x < m_NumIterations; x++) rt[(int) m_Models[x].classifyInstance(ins)] += m_Beta[x] / n; } for (int i = 0; i < rt.length; i++) rt[i] = Math.exp(rt[i]); Utils.normalize(rt); return rt; }