List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
From source file:meka.classifiers.multilabel.cc.CNode.java
License:Open Source License
/** * Transform./*from w w w . j a va2s . c o m*/ * @param D original Instances * @param c to be the class Attribute * @param pa_c the parent indices of c * @return new Instances T */ public static Instances transform(Instances D, int c, int pa_c[]) throws Exception { int L = D.classIndex(); int keep[] = A.append(pa_c, c); // keep all parents and self! Arrays.sort(keep); int remv[] = A.invert(keep, L); // i.e., remove the rest < L Arrays.sort(remv); Instances T = F.remove(new Instances(D), remv, false); int map[] = new int[L]; for (int j = 0; j < L; j++) { map[j] = Arrays.binarySearch(keep, j); } T.setClassIndex(map[c]); return T; }
From source file:meka.classifiers.multilabel.cc.CNode.java
License:Open Source License
/** * Main - run some tests./*w ww.j a v a2 s . c om*/ */ public static void main(String args[]) throws Exception { Instances D = new Instances(new FileReader(args[0])); Instance x = D.lastInstance(); D.remove(D.numInstances() - 1); int L = Integer.parseInt(args[1]); D.setClassIndex(L); double y[] = new double[L]; Random r = new Random(); int s[] = new int[] { 1, 0, 2 }; int PA_J[][] = new int[][] { {}, {}, { 0, 1 }, }; //MLUtils.randomize(s,r); // MUST GO IN TREE ORDER !! for (int j : s) { int pa_j[] = PA_J[j]; System.out.println("PARENTS = " + Arrays.toString(pa_j)); //MLUtils.randomize(pa_j,r); System.out.println("**** TRAINING ***"); CNode n = new CNode(j, null, pa_j); n.build(D, new SMO()); /* */ //Instances D_ = n.transform(D); //n.T = D_; System.out.println("============== D_" + j + " / class = " + n.T.classIndex() + " ="); System.out.println("" + n.T); System.out.println("**** TESTING ****"); /* Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_); for(int pa : pa_j) { //System.out.println(""+map[pa]); x_.setValue(n.map[pa],y[pa]); } //x_.setDataset(T); x_.setClassMissing(); */ //n.T = D_; Instance x_ = n.transform(x, y); System.out.println("" + x_); y[j] = 1; } }
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/** * RunExperiment - Build and evaluate a model with command-line options. * @param h multi-label classifier * @param options command line options *///w ww . ja v a2 s. c o m public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception { // Help if (Utils.getOptionPos('h', options) >= 0) { System.out.println("\nHelp requested"); Evaluation.printOptions(h.listOptions()); return; } h.setOptions(options); if (h.getDebug()) System.out.println("Loading and preparing dataset ..."); // Load Instances from a file Instances D_train = loadDataset(options); Instances D_full = D_train; // Try extract and set a class index from the @relation name MLUtils.prepareData(D_train); // Override the number of classes with command-line option (optional) if (Utils.getOptionPos('C', options) >= 0) { int L = Integer.parseInt(Utils.getOption('C', options)); D_train.setClassIndex(L); } // We we still haven't found -C option, we can't continue (don't know how many labels) int L = D_train.classIndex(); if (L <= 0) { throw new Exception( "[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line."); // apparently the dataset didn't contain the '-C' flag, check in the command line options ... } // Randomize (Instances) int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0; if (Utils.getFlag('R', options)) { D_train.randomize(new Random(seed)); } boolean Threaded = false; if (Utils.getOptionPos("Thr", options) >= 0) { Threaded = Utils.getFlag("Thr", options); } // Verbosity Option String voption = "1"; if (Utils.getOptionPos("verbosity", options) >= 0) { voption = Utils.getOption("verbosity", options); } // Save for later? //String fname = null; //if (Utils.getOptionPos('f',options) >= 0) { // fname = Utils.getOption('f',options); //} // Dump for later? 
String dname = null; if (Utils.getOptionPos('d', options) >= 0) { dname = Utils.getOption('d', options); } // Load from file? String lname = null; Instances dataHeader = null; if (Utils.getOptionPos('l', options) >= 0) { lname = Utils.getOption('l', options); Object[] data = SerializationHelper.readAll(lname); h = (MultiLabelClassifier) data[0]; if (data.length > 1) dataHeader = (Instances) data[1]; //Object o[] = SerializationHelper.readAll(lname); //h = (MultilabelClassifier)o[0]; } try { Result r = null; // Threshold OPtion String top = "PCut1"; // default if (Utils.getOptionPos("threshold", options) >= 0) top = Utils.getOption("threshold", options); if (Utils.getOptionPos('x', options) >= 0) { // CROSS-FOLD-VALIDATION int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10 // Check for remaining options Utils.checkForRemainingOptions(options); r = Evaluation.cvModel(h, D_train, numFolds, top, voption); System.out.println(r.toString()); } else { // TRAIN-TEST SPLIT Instances D_test = null; if (Utils.getOptionPos('T', options) >= 0) { // load separate test set try { D_test = loadDataset(options, 'T'); MLUtils.prepareData(D_test); } catch (Exception e) { throw new Exception("[Error] Failed to Load Test Instances from file.", e); } } else { // split training set into train and test sets // default split int N_T = (int) (D_train.numInstances() * 0.60); if (Utils.getOptionPos("split-percentage", options) >= 0) { // split by percentage double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options)); N_T = (int) Math.round((D_train.numInstances() * (percentTrain / 100.0))); } else if (Utils.getOptionPos("split-number", options) >= 0) { // split by number N_T = Integer.parseInt(Utils.getOption("split-number", options)); } int N_t = D_train.numInstances() - N_T; D_test = new Instances(D_train, N_T, N_t); D_train = new Instances(D_train, 0, N_T); } // Invert the split? 
if (Utils.getFlag('i', options)) { //boolean INVERT = Utils.getFlag('i',options); Instances temp = D_test; D_test = D_train; D_train = temp; } // Check for remaining options Utils.checkForRemainingOptions(options); if (h.getDebug()) System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":" + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")"); if (lname != null) { // h is already built, and loaded from a file, test it! r = testClassifier(h, D_test); String t = top; if (top.startsWith("PCut")) { // if PCut is specified we need the training data, // so that we can calibrate the threshold! t = MLEvalUtils.getThreshold(r.predictions, D_train, top); } r = evaluateModel(h, D_test, t, voption); } else { //check if train and test set size are > 0 if (D_train.numInstances() > 0 && D_test.numInstances() > 0) { if (Threaded) { r = evaluateModelM(h, D_train, D_test, top, voption); } else { r = evaluateModel(h, D_train, D_test, top, voption); } } else { // otherwise just train on full set. Maybe better throw an exception. h.buildClassifier(D_full); } } // @todo, if D_train==null, assume h is already trained if (D_train.numInstances() > 0 && D_test.numInstances() > 0) { System.out.println(r.toString()); } } // Save model to file? if (dname != null) { dataHeader = new Instances(D_train, 0); SerializationHelper.writeAll(dname, new Object[] { h, dataHeader }); } } catch (Exception e) { e.printStackTrace(); Evaluation.printOptions(h.listOptions()); System.exit(1); } System.exit(0); }
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override public Instance transformInstance(Instance x) throws Exception { Instances tmpInst = new Instances(x.dataset()); tmpInst.delete();//from ww w . j a v a2 s. c o m tmpInst.add(x); Instances features = this.extractPart(tmpInst, false); Instances pseudoLabels = new Instances(this.compressedTemplateInst); Instance tmpin = pseudoLabels.instance(0); pseudoLabels.delete(); pseudoLabels.add(tmpin); for (int i = 0; i < pseudoLabels.classIndex(); i++) { pseudoLabels.instance(0).setMissing(i); } Instances newDataSet = Instances.mergeInstances(pseudoLabels, features); newDataSet.setClassIndex(pseudoLabels.numAttributes()); return newDataSet.instance(0); }
From source file:meka.classifiers.multilabel.Maniac.java
License:Open Source License
@Override public Instances transformLabels(Instances D) throws Exception { // crazy scala-specific stuff that is necessary to access // "static" methods from java org.kramerlab.autoencoder.package$ autoencoderStatics = org.kramerlab.autoencoder.package$.MODULE$; org.kramerlab.autoencoder.wekacompatibility.package$ wekaStatics = org.kramerlab.autoencoder.wekacompatibility.package$.MODULE$; org.kramerlab.autoencoder.experiments.package$ experimentsStatics = org.kramerlab.autoencoder.experiments.package$.MODULE$; int topiter = -1; // the optimization is a bit special, since we learn a stream // of autoencoders, no need to start from scratch, we just add layers if (this.isOptimizeAE()) { Instances train = D.trainCV(3, 1); Instances test = D.testCV(3, 1); Instances labels = this.extractPart(train, true); // first convert the arff into non sparse form SparseToNonSparse spfilter = new SparseToNonSparse(); spfilter.setInputFormat(labels); Instances aeData = Filter.useFilter(labels, spfilter); // now convert it into a format suitable for the autoencoder Mat data = wekaStatics.instancesToMat(aeData); Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java( autoencoderStatics.Sigmoid(), // type of neurons. // Sigmoid is ok this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / // 2 this.getCompression(), // compression from k-th layer to (k+1)-th layer data, // training data true, // true = L2 Error, false = CrossEntropy autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers()); // test each autoencoder, select the best classifier double bestAccuracy = Double.NEGATIVE_INFINITY; int iteratorcount = 0; topiter = 0;/*from www . 
java 2 s .c om*/ for (Autoencoder a : autoencoders) { iteratorcount++; Maniac candidate = new Maniac(); candidate.setOptimizeAE(false); candidate.setNumberAutoencoders(this.getNumberAutoencoders()); candidate.setCompression(this.getCompression()); candidate.setClassifier(this.getClassifier()); candidate.setAE(a); Result res = Evaluation.evaluateModel(candidate, train, test); double curac = (Double) res.getValue("Accuracy"); if (bestAccuracy < curac) { bestAccuracy = curac; topiter = iteratorcount; } } } Instances features = this.extractPart(D, false); Instances labels = this.extractPart(D, true); // first convert the arff into non sparse form SparseToNonSparse spfilter = new SparseToNonSparse(); spfilter.setInputFormat(labels); Instances aeData = Filter.useFilter(labels, spfilter); // now convert it into a format suitable for the autoencoder Mat data = wekaStatics.instancesToMat(aeData); if (this.getAE() == null) { Iterable<Autoencoder> autoencoders = autoencoderStatics.deepAutoencoderStream_java( autoencoderStatics.Sigmoid(), // type of neurons. // Sigmoid is ok this.getNumberAutoencoders(), // number of autoencoders = (max hidden layers + 1) / // 2 this.getCompression(), // compression from k-th layer to (k+1)-th layer data, // training data true, // true = L2 Error, false = CrossEntropy autoencoderStatics.HintonsMiraculousStrategy(), true, autoencoderStatics.NoObservers()); int itercount = 0; for (Autoencoder a : autoencoders) { itercount++; if (topiter > 0 && itercount == topiter || itercount == this.getNumberAutoencoders()) { this.setAE(a); break; } } } Mat compressed = this.getAE().compress(data); Instances compressedLabels = wekaStatics.matToInstances(compressed); // remember the labels to use for the prediction step, this.compressedTemplateInst = new Instances(compressedLabels); Instances result = Instances.mergeInstances(compressedLabels, features); result.setClassIndex(compressedLabels.numAttributes()); return result; }
From source file:meka.classifiers.multilabel.meta.MBR.java
License:Open Source License
@Override public void buildClassifier(Instances data) throws Exception { testCapabilities(data);// w ww. j a va2 s. c o m int c = data.classIndex(); // Base BR if (getDebug()) System.out.println("Build BR Base (" + c + " models)"); m_BASE = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(), ((AbstractClassifier) getClassifier()).getOptions()); m_BASE.buildClassifier(data); // Meta BR if (getDebug()) System.out.println("Prepare Meta data "); Instances meta_data = new Instances(data); FastVector BinaryClass = new FastVector(c); BinaryClass.addElement("0"); BinaryClass.addElement("1"); for (int i = 0; i < c; i++) { meta_data.insertAttributeAt(new Attribute("metaclass" + i, BinaryClass), c); } for (int i = 0; i < data.numInstances(); i++) { double cfn[] = m_BASE.distributionForInstance(data.instance(i)); for (int a = 0; a < cfn.length; a++) { meta_data.instance(i).setValue(a + c, cfn[a]); } } meta_data.setClassIndex(c); m_InstancesTemplate = new Instances(meta_data, 0); if (getDebug()) System.out.println("Build BR Meta (" + c + " models)"); m_META = (BR) AbstractClassifier.forName(getClassifier().getClass().getName(), ((AbstractClassifier) getClassifier()).getOptions()); m_META.buildClassifier(meta_data); }
From source file:meka.classifiers.multilabel.meta.RandomSubspaceML.java
License:Open Source License
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D);//from w w w . ja va 2 s . c o m m_InstancesTemplates = new Instances[m_NumIterations]; m_InstanceTemplates = new Instance[m_NumIterations]; if (getDebug()) System.out.println("-: Models: "); m_Classifiers = ProblemTransformationMethod.makeCopies((ProblemTransformationMethod) m_Classifier, m_NumIterations); Random r = new Random(m_Seed); int N_sub = (D.numInstances() * m_BagSizePercent / 100); int L = D.classIndex(); int d = D.numAttributes() - L; int d_new = d * m_AttSizePercent / 100; m_IndicesCut = new int[m_NumIterations][]; for (int i = 0; i < m_NumIterations; i++) { // Downsize the instance space (exactly like in EnsembleML.java) if (getDebug()) System.out.print("\t" + (i + 1) + ": "); D.randomize(r); Instances D_cut = new Instances(D, 0, N_sub); if (getDebug()) System.out.print("N=" + D.numInstances() + " -> N'=" + D_cut.numInstances() + ", "); // Downsize attribute space D_cut.setClassIndex(-1); int indices_a[] = A.make_sequence(L, d + L); A.shuffle(indices_a, r); indices_a = Arrays.copyOfRange(indices_a, 0, d - d_new); Arrays.sort(indices_a); m_IndicesCut[i] = A.invert(indices_a, D.numAttributes()); D_cut = F.remove(D_cut, indices_a, false); D_cut.setClassIndex(L); if (getDebug()) System.out.print(" A:=" + (D.numAttributes() - L) + " -> A'=" + (D_cut.numAttributes() - L) + " (" + m_IndicesCut[i][L] + ",...," + m_IndicesCut[i][m_IndicesCut[i].length - 1] + ")"); // Train multi-label classifier if (m_Classifiers[i] instanceof Randomizable) ((Randomizable) m_Classifiers[i]).setSeed(m_Seed + i); if (getDebug()) System.out.println("."); m_Classifiers[i].buildClassifier(D_cut); m_InstanceTemplates[i] = D_cut.instance(1); m_InstancesTemplates[i] = new Instances(D_cut, 0); } if (getDebug()) System.out.println(":-"); }
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
@Override public Instance transformInstance(Instance x) throws Exception { Instances tmpInst = new Instances(x.dataset()); tmpInst.delete();//from w w w . j a va2s. c om tmpInst.add(x); Instances features = this.extractPart(tmpInst, false); Instances pseudoLabels = new Instances(this.compressedMatrix); Instance tmpin = pseudoLabels.instance(0); pseudoLabels.delete(); pseudoLabels.add(tmpin); for (int i = 0; i < pseudoLabels.classIndex(); i++) { pseudoLabels.instance(0).setMissing(i); } Instances newDataSet = Instances.mergeInstances(pseudoLabels, features); newDataSet.setClassIndex(this.size); return newDataSet.instance(0); }
From source file:meka.classifiers.multilabel.MLCBMaD.java
License:Open Source License
/**
 * Replaces the label part of {@code D} with the first factor of a Boolean matrix
 * decomposition of the labels.
 *
 * <p>The two factors returned by the decomposition are stored in
 * {@code compressedMatrix} and {@code uppermatrix}.
 *
 * @param D the instances whose label part is to be decomposed
 * @return {@code compressedMatrix} merged with the feature part of {@code D}; the class
 *         index is {@code getSize()}
 * @throws Exception propagated from part extraction or the decomposition
 */
@Override
public Instances transformLabels(Instances D) throws Exception {
    final Instances featurePart = this.extractPart(D, false);
    final Instances labelPart = this.extractPart(D, true);

    final BooleanMatrixDecomposition decomposer = BooleanMatrixDecomposition.BEST_CONFIGURED(this.threshold);
    final Tuple<Instances, Instances> factors = decomposer.decompose(labelPart, this.size);

    this.compressedMatrix = factors._1;
    this.uppermatrix = factors._2;

    final Instances merged = Instances.mergeInstances(compressedMatrix, featurePart);
    merged.setClassIndex(this.getSize());
    return merged;
}
From source file:meka.classifiers.multilabel.PLST.java
License:Open Source License
/** * The method to transform the labels into another set of latent labels, * typically a compression method is used, e.g., Boolean matrix decomposition * in the case of MLC-BMaD, or matrix multiplication based on SVD for PLST. * * @param D the instances to transform into new instances with transformed labels. The * Instances consist of features and original labels. * @return The resulting instances. Instances consist of features and transformed labels. *///from ww w . ja v a2 s. c o m @Override public Instances transformLabels(Instances D) throws Exception { Instances features = this.extractPart(D, false); Instances labels = this.extractPart(D, true); Matrix labelMatrix = MatrixUtils.instancesToMatrix(labels); // first, lets do the preprocessing as in the original implementation double[] averages = new double[labels.numAttributes()]; for (int i = 0; i < labels.numAttributes(); i++) { double[] column = labels.attributeToDoubleArray(i); double sum = 0.0; for (int j = 0; j < column.length; j++) { if (column[j] == 1.0) { sum += 1.0; } else { sum += -1; // The algorithm needs 1/-1 coding, so let's // change the matrix here labelMatrix.set(j, i, -1.0); } } averages[i] = sum / column.length; } double[][] shiftMatrix = new double[1][labels.numAttributes()]; shiftMatrix[0] = averages; // remember shift for prediction this.m_Shift = new Matrix(shiftMatrix); double[][] shiftTrainMatrix = new double[labels.numInstances()][labels.numAttributes()]; for (int i = 0; i < labels.numInstances(); i++) { shiftTrainMatrix[i] = averages; } Matrix trainShift = new Matrix(shiftTrainMatrix); SingularValueDecomposition svd = new SingularValueDecomposition(labelMatrix.minus(trainShift)); // The paper uses U here, but the implementation by the authors uses V, so // we used V here too. 
m_v = svd.getV(); //remove columns so only size are left double[][] newArr = new double[m_v.getRowDimension()][this.getSize()]; for (int i = 0; i < newArr.length; i++) { for (int j = 0; j < newArr[i].length; j++) { newArr[i][j] = m_v.getArray()[i][j]; } } m_v = new Matrix(newArr); // now the multiplication (last step of the algorithm) Matrix compressed = MatrixUtils.instancesToMatrix(labels).times(this.m_v); // and transform it to Instances ArrayList<Attribute> attinfos = new ArrayList<Attribute>(); for (int i = 0; i < compressed.getColumnDimension(); i++) { Attribute att = new Attribute("att" + i); attinfos.add(att); } // create pattern instances (also used in prediction) note: this is a regression // problem now, labels are not binary this.m_PatternInstances = new Instances("compressedlabels", attinfos, compressed.getRowDimension()); // fill result Instances Instances result = Instances.mergeInstances(MatrixUtils.matrixToInstances(compressed, m_PatternInstances), features); result.setClassIndex(this.getSize()); return result; }