List of usage examples for weka.core.Instances.classIndex()

public int classIndex()
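Before the source-file examples, a minimal sketch of MEKA's convention for this method: the first L attributes of a dataset are the labels, so after setClassIndex(L) the return value of classIndex() doubles as the label count. The file path below is hypothetical; MLUtils.prepareData parses a label count such as "-C 6" from the @relation name.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import meka.core.MLUtils;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical MEKA-formatted ARFF; its @relation name carries the
        // label count, e.g. "Music: -C 6".
        Instances D = DataSource.read("data/Music.arff");
        // Parses "-C <L>" from the relation name and calls D.setClassIndex(L).
        MLUtils.prepareData(D);
        int L = D.classIndex(); // in MEKA code, this is the number of labels
        System.out.println("Number of labels L = " + L);
    }
}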
From source file: meka.classifiers.multilabel.BRq.java
License: Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {
    testCapabilities(data);
    int c = data.classIndex();
    if (getDebug())
        System.out.print("-: Creating " + c + " models (" + m_Classifier.getClass().getName() + "): ");
    m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, c);
    Instances sub_data = null;

    for (int i = 0; i < c; i++) {

        int indices[][] = new int[c][c - 1];
        for (int j = 0, k = 0; j < c; j++) {
            if (j != i) {
                indices[i][k++] = j;
            }
        }

        // Select only class attribute 'i'
        Remove FilterRemove = new Remove();
        FilterRemove.setAttributeIndicesArray(indices[i]);
        FilterRemove.setInputFormat(data);
        FilterRemove.setInvertSelection(true);
        sub_data = Filter.useFilter(data, FilterRemove);
        sub_data.setClassIndex(0);

        /* BEGIN downsample for this link */
        sub_data.randomize(m_Random);
        int numToRemove = sub_data.numInstances()
                - (int) Math.round(sub_data.numInstances() * m_DownSampleRatio);
        for (int m = 0, removed = 0; m < sub_data.numInstances(); m++) {
            if (sub_data.instance(m).classValue() <= 0.0) {
                sub_data.instance(m).setClassMissing();
                if (++removed >= numToRemove)
                    break;
            }
        }
        sub_data.deleteWithMissingClass();
        /* END downsample for this link */

        // Build the classifier for that class
        m_MultiClassifiers[i].buildClassifier(sub_data);
        if (getDebug())
            System.out.print(" " + (i + 1));
    }
    if (getDebug())
        System.out.println(" :-");
    m_InstancesTemplate = new Instances(sub_data, 0);
}
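The per-label projection above hinges on Weka's Remove filter: with setInvertSelection(true), the listed attribute indices are kept and everything else is deleted. A standalone sketch of that pattern (class and method names here are illustrative):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class KeepAttributesDemo {
    // Keep only the attributes whose indices are listed, deleting the rest;
    // setInvertSelection(true) turns Remove into a "keep these" filter.
    public static Instances keepOnly(Instances data, int[] indicesToKeep) throws Exception {
        Remove filter = new Remove();
        filter.setAttributeIndicesArray(indicesToKeep);
        filter.setInvertSelection(true);
        filter.setInputFormat(data); // must be called after setting the options
        Instances projected = Filter.useFilter(data, filter);
        projected.setClassIndex(0); // treat the first remaining attribute as the class
        return projected;
    }
}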
From source file: meka.classifiers.multilabel.cc.CNode.java
License: Open Source License
/**
 * Transform - transform dataset D for this node.
 * this.j defines the current node index, e.g., 3
 * this.paY[] defines parents, e.g., [1,4]
 * we should remove the rest, e.g., [0,2,5,...,L-1]
 * @return the dataset D with all variables removed EXCEPT the current node and its parents.
 */
public Instances transform(Instances D) throws Exception {
    int L = D.classIndex();
    d = D.numAttributes() - L;
    int keep[] = A.append(this.paY, j); // keep all parents and self!
    Arrays.sort(keep);
    int remv[] = A.invert(keep, L); // i.e., remove the rest < L
    Arrays.sort(remv);
    map = new int[L];
    for (int j = 0; j < L; j++) {
        map[j] = Arrays.binarySearch(keep, j);
    }
    Instances D_ = F.remove(new Instances(D), remv, false);
    D_.setClassIndex(map[this.j]);
    return D_;
}
From source file: meka.classifiers.multilabel.cc.CNode.java
License: Open Source License
/**
 * Transform.
 * @param D original Instances
 * @param c index of the attribute to become the class Attribute
 * @param pa_c the parent indices of c
 * @return new Instances T
 */
public static Instances transform(Instances D, int c, int pa_c[]) throws Exception {
    int L = D.classIndex();
    int keep[] = A.append(pa_c, c); // keep all parents and self!
    Arrays.sort(keep);
    int remv[] = A.invert(keep, L); // i.e., remove the rest < L
    Arrays.sort(remv);
    Instances T = F.remove(new Instances(D), remv, false);
    int map[] = new int[L];
    for (int j = 0; j < L; j++) {
        map[j] = Arrays.binarySearch(keep, j);
    }
    T.setClassIndex(map[c]);
    return T;
}
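Both transform variants use the same index arithmetic: keep the target label and its parents, remove every other label, and recover the new position of an old label index via Arrays.binarySearch on the sorted keep array. A plain-Java illustration with made-up values (L=6, c=3, parents {1,4}), writing the keep/remove arrays out by hand instead of calling MEKA's A utility class:

import java.util.Arrays;

public class TransformIndexDemo {
    public static void main(String[] args) {
        int L = 6;                // number of labels
        int c = 3;                // the node that becomes the class
        int[] keep = { 1, 3, 4 }; // parents {1,4} plus c, sorted
        int[] remv = { 0, 2, 5 }; // the remaining label indices < L

        // map[j] = position of label j among the kept attributes,
        // or a negative value if label j was removed
        int[] map = new int[L];
        for (int j = 0; j < L; j++) {
            map[j] = Arrays.binarySearch(keep, j);
        }
        System.out.println(Arrays.toString(map));       // [-1, 0, -2, 1, 2, -4]
        System.out.println("new class index = " + map[c]); // 1
    }
}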
From source file: meka.classifiers.multilabel.CCq.java
License: Open Source License
@Override
public void buildClassifier(Instances Train) throws Exception {
    testCapabilities(Train);
    this.m_NumClasses = Train.classIndex();
    int indices[] = MLUtils.gen_indices(m_NumClasses);
    MLUtils.randomize(indices, new Random(m_S));
    if (getDebug())
        System.out.print(":- Chain (");
    root = new QLink(indices, 0, Train);
    if (getDebug())
        System.out.println(" ) -:");
}
From source file: meka.classifiers.multilabel.CDN.java
License: Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    int N = D.numInstances();
    int L = D.classIndex();
    h = new Classifier[L];
    m_R = new Random(m_S);
    D_templates = new Instances[L];

    // Build L probabilistic models, each to predict Y_j | X, Y_{-j}; save the templates.
    for (int j = 0; j < L; j++) {
        // X = [Y[0],...,Y[j-1],Y[j+1],...,Y[L],X]
        D_templates[j] = new Instances(D);
        D_templates[j].setClassIndex(j);
        // train H[j] : X -> Y
        h[j] = AbstractClassifier.forName(getClassifier().getClass().getName(),
                ((AbstractClassifier) getClassifier()).getOptions());
        h[j].buildClassifier(D_templates[j]);
    }
}
From source file: meka.classifiers.multilabel.CDT.java
License: Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    int L = D.classIndex();
    int d = D.numAttributes() - L;
    m_R = new Random(getSeed());
    int width = m_Width;
    if (m_Width < 0)
        width = (int) Math.sqrt(L);
    else if (m_Width == 0) {
        width = L;
    }
    nodes = new CNode[L];

    /*
     * Make the Trellis.
     */
    if (getDebug())
        System.out.println("Make Trellis of width " + m_Width);
    int indices[] = A.make_sequence(L);
    A.shuffle(indices, new Random(getSeed()));
    trel = new Trellis(indices, width, m_Density);
    if (getDebug())
        System.out.println("==>\n" + trel.toString());

    /* Rearrange the Trellis */
    if (!m_DependencyMetric.equals("None"))
        trel = CT.orderTrellis(trel, StatUtils.margDepMatrix(D, m_DependencyMetric), m_R);

    /*
     * Build the Trellis.
     */
    if (getDebug())
        System.out.println("Build Trellis");
    if (getDebug())
        System.out.println("nodes: " + Arrays.toString(trel.indices));
    for (int j = 0; j < L; j++) {
        int jv = trel.indices[j];
        if (getDebug()) {
            System.out.println("Build Node h_" + jv + "] : P(y_" + jv + " | x_[1:d], y_"
                    + Arrays.toString(trel.getNeighbours(j)) + ")");
        }
        nodes[jv] = new CNode(jv, null, trel.getNeighbours(j));
        nodes[jv].build(D, m_Classifier);
    }
}
From source file: meka.classifiers.multilabel.DBPNN.java
License: Open Source License
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    // Extract variables
    int L = D.classIndex();
    int d = D.numAttributes() - L;
    double X_[][] = MLUtils.getXfromD(D);
    double Y_[][] = MLUtils.getYfromD(D);

    // Build an RBM
    if (getDebug())
        System.out.println("Build RBM(s) ... ");
    String ops[] = this.getOptions();
    dbm = new DBM(ops);
    dbm.setE(m_E);
    ((DBM) dbm).setH(m_H, m_N);

    long before = System.currentTimeMillis();
    dbm.train(X_, m_H); // batch train
    rbm_time = System.currentTimeMillis() - before;

    if (getDebug()) {
        Matrix tW[] = dbm.getWs();
        System.out.println("X = \n" + MatrixUtils.toString(X_));
        for (int l = 0; l < tW.length; l++) {
            System.out.println("W = \n" + MatrixUtils.toString(tW[l].getArray()));
        }
        System.out.println("Y = \n" + MatrixUtils.toString(Y_));
    }

    /* Trim W's: instead of (d+1 x h+1), they become (d+1, h)
       wwb        ww
       wwb        ww
       wwb   ->   ww
       wwb        ww
       bbb
       (this is because RBMs go both ways -- have biases both ways --
       whereas BP only goes up)
       TODO the best thing would be to keep different views of the same array ... */
    Matrix W[] = trimBiases(dbm.getWs());

    // Back propagate with batch size of 1 to fine tune the DBM into a supervised DBN
    if (m_Classifier instanceof BPNN) {
        if (getDebug())
            System.out.println("You have chosen to use BPNN (good!)");
    } else {
        System.err.println("[WARNING] Was expecting BPNN as the base classifier (will set it now, with default parameters) ...");
        m_Classifier = new BPNN();
    }

    int i_Y = W.length - 1; // the final W
    W[i_Y] = RBM.makeW(W[i_Y].getRowDimension() - 1, W[i_Y].getColumnDimension() - 1, new Random(1)); //
    ((BPNN) m_Classifier).presetWeights(W, L); // this W will be modified
    ((BPNN) m_Classifier).train(X_, Y_); // could also have called buildClassifier(D)

    /*
    for (int i = 0; i < 1000; i++) {
        double E = ((BPNN) m_Classifier).update(X_, Y_);
        //double Ypred[][] = ((BPNN) m_Classifier).popY(X_);
        System.out.println("i=" + i + ", MSE=" + E);
    }
    */

    if (getDebug()) {
        Matrix tW[] = W;
        //System.out.println("X = \n" + M.toString(X_));
        System.out.println("W = \n" + MatrixUtils.toString(tW[0].getArray()));
        System.out.println("W = \n" + MatrixUtils.toString(tW[1].getArray()));
        double Ypred[][] = ((BPNN) m_Classifier).popY(X_);
        System.out.println("Y = \n" + MatrixUtils.toString(MatrixUtils.threshold(Ypred, 0.5)));
        //System.out.println("Z = \n" + M.toString(M.threshold(Z, 0.5)));
    }
}
From source file: meka.classifiers.multilabel.Evaluation.java
License: Open Source License
/**
 * RunExperiment - Build and evaluate a model with command-line options.
 * @param h multi-label classifier
 * @param options command line options
 */
public static void runExperiment(MultiLabelClassifier h, String options[]) throws Exception {

    // Help
    if (Utils.getOptionPos('h', options) >= 0) {
        System.out.println("\nHelp requested");
        Evaluation.printOptions(h.listOptions());
        return;
    }

    h.setOptions(options);

    if (h.getDebug())
        System.out.println("Loading and preparing dataset ...");

    // Load Instances from a file
    Instances D_train = loadDataset(options);
    Instances D_full = D_train;

    // Try to extract and set a class index from the @relation name
    MLUtils.prepareData(D_train);

    // Override the number of classes with command-line option (optional)
    if (Utils.getOptionPos('C', options) >= 0) {
        int L = Integer.parseInt(Utils.getOption('C', options));
        D_train.setClassIndex(L);
    }

    // If we still haven't found the -C option, we can't continue (we don't know how many labels there are)
    int L = D_train.classIndex();
    if (L <= 0) {
        // apparently the dataset didn't contain the '-C' flag, check in the command line options ...
        throw new Exception("[Error] Number of labels not specified.\n\tYou must set the number of labels with the -C option, either inside the @relation tag of the Instances file, or on the command line.");
    }

    // Randomize (Instances)
    int seed = (Utils.getOptionPos('s', options) >= 0) ? Integer.parseInt(Utils.getOption('s', options)) : 0;
    if (Utils.getFlag('R', options)) {
        D_train.randomize(new Random(seed));
    }

    boolean Threaded = false;
    if (Utils.getOptionPos("Thr", options) >= 0) {
        Threaded = Utils.getFlag("Thr", options);
    }

    // Verbosity Option
    String voption = "1";
    if (Utils.getOptionPos("verbosity", options) >= 0) {
        voption = Utils.getOption("verbosity", options);
    }

    // Save for later?
    //String fname = null;
    //if (Utils.getOptionPos('f', options) >= 0) {
    //    fname = Utils.getOption('f', options);
    //}

    // Dump for later?
    String dname = null;
    if (Utils.getOptionPos('d', options) >= 0) {
        dname = Utils.getOption('d', options);
    }

    // Load from file?
    String lname = null;
    Instances dataHeader = null;
    if (Utils.getOptionPos('l', options) >= 0) {
        lname = Utils.getOption('l', options);
        Object[] data = SerializationHelper.readAll(lname);
        h = (MultiLabelClassifier) data[0];
        if (data.length > 1)
            dataHeader = (Instances) data[1];
        //Object o[] = SerializationHelper.readAll(lname);
        //h = (MultilabelClassifier) o[0];
    }

    try {
        Result r = null;

        // Threshold OPtion
        String top = "PCut1"; // default
        if (Utils.getOptionPos("threshold", options) >= 0)
            top = Utils.getOption("threshold", options);

        if (Utils.getOptionPos('x', options) >= 0) {
            // CROSS-FOLD-VALIDATION

            int numFolds = MLUtils.getIntegerOption(Utils.getOption('x', options), 10); // default 10

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            r = Evaluation.cvModel(h, D_train, numFolds, top, voption);
            System.out.println(r.toString());

        } else {
            // TRAIN-TEST SPLIT

            Instances D_test = null;

            if (Utils.getOptionPos('T', options) >= 0) {
                // load separate test set
                try {
                    D_test = loadDataset(options, 'T');
                    MLUtils.prepareData(D_test);
                } catch (Exception e) {
                    throw new Exception("[Error] Failed to Load Test Instances from file.", e);
                }
            } else {
                // split training set into train and test sets
                // default split
                int N_T = (int) (D_train.numInstances() * 0.60);
                if (Utils.getOptionPos("split-percentage", options) >= 0) {
                    // split by percentage
                    double percentTrain = Double.parseDouble(Utils.getOption("split-percentage", options));
                    N_T = (int) Math.round((D_train.numInstances() * (percentTrain / 100.0)));
                } else if (Utils.getOptionPos("split-number", options) >= 0) {
                    // split by number
                    N_T = Integer.parseInt(Utils.getOption("split-number", options));
                }

                int N_t = D_train.numInstances() - N_T;
                D_test = new Instances(D_train, N_T, N_t);
                D_train = new Instances(D_train, 0, N_T);
            }

            // Invert the split?
            if (Utils.getFlag('i', options)) {
                //boolean INVERT = Utils.getFlag('i', options);
                Instances temp = D_test;
                D_test = D_train;
                D_train = temp;
            }

            // Check for remaining options
            Utils.checkForRemainingOptions(options);

            if (h.getDebug())
                System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D_train) + "\tL=" + L
                        + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances() + ")\tLC(t:T)="
                        + Utils.roundDouble(MLUtils.labelCardinality(D_train, L), 2) + ":"
                        + Utils.roundDouble(MLUtils.labelCardinality(D_test, L), 2) + ")");

            if (lname != null) {
                // h is already built, and loaded from a file, test it!
                r = testClassifier(h, D_test);
                String t = top;
                if (top.startsWith("PCut")) {
                    // if PCut is specified we need the training data,
                    // so that we can calibrate the threshold!
                    t = MLEvalUtils.getThreshold(r.predictions, D_train, top);
                }
                r = evaluateModel(h, D_test, t, voption);
            } else {
                // check if train and test set sizes are > 0
                if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                    if (Threaded) {
                        r = evaluateModelM(h, D_train, D_test, top, voption);
                    } else {
                        r = evaluateModel(h, D_train, D_test, top, voption);
                    }
                } else {
                    // otherwise just train on the full set. Maybe better to throw an exception.
                    h.buildClassifier(D_full);
                }
            }

            // @todo, if D_train == null, assume h is already trained
            if (D_train.numInstances() > 0 && D_test.numInstances() > 0) {
                System.out.println(r.toString());
            }
        }

        // Save model to file?
        if (dname != null) {
            dataHeader = new Instances(D_train, 0);
            SerializationHelper.writeAll(dname, new Object[] { h, dataHeader });
        }

    } catch (Exception e) {
        e.printStackTrace();
        Evaluation.printOptions(h.listOptions());
        System.exit(1);
    }

    System.exit(0);
}
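A possible programmatic invocation of runExperiment, mirroring the flags parsed above (-x for cross-validation folds, -threshold and -verbosity as shown). The training-file flag and dataset path are assumptions, and BR is MEKA's binary-relevance classifier:

import meka.classifiers.multilabel.BR;
import meka.classifiers.multilabel.Evaluation;

public class RunExperimentDemo {
    public static void main(String[] args) throws Exception {
        // Assumed flags: -t names the training ARFF (path hypothetical);
        // -x requests 10-fold CV; -threshold and -verbosity match the
        // options parsed in runExperiment above.
        String[] options = { "-t", "data/Music.arff", "-x", "10",
                "-threshold", "PCut1", "-verbosity", "1" };
        Evaluation.runExperiment(new BR(), options);
    }
}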
From source file: meka.classifiers.multilabel.Evaluation.java
License: Open Source License
/**
 * IsMT - see if dataset D is multi-target (else only multi-label).
 * @param D data
 * @return true iff D is multi-target (else false)
 */
public static boolean isMT(Instances D) {
    int L = D.classIndex();
    for (int j = 0; j < L; j++) {
        if (D.attribute(j).isNominal()) {
            // Classification
            if (D.attribute(j).numValues() > 2) {
                // Multi-class
                return true;
            }
        } else {
            // Regression?
            System.err.println("[Warning] Found a non-nominal class -- not sure how this happened?");
        }
    }
    return false;
}
From source file: meka.classifiers.multilabel.Evaluation.java
License: Open Source License
/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 * @param h a multi-output classifier
 * @param D the full dataset (to be split into CV folds)
 * @param numFolds number of folds of CV
 * @param top Threshold OPtion (pertains to multi-label data only)
 * @param vop Verbosity OPtion (which measures do we want to calculate/output)
 * @return Result raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop)
        throws Exception {
    Result r_[] = new Result[numFolds];
    for (int i = 0; i < numFolds; i++) {
        Instances D_train = D.trainCV(numFolds, i);
        Instances D_test = D.testCV(numFolds, i);
        if (h.getDebug())
            System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D)
                    + "\tL=" + D.classIndex() + "\tD(t:T)=(" + D_train.numInstances() + ":"
                    + D_test.numInstances() + ")\tLC(t:T)="
                    + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2) + ":"
                    + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
        r_[i] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
    }
    Result r = MLEvalUtils.combinePredictions(r_);
    if (h instanceof MultiTargetClassifier || isMT(D)) {
        r.setInfo("Type", "MT-CV");
    } else if (h instanceof MultiLabelClassifier) {
        r.setInfo("Type", "ML-CV");
        try {
            r.setInfo("Threshold", String.valueOf(Double.parseDouble(top)));
        } catch (Exception e) {
            System.err.println("[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
            r.setInfo("Threshold", String.valueOf(0.5));
        }
    }
    r.setInfo("Verbosity", vop);
    r.output = Result.getStats(r, vop);
    // Need to reset these because of CV
    r.setValue("Number of training instances", D.numInstances());
    r.setValue("Number of test instances", D.numInstances());
    return r;
}
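Calling cvModel directly, using the signature shown above (classifier, data, folds, threshold option, verbosity option); the dataset path is hypothetical, as in the earlier sketch:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import meka.classifiers.multilabel.BR;
import meka.classifiers.multilabel.Evaluation;
import meka.core.MLUtils;
import meka.core.Result;

public class CvModelDemo {
    public static void main(String[] args) throws Exception {
        Instances D = DataSource.read("data/Music.arff"); // hypothetical path
        MLUtils.prepareData(D); // sets the class index from the @relation name
        // 10-fold CV with the default "PCut1" threshold option and verbosity "1"
        Result r = Evaluation.cvModel(new BR(), D, 10, "PCut1", "1");
        System.out.println(r.toString());
    }
}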