List of usage examples for weka.core.Instances.instance(int index)
public Instance instance(int index)
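Before the per-project examples below, here is a minimal, self-contained sketch of the method in isolation. The class name InstanceAccessDemo and the path "weather.arff" are placeholders, not part of any project listed on this page:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessDemo {
  public static void main(String[] args) throws Exception {
    // "weather.arff" is a placeholder path; any ARFF dataset works.
    Instances data = DataSource.read("weather.arff");

    // instance(int index) returns the Instance stored at the given
    // zero-based position; valid indices run from 0 to numInstances() - 1.
    for (int i = 0; i < data.numInstances(); i++) {
      Instance inst = data.instance(i);
      System.out.println(i + ": " + inst);
    }
  }
}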
From source file: milk.classifiers.MILR.java
License: Open Source License
/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars train) throws Exception {

  if (train.classAttribute().type() != Attribute.NOMINAL) {
    throw new Exception("Class attribute must be nominal.");
  }
  if (train.checkForStringAttributes()) {
    throw new Exception("Can't handle string attributes!");
  }

  /*train = new Instances(train);
  train.deleteWithMissingClass();
  if (train.numInstances() == 0) {
    throw new Exception("No train instances without missing class value!");
  }
  m_ReplaceMissingValues = new ReplaceMissingValuesFilter();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);
  m_NominalToBinary = new NominalToBinaryFilter();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);*/

  m_ClassIndex = train.classIndex();
  m_IdIndex = train.idIndex();
  m_NumClasses = train.numClasses();

  int nK = 1; // Only K-1 class labels needed
  int nR = train.numAttributes() - 2;
  int nC = train.numExemplars();

  m_Data = new double[nC][nR][]; // Data values
  m_Classes = new int[nC]; // Class values
  m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

  double[] xMean = new double[nR]; // Mean of mean
  double[] xSD = new double[nR]; // Mode of stddev
  int g1NE = 0; // # of bags with >1 instances
  double sY1 = 0, sY0 = 0, totIns = 0; // Number of classes

  if (m_Debug) {
    System.out.println("Extracting data...");
  }

  for (int h = 0; h < m_Data.length; h++) {
    Exemplar current = train.exemplar(h);
    m_Classes[h] = (int) current.classValue(); // Class value starts from 0
    Instances currInsts = current.getInstances();
    int nI = currInsts.numInstances();
    totIns += (double) nI;

    int idx = 0;
    for (int i = 0; i < train.numAttributes(); i++) {
      if ((i == m_ClassIndex) || (i == m_IdIndex))
        continue;

      // initialize m_data[][][]
      m_Data[h][idx] = new double[nI];
      for (int k = 0; k < nI; k++) {
        m_Data[h][idx][k] = currInsts.instance(k).value(i);
        //xMean[idx] += m_Data[h][idx][k];
        //xSD[idx] += m_Data[h][idx][k]*m_Data[h][idx][k];
        xMean[idx] += m_Data[h][idx][k] / (double) nI;
        xSD[idx] += m_Data[h][idx][k] * m_Data[h][idx][k] / (double) nI;
      }
      idx++;
    }

    // Class count
    if (m_Classes[h] == 1)
      sY1++;
    else
      sY0++;
  }

  for (int j = 0; j < nR; j++) {
    //xMean[j] = xMean[j]/totIns;
    //xSD[j] = Math.sqrt(xSD[j]/(totIns-1.0)-xMean[j]*xMean[j]*totIns/(totIns-1.0));
    xMean[j] = xMean[j] / (double) nC;
    xSD[j] = Math.sqrt(xSD[j] / ((double) nC - 1.0)
        - xMean[j] * xMean[j] * (double) nC / ((double) nC - 1.0));
  }

  if (m_Debug) {
    // Output stats about input data
    System.out.println("Descriptives...");
    System.out.println(sY0 + " bags have class 0 and " + sY1 + " bags have class 1");
    System.out.println("\n Variable Avg SD ");
    for (int j = 0; j < nR; j++)
      System.out.println(Utils.doubleToString(j, 8, 4)
          + Utils.doubleToString(xMean[j], 10, 4)
          + Utils.doubleToString(xSD[j], 10, 4));
  }

  // Normalise input data and remove ignored attributes
  for (int i = 0; i < nC; i++) {
    for (int j = 0; j < nR; j++) {
      for (int k = 0; k < m_Data[i][j].length; k++) {
        if (xSD[j] != 0)
          m_Data[i][j][k] = (m_Data[i][j][k] - xMean[j]) / xSD[j];
      }
    }
  }

  if (m_Debug) {
    System.out.println("\nIteration History...");
  }

  double x[] = new double[nR + 1];
  x[0] = Math.log((sY1 + 1.0) / (sY0 + 1.0));
  //double[] b = new double[x.length];
  //b[0] = Double.NaN;
  double[][] b = new double[2][x.length];
  b[0][0] = Double.NaN;
  b[1][0] = Double.NaN;
  for (int q = 1; q < x.length; q++) {
    x[q] = 0.0;
    //b[q] = Double.NaN;
    b[0][q] = Double.NaN;
    b[1][q] = Double.NaN;
  }

  OptEng opt = new OptEng();
  opt.setDebug(m_Debug);
  m_Par = opt.findArgmin(x, b);
  while (m_Par == null) {
    m_Par = opt.getVarbValues();
    if (m_Debug)
      System.out.println("200 iterations finished, not enough!");
    m_Par = opt.findArgmin(m_Par, b);
  }
  if (m_Debug)
    System.out.println(" -------------<Converged>--------------");

  // Convert coefficients back to non-normalized attribute units
  for (int j = 1; j < nR + 1; j++) {
    if (xSD[j - 1] != 0) {
      m_Par[j] /= xSD[j - 1];
      m_Par[0] -= m_Par[j] * xMean[j - 1];
    }
  }
}
From source file: milk.classifiers.MILR.java
License: Open Source License
/**
 * Computes the distribution for a given exemplar.
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the distribution
 * @exception Exception if the distribution can't be computed successfully
 */
public double[] distributionForExemplar(Exemplar exmp) throws Exception {

  /*m_ReplaceMissingValues.input(instance);
  instance = m_ReplaceMissingValues.output();
  m_NominalToBinary.input(instance);
  instance = m_NominalToBinary.output();*/

  // Extract the data
  Instances ins = exmp.getInstances();
  int nI = ins.numInstances(), nA = ins.numAttributes();
  double[][] dat = new double[nI][nA + 1 - 2];
  for (int j = 0; j < nI; j++) {
    dat[j][0] = 1.0;
    int idx = 1;
    for (int k = 0; k < nA; k++) {
      if ((k == m_ClassIndex) || (k == m_IdIndex))
        continue;
      dat[j][idx] = ins.instance(j).value(k);
      idx++;
    }
  }

  // Compute the probability of the bag
  double[] distribution = new double[2];
  distribution[0] = 0.0; // Log-Prob. for class 0
  for (int i = 0; i < nI; i++) {
    double exp = 0.0;
    for (int r = 0; r < m_Par.length; r++)
      exp += m_Par[r] * dat[i][r];
    exp = Math.exp(exp);

    // Prob. updated for one instance
    distribution[0] -= Math.log(1.0 + exp);
  }
  distribution[0] = Math.exp(distribution[0]);

  // Prob. for class 1
  distribution[1] = 1.0 - distribution[0];

  return distribution;
}
From source file: milk.classifiers.MILRARITH.java
License: Open Source License
/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars train) throws Exception {

  if (train.classAttribute().type() != Attribute.NOMINAL) {
    throw new Exception("Class attribute must be nominal.");
  }
  if (train.checkForStringAttributes()) {
    throw new Exception("Can't handle string attributes!");
  }

  m_ClassIndex = train.classIndex();
  m_IdIndex = train.idIndex();
  m_NumClasses = train.numClasses();

  int nK = 1; // Only K-1 class labels needed
  int nR = train.numAttributes() - 2;
  int nC = train.numExemplars();

  m_Data = new double[nC][nR][]; // Data values
  m_Classes = new int[nC]; // Class values
  m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

  xMean = new double[nR]; // Mean of mean
  xSD = new double[nR]; // Mode of stddev
  int g1NE = 0; // # of bags with >1 instances
  double sY1 = 0, sY0 = 0, totIns = 0.0; // Number of classes
  int[] missingbags = new int[nR];

  if (m_Debug) {
    System.out.println("Extracting data...");
  }

  for (int h = 0; h < m_Data.length; h++) {
    Exemplar current = train.exemplar(h);
    m_Classes[h] = (int) current.classValue(); // Class value starts from 0
    Instances currInsts = current.getInstances();
    int nI = currInsts.numInstances();
    totIns += (double) nI;

    int idx = 0;
    for (int i = 0; i < train.numAttributes(); i++) {
      if ((i == m_ClassIndex) || (i == m_IdIndex))
        continue;

      // initialize m_data[][][]
      m_Data[h][idx] = new double[nI];
      double avg = 0, std = 0, num = 0;
      for (int k = 0; k < nI; k++) {
        if (!currInsts.instance(k).isMissing(i)) {
          m_Data[h][idx][k] = currInsts.instance(k).value(i);
          //xMean[idx] += m_Data[h][idx][k];
          //xSD[idx] += m_Data[h][idx][k]*m_Data[h][idx][k];
          avg += m_Data[h][idx][k];
          std += m_Data[h][idx][k] * m_Data[h][idx][k];
          num++;
        } else
          m_Data[h][idx][k] = Double.NaN;
      }
      if (num > 0) {
        xMean[idx] += avg / num;
        xSD[idx] += std / num;
      } else
        missingbags[idx]++;
      idx++;
    }

    // Class count
    if (m_Classes[h] == 1)
      sY1++;
    else
      sY0++;
  }

  for (int j = 0; j < nR; j++) {
    //xMean[j] = xMean[j]/totIns;
    //xSD[j] = Math.sqrt(xSD[j]/totIns-xMean[j]*xMean[j]*totIns/(totIns-1.0));
    xMean[j] = xMean[j] / (double) (nC - missingbags[j]);
    xSD[j] = Math.sqrt(xSD[j] / ((double) (nC - missingbags[j]) - 1.0)
        - xMean[j] * xMean[j] * (double) (nC - missingbags[j])
        / ((double) (nC - missingbags[j]) - 1.0));
  }

  if (m_Debug) {
    // Output stats about input data
    System.out.println("Descriptives...");
    System.out.println(sY0 + " bags have class 0 and " + sY1 + " bags have class 1");
    System.out.println("\n Variable Avg SD ");
    for (int j = 0; j < nR; j++)
      System.out.println(Utils.doubleToString(j, 8, 4)
          + Utils.doubleToString(xMean[j], 10, 4)
          + Utils.doubleToString(xSD[j], 10, 4));
  }

  // Normalise input data and remove ignored attributes
  for (int i = 0; i < nC; i++) {
    for (int j = 0; j < nR; j++) {
      for (int k = 0; k < m_Data[i][j].length; k++) {
        if (xSD[j] != 0) {
          if (!Double.isNaN(m_Data[i][j][k]))
            m_Data[i][j][k] = (m_Data[i][j][k] - xMean[j]) / xSD[j];
          else
            m_Data[i][j][k] = 0;
        }
      }
    }
  }

  if (m_Debug) {
    System.out.println("\nIteration History...");
  }

  double x[] = new double[nR + 1];
  x[0] = Math.log((sY1 + 1.0) / (sY0 + 1.0));
  double[][] b = new double[2][x.length];
  b[0][0] = Double.NaN;
  b[1][0] = Double.NaN;
  for (int q = 1; q < x.length; q++) {
    x[q] = 0.0;
    b[0][q] = Double.NaN;
    b[1][q] = Double.NaN;
  }

  OptEng opt = new OptEng();
  //opt.setDebug(m_Debug);
  //opt.setMaxIteration(200*x.length);
  m_Par = opt.findArgmin(x, b);
  while (m_Par == null) {
    m_Par = opt.getVarbValues();
    if (m_Debug)
      System.out.println("200 iterations finished, not enough!");
    m_Par = opt.findArgmin(m_Par, b);
  }
  if (m_Debug)
    System.out.println(" -------------<Converged>--------------");

  // feature selection use
  double[] fs = new double[nR];
  for (int k = 1; k < nR + 1; k++)
    fs[k - 1] = Math.abs(m_Par[k]);
  int[] idx = Utils.sort(fs);
  double max = fs[idx[idx.length - 1]];
  for (int k = idx.length - 1; k >= 0; k--)
    System.out.println(m_Attributes.attribute(idx[k] + 1).name() + "\t"
        + (fs[idx[k]] * 100 / max));

  // Convert coefficients back to non-normalized attribute units
  for (int j = 1; j < nR + 1; j++) {
    if (xSD[j - 1] != 0) {
      m_Par[j] /= xSD[j - 1];
      m_Par[0] -= m_Par[j] * xMean[j - 1];
    }
  }
}
From source file: milk.classifiers.MILRARITH.java
License: Open Source License
/**
 * Computes the distribution for a given exemplar.
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the distribution
 * @exception Exception if the distribution can't be computed successfully
 */
public double[] distributionForExemplar(Exemplar exmp) throws Exception {

  /*m_ReplaceMissingValues.input(instance);
  instance = m_ReplaceMissingValues.output();
  m_NominalToBinary.input(instance);
  instance = m_NominalToBinary.output();*/

  // Extract the data
  Instances ins = exmp.getInstances();
  int nI = ins.numInstances(), nA = ins.numAttributes();
  double[][] dat = new double[nI][nA + 1 - 2];
  for (int j = 0; j < nI; j++) {
    dat[j][0] = 1.0;
    int idx = 1;
    for (int k = 0; k < nA; k++) {
      if ((k == m_ClassIndex) || (k == m_IdIndex))
        continue;
      if (!ins.instance(j).isMissing(k))
        dat[j][idx] = ins.instance(j).value(k);
      else
        dat[j][idx] = xMean[idx - 1];
      idx++;
    }
  }

  // Compute the probability of the bag
  double[] distribution = new double[2];
  distribution[0] = 0.0; // Prob. for class 0
  for (int i = 0; i < nI; i++) {
    double exp = 0.0;
    for (int r = 0; r < m_Par.length; r++)
      exp += m_Par[r] * dat[i][r];
    exp = Math.exp(exp);

    // Prob. updated for one instance
    distribution[0] += 1.0 / (1.0 + exp);
  }
  distribution[0] /= (double) nI;

  // Prob. for class 1
  distribution[1] = 1.0 - distribution[0];

  return distribution;
}
From source file: milk.classifiers.MILRGEOM.java
License: Open Source License
/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars train) throws Exception {

  if (train.classAttribute().type() != Attribute.NOMINAL) {
    throw new Exception("Class attribute must be nominal.");
  }
  if (train.checkForStringAttributes()) {
    throw new Exception("Can't handle string attributes!");
  }

  /*train = new Instances(train);
  train.deleteWithMissingClass();
  if (train.numInstances() == 0) {
    throw new Exception("No train instances without missing class value!");
  }
  m_ReplaceMissingValues = new ReplaceMissingValuesFilter();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);
  m_NominalToBinary = new NominalToBinaryFilter();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);*/

  m_ClassIndex = train.classIndex();
  m_IdIndex = train.idIndex();
  m_NumClasses = train.numClasses();

  int nK = 1; // Only K-1 class labels needed
  int nR = train.numAttributes() - 2;
  int nC = train.numExemplars();

  m_Data = new double[nC][nR][]; // Data values
  m_Classes = new int[nC]; // Class values
  m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

  xMean = new double[nR]; // Mean of mean
  xSD = new double[nR]; // Mode of stddev
  int g1NE = 0; // # of bags with >1 instances
  double sY1 = 0, sY0 = 0, totIns = 0; // Number of classes
  int[] missingbags = new int[nR];

  if (m_Debug) {
    System.out.println("Extracting data...");
  }

  for (int h = 0; h < m_Data.length; h++) {
    Exemplar current = train.exemplar(h);
    m_Classes[h] = (int) current.classValue(); // Class value starts from 0
    Instances currInsts = current.getInstances();
    int nI = currInsts.numInstances();
    totIns += (double) nI;

    int idx = 0;
    for (int i = 0; i < train.numAttributes(); i++) {
      if ((i == m_ClassIndex) || (i == m_IdIndex))
        continue;

      // initialize m_data[][][]
      m_Data[h][idx] = new double[nI];
      double avg = 0, std = 0, num = 0;
      for (int k = 0; k < nI; k++) {
        if (!currInsts.instance(k).isMissing(i)) {
          m_Data[h][idx][k] = currInsts.instance(k).value(i);
          //xMean[idx] += m_Data[h][idx][k];
          //xSD[idx] += m_Data[h][idx][k]*m_Data[h][idx][k];
          avg += m_Data[h][idx][k];
          std += m_Data[h][idx][k] * m_Data[h][idx][k];
          num++;
        } else
          m_Data[h][idx][k] = Double.NaN;
      }
      if (num > 0) {
        xMean[idx] += avg / num;
        xSD[idx] += std / num;
      } else
        missingbags[idx]++;
      idx++;
    }

    // Class count
    if (m_Classes[h] == 1)
      sY1++;
    else
      sY0++;
  }

  for (int j = 0; j < nR; j++) {
    //xMean[j] = xMean[j]/totIns;
    //xSD[j] = Math.sqrt(xSD[j]/(totIns-1.0)-xMean[j]*xMean[j]*totIns/(totIns-1.0));
    xMean[j] = xMean[j] / (double) (nC - missingbags[j]);
    xSD[j] = Math.sqrt(xSD[j] / ((double) (nC - missingbags[j]) - 1.0)
        - xMean[j] * xMean[j] * (double) (nC - missingbags[j])
        / ((double) (nC - missingbags[j]) - 1.0));
  }

  if (m_Debug) {
    // Output stats about input data
    System.out.println("Descriptives...");
    System.out.println(sY0 + " bags have class 0 and " + sY1 + " bags have class 1");
    System.out.println("\n Variable Avg SD ");
    for (int j = 0; j < nR; j++)
      System.out.println(Utils.doubleToString(j, 8, 4)
          + Utils.doubleToString(xMean[j], 10, 4)
          + Utils.doubleToString(xSD[j], 10, 4));
  }

  // Normalise input data and remove ignored attributes
  for (int i = 0; i < nC; i++) {
    for (int j = 0; j < nR; j++) {
      for (int k = 0; k < m_Data[i][j].length; k++) {
        if (xSD[j] != 0) {
          if (!Double.isNaN(m_Data[i][j][k]))
            m_Data[i][j][k] = (m_Data[i][j][k] - xMean[j]) / xSD[j];
          else
            m_Data[i][j][k] = 0;
        }
      }
    }
  }

  if (m_Debug) {
    System.out.println("\nIteration History...");
  }

  double x[] = new double[nR + 1];
  x[0] = Math.log((sY1 + 1.0) / (sY0 + 1.0));
  double[][] b = new double[2][x.length];
  b[0][0] = Double.NaN;
  b[1][0] = Double.NaN;
  for (int q = 1; q < x.length; q++) {
    x[q] = 0.0;
    b[0][q] = Double.NaN;
    b[1][q] = Double.NaN;
  }

  OptEng opt = new OptEng();
  opt.setDebug(m_Debug);
  m_Par = opt.findArgmin(x, b);
  while (m_Par == null) {
    m_Par = opt.getVarbValues();
    if (m_Debug)
      System.out.println("200 iterations finished, not enough!");
    m_Par = opt.findArgmin(m_Par, b);
  }
  if (m_Debug)
    System.out.println(" -------------<Converged>--------------");

  // Convert coefficients back to non-normalized attribute units
  for (int j = 1; j < nR + 1; j++) {
    if (xSD[j - 1] != 0) {
      m_Par[j] /= xSD[j - 1];
      m_Par[0] -= m_Par[j] * xMean[j - 1];
    }
  }
}
From source file: milk.classifiers.MILRGEOM.java
License: Open Source License
/**
 * Computes the distribution for a given exemplar.
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the distribution
 * @exception Exception if the distribution can't be computed successfully
 */
public double[] distributionForExemplar(Exemplar exmp) throws Exception {

  /*m_ReplaceMissingValues.input(instance);
  instance = m_ReplaceMissingValues.output();
  m_NominalToBinary.input(instance);
  instance = m_NominalToBinary.output();*/

  // Extract the data
  Instances ins = exmp.getInstances();
  int nI = ins.numInstances(), nA = ins.numAttributes();
  double[][] dat = new double[nI][nA + 1 - 2];
  for (int j = 0; j < nI; j++) {
    dat[j][0] = 1.0;
    int idx = 1;
    for (int k = 0; k < nA; k++) {
      if ((k == m_ClassIndex) || (k == m_IdIndex))
        continue;
      if (!ins.instance(j).isMissing(k))
        dat[j][idx] = ins.instance(j).value(k);
      else
        dat[j][idx] = xMean[idx - 1];
      idx++;
    }
  }

  // Compute the log-odds of the bag
  double[] distribution = new double[m_NumClasses];
  for (int i = 0; i < nI; i++) {
    double exp = 0.0;
    for (int r = 0; r < m_Par.length; r++)
      exp += m_Par[r] * dat[i][r];
    distribution[1] += exp / (double) nI;
  }
  distribution[1] = 1.0 / (1.0 + Math.exp(-distribution[1]));
  distribution[0] = 1 - distribution[1];
  //Utils.normalize(distribution);

  return distribution;
}
From source file: milk.classifiers.MINND.java
License: Open Source License
/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new exemplar.
 *
 * @param ex the new exemplar
 */
private void updateMinMax(Exemplar ex) {
  Instances insts = ex.getInstances();
  int m = 0;
  for (int j = 0; j < insts.numAttributes(); j++) {
    if ((j != ex.idIndex()) && (j != ex.classIndex())) {
      if (insts.attribute(j).isNumeric()) {
        for (int k = 0; k < insts.numInstances(); k++) {
          Instance ins = insts.instance(k);
          if (!ins.isMissing(j)) {
            if (Double.isNaN(m_MinArray[m])) {
              m_MinArray[m] = ins.value(j);
              m_MaxArray[m] = ins.value(j);
            } else {
              if (ins.value(j) < m_MinArray[m])
                m_MinArray[m] = ins.value(j);
              else if (ins.value(j) > m_MaxArray[m])
                m_MaxArray[m] = ins.value(j);
            }
          }
        }
      }
      m++;
    }
  }
}
From source file: milk.classifiers.MINND.java
License: Open Source License
/**
 * Scales the given exemplar so that the returned exemplar
 * has values between 0 and 1 in each dimension.
 *
 * @param before the given exemplar
 * @return the resultant exemplar after scaling
 * @exception Exception if the given exemplar cannot be scaled properly
 */
private Exemplar scale(Exemplar before) throws Exception {
  Instances data = before.getInstances();
  Exemplar after = new Exemplar(before, 0);
  for (int i = 0; i < data.numInstances(); i++) {
    Instance datum = data.instance(i);
    Instance inst = (Instance) datum.copy();
    int k = 0;
    for (int j = 0; j < data.numAttributes(); j++) {
      if ((j != before.idIndex()) && (j != before.classIndex())) {
        if (data.attribute(j).isNumeric())
          inst.setValue(j, (datum.value(j) - m_MinArray[k]) / (m_MaxArray[k] - m_MinArray[k]));
        k++;
      }
    }
    after.add(inst);
  }
  return after;
}
From source file: milk.classifiers.MIRBFNetwork.java
License: Open Source License
public Exemplars transform(Exemplars ex) throws Exception {

  // Throw all the instances together
  Instances data = new Instances(ex.exemplar(0).getInstances());
  for (int i = 0; i < ex.numExemplars(); i++) {
    Exemplar curr = ex.exemplar(i);
    double weight = 1.0 / (double) curr.getInstances().numInstances();
    for (int j = 0; j < curr.getInstances().numInstances(); j++) {
      Instance inst = (Instance) curr.getInstances().instance(j).copy();
      inst.setWeight(weight);
      data.add(inst);
    }
  }
  double factor = (double) data.numInstances() / (double) data.sumOfWeights();
  for (int i = 0; i < data.numInstances(); i++) {
    data.instance(i).setWeight(data.instance(i).weight() * factor);
  }

  SimpleKMeans kMeans = new SimpleKMeans();
  kMeans.setNumClusters(m_num_clusters);
  MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
  clust.setClusterer(kMeans);
  m_clm.setDensityBasedClusterer(clust);
  m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
  m_clm.setInputFormat(data);

  // Use filter and discard result
  Instances tempData = Filter.useFilter(data, m_clm);
  tempData = new Instances(tempData, 0);
  tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

  // Go through exemplars and add them to new dataset
  Exemplars newExs = new Exemplars(tempData);
  for (int i = 0; i < ex.numExemplars(); i++) {
    Exemplar curr = ex.exemplar(i);
    Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
    temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
    for (int j = 0; j < temp.numInstances(); j++) {
      temp.instance(j).setValue(0, curr.idValue());
    }
    newExs.add(new Exemplar(temp));
  }

  //System.err.println("Finished transforming");
  //System.err.println(newExs);
  return newExs;
}
From source file: milk.classifiers.MIRBFNetwork.java
License: Open Source License
public Exemplar transform(Exemplar test) throws Exception {
  Instances temp = Filter.useFilter(test.getInstances(), m_clm);
  temp.insertAttributeAt(test.getInstances().attribute(0), 0);
  for (int j = 0; j < temp.numInstances(); j++) {
    temp.instance(j).setValue(0, test.idValue());
    //System.err.println(temp.instance(j));
  }
  return new Exemplar(temp);
}
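All of the examples above share one access pattern: index into a bag's Instances with instance(k), skip the class and bag-id columns, and guard numeric reads with isMissing(). A minimal sketch of that loop follows; the helper name extractValues is hypothetical, and classIndex/idIndex stand in for the m_ClassIndex/m_IdIndex fields used above:

// Hypothetical helper distilling the shared access pattern from the examples.
static void extractValues(weka.core.Instances bag, int classIndex, int idIndex) {
  for (int k = 0; k < bag.numInstances(); k++) {
    weka.core.Instance ins = bag.instance(k);
    for (int j = 0; j < bag.numAttributes(); j++) {
      if (j == classIndex || j == idIndex)
        continue; // ignore the class and bag-id attributes
      if (!ins.isMissing(j)) // guard against missing values
        System.out.println("attribute " + j + " = " + ins.value(j));
    }
  }
}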