List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree./*from www . jav a 2 s. c o m*/ * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) { Influence[] ret = null;/*from w w w.j a va2 s. c o m*/ try { Instances data = DataSource.read(filePath); ret = new Influence[data.numAttributes() - 1]; data.setClassIndex(data.numAttributes() - 1); // other options int seed = 1; int folds = 10; // randomize data Instances randData = new Instances(data); Random rand = new Random(seed); randData.randomize(rand); Evaluation evalBase = getEvaluation(randData, model, folds); double accBase = evalBase.correct() / evalBase.numInstances() * 100; double nf = randData.numAttributes(); for (int j = 0; j < nf - 1; j++) { ret[j] = new Influence(); String str = randData.attribute(j).name(); Attribute att = randData.attribute(j); randData.deleteAttributeAt(j); Evaluation evalTemp = getEvaluation(randData, model, folds); double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100; double tempInfluence = accBase - accTemp; ret[j].attributeName = str; ret[j].infVal = tempInfluence; randData.insertAttributeAt(att, j); } sortInfluenceArray(ret); } catch (Exception ex) { Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex); } return ret; }
From source file:cezeri.utils.FactoryInstance.java
/**
 * Collects the values of the class attribute, in enumeration order.
 *
 * @param data dataset whose class index is already set
 * @return an array of length {@code data.numClasses()} holding the class values
 */
public static String[] getOriginalClasses(Instances data) {
    Attribute classAttribute = data.attribute(data.classIndex());
    String[] labels = new String[data.numClasses()];
    int next = 0;
    for (Enumeration values = classAttribute.enumerateValues(); values.hasMoreElements();) {
        labels[next] = (String) values.nextElement();
        next++;
    }
    return labels;
}
From source file:cezeri.utils.FactoryInstance.java
public static String[] getAttributeList(Instances data) { int n = data.numAttributes(); String[] ret = new String[n]; for (int i = 0; i < n; i++) { ret[i] = data.attribute(i).name(); }// w w w .j a va2s . c om return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static String[] getAttributeListExceptClassAttribute(Instances data) { int n = data.numAttributes(); String[] ret = new String[n - 1]; String classAtt = data.classAttribute().name(); int k = 0;//from www . j a va 2 s .c o m for (int i = 0; i < n; i++) { if (!classAtt.equals(data.attribute(i).name())) { ret[k++] = data.attribute(i).name(); } } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static Instances getSubsetData(Instances data, String[] attList) { Instances temp = new Instances(data); for (int i = 0; i < data.numAttributes(); i++) { if (!temp.attribute(0).equals(temp.classAttribute())) { temp.deleteAttributeAt(0);// w w w. j av a 2 s . c o m } } double[][] m = new double[attList.length + 1][data.numInstances()]; for (int i = 0; i < attList.length; i++) { int n = attList.length - 1 - i; String str = attList[n]; Attribute t = data.attribute(str); double[] d = data.attributeToDoubleArray(t.index()); m[n] = d; temp.insertAttributeAt(t, 0); } m[attList.length] = data.attributeToDoubleArray(data.classIndex()); m = CMatrix.getInstance(m).transpose().get2DArrayDouble(); FastVector att = new FastVector(); for (int i = 0; i < temp.numAttributes(); i++) { att.addElement(temp.attribute(i)); } Instances ret = new Instances(temp.relationName(), att, m.length); for (int i = 0; i < m.length; i++) { Instance ins = new Instance(m[0].length); for (int j = 0; j < m[0].length; j++) { ins.setValue(j, m[i][j]); } ret.add(ins); } ret.setClassIndex(temp.classIndex()); return ret; }
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * <p>For every non-class attribute a contingency table of attribute value
 * versus class value is accumulated from the instance weights, and the
 * chi-squared statistic of that table is stored in {@code m_ChiSquareds}
 * at the attribute's index. The entry at the class index is left at zero.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // Numeric attributes are converted with Discretize unless m_Binarize is set,
    // in which case NumericToBinary is applied instead.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters
    // counts[attribute][value][class]; the extra row (index numValues) collects
    // missing attribute values and the extra column (index numClasses) collects
    // missing class values. The slot at classIndex stays null.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters
    // temp holds the total weight per class (last slot: class missing).
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 of every attribute with the full class totals; the counting
    // loop below moves weight out of row 0 for each value it actually visits.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts
    // Iterates with numValues()/index(i)/valueSparse(i), i.e. only over the values
    // an instance stores — presumably so that sparse instances, whose zero values
    // are not stored, keep their weight in row 0 (TODO confirm against Weka docs).
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // Value present, class missing: goes to the missing-class column.
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // Value missing, class present: goes to the missing-value row.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // Both missing: goes to the corner cell.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // Value and class both present: regular cell.
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required
    // When m_missing_merge is set, the weight in the missing row/column/corner is
    // spread over the regular cells in proportion to the observed counts, and the
    // table is replaced by one without the missing row and column.
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                // Only redistribute when there is observed (non-missing) weight to
                // base the proportions on; otherwise the table is left as it is.
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}
From source file:clases.GestorFichero.java
/**
 * Scans the attributes in index order, printing each name to standard output,
 * and sets the class index to the first attribute named {@code " class"}
 * (with the leading space). Scanning stops at that attribute; if none
 * matches, the class index is left untouched.
 *
 * @param data dataset whose class index is to be assigned
 */
public static void asignarClase(Instances data) {
    for (int pos = 0; pos < data.numAttributes(); pos++) {
        String attributeName = data.attribute(pos).name();
        System.out.println(attributeName);
        if (!attributeName.equals(" class")) {
            continue;
        }
        data.setClassIndex(pos);
        return;
    }
}
From source file:clases.Preproceso.java
/**
 * Returns a copy of the dataset without the attributes whose names are listed.
 *
 * <p>Names that do not match any attribute are ignored. If nothing matches,
 * an unfiltered copy of {@code data} is returned.
 *
 * @param data    dataset to filter; it is not modified
 * @param indices names of the attributes to remove
 * @return the filtered dataset, or {@code null} if the filter failed (the
 *         failure is logged)
 */
public static Instances filterRemoveAttribute(Instances data, List<String> indices) {
    StringBuilder range = new StringBuilder();
    for (String atrib : indices) {
        for (int i = 0; i < data.numAttributes(); i++) {
            if (data.attribute(i).name().equals(atrib)) {
                if (range.length() > 0) {
                    range.append(',');
                }
                // Remove.setAttributeIndices takes a 1-based range list (see the
                // Weka API), whereas i is 0-based; passing i unchanged removed the
                // preceding attribute or produced the invalid index 0.
                range.append(i + 1);
                break;
            }
        }
    }

    // Nothing to remove: previously this path crashed in substring() on an
    // empty string. Hand back a copy, as the filter itself would.
    if (range.length() == 0) {
        return new Instances(data);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices(range.toString());
    try {
        rm.setInputFormat(data);
        return Filter.useFilter(data, rm);
    } catch (Exception ex) {
        Logger.getLogger(Preproceso.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:classification.classifiers.LDA.java
License:Open Source License
/** * Modification on Dr. Wolfgang Lenhard's code. * This was necessary because this classifier had to implements * "buildClassifier" and "classifyInstance" to be like a classifier of WEKA(R). * /*from w ww .ja v a 2s. co m*/ * @param data * @throws Exception */ public void buildClassifier(Instances data) throws Exception { int n = data.numInstances(); int a = data.numAttributes(); int k = data.numClasses(); int[] g = new int[n]; double[][] d = new double[n][a]; for (int i = 0; i < n; i++) { double[] d_i = data.instance(i).toDoubleArray(); d[i] = d_i; /** * To print the attribute with the correspondent double * * System.out.print("\n"); for(int j=0; j<a; j++){ * System.out.print(data.instance(i).stringValue(data.attribute(j)) * + " = "); * System.out.print(data.instance(i).value(data.attribute(j)) + * "; "); } System.out.print("\n"); / **/ } // Gives the number of objects belonging to class i in the trainingSet. int classIndex = a - 1; valueClass = new double[k]; data.setClassIndex(classIndex); for (int i = 0; i < k; i++) { // Reference class String refClass = data.classAttribute().value(i); // // System.out.println("refClass: " + refClass + " "); for (int j = 0; j < n; j++) { // Object class String objectClass = data.instance(j).stringValue(classIndex); // // System.out.println("objectClass: " + objectClass + " - value: // " + data.instance(j).value(data.attribute(classIndex))); // Building two vectors of classes, one in int format and // another in double format. if (objectClass == refClass) { // Object class as a double valueClass[i] = data.instance(j).value(data.attribute(classIndex)); // Object class as an int g[j] = i; // // System.out.println("value of class (int): " + g[j] + " // ___ value (double): " + valueClass[i]); } } } this.BuildLDA(d, g, true); }