List of usage examples for weka.core Instances classIndex
publicint classIndex()
From source file:Bilbo.java
License:Open Source License
/** * Returns a training set for a particular iteration. * /*from w ww . j ava2s.co m*/ * @param iteration the number of the iteration for the requested training set. * @return the training set for the supplied iteration number * @throws Exception if something goes wrong when generating a training set. */ @Override protected synchronized Instances getTrainingSet(Instances p_data, int iteration) throws Exception { int bagSize = (int) (p_data.numInstances() * (m_BagSizePercent / 100.0)); Instances bagData = null; Random r = new Random(m_Seed + iteration); // create the in-bag dataset if (m_CalcOutOfBag && p_data.classIndex() != -1) { m_inBag[iteration] = new boolean[p_data.numInstances()]; bagData = p_data.resampleWithWeights(r, m_inBag[iteration], getRepresentCopiesUsingWeights()); } else { bagData = p_data.resampleWithWeights(r, getRepresentCopiesUsingWeights()); if (bagSize < p_data.numInstances()) { bagData.randomize(r); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } return bagData; }
From source file:PrincipalComponents.java
License:Open Source License
/** * Return a summary of the analysis//from w w w . ja v a2 s .c om * * @return a summary of the analysis. */ private String principalComponentsSummary() { StringBuffer result = new StringBuffer(); double cumulative = 0.0; Instances output = null; int numVectors = 0; try { output = setOutputFormat(); numVectors = (output.classIndex() < 0) ? output.numAttributes() : output.numAttributes() - 1; } catch (Exception ex) { } // tomorrow String corrCov = (m_center) ? "Covariance " : "Correlation "; result.append(corrCov + "matrix\n" + matrixToString(Matrices.getArray(m_correlation)) + "\n\n"); result.append("eigenvalue\tproportion\tcumulative\n"); for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { cumulative += m_eigenvalues[m_sortedEigens[i]]; result.append(Utils.doubleToString(m_eigenvalues[m_sortedEigens[i]], 9, 5) + "\t" + Utils.doubleToString((m_eigenvalues[m_sortedEigens[i]] / m_sumOfEigenValues), 9, 5) + "\t" + Utils.doubleToString((cumulative / m_sumOfEigenValues), 9, 5) + "\t" + output.attribute(m_numAttribs - i - 1).name() + "\n"); } result.append("\nEigenvectors\n"); for (int j = 1; j <= numVectors; j++) { result.append(" V" + j + '\t'); } result.append("\n"); for (int j = 0; j < m_numAttribs; j++) { for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { result.append(Utils.doubleToString(m_eigenvectors[j][m_sortedEigens[i]], 7, 4) + "\t"); } result.append(m_trainInstances.attribute(j).name() + '\n'); } if (m_transBackToOriginal) { result.append("\nPC space transformed back to original space.\n" + "(Note: can't evaluate attributes in the original " + "space)\n"); } return result.toString(); }
From source file:TextDirectoryLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set./*w w w . j a v a 2 s . co m*/ * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (getDirectory() == null) throw new IOException("No directory/source has been specified"); String directoryPath = getDirectory().getAbsolutePath(); FastVector classes = new FastVector(); Enumeration enm = getStructure().classAttribute().enumerateValues(); while (enm.hasMoreElements()) classes.addElement(enm.nextElement()); Instances data = getStructure(); int fileCount = 0; for (int k = 0; k < classes.size(); k++) { String subdirPath = (String) classes.elementAt(k); File subdir = new File(directoryPath + File.separator + subdirPath); String[] files = subdir.list(); for (int j = 0; j < files.length; j++) { try { fileCount++; if (getDebug()) System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]); double[] newInst = null; if (m_OutputFilename) newInst = new double[3]; else newInst = new double[2]; File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]); BufferedReader is; if (m_charSet == null || m_charSet.length() == 0) { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt))); } else { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); if (m_OutputFilename) newInst[1] = (double) data.attribute(1) .addStringValue(subdirPath + File.separator + files[j]); newInst[data.classIndex()] = (double) k; data.add(new Instance(1.0, newInst)); is.close(); } catch (Exception e) { System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath + File.separator + files[j]); } } } return data; }
From source file:FlexDMThread.java
License:Open Source License
public void run() { try {//www . jav a2 s .c om //Get the data from the source FlexDM.getMainData.acquire(); Instances data = dataset.getSource().getDataSet(); FlexDM.getMainData.release(); //Set class attribute if undefined if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } //Process hyperparameters for classifier String temp = ""; for (int i = 0; i < classifier.getNumParams(); i++) { temp += classifier.getParameter(i).getName(); temp += " "; if (classifier.getParameter(i).getValue() != null) { temp += classifier.getParameter(i).getValue(); temp += " "; } } String[] options = weka.core.Utils.splitOptions(temp); //Print to console- experiment is starting if (temp.equals("")) { //no parameters temp = "results_no_parameters"; try { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with no parameters"); } catch (Exception e) { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with no parameters"); } } else { //parameters try { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with parameters " + temp); } catch (Exception e) { System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with parameters " + temp); } } //Create classifier, setting parameters weka.classifiers.Classifier x = createObject(classifier.getName()); x.setOptions(options); x.buildClassifier(data); //Process the test selection String[] tempTest = dataset.getTest().split("\\s"); //Create evaluation object for training and testing classifiers Evaluation eval = new Evaluation(data); StringBuffer predictions = new StringBuffer(); //Train and evaluate classifier if (tempTest[0].equals("testset")) { //specified test file //Build classifier x.buildClassifier(data); //Open test file, load data //DataSource testFile = new DataSource(dataset.getTest().substring(7).trim()); // Instances testSet = testFile.getDataSet(); FlexDM.getTestData.acquire(); Instances testSet = dataset.getTestFile().getDataSet(); FlexDM.getTestData.release(); //Set class attribute if undefined if (testSet.classIndex() == -1) { testSet.setClassIndex(testSet.numAttributes() - 1); } //Evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, testSet, array); } else if (tempTest[0].equals("xval")) { //Cross validation //Build classifier x.buildClassifier(data); //Cross validate eval.crossValidateModel(x, data, Integer.parseInt(tempTest[1]), new Random(1), predictions, new Range(), true); } else if (tempTest[0].equals("leavexval")) { //Leave one out cross validation //Build classifier x.buildClassifier(data); //Cross validate eval.crossValidateModel(x, data, data.numInstances() - 1, new Random(1), predictions, new Range(), true); } else if (tempTest[0].equals("percent")) { //Percentage split of single data set //Set training and test sizes from percentage int trainSize = (int) Math.round(data.numInstances() * Double.parseDouble(tempTest[1])); int testSize = data.numInstances() - trainSize; //Load specified data Instances train = new Instances(data, 0, trainSize); Instances testSet = new Instances(data, trainSize, testSize); //Build classifier x.buildClassifier(train); //Train and evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, testSet, array); } else { //Evaluate on training data //Test and evaluate model Object[] array = { predictions, new Range(), new Boolean(true) }; eval.evaluateModel(x, data, array); } //create datafile for results String filename = dataset.getDir() + "/" + classifier.getDirName() + "/" + temp + ".txt"; PrintWriter writer = new PrintWriter(filename, "UTF-8"); //Print classifier, dataset, parameters info to file try { writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName() + "\n PARAMETERS: " + temp); } catch (Exception e) { writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName() + "\n PARAMETERS: " + temp); } //Add evaluation string to file writer.println(eval.toSummaryString()); //Process result options if (checkResults("stats")) { //Classifier statistics writer.println(eval.toClassDetailsString()); } if (checkResults("model")) { //The model writer.println(x.toString()); } if (checkResults("matrix")) { //Confusion matrix writer.println(eval.toMatrixString()); } if (checkResults("entropy")) { //Entropy statistics //Set options req'd to get the entropy stats String[] opt = new String[4]; opt[0] = "-t"; opt[1] = dataset.getName(); opt[2] = "-k"; opt[3] = "-v"; //Evaluate model String entropy = Evaluation.evaluateModel(x, opt); //Grab the relevant info from the results, print to file entropy = entropy.substring(entropy.indexOf("=== Stratified cross-validation ===") + 35, entropy.indexOf("=== Confusion Matrix ===")); writer.println("=== Entropy Statistics ==="); writer.println(entropy); } if (checkResults("predictions")) { //The models predictions writer.println("=== Predictions ===\n"); if (!dataset.getTest().contains("xval")) { //print header of predictions table if req'd writer.println(" inst# actual predicted error distribution ()"); } writer.println(predictions.toString()); //print predictions to file } writer.close(); //Summary file is semaphore controlled to ensure quality try { //get a permit //grab the summary file, write the classifiers details to it FlexDM.writeFile.acquire(); PrintWriter p = new PrintWriter(new FileWriter(summary, true)); if (temp.equals("results_no_parameters")) { //change output based on parameters temp = temp.substring(8); } //write percent correct, classifier name, dataset name to summary file p.write(dataset.getName() + ", " + classifier.getName() + ", " + temp + ", " + eval.correct() + ", " + eval.incorrect() + ", " + eval.unclassified() + ", " + eval.pctCorrect() + ", " + eval.pctIncorrect() + ", " + eval.pctUnclassified() + ", " + eval.kappa() + ", " + eval.meanAbsoluteError() + ", " + eval.rootMeanSquaredError() + ", " + eval.relativeAbsoluteError() + ", " + eval.rootRelativeSquaredError() + ", " + eval.SFPriorEntropy() + ", " + eval.SFSchemeEntropy() + ", " + eval.SFEntropyGain() + ", " + eval.SFMeanPriorEntropy() + ", " + eval.SFMeanSchemeEntropy() + ", " + eval.SFMeanEntropyGain() + ", " + eval.KBInformation() + ", " + eval.KBMeanInformation() + ", " + eval.KBRelativeInformation() + ", " + eval.weightedTruePositiveRate() + ", " + eval.weightedFalsePositiveRate() + ", " + eval.weightedTrueNegativeRate() + ", " + eval.weightedFalseNegativeRate() + ", " + eval.weightedPrecision() + ", " + eval.weightedRecall() + ", " + eval.weightedFMeasure() + ", " + eval.weightedAreaUnderROC() + "\n"); p.close(); //release semaphore FlexDM.writeFile.release(); } catch (InterruptedException e) { //bad things happened System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); } //output we have successfully finished processing classifier if (temp.equals("no_parameters")) { //no parameters try { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with no parameters"); } catch (Exception e) { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with no parameters"); } } else { //with parameters try { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1) + " with parameters " + temp); } catch (Exception e) { System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName() + " with parameters " + temp); } } try { //get a permit //grab the log file, write the classifiers details to it FlexDM.writeLog.acquire(); PrintWriter p = new PrintWriter(new FileWriter(log, true)); Date date = new Date(); Format formatter = new SimpleDateFormat("dd/MM/YYYY HH:mm:ss"); //formatter.format(date) if (temp.equals("results_no_parameters")) { //change output based on parameters temp = temp.substring(8); } //write details to log file p.write(dataset.getName() + ", " + dataset.getTest() + ", \"" + dataset.getResult_string() + "\", " + classifier.getName() + ", " + temp + ", " + formatter.format(date) + "\n"); p.close(); //release semaphore FlexDM.writeLog.release(); } catch (InterruptedException e) { //bad things happened System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); } s.release(); } catch (Exception e) { //an error occurred System.err.println("FATAL ERROR OCCURRED: " + e.toString() + "\nClassifier: " + cNum + " - " + classifier.getName() + " on dataset " + dataset.getName()); s.release(); } }
From source file:REPTree.java
License:Open Source License
/** * Builds classifier.//w w w.j ava2 s . com * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:homemadeWEKA.java
public static Instances loadData(String filename) { Instances data = null; try {/* w ww . j a va 2s .c o m*/ data = DataSource.read(filename); if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); } catch (Exception ex) { Logger.getLogger(homemadeWEKA.class.getName()).log(Level.SEVERE, null, ex); } return data; }
From source file:RunExhaustiveSearch.java
License:Open Source License
public static void main(String[] arg) throws Exception { // Load data. ///* ww w .java 2 s .com*/ System.out.println("\nLoading sample file..."); DataSource source = new DataSource(arg[0]); Instances data = source.getDataSet(); if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); int n = Integer.parseInt(arg[1]); System.out.println("Instance with " + n + " features!"); System.out.println("\nRunning ES algorithm with CFS cost function..."); // Run feature selection algorithm ES using CFS cost function. // runAttributeSelection(data, n); }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier.//from w w w. jav a2 s .co m * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:LabeledItemSet.java
License:Open Source License
/** * Splits the class attribute away. Depending on the invert flag, the instances without class attribute or only the class attribute of all instances is returned * @param instances the instances/*from w w w . j ava2s .c om*/ * @param invert flag; if true only the class attribute remains, otherweise the class attribute is the only attribute that is deleted. * @throws Exception exception if instances cannot be splitted * @return Instances without the class attribute or instances with only the class attribute */ public static Instances divide(Instances instances, boolean invert) throws Exception { Instances newInstances = new Instances(instances); if (instances.classIndex() < 0) throw new Exception("For class association rule mining a class attribute has to be specified."); if (invert) { for (int i = 0; i < newInstances.numAttributes(); i++) { if (i != newInstances.classIndex()) { newInstances.deleteAttributeAt(i); i--; } } return newInstances; } else { newInstances.setClassIndex(-1); newInstances.deleteAttributeAt(instances.classIndex()); return newInstances; } }
From source file:CJWeka.java
License:Open Source License
/** * This function sets what the m_numeric flag to represent the passed class * it also performs the normalization of the attributes if applicable * and sets up the info to normalize the class. (note that regardless of * the options it will fill an array with the range and base, set to * normalize all attributes and the class to be between -1 and 1) * @param inst the instances./* ww w .j av a 2 s . com*/ * @return The modified instances. This needs to be done. If the attributes * are normalized then deep copies will be made of all the instances which * will need to be passed back out. */ private Instances setClassType(Instances inst) throws Exception { if (inst != null) { // x bounds double min = Double.POSITIVE_INFINITY; double max = Double.NEGATIVE_INFINITY; double value; m_attributeRanges = new double[inst.numAttributes()]; m_attributeBases = new double[inst.numAttributes()]; for (int noa = 0; noa < inst.numAttributes(); noa++) { min = Double.POSITIVE_INFINITY; max = Double.NEGATIVE_INFINITY; for (int i = 0; i < inst.numInstances(); i++) { if (!inst.instance(i).isMissing(noa)) { value = inst.instance(i).value(noa); if (value < min) { min = value; } if (value > max) { max = value; } } } m_attributeRanges[noa] = (max - min) / 2; m_attributeBases[noa] = (max + min) / 2; if (noa != inst.classIndex() && m_normalizeAttributes) { for (int i = 0; i < inst.numInstances(); i++) { if (m_attributeRanges[noa] != 0) { inst.instance(i).setValue(noa, (inst.instance(i).value(noa) - m_attributeBases[noa]) / m_attributeRanges[noa]); } else { inst.instance(i).setValue(noa, inst.instance(i).value(noa) - m_attributeBases[noa]); } } } } if (inst.classAttribute().isNumeric()) { m_numeric = true; } else { m_numeric = false; } } return inst; }