List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/** * builds the classifier/*w w w . j a va2 s . com*/ * * @param data the training instances * @throws Exception if something goes wrong */ @Override public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // save original header (needed for clusters to classes output) m_OriginalHeader = data.stringFreeStructure(); // remove class attribute for clusterer Instances clusterData = new Instances(data); clusterData.setClassIndex(-1); clusterData.deleteAttributeAt(data.classIndex()); m_ClusteringHeader = clusterData.stringFreeStructure(); if (m_ClusteringHeader.numAttributes() == 0) { System.err.println("Data contains only class attribute, defaulting to ZeroR model."); m_ZeroR = new ZeroR(); m_ZeroR.buildClassifier(data); } else { m_ZeroR = null; // build clusterer m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer); m_ActualClusterer.buildClusterer(clusterData); if (!getLabelAllClusters()) { // determine classes-to-clusters mapping ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(m_ActualClusterer); eval.evaluateClusterer(clusterData); double[] clusterAssignments = eval.getClusterAssignments(); int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()]; int[] clusterTotals = new int[eval.getNumClusters()]; double[] best = new double[eval.getNumClusters() + 1]; double[] current = new double[eval.getNumClusters() + 1]; for (int i = 0; i < data.numInstances(); i++) { Instance instance = data.instance(i); if (!instance.classIsMissing()) { counts[(int) clusterAssignments[i]][(int) instance.classValue()]++; clusterTotals[(int) clusterAssignments[i]]++; } } best[eval.getNumClusters()] = Double.MAX_VALUE; ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0); m_ClustersToClasses = new double[best.length]; System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length); } else { m_ClusterClassProbs = new 
double[m_ActualClusterer.numberOfClusters()][data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance clusterInstance = clusterData.instance(i); Instance originalInstance = data.instance(i); if (!originalInstance.classIsMissing()) { double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance); for (int j = 0; j < probs.length; j++) { m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j]; } } } for (int i = 0; i < m_ClusterClassProbs.length; i++) { Utils.normalize(m_ClusterClassProbs[i]); } } } }
From source file:com.tum.classifiertest.DataCache.java
License:Open Source License
/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 *
 * <p>Attribute values are stored as floats indexed by attribute first, then
 * by instance. Missing values are stored as {@code Float.MAX_VALUE}. For every
 * attribute except the class attribute, the result of
 * {@code FastRfUtils.sort} on that attribute's values is stored in
 * {@code sortedIndices}; the entry for the class attribute stays {@code null}.
 *
 * @param origData the dataset to copy from
 * @throws Exception if an attribute is neither numeric nor nominal
 */
public DataCache(Instances origData) throws Exception {

    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    // 0 marks a numeric attribute; nominal attributes store their number of values.
    attNumVals = new int[origData.numAttributes()];
    for (int i = 0; i < attNumVals.length; i++) {
        if (origData.attribute(i).isNumeric()) {
            attNumVals[i] = 0;
        } else if (origData.attribute(i).isNominal()) {
            attNumVals[i] = origData.attribute(i).numValues();
        } else {
            throw new Exception("Only numeric and nominal attributes are supported.");
        }
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int a = 0; a < numAttributes; a++) {
        for (int i = 0; i < numInstances; i++) {
            if (origData.instance(i).isMissing(a)) {
                vals[a][i] = Float.MAX_VALUE; // to make sure missing values go to the end
            } else {
                vals[a][i] = (float) origData.instance(i).value(a); // deep copy
            }
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        instWeights[i] = origData.instance(i).weight();
        instClassValues[i] = (int) origData.instance(i).classValue();
    }

    /* compute the sortedInstances for the whole dataset */
    sortedIndices = new int[numAttributes][];
    for (int a = 0; a < numAttributes; a++) {
        if (a == classIndex) {
            continue;
        }
        // Nominal and numeric attributes are handled the same way (nominal ones
        // are sorted as well, as of FastRF 0.99). Missing values are coded as
        // Float.MAX_VALUE and go to the end.
        sortedIndices[a] = FastRfUtils.sort(vals[a]);
    }
}
From source file:com.tum.classifiertest.FastRfBagging.java
License:Open Source License
/** * Compute the out-of-bag error for a set of instances. * * @param data the instances//ww w.j av a 2s. co m * @param inBag numTrees x numInstances indicating out-of-bag instances * @param threadPool the pool of threads * * @return the oob error */ private double computeOOBError(Instances data, boolean[][] inBag, ExecutorService threadPool) throws InterruptedException, ExecutionException { boolean numeric = data.classAttribute().isNumeric(); List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances()); for (int i = 0; i < data.numInstances(); i++) { VotesCollector aCollector = new VotesCollector(m_Classifiers, i, data, inBag); votes.add(threadPool.submit(aCollector)); } double outOfBagCount = 0.0; double errorSum = 0.0; for (int i = 0; i < data.numInstances(); i++) { double vote = votes.get(i).get(); // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else { if (vote != data.instance(i).classValue()) errorSum += data.instance(i).weight(); } } return errorSum / outOfBagCount; }
From source file:com.tum.classifiertest.FastRfUtils.java
License:Open Source License
/** * Produces a random permutation of the values of an attribute in a dataset using Knuth shuffle. * <p/>/*from w w w. jav a 2 s . c om*/ * Copies back the current values of the previously scrambled attribute and uses the given permutation * to scramble the values of the new attribute all by copying from the original dataset. * * @param src the source dataset * @param dst the scrambled dataset * @param attIndex the attribute index * @param perm the random permutation * * @return fluent */ public static Instances scramble(Instances src, Instances dst, final int attIndex, int[] perm) { for (int i = 0; i < src.numInstances(); i++) { Instance scrambled = dst.instance(i); if (attIndex > 0) scrambled.setValue(attIndex - 1, src.instance(i).value(attIndex - 1)); scrambled.setValue(attIndex, src.instance(perm[i]).value(attIndex)); } return dst; }
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/** * Samoa instances from weka instances.// w ww. j av a 2s.c o m * * @param instances the instances * @return the instances */ public Instances samoaInstances(weka.core.Instances instances) { Instances samoaInstances = samoaInstancesInformation(instances); //We assume that we have only one samoaInstanceInformation for WekaToSamoaInstanceConverter this.samoaInstanceInformation = samoaInstances; for (int i = 0; i < instances.numInstances(); i++) { samoaInstances.add(samoaInstance(instances.instance(i))); } return samoaInstances; }
From source file:com.yimei.core.Discretizer.java
/**
 * Returns a copy of the data in which every numeric attribute is replaced by
 * an equal-width bin index.
 *
 * <p>For each numeric attribute the minimum and maximum are determined, the
 * range is divided by {@code numOfIntervals}, and each value is replaced by
 * {@code Math.round((value - min) / interval)}. Non-numeric attributes are
 * left untouched. The input dataset is not modified.
 *
 * @param data the dataset to discretize
 * @return a discretized copy of {@code data}
 */
public static Instances discretize(Instances data) {
    Instances discData = new Instances(data);

    for (int i = 0; i < data.numAttributes(); i++) {
        if (!data.attribute(i).isNumeric()) {
            continue;
        }

        // Start from the infinities so that any real value replaces the seed.
        // Double.MIN_VALUE must not be used for 'max': it is the smallest
        // POSITIVE double, so an all-negative column would keep it as maximum.
        double max = Double.NEGATIVE_INFINITY;
        double min = Double.POSITIVE_INFINITY;

        for (int j = 0; j < data.size(); j++) {
            double value = data.instance(j).value(i);
            if (value > max) {
                max = value;
            }
            if (value < min) {
                min = value;
            }
        }

        double interval = (max - min) / numOfIntervals;

        for (int j = 0; j < data.size(); j++) {
            // For a constant column the division is 0/0 = NaN, which
            // Math.round maps to 0, so all rows end up in bin 0.
            long discValue = Math.round((data.instance(j).value(i) - min) / interval);
            discData.instance(j).setValue(i, discValue);
        }
    }

    return discData;
}
From source file:com.zooclassifier.Model.FileLoader.java
/**
 * Loads an ARFF file and extracts its contents as strings.
 *
 * <p>The last attribute is used as the class (label). For every instance the
 * string values of all other attributes are stored in {@code attributes} and
 * the string value of the last attribute in {@code labels}. The declared
 * values of each attribute are stored in {@code attributesLegalValues} and
 * those of the label attribute in {@code labelsLegalValues}.
 *
 * @param filename path of the ARFF file to read
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if reading or closing the file fails
 */
public FileLoader(String filename) throws FileNotFoundException, IOException {
    Instances data;
    // try-with-resources closes the reader (and the wrapped FileReader) even
    // when parsing fails; previously the file handle was never released.
    try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
        ArffLoader.ArffReader arff = new ArffLoader.ArffReader(reader);
        data = arff.getData();
    }
    data.setClassIndex(data.numAttributes() - 1);

    attributes = new String[data.numInstances()][data.numAttributes() - 1];
    labels = new String[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        Instance instance = data.instance(i);
        for (int j = 0; j < instance.numAttributes() - 1; j++) {
            attributes[i][j] = instance.stringValue(j);
        }
        labels[i] = instance.stringValue(instance.numAttributes() - 1);
    }

    attributesLegalValues = new String[data.numAttributes() - 1][];
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        attributesLegalValues[i] = (String[]) Collections.list(data.attribute(i).enumerateValues())
                .toArray(new String[data.attribute(i).numValues()]);
    }

    labelsLegalValues = (String[]) Collections.list(data.attribute(data.numAttributes() - 1).enumerateValues())
            .toArray(new String[data.attribute(data.numAttributes() - 1).numValues()]);
}
From source file:control.CosineDistance.java
License:Open Source License
/** * Returns the index of the closest point to the current instance. * Index is index in Instances object that is the second parameter. * * @param instance the instance to assign a cluster to * @param allPoints all points/*from w w w . java 2 s.c o m*/ * @param pointList the list of points * @return the index of the closest point * @throws Exception if something goes wrong */ public int closestPoint(Instance instance, Instances allPoints, int[] pointList) throws Exception { double minDist = Integer.MAX_VALUE; int bestPoint = 0; for (int i = 0; i < pointList.length; i++) { double dist = distance(instance, allPoints.instance(pointList[i]), Double.POSITIVE_INFINITY); if (dist < minDist) { minDist = dist; bestPoint = i; } } return pointList[bestPoint]; }
From source file:core.classifier.MyFirstClassifier.java
License:Open Source License
/** * Method for building the classifier. Implements a one-against-one * wrapper for multi-class problems./*from ww w . ja v a 2 s . c o m*/ * * @param insts the set of training instances * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances insts) throws Exception { if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(insts); // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); /* Removes all the instances with weight equal to 0. MUST be done since condition (8) of Keerthi's paper is made with the assertion Ci > 0 (See equation (3a). */ Instances data = new Instances(insts, insts.numInstances()); for (int i = 0; i < insts.numInstances(); i++) { if (insts.instance(i).weight() > 0) data.add(insts.instance(i)); } if (data.numInstances() == 0) { throw new Exception("No training instances left after removing " + "instances with weight 0!"); } insts = data; } if (!m_checksTurnedOff) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(insts); insts = Filter.useFilter(insts, m_Missing); } else { m_Missing = null; } if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) { boolean onlyNumeric = true; if (!m_checksTurnedOff) { for (int i = 0; i < insts.numAttributes(); i++) { if (i != insts.classIndex()) { if (!insts.attribute(i).isNumeric()) { onlyNumeric = false; break; } } } } if (!onlyNumeric) { m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(insts); insts = Filter.useFilter(insts, m_NominalToBinary); } else { m_NominalToBinary = null; } } else { m_NominalToBinary = null; } if (m_filterType == FILTER_STANDARDIZE) { m_Filter = new Standardize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else if (m_filterType == FILTER_NORMALIZE) { m_Filter = new Normalize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } else { m_Filter = 
null; } m_classIndex = insts.classIndex(); m_classAttribute = insts.classAttribute(); m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0); // Generate subsets representing each class Instances[] subsets = new Instances[insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { subsets[i] = new Instances(insts, insts.numInstances()); } for (int j = 0; j < insts.numInstances(); j++) { Instance inst = insts.instance(j); subsets[(int) inst.classValue()].add(inst); } for (int i = 0; i < insts.numClasses(); i++) { subsets[i].compactify(); } // Build the binary classifiers Random rand = new Random(m_randomSeed); m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()]; for (int i = 0; i < insts.numClasses(); i++) { for (int j = i + 1; j < insts.numClasses(); j++) { m_classifiers[i][j] = new BinarySMO(); m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel())); Instances data = new Instances(insts, insts.numInstances()); for (int k = 0; k < subsets[i].numInstances(); k++) { data.add(subsets[i].instance(k)); } for (int k = 0; k < subsets[j].numInstances(); k++) { data.add(subsets[j].instance(k)); } data.compactify(); data.randomize(rand); m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed); } } }
From source file:core.ClusterEvaluationEX.java
License:Open Source License
/**
 * Moves every instance whose attribute at index 1 equals -1 out of the given
 * dataset and into the {@code noise} field.
 *
 * <p>{@code noise} is reset to an empty dataset with the same structure as
 * {@code data} before the scan. The given dataset is modified in place.
 *
 * @param data the dataset to filter
 * @return the same {@code data} object with the matching instances removed
 */
public Instances DeleteNoise(Instances data) {
    noise = data.stringFreeStructure();

    int pos = 0;
    while (pos < data.numInstances()) {
        boolean isNoise = data.instance(pos).value(1) == -1;
        if (isNoise) {
            noise.add(data.instance(pos));
            // After deletion the next instance slides into 'pos', so do not advance.
            data.delete(pos);
        } else {
            pos++;
        }
    }

    return data;
}