List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
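Before the per-project examples below, here is a minimal sketch of the typical pattern: load a dataset and mark its last attribute as the class. The ARFF path is a placeholder, and the assumption that the class attribute is stored last is a common convention rather than a requirement.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Load any format supported by Weka's converters (ARFF, CSV, ...).
        // "data/dataset.arff" is a placeholder path.
        Instances data = DataSource.read("data/dataset.arff");

        // Instances loaded this way usually carry no class information
        // (classIndex() == -1), so the class attribute has to be declared
        // explicitly. By convention it is often the last attribute,
        // as in most of the examples below.
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}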
From source file:de.uniheidelberg.cl.swp.mlprocess.AblationTesting.java
License:Apache License
/**
 * Creates an Instances object for the specified list of features.
 * <br>
 * Extracts the Instance objects from a source file and suppresses all features but the ones
 * specified.
 *
 * @param fileName File to the training results in ARFF format.
 * @param features List of {@link AbstractFeatureExtractor}s which are currently being tested.
 * @return Instances object consisting of the desired attribute structure.
 * @throws Exception If the ARFF file couldn't be read, an exception is thrown.
 */
public Instances createInstances(String fileName, List<AbstractFeatureExtractor> features) throws Exception {
    final Instances train = new Instances(new BufferedReader(new FileReader(fileName)));
    ArrayList<Attribute> newAttributes = new ArrayList<Attribute>();

    for (int i = 0; i < train.numAttributes(); i++) {
        for (AbstractFeatureExtractor feature : features) {
            if (train.attribute(i).name().equals(feature.getName())) {
                newAttributes.add(train.attribute(i));
                continue;
            }
        }
    }
    /*
     * Add the last two features (ACR-System + correct/false predictions) as those
     * are no features gathered by a FeatureExtractor.
     */
    newAttributes.add(train.attribute(train.numAttributes() - 2));
    newAttributes.add(train.attribute(train.numAttributes() - 1));

    Instances trainCopy = copyInstances(train, newAttributes);
    trainCopy.setClassIndex(trainCopy.numAttributes() - 1);
    return trainCopy;
}
From source file:de.uniheidelberg.cl.swp.mlprocess.WEKARunner.java
License:Apache License
/**
 * Evaluates our classifier with a test set.
 * <br>
 * Not used yet.
 *
 * @param testArff ARFF file to evaluate against.
 * @throws Exception If the evaluation couldn't be initialized.
 */
public void buildEvaluation(String testArff) throws Exception {
    Instances evalIns = new Instances(new BufferedReader(new FileReader(testArff)));
    evalIns.setClassIndex(evalIns.numAttributes() - 1);
    evaluation = new Evaluation(train);
}
From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];

    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];
                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();
}
From source file:de.uni_koeln.phil_fak.iv.tm.p4.classification.WekaAdapter.java
License:Open Source License
private Instances initTraininSet(Set<Document> trainingData) {
    /* The FastVector holds the features: */
    FastVector structureVector = new FastVector(vectorSize + 1);
    /* In Weka the class is also represented as a feature vector: */
    FastVector classesVector = new FastVector(this.classes.size());
    for (String c : classes) {
        /*
         * The class is not numeric, so all possible values have to be listed:
         */
        classesVector.addElement(c);
    }
    /* Position 0 of our overall vector holds the class vector: */
    structureVector.addElement(new Attribute("Ressort", classesVector));
    for (int i = 0; i < vectorSize; i++) {
        /*
         * At every position of our feature vector we have a numeric feature (represented as an
         * Attribute) whose name is its index:
         */
        structureVector.addElement(new Attribute(i + "")); // feature i, i.e. what? > TF-IDF
    }
    /*
     * Finally we create a container for our training examples that will hold instances of the
     * structure described above:
     */
    Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1);
    /*
     * We still have to specify at which position of the feature vectors the class is located:
     */
    result.setClassIndex(0);
    return result;
}
From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java
License:Open Source License
private Instances initTrainingSet(List<ClassifyUnit> trainingData) {
    int vectorSize = trainingData.get(0).getFeatureVector().length;
    Set<Integer> classIDs = new TreeSet<Integer>();
    for (ClassifyUnit classifyUnit : trainingData) {
        ZoneClassifyUnit actual = (ZoneClassifyUnit) classifyUnit;
        classIDs.add(actual.getActualClassID());
    }
    /* The vector holds the numeric features (here: tf-idf values) plus a class attribute: */
    ArrayList<Attribute> structureVector = new ArrayList<Attribute>(vectorSize + 1);
    /* In Weka the class is represented as a vector as well: */
    ArrayList<String> classesVector = new ArrayList<String>();
    for (Integer c : classIDs) {
        /*
         * Since the class attribute is not numeric (but, in Weka terminology, a nominal or
         * string attribute), all possible attribute values have to be listed here:
         */
        classesVector.add(c + "");
    }
    /* Position 0 of our structure vector holds the class vector: */
    structureVector.add(new Attribute("topic", classesVector));
    for (int i = 0; i < vectorSize; i++) {
        /*
         * At every further position of our feature vector we have a numeric feature (represented
         * as an Attribute) whose name is simply its index position:
         */
        structureVector.add(new Attribute(i + "")); // feature i, i.e. what? > TF-IDF
    }
    /*
     * Finally we create a container that will hold instances in the structure described here
     * (i.e. our training examples):
     */
    Instances result = new Instances("InstanceStructure", structureVector, vectorSize + 1);
    /*
     * Again we have to specify at which position of the feature vectors the class is located:
     */
    result.setClassIndex(0);
    return result;
}
From source file:de.upb.timok.oneclassclassifier.WekaSvmClassifier.java
License:Open Source License
@Override
public void train(List<double[]> trainingSamples) {
    Instances data = DatasetTransformationUtils.trainingSetToInstances(trainingSamples);
    // Setting class attribute if the data format does not provide this information.
    // For example, the XRFF format saves the class attribute information as well.
    try {
        if (filter != null) {
            filter.setInputFormat(data);
            data = Filter.useFilter(data, filter);
        }
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        wekaSvm.buildClassifier(data);
    } catch (final Exception e) {
        logger.error("Unexpected exception", e);
    }
}
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute); // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
From source file:DiversifyQuery.DivTopK.java
/**
 * Load a set of Instances from an ARFF
 *
 * @param fileName the file name of the ARFF
 * @return a set of Instances from the ARFF
 */
public static Instances loadData(String fileName) {
    Instances data = null;
    try {
        FileReader r;
        r = new FileReader(fileName);
        data = new Instances(r);
        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception e) {
        System.out.println(" Error =" + e + " in method loadData");
        e.printStackTrace();
    }
    return data;
}
From source file:DiversifyQuery.DivTopK.java
/**
 * Sets the format of the filtered instances that are output, i.e. it will
 * include k attributes, one for each shapelet distance, plus a class value.
 *
 * @param inputFormat the format of the input data
 * @return a new Instances object in the desired output format
 * @throws Exception if all required parameters of the filter are not
 *                   initialised correctly
 */
protected Instances determineOutputFormat(Instances inputFormat, ArrayList<LegacyShapelet> shapelets)
        throws Exception {
    // Set up instances size and format.
    //int length = this.numShapelets;
    int length = shapelets.size();
    FastVector atts = new FastVector();
    String name;
    for (int i = 0; i < length; i++) {
        name = "Shapelet_" + i;
        atts.addElement(new Attribute(name));
    }
    if (inputFormat.classIndex() >= 0) {
        // Classification set, set class
        // Get the class values as a fast vector
        Attribute target = inputFormat.attribute(inputFormat.classIndex());
        FastVector vals = new FastVector(target.numValues());
        for (int i = 0; i < target.numValues(); i++) {
            vals.addElement(target.value(i));
        }
        atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
    }
    Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts,
            inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}