List of usage examples for weka.core Instances setClassIndex
public void setClassIndex(int classIndex)
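Before the examples, a minimal sketch of the usual pattern: load a dataset, then set the class attribute before training. This is an illustrative sketch, not one of the listed source files; the file name iris.arff and the "class is the last attribute" convention are assumptions for the example.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical ARFF file; any dataset Weka can read works here.
        Instances data = DataSource.read("iris.arff");
        // setClassIndex takes a 0-based attribute index; -1 means "no class set".
        // Data loaded from a file has no class index by default, so assume the
        // last attribute is the class, as most of the examples below do.
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}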
From source file:motaz.CODB.java
License:Open Source License
public String GO(String[] options) throws Exception {
    Instances train = null;
    String attributeRangeString;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // No class attribute assigned to instances
    train = PostgreSQLlocal.readdata();
    theClass = train.numAttributes() - 1;
    train.setClassIndex(theClass);
    System.out.println("succesfully read data");
    Utils.checkForRemainingOptions(options);
    buildCODB(train);
    text.append(toString());
    return text.toString();
}
From source file:mulan.classifier.meta.ConstrainedKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer
 * that are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    for (int i = 0; i < m_NumClusters; i++) {
        bucket[i] = new ArrayList<bucketInstance>();
    }
    // calculate bucket size
    bucketSize = (int) Math.ceil(data.numInstances() / (double) m_NumClusters);
    //System.out.print("bucketSize = " + bucketSize + "\n");
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);
    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    m_ReplaceMissingFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    m_Min = new double[instances.numAttributes()];
    m_Max = new double[instances.numAttributes()];
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_Min[i] = m_Max[i] = Double.NaN;
    }
    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];
    for (int i = 0; i < instances.numInstances(); i++) {
        updateMinMax(instances.instance(i));
    }
    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;
    for (int j = instances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(instances.instance(instIndex), instances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(instances.instance(instIndex));
            initC.put(hk, null);
        }
        instances.swap(j, instIndex);
        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();
    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    while (!converged) {
        // reset buckets
        for (int j = 0; j < m_NumClusters; j++) {
            bucket[j] = new ArrayList<bucketInstance>();
        }
        emptyClusterCount = 0;
        m_Iterations++;
        //System.out.println(">>Iterations: " + m_Iterations);
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            //System.out.println("processing instance: " + i);
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }
        if (m_Iterations > maxIterations) {
            converged = true;
        }
        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            double[] vals = new double[instances.numAttributes()];
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                for (int j = 0; j < instances.numAttributes(); j++) {
                    vals[j] = tempI[i].meanOrMode(j);
                    m_ClusterNominalCounts[i][j] = tempI[i].attributeStats(j).nominalCounts;
                }
                m_ClusterCentroids.add(new DenseInstance(1.0, vals));
            }
            //System.out.println("centroid: " + i + " " + m_ClusterCentroids.instance(i).toString());
        }
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            tempI = new Instances[m_NumClusters];
        }
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }
    // reset buckets
    for (int j = 0; j < m_NumClusters; j++) {
        bucket[j] = new ArrayList<bucketInstance>();
    }
    m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        double[] vals2 = new double[instances.numAttributes()];
        for (int j = 0; j < instances.numAttributes(); j++) {
            if (instances.attribute(j).isNumeric()) {
                vals2[j] = Math.sqrt(tempI[i].variance(j));
            } else {
                vals2[j] = Utils.missingValue();
            }
        }
        m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
From source file:mulan.classifier.meta.SubsetLearner.java
License:Open Source License
/**
 * We get the initial dataset through trainingSet. Then for each split as specified by splitOrder
 * we remove the unneeded labels and train the classifiers using a different method for multi-label splits
 * and single label splits.
 *
 * @param trainingSet The initial {@link MultiLabelInstances} dataset
 * @throws Exception
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    int countSingle = 0, countMulti = 0;
    remove = new Remove[splitOrder.length];
    //Get values into absoluteIndicesToRemove
    int numofSplits = splitOrder.length; //Number of sets the main is going to be split into
    for (int r = 0; r < splitOrder.length; r++) { //Initialization required to avoid NullPointer exception
        absoluteIndicesToRemove[r] = new int[numLabels - splitOrder[r].length];
    }
    //Initialize an array containing which labels we want
    boolean[][] Selected = new boolean[splitOrder.length][numLabels];
    for (int i = 0; i < numofSplits; i++) { //Set true for the labels we need to keep
        for (int j = 0; j < splitOrder[i].length; j++) {
            Selected[i][splitOrder[i][j]] = true;
        }
    }
    for (int i = 0; i < numofSplits; i++) { //Get the labels you need to KEEP
        int k = 0;
        for (int j = 0; j < numLabels; j++) {
            if (Selected[i][j] != true) {
                absoluteIndicesToRemove[i][k] = labelIndices[j];
                k++;
            }
        }
    }
    //Create the lists which will contain the learners
    multiLabelLearners = new ArrayList<MultiLabelLearner>();
    singleLabelLearners = new ArrayList<FilteredClassifier>();
    countSingle = 0; //Reset the values to zero and reuse the variables
    countMulti = 0;
    //TODO: Add more comments for the procedure
    for (int totalSplitNo = 0; totalSplitNo < splitOrder.length; totalSplitNo++) {
        debug("Building set " + (totalSplitNo + 1) + "/" + splitOrder.length);
        if (splitOrder[totalSplitNo].length > 1) {
            //Remove the unneeded labels
            Instances trainSubset = trainingSet.getDataSet();
            remove[totalSplitNo] = new Remove();
            remove[totalSplitNo].setAttributeIndicesArray(absoluteIndicesToRemove[totalSplitNo]);
            remove[totalSplitNo].setInputFormat(trainSubset);
            remove[totalSplitNo].setInvertSelection(false);
            trainSubset = Filter.useFilter(trainSubset, remove[totalSplitNo]);
            //Reintegrate dataset and train learner
            multiLabelLearners.add(baseMultiLabelLearner.makeCopy());
            multiLabelLearners.get(countMulti).build(trainingSet.reintegrateModifiedDataSet(trainSubset));
            countMulti++;
        } else {
            debug("Single Label model.");
            //Initialize the FilteredClassifiers
            singleLabelLearners.add(new FilteredClassifier());
            singleLabelLearners.get(countSingle).setClassifier(AbstractClassifier.makeCopy(baseClassifier));
            Instances trainSubset = trainingSet.getDataSet();
            //Set the remove filter for the FilteredClassifiers
            remove[totalSplitNo] = new Remove();
            remove[totalSplitNo].setAttributeIndicesArray(absoluteIndicesToRemove[totalSplitNo]);
            remove[totalSplitNo].setInputFormat(trainSubset);
            remove[totalSplitNo].setInvertSelection(false);
            singleLabelLearners.get(countSingle).setFilter(remove[totalSplitNo]);
            //Set the remaining label as the class index
            trainSubset.setClassIndex(labelIndices[splitOrder[totalSplitNo][0]]);
            //Train
            singleLabelLearners.get(countSingle).buildClassifier(trainSubset);
            countSingle++;
        }
    }
}
From source file:mulan.classifier.transformation.BinaryRelevance.java
License:Open Source License
protected void buildInternal(MultiLabelInstances train) throws Exception {
    numLabels = train.getNumLabels();
    ensemble = new FilteredClassifier[numLabels];
    Instances trainingData = train.getDataSet();
    for (int i = 0; i < numLabels; i++) {
        ensemble[i] = new FilteredClassifier();
        ensemble[i].setClassifier(AbstractClassifier.makeCopy(baseClassifier));
        // Indices of attributes to remove
        int[] indicesToRemove = new int[numLabels - 1];
        int counter2 = 0;
        for (int counter1 = 0; counter1 < numLabels; counter1++) {
            if (labelIndices[counter1] != labelIndices[i]) {
                indicesToRemove[counter2] = labelIndices[counter1];
                counter2++;
            }
        }
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInputFormat(trainingData);
        remove.setInvertSelection(false);
        ensemble[i].setFilter(remove);
        trainingData.setClassIndex(labelIndices[i]);
        // debug("Bulding model " + (i + 1) + "/" + numLabels);
        System.out.println("Bulding model " + (i + 1) + "/" + numLabels);
        ensemble[i].buildClassifier(trainingData);
    }
}
From source file:mulan.classifier.transformation.CalibratedLabelRanking.java
License:Open Source License
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    System.out.println("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);
    // One-vs-one models
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];
    Instances trainingData = trainingSet.getDataSet();
    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        // Attribute of label 1
        Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            System.out.println("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);
            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                Instance tempInstance;
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }
                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);
                if (!value1.equals(value2)) {
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }
            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            for (int i = 0; i < numPredictors; i++) {
                reorderedIndices[i] = featureIndices[i];
            }
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);
            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}
From source file:mulan.classifier.transformation.ClassifierChain.java
License:Open Source License
protected void buildInternal(MultiLabelInstances train) throws Exception {
    if (chain == null) {
        chain = new int[numLabels];
        for (int i = 0; i < numLabels; i++) {
            chain[i] = i;
        }
    }
    Instances trainDataset;
    numLabels = train.getNumLabels();
    ensemble = new FilteredClassifier[numLabels];
    trainDataset = train.getDataSet();
    for (int i = 0; i < numLabels; i++) {
        ensemble[i] = new FilteredClassifier();
        ensemble[i].setClassifier(AbstractClassifier.makeCopy(baseClassifier));
        // Indices of attributes to remove first removes numLabels attributes
        // the numLabels - 1 attributes and so on.
        // The loop starts from the last attribute.
        int[] indicesToRemove = new int[numLabels - 1 - i];
        int counter2 = 0;
        for (int counter1 = 0; counter1 < numLabels - i - 1; counter1++) {
            indicesToRemove[counter1] = labelIndices[chain[numLabels - 1 - counter2]];
            counter2++;
        }
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indicesToRemove);
        remove.setInputFormat(trainDataset);
        remove.setInvertSelection(false);
        ensemble[i].setFilter(remove);
        trainDataset.setClassIndex(labelIndices[chain[i]]);
        debug("Bulding model " + (i + 1) + "/" + numLabels);
        //===============================================================
        System.out.println("Bulding model " + (i + 1) + "/" + numLabels);
        //===============================================================
        ensemble[i].buildClassifier(trainDataset);
    }
}
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Initializes all the parameters used in the meta-level.
 * Calculates the correlated labels if meta-level pruning is applied.
 *
 * @param dataSet
 * @param metaClassifier
 * @param includeAttrs
 * @param metaPercentage
 * @param eval
 * @throws Exception
 */
public void initializeMetaLevel(MultiLabelInstances dataSet, Classifier metaClassifier, boolean includeAttrs,
        double metaPercentage, ASEvaluation eval) throws Exception {
    this.metaClassifier = metaClassifier;
    metaLevelEnsemble = AbstractClassifier.makeCopies(metaClassifier, numLabels);
    metaLevelData = new Instances[numLabels];
    metaLevelFilteredEnsemble = new FilteredClassifier[numLabels];
    this.includeAttrs = includeAttrs;
    // calculate the number of correlated labels that corresponds to the
    // given percentage
    topkCorrelated = (int) Math.floor(metaPercentage * numLabels);
    if (topkCorrelated < 1) {
        debug("Too small percentage, selecting k=1");
        topkCorrelated = 1;
    }
    if (topkCorrelated < numLabels) { // pruning should be applied
        selectedAttributes = new int[numLabels][];
        if (eval == null) { // calculate the PhiCoefficient
            Statistics phi = new Statistics();
            phi.calculatePhi(dataSet);
            for (int i = 0; i < numLabels; i++) {
                selectedAttributes[i] = phi.topPhiCorrelatedLabels(i, topkCorrelated);
            }
        } else { // apply feature selection
            AttributeSelection attsel = new AttributeSelection();
            Ranker rankingMethod = new Ranker();
            rankingMethod.setNumToSelect(topkCorrelated);
            attsel.setEvaluator(eval);
            attsel.setSearch(rankingMethod);
            // create a dataset consisting of all the classes of each
            // instance plus the class we want to select attributes from
            for (int i = 0; i < numLabels; i++) {
                ArrayList<Attribute> attributes = new ArrayList<Attribute>();
                for (int j = 0; j < numLabels; j++) {
                    attributes.add(train.attribute(labelIndices[j]));
                }
                attributes.add(train.attribute(labelIndices[i]).copy("meta"));
                Instances iporesult = new Instances("Meta format", attributes, 0);
                iporesult.setClassIndex(numLabels);
                for (int k = 0; k < train.numInstances(); k++) {
                    double[] values = new double[numLabels + 1];
                    for (int m = 0; m < numLabels; m++) {
                        values[m] = Double.parseDouble(train.attribute(labelIndices[m])
                                .value((int) train.instance(k).value(labelIndices[m])));
                    }
                    values[numLabels] = Double.parseDouble(train.attribute(labelIndices[i])
                            .value((int) train.instance(k).value(labelIndices[i])));
                    Instance metaInstance = DataUtils.createInstance(train.instance(k), 1, values);
                    metaInstance.setDataset(iporesult);
                    iporesult.add(metaInstance);
                }
                attsel.SelectAttributes(iporesult);
                selectedAttributes[i] = attsel.selectedAttributes();
                iporesult.delete();
            }
        }
    }
}
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/**
 * Attaches an index attribute at the beginning of each instance
 *
 * @param original
 * @return
 */
protected Instances attachIndexes(Instances original) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>(original.numAttributes() + 1);
    for (int i = 0; i < original.numAttributes(); i++) {
        attributes.add(original.attribute(i));
    }
    // Add attribute for holding the index at the beginning.
    attributes.add(0, new Attribute("Index"));
    Instances transformed = new Instances("Meta format", attributes, 0);
    for (int i = 0; i < original.numInstances(); i++) {
        Instance newInstance;
        newInstance = (Instance) original.instance(i).copy();
        newInstance.setDataset(null);
        newInstance.insertAttributeAt(0);
        newInstance.setValue(0, i);
        transformed.add(newInstance);
    }
    transformed.setClassIndex(original.classIndex() + 1);
    return transformed;
}
From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java
License:Open Source License
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);
    //Generate the chain: Test the same dataset
    MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet);
    labelIndices = tempTrainingSet.getLabelIndices();
    featureIndices = tempTrainingSet.getFeatureIndices();
    // One-vs-one models
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];
    Instances trainingData = tempTrainingSet.getDataSet();
    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        // Attribute of label 1
        Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);
            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                Instance tempInstance;
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }
                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);
                if (!value1.equals(value2)) {
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }
            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors);
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);
            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}
From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java
License:Open Source License
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);
    // One-vs-one models
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];
    ArrayList<MultiLabelOutput> predictions;
    predictions = predictLabels(trainingSet);
    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            //Generate the chain: Test the same dataset
            MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet, label1, label2, predictions);
            Instances trainingData = tempTrainingSet.getDataSet();
            labelIndices = tempTrainingSet.getLabelIndices();
            featureIndices = tempTrainingSet.getFeatureIndices();
            // Attribute of label 1
            Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);
            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                Instance tempInstance;
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }
                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);
                if (!value1.equals(value2)) {
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }
            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors);
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);
            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}