List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:mulan.classifier.neural.NormalizationFilter.java
License:Open Source License
private void Initialize(MultiLabelInstances mlData) { Instances dataSet = mlData.getDataSet(); int[] featureIndices = mlData.getFeatureIndices(); for (int attIndex : featureIndices) { Attribute feature = dataSet.attribute(attIndex); if (feature.isNumeric()) { Stats stats = dataSet.attributeStats(attIndex).numericStats; attStats.put(attIndex, new double[] { stats.min, stats.max }); }//from ww w.j av a2 s . c o m } }
From source file:mulan.classifier.transformation.CalibratedLabelRanking.java
License:Open Source License
@Override protected void buildInternal(MultiLabelInstances trainingSet) throws Exception { // Virtual label models debug("Building calibration label models"); System.out.println("Building calibration label models"); virtualLabelModels = new BinaryRelevance(getBaseClassifier()); virtualLabelModels.setDebug(getDebug()); virtualLabelModels.build(trainingSet); // One-vs-one models numModels = ((numLabels) * (numLabels - 1)) / 2; oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels); nodata = new boolean[numModels]; metaDataTest = new Instances[numModels]; Instances trainingData = trainingSet.getDataSet(); int counter = 0; // Creation of one-vs-one models for (int label1 = 0; label1 < numLabels - 1; label1++) { // Attribute of label 1 Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]); for (int label2 = label1 + 1; label2 < numLabels; label2++) { debug("Building one-vs-one model " + (counter + 1) + "/" + numModels); System.out.println("Building one-vs-one model " + (counter + 1) + "/" + numModels); // Attribute of label 2 Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]); // initialize training set Instances dataOneVsOne = new Instances(trainingData, 0); // filter out examples with no preference for (int i = 0; i < trainingData.numInstances(); i++) { Instance tempInstance;//from ww w .ja v a 2 s . 
c o m if (trainingData.instance(i) instanceof SparseInstance) { tempInstance = new SparseInstance(trainingData.instance(i)); } else { tempInstance = new DenseInstance(trainingData.instance(i)); } int nominalValueIndex; nominalValueIndex = (int) tempInstance.value(labelIndices[label1]); String value1 = attrLabel1.value(nominalValueIndex); nominalValueIndex = (int) tempInstance.value(labelIndices[label2]); String value2 = attrLabel2.value(nominalValueIndex); if (!value1.equals(value2)) { tempInstance.setValue(attrLabel1, value1); dataOneVsOne.add(tempInstance); } } // remove all labels apart from label1 and place it at the end Reorder filter = new Reorder(); int numPredictors = trainingData.numAttributes() - numLabels; int[] reorderedIndices = new int[numPredictors + 1]; for (int i = 0; i < numPredictors; i++) { reorderedIndices[i] = featureIndices[i]; } reorderedIndices[numPredictors] = labelIndices[label1]; filter.setAttributeIndicesArray(reorderedIndices); filter.setInputFormat(dataOneVsOne); dataOneVsOne = Filter.useFilter(dataOneVsOne, filter); //System.out.println(dataOneVsOne.toString()); dataOneVsOne.setClassIndex(numPredictors); // build model label1 vs label2 if (dataOneVsOne.size() > 0) { oneVsOneModels[counter].buildClassifier(dataOneVsOne); } else { nodata[counter] = true; } dataOneVsOne.delete(); metaDataTest[counter] = dataOneVsOne; counter++; } } }
From source file:mulan.classifier.transformation.LabelPowerset.java
License:Open Source License
protected void buildInternal(MultiLabelInstances mlData) throws Exception { Instances transformedData; transformation = new LabelPowersetTransformation(); debug("Transforming the training set."); transformedData = transformation.transformInstances(mlData); //================================================================ // System.out.println("Transformed training set: \n" + transformedData.toString()); // check for unary class // System.out.println("Building single-label classifier."); //============================================================== if (transformedData.attribute(transformedData.numAttributes() - 1).numValues() > 1) { baseClassifier.buildClassifier(transformedData); }// w ww . ja va 2s. c o m }
From source file:mulan.classifier.transformation.LabelsetPruning.java
License:Open Source License
@Override protected void buildInternal(MultiLabelInstances mlDataSet) throws Exception { Instances data = mlDataSet.getDataSet(); format = new Instances(data, 0); int numInstances = data.numInstances(); ListInstancePerLabel = new HashMap<LabelSet, ArrayList<Instance>>(); for (int i = 0; i < numInstances; i++) { double[] dblLabels = new double[numLabels]; for (int j = 0; j < numLabels; j++) { int index = labelIndices[j]; double value = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index))); dblLabels[j] = value;// w w w. j a v a2 s.c o m } LabelSet labelSet = new LabelSet(dblLabels); if (ListInstancePerLabel.containsKey(labelSet)) { ListInstancePerLabel.get(labelSet).add(data.instance(i)); } else { ArrayList<Instance> li = new ArrayList<Instance>(); li.add(data.instance(i)); ListInstancePerLabel.put(labelSet, li); } } // Iterates the structure and a) if occurences of a labelset are higher // than p parameter then add them to the training set, b) if occurences // are less, then depending on the strategy discard/reintroduce them Instances newData = new Instances(data, 0); Iterator<LabelSet> it = ListInstancePerLabel.keySet().iterator(); while (it.hasNext()) { LabelSet ls = it.next(); ArrayList<Instance> instances = ListInstancePerLabel.get(ls); if (instances.size() > p) { for (int i = 0; i < instances.size(); i++) { newData.add(instances.get(i)); } } else { ArrayList<Instance> processed = processRejected(ls); newData.addAll(processed); } } super.buildInternal(new MultiLabelInstances(newData, mlDataSet.getLabelsMetaData())); }
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/** * Attaches an index attribute at the beginning of each instance * * @param original// w w w .j ava2 s . c o m * @return */ protected Instances attachIndexes(Instances original) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(original.numAttributes() + 1); for (int i = 0; i < original.numAttributes(); i++) { attributes.add(original.attribute(i)); } // Add attribute for holding the index at the beginning. attributes.add(0, new Attribute("Index")); Instances transformed = new Instances("Meta format", attributes, 0); for (int i = 0; i < original.numInstances(); i++) { Instance newInstance; newInstance = (Instance) original.instance(i).copy(); newInstance.setDataset(null); newInstance.insertAttributeAt(0); newInstance.setValue(0, i); transformed.add(newInstance); } transformed.setClassIndex(original.classIndex() + 1); return transformed; }
From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java
License:Open Source License
@Override protected void buildInternal(MultiLabelInstances trainingSet) throws Exception { // Virtual label models debug("Building calibration label models"); virtualLabelModels = new BinaryRelevance(getBaseClassifier()); virtualLabelModels.setDebug(getDebug()); virtualLabelModels.build(trainingSet); //Generate the chain: Test the same dataset MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet); labelIndices = tempTrainingSet.getLabelIndices(); featureIndices = tempTrainingSet.getFeatureIndices(); // One-vs-one models numModels = ((numLabels) * (numLabels - 1)) / 2; oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels); nodata = new boolean[numModels]; metaDataTest = new Instances[numModels]; Instances trainingData = tempTrainingSet.getDataSet(); int counter = 0; // Creation of one-vs-one models for (int label1 = 0; label1 < numLabels - 1; label1++) { // Attribute of label 1 Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]); for (int label2 = label1 + 1; label2 < numLabels; label2++) { debug("Building one-vs-one model " + (counter + 1) + "/" + numModels); // Attribute of label 2 Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]); // initialize training set Instances dataOneVsOne = new Instances(trainingData, 0); // filter out examples with no preference for (int i = 0; i < trainingData.numInstances(); i++) { Instance tempInstance;//from w ww.ja v a2s . 
co m if (trainingData.instance(i) instanceof SparseInstance) { tempInstance = new SparseInstance(trainingData.instance(i)); } else { tempInstance = new DenseInstance(trainingData.instance(i)); } int nominalValueIndex; nominalValueIndex = (int) tempInstance.value(labelIndices[label1]); String value1 = attrLabel1.value(nominalValueIndex); nominalValueIndex = (int) tempInstance.value(labelIndices[label2]); String value2 = attrLabel2.value(nominalValueIndex); if (!value1.equals(value2)) { tempInstance.setValue(attrLabel1, value1); dataOneVsOne.add(tempInstance); } } // remove all labels apart from label1 and place it at the end Reorder filter = new Reorder(); int numPredictors = trainingData.numAttributes() - numLabels; int[] reorderedIndices = new int[numPredictors + 1]; System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors); reorderedIndices[numPredictors] = labelIndices[label1]; filter.setAttributeIndicesArray(reorderedIndices); filter.setInputFormat(dataOneVsOne); dataOneVsOne = Filter.useFilter(dataOneVsOne, filter); //System.out.println(dataOneVsOne.toString()); dataOneVsOne.setClassIndex(numPredictors); // build model label1 vs label2 if (dataOneVsOne.size() > 0) { oneVsOneModels[counter].buildClassifier(dataOneVsOne); } else { nodata[counter] = true; } dataOneVsOne.delete(); metaDataTest[counter] = dataOneVsOne; counter++; } } }
From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java
License:Open Source License
@Override protected void buildInternal(MultiLabelInstances trainingSet) throws Exception { // Virtual label models debug("Building calibration label models"); virtualLabelModels = new BinaryRelevance(getBaseClassifier()); virtualLabelModels.setDebug(getDebug()); virtualLabelModels.build(trainingSet); // One-vs-one models numModels = ((numLabels) * (numLabels - 1)) / 2; oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels); nodata = new boolean[numModels]; metaDataTest = new Instances[numModels]; ArrayList<MultiLabelOutput> predictions; predictions = predictLabels(trainingSet); int counter = 0; // Creation of one-vs-one models for (int label1 = 0; label1 < numLabels - 1; label1++) { for (int label2 = label1 + 1; label2 < numLabels; label2++) { //Generate the chain: Test the same dataset MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet, label1, label2, predictions); Instances trainingData = tempTrainingSet.getDataSet(); labelIndices = tempTrainingSet.getLabelIndices(); featureIndices = tempTrainingSet.getFeatureIndices(); // Attribute of label 1 Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]); debug("Building one-vs-one model " + (counter + 1) + "/" + numModels); // Attribute of label 2 Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]); // initialize training set Instances dataOneVsOne = new Instances(trainingData, 0); // filter out examples with no preference for (int i = 0; i < trainingData.numInstances(); i++) { Instance tempInstance;//from w ww. 
j a va2s .c om if (trainingData.instance(i) instanceof SparseInstance) { tempInstance = new SparseInstance(trainingData.instance(i)); } else { tempInstance = new DenseInstance(trainingData.instance(i)); } int nominalValueIndex; nominalValueIndex = (int) tempInstance.value(labelIndices[label1]); String value1 = attrLabel1.value(nominalValueIndex); nominalValueIndex = (int) tempInstance.value(labelIndices[label2]); String value2 = attrLabel2.value(nominalValueIndex); if (!value1.equals(value2)) { tempInstance.setValue(attrLabel1, value1); dataOneVsOne.add(tempInstance); } } // remove all labels apart from label1 and place it at the end Reorder filter = new Reorder(); int numPredictors = trainingData.numAttributes() - numLabels; int[] reorderedIndices = new int[numPredictors + 1]; System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors); reorderedIndices[numPredictors] = labelIndices[label1]; filter.setAttributeIndicesArray(reorderedIndices); filter.setInputFormat(dataOneVsOne); dataOneVsOne = Filter.useFilter(dataOneVsOne, filter); //System.out.println(dataOneVsOne.toString()); dataOneVsOne.setClassIndex(numPredictors); // build model label1 vs label2 if (dataOneVsOne.size() > 0) { oneVsOneModels[counter].buildClassifier(dataOneVsOne); } else { nodata[counter] = true; } dataOneVsOne.delete(); metaDataTest[counter] = dataOneVsOne; counter++; } } }
From source file:mulan.data.ConverterCLUS.java
License:Open Source License
/** * Converts the original dataset to mulan compatible dataset. * * @param sourceFilename the source file name * @param arffFilename the converted arff name * @param xmlFilename the xml name/*from www .j a va 2 s . c o m*/ * @throws java.lang.Exception */ public static void convert(String sourceFilename, String arffFilename, String xmlFilename) throws Exception { String line; try { BufferedReader brInput = new BufferedReader(new FileReader(sourceFilename)); String relationName = null; ArrayList<Attribute> attInfo = new ArrayList<Attribute>(); Instances data = null; int numAttributes = 0; String[] labelNames = null; while ((line = brInput.readLine()) != null) { if (line.startsWith("@RELATION")) { relationName = line.replace("@RELATION ", "").replaceAll("'", "").trim(); continue; } if (line.startsWith("@ATTRIBUTE ")) { String tokens[] = line.split("\\s+"); Attribute att; if (line.startsWith("@ATTRIBUTE class")) { labelNames = tokens[3].split(","); for (int i = 0; i < labelNames.length; i++) { ArrayList<String> labelValues = new ArrayList<String>(); labelValues.add("0"); labelValues.add("1"); att = new Attribute(labelNames[i], labelValues); attInfo.add(att); } } else { numAttributes++; if (tokens[2].equals("numeric")) { att = new Attribute(tokens[1]); } else { ArrayList<String> nominalValues = new ArrayList<String>(); tokens[2].substring(1, tokens[2].length() - 1); String[] nominalTokens = tokens[2].substring(1, tokens[2].length() - 1).split(","); for (int i = 0; i < nominalTokens.length; i++) { nominalValues.add(nominalTokens[i]); } att = new Attribute(tokens[1], nominalValues); } attInfo.add(att); } continue; } if (line.toLowerCase().startsWith("@data")) { data = new Instances(relationName, attInfo, 0); while ((line = brInput.readLine()) != null) { // fill data String[] tokens = line.split(","); double[] values = new double[attInfo.size()]; for (int i = 0; i < numAttributes; i++) { Attribute att = (Attribute) attInfo.get(i); if (att.isNumeric()) { values[i] = 
Double.parseDouble(tokens[i]); } else { values[i] = att.indexOfValue(tokens[i]); } } String[] labels = tokens[numAttributes].split("@"); // fill class values for (int j = 0; j < labels.length; j++) { String[] splitedLabels = labels[j].split("/"); String attrName = splitedLabels[0]; Attribute att = data.attribute(attrName); values[attInfo.indexOf(att)] = 1; for (int k = 1; k < splitedLabels.length; k++) { attrName = attrName + "/" + splitedLabels[k]; att = data.attribute(attrName); values[attInfo.indexOf(att)] = 1; } } Instance instance = new DenseInstance(1, values); data.add(instance); } } } BufferedWriter writer; writer = new BufferedWriter(new FileWriter(arffFilename)); writer.write(data.toString()); writer.close(); // write xml file writer = new BufferedWriter(new FileWriter(xmlFilename)); writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"); writer.write("<labels xmlns=\"http://mulan.sourceforge.net/labels\">\n"); writer.write("<label name=\"" + labelNames[0] + "\">"); int depth = 0; for (int i = 1; i < labelNames.length; i++) { int difSlashes = countSlashes(labelNames[i]) - countSlashes(labelNames[i - 1]); // child if (difSlashes == 1) { depth++; writer.write("\n"); for (int j = 0; j < depth; j++) { writer.write("\t"); } writer.write("<label name=\"" + labelNames[i] + "\">"); } // sibling if (difSlashes == 0) { writer.write("</label>\n"); for (int j = 0; j < depth; j++) { writer.write("\t"); } writer.write("<label name=\"" + labelNames[i] + "\">"); } // ancestor if (difSlashes < 0) { writer.write("</label>\n"); for (int j = 0; j < Math.abs(difSlashes); j++) { depth--; for (int k = 0; k < depth; k++) { writer.write("\t"); } writer.write("</label>\n"); } for (int j = 0; j < depth; j++) { writer.write("\t"); } writer.write("<label name=\"" + labelNames[i] + "\">"); } } writer.write("</label>\n"); while (depth > 0) { for (int k = 0; k < depth; k++) { writer.write("\t"); } writer.write("</label>\n"); depth--; } writer.write("</labels>"); 
writer.close(); } catch (IOException ioEx) { ioEx.printStackTrace(); } }
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
/** * If {@link Instances} data set are retrieved from {@link MultiLabelInstances} and * post-processed, modified by custom code, it can be again reintegrated into * {@link MultiLabelInstances} if needed. The underlying {@link LabelsMetaData} are * modified to reflect changes in data set. The method creates new instance of * {@link MultiLabelInstances} with modified data set and new meta-data. * <br></br>/* w w w. ja v a 2 s .com*/ * The supported changes are:<br></br> * - remove of label {@link Attribute} to the existing {@link Instances}<br></br> * - add/remove of {@link Instance} from the existing {@link Instances}<br></br> * - add/remove of feature/predictor {@link Attribute} to the existing {@link Instances}<br></br> * * @param modifiedDataSet the modified data set * @return the modified data set * @throws IllegalArgumentException if specified modified data set is null * @throws InvalidDataFormatException if multi-label data format with specified modifications is not valid */ public MultiLabelInstances reintegrateModifiedDataSet(Instances modifiedDataSet) throws InvalidDataFormatException { if (modifiedDataSet == null) { throw new IllegalArgumentException("The modified data set is null."); } //TODO: add support for addition of label attributes to modified data set if necessary LabelsMetaDataImpl newMetaData = (LabelsMetaDataImpl) labelsMetaData.clone(); Set<String> origLabelNames = labelsMetaData.getLabelNames(); for (String labelName : origLabelNames) { if (modifiedDataSet.attribute(labelName) == null) { newMetaData.removeLabelNode(labelName); } } return new MultiLabelInstances(modifiedDataSet, newMetaData); }
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
private LabelsMetaData loadLabesMeta(Instances data, int numLabels) throws InvalidDataFormatException { LabelsMetaDataImpl labelsData = new LabelsMetaDataImpl(); int numAttributes = data.numAttributes(); for (int index = numAttributes - numLabels; index < numAttributes; index++) { String attrName = data.attribute(index).name(); labelsData.addRootNode(new LabelNodeImpl(attrName)); }//from w w w . ja v a 2s. c om if (labelsData.getNumLabels() < numLabels) { throw new InvalidDataFormatException("The names of label attributes are not unique."); } return labelsData; }