List of usage examples for the weka.core.Instances method renameAttributeValue
public void renameAttributeValue(Attribute att, String val, String name)
From source file:com.sliit.normalize.NormalizeDataset.java
/**
 * Runs the dataset through the Weka {@code Normalize} filter, recodes the class labels
 * and writes the result out as CSV.
 *
 * <p>The input is read from {@code reducedDiemensionFile} when that field is set, otherwise
 * from {@code tempFIle} when it exists, otherwise from {@code csvFile}. The last attribute is
 * used as the class attribute; its value {@code "normal."} is renamed to {@code "0"} and every
 * other class value to {@code "1"}. The filtered data is saved to
 * {@code <parent of csvFile>/normalized<csvFile name>}, {@code writeToNewFile} is invoked on the
 * parent directory, and the intermediate files are deleted afterwards.
 *
 * <p>Every instance of the dataset (index 0 included) is pushed through the filter; the
 * previous version started both loops at index 1 and silently dropped the first row.
 *
 * @return the path string built from the parent directory, {@code "norm"} and the CSV file
 *         name, or an empty string when an exception occurred
 */
public String normalizeDataset() {
    System.out.println("start normalizing data");
    String filePathOut = "";
    try {
        // Pick the most processed input that is available.
        CSVLoader loader = new CSVLoader();
        if (reducedDiemensionFile != null) {
            loader.setSource(reducedDiemensionFile);
        } else if (tempFIle != null && tempFIle.exists()) {
            loader.setSource(tempFIle);
        } else {
            loader.setSource(csvFile);
        }

        Instances dataInstance = loader.getDataSet();
        Normalize normalize = new Normalize();
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);
        normalize.setInputFormat(dataInstance);

        String directory = csvFile.getParent();
        outputFile = new File(directory + "/" + "normalized" + csvFile.getName());
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        CSVSaver saver = new CSVSaver();
        saver.setFile(outputFile);

        // Feed ALL instances to the filter; instance indices are zero-based.
        int instanceCount = dataInstance.numInstances();
        for (int i = 0; i < instanceCount; i++) {
            normalize.input(dataInstance.instance(i));
        }
        normalize.batchFinished();

        // Collect exactly as many filtered instances as were pushed in.
        Instances outPut = new Instances(dataInstance, 0);
        for (int i = 0; i < instanceCount; i++) {
            outPut.add(normalize.output());
        }

        // Recode the class labels: "normal." -> "0", anything else -> "1".
        // The labels are read from the untouched input header, so earlier renames on
        // outPut do not affect the lookups that follow.
        Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1);
        for (int j = 0; j < attribute.numValues(); j++) {
            String newLabel = attribute.value(j).equals("normal.") ? "0" : "1";
            outPut.renameAttributeValue(attribute, attribute.value(j), newLabel);
        }

        saver.setInstances(outPut);
        saver.writeBatch();
        writeToNewFile(directory);
        // NOTE(review): no path separator is inserted between the directory and "norm";
        // presumably this matches the name produced by writeToNewFile - confirm.
        filePathOut = directory + "norm" + csvFile.getName();

        // Clean up the intermediate files.
        if (tempFIle != null) {
            tempFIle.delete();
        }
        if (reducedDiemensionFile != null) {
            reducedDiemensionFile.delete();
        }
        outputFile.delete();
    } catch (Exception e) {
        // IOException and every other failure are handled identically; keep the cause
        // so the stack trace is not lost.
        log.error("Error occurred:" + e.getMessage(), e);
    }
    return filePathOut;
}
From source file:com.sliit.normalize.NormalizeDataset.java
public boolean updateStringValues(Map<Integer, String> values) { System.out.println("updating String Values"); boolean status = false; try {// w ww .j av a 2 s . c o m csv.setSource(csvFile); Instances dataInstance = csv.getDataSet(); for (int i = 0; i < dataInstance.numInstances(); i++) { if (values.containsKey(i)) { Attribute attribute = dataInstance.attribute(i); for (int j = 0; j < attribute.numValues(); j++) { dataInstance.renameAttributeValue(attribute, attribute.value(j), j + ""); } } } tempFIle = new File(csvFile.getParent() + "/temp.csv"); CSVSaver saver = new CSVSaver(); saver.setInstances(dataInstance); saver.setFile(tempFIle); saver.writeBatch(); } catch (IOException e) { log.error("Error occurred:" + e.getMessage()); } return status; }
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
/** * Method for testing this class.//ww w.j a v a 2s . com * * @param argv * should contain one element: the name of an ARFF file */ // @ requires argv != null; // @ requires argv.length == 1; // @ requires argv[0] != null; public static void test(String[] argv) { ProcessInstances instances, secondInstances, train, test, empty; Random random = new Random(2); Reader reader; int start, num; FastVector testAtts, testVals; int i, j; try { if (argv.length > 1) { throw (new Exception("Usage: ProcessInstances [<filename>]")); } // Creating set of instances from scratch testVals = new FastVector(2); testVals.addElement("first_value"); testVals.addElement("second_value"); testAtts = new FastVector(2); testAtts.addElement(new Attribute("nominal_attribute", testVals)); testAtts.addElement(new Attribute("numeric_attribute")); instances = new ProcessInstances("test_set", testAtts, new FastVector(), 10); instances.addInstance(new ProcessInstance(instances.numAttributes())); instances.addInstance(new ProcessInstance(instances.numAttributes())); instances.addInstance(new ProcessInstance(instances.numAttributes())); instances.setClassIndex(0); System.out.println("\nSet of instances created from scratch:\n"); System.out.println(instances); if (argv.length == 1) { String filename = argv[0]; reader = new FileReader(filename); // Read first five instances and print them System.out.println("\nFirst five instances from file:\n"); instances = new ProcessInstances(reader, 1); instances.setClassIndex(instances.numAttributes() - 1); i = 0; while ((i < 5) && (instances.readInstance(reader))) { i++; } System.out.println(instances); // Read all the instances in the file reader = new FileReader(filename); instances = new ProcessInstances(reader); // Make the last attribute be the class instances.setClassIndex(instances.numAttributes() - 1); // Print header and instances. 
System.out.println("\nDataset:\n"); System.out.println(instances); System.out.println("\nClass index: " + instances.classIndex()); } // Test basic methods based on class index. System.out.println("\nClass name: " + instances.classAttribute().name()); System.out.println("\nClass index: " + instances.classIndex()); System.out.println("\nClass is nominal: " + instances.classAttribute().isNominal()); System.out.println("\nClass is numeric: " + instances.classAttribute().isNumeric()); System.out.println("\nClasses:\n"); for (i = 0; i < instances.numClasses(); i++) { System.out.println(instances.classAttribute().value(i)); } System.out.println("\nClass values and labels of instances:\n"); for (i = 0; i < instances.numInstances(); i++) { ProcessInstance inst = instances.getInstance(i); System.out.print(inst.classValue() + "\t"); System.out.print(inst.toString(inst.classIndex())); if (instances.getInstance(i).classIsMissing()) { System.out.println("\tis missing"); } else { System.out.println(); } } // Create random weights. System.out.println("\nCreating random weights for instances."); for (i = 0; i < instances.numInstances(); i++) { instances.getInstance(i).setWeight(random.nextDouble()); } // Print all instances and their weights (and the sum of weights). 
System.out.println("\nInstances and their weights:\n"); System.out.println(instances.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(instances.sumOfWeights()); // Insert an attribute secondInstances = new ProcessInstances(instances); Attribute testAtt = new Attribute("Inserted"); secondInstances.insertAttributeAt(testAtt, 0); System.out.println("\nSet with inserted attribute:\n"); System.out.println(secondInstances); System.out.println("\nClass name: " + secondInstances.classAttribute().name()); // Delete the attribute secondInstances.deleteAttributeAt(0); System.out.println("\nSet with attribute deleted:\n"); System.out.println(secondInstances); System.out.println("\nClass name: " + secondInstances.classAttribute().name()); // Test if headers are equal System.out.println("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n"); // Print data in internal format. System.out.println("\nData (internal values):\n"); for (i = 0; i < instances.numInstances(); i++) { for (j = 0; j < instances.numAttributes(); j++) { if (instances.getInstance(i).isMissing(j)) { System.out.print("? "); } else { System.out.print(instances.getInstance(i).value(j) + " "); } } System.out.println(); } // Just print header System.out.println("\nEmpty dataset:\n"); empty = new ProcessInstances(instances, 0); System.out.println(empty); System.out.println("\nClass name: " + empty.classAttribute().name()); // Create copy and rename an attribute and a value (if possible) if (empty.classAttribute().isNominal()) { Instances copy = new ProcessInstances(empty, 0); copy.renameAttribute(copy.classAttribute(), "new_name"); copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value(0), "new_val_name"); System.out.println("\nDataset with names changed:\n" + copy); System.out.println("\nOriginal dataset:\n" + empty); } // Create and prints subset of instances. 
start = instances.numInstances() / 4; num = instances.numInstances() / 2; System.out.print("\nSubset of dataset: "); System.out.println(num + " instances from " + (start + 1) + ". instance"); secondInstances = new ProcessInstances(instances, start, num); System.out.println("\nClass name: " + secondInstances.classAttribute().name()); // Print all instances and their weights (and the sum of weights). System.out.println("\nInstances and their weights:\n"); System.out.println(secondInstances.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(secondInstances.sumOfWeights()); // Create and print training and test sets for 3-fold // cross-validation. System.out.println("\nTrain and test folds for 3-fold CV:"); if (instances.classAttribute().isNominal()) { instances.stratify(3); } for (j = 0; j < 3; j++) { train = instances.trainCV(3, j, new Random(1)); test = instances.testCV(3, j); // Print all instances and their weights (and the sum of // weights). System.out.println("\nTrain: "); System.out.println("\nInstances and their weights:\n"); System.out.println(train.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(train.sumOfWeights()); System.out.println("\nClass name: " + train.classAttribute().name()); System.out.println("\nTest: "); System.out.println("\nInstances and their weights:\n"); System.out.println(test.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(test.sumOfWeights()); System.out.println("\nClass name: " + test.classAttribute().name()); } // Randomize instances and print them. System.out.println("\nRandomized dataset:"); instances.randomize(random); // Print all instances and their weights (and the sum of weights). 
System.out.println("\nInstances and their weights:\n"); System.out.println(instances.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(instances.sumOfWeights()); // Sort instances according to first attribute and // print them. System.out.print("\nInstances sorted according to first attribute:\n "); instances.sort(0); // Print all instances and their weights (and the sum of weights). System.out.println("\nInstances and their weights:\n"); System.out.println(instances.instancesAndWeights()); System.out.print("\nSum of weights: "); System.out.println(instances.sumOfWeights()); } catch (Exception e) { e.printStackTrace(); } }
From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.RenameStates.java
License:Open Source License
/** * The <code>renameStates</code> method takes in a WEKA Instances object * corresponding to the data (in this application, gene expression data) discretized into a number of bins * and returns a new WEKA Instances object with the names of the bins * in the given data replaced by the given bin labels * * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data * @param binLabels an <code>ArrayList</code> of <code>String</code> corresponding to the label of each bin. * @return an <code>Instances</code> a new WEKA Instances object with the names of the bins * in the given data replaced by the given bin labels *///from w w w . j a va2 s.c o m public static Instances renameStates(Instances data, ArrayList binLabels) { ArrayList al = new ArrayList(); Attribute attr = null; for (int i = 1; i < data.numAttributes(); i++) { if (data.attribute(i).isNominal()) { attr = data.attribute(i); for (int j = 0; j < attr.numValues(); j++) { data.renameAttributeValue(attr, attr.value(j), (String) binLabels.get(j)); } } } return data; }