List of usage examples for weka.core Instances ARFF_RELATION
String ARFF_RELATION
To view the source code for weka.core Instances ARFF_RELATION, click the Source Link.
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/** * modify the internal training data with the particle information * /*from w w w . j a v a2 s .co m*/ * @param PId particle Id * @throws IOException */ public void modifyData(int PId) throws IOException { // write the data definition BufferedWriter fw = new BufferedWriter(new FileWriter("reduced.arff")); fw.write(Instances.ARFF_RELATION + " reducedSet"); fw.newLine(); fw.newLine(); for (int i = 0; i < internalTrain.numAttributes() - 1; i++) { fw.write(internalTrain.attribute(i).toString()); fw.newLine(); } fw.write(internalTrain.classAttribute().toString()); fw.newLine(); fw.newLine(); fw.write(Instances.ARFF_DATA); fw.newLine(); // copying all minor samples and loading the major samples into "major[]" major = new String[majorSize]; int majorIndex = 0; for (int i = 0; i < internalTrain.numInstances(); i++) { if (internalTrain.instance(i).classValue() != majorLabel) { fw.write(internalTrain.instance(i).toString()); fw.newLine(); } else if (internalTrain.instance(i).classValue() == majorLabel) { major[majorIndex] = internalTrain.instance(i).toString(); majorIndex++; } } // adding major samples into the file based on the particle information for (int j = 0; j < majorSize; j++) { if (particles[PId][j] == 1) { fw.write(major[j]); fw.newLine(); } } fw.close(); }
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/** * create the balanced training data set * /*w w w .ja va2s . co m*/ * @throws IOException */ public void createBalanceData() throws IOException { int minorSize = 0; BufferedWriter bw = new BufferedWriter(new FileWriter("balanceTrain.arff")); //------------ output training data definition -------------// bw.write(Instances.ARFF_RELATION + " balancedTrainSet"); bw.newLine(); for (int i = 0; i < dataset.numAttributes(); i++) { bw.write(dataset.attribute(i).toString()); bw.newLine(); } bw.write(Instances.ARFF_DATA); bw.newLine(); //------------ output samples from minor class ------------// for (int j = 0; j < dataset.numInstances(); j++) { if (dataset.instance(j).classValue() != majorLabel) { minorSize++; bw.write(dataset.instance(j).toString()); bw.newLine(); } } //------------ add selected samples into hash table ------------// int max = 0; Hashtable<String, Integer> ht = new Hashtable<String, Integer>(); for (int i = 0; i < selectedSample.size(); i++) { if (ht.containsKey(selectedSample.get(i))) { Integer C = (Integer) ht.get(selectedSample.get(i)); int c = C.intValue(); c++; if (max < c) { max = c; } C = new Integer(c); ht.put(selectedSample.get(i), C); } else { ht.put(selectedSample.get(i), 1); } } //------------- Sort the hash table and create balanced training data set -------------// System.out.println("-------------------- rankings ---------------------"); System.out.println("sample selection count"); int sampleCount = 0; int curCount = max; while (curCount != 0) { Integer ccurCount = new Integer(curCount); Iterator<String> itr; itr = ht.keySet().iterator(); while (itr.hasNext()) { String key = (String) itr.next(); // sample String value = ht.get(key).toString(); // selection count // iterate through the selected samples and print the sample of current count if (value.equals(ccurCount.toString())) { System.out.println(key + "\t" + value); // direct sampling if (sampleCount < minorSize) { bw.write(key); bw.newLine(); sampleCount++; } } } curCount--; } // 
tournament selection sampling /* while (sampleCount < minorSize) { int winner = random.nextInt(selectedSample.size()); int j; for (int t = 1; t < tournamentSize; t++) { j = random.nextInt(selectedSample.size()); while (winner == j) { j = random.nextInt(selectedSample.size()); } // compare the rank int rank1 = ht.get(selectedSample.get(winner)); int rank2 = ht.get(selectedSample.get(j)); if (rank2 > rank1) { winner = j; } } bw.write(selectedSample.get(winner)); bw.newLine(); sampleCount++; } */ bw.close(); System.out.println("balanced traning dataset created as `balanceTrain.arff'"); System.out.println("-------------------- -------------- ---------------------"); }
From source file:mulan.data.generation.DataSetBuilder.java
License:Open Source License
public static InputStream CreateArffDataSet(DataSetDefinition dataSetDefinition) { StringBuilder sw = new StringBuilder(); sw.append(Instances.ARFF_RELATION).append(EMPTY_SPACE).append(dataSetDefinition.getName()).append(NEW_LINE); List<Attribute> attributes = dataSetDefinition.getAttributes(); for (Attribute attr : attributes) { sw.append(weka.core.Attribute.ARFF_ATTRIBUTE).append(EMPTY_SPACE); sw.append(attr.getName()).append(EMPTY_SPACE).append(getAttributeType(attr)).append(NEW_LINE); }// www. jav a 2 s . c o m sw.append(Instances.ARFF_DATA).append(NEW_LINE); // generate vectors for (int i = 0; i < dataSetDefinition.getExamplesCount(); i++) { for (Attribute attribute : attributes) { sw.append(getAttributeValue(attribute)).append(VALUE_SEPARATOR); } sw.deleteCharAt(sw.length() - 1); sw.append(NEW_LINE); } return new ByteArrayInputStream(sw.toString().getBytes()); }