Example usage for weka.core Instances ARFF_DATA

List of usage examples for weka.core Instances ARFF_DATA

Introduction

On this page you can find example usages of weka.core Instances ARFF_DATA.

Prototype

String ARFF_DATA

To view the source code for weka.core Instances ARFF_DATA, click Source Link.

Document

The keyword used to denote the start of the ARFF data section.

Usage

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * Writes the reduced training set described by one particle to
 * {@code reduced.arff}.
 * <p>
 * Every instance of {@code internalTrain} whose class value differs from
 * {@code majorLabel} is copied to the file unchanged. The remaining
 * instances are cached, in data-set order, in the {@code major} field, and
 * the j-th cached instance is written only when
 * {@code particles[PId][j] == 1}.
 *
 * @param PId   particle Id
 * @throws IOException if the output file cannot be created or written
 */
public void modifyData(int PId) throws IOException {
    BufferedWriter fw = new BufferedWriter(new FileWriter("reduced.arff"));

    // try/finally so the file handle is released even when a write fails
    try {
        // write the data definition
        fw.write(Instances.ARFF_RELATION + " reducedSet");
        fw.newLine();
        fw.newLine();

        // Declare the attributes strictly in index order (the same header
        // loop createBalanceData uses). The data rows below are produced by
        // instance(i).toString(), so the header has to follow the data set's
        // own attribute order; writing the class attribute separately at the
        // end is only correct when it happens to be the last attribute.
        for (int i = 0; i < internalTrain.numAttributes(); i++) {
            fw.write(internalTrain.attribute(i).toString());
            fw.newLine();
        }

        fw.newLine();
        fw.write(Instances.ARFF_DATA);
        fw.newLine();

        // copying all minor samples and loading the major samples into "major[]"
        // NOTE(review): assumes majorSize equals the number of instances
        // labelled majorLabel in internalTrain -- confirm against the caller.
        major = new String[majorSize];

        int majorIndex = 0;

        for (int i = 0; i < internalTrain.numInstances(); i++) {
            if (internalTrain.instance(i).classValue() != majorLabel) {
                fw.write(internalTrain.instance(i).toString());
                fw.newLine();
            } else {
                major[majorIndex] = internalTrain.instance(i).toString();
                majorIndex++;
            }
        }

        // adding major samples into the file based on the particle information
        for (int j = 0; j < majorSize; j++) {
            if (particles[PId][j] == 1) {
                fw.write(major[j]);
                fw.newLine();
            }
        }
    } finally {
        fw.close();
    }
}

From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java

License:Open Source License

/**
 * Creates the balanced training data set and writes it to
 * {@code balanceTrain.arff}.
 * <p>
 * The file contains every instance of {@code dataset} whose class value
 * differs from {@code majorLabel}, followed by at most the same number of
 * distinct entries from {@code selectedSample}, taken in descending order of
 * how often each entry occurs in {@code selectedSample}. The full ranking is
 * also printed to standard output.
 *
 * @throws IOException if the output file cannot be created or written
 */
public void createBalanceData() throws IOException {

    int minorSize = 0;
    BufferedWriter bw = new BufferedWriter(new FileWriter("balanceTrain.arff"));

    // try/finally so the file handle is released even when a write fails
    try {
        //------------ output training data definition -------------//
        bw.write(Instances.ARFF_RELATION + " balancedTrainSet");
        bw.newLine();

        for (int i = 0; i < dataset.numAttributes(); i++) {
            bw.write(dataset.attribute(i).toString());
            bw.newLine();
        }

        bw.write(Instances.ARFF_DATA);
        bw.newLine();

        //------------ output samples from minor class ------------//
        for (int j = 0; j < dataset.numInstances(); j++) {
            if (dataset.instance(j).classValue() != majorLabel) {
                minorSize++;
                bw.write(dataset.instance(j).toString());
                bw.newLine();
            }
        }

        //------------ add selected samples into hash table ------------//
        int max = 0;
        Hashtable<String, Integer> ht = new Hashtable<String, Integer>();

        for (int i = 0; i < selectedSample.size(); i++) {
            String sample = selectedSample.get(i);
            Integer previous = ht.get(sample);
            int count = (previous == null) ? 1 : previous.intValue() + 1;
            ht.put(sample, count);

            // Track the maximum on every insertion, including the first
            // occurrence. Otherwise max stays 0 when no sample repeats and
            // the ranking loop below never emits anything.
            if (max < count) {
                max = count;
            }
        }

        //------------- Sort the hash table and create balanced training data set -------------//
        System.out.println("-------------------- rankings ---------------------");
        System.out.println("sample                              selection count");

        // Walk the counts from the highest down to 1; within one count the
        // samples appear in the hash table's own iteration order.
        int sampleCount = 0;
        for (int curCount = max; curCount > 0; curCount--) {
            Iterator<String> itr = ht.keySet().iterator();
            while (itr.hasNext()) {
                String key = itr.next(); // sample
                int value = ht.get(key).intValue(); // selection count

                if (value == curCount) {
                    System.out.println(key + "\t" + value);

                    // direct sampling: stop writing once the selected
                    // samples match the minor class in number, but keep
                    // printing the ranking
                    if (sampleCount < minorSize) {
                        bw.write(key);
                        bw.newLine();
                        sampleCount++;
                    }
                }
            }
        }

        // An alternative tournament-selection sampling scheme used to be
        // kept here as commented-out code; only direct sampling is active.
    } finally {
        bw.close();
    }

    System.out.println("balanced traning dataset created as `balanceTrain.arff'");
    System.out.println("-------------------- -------------- ---------------------");
}

From source file:mulan.data.generation.DataSetBuilder.java

License:Open Source License

/**
 * Renders the given data set definition as ARFF text and returns that text
 * as an in-memory stream.
 * <p>
 * The output consists of the relation line (using the definition's name),
 * one attribute declaration per attribute, the data marker, and
 * {@code getExamplesCount()} data rows. Each row holds one
 * {@code getAttributeValue(attribute)} result per attribute, separated by
 * {@code VALUE_SEPARATOR}.
 *
 * @param dataSetDefinition source of the relation name, the attributes and
 *                          the number of rows to generate
 * @return a stream over the bytes of the generated ARFF text
 */
public static InputStream CreateArffDataSet(DataSetDefinition dataSetDefinition) {
    StringBuilder sw = new StringBuilder();
    sw.append(Instances.ARFF_RELATION).append(EMPTY_SPACE).append(dataSetDefinition.getName()).append(NEW_LINE);

    List<Attribute> attributes = dataSetDefinition.getAttributes();
    for (Attribute attr : attributes) {
        sw.append(weka.core.Attribute.ARFF_ATTRIBUTE).append(EMPTY_SPACE);
        sw.append(attr.getName()).append(EMPTY_SPACE).append(getAttributeType(attr)).append(NEW_LINE);
    }

    sw.append(Instances.ARFF_DATA).append(NEW_LINE);

    // generate vectors
    for (int i = 0; i < dataSetDefinition.getExamplesCount(); i++) {
        // Put the separator between values instead of appending it after
        // every value and deleting the last character: with an empty
        // attribute list the delete would remove the preceding line break,
        // and it would also be wrong for a separator longer than one char.
        boolean firstValue = true;
        for (Attribute attribute : attributes) {
            if (!firstValue) {
                sw.append(VALUE_SEPARATOR);
            }
            sw.append(getAttributeValue(attribute));
            firstValue = false;
        }
        sw.append(NEW_LINE);
    }

    // NOTE(review): getBytes() encodes with the platform default charset;
    // confirm that whatever reads this stream decodes it the same way.
    return new ByteArrayInputStream(sw.toString().getBytes());
}