PEBL.TwoStep.java Source code

Java tutorial

Introduction

Here is the source code for PEBL.TwoStep.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package PEBL;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.lazy.KStar;
import weka.classifiers.trees.J48;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.ConverterUtils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;

/**
 * Two-step labelling run: trains a Naive Bayes model on a labelled CSV file, prints a
 * 10-fold cross-validation report for it, then uses the model to assign class "0" or
 * "1" to every row of an unlabelled CSV file and saves the labelled rows as ARFF.
 *
 * <p>NOTE(review): the code assumes the class is the last column of both files and
 * that the class has exactly the two labels "0" and "1" (in that order) -- confirm
 * against the actual data files.
 *
 * @author jxy
 */
public class TwoStep {

    /** Labelled training data used when no first command-line argument is given. */
    private static final String DEFAULT_TRAINING_CSV =
            "Z:\\\\shared from vm\\\\fourthset\\\\mixed.csv";

    /** Unlabelled data used when no second command-line argument is given. */
    private static final String DEFAULT_UNLABELED_CSV =
            "Z:\\\\shared from vm\\\\fourthset\\\\unlabelled.csv";

    /** Output ARFF file used when no third command-line argument is given. */
    private static final String DEFAULT_OUTPUT_ARFF =
            "Z:\\shared from vm\\thirdset\\relabelled.arff";

    /**
     * Runs the train / evaluate / label / save pipeline.
     *
     * @param args optional overrides, all positional: {@code [0]} training file,
     *             {@code [1]} unlabelled file, {@code [2]} output ARFF file; any
     *             argument that is absent falls back to the built-in default path
     * @throws Exception if loading, filtering, training, evaluation or saving fails
     */
    public static void main(String[] args) throws Exception {
        String trainingPath = argOrDefault(args, 0, DEFAULT_TRAINING_CSV);
        String unlabeledPath = argOrDefault(args, 1, DEFAULT_UNLABELED_CSV);
        String outputPath = argOrDefault(args, 2, DEFAULT_OUTPUT_ARFF);

        ConverterUtils.DataSource source = new ConverterUtils.DataSource(trainingPath);
        Instances data = source.getDataSet();

        // Formats such as CSV carry no class information; fall back to the last column.
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        // Convert numeric columns to nominal. The filter is initialised on the
        // training data and is reused for the unlabelled data further down.
        NumericToNominal nmf = new NumericToNominal();
        nmf.setInputFormat(data);
        data = Filter.useFilter(data, nmf);

        // Naive Bayes with -K (kernel density estimation for numeric attributes,
        // per the Weka NaiveBayes option list).
        NaiveBayes c = new NaiveBayes();
        c.setOptions(new String[] {"-K"});
        c.buildClassifier(data);

        // 10-fold cross-validation with a fixed seed so the report is repeatable.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(c, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
        System.out.println("--- model learned on mixed set ---");

        // Load the rows to be labelled; the class is taken to be the last column.
        ConverterUtils.DataSource s = new ConverterUtils.DataSource(unlabeledPath);
        Instances unlabeled = s.getDataSet();
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        // Reuse the filter initialised on the training data. A second, separately
        // initialised NumericToNominal would derive its nominal labels from the
        // unlabelled values alone, so the same value could get a different index
        // than in the training header and the model would misread the rows.
        unlabeled = Filter.useFilter(unlabeled, nmf);

        for (int i = 0; i < unlabeled.numInstances(); i++) {
            Instance row = unlabeled.instance(i);
            // Query the model once per row and read both class probabilities from it.
            double[] distribution = c.distributionForInstance(row);
            double classZero = distribution[0];
            double classOne = distribution[1];
            // Ties go to "1".
            String label = classZero > classOne ? "0" : "1";
            // Print before assigning so the row is shown as it was loaded.
            System.out.println("classifying: " + row + " : " + classZero + " - " + classOne
                    + " == class: " + label);
            row.setClassValue(label);
        }

        // Save the now-labelled rows.
        ArffSaver saver = new ArffSaver();
        saver.setInstances(unlabeled);
        saver.setFile(new File(outputPath));
        saver.writeBatch();
    }

    /** Returns {@code args[index]} when that argument was supplied, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

}