Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package PEBL;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.lazy.KStar;
import weka.classifiers.trees.J48;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.ConverterUtils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;

/**
 * Trains a Naive Bayes model on a labelled ("mixed") data set, reports a
 * 10-fold cross-validation of that model, then uses the model to assign a
 * class ("0" or "1") to every instance of a second data set and saves the
 * relabelled instances as an ARFF file.
 *
 * @author jxy
 */
public class TwoStep {

    /** Default location of the labelled training data. */
    private static final String DEFAULT_TRAINING_PATH =
            "Z:\\\\shared from vm\\\\fourthset\\\\mixed.csv";

    /** Default location of the data to be labelled. */
    private static final String DEFAULT_UNLABELED_PATH =
            "Z:\\\\shared from vm\\\\fourthset\\\\unlabelled.csv";

    /**
     * Default location of the relabelled output.
     * NOTE(review): this points at "thirdset" while both inputs come from
     * "fourthset" - confirm that this is intended.
     */
    private static final String DEFAULT_OUTPUT_PATH =
            "Z:\\shared from vm\\thirdset\\relabelled.arff";

    /**
     * Runs the train / evaluate / relabel / save pipeline.
     *
     * @param args optional overrides: {@code args[0]} training data path,
     *             {@code args[1]} unlabelled data path, {@code args[2]} output
     *             ARFF path; any argument that is absent falls back to the
     *             built-in default path
     * @throws Exception if loading, filtering, training, evaluation or saving
     *                   fails inside Weka
     */
    public static void main(String[] args) throws Exception {
        String trainingPath = args.length > 0 ? args[0] : DEFAULT_TRAINING_PATH;
        String unlabeledPath = args.length > 1 ? args[1] : DEFAULT_UNLABELED_PATH;
        String outputPath = args.length > 2 ? args[2] : DEFAULT_OUTPUT_PATH;

        ConverterUtils.DataSource source = new ConverterUtils.DataSource(trainingPath);
        Instances data = source.getDataSet();
        // Set the class attribute if the data format does not provide this
        // information (the XRFF format, for example, stores it in the file).
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        // Fit the numeric-to-nominal conversion on the training data only. The
        // same filter instance is reused for the unlabelled data further down
        // so that both sets share one header (identical nominal value indices).
        NumericToNominal nmf = new NumericToNominal();
        nmf.setInputFormat(data);
        data = Filter.useFilter(data, nmf);

        // Build a Naive Bayes classifier. "-K" selects the kernel density
        // estimator for numeric attributes.
        // NOTE(review): every numeric attribute was just converted to nominal,
        // so "-K" presumably has no effect here - confirm before relying on it.
        String[] options = new String[1];
        options[0] = "-K";
        NaiveBayes c = new NaiveBayes();
        c.setOptions(options);
        c.buildClassifier(data);

        // Report a 10-fold cross-validation with a fixed seed.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(c, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
        System.out.println("--- model learned on mixed set ---");

        // Load the data to be labelled; its last attribute is the class.
        ConverterUtils.DataSource s = new ConverterUtils.DataSource(unlabeledPath);
        Instances unlabeled = s.getDataSet();
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
        // Reuse the filter fitted on the training data (batch filtering).
        // Fitting a fresh NumericToNominal here would derive the nominal values
        // from the unlabelled data alone, and the resulting value indices could
        // disagree with the header the model was trained on.
        unlabeled = Filter.useFilter(unlabeled, nmf);

        // Label every instance with the more probable of the first two classes.
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            // Query the model once per instance and read both probabilities.
            double[] distribution = c.distributionForInstance(unlabeled.instance(i));
            double classZero = distribution[0];
            double classOne = distribution[1];
            // Ties go to "1", because only a strictly larger first probability picks "0".
            String label = classZero > classOne ? "0" : "1";
            // The instance is printed before its class value is overwritten.
            System.out.println(
                    "classifying: " + unlabeled.instance(i) + " : " + classZero
                            + " - " + classOne + " == class: " + label);
            unlabeled.instance(i).setClassValue(label);
        }

        // Save the relabelled data as ARFF.
        ArffSaver saver = new ArffSaver();
        saver.setInstances(unlabeled);
        saver.setFile(new File(outputPath));
        saver.writeBatch();
    }
}