implements a simple text learner in Java using WEKA - Java Machine Learning AI

Java examples for Machine Learning AI:weka

Description

Implements a simple text learner in Java using WEKA.

Demo Code

/**
 * A Java class that implements a simple text learner, based on WEKA.
 * To be used with MyClassifier.java.
 * WEKA is available at: http://www.cs.waikato.ac.nz/ml/weka/
 * Copyright (C) 2013 Jose Maria Gomez Hidalgo - http://www.esp.uem.es/jmgomez
 *
 * This program is free software: you can redistribute it and/or modify
 * it for any purpose.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

import weka.core.Instances;
import weka.classifiers.Evaluation;
import java.util.Random;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.converters.ArffLoader.ArffReader;
import java.io.*;

/**
 * This class implements a simple text learner in Java using WEKA.
 * It loads a text dataset written in ARFF format, evaluates a classifier on it,
 * and saves the learnt model for further use.
 * @author Jose Maria Gomez Hidalgo - http://www.esp.uem.es/jmgomez
 * @see MyClassifier
 */
public class MyLearner {

    /** Number of folds used by {@link #evaluate()} for cross-validation. */
    private static final int CV_FOLDS = 4;

    /** Fixed random seed so that the cross-validation splits are reproducible. */
    private static final long CV_SEED = 1L;

    /**
     * Object that stores training data. It is {@code null} until a dataset has
     * been loaded successfully by {@link #loadDataset(String)}.
     */
    Instances trainData;
    /**
     * Object that stores the classifier. It is {@code null} until
     * {@link #learn()} has trained a model successfully.
     */
    NaiveBayes classifier;

    /**
     * This method loads a dataset in ARFF format. If the file does not exist, or
     * it has a wrong format, the attribute trainData is null.
     * @param fileName The name of the file that stores the dataset.
     */
    public void loadDataset(String fileName) {
        // Forget any previously loaded data first, so that a failed load
        // really leaves trainData null as documented.
        trainData = null;
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(
                fileName))) {
            ArffReader arff = new ArffReader(reader);
            trainData = arff.getData();
            System.out.println("===== Loaded dataset: " + fileName
                    + " =====");
        } catch (IOException e) {
            System.out.println("Problem found when reading: " + fileName);
            // Keep the cause visible instead of silently dropping it.
            System.err.println(e);
        }
    }

    /**
     * This method evaluates a Naive Bayes classifier on the loaded dataset by
     * cross-validation and prints the results. As recommended by WEKA
     * documentation, the classifier handed to the evaluation is defined but not
     * trained yet. Evaluation of previously trained classifiers can lead to
     * unexpected results.
     * <p>
     * The evaluation works on its own local classifier object, so a model that
     * has already been trained by {@link #learn()} is left untouched.
     * If no dataset has been loaded, a problem message is printed and nothing
     * else happens.
     */
    public void evaluate() {
        if (trainData == null) {
            System.out.println("Problem found when evaluating");
            System.err.println("No dataset has been loaded");
            return;
        }
        try {
            // The class attribute is assumed to be the last one in the dataset.
            trainData.setClassIndex(trainData.numAttributes() - 1);
            // Local, untrained classifier: the 'classifier' field is reserved
            // for the model trained by learn().
            NaiveBayes untrained = new NaiveBayes();
            Evaluation eval = new Evaluation(trainData);
            eval.crossValidateModel(untrained, trainData, CV_FOLDS,
                    new Random(CV_SEED));
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            System.out
                    .println("===== Evaluating on filtered (training) dataset done =====");
        } catch (Exception e) {
            System.out.println("Problem found when evaluating");
            System.err.println(e);
        }
    }

    /**
     * This method trains the classifier on the loaded dataset and prints the
     * resulting model. The attribute classifier is only replaced when training
     * succeeds. If no dataset has been loaded, a problem message is printed and
     * nothing else happens.
     */
    public void learn() {
        if (trainData == null) {
            System.out.println("Problem found when training");
            System.err.println("No dataset has been loaded");
            return;
        }
        try {
            // The class attribute is assumed to be the last one in the dataset.
            trainData.setClassIndex(trainData.numAttributes() - 1);
            NaiveBayes model = new NaiveBayes();
            model.buildClassifier(trainData);
            // Publish the model only once it has been built successfully.
            classifier = model;
            // Comment out to hide the textual description of the classifier
            System.out.println(classifier);
            System.out
                    .println("===== Training on filtered (training) dataset done =====");
        } catch (Exception e) {
            System.out.println("Problem found when training");
            System.err.println(e);
        }
    }

    /**
     * This method saves the trained model into a file. This is done by
     * simple serialization of the classifier object. If there is no trained
     * classifier, nothing is written and a problem message is printed.
     * @param fileName The name of the file that will store the trained model.
     */
    public void saveModel(String fileName) {
        if (classifier == null) {
            // Do not create a file that only contains a serialized null.
            System.out.println("Problem found when writing: " + fileName);
            System.err.println("No trained classifier to save");
            return;
        }
        // try-with-resources closes (and flushes) the stream even on failure.
        try (ObjectOutputStream out = new ObjectOutputStream(
                new FileOutputStream(fileName))) {
            out.writeObject(classifier);
        } catch (IOException e) {
            System.out.println("Problem found when writing: " + fileName);
            System.err.println(e);
            return;
        }
        // Reported only after the stream has been closed without error.
        System.out.println("===== Saved model: " + fileName + " =====");
    }

    /**
     * Main method. It is an example of the usage of this class.
     * @param args Command-line arguments: fileData and fileModel.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out
                    .println("Usage: java MyLearner <fileData> <fileModel>");
            return;
        }

        MyLearner learner = new MyLearner();
        learner.loadDataset(args[0]);
        if (learner.trainData == null) {
            // Loading failed and has already been reported; there is nothing
            // to evaluate, train on or save.
            return;
        }
        // Evaluation must be done before training
        // More info in: http://weka.wikispaces.com/Use+WEKA+in+your+Java+code
        learner.evaluate();
        learner.learn();
        learner.saveModel(args[1]);
    }
}

Related Tutorials