Java examples for Machine Learning AI:weka
This example implements a simple text classifier in Java using WEKA's FilteredClassifier.
/**//from w w w.j a v a 2s . c o m * A Java class that implements a simple language identifier, based on WEKA. * It requires a serialized model of the type FilteredClassifier. * WEKA is available at: http://www.cs.waikato.ac.nz/ml/weka/ * Copyright (C) 2013 Jose Maria Gomez Hidalgo - http://www.esp.uem.es/jmgomez * * This program is free software: you can redistribute it and/or modify * it for any purpose. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ import weka.core.*; import weka.classifiers.meta.FilteredClassifier; import java.util.List; import java.util.ArrayList; import java.io.*; /** * This class implements a simple text classifier in Java using WEKA. * It loads a file with the text to classify, and the model that has been * learnt with teh learn.sh script. * @author Jose Maria Gomez Hidalgo - http://www.esp.uem.es/jmgomez * @see MyFilteredLearner */ public class LanguageIdentifier { /** * String that stores the text to guess its language. */ String text; /** * Object that stores the instance. */ Instances instances; /** * Object that stores the classifier. */ FilteredClassifier classifier; /** * This method loads the text to be classified. * @param fileName The name of the file that stores the text. */ public void load(String fileName) { try { BufferedReader reader = new BufferedReader(new FileReader( fileName)); String line; text = ""; while ((line = reader.readLine()) != null) { text = text + " " + line; } System.out.println("===== Loaded text data: " + fileName + " ====="); reader.close(); System.out.println(text); } catch (IOException e) { System.out.println("Problem found when reading: " + fileName); } } /** * This method loads the model to be used as classifier. * @param fileName The name of the file that stores the text. 
*/ public void loadModel(String fileName) { try { ObjectInputStream in = new ObjectInputStream( new FileInputStream(fileName)); Object tmp = in.readObject(); classifier = (FilteredClassifier) tmp; in.close(); System.out .println("===== Loaded model: " + fileName + " ====="); } catch (Exception e) { // Given the cast, a ClassNotFoundException must be caught along with the IOException System.out.println("Problem found when reading: " + fileName); } } /** * This method creates the instance to be classified, from the text that has been read. */ public void makeInstance() { // Create the header List attributeList = new ArrayList(2); // Create first attribute, the class List values = new ArrayList(3); values.add("EN"); values.add("FR"); values.add("SP"); Attribute attribute1 = new Attribute("language_class", values); attributeList.add(attribute1); // Create second attribute, the text Attribute attribute2 = new Attribute("text", (List) null); attributeList.add(attribute2); // Build instance set with just one instance instances = new Instances("Test relation", (java.util.ArrayList<Attribute>) attributeList, 1); // Set class index instances.setClassIndex(0); // Create and add the instance DenseInstance instance = new DenseInstance(2); instance.setDataset(instances); instance.setValue(attribute2, text); instances.add(instance); System.out .println("===== Instance created with reference dataset ====="); System.out.println(instances); } /** * This method performs the classification of the instance. * Output is done at the command-line. */ public void classify() { try { double pred = classifier .classifyInstance(instances.instance(0)); System.out.println("===== Classified instance ====="); System.out.println("Class predicted: " + instances.classAttribute().value((int) pred)); } catch (Exception e) { System.out.println("Problem found when classifying the text"); } } /** * Main method. It is an example of the usage of this class. 
* @param args Command-line arguments: fileData and fileModel. */ public static void main(String[] args) { LanguageIdentifier classifier; if (args.length < 2) System.out .println("Usage: java LanguageIdentifier <fileData> <fileModel>"); else { classifier = new LanguageIdentifier(); classifier.load(args[0]); classifier.loadModel(args[1]); classifier.makeInstance(); classifier.classify(); } } }