Example usage for weka.classifiers Evaluation crossValidateModel

Introduction

This page lists example usages of weka.classifiers.Evaluation.crossValidateModel, collected from open source projects.

Prototype

public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random)
        throws Exception 

Document

Performs a (stratified if class is nominal) cross-validation for a classifier on a set of instances.
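
For orientation, here is a minimal, self-contained sketch of the call before the longer examples below. It is not taken from the sources that follow; the "iris.arff" path is a placeholder for any ARFF file with a nominal class attribute.

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidateExample {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path (assumption, not from the examples below)
        Instances data = new DataSource("iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        Evaluation eval = new Evaluation(data);
        // crossValidateModel copies and retrains the classifier for each fold,
        // so an untrained J48 can be passed directly
        eval.crossValidateModel(new J48(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}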

Usage

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile2(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial2.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute[] attr = new Attribute[4];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[3] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);

    // Create an empty dataset with this attribute header
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(4);
        for (int j = 0; j < 4; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 3)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(3);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwords.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close();

    //Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Resample if needed
    //dataset = dataset.resample(new Random(42));
    //Split into a 70:30 train and test set (built but not used below; evaluation uses 10-fold CV on the full dataset)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(3);
    test.setClassIndex(3);

    //Evaluate with 10-fold cross-validation on the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile3(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial3.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute[] attr = new Attribute[8];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[7] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);

    // Create an empty dataset with this attribute header
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(8);
        for (int j = 0; j < 8; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 7)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(7);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndpos.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close();

    //Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Resample if needed
    //dataset = dataset.resample(new Random(42));
    //Split into a 70:30 train and test set (built but not used below; evaluation uses 10-fold CV on the full dataset)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(7);
    test.setClassIndex(7);

    //Evaluate with 10-fold cross-validation on the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile4(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial4.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute[] attr = new Attribute[11];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[10] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);

    // Create an empty dataset with this attribute header
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(11);
        for (int j = 0; j < 11; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 10)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(10);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddep.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close();

    //Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Resample if needed
    //dataset = dataset.resample(new Random(42));
    //Split into a 70:30 train and test set (built but not used below; evaluation uses 10-fold CV on the full dataset)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(10);
    test.setClassIndex(10);

    //Evaluate with 10-fold cross-validation on the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile5(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial5.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute[] attr = new Attribute[13];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[12] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);
    attrs.addElement(attr[11]);
    attrs.addElement(attr[12]);

    // Create an empty dataset with this attribute header
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(13);
        for (int j = 0; j < 13; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 12)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(12);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndbl.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close();

    //Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Resample if needed
    //dataset = dataset.resample(new Random(42));
    //Split into a 70:30 train and test set (built but not used below; evaluation uses 10-fold CV on the full dataset)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(12);
    test.setClassIndex(12);

    //Evaluate with 10-fold cross-validation on the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile6(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial6.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute[] attr = new Attribute[15];

    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");
    attr[12] = new Attribute("VSPositive");
    attr[13] = new Attribute("VSNegative");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);
    attrs.addElement(attr[11]);
    attrs.addElement(attr[12]);
    attrs.addElement(attr[13]);
    attrs.addElement(attr[14]);

    // Create an empty dataset with this attribute header
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(14);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close();

    //Read classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Resample if needed
    //dataset = dataset.resample(new Random(42));
    //Split into a 70:30 train and test set (built but not used below; evaluation uses 10-fold CV on the full dataset)
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(14);
    test.setClassIndex(14);

    //Evaluate with 10-fold cross-validation on the full dataset
    Evaluation eval = new Evaluation(dataset);
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED F-MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:cyber009.main.UDALNeuralNetwork.java

public static void main(String[] args) {
    UDALNeuralNetwork udal = new UDALNeuralNetwork(0.014013);
    Statistics statis = new Statistics(udal.v);
    long timeStart = 0, timeEnd = 0;
    for (int f = 2; f <= 2; f++) {
        udal.initUDAL(4, 5000);
        udal.activeLearning(0, 5000);
        udal.arraytoInstances();
        udal.ann.weightReset();
        timeStart = System.currentTimeMillis();
        MultilayerPerceptron wekaNN = new MultilayerPerceptron();
        wekaNN.setAutoBuild(true);
        //wekaNN.setGUI(true);
        try {
            wekaNN.buildClassifier(udal.dataSet);
            Evaluation eval = new Evaluation(udal.dataSet);
            System.out.println(wekaNN.toString());
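            // 4999 folds on a 5000-instance dataset is effectively leave-one-out cross-validation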
            eval.crossValidateModel(wekaNN, udal.dataSet, 4999, new Random(System.currentTimeMillis()));
            System.out.println(wekaNN.toString());
            System.out.println(eval.toClassDetailsString());

            //            udal.ann.gradientDescent(10000L, 3, 100);
            //            for (Double target : udal.v.CLASSES) {
            //                statis.calMVMuSigma(target);
            //                System.out.println(udal.v.N_DATA_IN_CLASS.get(target));
            //                System.out.println(statis.mu.get(target));
            //                System.out.println(statis.sigma.get(target));
            //            }
            //            for(int d=0; d<udal.v.D; d++) {
            //                if(udal.v.LABEL[d] == false) {
            //                    double [][] val = new double[udal.v.N-1][1];
            //                    for(int n=1; n<udal.v.N; n++) {
            //                        val[n-1][0] = udal.v.X[d][n];
            ////                        System.out.print(udal.v.X[d][n] + "   ");
            ////                        System.out.println(val[n-1][0]);
            //                    }
            //                    Matrix mVal = new Matrix(val);
            //                    double pp = 0.0D;
            //                    for (Double target : udal.v.CLASSES) {
            //                        //System.out.println("-----------------------\nClass:"+ target);
            //                        pp += statis.posteriorDistribution(target, mVal);
            //                        System.out.println("conditional: Entropy: "+ 
            //                                statis.conditionalEntropy(target, mVal, d));
            //                    }
            //                    System.out.print("Sum posterior:"+ pp+ " for "+new Matrix(val).transpose());
            //                    
            //                }
            //            }
            //            System.out.println("-----------------------");
            //            timeEnd = System.currentTimeMillis();
            //            System.out.println("feature #:"+udal.v.N+" time:("+ (timeEnd - timeStart) +")");
            //            udal.v.showResult();
            //            
        } catch (Exception ex) {
            Logger.getLogger(UDALNeuralNetwork.class.getName()).log(Level.SEVERE, null, ex);
        }

    }
}

From source file:de.fub.maps.project.detector.model.inference.processhandler.CrossValidationProcessHandler.java

License:Open Source License

private void evaluate(Instances trainingSet) {
    try {
        Evaluation evaluation = new Evaluation(trainingSet);
        // cap the fold count at the number of instances in the training set
        int crossValidationFoldsCount = Math.min(getCrossValidationFoldsCount(), trainingSet.size());
        evaluation.crossValidateModel(getInferenceModel().getClassifier(), trainingSet,
                crossValidationFoldsCount, new Random(1));
        updateVisualRepresentation(evaluation);
    } catch (Exception ex) {
        throw new InferenceModelClassifyException(ex.getMessage(), ex);
    }
}

From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java

License:Apache License

/**
 *
 * This method creates a serialized WEKA model file from an .arff file containing the annotated gold standard
 *
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation)
 * @throws Exception
 */

public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove the ID attribute

    BayesNet bn = new BayesNet(); //Standard classifier; BNs proved most robust, but of course other classifiers are possible
    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier
    SerializationHelper.write(model, fc);
    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }

}
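
The model serialized above can later be restored with SerializationHelper.read. A minimal sketch, assuming placeholder paths ("bn.model", "unlabeled.arff") that are not from the original source:

import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.SerializationHelper;
import weka.core.converters.ConverterUtils.DataSource;

public class LoadModelSketch {
    public static void main(String[] args) throws Exception {
        // restore the serialized FilteredClassifier (path is a placeholder)
        Classifier fc = (Classifier) SerializationHelper.read("bn.model");
        Instances unlabeled = new DataSource("unlabeled.arff").getDataSet();
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
        for (int i = 0; i < unlabeled.numInstances(); i++) {
            double label = fc.classifyInstance(unlabeled.instance(i));
            System.out.println(unlabeled.classAttribute().value((int) label));
        }
    }
}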

From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java

License:Apache License

@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    List<String> labelNames = ClassifierProvider.labelNames(Y);
    labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
            .forEachOrdered(attributes::add);
    List<String> featureNames = ClassifierProvider.featureNames(X);
    featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, 0);
    datasetSchema.setClassIndex(labelNames.size());
    // add instances
    // due to the limitation of the interface definition, X and Y have to be reorganized
    SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
    IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
    Instances trainingInstances = new Instances(datasetSchema, XY.size());
    for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
        Set<String> y = ImmutableSet.copyOf(entry.getValue());
        Map<String, Double> x = entry.getKey();
        SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
        for (String labelName : labelNames) {
            instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
        }
        for (Map.Entry<String, Double> e : x.entrySet()) {
            instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
        }
        trainingInstances.add(instance);
    }
    // training
    try {
        classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
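            // note: the Random is unseeded, so fold assignment differs between runs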
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}

From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java

License:Apache License

@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
    Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    attributes.add(label);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, X.size());
    datasetSchema.setClass(label);
    // add instances
    Instances trainingInstances = new Instances(datasetSchema, X.size());
    if (balanceWeight) {
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < X.size(); i++) {
            String y = Y.get(i);
            double weight = maxCount / labelCounts.count(y);
            trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
        }
    } else {
        for (int i = 0; i < X.size(); i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    }
    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // backup training dataset as arff file
    if (datasetExportFile != null) {
        try {
            ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingInstances);
            saver.setFile(datasetExportFile);
            saver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
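            // note: the Random is unseeded, so fold assignment differs between runs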
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}