com.imolinfo.offline.InModelValidation.java Source code

Java tutorial

Introduction

Here is the source code for com.imolinfo.offline.InModelValidation.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.imolinfo.offline;

import com.imolinfo.model.Document;
import com.imolinfo.plug.iface.DocumentProvider;
import com.imolinfo.plug.iface.DocumentToLabeledPoint;
import com.imolinfo.plug.impl.DocumentStandardCleaner;
import com.imolinfo.plug.impl.DocumentToTFIDFLabeledPoint;
import com.imolinfo.util.GlobalVariable;
import com.imolinfo.plug.clm.SVMOneVsAll;
import com.imolinfo.util.TestUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.Properties;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.classification.NaiveBayes;
import org.apache.spark.mllib.classification.NaiveBayesModel;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.rdd.RDD;

/**
 * Command-line driver that starts a Spark context, loads the runtime
 * configuration and hands control to {@link #invokePipeline(JavaSparkContext)}.
 *
 * <p>The pipeline body is currently commented out, so running this class only
 * starts Spark, publishes the properties through {@link GlobalVariable} and
 * shuts Spark down again.
 *
 * @author renzo
 */
public class InModelValidation {

    /**
     * Program entry point.
     *
     * <p>Creates a Spark context named {@code "Train"}, reads
     * {@code runtime.properties} (resolved against the current working
     * directory), stores the result in the {@link GlobalVariable} singleton and
     * runs the pipeline.
     *
     * @param args command-line arguments; not used
     * @throws IOException if {@code runtime.properties} cannot be opened or read
     * @throws ClassNotFoundException declared for {@link #invokePipeline(JavaSparkContext)}
     * @throws InstantiationException declared for {@link #invokePipeline(JavaSparkContext)}
     * @throws IllegalAccessException declared for {@link #invokePipeline(JavaSparkContext)}
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {

        SparkConf conf = new SparkConf().setAppName("Train");
        final JavaSparkContext jsc = new JavaSparkContext(conf);
        try {
            Properties p = new Properties();
            // Close the file handle as soon as the properties are loaded,
            // even when load() throws.
            try (FileInputStream in = new FileInputStream("runtime.properties")) {
                p.load(in);
            }
            GlobalVariable.getInstance().setProperties(p);
            invokePipeline(jsc);
        } finally {
            // Always release the Spark context, including on failure paths.
            jsc.stop();
        }
    }

    /**
     * Runs the validation pipeline on the given Spark context.
     *
     * <p><strong>Currently a no-op:</strong> the whole implementation below is
     * commented out. As written, the disabled code would:
     * <ol>
     *   <li>instantiate the {@code DocumentProvider} named by the
     *       {@code sourceClass} property and read the {@code trainingSet};</li>
     *   <li>clean the documents and convert them to TF-IDF labeled points;</li>
     *   <li>delete {@code outputModelPath}, create {@code idfModelPath} and
     *       serialize the IDF model to {@code idfModelFile};</li>
     *   <li>train a Naive Bayes model and a one-vs-all SVM model on the
     *       features;</li>
     *   <li>pass the same features and each model to {@code TestUtils.analyze}
     *       and print the statistics strings.</li>
     * </ol>
     *
     * <p>NOTE(review): the disabled code never closes its
     * {@code ObjectOutputStream}; wrap it in try-with-resources if this
     * pipeline is re-enabled.
     *
     * @param jsc the Spark context the pipeline would run on; unused while the
     *            body is disabled
     * @throws IOException declared for the disabled file operations
     * @throws ClassNotFoundException declared for the disabled reflective
     *         lookup of {@code sourceClass}
     * @throws InstantiationException declared for the disabled reflective
     *         instantiation of {@code sourceClass}
     * @throws IllegalAccessException declared for the disabled reflective
     *         instantiation of {@code sourceClass}
     */
    public static void invokePipeline(JavaSparkContext jsc)
            throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        /*
        Properties prop = GlobalVariable.getInstance().getProperties();
            
        DocumentProvider tp = (DocumentProvider) Class.forName(prop.getProperty("sourceClass")).newInstance();
        JavaRDD<Document> inputD = tp.getTextFromDs(jsc, prop.getProperty("trainingSet"));
            
        DocumentStandardCleaner tc = new DocumentStandardCleaner();
        inputD = tc.cleanData(inputD);
            
        DocumentToLabeledPoint tl = new DocumentToTFIDFLabeledPoint();
        inputD = tl.vectorize(inputD);
        JavaRDD<LabeledPoint> features = tl.convert(inputD);
        RDD<LabeledPoint> featureData = features.rdd();
            
        FileUtils.deleteDirectory(new File(prop.getProperty("outputModelPath")));
        File idfFile = new File(prop.getProperty("idfModelPath"));
        idfFile.mkdirs();
        FileOutputStream fos = new FileOutputStream(prop.getProperty("idfModelFile"));
        ObjectOutputStream oos = new ObjectOutputStream(fos);
        oos.writeObject(tl.getIDFModel());
        featureData.cache();
        features.cache();
            
        NaiveBayesModel nbModel = NaiveBayes.train(featureData, 1);
        //nbModel.save(jsc.sc(), prop.getProperty("nbPath"));
            
        SVMOneVsAll svmModel = SVMOneVsAll.train(jsc,features);
        //svmModel.save(jsc, prop.getProperty("svmOAPath"));
            
        TestUtils.analyze(features, nbModel);
        String nbResult=TestUtils.printStats("NAIVE BAYES");
        TestUtils.analyze(features, svmModel);
        String svmResult=TestUtils.printStats("SVM");
        System.out.println(nbResult);
        System.out.println(svmResult);
        */

    }

}