List of usage examples for weka.classifiers.functions.SMO
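Before the examples, a minimal, self-contained sketch of the pattern they all share: load an ARFF file, set the class attribute, then train and evaluate an SMO classifier. The file name is a placeholder; the calls shown (Instances, SMO.buildClassifier, Evaluation.crossValidateModel) are the standard Weka API used throughout the examples below.

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.SMO;
import weka.core.Instances;

public class SmoMinimalExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder path.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        data.setClassIndex(data.numAttributes() - 1); // last attribute is the class

        SMO smo = new SMO(); // defaults: linear PolyKernel, C = 1
        smo.buildClassifier(data);

        // Estimate performance with 10-fold cross-validation.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new SMO(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}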
From source file:focusedCrawler.target.EMClassifier.java
License:Open Source License
private String buildClassifier(String suffix) throws Exception {
    String trainingData = rootDir + File.separator + "trainData_" + suffix;
    // System.out.println("TRAIN:" + trainingData);
    String trainWekafile = wekaFileDir + "weka_" + suffix;
    String testFileDir = rootDir + File.separator + "testData_" + suffix;
    String outputModel = rootDir + File.separator + "model" + File.separator + "model_" + suffix;
    CreateWekaInput createWekaFile = new CreateWekaInput(new File(trainingData), new File(testFileDir), stoplist);
    attributes = createWekaFile.centroid2Weka(trainWekafile);
    double max = Double.MIN_VALUE;
    double cValue = 0;
    int count = 0;
    // Search over the SMO complexity constant C. Note that as written the loop
    // body runs exactly once (count < 1), so only C = 0.0625 is actually tried
    // and the c == 1 branch below is never reached.
    for (double c = 0.0625; count < 1; c = c * 0.5) {
        SMO classifier = new SMO();
        String[] argum = new String[] { "-t", trainWekafile, "-C", "" + c, "-v", "-M", "-d", outputModel + c };
        String output = Evaluation.evaluateModel(classifier, argum);
        // Scrape the accuracy figure out of Weka's textual evaluation report.
        int index = output.indexOf("Correctly Classified Instances");
        if (index >= 0) {
            int end = output.indexOf("%", index);
            String line = output.substring(index, end).trim();
            line = line.substring(line.lastIndexOf(" "));
            double accuracy = Double.parseDouble(line.trim());
            System.out.println("C=" + c + " acc=" + accuracy);
            if (accuracy > max) {
                max = accuracy;
                cValue = c;
            }
        }
        count++;
        if (c == 1) {
            testClassifier(trainWekafile + "_test", outputModel + c);
        }
    }
    return outputModel + cValue;
}
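The method above recovers accuracy by scraping Weka's textual evaluation report, which is fragile. A sketch of a less brittle variant, assuming the training data is already loaded as an Instances object named dataTrain, reads the same figure from the Evaluation object directly:

// Sketch: obtain accuracy programmatically instead of parsing report text.
SMO classifier = new SMO();
classifier.setC(0.0625); // same complexity constant as the "-C" option above
Evaluation eval = new Evaluation(dataTrain);
eval.crossValidateModel(classifier, dataTrain, 10, new java.util.Random(1));
double accuracy = eval.pctCorrect(); // "Correctly Classified Instances" as a percentage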
From source file:focusedCrawler.target.EMClassifier.java
License:Open Source License
private void testClassifier(String testFile, String outputModel) throws Exception {
    SMO classifier = new SMO();
    // NaiveBayes classifier = new NaiveBayes();
    // System.out.println("java -T " + testFile + " -l " + outputModel);
    String[] argum = new String[] { "-T", testFile, "-l", outputModel, "-i" };
    String output = Evaluation.evaluateModel(classifier, argum);
    // Tokenize the report line that follows the "F-Measure" header to pull
    // out precision, recall, and F-measure.
    int index = output.indexOf("F-Measure");
    if (index >= 0) {
        index = output.indexOf("\n", index);
        int end = output.indexOf("\n", index + 1);
        String line = output.substring(index, end).trim();
        StringTokenizer tokenizer = new StringTokenizer(line, " ");
        int count = 0;
        while (tokenizer.hasMoreTokens()) {
            String word = tokenizer.nextToken();
            if (count == 2) {
                System.out.println("PRECISION:" + word);
            }
            if (count == 3) {
                System.out.println("RECALL:" + word);
            }
            if (count == 4) {
                System.out.println("F-MEASURE:" + word);
            }
            count++;
        }
    }
    System.out.println("-----------");
}
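Tokenizing report lines is similarly brittle; the per-class measures can be read from the Evaluation object instead. A sketch, assuming dataTest holds the test Instances and classifier is the trained model:

// Sketch: per-class measures without tokenizing the report.
Evaluation eval = new Evaluation(dataTest);
eval.evaluateModel(classifier, dataTest);
int positiveClass = 0; // index of the class of interest (assumption)
System.out.println("PRECISION:" + eval.precision(positiveClass));
System.out.println("RECALL:" + eval.recall(positiveClass));
System.out.println("F-MEASURE:" + eval.fMeasure(positiveClass));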
From source file:focusedCrawler.target.PEBL.java
License:Open Source License
private String buildClassifier(String suffix) throws Exception {
    String trainingData = rootDir + File.separator + "trainData_" + suffix;
    // System.out.println("TRAIN:" + trainingData);
    String trainWekafile = wekaFileDir + "weka_" + suffix;
    String testFileDir = rootDir + File.separator + "testData_" + suffix;
    String outputModel = rootDir + File.separator + "model" + File.separator + "model_" + suffix;
    CreateTCWekaInput createWekaFile = new CreateTCWekaInput(new File(trainingData), new File(testFileDir), stoplist);
    attributes = createWekaFile.centroid2Weka(trainWekafile);
    double max = Double.MIN_VALUE;
    double cValue = 0;
    int count = 0;
    // Search over the SMO complexity constant C. As in EMClassifier above, the
    // loop body runs exactly once (count < 1), so only C = 0.0625 is tried and
    // the c == 1 branch is never reached.
    for (double c = 0.0625; count < 1; c = c * 0.5) {
        SMO classifier = new SMO();
        String[] argum = new String[] { "-t", trainWekafile, "-C", "" + c, "-v", "-d", outputModel + c };
        String output = Evaluation.evaluateModel(classifier, argum);
        // Scrape the accuracy figure out of Weka's textual evaluation report.
        int index = output.indexOf("Correctly Classified Instances");
        if (index >= 0) {
            int end = output.indexOf("%", index);
            String line = output.substring(index, end).trim();
            line = line.substring(line.lastIndexOf(" "));
            double accuracy = Double.parseDouble(line.trim());
            System.out.println("C=" + c + " acc=" + accuracy);
            if (accuracy > max) {
                max = accuracy;
                cValue = c;
            }
        }
        count++;
        if (c == 1) {
            testClassifier(trainWekafile + "_test", outputModel + c);
        }
    }
    return outputModel + cValue;
}
From source file:fr.unice.i3s.rockflows.experiments.main.IntermediateExecutor.java
private List<InfoClassifier> inputClassifier(Dataset original) throws Exception {
    List<InfoClassifier> cls = new ArrayList<>();
    int id = 0;
    // Logistic Regression:
    InfoClassifier ic1 = new InfoClassifier(id++);
    ic1.classifier = new Logistic();
    ic1.name = "Logistic Regression";
    ic1.properties.requireNumericDataset = true;
    cls.add(ic1);
    // SVM:
    InfoClassifier ic2 = new InfoClassifier(id++);
    LibSVM ccc = new LibSVM();
    ccc.setOptions(new String[] {
            "-J", // turn off nominal-to-binary conversion
            "-V"  // turn off missing-value replacement
    });
    // ccc.setSVMType(new SelectedTag(LibSVM.SVMTYPE_C_SVC, LibSVM.TAGS_SVMTYPE));
    // ccc.setKernelType(new SelectedTag(LibSVM.KERNELTYPE_RBF, LibSVM.TAGS_KERNELTYPE));
    // ccc.setEps(0.001); // tolerance
    ic2.classifier = ccc;
    ic2.name = "Svm";
    ic2.properties.requireNumericDataset = true;
    cls.add(ic2);
    // J48:
    InfoClassifier ic3 = new InfoClassifier(id++);
    ic3.classifier = new J48();
    ic3.name = "J48";
    ic3.properties.manageMissingValues = true;
    cls.add(ic3);
    // NBTree:
    InfoClassifier ic4 = new InfoClassifier(id++);
    ic4.classifier = new NBTree();
    ic4.name = "NBTree";
    ic4.properties.manageMissingValues = true;
    cls.add(ic4);
    // RandomForest:
    InfoClassifier ic5 = new InfoClassifier(id++);
    RandomForest ccc2 = new RandomForest();
    ccc2.setNumTrees(500);
    ccc2.setMaxDepth(0); // unlimited depth
    ic5.classifier = ccc2;
    ic5.name = "Random Forest";
    ic5.properties.manageMissingValues = true;
    cls.add(ic5);
    // Logistic Model Trees (LMT):
    InfoClassifier ic6 = new InfoClassifier(id++);
    ic6.classifier = new LMT();
    ic6.name = "Logistic Model Tree";
    ic6.properties.manageMissingValues = true;
    cls.add(ic6);
    // Alternating Decision Trees (ADTree):
    InfoClassifier ic7 = new InfoClassifier(id++);
    if (original.trainingSet.numClasses() > 2) {
        MultiClassClassifier mc = new MultiClassClassifier();
        mc.setOptions(new String[] { "-M", "3" }); // 1-vs-1
        mc.setClassifier(new ADTree());
        ic7.classifier = mc;
        ic7.name = "1-vs-1 Alternating Decision Tree";
    } else {
        ic7.classifier = new ADTree();
        ic7.name = "Alternating Decision Tree";
    }
    ic7.properties.manageMultiClass = false;
    ic7.properties.manageMissingValues = true;
    cls.add(ic7);
    // Naive Bayes:
    InfoClassifier ic8 = new InfoClassifier(id++);
    ic8.classifier = new NaiveBayes();
    ic8.name = "Naive Bayes";
    ic8.properties.manageMissingValues = true;
    cls.add(ic8);
    // Bayesian Networks:
    /* All Bayes network algorithms implemented in Weka assume the following
       about the data set: all variables are discrete finite variables (a data
       set with continuous variables can be discretized with
       weka.filters.unsupervised.attribute.Discretize), and no instances have
       missing values (missing values are filled in with
       weka.filters.unsupervised.attribute.ReplaceMissingValues). The first
       step performed by buildClassifier is checking whether the data set
       fulfills those assumptions; if not, the data set is automatically
       filtered and a warning is written to STDERR. */
    InfoClassifier ic9 = new InfoClassifier(id++);
    ic9.classifier = new BayesNet();
    ic9.name = "Bayesian Network";
    ic9.properties.requireNominalDataset = true;
    cls.add(ic9);
    // IBk:
    InfoClassifier ic10 = new InfoClassifier(id++);
    ic10.classifier = new IBk();
    ic10.name = "IBk";
    ic10.properties.manageMissingValues = true;
    cls.add(ic10);
    // JRip:
    InfoClassifier ic11 = new InfoClassifier(id++);
    ic11.classifier = new JRip();
    ic11.name = "JRip";
    ic11.properties.manageMissingValues = true;
    cls.add(ic11);
    // MultilayerPerceptron (MLP):
    InfoClassifier ic12 = new InfoClassifier(id++);
    ic12.classifier = new MultilayerPerceptron();
    ic12.name = "Multilayer Perceptron";
    ic12.properties.requireNumericDataset = true;
    cls.add(ic12);
    // Bagging REPTree:
    InfoClassifier ic14 = new InfoClassifier(id++);
    REPTree base3 = new REPTree();
    Bagging ccc4 = new Bagging();
    ccc4.setClassifier(base3);
    ic14.classifier = ccc4;
    ic14.name = "Bagging RepTree";
    ic14.properties.manageMissingValues = true;
    cls.add(ic14);
    // Bagging J48:
    InfoClassifier ic15 = new InfoClassifier(id++);
    Bagging ccc5 = new Bagging();
    ccc5.setClassifier(new J48());
    ic15.classifier = ccc5;
    ic15.name = "Bagging J48";
    ic15.properties.manageMissingValues = true;
    cls.add(ic15);
    // Bagging NBTree:
    InfoClassifier ic16 = new InfoClassifier(id++);
    Bagging ccc6 = new Bagging();
    ccc6.setClassifier(new NBTree());
    ic16.classifier = ccc6;
    ic16.name = "Bagging NBTree";
    ic16.properties.manageMissingValues = true;
    cls.add(ic16);
    // Bagging OneR:
    InfoClassifier ic17 = new InfoClassifier(id++);
    Bagging ccc7 = new Bagging();
    ccc7.setClassifier(new OneR());
    ic17.classifier = ccc7;
    ic17.name = "Bagging OneR";
    ic17.properties.requireNominalDataset = true;
    ic17.properties.manageMissingValues = true;
    cls.add(ic17);
    // Bagging JRip:
    InfoClassifier ic18 = new InfoClassifier(id++);
    Bagging ccc8 = new Bagging();
    ccc8.setClassifier(new JRip());
    ic18.classifier = ccc8;
    ic18.name = "Bagging JRip";
    ic18.properties.manageMissingValues = true;
    cls.add(ic18);
    // MultiBoostAB DecisionStump:
    InfoClassifier ic24 = new InfoClassifier(id++);
    MultiBoostAB ccc14 = new MultiBoostAB();
    ccc14.setClassifier(new DecisionStump());
    ic24.classifier = ccc14;
    ic24.name = "MultiboostAB DecisionStump";
    ic24.properties.manageMissingValues = true;
    cls.add(ic24);
    // MultiBoostAB OneR:
    InfoClassifier ic25 = new InfoClassifier(id++);
    MultiBoostAB ccc15 = new MultiBoostAB();
    ccc15.setClassifier(new OneR());
    ic25.classifier = ccc15;
    ic25.name = "MultiboostAB OneR";
    ic25.properties.requireNominalDataset = true;
    cls.add(ic25);
    // MultiBoostAB J48:
    InfoClassifier ic27 = new InfoClassifier(id++);
    MultiBoostAB ccc17 = new MultiBoostAB();
    ccc17.setClassifier(new J48());
    ic27.classifier = ccc17;
    ic27.name = "MultiboostAB J48";
    ic27.properties.manageMissingValues = true;
    cls.add(ic27);
    // MultiBoostAB JRip:
    InfoClassifier ic28 = new InfoClassifier(id++);
    MultiBoostAB ccc18 = new MultiBoostAB();
    ccc18.setClassifier(new JRip());
    ic28.classifier = ccc18;
    ic28.name = "MultiboostAB JRip";
    cls.add(ic28);
    // MultiBoostAB NBTree:
    InfoClassifier ic29 = new InfoClassifier(id++);
    MultiBoostAB ccc19 = new MultiBoostAB();
    ccc19.setClassifier(new NBTree());
    ic29.classifier = ccc19;
    ic29.name = "MultiboostAB NBTree";
    ic29.properties.manageMissingValues = true;
    cls.add(ic29);
    // RotationForest RandomTree:
    InfoClassifier ic32 = new InfoClassifier(id++);
    RotationForest ccc21 = new RotationForest();
    RandomTree rtr5 = new RandomTree();
    rtr5.setMinNum(2);
    rtr5.setAllowUnclassifiedInstances(true);
    ccc21.setClassifier(rtr5);
    ic32.classifier = ccc21;
    ic32.name = "RotationForest RandomTree";
    ic32.properties.manageMissingValues = true;
    cls.add(ic32);
    // RotationForest J48:
    InfoClassifier ic33 = new InfoClassifier(id++);
    J48 base6 = new J48();
    RotationForest ccc22 = new RotationForest();
    ccc22.setClassifier(base6);
    ic33.classifier = ccc22;
    ic33.name = "RotationForest J48";
    ic33.properties.manageMissingValues = true;
    cls.add(ic33);
    // RandomCommittee RandomTree:
    InfoClassifier ic34 = new InfoClassifier(id++);
    RandomTree rtr4 = new RandomTree();
    rtr4.setMinNum(2);
    rtr4.setAllowUnclassifiedInstances(true);
    RandomCommittee ccc23 = new RandomCommittee();
    ccc23.setClassifier(rtr4);
    ic34.classifier = ccc23;
    ic34.name = "RandomCommittee RandomTree";
    ic34.properties.manageMissingValues = true;
    cls.add(ic34);
    // Classification via Clustering: SimpleKMeans
    // N.B.: it can't handle date attributes.
    InfoClassifier ic35 = new InfoClassifier(id++);
    ClassificationViaClustering ccc24 = new ClassificationViaClustering();
    SimpleKMeans km = new SimpleKMeans();
    km.setNumClusters(original.trainingSet.numClasses());
    ccc24.setClusterer(km);
    ic35.classifier = ccc24;
    ic35.name = "Classification via Clustering: KMeans";
    ic35.properties.requireNumericDataset = true;
    cls.add(ic35);
    // Classification via Clustering: FarthestFirst
    InfoClassifier ic36 = new InfoClassifier(id++);
    ClassificationViaClustering ccc25 = new ClassificationViaClustering();
    FarthestFirst ff = new FarthestFirst();
    ff.setNumClusters(original.trainingSet.numClasses());
    ccc25.setClusterer(ff);
    ic36.classifier = ccc25;
    ic36.name = "Classification via Clustering: FarthestFirst";
    ic36.properties.requireNumericDataset = true;
    cls.add(ic36);
    // SMO:
    InfoClassifier ic37 = new InfoClassifier(id++);
    ic37.classifier = new SMO();
    ic37.properties.requireNumericDataset = true;
    ic37.properties.manageMultiClass = false;
    ic37.name = "Smo";
    cls.add(ic37);
    // Random Subspace:
    InfoClassifier ic38 = new InfoClassifier(id++);
    RandomSubSpace sub = new RandomSubSpace();
    sub.setClassifier(new REPTree());
    ic38.classifier = sub;
    ic38.name = "Random Subspaces of RepTree";
    ic38.properties.manageMissingValues = true;
    cls.add(ic38);
    // PART (rule based):
    InfoClassifier ic39 = new InfoClassifier(id++);
    PART p39 = new PART();
    p39.setOptions(new String[] { "-C", "0.5" });
    ic39.classifier = p39; // fixed: the original assigned a fresh, unconfigured PART here
    ic39.name = "PART";
    ic39.properties.manageMissingValues = true;
    cls.add(ic39);
    // Decision Table / Naive Bayes hybrid:
    InfoClassifier ic40 = new InfoClassifier(id++);
    ic40.classifier = new DTNB();
    ic40.name = "DTNB";
    ic40.properties.manageMissingValues = true;
    cls.add(ic40);
    // Ridor (rule based):
    InfoClassifier ic41 = new InfoClassifier(id++);
    ic41.classifier = new Ridor();
    ic41.name = "Ridor";
    ic41.properties.manageMissingValues = true;
    cls.add(ic41);
    // Decision Table:
    InfoClassifier ic42 = new InfoClassifier(id++);
    ic42.classifier = new DecisionTable();
    ic42.name = "Decision Table";
    ic42.properties.manageMissingValues = true;
    cls.add(ic42);
    // Conjunctive Rule:
    InfoClassifier ic43 = new InfoClassifier(id++);
    ic43.classifier = new ConjunctiveRule();
    ic43.name = "Conjunctive Rule";
    ic43.properties.manageMissingValues = true;
    cls.add(ic43);
    // LogitBoost Decision Stump:
    InfoClassifier ic44 = new InfoClassifier(id++);
    LogitBoost lb = new LogitBoost();
    lb.setOptions(new String[] { "-L", "1.79" });
    lb.setClassifier(new DecisionStump());
    ic44.classifier = lb;
    ic44.name = "LogitBoost Decision Stump";
    ic44.properties.manageMissingValues = true;
    cls.add(ic44);
    // Raced Incremental Logit Boost, Decision Stump:
    InfoClassifier ic45 = new InfoClassifier(id++);
    RacedIncrementalLogitBoost rlb = new RacedIncrementalLogitBoost();
    rlb.setClassifier(new DecisionStump());
    ic45.classifier = rlb;
    ic45.name = "Raced Incremental Logit Boost, Decision Stumps";
    ic45.properties.manageMissingValues = true;
    cls.add(ic45);
    // AdaBoostM1 Decision Stump:
    InfoClassifier ic46 = new InfoClassifier(id++);
    AdaBoostM1 adm = new AdaBoostM1();
    adm.setClassifier(new DecisionStump());
    ic46.classifier = adm;
    ic46.name = "AdaboostM1, Decision Stumps";
    ic46.properties.manageMissingValues = true;
    cls.add(ic46);
    // AdaBoostM1 J48:
    InfoClassifier ic47 = new InfoClassifier(id++);
    AdaBoostM1 adm2 = new AdaBoostM1();
    adm2.setClassifier(new J48());
    ic47.classifier = adm2;
    ic47.name = "AdaboostM1, J48";
    ic47.properties.manageMissingValues = true;
    cls.add(ic47);
    // MultiBoostAB Decision Table:
    InfoClassifier ic48 = new InfoClassifier(id++);
    MultiBoostAB mba = new MultiBoostAB();
    mba.setClassifier(new DecisionTable());
    ic48.classifier = mba;
    ic48.name = "MultiboostAB, Decision Table";
    ic48.properties.manageMissingValues = true;
    cls.add(ic48);
    // MultiBoostAB Naive Bayes:
    InfoClassifier ic49 = new InfoClassifier(id++);
    MultiBoostAB mba2 = new MultiBoostAB();
    mba2.setClassifier(new NaiveBayes());
    ic49.classifier = mba2;
    ic49.name = "MultiboostAB, Naive Bayes";
    ic49.properties.manageMissingValues = true;
    cls.add(ic49);
    // MultiBoostAB PART:
    InfoClassifier ic50 = new InfoClassifier(id++);
    MultiBoostAB mba3 = new MultiBoostAB();
    mba3.setClassifier(new PART());
    ic50.classifier = mba3;
    ic50.name = "MultiboostAB, PART";
    ic50.properties.manageMissingValues = true;
    cls.add(ic50);
    // MultiBoostAB RandomTree:
    InfoClassifier ic51 = new InfoClassifier(id++);
    MultiBoostAB mba4 = new MultiBoostAB();
    RandomTree rtr3 = new RandomTree();
    rtr3.setMinNum(2);
    rtr3.setAllowUnclassifiedInstances(true);
    mba4.setClassifier(rtr3);
    ic51.classifier = mba4;
    ic51.name = "MultiboostAB, RandomTree";
    ic51.properties.manageMissingValues = true;
    cls.add(ic51);
    // MultiBoostAB REPTree:
    InfoClassifier ic52 = new InfoClassifier(id++);
    MultiBoostAB mba5 = new MultiBoostAB();
    mba5.setClassifier(new REPTree());
    ic52.classifier = mba5;
    ic52.name = "MultiboostAB, RepTree";
    ic52.properties.manageMissingValues = true;
    cls.add(ic52);
    // Bagging Decision Stump:
    InfoClassifier ic53 = new InfoClassifier(id++);
    Bagging bag = new Bagging();
    bag.setClassifier(new DecisionStump());
    ic53.classifier = bag;
    ic53.name = "Bagging Decision Stump";
    ic53.properties.manageMissingValues = true;
    cls.add(ic53);
    // Bagging Decision Table:
    InfoClassifier ic54 = new InfoClassifier(id++);
    Bagging bag1 = new Bagging();
    bag1.setClassifier(new DecisionTable());
    ic54.classifier = bag1;
    ic54.name = "Bagging Decision Table";
    ic54.properties.manageMissingValues = true;
    cls.add(ic54);
    // Bagging HyperPipes:
    InfoClassifier ic55 = new InfoClassifier(id++);
    Bagging bag2 = new Bagging();
    bag2.setClassifier(new HyperPipes());
    ic55.classifier = bag2;
    ic55.name = "Bagging Hyper Pipes";
    cls.add(ic55);
    // Bagging Naive Bayes:
    InfoClassifier ic56 = new InfoClassifier(id++);
    Bagging bag3 = new Bagging();
    bag3.setClassifier(new NaiveBayes());
    ic56.classifier = bag3;
    ic56.name = "Bagging Naive Bayes";
    ic56.properties.manageMissingValues = true;
    cls.add(ic56);
    // Bagging PART:
    InfoClassifier ic57 = new InfoClassifier(id++);
    Bagging bag4 = new Bagging();
    bag4.setClassifier(new PART());
    ic57.classifier = bag4;
    ic57.name = "Bagging PART";
    ic57.properties.manageMissingValues = true;
    cls.add(ic57);
    // Bagging RandomTree:
    InfoClassifier ic58 = new InfoClassifier(id++);
    Bagging bag5 = new Bagging();
    RandomTree rtr2 = new RandomTree();
    rtr2.setMinNum(2);
    rtr2.setAllowUnclassifiedInstances(true);
    bag5.setClassifier(rtr2);
    ic58.classifier = bag5;
    ic58.name = "Bagging RandomTree";
    ic58.properties.manageMissingValues = true;
    cls.add(ic58);
    // NNge:
    InfoClassifier ic59 = new InfoClassifier(id++);
    NNge nng = new NNge();
    nng.setNumFoldersMIOption(1);
    nng.setNumAttemptsOfGeneOption(5);
    ic59.classifier = nng;
    ic59.name = "NNge";
    cls.add(ic59);
    // OrdinalClassClassifier J48:
    InfoClassifier ic60 = new InfoClassifier(id++);
    OrdinalClassClassifier occ = new OrdinalClassClassifier();
    occ.setClassifier(new J48());
    ic60.classifier = occ;
    ic60.name = "OrdinalClassClassifier J48";
    ic60.properties.manageMissingValues = true;
    cls.add(ic60);
    // Hyper Pipes:
    InfoClassifier ic61 = new InfoClassifier(id++);
    ic61.classifier = new HyperPipes();
    ic61.name = "Hyper Pipes";
    cls.add(ic61);
    // Classification via Regression (M5P used by default):
    InfoClassifier ic62 = new InfoClassifier(id++);
    ic62.classifier = new ClassificationViaRegression();
    ic62.name = "Classification ViaRegression, M5P";
    ic62.properties.requireNumericDataset = true;
    cls.add(ic62);
    // RBF Network:
    InfoClassifier ic64 = new InfoClassifier(id++);
    RBFNetwork rbf = new RBFNetwork();
    rbf.setRidge(0.00000001); // 10^-8
    rbf.setNumClusters(original.trainingSet.numAttributes() / 2);
    ic64.classifier = rbf;
    ic64.name = "RBF Network";
    ic64.properties.requireNumericDataset = true;
    if (!original.properties.isStandardized) {
        ic64.properties.compatibleWithDataset = false;
    }
    cls.add(ic64);
    // RandomTree:
    InfoClassifier ic66 = new InfoClassifier(id++);
    RandomTree rtr = new RandomTree();
    rtr.setMinNum(2);
    rtr.setAllowUnclassifiedInstances(true);
    ic66.classifier = rtr;
    ic66.name = "Random Tree";
    ic66.properties.manageMissingValues = true;
    cls.add(ic66);
    // REPTree:
    InfoClassifier ic67 = new InfoClassifier(id++);
    REPTree rept = new REPTree();
    ic67.classifier = rept;
    ic67.name = "Rep Tree";
    ic67.properties.manageMissingValues = true;
    cls.add(ic67);
    // Decision Stump:
    InfoClassifier ic68 = new InfoClassifier(id++);
    ic68.classifier = new DecisionStump();
    ic68.name = "Decision Stump";
    ic68.properties.manageMissingValues = true;
    cls.add(ic68);
    // OneR:
    InfoClassifier ic69 = new InfoClassifier(id++);
    ic69.classifier = new OneR();
    ic69.name = "OneR";
    ic69.properties.requireNominalDataset = true;
    ic69.properties.manageMissingValues = true;
    cls.add(ic69);
    // LWL:
    InfoClassifier ic71 = new InfoClassifier(id++);
    ic71.classifier = new LWL();
    ic71.name = "LWL";
    ic71.properties.manageMissingValues = true;
    cls.add(ic71);
    // Bagging LWL:
    InfoClassifier ic72 = new InfoClassifier(id++);
    Bagging bg72 = new Bagging();
    bg72.setClassifier(new LWL());
    ic72.classifier = bg72;
    ic72.name = "Bagging LWL";
    ic72.properties.manageMissingValues = true;
    cls.add(ic72);
    // Decorate:
    InfoClassifier ic73 = new InfoClassifier(id++);
    ic73.classifier = new Decorate();
    ic73.name = "Decorate";
    ic73.properties.manageMissingValues = true;
    ic73.properties.minNumTrainingInstances = 15;
    this.indexDecorate = id - 1;
    cls.add(ic73);
    // Dagging SMO:
    InfoClassifier ic74 = new InfoClassifier(id++);
    Dagging dng = new Dagging();
    dng.setClassifier(new SMO());
    dng.setNumFolds(4);
    ic74.classifier = dng;
    ic74.properties.requireNumericDataset = true;
    ic74.properties.manageMultiClass = false;
    ic74.name = "Dagging SMO";
    cls.add(ic74);
    // IB1:
    InfoClassifier ic75 = new InfoClassifier(id++);
    ic75.classifier = new IB1();
    ic75.properties.manageMissingValues = true;
    ic75.name = "IB1";
    cls.add(ic75);
    // Simple Logistic:
    InfoClassifier ic76 = new InfoClassifier(id++);
    ic76.classifier = new SimpleLogistic();
    ic76.properties.requireNumericDataset = true;
    ic76.name = "Simple Logistic";
    cls.add(ic76);
    // VFI:
    InfoClassifier ic77 = new InfoClassifier(id++);
    ic77.classifier = new VFI();
    ic77.properties.manageMissingValues = true;
    ic77.name = "VFI";
    cls.add(ic77);
    // Check that each classifier satisfies its minimum-instance constraint.
    checkMinNumInstanes(cls, original.trainingSet);
    return cls;
}
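The Bayesian-network comment in the method above names the two filters BayesNet applies automatically. Applying them explicitly looks like this (a sketch, assuming data holds the training Instances):

import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Discretize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

// Sketch: satisfy BayesNet's assumptions up front instead of relying
// on the automatic filtering (and STDERR warning) described above.
ReplaceMissingValues rmv = new ReplaceMissingValues();
rmv.setInputFormat(data);
Instances noMissing = Filter.useFilter(data, rmv);

Discretize disc = new Discretize();
disc.setInputFormat(noMissing);
Instances discrete = Filter.useFilter(noMissing, disc);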
From source file:javaapplication1.JavaApplication1.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    int numInstances = 20000;
    double curr_acc = 0;
    double curr_oba = 0;
    double curr_NB = 0;
    double[] acc_adacc = new double[numInstances];
    double[] acc_oba = new double[numInstances];
    double[] acc_NB = new double[numInstances];
    double[] pred_NB = new double[numInstances];
    Instance[] window = new Instance[numInstances];
    Classifier tr = new DecisionStump();
    Classifier lbay = new NaiveBayes();
    Classifier learner = new HoeffdingTree();
    ADWIN adw = new ADWIN(0.1);
    Classifier knn = new kNN();
    Classifier adacc = new ADACC();
    Classifier oba = new OzaBagAdwin();
    // Evaluator ev = new Accuracy();
    Classifier rep = new repro();
    Classifier tut = new DecisionStumpTutorial();
    // J48 tree = new J48();
    SMO svm = new SMO();
    ArffFileStream readfile = new ArffFileStream();
    readfile.arffFileOption.setValue("E:/PhD/data/SEA/comb_short.arff");
    readfile.prepareForUse();
    lbay.setModelContext(readfile.getHeader());
    learner.setModelContext(readfile.getHeader());
    tut.setModelContext(readfile.getHeader());
    knn.setModelContext(readfile.getHeader());
    adacc.setModelContext(readfile.getHeader());
    oba.setModelContext(readfile.getHeader());
    learner.prepareForUse();
    lbay.prepareForUse();
    tut.prepareForUse();
    knn.prepareForUse();
    adacc.prepareForUse();
    oba.prepareForUse();
    int numberSamplesCorrect = 0;
    int numberSamplesCorrectN = 0;
    int numberSamplesCorrectTR = 0;
    int numberSamplesCorrectknn = 0;
    int numberSamplesCorrectadacc = 0;
    int numberSamplesCorrectoba = 0;
    int numberSamples = 0;
    boolean isTesting = true;
    while (readfile.hasMoreInstances() && numberSamples < numInstances) {
        Instance trainInst = readfile.nextInstance();
        if (isTesting) {
            // ADWIN drift detection: if a change is detected, some of the
            // learners are trained on the current instance.
            if (adw.setInput(trainInst.value(0))) {
                System.out.println("Change Detected at " + numberSamples);
                System.out.println("window width is " + adw.getWidth());
                oba.trainOnInstance(trainInst);
                adacc.trainOnInstance(trainInst);
                lbay.trainOnInstance(trainInst);
            }
            if (learner.correctlyClassifies(trainInst)) {
                numberSamplesCorrect++;
            }
            if (lbay.correctlyClassifies(trainInst)) {
                numberSamplesCorrectN++;
                curr_NB = 1;
            } else {
                curr_NB = 0;
            }
            if (tut.correctlyClassifies(trainInst)) {
                numberSamplesCorrectTR++;
            }
            if (knn.correctlyClassifies(trainInst)) {
                numberSamplesCorrectknn++;
            }
            if (adacc.correctlyClassifies(trainInst)) {
                numberSamplesCorrectadacc++;
                curr_acc = 1;
            } else {
                curr_acc = 0;
            }
            if (oba.correctlyClassifies(trainInst)) {
                numberSamplesCorrectoba++;
                curr_oba = 1;
            } else {
                curr_oba = 0;
            }
        }
        // Maintain running (prequential) accuracies with an incremental mean.
        if (numberSamples == 0) {
            acc_adacc[numberSamples] = curr_acc;
            acc_oba[numberSamples] = curr_oba;
            acc_NB[numberSamples] = curr_NB;
        }
        if (numberSamples > 0) {
            acc_adacc[numberSamples] = acc_adacc[numberSamples - 1] + ((curr_acc - acc_adacc[numberSamples - 1]) / numberSamples);
            acc_oba[numberSamples] = acc_oba[numberSamples - 1] + ((curr_oba - acc_oba[numberSamples - 1]) / numberSamples);
            acc_NB[numberSamples] = acc_NB[numberSamples - 1] + ((curr_NB - acc_NB[numberSamples - 1]) / numberSamples);
        }
        numberSamples++;
        if (numberSamples < 5000) {
            oba.trainOnInstance(trainInst);
            adacc.trainOnInstance(trainInst);
            lbay.trainOnInstance(trainInst);
        }
        knn.trainOnInstance(trainInst);
        lbay.trainOnInstance(trainInst);
        learner.trainOnInstance(trainInst);
        tut.trainOnInstance(trainInst);
    }
    double accuracy = 100.0 * (double) numberSamplesCorrect / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with HoeffdingTree " + accuracy + "% accuracy");
    double accuracyN = 100.0 * (double) numberSamplesCorrectN / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with NaiveBayes " + accuracyN + "% accuracy");
    double accuracyTR = 100.0 * (double) numberSamplesCorrectTR / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with DecisionStump " + accuracyTR + "% accuracy");
    double accuracyADACC = 100.0 * (double) numberSamplesCorrectadacc / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with ADACC " + accuracyADACC + "% accuracy");
    double accuracyoba = 100.0 * (double) numberSamplesCorrectoba / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with OzaBagAdwin " + accuracyoba + "% accuracy");
    double accuracyknn = 100.0 * (double) numberSamplesCorrectknn / (double) numberSamples;
    System.out.println(numberSamples + " instances processed with kNN " + accuracyknn + "% accuracy");
    // System.out.println("Mean:" + adw.getEstimation());
    // System.out.println("Variance:" + adw.getVariance());
    // System.out.println("Stand. dev:" + Math.sqrt(adw.getVariance()));
    System.out.println("Number of ADWIN drift detections: " + adw.getNumberDetections());
    String s = Arrays.toString(acc_adacc);
    s = s.substring(1, s.length() - 2);
    String s2 = Arrays.toString(pred_NB);
    s2 = s2.substring(1, s2.length() - 1);
    String sNB = Arrays.toString(acc_NB);
    sNB = sNB.substring(1, sNB.length() - 1);
    String csv = "E:/PhD/data/SEA/accres.csv";
    CSVWriter writer = new CSVWriter(new FileWriter(csv));
    List<String[]> data = new ArrayList<>();
    data.add(new String[] { s });
    // data.add(new String[] { s2 });
    writer.writeAll(data);
    System.out.println("CSV written successfully.");
    writer.close();
}
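The running accuracies in the loop above are maintained with an incremental-mean update, so no history of per-instance outcomes needs to be stored. Isolated as a sketch on a toy stream:

// Sketch: incremental mean, mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n.
double mean = 0.0;
int n = 0;
for (double x : new double[] { 1, 0, 1, 1 }) { // 1 = correct, 0 = wrong
    n++;
    mean += (x - mean) / n;
}
System.out.println("running accuracy: " + mean); // prints 0.75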
From source file:jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();
    // bagged REPTrees
    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);
    prototypes.put("Bagged-REPTrees", baggedTrees);
    // bagged SMO
    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);
    prototypes.put("Bagged-SMO", baggedSVM);
    // meta-cost model for Naive Bayes
    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());
    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);
    // fixed: the original registered `bagging` here, so `meta` (and the
    // cost-matrix branch below) was never actually used
    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", meta);
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {
            // special logic for meta cost
            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());
            if (alg instanceof CostSensitiveClassifier) {
                int essaySet = Contest.getEssaySet(dsn);
                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";
                ((CostSensitiveClassifier) alg).setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));
            }
            // use InfoGain to discard useless attributes
            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
            classifier.setEvaluator(new InfoGainAttributeEval());
            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);
            classifier.setClassifier(alg);
            queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
From source file:jjj.asap.sas.models1.job.BuildBasicModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();
    // bayes
    BayesNet net = new BayesNet();
    net.setEstimator(new BMAEstimator());
    prototypes.put("BayesNet", net);
    prototypes.put("NaiveBayes", new NaiveBayes());
    // functions
    prototypes.put("RBFNetwork", new RBFNetwork());
    prototypes.put("SMO", new SMO());
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {
            // use InfoGain to discard useless attributes
            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
            classifier.setEvaluator(new InfoGainAttributeEval());
            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);
            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));
            queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
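The InfoGain-plus-Ranker wrapper recurs in all of these jobs. Distilled to a standalone sketch (the 0.0001 threshold is the one used above; data is assumed to be loaded with its class index set):

// Sketch: wrap a base classifier so attributes whose information-gain
// score does not exceed the threshold are discarded before training.
AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
asc.setEvaluator(new InfoGainAttributeEval());
Ranker ranker = new Ranker();
ranker.setThreshold(0.0001);
asc.setSearch(ranker);
asc.setClassifier(new SMO()); // any base classifier
asc.buildClassifier(data);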
From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java
License:Open Source License
@Override
protected void run() throws Exception {
    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }
    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();
    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {
        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);
        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01);
        asc.setSearch(ranker);
        asc.setClassifier(smo);
        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }
    // wait on completion
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();
}
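A quick way to sanity-check the same RBF configuration before queueing a full build job is cross-validation; a sketch, with data assumed loaded and its class index set:

// Sketch: evaluate the RBF-kernel SMO configuration from the job above.
SMO smo = new SMO();
smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
smo.setBuildLogisticModels(true); // fit logistic models for probability estimates
RBFKernel kernel = new RBFKernel();
kernel.setGamma(0.05);
smo.setKernel(kernel);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(smo, data, 10, new java.util.Random(1));
System.out.println(eval.toSummaryString());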
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/**
 * This method builds and evaluates a support vector machine (SVM)
 * classifier. SMO is used as the SVM implementation provided by the
 * Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 *
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);
        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);
        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
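Calling the helper is a one-liner; the ARFF paths below are placeholders, and the kernel string must match one of the literals checked above:

// Sketch: invoking WekaClassifier.SVM with placeholder paths.
double acc = WekaClassifier.SVM("data/train.arff", "data/test.arff", "RBF kernel");
System.out.println("accuracy = " + acc + "%");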
From source file:KFST.featureSelection.embedded.SVMBasedMethods.MSVM_RFE.java
License:Open Source License
/**
 * Generates binary SVM classifiers, using a k-fold cross-validation
 * resampling strategy over the input data, based on the selected feature
 * subset.
 *
 * @param selectedFeature an array of indices of the selected feature subset
 *
 * @return an array of the weights of features
 */
protected double[][] buildSVM_KFoldCrossValidation(int[] selectedFeature) {
    double[][] weights = new double[numRun * kFoldValue][selectedFeature.length];
    int classifier = 0;
    for (int i = 0; i < numRun; i++) {
        double[][] copyTrainSet = ArraysFunc.copyDoubleArray2D(trainSet);
        // shuffle the train set
        MathFunc.randomize(copyTrainSet);
        int numSampleInFold = copyTrainSet.length / kFoldValue;
        int remainder = copyTrainSet.length % kFoldValue;
        int indexStart = 0;
        for (int k = 0; k < kFoldValue; k++) {
            int indexEnd = indexStart + numSampleInFold;
            if (k < remainder) {
                indexEnd++;
            }
            double[][] subTrainSet = ArraysFunc.copyDoubleArray2D(copyTrainSet, indexStart, indexEnd);
            String nameDataCSV = TEMP_PATH + "dataCSV[" + i + "-" + k + "].csv";
            String nameDataARFF = TEMP_PATH + "dataARFF[" + i + "-" + k + "].arff";
            FileFunc.createCSVFile(subTrainSet, selectedFeature, nameDataCSV, nameFeatures, classLabel);
            FileFunc.convertCSVtoARFF(nameDataCSV, nameDataARFF, TEMP_PATH, selectedFeature.length, numFeatures, nameFeatures, numClass, classLabel);
            try {
                BufferedReader readerTrain = new BufferedReader(new FileReader(nameDataARFF));
                Instances dataTrain = new Instances(readerTrain);
                readerTrain.close();
                dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
                SMO svm = new SMO();
                svm.setC(parameterC);
                svm.setKernel(WekaSVMKernel.parse(kernelType));
                svm.buildClassifier(dataTrain);
                // Extract the linear weight vector for the class pair (0, 1)
                // from SMO's sparse representation.
                double[] weightsSparse = svm.sparseWeights()[0][1];
                int[] indicesSparse = svm.sparseIndices()[0][1];
                for (int m = 0; m < weightsSparse.length; m++) {
                    weights[classifier][indicesSparse[m]] = weightsSparse[m];
                }
            } catch (Exception ex) {
                Logger.getLogger(MSVM_RFE.class.getName()).log(Level.SEVERE, null, ex);
            }
            indexStart = indexEnd;
            classifier++;
        }
    }
    return weights;
}
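MSVM-RFE ranks features by aggregating these weight vectors; the usual SVM-RFE criterion (the squared weight w_i^2, here averaged over all numRun * kFoldValue classifiers) can be computed from the returned matrix. A sketch, with weights and selectedFeature as above:

// Sketch: per-feature ranking scores from the weight matrix; the
// feature with the lowest score is the next candidate for removal.
double[] score = new double[selectedFeature.length];
for (double[] w : weights) {
    for (int i = 0; i < score.length; i++) {
        score[i] += w[i] * w[i];
    }
}
for (int i = 0; i < score.length; i++) {
    score[i] /= weights.length; // mean squared weight
}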