Example usage for weka.classifiers.meta.Bagging Bagging()

List of usage examples for the weka.classifiers.meta.Bagging constructor Bagging().

Introduction

On this page you can find example usage for the weka.classifiers.meta.Bagging constructor, Bagging(), collected from several open-source projects.

Prototype

public Bagging() 

Document

Constructor.
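
Before the real-world examples below, here is a minimal, hypothetical sketch (not taken from any of the projects listed under Usage) of how the Bagging() constructor is typically used: create the ensemble, attach a base classifier, and build it on a training set. The class and method names are mine, and the caller is assumed to supply Instances with the class index already set.

import weka.classifiers.meta.Bagging;
import weka.classifiers.trees.REPTree;
import weka.core.Instances;

public class BaggingConstructorSketch {

    // Builds a bagged ensemble of REPTrees on the supplied training data.
    // The caller is expected to pass Instances whose class index is already set.
    public static Bagging buildBaggedTrees(Instances train) throws Exception {
        Bagging bagging = new Bagging();        // the constructor documented above
        bagging.setClassifier(new REPTree());   // base learner (REPTree is Bagging's default)
        bagging.setNumIterations(10);           // number of bagged models
        bagging.setBagSizePercent(100);         // each bag resamples 100% of the training set
        bagging.buildClassifier(train);
        return bagging;
    }
}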

Usage

From source file: jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java

License: Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // Bagged REPTrees

    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);

    prototypes.put("Bagged-REPTrees", baggedTrees);

    // Bagged SMO

    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);

    prototypes.put("Bagged-SMO", baggedSVM);

    // Meta Cost model for Naive Bayes

    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());

    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);

    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", bagging);

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            //
            // special logic for meta cost
            //

            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());

            if (alg instanceof CostSensitiveClassifier) {

                int essaySet = Contest.getEssaySet(dsn);

                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";

                ((CostSensitiveClassifier) alg)
                        .setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));

            }

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);

            classifier.setClassifier(alg);

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait for all submitted jobs to complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}
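
All three prototypes above disable the out-of-bag estimate with setCalcOutOfBag(false). For completeness, here is a hedged sketch (the helper name and the Instances parameter are assumptions for illustration, not part of the original job) of how the estimate could be used instead of a separate hold-out set. As far as I can tell from the Bagging API, the estimate is only computed when the bag size is left at the default 100%.

// assumes: import weka.classifiers.meta.Bagging; import weka.classifiers.trees.REPTree;
//          import weka.core.Instances;
public static double outOfBagError(Instances train) throws Exception {
    Bagging oob = new Bagging();
    oob.setClassifier(new REPTree());
    oob.setNumIterations(100);
    oob.setCalcOutOfBag(true);       // only valid with the default bag size of 100%
    oob.buildClassifier(train);
    return oob.measureOutOfBagError();
}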

From source file: meddle.TrainModelByDomainOS.java

License: Open Source License

/**
 * Given the classifierName, return a classifier
 *
 * @param classifierName
 *            e.g. J48, Bagging etc.
 */
public static Classifier getClassifier(String classifierName) {
    Classifier classifier = null;
    if (classifierName.equals("J48")) {
        J48 j48 = new J48();
        j48.setUnpruned(true);
        classifier = j48;
    } else if (classifierName.equals("AdaBoostM1")) {
        AdaBoostM1 adm = new AdaBoostM1();
        adm.setNumIterations(10);
        J48 j48 = new J48();
        adm.setClassifier(j48);
        classifier = adm;
    } else if (classifierName.equals("Bagging")) {
        Bagging bagging = new Bagging();
        bagging.setNumIterations(10);
        J48 j48 = new J48();
        bagging.setClassifier(j48);
        classifier = bagging;
    } else if (classifierName.equals("Stacking")) {
        Stacking stacking = new Stacking();
        stacking.setMetaClassifier(new Logistic());
        Classifier cc[] = new Classifier[2];
        cc[0] = new J48();
        cc[1] = new IBk();
        stacking.setClassifiers(cc);
        classifier = stacking;
    } else if (classifierName.equals("AdditiveRegression")) {
        AdditiveRegression ar = new AdditiveRegression();
        ar.setClassifier(new J48());
        classifier = ar;
    } else if (classifierName.equals("LogitBoost")) {
        LogitBoost lb = new LogitBoost();
        lb.setClassifier(new J48());
        classifier = lb;
    }
    return classifier;
}
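
A hedged sketch of how the classifier returned by getClassifier might then be trained and evaluated; the method name and the trainData/testData variables are assumptions for illustration (placed in the same class as getClassifier), not part of the meddle project.

// assumes: import weka.classifiers.Classifier; import weka.classifiers.Evaluation;
//          import weka.core.Instances;
public static void trainAndReport(Instances trainData, Instances testData) throws Exception {
    Classifier clf = getClassifier("Bagging");   // 10 bagged J48 trees, as configured above
    if (clf == null) {
        throw new IllegalArgumentException("unknown classifier name");
    }
    clf.buildClassifier(trainData);
    Evaluation eval = new Evaluation(trainData);
    eval.evaluateModel(clf, testData);
    System.out.println(eval.toSummaryString());
}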

From source file: mulan.experiments.ICTAI2010.java

License: Open Source License

/**
 * Main method.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {

    try {
        String path = Utils.getOption("path", args);
        String filestem = Utils.getOption("filestem", args);

        System.out.println("Loading the data set");
        MultiLabelInstances dataset = new MultiLabelInstances(path + filestem + ".arff",
                path + filestem + ".xml");

        Evaluator eval = new Evaluator();
        MultipleEvaluation results;
        List<Measure> measures = new ArrayList<Measure>(1);
        measures.add(new HammingLoss());

        int numFolds = 10;

        MultiLabelLearner[] learner = new MultiLabelLearner[4];
        String[] learnerName = new String[learner.length];

        learner[0] = new MLkNN(10, 1.0);
        learnerName[0] = "MLkNN";
        learner[1] = new CalibratedLabelRanking(new J48());
        learnerName[1] = "CLR";
        Bagging bagging = new Bagging();
        bagging.setClassifier(new J48());
        learner[2] = new BinaryRelevance(bagging);
        learnerName[2] = "BR";
        learner[3] = new BPMLL();
        learnerName[3] = "BPMLL";

        // loop over learners
        for (int i = 0; i < learner.length; i++) {
            // Default
            results = eval.crossValidate(learner[i].makeCopy(), dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";default;-;" + results.toCSV());

            // One Threshold
            OneThreshold ot;
            ot = new OneThreshold(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(ot, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";one threshold;train;" + results.toCSV());
            ot = new OneThreshold(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(ot, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";one threshold;5-cv;" + results.toCSV());

            // RCut
            RCut rcut;
            rcut = new RCut(learner[i].makeCopy());
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;cardinality;" + results.toCSV());
            rcut = new RCut(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;train;" + results.toCSV());
            rcut = new RCut(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;5-cv;" + results.toCSV());

            // SCut
            SCut scut;
            scut = new SCut(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(scut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";scut;train;" + results.toCSV());
            scut = new SCut(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(scut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";scut;5-cv;" + results.toCSV());

            // MetaLabeler
            MetaLabeler ml;
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Content-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Score-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Rank-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;ranks;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Content-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;train;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Score-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;train;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Rank-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;ranks;" + results.toCSV());

            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Content-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Score-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Rank-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;ranks;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Content-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Score-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Rank-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;ranks;" + results.toCSV());

            // ThresholdPrediction
            ThresholdPrediction tp;
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Content-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;content;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Score-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;scores;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Rank-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;ranks;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Content-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;content;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Score-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;scores;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Rank-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;ranks;" + results.toCSV());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file: org.wikipedia.miner.annotation.Disambiguator.java

License: Open Source License

/**
 * A demo of how to train and test the disambiguator. 
 *
 * @param args   an array of 2 or 4 String arguments; the connection string of the Wikipedia 
 * database server, the name of the Wikipedia database and (optionally, if anonymous access
 * is not allowed) a username and password for the database.
 * 
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    //set up an instance of wikipedia
    Wikipedia wikipedia = Wikipedia.getInstanceFromArguments(args);

    //use a text processor, so that terms and items in wikipedia will both be case-folded before being compared.
    TextProcessor tp = new CaseFolder();

    //cache tables that will be used extensively
    File dataDirectory = new File("/research/wikipediaminer/data/en/20080727");
    ProgressNotifier pn = new ProgressNotifier(4);

    TIntHashSet ids = wikipedia.getDatabase().getValidPageIds(dataDirectory, 2, pn);
    wikipedia.getDatabase().cachePages(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheAnchors(dataDirectory, tp, ids, 2, pn);
    wikipedia.getDatabase().cacheInLinks(dataDirectory, ids, pn);

    //gather article sets for training and testing      
    ArticleSet trainSet = new ArticleSet(new File("data/articleSets/trainingIds.csv"));
    ArticleSet testSet = new ArticleSet(new File("data/articleSets/testIds_disambig.csv"));

    //use relatedness cache, so we won't repeat these calculations unnecessarily
    RelatednessCache rc = new RelatednessCache();

    //train disambiguator
    Disambiguator disambiguator = new Disambiguator(wikipedia, tp, 0.01, 0.01, 25);
    disambiguator.train(trainSet, ArticleCleaner.ALL, "disambig_trainingIds", rc);

    //build disambiguation classifier
    Classifier classifier = new Bagging();
    classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
    disambiguator.buildClassifier(classifier);
    disambiguator.saveClassifier(new File("data/models/disambig.model"));

    //test
    Result<Integer> r = disambiguator.test(testSet, ArticleCleaner.ALL, rc);
    System.out.println(r);
}
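
The option string "-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2" configures the ensemble entirely through setOptions. As a rough reading of those flags (the flag-to-setter mapping below is my interpretation of the standard Weka options, and the helper name is hypothetical), an equivalent programmatic configuration would look like this:

// assumes: import weka.classifiers.meta.Bagging; import weka.classifiers.trees.J48;
public static Bagging configuredLikeAbove() {
    Bagging bagging = new Bagging();
    bagging.setBagSizePercent(10);   // -P 10: each bag holds 10% of the training data
    bagging.setSeed(1);              // -S 1:  random seed
    bagging.setNumIterations(10);    // -I 10: ten bagged models

    J48 j48 = new J48();             // -W weka.classifiers.trees.J48: base learner
    j48.setUnpruned(true);           // -- -U: unpruned trees
    j48.setMinNumObj(2);             // -- -M 2: at least two instances per leaf
    bagging.setClassifier(j48);
    return bagging;
}

The same option string is reused in the LinkDetector example that follows.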

From source file: org.wikipedia.miner.annotation.weighting.LinkDetector.java

License: Open Source License

/**
 * A demo of how to train and test the link detector. 
 *
 * @param args   an array of 2 or 4 String arguments; the connection string of the Wikipedia 
 * database server, the name of the Wikipedia database and (optionally, if anonymous access
 * is not allowed) a username and password for the database.
 * 
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    //set up an instance of Wikipedia
    Wikipedia wikipedia = Wikipedia.getInstanceFromArguments(args);

    //use a text processor, so that terms and items in wikipedia will both be case-folded before being compared.
    TextProcessor tp = new CaseFolder();

    File stopwordFile = new File("/research/wikipediaminer/data/stopwords.txt");

    // cache tables that will be used extensively
    File dataDirectory = new File("/research/wikipediaminer/data/en/20080727");
    ProgressNotifier pn = new ProgressNotifier(5);

    TIntHashSet ids = wikipedia.getDatabase().getValidPageIds(dataDirectory, 2, pn);
    wikipedia.getDatabase().cachePages(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheAnchors(dataDirectory, tp, ids, 2, pn);
    wikipedia.getDatabase().cacheInLinks(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheGenerality(dataDirectory, ids, pn);

    //gather article sets for training and testing
    ArticleSet trainSet = new ArticleSet(new File("data/articleSets/trainingIds.csv"));
    ArticleSet testSet = new ArticleSet(new File("data/articleSets/testIds_wikify.csv"));

    // use relatedness cache, so we won't repeat these calculations unnecessarily
    RelatednessCache rc = null; //new RelatednessCache() ;

    // use a pre-trained disambiguator
    Disambiguator disambiguator = new Disambiguator(wikipedia, tp, 0.01, 0.01, 25);
    disambiguator.loadClassifier(new File("data/models/disambig.model"));

    // connect disambiguator to a new topic detector
    TopicDetector topicDetector = new TopicDetector(wikipedia, disambiguator, stopwordFile, true, false);

    // train a new link detector      
    LinkDetector linkDetector = new LinkDetector(wikipedia);
    linkDetector.train(trainSet, ArticleCleaner.ALL, "LinkDetection_Training", topicDetector, rc);

    // build link detection classifier
    Classifier classifier = new Bagging();
    classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
    linkDetector.buildClassifier(classifier);

    linkDetector.saveClassifier(new File("data/models/linkDetect.model"));

    // test      
    Result<Integer> r = linkDetector.test(testSet, ArticleCleaner.ALL, topicDetector, rc);
    System.out.println(r);
}

From source file: themeextractor.filters.MauiFilter.java

License: Open Source License

/**
 * Builds the classifier.
 */
private void buildClassifier() throws Exception {

    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Term_frequency")); // 2
            atts.addElement(new Attribute("IDF")); // 
            atts.addElement(new Attribute("TFxIDF")); // 
            atts.addElement(new Attribute("First_occurrence")); // 
            atts.addElement(new Attribute("Last_occurrence")); // 
            atts.addElement(new Attribute("Spread")); // 
            atts.addElement(new Attribute("Domain_keyphraseness")); // 
            atts.addElement(new Attribute("Length")); //
            atts.addElement(new Attribute("Generality")); //
            atts.addElement(new Attribute("Node_degree")); // 
            atts.addElement(new Attribute("Semantic_relatedness")); // 
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 
            atts.addElement(new Attribute("Inverse_Wikip_frequency")); // 
            atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13

        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        }
    }

    classifierData = new Instances("ClassifierData", atts, 0);

    classifierData.setClassIndex(numFeatures);

    if (debugMode) {
        System.err.println("--- Converting instances for classifier");
    }
    int totalDocuments = getInputFormat().numInstances();
    // Convert pending input instances into data for classifier
    for (int i = 0; i < totalDocuments; i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(keyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases);

        // Get the phrases for the document
        HashMap<String, Candidate> candidateList = allCandidates.get(current);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        int countPos = 0;
        int countNeg = 0;

        if (debugMode) {
            System.err
                    .println("--- Computing features for document " + i + " out of " + totalDocuments + "...");
        }

        for (Candidate candidate : candidateList.values()) {

            // ignore all candidates that appear less than a threshold
            if (candidate.getFrequency() < minOccurFrequency) {
                continue;
            }

            // compute feature values
            double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList);

            if (vals[vals.length - 1] == 0) {
                countNeg++;
            } else {
                countPos++;
            }
            Instance inst = new Instance(current.weight(), vals);
            // System.out.println(candidate + "\t" + inst);
            classifierData.add(inst);

        }
        if (debugMode) {
            System.err.println(countPos + " positive; " + countNeg + " negative instances");
        }
    }

    if (debugMode) {
        System.err.println("--- Building classifier");
    }

    if (classifier == null) {
        // Build classifier
        if (nominalClassValue) {

            //         FilteredClassifier fclass = new FilteredClassifier();
            //         fclass.setClassifier(new NaiveBayesSimple());
            //         fclass.setFilter(new Discretize());
            //         classifier = fclass;

            classifier = new Bagging(); // try also the FilteredClassifier alternative commented out above
            classifier.setOptions(
                    Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));

        } else {

            classifier = new Bagging();
            // try also:
            // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
            String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            String[] options = Utils.splitOptions(optionsString);
            classifier.setOptions(options);

        }
    }

    classifier.buildClassifier(classifierData);

    if (debugMode) {
        System.err.println(classifier);
    }

    // Save space
    classifierData = new Instances(classifierData, 0);
}
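
In the numeric-class branch above, the option string "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0" bags ten M5P model trees built on full-size bootstrap samples. A hedged, partial programmatic equivalent follows; the helper name is mine, and the M5P flags are passed through setOptions rather than individual setters because I am not certain of all the corresponding setter names.

// assumes: import weka.classifiers.meta.Bagging; import weka.classifiers.trees.M5P;
//          import weka.core.Utils;
public static Bagging baggedModelTrees() throws Exception {
    Bagging regBagging = new Bagging();
    regBagging.setBagSizePercent(100);   // -P 100: full-size bootstrap samples
    regBagging.setSeed(1);               // -S 1:  random seed
    regBagging.setNumIterations(10);     // -I 10: ten bagged models

    M5P m5p = new M5P();                                  // -W weka.classifiers.trees.M5P
    m5p.setOptions(Utils.splitOptions("-U -M 7.0"));      // -- -U -M 7.0: unsmoothed predictions, min 7 instances per leaf
    regBagging.setClassifier(m5p);
    return regBagging;
}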