List of usage examples for weka.classifiers.meta.Bagging
public Bagging()
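Before the collected examples, here is a minimal sketch of the default constructor in use. It is not taken from any of the source files below; the ARFF path and the assumption that the class attribute is last are placeholders, and the setter values are merely illustrative.

import weka.classifiers.Evaluation;
import weka.classifiers.meta.Bagging;
import weka.classifiers.trees.REPTree;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

import java.util.Random;

public class BaggingSketch {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data/train.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);        // assume the class is the last attribute

        Bagging bagger = new Bagging();          // public Bagging()
        bagger.setClassifier(new REPTree());     // base learner to resample
        bagger.setNumIterations(10);             // number of bagged models
        bagger.setBagSizePercent(100);           // each bag as large as the training set

        // 10-fold cross-validation as a quick sanity check
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(bagger, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());

        // train a final model on the full data set
        bagger.buildClassifier(data);
    }
}

With no setters called at all, Bagging defaults to 10 iterations of REPTree with 100% bag size, which is what the mulan example below relies on apart from swapping the base learner to J48.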
From source file: jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java
License: Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // Bagged REPTrees
    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);
    prototypes.put("Bagged-REPTrees", baggedTrees);

    // Bagged SMO
    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);
    prototypes.put("Bagged-SMO", baggedSVM);

    // Meta Cost model for Naive Bayes
    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());
    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);
    // register the cost-sensitive wrapper (not the bare bagger) so the
    // CostSensitiveClassifier branch below is actually exercised
    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", meta);

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            //
            // special logic for meta cost
            //
            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());
            if (alg instanceof CostSensitiveClassifier) {
                int essaySet = Contest.getEssaySet(dsn);
                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";
                ((CostSensitiveClassifier) alg)
                        .setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));
            }

            // use InfoGain to discard useless attributes
            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
            classifier.setEvaluator(new InfoGainAttributeEval());
            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);
            classifier.setClassifier(alg);

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();

    Job.stopService();
}
From source file: meddle.TrainModelByDomainOS.java
License: Open Source License

/**
 * Given the classifierName, return a classifier
 *
 * @param classifierName
 *            e.g. J48, Bagging etc.
 */
public static Classifier getClassifier(String classifierName) {
    Classifier classifier = null;
    if (classifierName.equals("J48")) {
        J48 j48 = new J48();
        j48.setUnpruned(true);
        classifier = j48;
    } else if (classifierName.equals("AdaBoostM1")) {
        AdaBoostM1 adm = new AdaBoostM1();
        adm.setNumIterations(10);
        J48 j48 = new J48();
        adm.setClassifier(j48);
        classifier = adm;
    } else if (classifierName.equals("Bagging")) {
        Bagging bagging = new Bagging();
        bagging.setNumIterations(10);
        J48 j48 = new J48();
        bagging.setClassifier(j48);
        classifier = bagging;
    } else if (classifierName.equals("Stacking")) {
        Stacking stacking = new Stacking();
        stacking.setMetaClassifier(new Logistic());
        Classifier cc[] = new Classifier[2];
        cc[0] = new J48();
        cc[1] = new IBk();
        stacking.setClassifiers(cc);
        classifier = stacking;
    } else if (classifierName.equals("AdditiveRegression")) {
        AdditiveRegression ar = new AdditiveRegression();
        ar.setClassifier(new J48());
        classifier = ar;
    } else if (classifierName.equals("LogitBoost")) {
        LogitBoost lb = new LogitBoost();
        lb.setClassifier(new J48());
        classifier = lb;
    }
    return classifier;
}
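The factory above only configures classifiers; it never trains them. A hedged sketch of a possible caller follows: only the static getClassifier method comes from the source above, while the ARFF file name, model path, and demo class are hypothetical.

import meddle.TrainModelByDomainOS; // assumes the class above is on the classpath

import weka.classifiers.Classifier;
import weka.core.Instances;
import weka.core.SerializationHelper;
import weka.core.converters.ConverterUtils.DataSource;

public class GetClassifierDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("domain_os_features.arff"); // placeholder ARFF
        data.setClassIndex(data.numAttributes() - 1);

        // "Bagging" yields 10 bagging iterations over J48, as configured in getClassifier()
        Classifier bagging = TrainModelByDomainOS.getClassifier("Bagging");
        bagging.buildClassifier(data);

        // persist and reload the trained ensemble
        SerializationHelper.write("bagging-j48.model", bagging);
        Classifier restored = (Classifier) SerializationHelper.read("bagging-j48.model");

        // classify the first instance with the reloaded model
        double pred = restored.classifyInstance(data.instance(0));
        System.out.println("Predicted class: " + data.classAttribute().value((int) pred));
    }
}

SerializationHelper is Weka's standard way to persist a trained model, so the bagged J48 can be reused later without retraining.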
From source file: mulan.experiments.ICTAI2010.java
License: Open Source License

/**
 * Main class
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
    try {
        String path = Utils.getOption("path", args);
        String filestem = Utils.getOption("filestem", args);

        System.out.println("Loading the data set");
        MultiLabelInstances dataset = new MultiLabelInstances(path + filestem + ".arff",
                path + filestem + ".xml");

        Evaluator eval = new Evaluator();
        MultipleEvaluation results;

        List<Measure> measures = new ArrayList<Measure>(1);
        measures.add(new HammingLoss());
        int numFolds = 10;

        MultiLabelLearner[] learner = new MultiLabelLearner[4];
        String[] learnerName = new String[learner.length];

        learner[0] = new MLkNN(10, 1.0);
        learnerName[0] = "MLkNN";
        learner[1] = new CalibratedLabelRanking(new J48());
        learnerName[1] = "CLR";
        Bagging bagging = new Bagging();
        bagging.setClassifier(new J48());
        learner[2] = new BinaryRelevance(bagging);
        learnerName[2] = "BR";
        learner[3] = new BPMLL();
        learnerName[3] = "BPMLL";

        // loop over learners
        for (int i = 0; i < learner.length; i++) {
            // Default
            results = eval.crossValidate(learner[i].makeCopy(), dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";default;-;" + results.toCSV());

            // One Threshold
            OneThreshold ot;
            ot = new OneThreshold(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(ot, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";one threshold;train;" + results.toCSV());
            ot = new OneThreshold(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(ot, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";one threshold;5-cv;" + results.toCSV());

            // RCut
            RCut rcut;
            rcut = new RCut(learner[i].makeCopy());
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;cardinality;" + results.toCSV());
            rcut = new RCut(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;train;" + results.toCSV());
            rcut = new RCut(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(rcut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";rcut;5-cv;" + results.toCSV());

            // SCut
            SCut scut;
            scut = new SCut(learner[i].makeCopy(), new HammingLoss());
            results = eval.crossValidate(scut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";scut;train;" + results.toCSV());
            scut = new SCut(learner[i].makeCopy(), new HammingLoss(), 5);
            results = eval.crossValidate(scut, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";scut;5-cv;" + results.toCSV());

            // MetaLabeler
            MetaLabeler ml;
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Content-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Score-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Rank-Based", "Numeric-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;train;ranks;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Content-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;train;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Score-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;train;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Rank-Based", "Nominal-Class");
            ml.setFolds(1);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;ranks;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Content-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Score-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new M5P(), "Rank-Based", "Numeric-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;m5p;cv;ranks;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Content-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;content;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Score-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;scores;" + results.toCSV());
            ml = new MetaLabeler(learner[i].makeCopy(), new J48(), "Rank-Based", "Nominal-Class");
            ml.setFolds(5);
            results = eval.crossValidate(ml, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";metalabeler;j48;cv;ranks;" + results.toCSV());

            // ThresholdPrediction
            ThresholdPrediction tp;
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Content-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;content;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Score-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;scores;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Rank-Based", 1);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;train;ranks;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Content-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;content;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Score-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;scores;" + results.toCSV());
            tp = new ThresholdPrediction(learner[i].makeCopy(), new M5P(), "Rank-Based", 5);
            results = eval.crossValidate(tp, dataset, measures, numFolds);
            System.out.println(learnerName[i] + ";tp;m5p;5-cv;ranks;" + results.toCSV());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: org.wikipedia.miner.annotation.Disambiguator.java
License: Open Source License

/**
 * A demo of how to train and test the disambiguator.
 *
 * @param args an array of 2 or 4 String arguments; the connection string of the Wikipedia
 *             database server, the name of the Wikipedia database and (optionally, if anonymous access
 *             is not allowed) a username and password for the database.
 *
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    // set up an instance of wikipedia
    Wikipedia wikipedia = Wikipedia.getInstanceFromArguments(args);

    // use a text processor, so that terms and items in wikipedia will both be case-folded before being compared.
    TextProcessor tp = new CaseFolder();

    // cache tables that will be used extensively
    File dataDirectory = new File("/research/wikipediaminer/data/en/20080727");
    ProgressNotifier pn = new ProgressNotifier(4);

    TIntHashSet ids = wikipedia.getDatabase().getValidPageIds(dataDirectory, 2, pn);
    wikipedia.getDatabase().cachePages(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheAnchors(dataDirectory, tp, ids, 2, pn);
    wikipedia.getDatabase().cacheInLinks(dataDirectory, ids, pn);

    // gather article sets for training and testing
    ArticleSet trainSet = new ArticleSet(new File("data/articleSets/trainingIds.csv"));
    ArticleSet testSet = new ArticleSet(new File("data/articleSets/testIds_disambig.csv"));

    // use relatedness cache, so we won't repeat these calculations unnecessarily
    RelatednessCache rc = new RelatednessCache();

    // train disambiguator
    Disambiguator disambiguator = new Disambiguator(wikipedia, tp, 0.01, 0.01, 25);
    disambiguator.train(trainSet, ArticleCleaner.ALL, "disambig_trainingIds", rc);

    // build disambiguation classifier
    Classifier classifier = new Bagging();
    classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
    disambiguator.buildClassifier(classifier);
    disambiguator.saveClassifier(new File("data/models/disambig.model"));

    // test
    Result<Integer> r = disambiguator.test(testSet, ArticleCleaner.ALL, rc);
    System.out.println(r);
}
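The option string passed to setOptions above ("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2") can also be spelled out with setters. The helper below is a sketch of an equivalent configuration, not code from Wikipedia Miner; the class and method names are invented for illustration.

import weka.classifiers.Classifier;
import weka.classifiers.meta.Bagging;
import weka.classifiers.trees.J48;

public final class DisambigClassifierFactory {

    /** Same settings as "-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2", via setters. */
    public static Classifier newBaggedJ48() {
        Bagging bagging = new Bagging();
        bagging.setBagSizePercent(10); // -P 10: each bag holds 10% of the training data
        bagging.setSeed(1);            // -S 1: random seed
        bagging.setNumIterations(10);  // -I 10: ten bagged trees

        J48 tree = new J48();
        tree.setUnpruned(true);        // -U: build unpruned trees
        tree.setMinNumObj(2);          // -M 2: at least two instances per leaf
        bagging.setClassifier(tree);   // -W weka.classifiers.trees.J48

        return bagging;
    }
}

The returned ensemble could then be handed to disambiguator.buildClassifier(...) here, or to linkDetector.buildClassifier(...) in the next example, in place of the setOptions call.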
From source file: org.wikipedia.miner.annotation.weighting.LinkDetector.java
License: Open Source License

/**
 * A demo of how to train and test the link detector.
 *
 * @param args an array of 2 or 4 String arguments; the connection string of the Wikipedia
 *             database server, the name of the Wikipedia database and (optionally, if anonymous access
 *             is not allowed) a username and password for the database.
 *
 * @throws Exception
 */
public static void main(String[] args) throws Exception {

    // set up an instance of Wikipedia
    Wikipedia wikipedia = Wikipedia.getInstanceFromArguments(args);

    // use a text processor, so that terms and items in wikipedia will both be case-folded before being compared.
    TextProcessor tp = new CaseFolder();

    File stopwordFile = new File("/research/wikipediaminer/data/stopwords.txt");

    // cache tables that will be used extensively
    File dataDirectory = new File("/research/wikipediaminer/data/en/20080727");
    ProgressNotifier pn = new ProgressNotifier(5);

    TIntHashSet ids = wikipedia.getDatabase().getValidPageIds(dataDirectory, 2, pn);
    wikipedia.getDatabase().cachePages(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheAnchors(dataDirectory, tp, ids, 2, pn);
    wikipedia.getDatabase().cacheInLinks(dataDirectory, ids, pn);
    wikipedia.getDatabase().cacheGenerality(dataDirectory, ids, pn);

    // gather article sets for training and testing
    ArticleSet trainSet = new ArticleSet(new File("data/articleSets/trainingIds.csv"));
    ArticleSet testSet = new ArticleSet(new File("data/articleSets/testIds_wikify.csv"));

    // use relatedness cache, so we won't repeat these calculations unnecessarily
    RelatednessCache rc = null; // new RelatednessCache();

    // use a pre-trained disambiguator
    Disambiguator disambiguator = new Disambiguator(wikipedia, tp, 0.01, 0.01, 25);
    disambiguator.loadClassifier(new File("data/models/disambig.model"));

    // connect disambiguator to a new topic detector
    TopicDetector topicDetector = new TopicDetector(wikipedia, disambiguator, stopwordFile, true, false);

    // train a new link detector
    LinkDetector linkDetector = new LinkDetector(wikipedia);
    linkDetector.train(trainSet, ArticleCleaner.ALL, "LinkDetection_Training", topicDetector, rc);

    // build link detection classifier
    Classifier classifier = new Bagging();
    classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
    linkDetector.buildClassifier(classifier);
    linkDetector.saveClassifier(new File("data/models/linkDetect.model"));

    // test
    Result<Integer> r = linkDetector.test(testSet, ArticleCleaner.ALL, topicDetector, rc);
    System.out.println(r);
}
From source file: themeextractor.filters.MauiFilter.java
License: Open Source License

/**
 * Builds the classifier.
 */
private void buildClassifier() throws Exception {

    // Generate input format for classifier
    FastVector atts = new FastVector();

    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Term_frequency")); // 2
            atts.addElement(new Attribute("IDF"));
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            atts.addElement(new Attribute("Last_occurrence"));
            atts.addElement(new Attribute("Spread"));
            atts.addElement(new Attribute("Domain_keyphraseness"));
            atts.addElement(new Attribute("Length"));
            atts.addElement(new Attribute("Generality"));
            atts.addElement(new Attribute("Node_degree"));
            atts.addElement(new Attribute("Semantic_relatedness"));
            atts.addElement(new Attribute("Wikipedia_keyphraseness"));
            atts.addElement(new Attribute("Inverse_Wikip_frequency"));
            atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13
        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        }
    }

    classifierData = new Instances("ClassifierData", atts, 0);
    classifierData.setClassIndex(numFeatures);

    if (debugMode) {
        System.err.println("--- Converting instances for classifier");
    }
    int totalDocuments = getInputFormat().numInstances();

    // Convert pending input instances into data for classifier
    for (int i = 0; i < totalDocuments; i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(keyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases);

        // Get the phrases for the document
        HashMap<String, Candidate> candidateList = allCandidates.get(current);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        int countPos = 0;
        int countNeg = 0;

        if (debugMode) {
            System.err
                    .println("--- Computing features for document " + i + " out of " + totalDocuments + "...");
        }

        for (Candidate candidate : candidateList.values()) {

            // ignore all candidates that appear less than a threshold
            if (candidate.getFrequency() < minOccurFrequency) {
                continue;
            }

            // compute feature values
            double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList);

            if (vals[vals.length - 1] == 0) {
                countNeg++;
            } else {
                countPos++;
            }
            Instance inst = new Instance(current.weight(), vals);
            // System.out.println(candidate + "\t" + inst);
            classifierData.add(inst);
        }
        if (debugMode) {
            System.err.println(countPos + " positive; " + countNeg + " negative instances");
        }
    }

    if (debugMode) {
        System.err.println("--- Building classifier");
    }

    if (classifier == null) {
        // Build classifier
        if (nominalClassValue) {
            // FilteredClassifier fclass = new FilteredClassifier();
            // fclass.setClassifier(new NaiveBayesSimple());
            // fclass.setFilter(new Discretize());
            // classifier = fclass;

            classifier = new Bagging();
            // try also
            // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
        } else {

            classifier = new Bagging();
            // try also
            // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
            String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            String[] options = Utils.splitOptions(optionsString);
            classifier.setOptions(options);
        }
    }

    classifier.buildClassifier(classifierData);

    if (debugMode) {
        System.err.println(classifier);
    }

    // Save space
    classifierData = new Instances(classifierData, 0);
}
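MauiFilter leaves Bagging's out-of-bag machinery switched off, but a bagged ensemble can report an out-of-bag error estimate as a cheap alternative to cross-validation. The sketch below is an optional check under assumptions, not something the filter itself does; the training ARFF path is a placeholder.

import weka.classifiers.meta.Bagging;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class OutOfBagCheck {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("keyphrase_training.arff"); // placeholder ARFF
        data.setClassIndex(data.numAttributes() - 1);

        Bagging bagging = new Bagging();
        bagging.setClassifier(new J48());
        bagging.setNumIterations(10);
        bagging.setBagSizePercent(100); // out-of-bag estimation requires full-size bags
        bagging.setCalcOutOfBag(true);  // -O: evaluate on the instances left out of each bag
        bagging.buildClassifier(data);

        System.out.println("Out-of-bag error: " + bagging.measureOutOfBagError());
    }
}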