List of usage examples for weka.attributeSelection.Ranker.setThreshold
@Override public void setThreshold(double threshold)
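All of the examples on this page follow the same pattern: a Ranker whose threshold discards low-scoring attributes, paired with an attribute evaluator (usually InfoGainAttributeEval) inside an AttributeSelection, AttributeSelectedClassifier, or filter. The minimal sketch below illustrates that pattern on its own; the ARFF file name is a placeholder, and the 0.0001 threshold simply mirrors the value used in several of the jobs further down.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.core.Instances;
import weka.core.Utils;

public class RankerThresholdExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the file name is a placeholder for illustration.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        data.setClassIndex(data.numAttributes() - 1);

        // Rank all attributes by information gain and discard those scoring below the threshold.
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.0001);

        AttributeSelection attsel = new AttributeSelection();
        attsel.setEvaluator(new InfoGainAttributeEval());
        attsel.setSearch(ranker);
        attsel.SelectAttributes(data);

        System.out.println("Selected attribute indices (0-based): "
                + Utils.arrayToString(attsel.selectedAttributes()));
    }
}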
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // Takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // With NaiveBayes this takes a long time
    }

    return classifier;
}
From source file:jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java
License:Open Source License
@Override protected void run() throws Exception { // validate args if (!Bucket.isBucket("datasets", inputBucket)) { throw new FileNotFoundException(inputBucket); }// w w w .j a va 2 s . co m if (!Bucket.isBucket("models", outputBucket)) { throw new FileNotFoundException(outputBucket); } // create prototype classifiers Map<String, Classifier> prototypes = new HashMap<String, Classifier>(); // Bagged REPTrees Bagging baggedTrees = new Bagging(); baggedTrees.setNumExecutionSlots(1); baggedTrees.setNumIterations(100); baggedTrees.setClassifier(new REPTree()); baggedTrees.setCalcOutOfBag(false); prototypes.put("Bagged-REPTrees", baggedTrees); // Bagged SMO Bagging baggedSVM = new Bagging(); baggedSVM.setNumExecutionSlots(1); baggedSVM.setNumIterations(100); baggedSVM.setClassifier(new SMO()); baggedSVM.setCalcOutOfBag(false); prototypes.put("Bagged-SMO", baggedSVM); // Meta Cost model for Naive Bayes Bagging bagging = new Bagging(); bagging.setNumExecutionSlots(1); bagging.setNumIterations(100); bagging.setClassifier(new NaiveBayes()); CostSensitiveClassifier meta = new CostSensitiveClassifier(); meta.setClassifier(bagging); meta.setMinimizeExpectedCost(true); prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", bagging); // init multi-threading Job.startService(); final Queue<Future<Object>> queue = new LinkedList<Future<Object>>(); // get the input from the bucket List<String> names = Bucket.getBucketItems("datasets", this.inputBucket); for (String dsn : names) { // for each prototype classifier for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) { // // speical logic for meta cost // Classifier alg = AbstractClassifier.makeCopy(prototype.getValue()); if (alg instanceof CostSensitiveClassifier) { int essaySet = Contest.getEssaySet(dsn); String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt"; ((CostSensitiveClassifier) alg) .setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix))); } // use InfoGain to discard useless attributes AttributeSelectedClassifier classifier = new AttributeSelectedClassifier(); classifier.setEvaluator(new InfoGainAttributeEval()); Ranker ranker = new Ranker(); ranker.setThreshold(0.0001); classifier.setSearch(ranker); classifier.setClassifier(alg); queue.add(Job.submit( new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket))); } } // wait on complete Progress progress = new Progress(queue.size(), this.getClass().getSimpleName()); while (!queue.isEmpty()) { try { queue.remove().get(); } catch (Exception e) { Job.log("ERROR", e.toString()); } progress.tick(); } progress.done(); Job.stopService(); }
From source file:jjj.asap.sas.models1.job.BuildBasicModels.java
License:Open Source License
@Override protected void run() throws Exception { // validate args if (!Bucket.isBucket("datasets", inputBucket)) { throw new FileNotFoundException(inputBucket); }/* w ww . j a v a2 s. c o m*/ if (!Bucket.isBucket("models", outputBucket)) { throw new FileNotFoundException(outputBucket); } // create prototype classifiers Map<String, Classifier> prototypes = new HashMap<String, Classifier>(); // bayes BayesNet net = new BayesNet(); net.setEstimator(new BMAEstimator()); prototypes.put("BayesNet", net); prototypes.put("NaiveBayes", new NaiveBayes()); // functions prototypes.put("RBFNetwork", new RBFNetwork()); prototypes.put("SMO", new SMO()); // init multi-threading Job.startService(); final Queue<Future<Object>> queue = new LinkedList<Future<Object>>(); // get the input from the bucket List<String> names = Bucket.getBucketItems("datasets", this.inputBucket); for (String dsn : names) { // for each prototype classifier for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) { // use InfoGain to discard useless attributes AttributeSelectedClassifier classifier = new AttributeSelectedClassifier(); classifier.setEvaluator(new InfoGainAttributeEval()); Ranker ranker = new Ranker(); ranker.setThreshold(0.0001); classifier.setSearch(ranker); classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue())); queue.add(Job.submit( new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket))); } } // wait on complete Progress progress = new Progress(queue.size(), this.getClass().getSimpleName()); while (!queue.isEmpty()) { try { queue.remove().get(); } catch (Exception e) { Job.log("ERROR", e.toString()); } progress.tick(); } progress.done(); Job.stopService(); }
From source file:jjj.asap.sas.models1.job.BuildBasicModels2.java
License:Open Source License
@Override protected void run() throws Exception { // validate args if (!Bucket.isBucket("datasets", inputBucket)) { throw new FileNotFoundException(inputBucket); }/* w w w . j a v a2 s .c o m*/ if (!Bucket.isBucket("models", outputBucket)) { throw new FileNotFoundException(outputBucket); } // create prototype classifiers Map<String, Classifier> prototypes = new HashMap<String, Classifier>(); // models prototypes.put("NBTree", new NBTree()); prototypes.put("Logistic", new Logistic()); // init multi-threading Job.startService(); final Queue<Future<Object>> queue = new LinkedList<Future<Object>>(); // get the input from the bucket List<String> names = Bucket.getBucketItems("datasets", this.inputBucket); for (String dsn : names) { // for each prototype classifier for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) { // use InfoGain to discard useless attributes AttributeSelectedClassifier classifier = new AttributeSelectedClassifier(); classifier.setEvaluator(new InfoGainAttributeEval()); Ranker ranker = new Ranker(); ranker.setThreshold(0.0001); classifier.setSearch(ranker); classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue())); queue.add(Job.submit( new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket))); } } // wait on complete Progress progress = new Progress(queue.size(), this.getClass().getSimpleName()); while (!queue.isEmpty()) { try { queue.remove().get(); } catch (Exception e) { Job.log("ERROR", e.toString()); } progress.tick(); } progress.done(); Job.stopService(); }
From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java
License:Open Source License
@Override protected void run() throws Exception { // validate args if (!Bucket.isBucket("datasets", inputBucket)) { throw new FileNotFoundException(inputBucket); }/*ww w.j a va 2 s.c o m*/ if (!Bucket.isBucket("models", outputBucket)) { throw new FileNotFoundException(outputBucket); } // init multi-threading Job.startService(); final Queue<Future<Object>> queue = new LinkedList<Future<Object>>(); // get the input from the bucket List<String> names = Bucket.getBucketItems("datasets", this.inputBucket); for (String dsn : names) { SMO smo = new SMO(); smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER)); smo.setBuildLogisticModels(true); RBFKernel kernel = new RBFKernel(); kernel.setGamma(0.05); smo.setKernel(kernel); AttributeSelectedClassifier asc = new AttributeSelectedClassifier(); asc.setEvaluator(new InfoGainAttributeEval()); Ranker ranker = new Ranker(); ranker.setThreshold(0.01); asc.setSearch(ranker); asc.setClassifier(smo); queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket))); } // wait on complete Progress progress = new Progress(queue.size(), this.getClass().getSimpleName()); while (!queue.isEmpty()) { try { queue.remove().get(); } catch (Exception e) { Job.log("ERROR", e.toString()); } progress.tick(); } progress.done(); Job.stopService(); }
From source file:net.semanticmetadata.lire.classifiers.HashingSearchBasedClassifierMod.java
License:Open Source License
private static HashMap<String, Double> calculateInformationGain(String wekaFileLocation,
        double[] featureInformationGain, int[] featureSpace, HashMap<String, Integer> featureSpaceHashMap,
        ArrayList<String> featureOrder, HashMap<String, Double> featureInformationGainHashMap) {
    Instances data = null;
    try {
        data = new Instances(new BufferedReader(new FileReader(wekaFileLocation)));
    } catch (IOException e) {
        e.printStackTrace();
    }

    AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection!
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308); // -Double.MAX_VALUE, i.e. no threshold: rank every attribute
    search.setNumToSelect(-1);
    search.setGenerateRanking(true);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    try {
        attsel.SelectAttributes(data);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // obtain the attribute indices and ranked scores that were selected
    int[] indices = new int[0];
    double[][] rankedAttribuesArray = new double[0][0];
    try {
        rankedAttribuesArray = attsel.rankedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }
    try {
        indices = attsel.selectedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // accumulate each attribute's information gain under the feature it belongs to
    for (int i = 0; i < rankedAttribuesArray.length; i++) {
        int currentFeature = Integer.parseInt(data.attribute((int) rankedAttribuesArray[i][0]).name()
                .substring(0, data.attribute((int) rankedAttribuesArray[i][0]).name().indexOf("_")));
        featureInformationGainHashMap.put(featureOrder.get(currentFeature),
                featureInformationGainHashMap.get(featureOrder.get(currentFeature)) + rankedAttribuesArray[i][1]);
    }

    // Calculate the mean of the information gain (better comparable)
    for (int i = 0; i < featureOrder.size(); i++) {
        // featureInformationGainHashMap.put(featureOrder.get(i),
        //         (featureInformationGainHashMap.get(featureOrder.get(i)) / featureSpaceHashMap.get(featureOrder.get(i))) * 100);
        featureInformationGainHashMap.put(featureOrder.get(i),
                (featureInformationGainHashMap.get(featureOrder.get(i))));
    }

    System.out.println("Scoring finished, starting with classification! \nScores: ");
    for (int i = 0; i < featureOrder.size(); i++) {
        System.out.println(featureOrder.get(i) + " " + featureInformationGainHashMap.get(featureOrder.get(i)));
    }

    File deleteFile = new File(wekaFileLocation);
    deleteFile.delete();
    return featureInformationGainHashMap;
}
From source file:old.CFS.java
/**
 * Uses the meta-classifier.
 */
protected static void useClassifier(Instances data) throws Exception {
    System.out.println("\n1. Meta-classifier");
    AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(1000);
    J48 base = new J48();
    classifier.setClassifier(base);
    classifier.setEvaluator(eval);
    classifier.setSearch(search);
    Evaluation evaluation = new Evaluation(data);
    evaluation.crossValidateModel(classifier, data, 10, new Random(1));
    System.out.println(evaluation.toSummaryString());
}
From source file:old.CFS.java
/**
 * Uses the filter.
 */
protected static void useFilter(Instances data) throws Exception {
    System.out.println("\n2. Filter");
    weka.filters.supervised.attribute.AttributeSelection filter =
            new weka.filters.supervised.attribute.AttributeSelection();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(1000);
    filter.setEvaluator(eval);
    filter.setSearch(search);
    filter.setInputFormat(data);
    Instances newData = Filter.useFilter(data, filter);
    System.out.println(newData);
}
From source file:old.CFS.java
/**
 * Uses the low-level approach.
 *
 * @param data
 */
protected static void useLowLevel(Instances data) throws Exception {
    System.out.println("\n3. Low-level");
    AttributeSelection attsel = new AttributeSelection();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(1000);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    attsel.setFolds(10);
    attsel.setXval(true);
    attsel.SelectAttributes(data);
    System.out.println(attsel.CrossValidateAttributes());
    Instances newData = attsel.reduceDimensionality(data);
    int[] indices = attsel.selectedAttributes();
    System.out.println(newData);
    System.out.println("selected attribute indices (starting with 0):\n" + Utils.arrayToString(indices));
}