Example usage for weka.attributeSelection Ranker setThreshold

Introduction

On this page you can find example usages of weka.attributeSelection.Ranker.setThreshold.

Prototype

@Override
public void setThreshold(double threshold) 

Document

Set the threshold by which the AttributeSelection module can discard attributes.
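
For orientation, the snippet below is a minimal, self-contained sketch of the pattern shared by the examples on this page: a Ranker paired with an attribute evaluator, where attributes scoring below the threshold are discarded. The class name, the file name data.arff, and the 0.01 cutoff are placeholders, not taken from any example.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.core.Instances;

public class RankerThresholdSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the last attribute is assumed to be the class.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        data.setClassIndex(data.numAttributes() - 1);

        AttributeSelection selector = new AttributeSelection();
        selector.setEvaluator(new InfoGainAttributeEval());

        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01); // illustrative cutoff: low-scoring attributes are discarded
        selector.setSearch(ranker);

        selector.SelectAttributes(data);
        System.out.println(selector.toResultsString());
    }
}

The default threshold is -Double.MAX_VALUE, which discards nothing; several examples below set it to that value explicitly to disable the cutoff and rely on setNumToSelect instead.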

Usage

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        //Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0); // keep only attributes with information gain above zero
        as.setSearch(r);

        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });

        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // slow to train

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // slow with NaiveBayes
    }

    return classifier;
}

From source file:jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // Bagged REPTrees

    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);

    prototypes.put("Bagged-REPTrees", baggedTrees);

    // Bagged SMO

    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);

    prototypes.put("Bagged-SMO", baggedSVM);

    // Meta Cost model for Naive Bayes

    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());

    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);

    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", meta); // register the cost-sensitive wrapper, not the bare bagger

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // special logic for meta cost

            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());

            if (alg instanceof CostSensitiveClassifier) {

                int essaySet = Contest.getEssaySet(dsn);

                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";

                ((CostSensitiveClassifier) alg)
                        .setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));

            }

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001); // drop attributes with negligible information gain
            classifier.setSearch(ranker);

            classifier.setClassifier(alg);

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildBasicModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // bayes

    BayesNet net = new BayesNet();
    net.setEstimator(new BMAEstimator());
    prototypes.put("BayesNet", net);

    prototypes.put("NaiveBayes", new NaiveBayes());

    // functions

    prototypes.put("RBFNetwork", new RBFNetwork());
    prototypes.put("SMO", new SMO());

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001); // drop attributes with negligible information gain
            classifier.setSearch(ranker);

            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildBasicModels2.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // models

    prototypes.put("NBTree", new NBTree());
    prototypes.put("Logistic", new Logistic());

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001); // drop attributes with negligible information gain
            classifier.setSearch(ranker);

            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);

        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01); // higher cutoff: keep only strongly informative attributes
        asc.setSearch(ranker);
        asc.setClassifier(smo);

        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:net.semanticmetadata.lire.classifiers.HashingSearchBasedClassifierMod.java

License:Open Source License

private static HashMap<String, Double> calculateInformationGain(String wekaFileLocation,
        double[] featureInformationGain, int featureSpace[], HashMap<String, Integer> featureSpaceHashMap,
        ArrayList<String> featureOrder, HashMap<String, Double> featureInformationGainHashMap) {

    Instances data = null;
    try {
        data = new Instances(new BufferedReader(new FileReader(wekaFileLocation)));
    } catch (IOException e) {
        e.printStackTrace();
    }
    AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection!
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-Double.MAX_VALUE); // no cutoff: rank every attribute
    search.setNumToSelect(-1);
    search.setGenerateRanking(true);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    try {

        attsel.SelectAttributes(data);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // obtain the attribute indices that were selected
    int[] indices = new int[0];
    double[][] rankedAttribuesArray = new double[0][0];
    try {
        rankedAttribuesArray = attsel.rankedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }
    try {
        indices = attsel.selectedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Accumulate each attribute's ranked information-gain score into the bucket for
    // the feature it belongs to (attribute names have the form "<featureIndex>_<n>").
    for (int i = 0; i < rankedAttribuesArray.length; i++) {
        String attributeName = data.attribute((int) rankedAttribuesArray[i][0]).name();
        int currentFeature = Integer.parseInt(attributeName.substring(0, attributeName.indexOf("_")));
        featureInformationGainHashMap.put(featureOrder.get(currentFeature),
                featureInformationGainHashMap.get(featureOrder.get(currentFeature))
                        + rankedAttribuesArray[i][1]);
    }

    // The per-feature normalization (mean information gain, for better comparability)
    // is disabled; this loop re-stores the raw summed scores unchanged.
    for (int i = 0; i < featureOrder.size(); i++) {
        featureInformationGainHashMap.put(featureOrder.get(i),
                featureInformationGainHashMap.get(featureOrder.get(i)));
    }

    System.out.println("Scoring finished, starting with classification! Scores: ");
    for (int i = 0; i < featureOrder.size(); i++) {
        System.out.println(featureOrder.get(i) + " " + featureInformationGainHashMap.get(featureOrder.get(i)));
    }
    // Remove the temporary Weka input file.
    File deleteFile = new File(wekaFileLocation);
    deleteFile.delete();
    return featureInformationGainHashMap;
}

From source file:old.CFS.java

/**
 * uses the meta-classifier
 */
protected static void useClassifier(Instances data) throws Exception {
    System.out.println("\n1. Meta-classifier");
    AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-Double.MAX_VALUE); // disable the cutoff; selection is capped by setNumToSelect
    search.setNumToSelect(1000);
    J48 base = new J48();
    classifier.setClassifier(base);
    classifier.setEvaluator(eval);
    classifier.setSearch(search);
    Evaluation evaluation = new Evaluation(data);
    evaluation.crossValidateModel(classifier, data, 10, new Random(1));
    System.out.println(evaluation.toSummaryString());
}

From source file:old.CFS.java

/**
 * uses the filter
 */
protected static void useFilter(Instances data) throws Exception {
    System.out.println("\n2. Filter");
    weka.filters.supervised.attribute.AttributeSelection filter = new weka.filters.supervised.attribute.AttributeSelection();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();

    Ranker search = new Ranker();
    search.setThreshold(-Double.MAX_VALUE); // disable the cutoff; selection is capped by setNumToSelect
    search.setNumToSelect(1000);
    filter.setEvaluator(eval);

    filter.setSearch(search);
    filter.setInputFormat(data);
    Instances newData = Filter.useFilter(data, filter);
    System.out.println(newData);
}

From source file:old.CFS.java

/**
 * uses the low level approach
 * @param data
 */
protected static void useLowLevel(Instances data) throws Exception {
    System.out.println("\n3. Low-level");
    AttributeSelection attsel = new AttributeSelection();
    ChiSquaredAttributeEval eval = new ChiSquaredAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-Double.MAX_VALUE); // disable the cutoff; selection is capped by setNumToSelect
    search.setNumToSelect(1000);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    attsel.setFolds(10);
    attsel.setXval(true);
    attsel.SelectAttributes(data);

    System.out.println(attsel.CrossValidateAttributes());
    Instances newData = attsel.reduceDimensionality(data);

    int[] indices = attsel.selectedAttributes();
    System.out.println(newData);
    System.out.println("selected attribute indices (starting with 0):\n" + Utils.arrayToString(indices));
}