Example usage for weka.attributeSelection InfoGainAttributeEval InfoGainAttributeEval

Introduction

In this page you can find the example usage for weka.attributeSelection InfoGainAttributeEval InfoGainAttributeEval.

Prototype

public InfoGainAttributeEval()

Source Link

Document

Constructor

Usage

From source file:FeatureSelectionClass.java

public AttributeSelection withGainRatio(String path) throws Exception {
    int N;/*from   www .  j a va2  s  . c o  m*/
    PreparingSteps pr = new PreparingSteps();
    N = pr.getReadFileData(path).numAttributes();
    Instances data = pr.getReadFileData(path);

    AttributeSelection selector = new AttributeSelection();
    InfoGainAttributeEval evaluator = new InfoGainAttributeEval();
    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, N - 1));
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;

}

From source file:ca.uottawa.balie.WekaAttributeSelection.java

License:Open Source License

/**
 * Select the top attributes/*from   w  w  w . ja v a  2 s  .c o  m*/
 */
public void Select(boolean pi_Debug) {
    Instances insts = m_DummyLearner.GetTrainInstances();

    try {
        ASEvaluation eval = null;
        ASSearch search = null;

        if (m_Evaluator == WEKA_CHI_SQUARE) {
            eval = new ChiSquaredAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_INFO_GAIN) {
            eval = new InfoGainAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_WRAPPER) {
            eval = new ClassifierSubsetEval();
            ((ClassifierSubsetEval) eval).setClassifier(new NaiveBayes());
            search = new Ranker(); // TODO: use something else than ranker
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SYM_UNCERT) {
            eval = new SymmetricalUncertAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SVM) {
            eval = new SVMAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_RELIEF) {
            eval = new ReliefFAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_ONER) {
            eval = new OneRAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        }

        m_AttributeSelection = new AttributeSelection();
        m_AttributeSelection.setEvaluator(eval);
        m_AttributeSelection.setSearch(search);

        m_AttributeSelection.SelectAttributes(insts);
        if (pi_Debug)
            System.out.println(m_AttributeSelection.toResultsString());

    } catch (Exception e) {
        System.err.println(e.getMessage());
    }

}

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);//from ww w . j  a va  2  s  .  c o  m
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        //Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else //TOKENIZER_COMPLETE_NUMBERS)
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);

        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });

        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); //Tarda mucho

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); //Con NB tarda mucho
    }

    return classifier;
}

From source file:Helper.CustomFilter.java

public Instances removeAttribute(Instances structure) throws Exception {
    //NORMALIZE AND REMOVE USELESS ATTRIBUTES
    Normalize norm = new Normalize();
    norm.setInputFormat(structure);/*from w  ww.  jav  a 2  s  . c o  m*/
    structure = Filter.useFilter(structure, norm);

    RemoveUseless ru = new RemoveUseless();
    ru.setInputFormat(structure);
    structure = Filter.useFilter(structure, ru);

    Ranker rank = new Ranker();
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    eval.buildEvaluator(structure);
    //END OF NORMALIZATION

    return structure;
}

From source file:ia02classificacao.IA02Classificacao.java

/**
 * @param args the command line arguments
 *//*  w w w  .j a v  a  2  s.co m*/
public static void main(String[] args) throws Exception {

    // abre o banco de dados arff e mostra a quantidade de instancias (linhas)
    DataSource arquivo = new DataSource("data/zoo.arff");
    Instances dados = arquivo.getDataSet();
    System.out.println("Instancias lidas: " + dados.numInstances());

    // FILTER: remove o atributo nome do animal da classificao
    String[] parametros = new String[] { "-R", "1" };
    Remove filtro = new Remove();
    filtro.setOptions(parametros);
    filtro.setInputFormat(dados);
    dados = Filter.useFilter(dados, filtro);

    AttributeSelection selAtributo = new AttributeSelection();
    InfoGainAttributeEval avaliador = new InfoGainAttributeEval();
    Ranker busca = new Ranker();
    selAtributo.setEvaluator(avaliador);
    selAtributo.setSearch(busca);
    selAtributo.SelectAttributes(dados);
    int[] indices = selAtributo.selectedAttributes();
    System.out.println("Selected attributes: " + Utils.arrayToString(indices));

    // Usa o algoritimo J48 e mostra a classificao dos dados em forma textual
    String[] opcoes = new String[1];
    opcoes[0] = "-U";
    J48 arvore = new J48();
    arvore.setOptions(opcoes);
    arvore.buildClassifier(dados);
    System.out.println(arvore);

    // Usa o algoritimo J48 e mostra a classificao de dados em forma grafica
    /*
    TreeVisualizer tv = new TreeVisualizer(null, arvore.graph(), new PlaceNode2());
    JFrame frame = new javax.swing.JFrame("?rvore de Conhecimento");
    frame.setSize(800,500);
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    frame.getContentPane().add(tv);
    frame.setVisible(true);
    tv.fitToScreen();
    */

    /*
    * Classificao de novos dados
    */

    System.out.println("\n\nCLASSIFICAO DE NOVOS DADOS");
    // criar atributos
    double[] vals = new double[dados.numAttributes()];
    vals[0] = 1.0; // hair
    vals[1] = 0.0; // feathers
    vals[2] = 0.0; // eggs
    vals[3] = 1.0; // milk
    vals[4] = 1.0; // airborne
    vals[5] = 0.0; // aquatic
    vals[6] = 0.0; // predator
    vals[7] = 1.0; // toothed
    vals[8] = 1.0; // backbone
    vals[9] = 1.0; // breathes
    vals[10] = 0.0; // venomous
    vals[11] = 0.0; // fins
    vals[12] = 4.0; // legs
    vals[13] = 1.0; // tail
    vals[14] = 1.0; // domestic
    vals[15] = 1.0; // catsize

    // Criar uma instncia baseada nestes atributos
    Instance meuUnicornio = new DenseInstance(1.0, vals);

    // Adicionar a instncia nos dados
    meuUnicornio.setDataset(dados);

    // Classificar esta nova instncia
    double label = arvore.classifyInstance(meuUnicornio);

    // Imprimir o resultado da classificao
    System.out.println("Novo Animal: Unicrnio");
    System.out.println("classificacao: " + dados.classAttribute().value((int) label));

    /*
    * Avaliao e predio de erros de mtrica
    */
    System.out.println("\n\nAVALIAO E PREDIO DE ERROS DE MTRICA");
    Classifier cl = new J48();
    Evaluation eval_roc = new Evaluation(dados);
    eval_roc.crossValidateModel(cl, dados, 10, new Random(1), new Object[] {});
    System.out.println(eval_roc.toSummaryString());

    /*
    * Matriz de confuso
    */
    System.out.println("\n\nMATRIZ DE CONFUSO");
    double[][] confusionMatrix = eval_roc.confusionMatrix();
    System.out.println(eval_roc.toMatrixString());

}

From source file:ia03classificador.jFrClassificador.java

public void doClassificate() throws Exception {

    // Quando clicado, a variavel recebe 1, quando no clicado recebe 0
    v00 = ((btn00.isSelected()) ? ((double) 1) : ((double) 0));
    v01 = ((btn01.isSelected()) ? ((double) 1) : ((double) 0));
    v02 = ((btn02.isSelected()) ? ((double) 1) : ((double) 0));
    v03 = ((btn03.isSelected()) ? ((double) 1) : ((double) 0));
    v04 = ((btn04.isSelected()) ? ((double) 1) : ((double) 0));
    v05 = ((btn05.isSelected()) ? ((double) 1) : ((double) 0));
    v06 = ((btn06.isSelected()) ? ((double) 1) : ((double) 0));
    v07 = ((btn07.isSelected()) ? ((double) 1) : ((double) 0));
    v08 = ((btn08.isSelected()) ? ((double) 1) : ((double) 0));
    v09 = ((btn09.isSelected()) ? ((double) 1) : ((double) 0));
    v10 = ((btn10.isSelected()) ? ((double) 1) : ((double) 0));
    v11 = ((btn11.isSelected()) ? ((double) 1) : ((double) 0));
    v13 = ((btn13.isSelected()) ? ((double) 1) : ((double) 0));
    v14 = ((btn14.isSelected()) ? ((double) 1) : ((double) 0));
    v15 = ((btn15.isSelected()) ? ((double) 1) : ((double) 0));
    legs = txtLegs.getText();// w ww  .  j a v a 2  s  . c om
    legs = ((legs == null || legs.trim().isEmpty() ? "2" : legs));
    name = txtName.getText();

    // abre o banco de dados arff e guarda os registros no objeto dados
    ConverterUtils.DataSource arquivo = new ConverterUtils.DataSource("data/zoo.arff");
    Instances dados = arquivo.getDataSet();

    // FILTER: remove o atributo nome do animal da classificao
    String[] parametros = new String[] { "-R", "1" };
    Remove filtro = new Remove();
    filtro.setOptions(parametros);
    filtro.setInputFormat(dados);
    dados = Filter.useFilter(dados, filtro);

    AttributeSelection selAtributo = new AttributeSelection();
    InfoGainAttributeEval avaliador = new InfoGainAttributeEval();
    Ranker busca = new Ranker();
    selAtributo.setEvaluator(avaliador);
    selAtributo.setSearch(busca);
    selAtributo.SelectAttributes(dados);
    int[] indices = selAtributo.selectedAttributes();
    //System.out.println("Selected attributes: " + Utils.arrayToString(indices));

    // Usa o algoritimo J48 para montar a arvore de dados
    String[] opcoes = new String[1];
    opcoes[0] = "-U";
    J48 arvore = new J48();
    arvore.setOptions(opcoes);
    arvore.buildClassifier(dados);

    // cria o novo elemento para comparao
    double[] vals = new double[dados.numAttributes()];
    vals[0] = v00; // hair
    vals[1] = v01; // feathers
    vals[2] = v02; // eggs
    vals[3] = v03; // milk
    vals[4] = v04; // airborne
    vals[5] = v05; // aquatic
    vals[6] = v06; // predator
    vals[7] = v07; // toothed
    vals[8] = v08; // backbone
    vals[9] = v09; // breathes
    vals[10] = v10; // venomous
    vals[11] = v11; // fins
    vals[12] = Double.parseDouble(legs); // legs
    vals[13] = v13; // tail
    vals[14] = v14; // domestic
    vals[15] = v15; // catsize

    // Criar uma instncia baseada nestes atributos
    Instance newAnimal = new DenseInstance(1.0, vals);

    // Adicionar a instncia nos dados
    newAnimal.setDataset(dados);

    // Classificar esta nova instncia
    double label = arvore.classifyInstance(newAnimal);

    // Imprimir o resultado da classificao
    lblClassification.setText(dados.classAttribute().value((int) label));

}

From source file:jjj.asap.sas.models1.job.BuildBasicMetaCostModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }/*from   ww w. j  a va 2s.c o  m*/
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // Bagged REPTrees

    Bagging baggedTrees = new Bagging();
    baggedTrees.setNumExecutionSlots(1);
    baggedTrees.setNumIterations(100);
    baggedTrees.setClassifier(new REPTree());
    baggedTrees.setCalcOutOfBag(false);

    prototypes.put("Bagged-REPTrees", baggedTrees);

    // Bagged SMO

    Bagging baggedSVM = new Bagging();
    baggedSVM.setNumExecutionSlots(1);
    baggedSVM.setNumIterations(100);
    baggedSVM.setClassifier(new SMO());
    baggedSVM.setCalcOutOfBag(false);

    prototypes.put("Bagged-SMO", baggedSVM);

    // Meta Cost model for Naive Bayes

    Bagging bagging = new Bagging();
    bagging.setNumExecutionSlots(1);
    bagging.setNumIterations(100);
    bagging.setClassifier(new NaiveBayes());

    CostSensitiveClassifier meta = new CostSensitiveClassifier();
    meta.setClassifier(bagging);
    meta.setMinimizeExpectedCost(true);

    prototypes.put("CostSensitive-MinimizeExpectedCost-NaiveBayes", bagging);

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // 
            // speical logic for meta cost
            //

            Classifier alg = AbstractClassifier.makeCopy(prototype.getValue());

            if (alg instanceof CostSensitiveClassifier) {

                int essaySet = Contest.getEssaySet(dsn);

                String matrix = Contest.getRubrics(essaySet).size() == 3 ? "cost3.txt" : "cost4.txt";

                ((CostSensitiveClassifier) alg)
                        .setCostMatrix(new CostMatrix(new FileReader("/asap/sas/trunk/" + matrix)));

            }

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);

            classifier.setClassifier(alg);

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildBasicModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }/*from w  ww  .  j  a va2 s. com*/
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // bayes

    BayesNet net = new BayesNet();
    net.setEstimator(new BMAEstimator());
    prototypes.put("BayesNet", net);

    prototypes.put("NaiveBayes", new NaiveBayes());

    // functions

    prototypes.put("RBFNetwork", new RBFNetwork());
    prototypes.put("SMO", new SMO());

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);

            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildBasicModels2.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }// w  w w  .  j a  v a2 s. c  om
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // create prototype classifiers
    Map<String, Classifier> prototypes = new HashMap<String, Classifier>();

    // models

    prototypes.put("NBTree", new NBTree());
    prototypes.put("Logistic", new Logistic());

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        // for each prototype classifier
        for (Map.Entry<String, Classifier> prototype : prototypes.entrySet()) {

            // use InfoGain to discard useless attributes

            AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();

            classifier.setEvaluator(new InfoGainAttributeEval());

            Ranker ranker = new Ranker();
            ranker.setThreshold(0.0001);
            classifier.setSearch(ranker);

            classifier.setClassifier(AbstractClassifier.makeCopy(prototype.getValue()));

            queue.add(Job.submit(
                    new ModelBuilder(dsn, "InfoGain-" + prototype.getKey(), classifier, this.outputBucket)));
        }
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:jjj.asap.sas.models1.job.BuildRBFKernelModels.java

License:Open Source License

@Override
protected void run() throws Exception {

    // validate args
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }/*from w ww  .  jav  a2s  .  c  om*/
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // init multi-threading
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<Future<Object>>();

    // get the input from the bucket
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        SMO smo = new SMO();
        smo.setFilterType(new SelectedTag(SMO.FILTER_NONE, SMO.TAGS_FILTER));
        smo.setBuildLogisticModels(true);
        RBFKernel kernel = new RBFKernel();
        kernel.setGamma(0.05);
        smo.setKernel(kernel);

        AttributeSelectedClassifier asc = new AttributeSelectedClassifier();
        asc.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setThreshold(0.01);
        asc.setSearch(ranker);
        asc.setClassifier(smo);

        queue.add(Job.submit(new ModelBuilder(dsn, "InfoGain-SMO-RBFKernel", asc, this.outputBucket)));
    }

    // wait on complete
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}