Example usage for weka.classifiers.meta.FilteredClassifier: the FilteredClassifier() constructor

Introduction

This page collects example usages of the default constructor of weka.classifiers.meta.FilteredClassifier.

Prototype

public FilteredClassifier() 

Document

Default constructor.
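
FilteredClassifier runs a base classifier on data that has been passed through a filter; the filter's structure is learned from the training data only and is then applied to both training and test instances, so no test information leaks through the filter. A minimal sketch of the typical pattern (the file name train.arff is a placeholder):

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.unsupervised.attribute.Remove;

public class MinimalFilteredClassifier {
    public static void main(String[] args) throws Exception {
        // Load a dataset and use the last attribute as the class ("train.arff" is a placeholder)
        Instances data = DataSource.read("train.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Drop the first attribute (e.g. an ID) before the base classifier sees the data
        Remove remove = new Remove();
        remove.setAttributeIndices("1");

        FilteredClassifier fc = new FilteredClassifier(); // the default constructor documented above
        fc.setFilter(remove);
        fc.setClassifier(new J48());
        fc.buildClassifier(data);
        System.out.println(fc);
    }
}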

Usage

From source file:PredictMention.java

public PredictMention() {
    try {

        String pathStopList = (new File("SpanishStoplist.txt")).getAbsolutePath();
        filter = new FilteredClassifier();
        filter.setOptions(weka.core.Utils.splitOptions(
                "-F \"weka.filters.unsupervised.attribute.StringToWordVector -R first-last -W 1000 -prune-rate -1.0 -N 0 -stemmer \\\"weka.core.stemmers.SnowballStemmer -S spanish\\\" -stopwords-handler \\\"weka.core.stopwords.WordsFromFile -stopwords "
                        + pathStopList
                        + "\\\" -M 1 -tokenizer \\\"weka.core.tokenizers.WordTokenizer -delimiters \\\\\\\" \\\\\\\\r\\\\\\\\n\\\\\\\\t.,;:\\\\\\\\\\\\\\'\\\\\\\\\\\\\\\"()?!\\\\\\\"\\\"\" -W weka.classifiers.trees.J48 -- -C 0.25 -M 2"));
    } catch (Exception ex) {
        Logger.getLogger(PredictMention.class.getName()).log(Level.SEVERE, null, ex);
    }
}
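
The escaped option string above is hard to read and easy to get wrong. The same setup can be assembled with plain setters instead of splitOptions; the following is a sketch, not the original class, and assumes the weka.core.stemmers.SnowballStemmer class is on the classpath and a Weka version recent enough to offer setStopwordsHandler (the -N 0 and -prune-rate -1.0 options appear to match StringToWordVector's defaults, so they are omitted):

StringToWordVector s2wv = new StringToWordVector();
s2wv.setAttributeIndices("first-last");                  // -R first-last
s2wv.setWordsToKeep(1000);                               // -W 1000
s2wv.setMinTermFreq(1);                                  // -M 1

SnowballStemmer stemmer = new SnowballStemmer();
stemmer.setStemmer("spanish");                           // -stemmer "SnowballStemmer -S spanish"
s2wv.setStemmer(stemmer);

WordsFromFile stopwords = new WordsFromFile();           // weka.core.stopwords.WordsFromFile
stopwords.setStopwords(new File(pathStopList));          // -stopwords-handler "... -stopwords <file>"
s2wv.setStopwordsHandler(stopwords);

WordTokenizer tokenizer = new WordTokenizer();
tokenizer.setDelimiters(" \r\n\t.,;:'\"()?!");           // -tokenizer "WordTokenizer -delimiters ..."
s2wv.setTokenizer(tokenizer);

J48 j48 = new J48();
j48.setConfidenceFactor(0.25f);                          // -- -C 0.25
j48.setMinNumObj(2);                                     // -M 2

filter = new FilteredClassifier();                       // same field as in the constructor above
filter.setFilter(s2wv);
filter.setClassifier(j48);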

From source file:adams.opt.optimise.GeneticAlgorithm.java

License:Open Source License

public static void main(String[] args) {
    Environment.setEnvironmentClass(Environment.class);
    GeneticAlgorithm ga = new GeneticAlgorithm();
    ga.setBits(1);
    ga.setNumChrom(8);
    ga.setIterations(10000);
    ga.setFavorZeroes(true);

    AttributeSelection as = new AttributeSelection();
    //as.setDataset(new PlaceholderFile("/home/dale/blgg/conversion/merged/m_5_.75.arff"));
    ArrayConsumer.setOptions(as, args);
    PLSClassifier pls = new PLSClassifier();
    PLSFilter pf = (PLSFilter) pls.getFilter();
    pf.setNumComponents(11);

    LinearRegressionJ reg = new LinearRegressionJ();
    reg.setEliminateColinearAttributes(false);
    reg.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));

    GPD gp = new GPD();
    gp.setNoise(.01);
    //RBFKernel rbf = new RBFKernel();
    //rbf.setChecksTurnedOff(true);
    //rbf.setGamma(.01);
    //gp.setKernel(rbf);

    Remove remove = new Remove();
    remove.setAttributeIndices("1");
    FilteredClassifier fc = new FilteredClassifier();

    MultiFilter mf = new MultiFilter();
    Filter[] filters = new Filter[2];
    filters[0] = remove;
    filters[1] = pf;
    mf.setFilters(filters);

    fc.setClassifier(gp);
    fc.setFilter(mf); // use the MultiFilter (Remove + PLS) assembled above

    as.setClassifier(gp);
    as.setClassIndex("last");
    //as.setDataset(new PlaceholderFile("/home/dale/OMD_clean.arff"));
    //as.setOutputDirectory(new PlaceholderFile("/research/dale"));
    ga.setLoggingLevel(LoggingLevel.INFO);
    as.setLoggingLevel(LoggingLevel.INFO);
    ga.optimise(as.getDataDef(), as);

}

From source file:aprendizadodemaquina.classificadores.ClassificadorDT.java

License:Open Source License

@Override
public int treinar(File arquivoTreinamento, String parametros) {

    Logger.getLogger("ARS logger").info("Treinamento de classificador J48");
    // TODO: Verificar por que os parmetros dos classificadores no esto funcionando

    try {// w w w  . j a v a2s.  c om

        ConverterUtils.DataSource fonte = new ConverterUtils.DataSource(arquivoTreinamento.getAbsolutePath());
        dadosTreinamento = fonte.getDataSet();

        // Identifica o atributo que ser a classe de interesse (ltimo atributo = qual relao )
        dadosTreinamento.setClassIndex(dadosTreinamento.numAttributes() - 1);

        // Cria um classificador que aceita filtros
        classificador = new FilteredClassifier();
        classificador.setClassifier(new J48());

        // Seleciona os atributos que no so parte da classificacao
        // TODO: SE OS DADOS DE TREINAMENTO TIVEREM A FEATURE DE IDENTIFICAO...
        /*
        Remove remove = new Remove();
        int[] atributosRemovidos = new int[4];
        atributosRemovidos[0] = 0;   // ID da sentena
        atributosRemovidos[1] = 1;   // termo1
        atributosRemovidos[2] = 2;   // termo2
        remove.setAttributeIndicesArray( atributosRemovidos );
        // Acopla o filtro de remoo ao classificador
        fc.setFilter( remove );
        */
        // FIM-SE

        //                String []param = weka.core.Utils.splitOptions(parametros[cmbTipoClassificador1.getSelectedIndex()]);
        //                fc.setOptions(param);

        // Faz o treinamento do classificador
        classificador.buildClassifier(dadosTreinamento);

        return 0;

    } catch (Exception ex) {
        Logger.getLogger("ARS logger").log(Level.SEVERE, null, ex);
        if (ex instanceof IllegalArgumentException) {
            javax.swing.JOptionPane.showMessageDialog(null,
                    "The supplied training file is not a valid ARFF file", "Error",
                    javax.swing.JOptionPane.ERROR_MESSAGE);
        }
        return 1;
    }

}

From source file:CEP.CEPListener.java

FilteredClassifier CreateClassifier() {
    Remove rm = new Remove();
    //rm.setAttributeIndices("1");  // remove 1st attribute
    PART c = new PART();
    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(c);
    return fc;
}
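
Note that the Remove filter above has no indices set (the line that would strip the first attribute is commented out), so it passes instances through unchanged. A usage sketch, assuming an Instances variable train whose class index has already been set:

FilteredClassifier fc = CreateClassifier();
fc.buildClassifier(train);                               // trains PART on the filtered data
double label = fc.classifyInstance(train.instance(0));   // the filter is applied to test instances as well
System.out.println(train.classAttribute().value((int) label));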

From source file:com.github.fracpete.multisearch.optimize.PLSFilterAndLinearRegression.java

License:Open Source License

/**
 * The first parameter must be the dataset; the (optional) second is
 * the class index (1-based; 'first' and 'last' are also supported).
 *
 * @param args   the commandline options
 * @throws Exception   if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: PLSFilterAndLinearRegression <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure multisearch
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");
    // assemble everything
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(fc);
    multi.setSearchParameters(new AbstractParameter[] { numComp, ridge });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_RMSE,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameters: " + multi.getGenerator().evaluate(multi.getBestValues()));
}
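
Once buildClassifier has finished, the best setup found can be trained and used on its own; a short sketch, assuming the same data variable (AbstractClassifier.makeCopy returns a fresh, untrained copy of the setup):

Classifier best = AbstractClassifier.makeCopy(multi.getBestClassifier());
best.buildClassifier(data);
System.out.println("Prediction for first instance: " + best.classifyInstance(data.instance(0)));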

From source file:com.github.fracpete.multisearch.setupgenerator.PLSFilterAndLinearRegression.java

License:Open Source License

/**
 * Outputs the commandlines.
 *
 * @param args   the commandline options
 * @throws Exception   if setup generator fails for some reason
 */
public static void main(String[] args) throws Exception {
    // configure classifier we want to generate setups for
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure generator
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");
    // assemble everything
    SetupGenerator generator = new SetupGenerator();
    generator.setBaseObject(fc);
    generator.setParameters(new AbstractParameter[] { numComp, ridge });

    // output configuration
    System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator));

    // output commandlines
    System.out.println("\nCommandlines:\n");
    Enumeration<Serializable> enm = generator.setups();
    while (enm.hasMoreElements())
        System.out.println(Utils.toCommandLine(enm.nextElement()));
}
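
Each printed command line can be turned back into a configured classifier via Weka's generic option handling; a sketch, assuming the enumeration yields classifier setups as in the loop above:

String cmdline = Utils.toCommandLine(generator.setups().nextElement());
String[] options = Utils.splitOptions(cmdline);
String classname = options[0];
options[0] = "";                                         // Utils.forName expects the class name separately
FilteredClassifier setup = (FilteredClassifier) Utils.forName(Classifier.class, classname, options);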

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        //Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);

        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });

        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // Takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // Very slow with NB
    }

    return classifier;
}
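
A sketch of driving this factory method, assuming the TOKENIZER_* and CLASSIFIER_* constants are the String flags compared against above and that spam is a labelled Instances object:

// Hypothetical call: keep 1000 words, default tokenizer, attribute selection on,
// SMO as base classifier, no boosting
FilteredClassifier classifier = initFilterClassifier(1000, TOKENIZER_DEFAULT, true, CLASSIFIER_SMO, false);

Evaluation eval = new Evaluation(spam);
eval.crossValidateModel(classifier, spam, 10, new Random(1));
System.out.println(eval.toSummaryString());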

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Builds the classifier.
 */
// aly: The main function, where everything important happens
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == m_DocumentAtt) {
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            if (m_KFused) {
                atts.addElement(new Attribute("Keyphrase_frequency"));
            }
            if (m_STDEVfeature) {
                atts.addElement(new Attribute("Standard_deviation"));
            }
            if (m_NODEfeature) {
                atts.addElement(new Attribute("Relations_number"));
            }
            if (m_LENGTHfeature) {
                atts.addElement(new Attribute("Phrase_length"));
            }
        } else if (i == m_KeyphrasesAtt) {
            FastVector vals = new FastVector(2);
            vals.addElement("False");
            vals.addElement("True");
            //atts.addElement(new Attribute("Keyphrase?", vals));
            atts.addElement(new Attribute("Keyphrase?"));
        }
    }
    m_ClassifierData = new Instances("ClassifierData", atts, 0);
    m_ClassifierData.setClassIndex(m_NumFeatures);

    if (m_Debug) {
        log.info("--- Converting instances for classifier");
    }
    // Convert pending input instances into data for classifier
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(m_KeyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        HashMap<String, Counter> hashKeysEval = getGivenKeyphrases(keyphrases, true);

        // Get the phrases for the document
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = getPhrases(hash, current.stringValue(m_DocumentAtt));
        // hash = getComposits(hash);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier

        Iterator<String> it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);

            double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length, hash);
            //log.info(vals);
            Instance inst = new Instance(current.weight(), vals);
            // .err.println(phrase + "\t" + inst.toString());
            m_ClassifierData.add(inst);
        }
    }

    if (m_Debug) {
        log.info("--- Building classifier");
    }

    // Build classifier

    // Uncomment if you want to use a different classifier
    // Caution: Other places in the code will have to be adjusted!!
    /*I. Naive Bayes:
     FilteredClassifier fclass = new FilteredClassifier();      
     fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
     fclass.setFilter(new Discretize());
     m_Classifier = fclass;
     */

    //NaiveBayes nb = new NaiveBayes();
    //nb.setUseSupervisedDiscretization(true);
    //m_Classifier = nb;

    /* II. Linear Regression:
     LinearRegression lr = new LinearRegression();   
     lr.setAttributeSelectionMethod(new 
     weka.core.SelectedTag(1, LinearRegression.TAGS_SELECTION));
     lr.setEliminateColinearAttributes(false);
     lr.setDebug(false);
             
     m_Classifier = lr;*/

    /* III. Bagging with REPTrees
     Bagging bagging = new Bagging();   
             
     String[] ops_bagging = {
     new String("-P"),
     new String("100"),
     new String("-S"), 
     new String("1"),
     new String("-I"), 
     new String("50")};
             
     */

    /*
     * REPTree rept = new REPTree();
     //results are worse!
      rept.setNoPruning(true);
      String[] ops_rept = {
      new String("-M"), 
      new String("2"),
      new String("-V"), 
      new String("0.0010"),            
      new String("-N"), 
      new String("3"),
      new String("-S"), 
      new String("1"),
      new String("-L"), 
      new String("1"),};
              
      rept.setOptions(ops_rept);
      bagging.setClassifier(rept);
      */

    //   bagging.setOptions(ops_bagging);
    //FilteredClassifier fclass = new FilteredClassifier();      
    //fclass.setClassifier(new REPTree());
    //fclass.setFilter(new Discretize());
    //bagging.setClassifier(fclass);
    //   m_Classifier = bagging;

    RegressionByDiscretization rvd = new RegressionByDiscretization();
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());

    rvd.setClassifier(fclass);
    rvd.setNumBins(m_Indexers + 1);
    m_Classifier = rvd;

    // log.info(m_ClassifierData);   
    //System.exit(1);
    m_Classifier.buildClassifier(m_ClassifierData);

    if (m_Debug) {
        log.info("" + m_Classifier);
    }

    // Save space
    m_ClassifierData = new Instances(m_ClassifierData, 0);
}

From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java

License:Apache License

/**
 *
 * Creates a serialized WEKA model file from an .arff file containing the annotated gold standard.
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation)
 * @throws Exception
 */

public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID  attribute

    BayesNet bn = new BayesNet(); //Standard classifier; BNs proved most robust, but of course other classifiers are possible
    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier
    SerializationHelper.write(model, fc);
    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }

}
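
The serialized model can later be restored and applied to unseen data; a minimal sketch, assuming unlabeled is an Instances object with the same structure as the gold standard (including the ID attribute, which the embedded Remove filter strips again at prediction time):

FilteredClassifier fc = (FilteredClassifier) SerializationHelper.read(model);
for (int i = 0; i < unlabeled.numInstances(); i++) {
    double pred = fc.classifyInstance(unlabeled.instance(i));
    System.out.println(unlabeled.instance(i) + " -> " + unlabeled.classAttribute().value((int) pred));
}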

From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java

License:Open Source License

public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        //          Filter logFilter = new LogFilter();
        //           logFilter.setInputFormat(train);
        //           train = Filter.useFilter(train, logFilter);        
        //           logFilter.setInputFormat(test);
        //           test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1;

        int valueIdx = predictedData.numAttributes() - 2;

        String value = predInst.stringValue(predInst.attribute(valueIdx));

        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score).append(LF);

    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}