List of usage examples for weka.classifiers.meta.FilteredClassifier.setFilter
public void setFilter(Filter filter)
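Before the project examples below, here is a minimal, hedged sketch of the typical pattern (the dataset path, class name, and attribute index are placeholders, not taken from any of the examples): the filter passed to setFilter is trained on the training data inside buildClassifier and then re-applied automatically to every instance at prediction time.

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.unsupervised.attribute.Remove;

public class SetFilterExample {
    public static void main(String[] args) throws Exception {
        // load a dataset (placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        Remove remove = new Remove();
        remove.setAttributeIndices("1"); // e.g. drop an ID-like first attribute

        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(remove);            // filter is built on the training data
        fc.setClassifier(new J48());
        fc.buildClassifier(data);        // trains filter and base classifier together

        // predictions go through the same filter automatically
        double label = fc.classifyInstance(data.instance(0));
        System.out.println("predicted class index: " + label);
    }
}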
From source file:adams.flow.source.WekaClassifierGenerator.java
License:Open Source License
/**
 * Returns the default setup. Used in the options as default value.
 *
 * @return the default setup
 */
protected weka.classifiers.Classifier getDefaultSetup() {
    FilteredClassifier result;
    result = new weka.classifiers.meta.FilteredClassifier();
    result.setFilter(new weka.filters.supervised.attribute.PLSFilter());
    result.setClassifier(new LinearRegressionJ());
    return result;
}
From source file:adams.opt.optimise.GeneticAlgorithm.java
License:Open Source License
public static void main(String[] args) {
    Environment.setEnvironmentClass(Environment.class);
    GeneticAlgorithm ga = new GeneticAlgorithm();
    ga.setBits(1);
    ga.setNumChrom(8);
    ga.setIterations(10000);
    ga.setFavorZeroes(true);

    AttributeSelection as = new AttributeSelection();
    //as.setDataset(new PlaceholderFile("/home/dale/blgg/conversion/merged/m_5_.75.arff"));
    ArrayConsumer.setOptions(as, args);

    PLSClassifier pls = new PLSClassifier();
    PLSFilter pf = (PLSFilter) pls.getFilter();
    pf.setNumComponents(11);

    LinearRegressionJ reg = new LinearRegressionJ();
    reg.setEliminateColinearAttributes(false);
    reg.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));

    GPD gp = new GPD();
    gp.setNoise(.01);
    //RBFKernel rbf = new RBFKernel();
    //rbf.setChecksTurnedOff(true);
    //rbf.setGamma(.01);
    //gp.setKernel(rbf);

    Remove remove = new Remove();
    remove.setAttributeIndices("1");

    FilteredClassifier fc = new FilteredClassifier();
    MultiFilter mf = new MultiFilter();
    Filter[] filters = new Filter[2];
    filters[0] = remove;
    filters[1] = pf;
    mf.setFilters(filters);
    fc.setClassifier(gp);
    fc.setFilter(pf); // note: the MultiFilter mf assembled above is never used

    as.setClassifier(gp);
    as.setClassIndex("last");
    //as.setDataset(new PlaceholderFile("/home/dale/OMD_clean.arff"));
    //as.setOutputDirectory(new PlaceholderFile("/research/dale"));
    ga.setLoggingLevel(LoggingLevel.INFO);
    as.setLoggingLevel(LoggingLevel.INFO);
    ga.optimise(as.getDataDef(), as);
}
From source file:CEP.CEPListener.java
FilteredClassifier CreateClassifier() {
    Remove rm = new Remove();
    //rm.setAttributeIndices("1"); // remove 1st attribute

    PART c = new PART();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(c);
    return fc;
}
From source file:com.github.fracpete.multisearch.optimize.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * The first parameter must be the dataset, the (optional) second the
 * class index (1-based; 'first' and 'last' are also supported).
 *
 * @param args the commandline options
 * @throws Exception if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: PLSFilterAndLinearRegression <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure multisearch
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(fc);
    multi.setSearchParameters(new AbstractParameter[] { numComp, ridge });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_RMSE,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameters: " + multi.getGenerator().evaluate(multi.getBestValues()));
}
From source file:com.github.fracpete.multisearch.setupgenerator.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * Outputs the commandlines.
 *
 * @param args the commandline options
 * @throws Exception if the setup generator fails for some reason
 */
public static void main(String[] args) throws Exception {
    // configure classifier we want to generate setups for
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure generator
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    SetupGenerator generator = new SetupGenerator();
    generator.setBaseObject(fc);
    generator.setParameters(new AbstractParameter[] { numComp, ridge });

    // output configuration
    System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator));

    // output commandlines
    System.out.println("\nCommandlines:\n");
    Enumeration<Serializable> enm = generator.setups();
    while (enm.hasMoreElements())
        System.out.println(Utils.toCommandLine(enm.nextElement()));
}
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // with NB this takes a long time
    }

    return classifier;
}
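A hedged usage sketch for a text setup like the one above: the classifier expects instances with a raw string attribute plus a nominal class, and StringToWordVector is applied internally by the FilteredClassifier. The attribute names, class labels, and message below are illustrative assumptions, not taken from the original project.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

ArrayList<String> labels = new ArrayList<>();
labels.add("ham");
labels.add("spam");
ArrayList<Attribute> atts = new ArrayList<>();
atts.add(new Attribute("text", (ArrayList<String>) null)); // string attribute
atts.add(new Attribute("class", labels));
Instances data = new Instances("sms", atts, 0);
data.setClassIndex(1);

DenseInstance inst = new DenseInstance(2);
inst.setDataset(data);
inst.setValue(atts.get(0), "free prize, call now"); // hypothetical message
inst.setClassMissing();
data.add(inst);

// after training the FilteredClassifier on labelled data elsewhere:
// double pred = classifier.classifyInstance(data.instance(0));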
From source file:com.openkm.kea.filter.KEAFilter.java
License:Open Source License
/**
 * Builds the classifier.
 */
// aly: The main function, where everything important happens
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == m_DocumentAtt) {
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            if (m_KFused) {
                atts.addElement(new Attribute("Keyphrase_frequency"));
            }
            if (m_STDEVfeature) {
                atts.addElement(new Attribute("Standard_deviation"));
            }
            if (m_NODEfeature) {
                atts.addElement(new Attribute("Relations_number"));
            }
            if (m_LENGTHfeature) {
                atts.addElement(new Attribute("Phrase_length"));
            }
        } else if (i == m_KeyphrasesAtt) {
            FastVector vals = new FastVector(2);
            vals.addElement("False");
            vals.addElement("True");
            //atts.addElement(new Attribute("Keyphrase?", vals));
            atts.addElement(new Attribute("Keyphrase?"));
        }
    }
    m_ClassifierData = new Instances("ClassifierData", atts, 0);
    m_ClassifierData.setClassIndex(m_NumFeatures);

    if (m_Debug) {
        log.info("--- Converting instances for classifier");
    }

    // Convert pending input instances into data for classifier
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(m_KeyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        HashMap<String, Counter> hashKeysEval = getGivenKeyphrases(keyphrases, true);

        // Get the phrases for the document
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = getPhrases(hash, current.stringValue(m_DocumentAtt));
        // hash = getComposits(hash);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        Iterator<String> it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);
            double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length, hash);
            //log.info(vals);
            Instance inst = new Instance(current.weight(), vals);
            //System.err.println(phrase + "\t" + inst.toString());
            m_ClassifierData.add(inst);
        }
    }

    if (m_Debug) {
        log.info("--- Building classifier");
    }

    // Build classifier
    // Uncomment if you want to use a different classifier
    // Caution: Other places in the code will have to be adjusted!!
    /* I. Naive Bayes:
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    m_Classifier = fclass;
    */
    //NaiveBayes nb = new NaiveBayes();
    //nb.setUseSupervisedDiscretization(true);
    //m_Classifier = nb;
    /* II. Linear Regression:
    LinearRegression lr = new LinearRegression();
    lr.setAttributeSelectionMethod(new weka.core.SelectedTag(1, LinearRegression.TAGS_SELECTION));
    lr.setEliminateColinearAttributes(false);
    lr.setDebug(false);
    m_Classifier = lr;
    */
    /* III. Bagging with REPTrees:
    Bagging bagging = new Bagging();
    String[] ops_bagging = {
        new String("-P"), new String("100"),
        new String("-S"), new String("1"),
        new String("-I"), new String("50") };
    */
    /*
    REPTree rept = new REPTree(); // results are worse!
    rept.setNoPruning(true);
    String[] ops_rept = {
        new String("-M"), new String("2"),
        new String("-V"), new String("0.0010"),
        new String("-N"), new String("3"),
        new String("-S"), new String("1"),
        new String("-L"), new String("1") };
    rept.setOptions(ops_rept);
    bagging.setClassifier(rept);
    */
    // bagging.setOptions(ops_bagging);
    //FilteredClassifier fclass = new FilteredClassifier();
    //fclass.setClassifier(new REPTree());
    //fclass.setFilter(new Discretize());
    //bagging.setClassifier(fclass);
    // m_Classifier = bagging;

    RegressionByDiscretization rvd = new RegressionByDiscretization();
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    rvd.setClassifier(fclass);
    rvd.setNumBins(m_Indexers + 1);
    m_Classifier = rvd;
    // log.info(m_ClassifierData);
    //System.exit(1);

    m_Classifier.buildClassifier(m_ClassifierData);

    if (m_Debug) {
        log.info("" + m_Classifier);
    }

    // Save space
    m_ClassifierData = new Instances(m_ClassifierData, 0);
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * This method creates a serialized WEKA model file from an .arff file
 * containing the annotated gold standard.
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross-validation)
 * @throws Exception
 */
public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute

    // Standard classifier; BNs proved most robust, but of course other classifiers are possible
    BayesNet bn = new BayesNet();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier

    SerializationHelper.write(model, fc);

    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }
}
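The example above only writes the model. As a complement, here is a minimal, hedged sketch of reading such a serialized FilteredClassifier back and classifying a new instance; the file name and dataset are placeholders, not part of the original example.

// assumes the model was written with SerializationHelper.write(...) as above
FilteredClassifier fc = (FilteredClassifier) SerializationHelper.read("model.ser"); // placeholder path
Instances unseen = new DataSource("unseen.arff").getDataSet();                      // placeholder data
unseen.setClassIndex(unseen.numAttributes() - 1);
double label = fc.classifyInstance(unseen.instance(0)); // the Remove filter is applied automatically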
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset)
        throws Exception {
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the train instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" });
    Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff");
    train.setClassIndex(train.numAttributes() - 1);

    // Add IDs to the test instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" });
    Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff");
    test.setClassIndex(test.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    test.randomize(random);

    // Apply log filter
    // Filter logFilter = new LogFilter();
    // logFilter.setInputFormat(train);
    // train = Filter.useFilter(train, logFilter);
    // logFilter.setInputFormat(test);
    // test = Filter.useFilter(test, logFilter);

    // Copy the classifier
    Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

    // Instantiate the FilteredClassifier
    FilteredClassifier filteredClassifier = new FilteredClassifier();
    filteredClassifier.setFilter(removeIDFilter);
    filteredClassifier.setClassifier(classifier);

    // Build the classifier
    filteredClassifier.buildClassifier(train);

    // Prepare the output buffer
    AbstractOutput output = new PlainText();
    output.setBuffer(new StringBuffer());
    output.setHeader(test);
    output.setAttributes("first");

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(filteredClassifier, test, output);

    // Convert predictions to CSV
    // Format: inst#, actual, predicted, error, probability, (ID)
    String[] scores = new String[new Double(eval.numInstances()).intValue()];
    double[] probabilities = new double[new Double(eval.numInstances()).intValue()];
    for (String line : output.getBuffer().toString().split("\n")) {
        String[] linesplit = line.split("\\s+");

        // If there's been an error, the length of linesplit is 6, otherwise 5,
        // due to the error flag "+"
        int id;
        String expectedValue, classification;
        double probability;

        if (line.contains("+")) {
            id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[5]);
        } else {
            id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[4]);
        }

        scores[id - 1] = classification;
        probabilities[id - 1] = probability;
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString());

    // Output probabilities
    sb = new StringBuilder();
    for (Double probability : probabilities)
        sb.append(probability.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"), sb.toString());

    // Output predictions
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"),
            output.getBuffer().toString());

    // Output meta information
    sb = new StringBuilder();
    sb.append(classifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString());
}