List of usage examples for weka.classifiers.meta.FilteredClassifier.setFilter
public void setFilter(Filter filter)
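Before the project examples below, here is a minimal, hedged sketch of the typical pattern (the dataset path, class name, and attribute index are placeholders, not taken from any of the examples): the filter passed to setFilter is trained on the training data inside buildClassifier and then re-applied automatically to every instance at prediction time.

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.unsupervised.attribute.Remove;

public class SetFilterExample {
    public static void main(String[] args) throws Exception {
        // load a dataset (placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        Remove remove = new Remove();
        remove.setAttributeIndices("1"); // e.g. drop an ID-like first attribute

        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(remove);            // filter is built on the training data
        fc.setClassifier(new J48());
        fc.buildClassifier(data);        // trains filter and base classifier together

        // predictions go through the same filter automatically
        double label = fc.classifyInstance(data.instance(0));
        System.out.println("predicted class index: " + label);
    }
}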
From source file:adams.flow.source.WekaClassifierGenerator.java
License:Open Source License
/**
 * Returns the default setup. Used in the options as default value.
 *
 * @return the default setup
 */
protected weka.classifiers.Classifier getDefaultSetup() {
    FilteredClassifier result;
    result = new weka.classifiers.meta.FilteredClassifier();
    result.setFilter(new weka.filters.supervised.attribute.PLSFilter());
    result.setClassifier(new LinearRegressionJ());
    return result;
}
From source file:adams.opt.optimise.GeneticAlgorithm.java
License:Open Source License
public static void main(String[] args) {
    Environment.setEnvironmentClass(Environment.class);
    GeneticAlgorithm ga = new GeneticAlgorithm();
    ga.setBits(1);
    ga.setNumChrom(8);
    ga.setIterations(10000);
    ga.setFavorZeroes(true);

    AttributeSelection as = new AttributeSelection();
    //as.setDataset(new PlaceholderFile("/home/dale/blgg/conversion/merged/m_5_.75.arff"));
    ArrayConsumer.setOptions(as, args);

    PLSClassifier pls = new PLSClassifier();
    PLSFilter pf = (PLSFilter) pls.getFilter();
    pf.setNumComponents(11);

    LinearRegressionJ reg = new LinearRegressionJ();
    reg.setEliminateColinearAttributes(false);
    reg.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));

    GPD gp = new GPD();
    gp.setNoise(.01);
    //RBFKernel rbf = new RBFKernel();
    //rbf.setChecksTurnedOff(true);
    //rbf.setGamma(.01);
    //gp.setKernel(rbf);

    Remove remove = new Remove();
    remove.setAttributeIndices("1");

    FilteredClassifier fc = new FilteredClassifier();
    MultiFilter mf = new MultiFilter();
    Filter[] filters = new Filter[2];
    filters[0] = remove;
    filters[1] = pf;
    mf.setFilters(filters);
    fc.setClassifier(gp);
    fc.setFilter(pf); // note: the MultiFilter mf assembled above is never used

    as.setClassifier(gp);
    as.setClassIndex("last");
    //as.setDataset(new PlaceholderFile("/home/dale/OMD_clean.arff"));
    //as.setOutputDirectory(new PlaceholderFile("/research/dale"));
    ga.setLoggingLevel(LoggingLevel.INFO);
    as.setLoggingLevel(LoggingLevel.INFO);
    ga.optimise(as.getDataDef(), as);
}
From source file:CEP.CEPListener.java
FilteredClassifier CreateClassifier() {
    Remove rm = new Remove();
    //rm.setAttributeIndices("1"); // remove 1st attribute

    PART c = new PART();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(c);
    return fc;
}
From source file:com.github.fracpete.multisearch.optimize.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * The first parameter must be the dataset, the (optional) second the
 * class index (1-based; 'first' and 'last' are also supported).
 *
 * @param args the commandline options
 * @throws Exception if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: PLSFilterAndLinearRegression <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure multisearch
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(fc);
    multi.setSearchParameters(new AbstractParameter[] { numComp, ridge });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_RMSE,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameters: " + multi.getGenerator().evaluate(multi.getBestValues()));
}
From source file:com.github.fracpete.multisearch.setupgenerator.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * Outputs the commandlines.
 *
 * @param args the commandline options
 * @throws Exception if the setup generator fails for some reason
 */
public static void main(String[] args) throws Exception {
    // configure classifier we want to generate setups for
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure generator
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    SetupGenerator generator = new SetupGenerator();
    generator.setBaseObject(fc);
    generator.setParameters(new AbstractParameter[] { numComp, ridge });

    // output configuration
    System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator));

    // output commandlines
    System.out.println("\nCommandlines:\n");
    Enumeration<Serializable> enm = generator.setups();
    while (enm.hasMoreElements())
        System.out.println(Utils.toCommandLine(enm.nextElement()));
}
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // with NB this takes a long time
    }

    return classifier;
}
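A hedged usage sketch for a text setup like the one above: the classifier expects instances with a raw string attribute plus a nominal class, and StringToWordVector is applied internally by the FilteredClassifier. The attribute names, class labels, and message below are illustrative assumptions, not taken from the original project.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

ArrayList<String> labels = new ArrayList<>();
labels.add("ham");
labels.add("spam");
ArrayList<Attribute> atts = new ArrayList<>();
atts.add(new Attribute("text", (ArrayList<String>) null)); // string attribute
atts.add(new Attribute("class", labels));
Instances data = new Instances("sms", atts, 0);
data.setClassIndex(1);

DenseInstance inst = new DenseInstance(2);
inst.setDataset(data);
inst.setValue(atts.get(0), "free prize, call now"); // hypothetical message
inst.setClassMissing();
data.add(inst);

// after training the FilteredClassifier on labelled data elsewhere:
// double pred = classifier.classifyInstance(data.instance(0));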
From source file:com.openkm.kea.filter.KEAFilter.java
License:Open Source License
/**
 * Builds the classifier.
 */
// aly: The main function, where everything important happens
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == m_DocumentAtt) {
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            if (m_KFused) {
                atts.addElement(new Attribute("Keyphrase_frequency"));
            }
            if (m_STDEVfeature) {
                atts.addElement(new Attribute("Standard_deviation"));
            }
            if (m_NODEfeature) {
                atts.addElement(new Attribute("Relations_number"));
            }
            if (m_LENGTHfeature) {
                atts.addElement(new Attribute("Phrase_length"));
            }
        } else if (i == m_KeyphrasesAtt) {
            FastVector vals = new FastVector(2);
            vals.addElement("False");
            vals.addElement("True");
            //atts.addElement(new Attribute("Keyphrase?", vals));
            atts.addElement(new Attribute("Keyphrase?"));
        }
    }
    m_ClassifierData = new Instances("ClassifierData", atts, 0);
    m_ClassifierData.setClassIndex(m_NumFeatures);

    if (m_Debug) {
        log.info("--- Converting instances for classifier");
    }

    // Convert pending input instances into data for classifier
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(m_KeyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        HashMap<String, Counter> hashKeysEval = getGivenKeyphrases(keyphrases, true);

        // Get the phrases for the document
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = getPhrases(hash, current.stringValue(m_DocumentAtt));
        // hash = getComposits(hash);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        Iterator<String> it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);
            double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length, hash);
            //log.info(vals);
            Instance inst = new Instance(current.weight(), vals);
            //System.err.println(phrase + "\t" + inst.toString());
            m_ClassifierData.add(inst);
        }
    }

    if (m_Debug) {
        log.info("--- Building classifier");
    }

    // Build classifier
    // Uncomment if you want to use a different classifier
    // Caution: Other places in the code will have to be adjusted!!
    /* I. Naive Bayes:
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    m_Classifier = fclass;
    */
    //NaiveBayes nb = new NaiveBayes();
    //nb.setUseSupervisedDiscretization(true);
    //m_Classifier = nb;
    /* II. Linear Regression:
    LinearRegression lr = new LinearRegression();
    lr.setAttributeSelectionMethod(new weka.core.SelectedTag(1, LinearRegression.TAGS_SELECTION));
    lr.setEliminateColinearAttributes(false);
    lr.setDebug(false);
    m_Classifier = lr;
    */
    /* III. Bagging with REPTrees:
    Bagging bagging = new Bagging();
    String[] ops_bagging = {
        new String("-P"), new String("100"),
        new String("-S"), new String("1"),
        new String("-I"), new String("50") };
    */
    /*
    REPTree rept = new REPTree(); // results are worse!
    rept.setNoPruning(true);
    String[] ops_rept = {
        new String("-M"), new String("2"),
        new String("-V"), new String("0.0010"),
        new String("-N"), new String("3"),
        new String("-S"), new String("1"),
        new String("-L"), new String("1") };
    rept.setOptions(ops_rept);
    bagging.setClassifier(rept);
    */
    // bagging.setOptions(ops_bagging);
    //FilteredClassifier fclass = new FilteredClassifier();
    //fclass.setClassifier(new REPTree());
    //fclass.setFilter(new Discretize());
    //bagging.setClassifier(fclass);
    // m_Classifier = bagging;

    RegressionByDiscretization rvd = new RegressionByDiscretization();
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    rvd.setClassifier(fclass);
    rvd.setNumBins(m_Indexers + 1);
    m_Classifier = rvd;
    // log.info(m_ClassifierData);
    //System.exit(1);

    m_Classifier.buildClassifier(m_ClassifierData);

    if (m_Debug) {
        log.info("" + m_Classifier);
    }

    // Save space
    m_ClassifierData = new Instances(m_ClassifierData, 0);
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * This method creates a serialized WEKA model file from an .arff file
 * containing the annotated gold standard.
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross-validation)
 * @throws Exception
 */
public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute

    // Standard classifier; BNs proved most robust, but of course other classifiers are possible
    BayesNet bn = new BayesNet();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier

    SerializationHelper.write(model, fc);

    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }
}
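The example above only writes the model. As a complement, here is a minimal, hedged sketch of reading such a serialized FilteredClassifier back and classifying a new instance; the file name and dataset are placeholders, not part of the original example.

// assumes the model was written with SerializationHelper.write(...) as above
FilteredClassifier fc = (FilteredClassifier) SerializationHelper.read("model.ser"); // placeholder path
Instances unseen = new DataSource("unseen.arff").getDataSet();                      // placeholder data
unseen.setClassIndex(unseen.numAttributes() - 1);
double label = fc.classifyInstance(unseen.instance(0)); // the Remove filter is applied automatically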
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset)
        throws Exception {
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the train instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" });
    Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff");
    train.setClassIndex(train.numAttributes() - 1);

    // Add IDs to the test instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" });
    Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff");
    test.setClassIndex(test.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    test.randomize(random);

    // Apply log filter
    // Filter logFilter = new LogFilter();
    // logFilter.setInputFormat(train);
    // train = Filter.useFilter(train, logFilter);
    // logFilter.setInputFormat(test);
    // test = Filter.useFilter(test, logFilter);

    // Copy the classifier
    Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

    // Instantiate the FilteredClassifier
    FilteredClassifier filteredClassifier = new FilteredClassifier();
    filteredClassifier.setFilter(removeIDFilter);
    filteredClassifier.setClassifier(classifier);

    // Build the classifier
    filteredClassifier.buildClassifier(train);

    // Prepare the output buffer
    AbstractOutput output = new PlainText();
    output.setBuffer(new StringBuffer());
    output.setHeader(test);
    output.setAttributes("first");

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(filteredClassifier, test, output);

    // Convert predictions to CSV
    // Format: inst#, actual, predicted, error, probability, (ID)
    String[] scores = new String[new Double(eval.numInstances()).intValue()];
    double[] probabilities = new double[new Double(eval.numInstances()).intValue()];
    for (String line : output.getBuffer().toString().split("\n")) {
        String[] linesplit = line.split("\\s+");

        // If there's been an error, the length of linesplit is 6, otherwise 5,
        // due to the error flag "+"
        int id;
        String expectedValue, classification;
        double probability;

        if (line.contains("+")) {
            id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[5]);
        } else {
            id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[4]);
        }

        scores[id - 1] = classification;
        probabilities[id - 1] = probability;
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString());

    // Output probabilities
    sb = new StringBuilder();
    for (Double probability : probabilities)
        sb.append(probability.toString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"), sb.toString());

    // Output predictions
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"),
            output.getBuffer().toString());

    // Output meta information
    sb = new StringBuilder();
    sb.append(classifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString());
}