List of usage examples for weka.classifiers.meta.FilteredClassifier: the FilteredClassifier() constructor
public FilteredClassifier()
From source file:PredictMention.java
public PredictMention() {
    try {
        String pathStopList = (new File("SpanishStoplist.txt")).getAbsolutePath();
        filter = new FilteredClassifier();
        filter.setOptions(weka.core.Utils.splitOptions(
                "-F \"weka.filters.unsupervised.attribute.StringToWordVector -R first-last -W 1000 -prune-rate -1.0 -N 0 -stemmer \\\"weka.core.stemmers.SnowballStemmer -S spanish\\\" -stopwords-handler \\\"weka.core.stopwords.WordsFromFile -stopwords "
                        + pathStopList
                        + "\\\" -M 1 -tokenizer \\\"weka.core.tokenizers.WordTokenizer -delimiters \\\\\\\" \\\\\\\\r\\\\\\\\n\\\\\\\\t.,;:\\\\\\\\\\\\\\'\\\\\\\\\\\\\\\"()?!\\\\\\\"\\\"\" -W weka.classifiers.trees.J48 -- -C 0.25 -M 2"));
    } catch (Exception ex) {
        Logger.getLogger(PredictMention.class.getName()).log(Level.SEVERE, null, ex);
    }
}
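The constructor assembles a J48 tree behind a StringToWordVector filter with Spanish stemming and stopwords. A minimal sketch of how the resulting filter field would typically be trained and applied afterwards, assuming an ARFF file whose last attribute is the nominal class (mentions.arff is a hypothetical name):

// Load data; the string attribute is vectorized inside the FilteredClassifier
Instances data = weka.core.converters.ConverterUtils.DataSource.read("mentions.arff");
data.setClassIndex(data.numAttributes() - 1);

// StringToWordVector runs internally before J48 sees the data
filter.buildClassifier(data);

// Predict the class of the first instance
double pred = filter.classifyInstance(data.instance(0));
System.out.println("Predicted class: " + data.classAttribute().value((int) pred));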
From source file:adams.opt.optimise.GeneticAlgorithm.java
License:Open Source License
public static void main(String[] args) {
    Environment.setEnvironmentClass(Environment.class);
    GeneticAlgorithm ga = new GeneticAlgorithm();
    ga.setBits(1);
    ga.setNumChrom(8);
    ga.setIterations(10000);
    ga.setFavorZeroes(true);

    AttributeSelection as = new AttributeSelection();
    //as.setDataset(new PlaceholderFile("/home/dale/blgg/conversion/merged/m_5_.75.arff"));
    ArrayConsumer.setOptions(as, args);

    PLSClassifier pls = new PLSClassifier();
    PLSFilter pf = (PLSFilter) pls.getFilter();
    pf.setNumComponents(11);

    LinearRegressionJ reg = new LinearRegressionJ();
    reg.setEliminateColinearAttributes(false);
    reg.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));

    GPD gp = new GPD();
    gp.setNoise(.01);
    //RBFKernel rbf = new RBFKernel();
    //rbf.setChecksTurnedOff(true);
    //rbf.setGamma(.01);
    //gp.setKernel(rbf);

    Remove remove = new Remove();
    remove.setAttributeIndices("1");

    FilteredClassifier fc = new FilteredClassifier();
    MultiFilter mf = new MultiFilter();
    Filter[] filters = new Filter[2];
    filters[0] = remove;
    filters[1] = pf;
    mf.setFilters(filters);
    fc.setClassifier(gp);
    fc.setFilter(pf);

    as.setClassifier(gp);
    as.setClassIndex("last");
    //as.setDataset(new PlaceholderFile("/home/dale/OMD_clean.arff"));
    //as.setOutputDirectory(new PlaceholderFile("/research/dale"));

    ga.setLoggingLevel(LoggingLevel.INFO);
    as.setLoggingLevel(LoggingLevel.INFO);
    ga.optimise(as.getDataDef(), as);
}
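Note that this snippet builds a MultiFilter from the Remove and PLS filters but never attaches it: it calls fc.setFilter(pf), so only the PLS filter is used and mf stays dead. The combined pattern it appears to aim for, as a minimal sketch using the same Weka classes:

Remove remove = new Remove();
remove.setAttributeIndices("1"); // drop the first attribute
PLSFilter pf = new PLSFilter();
pf.setNumComponents(11);

MultiFilter mf = new MultiFilter();
mf.setFilters(new Filter[] { remove, pf }); // filters run in array order

FilteredClassifier fc = new FilteredClassifier();
fc.setFilter(mf); // both filters applied before the base classifier sees the data
fc.setClassifier(new GPD());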
From source file:aprendizadodemaquina.classificadores.ClassificadorDT.java
License:Open Source License
@Override
public int treinar(File arquivoTreinamento, String parametros) {
    Logger.getLogger("ARS logger").info("J48 classifier training");
    // TODO: check why the classifier parameters are not working
    try {
        ConverterUtils.DataSource fonte = new ConverterUtils.DataSource(arquivoTreinamento.getAbsolutePath());
        dadosTreinamento = fonte.getDataSet();

        // Identify the attribute that will be the class of interest (last attribute = which relation)
        dadosTreinamento.setClassIndex(dadosTreinamento.numAttributes() - 1);

        // Create a classifier that accepts filters
        classificador = new FilteredClassifier();
        classificador.setClassifier(new J48());

        // Select the attributes that are not part of the classification
        // TODO: IF THE TRAINING DATA HAS THE IDENTIFICATION FEATURE...
        /*
        Remove remove = new Remove();
        int[] atributosRemovidos = new int[4];
        atributosRemovidos[0] = 0; // sentence ID
        atributosRemovidos[1] = 1; // term 1
        atributosRemovidos[2] = 2; // term 2
        remove.setAttributeIndicesArray(atributosRemovidos);
        // Attach the removal filter to the classifier
        fc.setFilter(remove);
        */
        // END-IF

        // String[] param = weka.core.Utils.splitOptions(parametros[cmbTipoClassificador1.getSelectedIndex()]);
        // fc.setOptions(param);

        // Train the classifier
        classificador.buildClassifier(dadosTreinamento);
        return 0;
    } catch (Exception ex) {
        Logger.getLogger("ARS logger").log(Level.SEVERE, null, ex);
        if (ex instanceof IllegalArgumentException) {
            javax.swing.JOptionPane.showMessageDialog(null,
                    "The supplied training file is not a valid ARFF file", "Error",
                    javax.swing.JOptionPane.ERROR_MESSAGE);
        }
        return 1;
    }
}
From source file:CEP.CEPListener.java
FilteredClassifier CreateClassifier() {
    Remove rm = new Remove();
    //rm.setAttributeIndices("1"); // remove 1st attribute
    PART c = new PART();
    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(c);
    return fc;
}
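A minimal usage sketch for the returned classifier, assuming an Instances object named data with its class index already set (note the Remove filter is left unconfigured above, so it passes all attributes through):

FilteredClassifier fc = CreateClassifier();
fc.buildClassifier(data);

// 10-fold cross-validation of the wrapped PART classifier
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(fc, data, 10, new java.util.Random(1));
System.out.println(eval.toSummaryString());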
From source file:com.github.fracpete.multisearch.optimize.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * The first parameter must be the dataset,
 * the (optional) second the class index (1-based, 'first' and 'last'
 * also supported).
 *
 * @param args the commandline options
 * @throws Exception if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: PLSFilterAndLinearRegression <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure multisearch
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(fc);
    multi.setSearchParameters(new AbstractParameter[] { numComp, ridge });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_RMSE,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameters: " + multi.getGenerator().evaluate(multi.getBestValues()));
}
From source file:com.github.fracpete.multisearch.setupgenerator.PLSFilterAndLinearRegression.java
License:Open Source License
/**
 * Outputs the commandlines.
 *
 * @param args the commandline options
 * @throws Exception if setup generator fails for some reason
 */
public static void main(String[] args) throws Exception {
    // configure classifier we want to generate setups for
    PLSFilter pls = new PLSFilter();
    LinearRegression lr = new LinearRegression();
    FilteredClassifier fc = new FilteredClassifier();
    fc.setClassifier(lr);
    fc.setFilter(pls);
    // required for Weka > 3.7.13
    fc.setDoNotCheckForModifiedClassAttribute(true);

    // configure generator
    // 1. number of components
    ListParameter numComp = new ListParameter();
    numComp.setProperty("filter.numComponents");
    numComp.setList("2 5 7");
    // 2. ridge
    MathParameter ridge = new MathParameter();
    ridge.setProperty("classifier.ridge");
    ridge.setBase(10);
    ridge.setMin(-5);
    ridge.setMax(1);
    ridge.setStep(1);
    ridge.setExpression("pow(BASE,I)");

    // assemble everything
    SetupGenerator generator = new SetupGenerator();
    generator.setBaseObject(fc);
    generator.setParameters(new AbstractParameter[] { numComp, ridge });

    // output configuration
    System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator));

    // output commandlines
    System.out.println("\nCommandlines:\n");
    Enumeration<Serializable> enm = generator.setups();
    while (enm.hasMoreElements())
        System.out.println(Utils.toCommandLine(enm.nextElement()));
}
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);

        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // takes a long time with NB
    }

    return classifier;
}
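A plausible invocation, reusing the constants referenced in the snippet (TOKENIZER_DEFAULT, CLASSIFIER_SMO) on a loaded SMS dataset; a sketch under those assumptions, not the project's actual call site, and sms_train.arff is a hypothetical file:

Instances train = weka.core.converters.ConverterUtils.DataSource.read("sms_train.arff");
train.setClassIndex(train.numAttributes() - 1);

// 1000 words kept, default tokenizer, InfoGain attribute selection, SMO, no boosting
FilteredClassifier spam = initFilterClassifier(1000, TOKENIZER_DEFAULT, true, CLASSIFIER_SMO, false);
spam.buildClassifier(train);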
From source file:com.openkm.kea.filter.KEAFilter.java
License:Open Source License
/**
 * Builds the classifier.
 */
// aly: The main function, where everything important happens
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == m_DocumentAtt) {
            atts.addElement(new Attribute("TFxIDF"));
            atts.addElement(new Attribute("First_occurrence"));
            if (m_KFused) {
                atts.addElement(new Attribute("Keyphrase_frequency"));
            }
            if (m_STDEVfeature) {
                atts.addElement(new Attribute("Standard_deviation"));
            }
            if (m_NODEfeature) {
                atts.addElement(new Attribute("Relations_number"));
            }
            if (m_LENGTHfeature) {
                atts.addElement(new Attribute("Phrase_length"));
            }
        } else if (i == m_KeyphrasesAtt) {
            FastVector vals = new FastVector(2);
            vals.addElement("False");
            vals.addElement("True");
            //atts.addElement(new Attribute("Keyphrase?", vals));
            atts.addElement(new Attribute("Keyphrase?"));
        }
    }
    m_ClassifierData = new Instances("ClassifierData", atts, 0);
    m_ClassifierData.setClassIndex(m_NumFeatures);

    if (m_Debug) {
        log.info("--- Converting instances for classifier");
    }

    // Convert pending input instances into data for classifier
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(m_KeyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        HashMap<String, Counter> hashKeysEval = getGivenKeyphrases(keyphrases, true);

        // Get the phrases for the document
        HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
        int length = getPhrases(hash, current.stringValue(m_DocumentAtt));
        // hash = getComposits(hash);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        Iterator<String> it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = it.next();
            FastVector phraseInfo = (FastVector) hash.get(phrase);
            double[] vals = featVals(phrase, phraseInfo, true, hashKeysEval, hashKeyphrases, length, hash);
            //log.info(vals);
            Instance inst = new Instance(current.weight(), vals);
            // System.err.println(phrase + "\t" + inst.toString());
            m_ClassifierData.add(inst);
        }
    }

    if (m_Debug) {
        log.info("--- Building classifier");
    }

    // Build classifier
    // Uncomment if you want to use a different classifier
    // Caution: Other places in the code will have to be adjusted!!
    /* I. Naive Bayes:
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    m_Classifier = fclass;
    */
    //NaiveBayes nb = new NaiveBayes();
    //nb.setUseSupervisedDiscretization(true);
    //m_Classifier = nb;

    /* II. Linear Regression:
    LinearRegression lr = new LinearRegression();
    lr.setAttributeSelectionMethod(new weka.core.SelectedTag(1, LinearRegression.TAGS_SELECTION));
    lr.setEliminateColinearAttributes(false);
    lr.setDebug(false);
    m_Classifier = lr;
    */

    /* III. Bagging with REPTrees:
    Bagging bagging = new Bagging();
    String[] ops_bagging = {
        new String("-P"), new String("100"),
        new String("-S"), new String("1"),
        new String("-I"), new String("50") };

    REPTree rept = new REPTree(); // results are worse!
    rept.setNoPruning(true);
    String[] ops_rept = {
        new String("-M"), new String("2"),
        new String("-V"), new String("0.0010"),
        new String("-N"), new String("3"),
        new String("-S"), new String("1"),
        new String("-L"), new String("1") };
    rept.setOptions(ops_rept);
    bagging.setClassifier(rept);
    */
    // bagging.setOptions(ops_bagging);
    //FilteredClassifier fclass = new FilteredClassifier();
    //fclass.setClassifier(new REPTree());
    //fclass.setFilter(new Discretize());
    //bagging.setClassifier(fclass);
    // m_Classifier = bagging;

    RegressionByDiscretization rvd = new RegressionByDiscretization();
    FilteredClassifier fclass = new FilteredClassifier();
    fclass.setClassifier(new weka.classifiers.bayes.NaiveBayesSimple());
    fclass.setFilter(new Discretize());
    rvd.setClassifier(fclass);
    rvd.setNumBins(m_Indexers + 1);
    m_Classifier = rvd;

    // log.info(m_ClassifierData);
    //System.exit(1);

    m_Classifier.buildClassifier(m_ClassifierData);
    if (m_Debug) {
        log.info("" + m_Classifier);
    }

    // Save space
    m_ClassifierData = new Instances(m_ClassifierData, 0);
}
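Within this method the FilteredClassifier plays one narrow role: it discretizes the numeric phrase features before a simple naive Bayes learner, and the result is wrapped in RegressionByDiscretization. The inner pairing in isolation, as a sketch only: NaiveBayesSimple is absent from recent Weka releases, so plain NaiveBayes stands in here, and which Discretize variant KEA imports is not visible in the snippet, so the supervised one is assumed:

FilteredClassifier fclass = new FilteredClassifier();
fclass.setClassifier(new weka.classifiers.bayes.NaiveBayes()); // NaiveBayesSimple in the original
fclass.setFilter(new weka.filters.supervised.attribute.Discretize());

RegressionByDiscretization rvd = new RegressionByDiscretization();
rvd.setClassifier(fclass);
rvd.setNumBins(10); // m_Indexers + 1 in the original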
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * This method creates a serialized WEKA model file from an .arff file
 * containing the annotated gold standard.
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation)
 * @throws Exception
 */
public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute

    // Standard classifier; BNs proved most robust, but of course other classifiers are possible
    BayesNet bn = new BayesNet();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);

    fc.buildClassifier(data); // build classifier
    SerializationHelper.write(model, fc);

    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }
}
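The serialized model can later be restored and applied unchanged; a minimal sketch, assuming new data with the same attribute layout as the gold standard (both file names are hypothetical):

FilteredClassifier fc = (FilteredClassifier) weka.core.SerializationHelper.read("alignment.model");
Instances newData = weka.core.converters.ConverterUtils.DataSource.read("new_pairs.arff");
newData.setClassIndex(newData.numAttributes() - 1);

// The embedded Remove filter strips the ID attribute internally
double pred = fc.classifyInstance(newData.instance(0));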
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);
    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}
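The pattern shared by the last two examples is worth isolating: keep an ID attribute in the Instances for bookkeeping, but hide it from the learner by placing a Remove filter inside the FilteredClassifier. A minimal sketch (the classifier choice and the data variable are illustrative):

Remove removeId = new Remove();
removeId.setAttributeIndices("first"); // hide the ID column from the learner

FilteredClassifier fc = new FilteredClassifier();
fc.setFilter(removeId);
fc.setClassifier(new weka.classifiers.trees.J48());
fc.buildClassifier(data); // data keeps its ID attribute; the learner never sees it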