List of usage examples for weka.classifiers.functions.SMO
From source file:ClassificationClass.java
public Evaluation cls_svm(Instances data) {
    Evaluation eval = null;
    try {
        Classifier classifier;
        data.setClassIndex(data.numAttributes() - 1);
        classifier = new SMO();
        classifier.buildClassifier(data);
        eval = new Evaluation(data);
        eval.evaluateModel(classifier, data);
    } catch (Exception ex) {
        Logger.getLogger(ClassificationClass.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}
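Note that cls_svm evaluates the SMO model on the very data it was trained on, so the returned figures are optimistic. A minimal cross-validated variant (an addition, not from the original source; assumes the same imports plus java.util.Random):

// Sketch only: 10-fold cross-validation gives a less biased estimate than
// evaluating on the training data as cls_svm does above.
public Evaluation cls_svm_cv(Instances data) throws Exception {
    data.setClassIndex(data.numAttributes() - 1);
    Evaluation eval = new Evaluation(data);
    // a fresh SMO is trained and tested on each of the 10 folds
    eval.crossValidateModel(new SMO(), data, 10, new Random(1));
    return eval;
}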
From source file:algoritmogeneticocluster.NewClass.java
public static void main(String[] args) throws Exception {
    BufferedReader datafile = readDataFile("tabela10.arff");
    Instances data = new Instances(datafile);
    data.setClassIndex(data.numAttributes() - 1);

    // Do 10-fold cross-validation
    Instances[][] split = crossValidationSplit(data, 10);

    // Separate split into training and testing arrays
    Instances[] trainingSplits = split[0];
    Instances[] testingSplits = split[1];

    // Use a set of classifiers
    Classifier[] models = {
            new SMO(),
            new J48(),           // a decision tree
            new PART(),
            new DecisionTable(), // decision table majority classifier
            new DecisionStump()  // one-level decision tree
    };

    // Run for each model
    for (int j = 0; j < models.length; j++) {
        // Collect every group of predictions for the current model in a FastVector
        FastVector predictions = new FastVector();

        // For each training-testing split pair, train and test the classifier
        for (int i = 0; i < trainingSplits.length; i++) {
            Evaluation validation = classify(models[j], trainingSplits[i], testingSplits[i]);
            predictions.appendElements(validation.predictions());
            // Uncomment to see the summary for each training-testing pair.
            //System.out.println(models[j].toString());
        }

        // Calculate overall accuracy of the current classifier on all splits
        double accuracy = calculateAccuracy(predictions);

        // Print the current classifier's name and accuracy in a complicated,
        // but nice-looking way.
        System.out.println("Accuracy of " + models[j].getClass().getSimpleName() + ": "
                + String.format("%.2f%%", accuracy) + "\n---------------------------------");
    }
}
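The snippet relies on helpers that are not shown (readDataFile, crossValidationSplit, classify, calculateAccuracy). A hypothetical version of the classify helper, sketched here for context (the actual source may differ):

// Assumed helper: trains the model on one split and evaluates it on the
// matching test split, returning the Evaluation for accuracy aggregation.
public static Evaluation classify(Classifier model, Instances trainingSet, Instances testingSet)
        throws Exception {
    Evaluation evaluation = new Evaluation(trainingSet);
    model.buildClassifier(trainingSet);
    evaluation.evaluateModel(model, testingSet);
    return evaluation;
}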
From source file:algoritmogeneticocluster.WekaSimulation.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    SMO classifier = new SMO();
    HyperPipes hy = new HyperPipes();
    // classifier.buildClassifier(trainset);

    BufferedReader datafile = readDataFile("tabela10.arff");

    Instances data;
    Evaluation eval;
    try {
        data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);
        eval = new Evaluation(data);
        Random rand = new Random(1); // using seed = 1
        int folds = 10;
        eval.crossValidateModel(classifier, data, folds, rand);
        System.out.println(eval.toString());
        System.out.println(eval.numInstances());
        System.out.println(eval.correct());
        System.out.println(eval.incorrect());
        System.out.println(eval.pctCorrect());
        System.out.println(eval.pctIncorrect());
    } catch (Exception ex) {
        Logger.getLogger(WekaSimulation.class.getName()).log(Level.SEVERE, null, ex);
    }
}
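Evaluation can report more than the raw counts printed above. A small hedged addition (not in the original) that would sit inside the same try block:

// Confusion matrix and per-class precision/recall; both methods throw
// Exception, so they must stay within the existing try/catch.
System.out.println(eval.toMatrixString("=== Confusion Matrix ==="));
System.out.println(eval.toClassDetailsString("=== Per-Class Details ==="));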
From source file:applications.FoodSpectrograms.java
public static void shapeletClassifier() {
    int nosExp = 3;
    ThreadedClassifierExperiment[] runs = new ThreadedClassifierExperiment[nosExp];
    Thread[] threads = new Thread[nosExp];
    for (int i = 0; i < nosExp; i++) {
        Classifier c = new SMO();
        PolyKernel kernel = new PolyKernel();
        kernel.setExponent(1);
        ((SMO) c).setKernel(kernel);
        FullShapeletTransform s = new FullShapeletTransform();
        s.setDebug(false);
        s.setNumberOfShapelets(train[i].numAttributes() / 2);
        int minLength = 5;
        int maxLength = train[i].numAttributes() / 4;
        // int maxLength = (train.numAttributes() - 1) / 10;
        s.setShapeletMinAndMax(minLength, maxLength);
        s.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
        s.turnOffLog();
        runs[i] = new ThreadedClassifierExperiment(train[i], test[i], c);
        runs[i].setTransform(s);
        threads[i] = new Thread(runs[i]);
    }
    for (int i = 0; i < nosExp; i++)
        threads[i].start();
    try {
        for (int i = 0; i < nosExp; i++)
            threads[i].join();
    } catch (InterruptedException e) {
        System.out.println(" Interrupted!!");
    }
    for (int i = 0; i < nosExp; i++)
        System.out.println(" ED Accuracy for " + names[i] + " is " + runs[i].getTestAccuracy());
}
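With the exponent fixed at 1, the PolyKernel reduces SMO to a linear SVM. As a sketch of an alternative configuration style (an assumption, not taken from this source), the same kernel can be set through SMO's option string:

// Equivalent kernel setup via options; setOptions throws a checked
// Exception, so call this from code that declares or handles it.
SMO c = new SMO();
c.setOptions(weka.core.Utils.splitOptions(
        "-K \"weka.classifiers.functions.supportVector.PolyKernel -E 1.0\""));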
From source file:ca.uottawa.balie.LanguageIdentification.java
License:Open Source License
private WekaLearner TrainModel(ArrayList<String> pi_Languages) {
    // Extract n-grams from every training text
    Iterator<String> iCur = pi_Languages.iterator();
    Hashtable<String, ArrayList<CharacterNGram>> Lan2NGrams = new Hashtable<String, ArrayList<CharacterNGram>>();
    Hashtable<String, Integer> hashAllNGrams = new Hashtable<String, Integer>();
    Hashtable<String, Integer> hashAllUNIGrams = new Hashtable<String, Integer>();
    if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
        DebugInfo.Out("Reading files.");
    while (iCur.hasNext()) {
        String strCurLan = (String) iCur.next();
        if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
            DebugInfo.Out(strCurLan);
        ArrayList<CharacterNGram> alCurNGrams = null;
        try {
            alCurNGrams = Files2NGram(Balie.LANGUAGE_ID_TRAINING_CORPUS, strCurLan, hashAllNGrams,
                    hashAllUNIGrams);
        } catch (Exception e) {
            throw new Error("Training corpus was not found here: " + Balie.LANGUAGE_ID_TRAINING_CORPUS);
        }
        Lan2NGrams.put(strCurLan, alCurNGrams);
        if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
            DebugInfo.Out("Done");
    }

    // Get a reasonable list of attributes (remove low-frequency n-grams)
    ArrayList<String> alSelectedNgramAttributes = GetGlobalNGramList(hashAllNGrams);
    ArrayList<String> alSelectedUnigramAttributes = GetGlobalNGramList(hashAllUNIGrams);

    // Proceed to attribute selection only on the bigrams
    if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
        DebugInfo.Out("Attribute Selection...");
    WekaAttribute[] wekaAttr = new WekaAttribute[alSelectedNgramAttributes.size()];
    for (int i = 0; i != alSelectedNgramAttributes.size(); ++i) {
        wekaAttr[i] = new WekaAttribute((String) alSelectedNgramAttributes.get(i));
    }
    //String[] strAttr = (String[]) alSelectedNgramAttributes.toArray(new String[alSelectedNgramAttributes.size()]);
    String[] strClass = (String[]) pi_Languages.toArray(new String[pi_Languages.size()]);
    WekaAttributeSelection was = new WekaAttributeSelection(WekaAttributeSelection.WEKA_CHI_SQUARE, wekaAttr,
            strClass);
    iCur = pi_Languages.iterator();
    while (iCur.hasNext()) {
        String strCurLan = (String) iCur.next();
        ArrayList<CharacterNGram> alCurNGrams = Lan2NGrams.get(strCurLan);
        Iterator<CharacterNGram> iNCur = alCurNGrams.iterator();
        while (iNCur.hasNext()) {
            CharacterNGram cng = iNCur.next();
            Double[] nGram = cng.Instance((String[]) alSelectedNgramAttributes.toArray(new String[0]));
            was.AddInstance(nGram, strCurLan);
        }
        if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
            DebugInfo.Out(String.valueOf(alCurNGrams.size()) + " instances created for " + strCurLan);
    }
    was.NumAttributes(m_NGramMaxNum);
    was.Select(true);
    String[] strReducedAttr = was.ReduceDimentionality();

    // Concatenate the chosen n-grams and unigrams into the final list of attributes
    String[] strAllAttributes = new String[m_NGramMaxNum + alSelectedUnigramAttributes.size()];
    WekaAttribute[] wekaFinalAttr = new WekaAttribute[strAllAttributes.length];
    if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
        DebugInfo.Out("Creating the classifier...");
    for (int i = 0; i != strReducedAttr.length; ++i) {
        wekaFinalAttr[i] = new WekaAttribute(strReducedAttr[i]);
        strAllAttributes[i] = strReducedAttr[i];
    }
    // Append the unigram attributes after the reduced (n-gram) attributes
    int nPos = strReducedAttr.length;
    for (int i = 0; i != alSelectedUnigramAttributes.size(); ++i) {
        wekaFinalAttr[nPos] = new WekaAttribute((String) alSelectedUnigramAttributes.get(i));
        strAllAttributes[nPos] = (String) alSelectedUnigramAttributes.get(i);
        ++nPos;
    }
    WekaLearner wl = new WekaLearner(wekaFinalAttr, strClass);
    iCur = pi_Languages.iterator();
    while (iCur.hasNext()) {
        String strCurLan = (String) iCur.next();
        ArrayList<CharacterNGram> alCurNGrams = Lan2NGrams.get(strCurLan);
        Iterator<CharacterNGram> iNCur = alCurNGrams.iterator();
        while (iNCur.hasNext()) {
            CharacterNGram cng = iNCur.next();
            Double[] nGram = cng.Instance(strAllAttributes);
            wl.AddTrainInstance(nGram, strCurLan);
        }
        if (Balie.DEBUG_LANGUAGE_IDENTIFICATION)
            DebugInfo.Out(String.valueOf(alCurNGrams.size()) + " TRAIN instances created for " + strCurLan);
    }
    SMO smo = new SMO();
    wl.CreateModel(smo);
    /*
    // Create a classifier
    if (MODE.equals(Balie.LANGUAGE_ID_MODEL_HUGE)) {
        SMO smo = new SMO();
        wl.CreateModel(smo);
    } else {
        NaiveBayes nb = new NaiveBayes();
        //nb.setUseKernelEstimator(true);
        wl.CreateModel(nb);
    }
    */
    return wl;
}
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    // normalize attribute ranges into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);
    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0 /* 0 causes Weka to cache the whole matrix... */, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);
    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    // Weka uses different defaults, so let's make sure both use the published way
    int featuresToUse = (int) Math.sqrt(28 * 28);
    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0 /* 0 for unlimited */);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);
    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);
    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Lloyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);
            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}
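The timing comparisons all flow through an evalModel helper that is not shown. A hypothetical sketch of what such a helper might look like (names and output format are assumptions):

// Assumed helper: trains the classifier, times both phases, and reports
// the test-set error rate.
private static void evalModel(Classifier model, Instances train, Instances test) throws Exception {
    long start = System.currentTimeMillis();
    model.buildClassifier(train);
    long trainTime = System.currentTimeMillis() - start;

    Evaluation eval = new Evaluation(train);
    start = System.currentTimeMillis();
    eval.evaluateModel(model, test);
    long testTime = System.currentTimeMillis() - start;

    System.out.println("\tTrain: " + trainTime / 1000.0 + "s  Test: " + testTime / 1000.0
            + "s  Error rate: " + eval.errorRate());
}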
From source file:com.github.fracpete.multisearch.optimize.SMOKernels.java
License:Open Source License
/**
 * The first parameter must be the dataset, the (optional) second the class
 * index (1-based; 'first' and 'last' are also supported).
 *
 * @param args the commandline options
 * @throws Exception if optimization fails for some reason
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("\nUsage: SMOKernels <dataset> [classindex]\n");
        System.exit(1);
    }

    // load data
    Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null);

    // configure classifier we want to optimize
    SMO smo = new SMO();

    // configure multisearch
    // 1. RBFKernel
    ListParameter listRBF = new ListParameter();
    listRBF.setProperty("kernel");
    listRBF.setList(RBFKernel.class.getName());
    MathParameter gamma = new MathParameter();
    gamma.setProperty("kernel.gamma");
    gamma.setBase(10);
    gamma.setMin(-4);
    gamma.setMax(1);
    gamma.setStep(1);
    gamma.setExpression("pow(BASE,I)");
    ParameterGroup groupRBF = new ParameterGroup();
    groupRBF.setParameters(new AbstractParameter[] { listRBF, gamma });

    // 2. PolyKernel
    ListParameter listPoly = new ListParameter();
    listPoly.setProperty("kernel");
    listPoly.setList(PolyKernel.class.getName());
    MathParameter exp = new MathParameter();
    exp.setProperty("kernel.exponent");
    exp.setBase(10);
    exp.setMin(1);
    exp.setMax(5);
    exp.setStep(1);
    exp.setExpression("I");
    ParameterGroup groupPoly = new ParameterGroup();
    groupPoly.setParameters(new AbstractParameter[] { listPoly, exp });

    // assemble everything
    MultiSearch multi = new MultiSearch();
    multi.setClassifier(smo);
    multi.setSearchParameters(new AbstractParameter[] { groupRBF, groupPoly });
    SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_ACC,
            new DefaultEvaluationMetrics().getTags());
    multi.setEvaluation(tag);

    // output configuration
    System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi));

    // optimize
    System.out.println("\nOptimizing...\n");
    multi.buildClassifier(data);
    System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier()));
    System.out.println("Best parameters: " + multi.getGenerator().evaluate(multi.getBestValues()));
}
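MultiSearch is itself a Classifier, so once buildClassifier has run, the fitted object can predict directly with the best setup it found. A small hedged follow-on (not part of the original example):

// Predict with the optimized model; classifyInstance returns the index of
// the predicted class value.
double prediction = multi.classifyInstance(data.instance(0));
System.out.println("Predicted class index for first instance: " + prediction);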
From source file:com.ivanrf.smsspam.SpamClassifier.java
License:Apache License
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Make a tokenizer
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);

    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0);
        as.setSearch(r);
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        classifier.setFilter(mf);
    }

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // takes a long time

    if (boosting) {
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // very slow with NB
    }
    return classifier;
}
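A hypothetical usage of the factory above (trainData and testData are assumed to hold a string attribute plus a nominal spam/ham class; this call is not in the source):

// Build the filtered SMO pipeline on labelled SMS data, then classify a message.
FilteredClassifier fc = initFilterClassifier(1000, TOKENIZER_DEFAULT, true, CLASSIFIER_SMO, false);
fc.buildClassifier(trainData);
double label = fc.classifyInstance(testData.instance(0));
System.out.println(testData.classAttribute().value((int) label));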
From source file:cs.man.ac.uk.mvc.ClassifierBuilder.java
License:Open Source License
/**
 * Builds and tests the classifier specified by the algorithm variable.
 * Note: if there is no unlabelled data in the test set, the meta data can be set to null.
 * @return confusion matrix describing binary classification outcomes.
 */
public int[][] test() {
    switch (algorithm) {
    case Classifiers.J48:
        return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "J48", this.verbose, new J48()));
    case Classifiers.MLP:
        return stdloadAndTest(
                new StandardAlgorithmTester(this.outputFile, "MLP", this.verbose, new MultilayerPerceptron()));
    case Classifiers.NB:
        return stdloadAndTest(
                new StandardAlgorithmTester(this.outputFile, "NB", this.verbose, new NaiveBayes()));
    case Classifiers.SVM:
        return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "SVM", this.verbose, new SMO()));
    case Classifiers.HTREE:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "HTREE", this.verbose, new HoeffdingTree()));
    case Classifiers.GHVFDT:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "GHVFDT", this.verbose, new GHVFDT()));
    case Classifiers.PNB:
        return streamloadAndTest(new StreamAlgorithmTester(this.outputFile, "PNB", this.verbose, new PNB()));
    case Classifiers.OCVFDT:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "OCVFDT", this.verbose, new OCVFDT()));
    default:
        int[][] confusion_matrix = { { 0, 0 }, { 0, 0 } };
        return confusion_matrix;
    }
}
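The 2x2 matrix returned by test() can be reduced to summary metrics. A hypothetical follow-up, assuming rows index the actual class and columns the predicted class (the source does not document the layout, and builder is an assumed instance):

// Derive simple metrics from the binary confusion matrix.
int[][] cm = builder.test();
int tn = cm[0][0], fp = cm[0][1], fn = cm[1][0], tp = cm[1][1];
double accuracy = (tp + tn) / (double) (tp + tn + fp + fn);
double recall = tp / (double) (tp + fn);
System.out.println("Accuracy: " + accuracy + "  Recall: " + recall);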
From source file:cs.man.ac.uk.predict.Predictor.java
License:Open Source License
public static void makePredictionsEnsembleNew(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier: IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier: Exception building model.");
    }

    try {
        String line = "";

        // Read the file and display it line by line.
        BufferedReader in = null;

        // Read in and store each positive prediction in the tree map.
        try {
            // open stream to file
            in = new BufferedReader(new FileReader(testPath));
            while ((line = in.readLine()) != null) {
                if (line.toLowerCase().contains("@data"))
                    break;
            }
        } catch (Exception e) {
        }

        // A different ARFF loader is used here (compared to above) as the ARFF
        // file may be extremely large, in which case the whole file cannot be
        // read in at once. Instead it is read incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances available.");

        startTime = System.nanoTime();
        int instanceNumber = 0;

        // label instances
        Instance current;
        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;
            line = in.readLine();

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                if (line != null) {
                    //System.out.println("Instance: " + instanceNumber + "\t" + line);
                    //System.in.read();
                }
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        in.close();
        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;
        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}
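The unanimous-agreement rule above is hand-rolled. As a hedged alternative (an assumption, not part of the original source), Weka's built-in Vote meta-classifier can combine the same heterogeneous learners under majority voting:

// Sketch: weka.classifiers.meta.Vote with majority voting over the same
// four base learners; data is the training Instances loaded as above.
Vote vote = new Vote();
vote.setClassifiers(new Classifier[] { new J48(), new SMO(), new NaiveBayes(), new MultilayerPerceptron() });
vote.setCombinationRule(new SelectedTag(Vote.MAJORITY_VOTING_RULE, Vote.TAGS_RULES));
vote.buildClassifier(data);
double label = vote.classifyInstance(data.instance(0));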