List of usage examples for weka.classifiers.trees.J48
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile6(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial6.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES/*from w w w . j a v a2s. com*/ Attribute attr[] = new Attribute[50]; attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); attr[3] = new Attribute("FW"); attr[4] = new Attribute("JJ"); attr[5] = new Attribute("RB"); attr[6] = new Attribute("RB_JJ"); attr[7] = new Attribute("amod"); attr[8] = new Attribute("acomp"); attr[9] = new Attribute("advmod"); attr[10] = new Attribute("BLPos"); attr[11] = new Attribute("BLNeg"); attr[12] = new Attribute("VSPositive"); attr[13] = new Attribute("VSNegative"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[14] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); attrs.addElement(attr[4]); attrs.addElement(attr[5]); attrs.addElement(attr[6]); attrs.addElement(attr[7]); attrs.addElement(attr[8]); attrs.addElement(attr[9]); attrs.addElement(attr[10]); attrs.addElement(attr[11]); attrs.addElement(attr[12]); attrs.addElement(attr[13]); attrs.addElement(attr[14]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(15); for (int j = 0; j < 15; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], 
Float.parseFloat(st)); else if (j == 14) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(14); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(14); test.setClassIndex(14); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:DataMiningLogHistoriKIRI.DecisionTree.java
public String[] id3(Instances arff) { J48 tree = new J48(); try {//w w w . j ava 2s . c o m tree.buildClassifier(arff); } catch (Exception ex) { Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex); } System.out.println(tree.toString()); int nilaiBenar = 0, resultInt; float result = 0; for (int i = 0; i < arff.numInstances(); i++) { try { result = (float) tree.classifyInstance(arff.instance(i)); resultInt = Math.round(result); //System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6)); if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) { nilaiBenar++; } } catch (Exception ex) { Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex); } } System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances()); double confident = nilaiBenar * 1.0 / arff.numInstances() * 100; System.out.println("Confident = " + confident + "%"); String[] result2 = new String[5]; return result2; }
From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java
public String j48(Instances arff) { tree = new J48(); try {//from www.j av a 2s.com tree.buildClassifier(arff); } catch (Exception ex) { Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex); } return tree.toString(); }
From source file:DataMining_FP.interfaz.java
public static void inicializando_weka() { //Inicializando los objetos de weka try {/* w ww .j av a 2s . co m*/ reader = new BufferedReader(new FileReader("WISDM_ar_v1.1_transformed.arff")); data = new Instances(reader); reader.close(); // especificando el atributo de clase data.setClassIndex(data.numAttributes() - 1); String[] options = new String[1]; options[0] = "-U"; // unpruned tree tree = new J48(); // new instance of tree tree.setOptions(options); // set the options tree.buildClassifier(data); // build classifier } catch (Exception e) { System.out.println("Error inicializando los objetos de weka"); } System.out.println("Weka inicio bien"); }
From source file:de.fub.maps.project.detector.model.inference.impl.J48InferenceModel.java
License:Open Source License
@Override protected final synchronized Classifier createClassifier() { classifierJ48 = new J48(); configureClassifier();/*from w ww . ja v a 2s . co m*/ return classifierJ48; }
From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License:Apache License
private Classifier getClassifier() throws Exception { Classifier cl = null;/*from w w w . j a va 2 s. c o m*/ // Build and evaluate classifier // The options given correspond to the default settings in the WEKA GUI if (classifier.equals("smo")) { SMO smo = new SMO(); smo.setOptions(Utils.splitOptions( "-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\"")); cl = smo; } else if (classifier.equals("j48")) { J48 j48 = new J48(); j48.setOptions(new String[] { "-C", "0.25", "-M", "2" }); cl = j48; } else if (classifier.equals("naivebayes")) { cl = new NaiveBayes(); } else if (classifier.equals("randomforest")) { RandomForest rf = new RandomForest(); rf.setOptions(Utils.splitOptions("-I 10 -K 0 -S 1")); cl = rf; } return cl; }
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static Classifier getClassifier(WekaClassifier classifier) throws IllegalArgumentException { try {/*www.j a v a 2 s . co m*/ switch (classifier) { case NAIVE_BAYES: return new NaiveBayes(); case J48: J48 j48 = new J48(); j48.setOptions(new String[] { "-C", "0.25", "-M", "2" }); return j48; // case SMO: // SMO smo = new SMO(); // smo.setOptions(Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\"")); // return smo; // case LOGISTIC: // Logistic logistic = new Logistic(); // logistic.setOptions(Utils.splitOptions("-R 1.0E-8 -M -1")); // return logistic; default: throw new IllegalArgumentException("Classifier " + classifier + " not found!"); } } catch (Exception e) { throw new IllegalArgumentException(e); } }
From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java
License:Apache License
/**
 * Selects a subset of metrics (attributes) and deletes all others from both
 * the test data and every training set. Selection works in three stages:
 * (1) rank attributes by how often a J48 tree built per training set actually
 * uses them; (2) compute a CFS attribute subset per training set; (3) among
 * the top-ranked attributes, pick the mutually-uncorrelated combination that
 * best covers the CFS subsets.
 *
 * @param testdata     the test data; trimmed in place to the selected metrics
 * @param traindataSet the training sets; each trimmed in place the same way
 * @throws Exception if classifier training or attribute selection fails
 */
private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
        throws Exception {
    // Count, for each non-class attribute, in how many training sets the J48
    // model dump mentions the attribute's name.
    Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
    IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
    for (Instances traindata : traindataSet) {
        J48 decisionTree = new J48();
        decisionTree.buildClassifier(traindata);
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                if (decisionTree.toString().contains(traindata.attribute(j).name())) {
                    counts[k] = counts[k] + 1;
                }
                k++;
            }
        }
    }
    // Sort attribute indices by usage count (descending).
    int[] topkIndex = new int[counts.length];
    IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
    SortUtils.quicksort(counts, topkIndex, true);

    // get CFSs for each training set
    List<Set<Integer>> cfsSets = new LinkedList<>();
    for (Instances traindata : traindataSet) {
        boolean selectionSuccessful = false;
        boolean secondAttempt = false;
        Instances traindataCopy = null;
        do {
            try {
                if (secondAttempt) {
                    // Retry on the upscaled copy produced by the catch below.
                    AttributeSelection attsel = new AttributeSelection();
                    CfsSubsetEval eval = new CfsSubsetEval();
                    GreedyStepwise search = new GreedyStepwise();
                    search.setSearchBackwards(true);
                    attsel.setEvaluator(eval);
                    attsel.setSearch(search);
                    attsel.SelectAttributes(traindataCopy);
                    Set<Integer> cfsSet = new HashSet<>();
                    for (int attr : attsel.selectedAttributes()) {
                        cfsSet.add(attr);
                    }
                    cfsSets.add(cfsSet);
                    selectionSuccessful = true;
                } else {
                    AttributeSelection attsel = new AttributeSelection();
                    CfsSubsetEval eval = new CfsSubsetEval();
                    GreedyStepwise search = new GreedyStepwise();
                    search.setSearchBackwards(true);
                    attsel.setEvaluator(eval);
                    attsel.setSearch(search);
                    attsel.SelectAttributes(traindata);
                    Set<Integer> cfsSet = new HashSet<>();
                    for (int attr : attsel.selectedAttributes()) {
                        cfsSet.add(attr);
                    }
                    cfsSets.add(cfsSet);
                    selectionSuccessful = true;
                }
            } catch (IllegalArgumentException e) {
                // Weka rejects nominal attributes with duplicate labels; parse
                // the offending attribute's name out of the message, upscale
                // it, and retry.
                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
                Pattern p = Pattern.compile(regex);
                Matcher m = p.matcher(e.getMessage());
                if (!m.find()) {
                    // cannot treat problem, rethrow exception
                    throw e;
                }
                String attributeName = m.group(1);
                int attrIndex = traindata.attribute(attributeName).index();
                if (secondAttempt) {
                    traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex);
                } else {
                    traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
                }
                Console.traceln(Level.FINE,
                        "upscaled attribute " + attributeName + "; restarting training");
                secondAttempt = true;
                continue;
            }
        } while (!selectionSuccessful); // dummy loop for internal continue
    }

    // coverages[k] = average fraction of each CFS set covered by the top-(k+1)
    // ranked attributes.
    double[] coverages = new double[topkIndex.length];
    for (Set<Integer> cfsSet : cfsSets) {
        Set<Integer> topkSet = new HashSet<>();
        for (int k = 0; k < topkIndex.length; k++) {
            topkSet.add(topkIndex[k]);
            coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
        }
    }
    // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, not the
    // most negative value. Since coverages are sums of non-negative fractions
    // this happens to work, but Double.NEGATIVE_INFINITY would state the
    // intent — confirm with the author.
    double bestCoverageValue = Double.MIN_VALUE;
    int bestCoverageIndex = 0;
    for (int i = 0; i < coverages.length; i++) {
        if (coverages[i] > bestCoverageValue) {
            bestCoverageValue = coverages[i];
            bestCoverageIndex = i;
        }
    }

    // build correlation matrix
    // Pairwise Spearman correlations among the top-ranked attributes; only the
    // upper triangle (j < k) is filled. NOTE(review): the matrix is
    // overwritten per training set, so only the LAST training set's
    // correlations survive — confirm whether averaging was intended.
    SpearmansCorrelation corr = new SpearmansCorrelation();
    double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
    for (Instances traindata : traindataSet) {
        double[][] vectors = new double[bestCoverageIndex][traindata.size()];
        for (int i = 0; i < traindata.size(); i++) {
            for (int j = 0; j < bestCoverageIndex; j++) {
                vectors[j][i] = traindata.get(i).value(topkIndex[j]);
            }
        }
        for (int j = 0; j < bestCoverageIndex; j++) {
            for (int k = j + 1; k < bestCoverageIndex; k++) {
                correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
            }
        }
    }
    Set<Integer> topkSetIndexSet = new TreeSet<>();
    // j<30 ensures that the computational time does not explode since the powerset is 2^n in
    // complexity
    for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
        topkSetIndexSet.add(j);
    }
    // Exhaustively search all uncorrelated combinations for the best coverage.
    Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
    double bestOptCoverage = Double.MIN_VALUE;
    Set<Integer> opttopkSetIndexSet = null;
    for (Set<Integer> combination : allCombinations) {
        if (isUncorrelated(correlationMatrix, combination)) {
            double currentCoverage = 0.0;
            Set<Integer> topkCombination = new TreeSet<>();
            for (Integer index : combination) {
                topkCombination.add(topkIndex[index]);
            }
            for (Set<Integer> cfsSet : cfsSets) {
                currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
            }
            if (currentCoverage > bestOptCoverage) {
                bestOptCoverage = currentCoverage;
                opttopkSetIndexSet = combination;
            }
        }
    }
    // Translate the winning combination back to attribute indices.
    // NOTE(review): if no combination ever beats the sentinel,
    // opttopkSetIndexSet stays null and the loop below throws an NPE — verify
    // this cannot happen in practice.
    Set<Integer> opttopkIndex = new TreeSet<>();
    for (Integer index : opttopkSetIndexSet) {
        opttopkIndex.add(topkIndex[index]);
    }
    Console.traceln(Level.FINE, "selected the following metrics:");
    for (Integer index : opttopkIndex) {
        Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
    }
    // finally remove attributes
    for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
        if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
            testdata.deleteAttributeAt(j);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(j);
            }
        }
    }
}
From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java
License:Apache License
/**
 * Selects, from the candidate training sets, the single set whose
 * characteristic-instance similarity to the test data predicts the best
 * F-measure (per a REPTree regression over pairwise train-vs-train
 * evaluations), then replaces traindataSet's contents with that one set.
 */
@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    // Row 0 of data = testdata characteristics; rows 1..n = training sets.
    final Instances data = characteristicInstances(testdata, traindataSet);
    // Each characteristic is discretized to same/more/less relative to a
    // +/-10% band around the reference value.
    final ArrayList<String> attVals = new ArrayList<String>();
    attVals.add("same");
    attVals.add("more");
    attVals.add("less");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int j = 0; j < data.numAttributes(); j++) {
        atts.add(new Attribute(data.attribute(j).name(), attVals));
    }
    atts.add(new Attribute("score"));
    Instances similarityData = new Instances("similarity", atts, 0);
    similarityData.setClassIndex(similarityData.numAttributes() - 1);
    try {
        Classifier classifier = new J48();
        // Build the training table: for every ordered pair (i, j) of training
        // sets, encode their characteristic similarity and label it with the
        // F-measure a J48 trained on i achieves when evaluated on j.
        for (int i = 0; i < traindataSet.size(); i++) {
            classifier.buildClassifier(traindataSet.get(i));
            for (int j = 0; j < traindataSet.size(); j++) {
                if (i != j) {
                    double[] similarity = new double[data.numAttributes() + 1];
                    for (int k = 0; k < data.numAttributes(); k++) {
                        if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) {
                            similarity[k] = 2.0; // "less"
                        } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) {
                            similarity[k] = 1.0; // "more"
                        } else {
                            similarity[k] = 0.0; // "same"
                        }
                    }
                    Evaluation eval = new Evaluation(traindataSet.get(j));
                    eval.evaluateModel(classifier, traindataSet.get(j));
                    similarity[data.numAttributes()] = eval.fMeasure(1);
                    similarityData.add(new DenseInstance(1.0, similarity));
                }
            }
        }
        REPTree repTree = new REPTree();
        if (repTree.getNumFolds() > similarityData.size()) {
            repTree.setNumFolds(similarityData.size());
        }
        // NOTE(review): this unconditional call makes the conditional
        // setNumFolds above dead code — folds are always 2. Confirm which
        // behavior was intended.
        repTree.setNumFolds(2);
        repTree.buildClassifier(similarityData);

        // Encode testdata-vs-each-training-set similarity the same way
        // (row 0 of data is the testdata reference).
        Instances testTrainSimilarity = new Instances(similarityData);
        testTrainSimilarity.clear();
        for (int i = 0; i < traindataSet.size(); i++) {
            double[] similarity = new double[data.numAttributes() + 1];
            for (int k = 0; k < data.numAttributes(); k++) {
                if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) {
                    similarity[k] = 2.0;
                } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) {
                    similarity[k] = 1.0;
                } else {
                    similarity[k] = 0.0;
                }
            }
            testTrainSimilarity.add(new DenseInstance(1.0, similarity));
        }
        // Pick the training set with the highest predicted score.
        // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double; if
        // every predicted score is <= it (e.g. 0.0), bestScoringProductIndex
        // stays -1 and traindataSet.get(-1) below will throw. Consider
        // Double.NEGATIVE_INFINITY — confirm with the author.
        int bestScoringProductIndex = -1;
        double maxScore = Double.MIN_VALUE;
        for (int i = 0; i < traindataSet.size(); i++) {
            double score = repTree.classifyInstance(testTrainSimilarity.get(i));
            if (score > maxScore) {
                maxScore = score;
                bestScoringProductIndex = i;
            }
        }
        Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex);
        traindataSet.clear();
        traindataSet.add(bestScoringProduct);
    } catch (Exception e) {
        Console.printerr("failure during DecisionTreeSelection: " + e.getMessage());
        throw new RuntimeException(e);
    }
}
From source file:development.CrossValidateShapelets.java
public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names) { ArrayList<Classifier> sc = new ArrayList<>(); kNN n = new kNN(50); n.setCrossValidate(true);//from w w w . ja v a 2s. c o m sc.add(n); names.add("kNN"); sc.add(new J48()); names.add("C45"); sc.add(new NaiveBayes()); names.add("NB"); BayesNet bn = new BayesNet(); sc.add(bn); names.add("BayesNet"); RandomForest rf = new RandomForest(); rf.setNumTrees(200); sc.add(rf); names.add("RandForest"); RotationForest rot = new RotationForest(); rot.setNumIterations(30); sc.add(rf); names.add("RotForest"); SMO svmL = new SMO(); PolyKernel kernel = new PolyKernel(); kernel.setExponent(1); svmL.setKernel(kernel); sc.add(svmL); names.add("SVML"); kernel = new PolyKernel(); kernel.setExponent(2); SMO svmQ = new SMO(); svmQ.setKernel(kernel); sc.add(svmQ); names.add("SVMQ"); return sc; }