Example usage for the weka.classifiers.trees.J48 constructor

List of usage examples for the weka.classifiers.trees.J48 constructor

Introduction

On this page you can find example usages of the weka.classifiers.trees.J48 constructor, J48().
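
For orientation, here is a minimal, self-contained sketch of constructing and training a J48 tree. The file name weather.arff is a placeholder for any ARFF dataset whose last attribute is a nominal class.

import java.io.File;

import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ArffLoader;

public class J48Example {
    public static void main(String[] args) throws Exception {
        // load a dataset (the file name is a placeholder)
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File("weather.arff"));
        Instances data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1); // last attribute is the class

        J48 tree = new J48();       // the constructor documented above
        tree.buildClassifier(data); // induce the C4.5 decision tree
        System.out.println(tree);   // print the tree in text form
    }
}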

Prototype

public J48()

Usage

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile6(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial6.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute attr[] = new Attribute[15]; // 14 feature attributes plus the class

    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");
    attr[12] = new Attribute("VSPositive");
    attr[13] = new Attribute("VSNegative");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    for (int i = 0; i < 15; i++) {
        attrs.addElement(attr[i]);
    }

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(14);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // flush and close so the model file is fully written

    //Read the classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
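    //(note: this 70:30 split is never used below; the evaluation runs 10-fold cross-validation on the full dataset)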
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(14);
    test.setClassIndex(14);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
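
Weka also ships a one-line helper for the save and load steps above; a minimal equivalent sketch using weka.core.SerializationHelper, reusing the file1 and classifier variables from the method:

    weka.core.SerializationHelper.write(file1, classifier);                  // save
    classifier = (Classifier) weka.core.SerializationHelper.read(file1);     // load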

From source file:DataMiningLogHistoriKIRI.DecisionTree.java

public String[] id3(Instances arff) {
    // despite the method name, this builds a C4.5 tree (J48), not ID3
    J48 tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println(tree.toString());

    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            //System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6));
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances());
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100; // training-set accuracy, in percent
    System.out.println("Confident = " + confident + "%");

    String[] result2 = new String[5]; // note: the returned array is never populated
    return result2;
}
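
The hand-rolled accuracy loop above can also be expressed with weka.classifiers.Evaluation; a minimal sketch, evaluating on the training data itself just as the method does:

    Evaluation eval = new Evaluation(arff);
    eval.evaluateModel(tree, arff); // resubstitution estimate, same as the loop above
    System.out.println("Accuracy = " + eval.pctCorrect() + "%");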

From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java

public String j48(Instances arff) {
    tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }

    return tree.toString();
}

From source file:DataMining_FP.interfaz.java

public static void inicializando_weka() {
    //Initializing the Weka objects
    try {
        reader = new BufferedReader(new FileReader("WISDM_ar_v1.1_transformed.arff"));
        data = new Instances(reader);
        reader.close();

        // specifying the class attribute
        data.setClassIndex(data.numAttributes() - 1);

        String[] options = new String[1];
        options[0] = "-U"; // unpruned tree
        tree = new J48(); // new instance of tree
        tree.setOptions(options); // set the options
        tree.buildClassifier(data); // build classifier
    } catch (Exception e) {
        System.out.println("Error initializing the Weka objects");
        e.printStackTrace(); // surface the underlying cause instead of swallowing it
    }
    System.out.println("Weka initialized correctly");
}
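
J48 also exposes typed setters for its options, so the "-U" (unpruned) flag above can equivalently be written as:

    tree = new J48();
    tree.setUnpruned(true); // same effect as the "-U" option
    tree.buildClassifier(data);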

From source file:de.fub.maps.project.detector.model.inference.impl.J48InferenceModel.java

License:Open Source License

@Override
protected final synchronized Classifier createClassifier() {
    classifierJ48 = new J48();
    configureClassifier();
    return classifierJ48;
}

From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java

License:Apache License

private Classifier getClassifier() throws Exception {
    Classifier cl = null;
    // Build and evaluate classifier
    // The options given correspond to the default settings in the WEKA GUI
    if (classifier.equals("smo")) {
        SMO smo = new SMO();
        smo.setOptions(Utils.splitOptions(
                "-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
        cl = smo;
    } else if (classifier.equals("j48")) {
        J48 j48 = new J48();
        j48.setOptions(new String[] { "-C", "0.25", "-M", "2" });
        cl = j48;
    } else if (classifier.equals("naivebayes")) {
        cl = new NaiveBayes();
    } else if (classifier.equals("randomforest")) {
        RandomForest rf = new RandomForest();
        rf.setOptions(Utils.splitOptions("-I 10 -K 0 -S 1"));
        cl = rf;
    }
    return cl;
}
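
For reference, the J48 options "-C 0.25 -M 2" used above correspond to typed setters as well; a sketch of the equivalent form:

    J48 j48 = new J48();
    j48.setConfidenceFactor(0.25f); // -C: pruning confidence factor
    j48.setMinNumObj(2);            // -M: minimum number of instances per leaf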

From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java

License:Open Source License

public static Classifier getClassifier(WekaClassifier classifier) throws IllegalArgumentException {
    try {
        switch (classifier) {
        case NAIVE_BAYES:
            return new NaiveBayes();
        case J48:
            J48 j48 = new J48();
            j48.setOptions(new String[] { "-C", "0.25", "-M", "2" });
            return j48;
        //            case SMO:
        //               SMO smo = new SMO();
        //               smo.setOptions(Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
        //               return smo;
        //            case LOGISTIC:
        //               Logistic logistic = new Logistic();
        //               logistic.setOptions(Utils.splitOptions("-R 1.0E-8 -M -1"));
        //               return logistic;
        default:
            throw new IllegalArgumentException("Classifier " + classifier + " not found!");
        }
    } catch (Exception e) {
        throw new IllegalArgumentException(e);
    }

}

From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java

License:Apache License

private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
        throws Exception {
    Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
    IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
    for (Instances traindata : traindataSet) {
        J48 decisionTree = new J48();
        decisionTree.buildClassifier(traindata);
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                if (decisionTree.toString().contains(traindata.attribute(j).name())) {
                    counts[k] = counts[k] + 1;
                }
                k++;
            }
        }
    }
    int[] topkIndex = new int[counts.length];
    IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
    SortUtils.quicksort(counts, topkIndex, true);

    // get CFSs for each training set
    List<Set<Integer>> cfsSets = new LinkedList<>();
    for (Instances traindata : traindataSet) {
        boolean selectionSuccessful = false;
        boolean secondAttempt = false;
        Instances traindataCopy = null;
        do {
            try {
                // on the second attempt, use the copy with the upscaled attribute
                Instances selectionData = secondAttempt ? traindataCopy : traindata;
                AttributeSelection attsel = new AttributeSelection();
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                attsel.setEvaluator(eval);
                attsel.setSearch(search);
                attsel.SelectAttributes(selectionData);
                Set<Integer> cfsSet = new HashSet<>();
                for (int attr : attsel.selectedAttributes()) {
                    cfsSet.add(attr);
                }
                cfsSets.add(cfsSet);
                selectionSuccessful = true;
            } catch (IllegalArgumentException e) {
                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
                Pattern p = Pattern.compile(regex);
                Matcher m = p.matcher(e.getMessage());
                if (!m.find()) {
                    // cannot treat problem, rethrow exception
                    throw e;
                }
                String attributeName = m.group(1);
                int attrIndex = traindata.attribute(attributeName).index();
                if (secondAttempt) {
                    traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex);
                } else {
                    traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
                }
                Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training");
                secondAttempt = true;
                continue;
            }
        } while (!selectionSuccessful); // dummy loop for internal continue
    }

    double[] coverages = new double[topkIndex.length];
    for (Set<Integer> cfsSet : cfsSets) {
        Set<Integer> topkSet = new HashSet<>();
        for (int k = 0; k < topkIndex.length; k++) {
            topkSet.add(topkIndex[k]);
            coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
        }
    }
    double bestCoverageValue = Double.NEGATIVE_INFINITY; // not Double.MIN_VALUE, which is the smallest positive double
    int bestCoverageIndex = 0;
    for (int i = 0; i < coverages.length; i++) {
        if (coverages[i] > bestCoverageValue) {
            bestCoverageValue = coverages[i];
            bestCoverageIndex = i;
        }
    }
    // build correlation matrix
    SpearmansCorrelation corr = new SpearmansCorrelation();
    double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
    for (Instances traindata : traindataSet) {
        double[][] vectors = new double[bestCoverageIndex][traindata.size()];
        for (int i = 0; i < traindata.size(); i++) {
            for (int j = 0; j < bestCoverageIndex; j++) {
                vectors[j][i] = traindata.get(i).value(topkIndex[j]);
            }
        }
        for (int j = 0; j < bestCoverageIndex; j++) {
            for (int k = j + 1; k < bestCoverageIndex; k++) {
                correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
            }
        }
    }
    Set<Integer> topkSetIndexSet = new TreeSet<>();
    // j<30 ensures that the computational time does not explode since the powerset is 2^n in
    // complexity
    for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
        topkSetIndexSet.add(j);
    }
    Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
    double bestOptCoverage = Double.NEGATIVE_INFINITY;
    Set<Integer> opttopkSetIndexSet = null;
    for (Set<Integer> combination : allCombinations) {
        if (isUncorrelated(correlationMatrix, combination)) {
            double currentCoverage = 0.0;
            Set<Integer> topkCombination = new TreeSet<>();
            for (Integer index : combination) {
                topkCombination.add(topkIndex[index]);
            }
            for (Set<Integer> cfsSet : cfsSets) {
                currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
            }
            if (currentCoverage > bestOptCoverage) {
                bestOptCoverage = currentCoverage;
                opttopkSetIndexSet = combination;
            }
        }
    }
    Set<Integer> opttopkIndex = new TreeSet<>();
    for (Integer index : opttopkSetIndexSet) {
        opttopkIndex.add(topkIndex[index]);
    }
    Console.traceln(Level.FINE, "selected the following metrics:");
    for (Integer index : opttopkIndex) {
        Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
    }
    // finally remove attributes
    for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
        if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
            testdata.deleteAttributeAt(j);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(j);
            }
        }
    }
}
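
The coverage(...) and isUncorrelated(...) helpers referenced above are defined elsewhere in the class and are not part of this excerpt. As an assumption only, coverage presumably computes the fraction of the CFS-selected attributes already contained in the candidate set, roughly:

    // hypothetical reconstruction of the helper, not the original implementation
    private double coverage(Set<Integer> topkSet, Set<Integer> cfsSet) {
        Set<Integer> hits = new HashSet<>(cfsSet);
        hits.retainAll(topkSet); // attributes selected by both strategies
        return ((double) hits.size()) / cfsSet.size();
    }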

From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java

License:Apache License

@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    final Instances data = characteristicInstances(testdata, traindataSet);

    final ArrayList<String> attVals = new ArrayList<String>();
    attVals.add("same");
    attVals.add("more");
    attVals.add("less");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int j = 0; j < data.numAttributes(); j++) {
        atts.add(new Attribute(data.attribute(j).name(), attVals));
    }
    atts.add(new Attribute("score"));
    Instances similarityData = new Instances("similarity", atts, 0);
    similarityData.setClassIndex(similarityData.numAttributes() - 1);

    try {
        Classifier classifier = new J48();
        for (int i = 0; i < traindataSet.size(); i++) {
            classifier.buildClassifier(traindataSet.get(i));
            for (int j = 0; j < traindataSet.size(); j++) {
                if (i != j) {
                    double[] similarity = new double[data.numAttributes() + 1];
                    for (int k = 0; k < data.numAttributes(); k++) {
                        if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) {
                            similarity[k] = 2.0;
                        } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) {
                            similarity[k] = 1.0;
                        } else {
                            similarity[k] = 0.0;
                        }
                    }

                    Evaluation eval = new Evaluation(traindataSet.get(j));
                    eval.evaluateModel(classifier, traindataSet.get(j));
                    similarity[data.numAttributes()] = eval.fMeasure(1);
                    similarityData.add(new DenseInstance(1.0, similarity));
                }
            }
        }
        REPTree repTree = new REPTree();
        // use two folds, but never more folds than there are instances
        repTree.setNumFolds(Math.min(2, similarityData.size()));
        repTree.buildClassifier(similarityData);

        Instances testTrainSimilarity = new Instances(similarityData);
        testTrainSimilarity.clear();
        for (int i = 0; i < traindataSet.size(); i++) {
            double[] similarity = new double[data.numAttributes() + 1];
            for (int k = 0; k < data.numAttributes(); k++) {
                if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) {
                    similarity[k] = 2.0;
                } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) {
                    similarity[k] = 1.0;
                } else {
                    similarity[k] = 0.0;
                }
            }
            testTrainSimilarity.add(new DenseInstance(1.0, similarity));
        }

        int bestScoringProductIndex = -1;
        double maxScore = Double.NEGATIVE_INFINITY; // Double.MIN_VALUE would never be beaten by a score of 0.0
        for (int i = 0; i < traindataSet.size(); i++) {
            double score = repTree.classifyInstance(testTrainSimilarity.get(i));
            if (score > maxScore) {
                maxScore = score;
                bestScoringProductIndex = i;
            }
        }
        Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex);
        traindataSet.clear();
        traindataSet.add(bestScoringProduct);
    } catch (Exception e) {
        Console.printerr("failure during DecisionTreeSelection: " + e.getMessage());
        throw new RuntimeException(e);
    }
}

From source file:development.CrossValidateShapelets.java

public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names) {
    ArrayList<Classifier> sc = new ArrayList<>();
    kNN n = new kNN(50);
    n.setCrossValidate(true);
    sc.add(n);
    names.add("kNN");
    sc.add(new J48());
    names.add("C45");
    sc.add(new NaiveBayes());
    names.add("NB");
    BayesNet bn = new BayesNet();
    sc.add(bn);
    names.add("BayesNet");
    RandomForest rf = new RandomForest();
    rf.setNumTrees(200);
    sc.add(rf);
    names.add("RandForest");
    RotationForest rot = new RotationForest();
    rot.setNumIterations(30);
    sc.add(rot);
    names.add("RotForest");
    SMO svmL = new SMO();
    PolyKernel kernel = new PolyKernel();
    kernel.setExponent(1);
    svmL.setKernel(kernel);
    sc.add(svmL);
    names.add("SVML");
    kernel = new PolyKernel();
    kernel.setExponent(2);
    SMO svmQ = new SMO();
    svmQ.setKernel(kernel);
    sc.add(svmQ);
    names.add("SVMQ");
    return sc;
}
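
A sketch of calling this factory method; the names list is filled in parallel with the returned classifiers:

    ArrayList<String> names = new ArrayList<>();
    ArrayList<Classifier> classifiers = setSingleClassifiers(names);
    System.out.println(names); // [kNN, C45, NB, BayesNet, RandForest, RotForest, SVML, SVMQ]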