Example usage for the weka.classifiers.trees.J48 constructor

List of usage examples for the weka.classifiers.trees.J48 constructor

Introduction

On this page you can find example usages of the weka.classifiers.trees.J48 constructor, J48().
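
For orientation, here is a minimal, self-contained sketch of constructing and training a J48 tree. The file name weather.arff is a placeholder for any ARFF dataset whose last attribute is a nominal class.

import java.io.File;

import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ArffLoader;

public class J48Example {
    public static void main(String[] args) throws Exception {
        // load a dataset (the file name is a placeholder)
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File("weather.arff"));
        Instances data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1); // last attribute is the class

        J48 tree = new J48();       // the constructor documented above
        tree.buildClassifier(data); // induce the C4.5 decision tree
        System.out.println(tree);   // print the tree in text form
    }
}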

Prototype

public J48()

Usage

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile6(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial6.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES
    Attribute attr[] = new Attribute[15]; // 14 feature attributes plus the class

    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");
    attr[12] = new Attribute("VSPositive");
    attr[13] = new Attribute("VSNegative");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[14] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    for (int i = 0; i < 15; i++) {
        attrs.addElement(attr[i]);
    }

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(15);
        for (int j = 0; j < 15; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 14)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(14);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndblAndvs.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);
    objectOutputStream.close(); // flush and close so the model file is fully written

    //Read the classifier back (comment out if not needed)
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
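    //(note: this 70:30 split is never used below; the evaluation runs 10-fold cross-validation on the full dataset)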
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(14);
    test.setClassIndex(14);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}
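
Weka also ships a one-line helper for the save and load steps above; a minimal equivalent sketch using weka.core.SerializationHelper, reusing the file1 and classifier variables from the method:

    weka.core.SerializationHelper.write(file1, classifier);                  // save
    classifier = (Classifier) weka.core.SerializationHelper.read(file1);     // load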

From source file:DataMiningLogHistoriKIRI.DecisionTree.java

public String[] id3(Instances arff) {
    // despite the method name, this builds a C4.5 tree (J48), not ID3
    J48 tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println(tree.toString());

    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            //System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6));
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances());
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100; // training-set accuracy, in percent
    System.out.println("Confident = " + confident + "%");

    String[] result2 = new String[5]; // note: the returned array is never populated
    return result2;
}
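
The hand-rolled accuracy loop above can also be expressed with weka.classifiers.Evaluation; a minimal sketch, evaluating on the training data itself just as the method does:

    Evaluation eval = new Evaluation(arff);
    eval.evaluateModel(tree, arff); // resubstitution estimate, same as the loop above
    System.out.println("Accuracy = " + eval.pctCorrect() + "%");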

From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java

public String j48(Instances arff) {
    tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }

    return tree.toString();
}

From source file:DataMining_FP.interfaz.java

public static void inicializando_weka() {
    //Initializing the Weka objects
    try {
        reader = new BufferedReader(new FileReader("WISDM_ar_v1.1_transformed.arff"));
        data = new Instances(reader);
        reader.close();

        // specifying the class attribute
        data.setClassIndex(data.numAttributes() - 1);

        String[] options = new String[1];
        options[0] = "-U"; // unpruned tree
        tree = new J48(); // new instance of tree
        tree.setOptions(options); // set the options
        tree.buildClassifier(data); // build classifier
    } catch (Exception e) {
        System.out.println("Error initializing the Weka objects");
        e.printStackTrace(); // surface the underlying cause instead of swallowing it
    }
    System.out.println("Weka initialized correctly");
}
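
J48 also exposes typed setters for its options, so the "-U" (unpruned) flag above can equivalently be written as:

    tree = new J48();
    tree.setUnpruned(true); // same effect as the "-U" option
    tree.buildClassifier(data);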

From source file:de.fub.maps.project.detector.model.inference.impl.J48InferenceModel.java

License:Open Source License

@Override
protected final synchronized Classifier createClassifier() {
    classifierJ48 = new J48();
    configureClassifier();
    return classifierJ48;
}

From source file:de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java

License:Apache License

private Classifier getClassifier() throws Exception {
    Classifier cl = null;
    // Build and evaluate classifier
    // The options given correspond to the default settings in the WEKA GUI
    if (classifier.equals("smo")) {
        SMO smo = new SMO();
        smo.setOptions(Utils.splitOptions(
                "-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
        cl = smo;
    } else if (classifier.equals("j48")) {
        J48 j48 = new J48();
        j48.setOptions(new String[] { "-C", "0.25", "-M", "2" });
        cl = j48;
    } else if (classifier.equals("naivebayes")) {
        cl = new NaiveBayes();
    } else if (classifier.equals("randomforest")) {
        RandomForest rf = new RandomForest();
        rf.setOptions(Utils.splitOptions("-I 10 -K 0 -S 1"));
        cl = rf;
    }
    return cl;
}
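
For reference, the J48 options "-C 0.25 -M 2" used above correspond to typed setters as well; a sketch of the equivalent form:

    J48 j48 = new J48();
    j48.setConfidenceFactor(0.25f); // -C: pruning confidence factor
    j48.setMinNumObj(2);            // -M: minimum number of instances per leaf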

From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java

License:Open Source License

public static Classifier getClassifier(WekaClassifier classifier) throws IllegalArgumentException {
    try {
        switch (classifier) {
        case NAIVE_BAYES:
            return new NaiveBayes();
        case J48:
            J48 j48 = new J48();
            j48.setOptions(new String[] { "-C", "0.25", "-M", "2" });
            return j48;
        //            case SMO:
        //               SMO smo = new SMO();
        //               smo.setOptions(Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
        //               return smo;
        //            case LOGISTIC:
        //               Logistic logistic = new Logistic();
        //               logistic.setOptions(Utils.splitOptions("-R 1.0E-8 -M -1"));
        //               return logistic;
        default:
            throw new IllegalArgumentException("Classifier " + classifier + " not found!");
        }
    } catch (Exception e) {
        throw new IllegalArgumentException(e);
    }

}

From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java

License:Apache License

private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
        throws Exception {
    Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
    IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
    for (Instances traindata : traindataSet) {
        J48 decisionTree = new J48();
        decisionTree.buildClassifier(traindata);
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                if (decisionTree.toString().contains(traindata.attribute(j).name())) {
                    counts[k] = counts[k] + 1;
                }
                k++;
            }
        }
    }
    int[] topkIndex = new int[counts.length];
    IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
    SortUtils.quicksort(counts, topkIndex, true);

    // get CFSs for each training set
    List<Set<Integer>> cfsSets = new LinkedList<>();
    for (Instances traindata : traindataSet) {
        boolean selectionSuccessful = false;
        boolean secondAttempt = false;
        Instances traindataCopy = null;
        do {
            try {
                // on the second attempt, use the copy with the upscaled attribute
                Instances selectionData = secondAttempt ? traindataCopy : traindata;
                AttributeSelection attsel = new AttributeSelection();
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                attsel.setEvaluator(eval);
                attsel.setSearch(search);
                attsel.SelectAttributes(selectionData);
                Set<Integer> cfsSet = new HashSet<>();
                for (int attr : attsel.selectedAttributes()) {
                    cfsSet.add(attr);
                }
                cfsSets.add(cfsSet);
                selectionSuccessful = true;
            } catch (IllegalArgumentException e) {
                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
                Pattern p = Pattern.compile(regex);
                Matcher m = p.matcher(e.getMessage());
                if (!m.find()) {
                    // cannot treat problem, rethrow exception
                    throw e;
                }
                String attributeName = m.group(1);
                int attrIndex = traindata.attribute(attributeName).index();
                if (secondAttempt) {
                    traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex);
                } else {
                    traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
                }
                Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training");
                secondAttempt = true;
                continue;
            }
        } while (!selectionSuccessful); // dummy loop for internal continue
    }

    double[] coverages = new double[topkIndex.length];
    for (Set<Integer> cfsSet : cfsSets) {
        Set<Integer> topkSet = new HashSet<>();
        for (int k = 0; k < topkIndex.length; k++) {
            topkSet.add(topkIndex[k]);
            coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
        }
    }
    double bestCoverageValue = Double.NEGATIVE_INFINITY; // not Double.MIN_VALUE, which is the smallest positive double
    int bestCoverageIndex = 0;
    for (int i = 0; i < coverages.length; i++) {
        if (coverages[i] > bestCoverageValue) {
            bestCoverageValue = coverages[i];
            bestCoverageIndex = i;
        }
    }
    // build correlation matrix
    SpearmansCorrelation corr = new SpearmansCorrelation();
    double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
    for (Instances traindata : traindataSet) {
        double[][] vectors = new double[bestCoverageIndex][traindata.size()];
        for (int i = 0; i < traindata.size(); i++) {
            for (int j = 0; j < bestCoverageIndex; j++) {
                vectors[j][i] = traindata.get(i).value(topkIndex[j]);
            }
        }
        for (int j = 0; j < bestCoverageIndex; j++) {
            for (int k = j + 1; k < bestCoverageIndex; k++) {
                correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
            }
        }
    }
    Set<Integer> topkSetIndexSet = new TreeSet<>();
    // j<30 ensures that the computational time does not explode since the powerset is 2^n in
    // complexity
    for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
        topkSetIndexSet.add(j);
    }
    Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
    double bestOptCoverage = Double.NEGATIVE_INFINITY;
    Set<Integer> opttopkSetIndexSet = null;
    for (Set<Integer> combination : allCombinations) {
        if (isUncorrelated(correlationMatrix, combination)) {
            double currentCoverage = 0.0;
            Set<Integer> topkCombination = new TreeSet<>();
            for (Integer index : combination) {
                topkCombination.add(topkIndex[index]);
            }
            for (Set<Integer> cfsSet : cfsSets) {
                currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
            }
            if (currentCoverage > bestOptCoverage) {
                bestOptCoverage = currentCoverage;
                opttopkSetIndexSet = combination;
            }
        }
    }
    Set<Integer> opttopkIndex = new TreeSet<>();
    for (Integer index : opttopkSetIndexSet) {
        opttopkIndex.add(topkIndex[index]);
    }
    Console.traceln(Level.FINE, "selected the following metrics:");
    for (Integer index : opttopkIndex) {
        Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
    }
    // finally remove attributes
    for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
        if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
            testdata.deleteAttributeAt(j);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(j);
            }
        }
    }
}
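
The coverage(...) and isUncorrelated(...) helpers referenced above are defined elsewhere in the class and are not part of this excerpt. As an assumption only, coverage presumably computes the fraction of the CFS-selected attributes already contained in the candidate set, roughly:

    // hypothetical reconstruction of the helper, not the original implementation
    private double coverage(Set<Integer> topkSet, Set<Integer> cfsSet) {
        Set<Integer> hits = new HashSet<>(cfsSet);
        hits.retainAll(topkSet); // attributes selected by both strategies
        return ((double) hits.size()) / cfsSet.size();
    }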

From source file:de.ugoe.cs.cpdp.dataselection.DecisionTreeSelection.java

License:Apache License

@Override
public void apply(Instances testdata, SetUniqueList<Instances> traindataSet) {
    final Instances data = characteristicInstances(testdata, traindataSet);

    final ArrayList<String> attVals = new ArrayList<String>();
    attVals.add("same");
    attVals.add("more");
    attVals.add("less");
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int j = 0; j < data.numAttributes(); j++) {
        atts.add(new Attribute(data.attribute(j).name(), attVals));
    }
    atts.add(new Attribute("score"));
    Instances similarityData = new Instances("similarity", atts, 0);
    similarityData.setClassIndex(similarityData.numAttributes() - 1);

    try {
        Classifier classifier = new J48();
        for (int i = 0; i < traindataSet.size(); i++) {
            classifier.buildClassifier(traindataSet.get(i));
            for (int j = 0; j < traindataSet.size(); j++) {
                if (i != j) {
                    double[] similarity = new double[data.numAttributes() + 1];
                    for (int k = 0; k < data.numAttributes(); k++) {
                        if (0.9 * data.get(i + 1).value(k) > data.get(j + 1).value(k)) {
                            similarity[k] = 2.0;
                        } else if (1.1 * data.get(i + 1).value(k) < data.get(j + 1).value(k)) {
                            similarity[k] = 1.0;
                        } else {
                            similarity[k] = 0.0;
                        }
                    }

                    Evaluation eval = new Evaluation(traindataSet.get(j));
                    eval.evaluateModel(classifier, traindataSet.get(j));
                    similarity[data.numAttributes()] = eval.fMeasure(1);
                    similarityData.add(new DenseInstance(1.0, similarity));
                }
            }
        }
        REPTree repTree = new REPTree();
        // use two folds, but never more folds than there are instances
        repTree.setNumFolds(Math.min(2, similarityData.size()));
        repTree.buildClassifier(similarityData);

        Instances testTrainSimilarity = new Instances(similarityData);
        testTrainSimilarity.clear();
        for (int i = 0; i < traindataSet.size(); i++) {
            double[] similarity = new double[data.numAttributes() + 1];
            for (int k = 0; k < data.numAttributes(); k++) {
                if (0.9 * data.get(0).value(k) > data.get(i + 1).value(k)) {
                    similarity[k] = 2.0;
                } else if (1.1 * data.get(0).value(k) < data.get(i + 1).value(k)) {
                    similarity[k] = 1.0;
                } else {
                    similarity[k] = 0.0;
                }
            }
            testTrainSimilarity.add(new DenseInstance(1.0, similarity));
        }

        int bestScoringProductIndex = -1;
        double maxScore = Double.NEGATIVE_INFINITY; // Double.MIN_VALUE would never be beaten by a score of 0.0
        for (int i = 0; i < traindataSet.size(); i++) {
            double score = repTree.classifyInstance(testTrainSimilarity.get(i));
            if (score > maxScore) {
                maxScore = score;
                bestScoringProductIndex = i;
            }
        }
        Instances bestScoringProduct = traindataSet.get(bestScoringProductIndex);
        traindataSet.clear();
        traindataSet.add(bestScoringProduct);
    } catch (Exception e) {
        Console.printerr("failure during DecisionTreeSelection: " + e.getMessage());
        throw new RuntimeException(e);
    }
}

From source file:development.CrossValidateShapelets.java

public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names) {
    ArrayList<Classifier> sc = new ArrayList<>();
    kNN n = new kNN(50);
    n.setCrossValidate(true);
    sc.add(n);
    names.add("kNN");
    sc.add(new J48());
    names.add("C45");
    sc.add(new NaiveBayes());
    names.add("NB");
    BayesNet bn = new BayesNet();
    sc.add(bn);
    names.add("BayesNet");
    RandomForest rf = new RandomForest();
    rf.setNumTrees(200);
    sc.add(rf);
    names.add("RandForest");
    RotationForest rot = new RotationForest();
    rot.setNumIterations(30);
    sc.add(rot);
    names.add("RotForest");
    SMO svmL = new SMO();
    PolyKernel kernel = new PolyKernel();
    kernel.setExponent(1);
    svmL.setKernel(kernel);
    sc.add(svmL);
    names.add("SVML");
    kernel = new PolyKernel();
    kernel.setExponent(2);
    SMO svmQ = new SMO();
    svmQ.setKernel(kernel);
    sc.add(svmQ);
    names.add("SVMQ");
    return sc;
}
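
A sketch of calling this factory method; the names list is filled in parallel with the returned classifiers:

    ArrayList<String> names = new ArrayList<>();
    ArrayList<Classifier> classifiers = setSingleClassifiers(names);
    System.out.println(names); // [kNN, C45, NB, BayesNet, RandForest, RotForest, SVML, SVMQ]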