List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
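Before the project-specific examples below, a minimal, self-contained sketch of the usual call pattern may be helpful. The ARFF path is a placeholder; everything else is standard Weka API.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Placeholder ARFF file; substitute your own data set.
        Instances data = new DataSource("data/example.arff").getDataSet();
        // Most examples below treat the last attribute as the class;
        // passing -1 instead leaves the data without a class attribute (e.g. for clustering).
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}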
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/**
 * Runs the given clusterer on a copy of the instances (with the class index unset)
 * and groups the original instances by assigned cluster.
 *
 * @param wekaClusterer the clusterer to build
 * @param instances the data set to cluster
 * @return one IndexedInstance container per cluster, mapping cluster positions back to original indices
 * @throws Exception if clustering or evaluation fails
 */
public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances)
        throws Exception {
    final Instances ii = new Instances(instances);
    ii.setClassIndex(-1); // clusterers require the class attribute to be unset

    wekaClusterer.buildClusterer(ii);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(wekaClusterer);
    eval.evaluateClusterer(ii);

    final int clustersCount = eval.getNumClusters();
    final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount);

    // Initialize one empty container per cluster
    for (int k = 0; k < clustersCount; k++) {
        clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>()));
    }

    final double[] ass = eval.getClusterAssignments();
    if (ass.length != ii.numInstances())
        throw new IllegalStateException();

    for (int i = 0; i < ass.length; i++) {
        IndexedInstance idxi = clustersList.get((int) ass[i]);
        idxi.getInstances().add(instances.instance(i));
        int pos = idxi.getInstances().size() - 1;
        idxi.getMapOrigIndex().put(pos, i);
    }

    return clustersList;
}
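The helper above illustrates the common requirement that the class attribute be unset before a clusterer sees the data. A self-contained sketch of that same pattern using only standard Weka classes (the file path is a placeholder) could look like this:

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterWithoutClassSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder ARFF file with a class attribute in the last position.
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(-1); // most Weka clusterers will not accept data with a class attribute set

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(3);
        kmeans.buildClusterer(data);

        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(kmeans);
        eval.evaluateClusterer(data);
        System.out.println("Clusters found: " + eval.getNumClusters());
    }
}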
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
/**
 * Collapses the data set to at most maxInstances clusters with k-means,
 * temporarily unsetting the class index so the clusterer accepts the data.
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    // Remember the class index: it must be unset while clustering and restored afterwards.
    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);

    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();

    instances.setClassIndex(savedClassIndex);

    final int numClusters = clusters.size();

    // Set the class index for each cluster's instances
    //System.out.println("Setting class index to each cluster : " + savedClassIndex);
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }

    // Save centroids
    centroids = clusterer.getClusterCentroids();
    return new KmeansResult(centroids, clusters);
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
public static void main(String[] args) throws Exception {
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(
            //new File("./samples/csv/uci/zoo.csv"));
            new File("./samples/csv/test50bis.csv"));
            //new File("./samples/arff/UCI/cmc.arff"));
            //new File("./samples/csv/direct-marketing-bank-reduced.csv"));
            //new File("./samples/csv/bank.csv"));
            //new File("./samples/csv/preswissroll.csv"));
            //new File("./samples/csv/preswissroll-mod4.csv"));
    ds.setClassIndex(-1);

    final int N = ds.size();
    final int M = ds.numAttributes();

    // Pairwise cosine-distance matrix over attributes 1..M-1
    SimpleMatrix dist = new SimpleMatrix(N, N);
    for (int i = 0; i < N; i++) {
        for (int j = i + 1; j < N; j++) {
            Instance xi = ds.instance(i);
            Instance xj = ds.instance(j);
            double d = 0, s = 0, a = 0, b = 0;
            for (int k = 1; k < M; k++) {
                s += xi.value(k) * xj.value(k);
                a += xi.value(k) * xi.value(k);
                b += xj.value(k) * xj.value(k); // the original accumulated xi twice here, which breaks the cosine norm
            }
            d = 1 - s / (Math.sqrt(a) * Math.sqrt(b));
            dist.set(i, j, d);
            dist.set(j, i, d);
        }
    }

    final MDSResult res = ClassicMDS.doMDSV1(ds, dist);

    JXPanel p2 = MDSViewBuilder.buildMDSViewFromDataSet(ds, res, 5000, null);
    p2.setPreferredSize(new Dimension(800, 600));

    final JXFrame f = new JXFrame();
    f.setPreferredSize(new Dimension(1024, 768));
    final Container c = f.getContentPane();
    c.add(p2);
    f.pack();
    f.setVisible(true);
    f.setDefaultCloseOperation(JXFrame.EXIT_ON_CLOSE);

    System.out.println("Kruskal stress = " + getKruskalStressFromMDSResult(res));
}
From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java
License:Open Source License
/**
 * Builds one XYSeries per class (or a single dummy series when no class is set)
 * and wires the renderer, tooltips and point labels for the MDS scatter plot.
 */
private static void buildFilteredSeries(final MDSResult mdsResult, final XYPlot xyPlot,
        final String... attrNameToUseAsPointTitle) throws Exception {
    final CollapsedInstances distMdsRes = mdsResult.getCInstances();
    final Instances instances = distMdsRes.getInstances();
    final SimpleMatrix coordinates = mdsResult.getCoordinates();
    final Instances collapsedInstances = mdsResult.getCollapsedInstances();

    int maxSize = 0;
    if (distMdsRes.isCollapsed()) {
        final List<Instances> clusters = distMdsRes.getCentroidMap().getClusters();
        final int nbCentroids = clusters.size();
        maxSize = clusters.get(0).size();
        for (int i = 1; i < nbCentroids; i++) {
            final int currentSize = clusters.get(i).size();
            if (currentSize > maxSize) {
                maxSize = currentSize;
            }
        }
    }

    Attribute clsAttribute = null;
    int nbClass = 1;
    if (instances.classIndex() != -1) {
        clsAttribute = instances.classAttribute();
        nbClass = clsAttribute.numValues();
    }

    final XYSeriesCollection dataset = (XYSeriesCollection) xyPlot.getDataset();
    final int fMaxSize = maxSize;
    final List<XYSeries> lseries = new ArrayList<XYSeries>();

    // No class: add one dummy series
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        // Some classes: add one series per class
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    // Initialize filtered series
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(collapsedInstances, 0));
    }
    final Map<Tuple<Integer, Integer>, Integer> correspondanceMap = new HashMap<Tuple<Integer, Integer>, Integer>();

    for (int i = 0; i < collapsedInstances.numInstances(); i++) {
        final Instance oInst = collapsedInstances.instance(i);
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            if (distMdsRes.isCollapsed()) {
                indexOfSerie = getStrongestClass(i, distMdsRes);
            } else {
                indexOfSerie = (int) oInst.value(oInst.classAttribute());
            }
        }
        lseries.get(indexOfSerie).add(coordinates.get(i, 0), coordinates.get(i, 1));
        filteredInstances.get(indexOfSerie).add(oInst);
        if (distMdsRes.isCollapsed()) {
            correspondanceMap.put(new Tuple<Integer, Integer>(indexOfSerie,
                    filteredInstances.get(indexOfSerie).numInstances() - 1), i);
        }
    }

    final List<Paint> colors = new ArrayList<Paint>();
    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    if (distMdsRes.isCollapsed()) {
        final XYLineAndShapeRenderer xyRenderer = new XYLineAndShapeRenderer(false, true) {
            private static final long serialVersionUID = -6019883886470934528L;

            @Override
            public void drawItem(Graphics2D g2, XYItemRendererState state, java.awt.geom.Rectangle2D dataArea,
                    PlotRenderingInfo info, XYPlot plot, ValueAxis domainAxis, ValueAxis rangeAxis,
                    XYDataset dataset, int series, int item, CrosshairState crosshairState, int pass) {
                if (distMdsRes.isCollapsed()) {
                    final Integer centroidIndex = correspondanceMap
                            .get(new Tuple<Integer, Integer>(series, item));
                    final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);
                    int size = cluster.size();
                    // Scale the point size by the cluster size
                    final int shapeSize = (int) (MAX_POINT_SIZE * size / fMaxSize + 1);

                    final double x1 = plot.getDataset().getX(series, item).doubleValue();
                    final double y1 = plot.getDataset().getY(series, item).doubleValue();

                    Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                    mapRepartition.put("No class", size);
                    if (cluster.classIndex() != -1) {
                        mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                    }

                    final RectangleEdge xAxisLocation = plot.getDomainAxisEdge();
                    final RectangleEdge yAxisLocation = plot.getRangeAxisEdge();
                    final double fx = domainAxis.valueToJava2D(x1, dataArea, xAxisLocation);
                    final double fy = rangeAxis.valueToJava2D(y1, dataArea, yAxisLocation);

                    setSeriesShape(series,
                            new Ellipse2D.Double(-shapeSize / 2, -shapeSize / 2, shapeSize, shapeSize));
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series, item,
                            crosshairState, pass);

                    // Draw pie
                    if (ENABLE_PIE_SHART) {
                        createPieChart(g2, (int) (fx - shapeSize / 2), (int) (fy - shapeSize / 2), shapeSize,
                                mapRepartition, size, colors);
                    }
                } else {
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series, item,
                            crosshairState, pass);
                }
            }
        };
        xyPlot.setRenderer(xyRenderer);
    }

    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (distMdsRes.isCollapsed()) {
                final StringBuilder res = new StringBuilder("<html>");
                final Integer centroidIndex = correspondanceMap.get(new Tuple<Integer, Integer>(series, item));
                final Instance centroid = distMdsRes.getCentroidMap().getCentroids().get(centroidIndex);
                final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);

                // Give the cluster the same class index as the original instances
                //System.out.println("Cluster index = " + cluster.classIndex() + "/" + instances.classIndex());
                cluster.setClassIndex(instances.classIndex());

                Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                mapRepartition.put("No class", cluster.size());
                if (cluster.classIndex() != -1) {
                    mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                }

                res.append(InstanceFormatter.htmlFormat(centroid, false)).append("<br/>");
                for (final Map.Entry<Object, Integer> entry : mapRepartition.entrySet()) {
                    if (entry.getValue() != 0) {
                        res.append("Class :<b>'" + StringEscapeUtils.escapeHtml(entry.getKey().toString())
                                + "</b>' -> " + entry.getValue()).append("<br/>");
                    }
                }
                res.append("</html>");
                return res.toString();
            } else {
                //return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item), true);
                return InstanceFormatter.shortHtmlFormat(filteredInstances.get(series).instance(item));
            }
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, MAX_POINT_SIZE, MAX_POINT_SIZE);
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);
    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    if (attrNameToUseAsPointTitle.length > 0) {
        final Attribute attrToUseAsPointTitle = instances.attribute(attrNameToUseAsPointTitle[0]);
        if (attrToUseAsPointTitle != null) {
            final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
                @Override
                public String generateLabel(final XYDataset dataset, final int series, final int item) {
                    return filteredInstances.get(series).instance(item).stringValue(attrToUseAsPointTitle);
                }
            };
            xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
            xyPlot.getRenderer().setBaseItemLabelsVisible(true);
        }
    }
}
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) {
    try {
        List<Document> docs = new LinkedList<Document>();

        Document d = new Document();
        d.readCONLL("data/pipeline/interview_16.lvsem.conll");
        d.addAnnotationMMAX("data/interview_16_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_23.lvsem.conll");
        d.addAnnotationMMAX("data/interview_23_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_27.lvsem.conll");
        d.addAnnotationMMAX("data/interview_27_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_38.lvsem.conll");
        d.addAnnotationMMAX("data/interview_38_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        Instances train = toArff2(docs);
        train.setClassIndex(train.numAttributes() - 1);

        String[] options = { "-U" }; //, "-C", "0.5"};
        Classifier cls = new J48();
        cls.setOptions(options);
        cls.buildClassifier(train);

        docs = new LinkedList<Document>();
        d = new Document();
        d.readCONLL("data/pipeline/interview_43.lvsem.conll");
        d.addAnnotationMMAX("data/interview_43_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_46.lvsem.conll");
        d.addAnnotationMMAX("data/interview_46_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        Evaluation eval = new Evaluation(train);

        Instances data = toArff2(docs);
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            double clsLabel = cls.classifyInstance(data.instance(i));
            //System.out.println(clsLabel);
            data.instance(i).setClassValue(clsLabel);
            System.out.println(data.instance(i).toString(data.classIndex()));
        }

        // eval.crossValidateModel(cls, train, 10, new Random(1));
        //
        // // generate curve
        // ThresholdCurve tc = new ThresholdCurve();
        // //int classIndex = test.numAttributes()-1;
        // Instances result = tc.getCurve(eval.predictions()); //, classIndex);
        //
        // // plot curve
        // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        // vmc.setROCString("(Area under ROC = "
        //         + weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        // vmc.setName(result.relationName());
        // PlotData2D tempd = new PlotData2D(result);
        // tempd.setPlotName(result.relationName());
        // tempd.addInstanceNumberAttribute();
        // // specify which points are connected
        // boolean[] cp = new boolean[result.numInstances()];
        // for (int n = 1; n < cp.length; n++)
        //     cp[n] = true;
        // tempd.setConnectPoints(cp);
        // // add plot
        // vmc.addPlot(tempd);
        //
        // // display curve
        // String plotName = vmc.getName();
        // final javax.swing.JFrame jf =
        //         new javax.swing.JFrame("Weka Classifier Visualize: " + plotName);
        // jf.setSize(500, 400);
        // jf.getContentPane().setLayout(new BorderLayout());
        // jf.getContentPane().add(vmc, BorderLayout.CENTER);
        // jf.addWindowListener(new java.awt.event.WindowAdapter() {
        //     public void windowClosing(java.awt.event.WindowEvent e) {
        //         jf.dispose();
        //     }
        // });
        // jf.setVisible(true);

        // Instances test = toArff2(docs);
        // test.setClassIndex(test.numAttributes() - 1);
        //
        // Evaluation evals = new Evaluation(train);
        // evals.evaluateModel(cls, test);
        // System.out.println(evals.toSummaryString("\nResults\n======\n", false));
        // System.out.println(evals.toMatrixString());
        // System.out.println(evals.toClassDetailsString());
        // System.out.println(cls);
        // //System.out.println(toArff2(docs));
    } catch (Exception ex) {
        Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:machinelearningcw.MachineLearningCw.java
public static void perceptron(Instances data) throws Exception {
    perceptronClassifier p = new perceptronClassifier();
    data.setClassIndex(data.numAttributes() - 1);

    int numFolds = 10;
    EvaluationUtils eval = new EvaluationUtils();
    ArrayList<Prediction> preds = eval.getCVPredictions(p, data, numFolds);

    // Count correct predictions to compute cross-validated accuracy
    int correct = 0;
    int total = 0;
    for (Prediction pred : preds) {
        if (pred.predicted() == pred.actual()) {
            correct++;
        }
        total++;
    }
    double acc = ((double) correct / total);
    System.out.println(acc);
}
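As a point of comparison, Weka's Evaluation class can do the fold splitting and accuracy bookkeeping that the loop above performs by hand. The sketch below is illustrative only: the data path is a placeholder, and Logistic merely stands in for the coursework perceptronClassifier.

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.Logistic;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidationSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder data file; any nominal-class ARFF works here.
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Evaluation handles the fold splits and the accuracy statistics itself.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new Logistic(), data, 10, new Random(1));
        System.out.println("10-fold CV accuracy: " + eval.pctCorrect() + " %");
    }
}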
From source file:machinelearningproject.MachineLearningProject.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("D:\\spambase.arff");
    // DataSource source = new DataSource("D:\\weather-nominal.arff");
    Instances instances = source.getDataSet();
    int numAttr = instances.numAttributes();
    instances.setClassIndex(instances.numAttributes() - 1);

    int runs = 5;
    int seed = 15;
    for (int i = 0; i < runs; i++) {
        // randomize data
        seed = seed + 1; // the seed for randomizing the data
        Random rand = new Random(seed); // create seeded number generator
        Instances randData = new Instances(instances); // create copy of original data
        randData.randomize(rand); // shuffle with the seeded generator (the original used Collections.shuffle, which ignores the seed)

        Evaluation evalDTree = new Evaluation(randData);
        Evaluation evalRF = new Evaluation(randData);
        Evaluation evalSVM = new Evaluation(randData);

        int folds = 10;
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);

            // instantiate classifiers
            DecisionTree dtree = new DecisionTree();
            RandomForest rf = new RandomForest(100);
            SMO svm = new SMO();
            RBFKernel rbfKernel = new RBFKernel();
            double gamma = 0.70;
            rbfKernel.setGamma(gamma);
            svm.setKernel(rbfKernel); // attach the RBF kernel to SMO (the original built the kernel but never set it)

            dtree.buildClassifier(train);
            rf.buildClassifier(train);
            svm.buildClassifier(train);

            evalDTree.evaluateModel(dtree, test);
            evalRF.evaluateModel(rf, test);
            evalSVM.evaluateModel(svm, test);
        }

        System.out.println("=== Decision Tree Evaluation ===");
        System.out.println(evalDTree.toSummaryString());
        System.out.println(evalDTree.toClassDetailsString());
        System.out.println(evalDTree.toMatrixString());

        System.out.println("=== Random Forest Evaluation ===");
        System.out.println(evalRF.toSummaryString());
        System.out.println(evalRF.toClassDetailsString());
        System.out.println(evalRF.toMatrixString());

        System.out.println("=== SVM Evaluation ===");
        System.out.println(evalSVM.toSummaryString());
        System.out.println(evalSVM.toClassDetailsString());
        System.out.println(evalSVM.toMatrixString());
    }
}
From source file:machinelearningproject.RFTree.java
@Override
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    // Choose a random fraction of roughly sqrt(numAttr) attributes, as in random forests.
    // Note: randomIdx is computed but the loop below indexes attributes 0..k-1 directly.
    int numAttr = instances.numAttributes();
    int k = (int) round(sqrt(numAttr));
    ArrayList<Integer> randomIdx = randomFraction(numAttr);

    for (int idx = 0; idx < k; idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        // pick the attribute with the largest information gain
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // collect the distinct values of the chosen attribute
            ArrayList<String> attrValues = new ArrayList();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            // recurse on the subset of instances matching each attribute value
            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());

                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }
    return tree;
}
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    for (int idx = 0; idx < instances.numAttributes(); idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        // pick the attribute with the largest information gain
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // collect the distinct values of the chosen attribute
            ArrayList<String> attrValues = new ArrayList();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            // recurse on the subset of instances matching each attribute value
            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());

                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }
    return tree;
}
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * This initial classifier builds a two-dimensional array of counts.
 *
 * @param ins the training instances
 * @throws Exception
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    ins.setClassIndex(ins.numAttributes() - 1);
    countData = ins.size();

    // assigns the class position of the instance
    classValueCounts = new int[ins.numClasses()];
    System.out.println(ins);
    if (laplace == true) {
        laplaceCorrection(ins);
    }

    // store the values
    for (Instance line : ins) {
        double classValue = line.classValue();
        classValueCounts[(int) classValue]++;
        for (int i = 0; i < line.numAttributes() - 1; i++) {
            double attributeValue = line.value(i);
            DataFound d = new DataFound(attributeValue, classValue, i);
            int index = data.indexOf(d);
            // if this value/class pair hasn't been seen yet, add it; otherwise increment its count
            if (index == -1) {
                data.add(d);
            } else {
                data.get(index).incrementCount();
            }
        }
    }
    System.out.println("");
    System.out.println(Arrays.toString(classValueCounts));
}
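Finally, a minimal end-to-end sketch of the workflow these examples rely on, training after setClassIndex and then classifying, using only standard Weka classes (NaiveBayes stands in for the project's BasicNaiveBayesV1, and the data path is a placeholder):

import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainAndPredictSketch {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1); // the class must be set before buildClassifier

        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(data);

        // Predict the class of the first instance and print its label.
        double label = nb.classifyInstance(data.instance(0));
        System.out.println("Predicted: " + data.classAttribute().value((int) label));
    }
}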