List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
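Before the project-specific examples below, a minimal, self-contained sketch of the usual call pattern may be helpful. The ARFF path is a placeholder; everything else is standard Weka API.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Placeholder ARFF file; substitute your own data set.
        Instances data = new DataSource("data/example.arff").getDataSet();
        // Most examples below treat the last attribute as the class;
        // passing -1 instead leaves the data without a class attribute (e.g. for clustering).
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}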
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/**
 * Runs the given clusterer on a copy of the instances (with the class index unset)
 * and groups the original instances by assigned cluster.
 *
 * @param wekaClusterer the clusterer to build
 * @param instances the data set to cluster
 * @return one IndexedInstance container per cluster, mapping cluster positions back to original indices
 * @throws Exception if clustering or evaluation fails
 */
public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances)
        throws Exception {
    final Instances ii = new Instances(instances);
    ii.setClassIndex(-1); // clusterers require the class attribute to be unset

    wekaClusterer.buildClusterer(ii);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(wekaClusterer);
    eval.evaluateClusterer(ii);

    final int clustersCount = eval.getNumClusters();
    final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount);

    // Initialize one empty container per cluster
    for (int k = 0; k < clustersCount; k++) {
        clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>()));
    }

    final double[] ass = eval.getClusterAssignments();
    if (ass.length != ii.numInstances())
        throw new IllegalStateException();

    for (int i = 0; i < ass.length; i++) {
        IndexedInstance idxi = clustersList.get((int) ass[i]);
        idxi.getInstances().add(instances.instance(i));
        int pos = idxi.getInstances().size() - 1;
        idxi.getMapOrigIndex().put(pos, i);
    }

    return clustersList;
}
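The helper above illustrates the common requirement that the class attribute be unset before a clusterer sees the data. A self-contained sketch of that same pattern using only standard Weka classes (the file path is a placeholder) could look like this:

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterWithoutClassSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder ARFF file with a class attribute in the last position.
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(-1); // most Weka clusterers will not accept data with a class attribute set

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(3);
        kmeans.buildClusterer(data);

        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(kmeans);
        eval.evaluateClusterer(data);
        System.out.println("Clusters found: " + eval.getNumClusters());
    }
}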
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
/**
 * Collapses the data set to at most maxInstances clusters with k-means,
 * temporarily unsetting the class index so the clusterer accepts the data.
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    // Remember the class index: it must be unset while clustering and restored afterwards.
    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);

    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();

    instances.setClassIndex(savedClassIndex);

    final int numClusters = clusters.size();

    // Set the class index for each cluster's instances
    //System.out.println("Setting class index to each cluster : " + savedClassIndex);
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }

    // Save centroids
    centroids = clusterer.getClusterCentroids();
    return new KmeansResult(centroids, clusters);
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
public static void main(String[] args) throws Exception {
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(
            //new File("./samples/csv/uci/zoo.csv"));
            new File("./samples/csv/test50bis.csv"));
            //new File("./samples/arff/UCI/cmc.arff"));
            //new File("./samples/csv/direct-marketing-bank-reduced.csv"));
            //new File("./samples/csv/bank.csv"));
            //new File("./samples/csv/preswissroll.csv"));
            //new File("./samples/csv/preswissroll-mod4.csv"));
    ds.setClassIndex(-1);

    final int N = ds.size();
    final int M = ds.numAttributes();

    // Pairwise cosine-distance matrix over attributes 1..M-1
    SimpleMatrix dist = new SimpleMatrix(N, N);
    for (int i = 0; i < N; i++) {
        for (int j = i + 1; j < N; j++) {
            Instance xi = ds.instance(i);
            Instance xj = ds.instance(j);
            double d = 0, s = 0, a = 0, b = 0;
            for (int k = 1; k < M; k++) {
                s += xi.value(k) * xj.value(k);
                a += xi.value(k) * xi.value(k);
                b += xj.value(k) * xj.value(k); // the original accumulated xi twice here, which breaks the cosine norm
            }
            d = 1 - s / (Math.sqrt(a) * Math.sqrt(b));
            dist.set(i, j, d);
            dist.set(j, i, d);
        }
    }

    final MDSResult res = ClassicMDS.doMDSV1(ds, dist);

    JXPanel p2 = MDSViewBuilder.buildMDSViewFromDataSet(ds, res, 5000, null);
    p2.setPreferredSize(new Dimension(800, 600));

    final JXFrame f = new JXFrame();
    f.setPreferredSize(new Dimension(1024, 768));
    final Container c = f.getContentPane();
    c.add(p2);
    f.pack();
    f.setVisible(true);
    f.setDefaultCloseOperation(JXFrame.EXIT_ON_CLOSE);

    System.out.println("Kruskal stress = " + getKruskalStressFromMDSResult(res));
}
From source file:lu.lippmann.cdb.lab.mds.MDSViewBuilder.java
License:Open Source License
/**
 * Builds one XYSeries per class (or a single dummy series when no class is set)
 * and wires the renderer, tooltips and point labels for the MDS scatter plot.
 */
private static void buildFilteredSeries(final MDSResult mdsResult, final XYPlot xyPlot,
        final String... attrNameToUseAsPointTitle) throws Exception {
    final CollapsedInstances distMdsRes = mdsResult.getCInstances();
    final Instances instances = distMdsRes.getInstances();
    final SimpleMatrix coordinates = mdsResult.getCoordinates();
    final Instances collapsedInstances = mdsResult.getCollapsedInstances();

    int maxSize = 0;
    if (distMdsRes.isCollapsed()) {
        final List<Instances> clusters = distMdsRes.getCentroidMap().getClusters();
        final int nbCentroids = clusters.size();
        maxSize = clusters.get(0).size();
        for (int i = 1; i < nbCentroids; i++) {
            final int currentSize = clusters.get(i).size();
            if (currentSize > maxSize) {
                maxSize = currentSize;
            }
        }
    }

    Attribute clsAttribute = null;
    int nbClass = 1;
    if (instances.classIndex() != -1) {
        clsAttribute = instances.classAttribute();
        nbClass = clsAttribute.numValues();
    }

    final XYSeriesCollection dataset = (XYSeriesCollection) xyPlot.getDataset();
    final int fMaxSize = maxSize;
    final List<XYSeries> lseries = new ArrayList<XYSeries>();

    // No class: add one dummy series
    if (nbClass <= 1) {
        lseries.add(new XYSeries("Serie #1", false));
    } else {
        // Some classes: add one series per class
        for (int i = 0; i < nbClass; i++) {
            lseries.add(new XYSeries(clsAttribute.value(i), false));
        }
    }
    dataset.removeAllSeries();

    // Initialize filtered series
    final List<Instances> filteredInstances = new ArrayList<Instances>();
    for (int i = 0; i < lseries.size(); i++) {
        filteredInstances.add(new Instances(collapsedInstances, 0));
    }
    final Map<Tuple<Integer, Integer>, Integer> correspondanceMap = new HashMap<Tuple<Integer, Integer>, Integer>();

    for (int i = 0; i < collapsedInstances.numInstances(); i++) {
        final Instance oInst = collapsedInstances.instance(i);
        int indexOfSerie = 0;
        if (oInst.classIndex() != -1) {
            if (distMdsRes.isCollapsed()) {
                indexOfSerie = getStrongestClass(i, distMdsRes);
            } else {
                indexOfSerie = (int) oInst.value(oInst.classAttribute());
            }
        }
        lseries.get(indexOfSerie).add(coordinates.get(i, 0), coordinates.get(i, 1));
        filteredInstances.get(indexOfSerie).add(oInst);
        if (distMdsRes.isCollapsed()) {
            correspondanceMap.put(new Tuple<Integer, Integer>(indexOfSerie,
                    filteredInstances.get(indexOfSerie).numInstances() - 1), i);
        }
    }

    final List<Paint> colors = new ArrayList<Paint>();
    for (final XYSeries series : lseries) {
        dataset.addSeries(series);
    }

    if (distMdsRes.isCollapsed()) {
        final XYLineAndShapeRenderer xyRenderer = new XYLineAndShapeRenderer(false, true) {
            private static final long serialVersionUID = -6019883886470934528L;

            @Override
            public void drawItem(Graphics2D g2, XYItemRendererState state, java.awt.geom.Rectangle2D dataArea,
                    PlotRenderingInfo info, XYPlot plot, ValueAxis domainAxis, ValueAxis rangeAxis,
                    XYDataset dataset, int series, int item, CrosshairState crosshairState, int pass) {
                if (distMdsRes.isCollapsed()) {
                    final Integer centroidIndex = correspondanceMap
                            .get(new Tuple<Integer, Integer>(series, item));
                    final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);
                    int size = cluster.size();
                    // Scale the point size by the cluster size
                    final int shapeSize = (int) (MAX_POINT_SIZE * size / fMaxSize + 1);

                    final double x1 = plot.getDataset().getX(series, item).doubleValue();
                    final double y1 = plot.getDataset().getY(series, item).doubleValue();

                    Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                    mapRepartition.put("No class", size);
                    if (cluster.classIndex() != -1) {
                        mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                    }

                    final RectangleEdge xAxisLocation = plot.getDomainAxisEdge();
                    final RectangleEdge yAxisLocation = plot.getRangeAxisEdge();
                    final double fx = domainAxis.valueToJava2D(x1, dataArea, xAxisLocation);
                    final double fy = rangeAxis.valueToJava2D(y1, dataArea, yAxisLocation);

                    setSeriesShape(series,
                            new Ellipse2D.Double(-shapeSize / 2, -shapeSize / 2, shapeSize, shapeSize));
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series, item,
                            crosshairState, pass);

                    // Draw pie
                    if (ENABLE_PIE_SHART) {
                        createPieChart(g2, (int) (fx - shapeSize / 2), (int) (fy - shapeSize / 2), shapeSize,
                                mapRepartition, size, colors);
                    }
                } else {
                    super.drawItem(g2, state, dataArea, info, plot, domainAxis, rangeAxis, dataset, series, item,
                            crosshairState, pass);
                }
            }
        };
        xyPlot.setRenderer(xyRenderer);
    }

    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (distMdsRes.isCollapsed()) {
                final StringBuilder res = new StringBuilder("<html>");
                final Integer centroidIndex = correspondanceMap.get(new Tuple<Integer, Integer>(series, item));
                final Instance centroid = distMdsRes.getCentroidMap().getCentroids().get(centroidIndex);
                final Instances cluster = distMdsRes.getCentroidMap().getClusters().get(centroidIndex);

                // Give the cluster the same class index as the original instances
                //System.out.println("Cluster index = " + cluster.classIndex() + "/" + instances.classIndex());
                cluster.setClassIndex(instances.classIndex());

                Map<Object, Integer> mapRepartition = new HashMap<Object, Integer>();
                mapRepartition.put("No class", cluster.size());
                if (cluster.classIndex() != -1) {
                    mapRepartition = WekaDataStatsUtil.getClassRepartition(cluster);
                }

                res.append(InstanceFormatter.htmlFormat(centroid, false)).append("<br/>");
                for (final Map.Entry<Object, Integer> entry : mapRepartition.entrySet()) {
                    if (entry.getValue() != 0) {
                        res.append("Class :<b>'" + StringEscapeUtils.escapeHtml(entry.getKey().toString())
                                + "</b>' -> " + entry.getValue()).append("<br/>");
                    }
                }
                res.append("</html>");
                return res.toString();
            } else {
                //return InstanceFormatter.htmlFormat(filteredInstances.get(series).instance(item), true);
                return InstanceFormatter.shortHtmlFormat(filteredInstances.get(series).instance(item));
            }
        }
    };

    final Shape shape = new Ellipse2D.Float(0f, 0f, MAX_POINT_SIZE, MAX_POINT_SIZE);
    ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setUseOutlinePaint(true);
    for (int p = 0; p < nbClass; p++) {
        xyPlot.getRenderer().setSeriesToolTipGenerator(p, gen);
        ((XYLineAndShapeRenderer) xyPlot.getRenderer()).setLegendShape(p, shape);
        xyPlot.getRenderer().setSeriesOutlinePaint(p, Color.BLACK);
    }
    for (int ii = 0; ii < nbClass; ii++) {
        colors.add(xyPlot.getRenderer().getItemPaint(ii, 0));
    }

    if (attrNameToUseAsPointTitle.length > 0) {
        final Attribute attrToUseAsPointTitle = instances.attribute(attrNameToUseAsPointTitle[0]);
        if (attrToUseAsPointTitle != null) {
            final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
                @Override
                public String generateLabel(final XYDataset dataset, final int series, final int item) {
                    return filteredInstances.get(series).instance(item).stringValue(attrToUseAsPointTitle);
                }
            };
            xyPlot.getRenderer().setBaseItemLabelGenerator(lg);
            xyPlot.getRenderer().setBaseItemLabelsVisible(true);
        }
    }
}
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) {
    try {
        List<Document> docs = new LinkedList<Document>();

        Document d = new Document();
        d.readCONLL("data/pipeline/interview_16.lvsem.conll");
        d.addAnnotationMMAX("data/interview_16_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_23.lvsem.conll");
        d.addAnnotationMMAX("data/interview_23_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_27.lvsem.conll");
        d.addAnnotationMMAX("data/interview_27_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_38.lvsem.conll");
        d.addAnnotationMMAX("data/interview_38_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        Instances train = toArff2(docs);
        train.setClassIndex(train.numAttributes() - 1);

        String[] options = { "-U" }; //, "-C", "0.5"};
        Classifier cls = new J48();
        cls.setOptions(options);
        cls.buildClassifier(train);

        docs = new LinkedList<Document>();
        d = new Document();
        d.readCONLL("data/pipeline/interview_43.lvsem.conll");
        d.addAnnotationMMAX("data/interview_43_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        d = new Document();
        d.readCONLL("data/pipeline/interview_46.lvsem.conll");
        d.addAnnotationMMAX("data/interview_46_coref_level.xml");
        d.useGoldMentions();
        docs.add(d);

        Evaluation eval = new Evaluation(train);

        Instances data = toArff2(docs);
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numInstances(); i++) {
            double clsLabel = cls.classifyInstance(data.instance(i));
            //System.out.println(clsLabel);
            data.instance(i).setClassValue(clsLabel);
            System.out.println(data.instance(i).toString(data.classIndex()));
        }

        // eval.crossValidateModel(cls, train, 10, new Random(1));
        //
        // // generate curve
        // ThresholdCurve tc = new ThresholdCurve();
        // //int classIndex = test.numAttributes()-1;
        // Instances result = tc.getCurve(eval.predictions()); //, classIndex);
        //
        // // plot curve
        // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        // vmc.setROCString("(Area under ROC = "
        //         + weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        // vmc.setName(result.relationName());
        // PlotData2D tempd = new PlotData2D(result);
        // tempd.setPlotName(result.relationName());
        // tempd.addInstanceNumberAttribute();
        // // specify which points are connected
        // boolean[] cp = new boolean[result.numInstances()];
        // for (int n = 1; n < cp.length; n++)
        //     cp[n] = true;
        // tempd.setConnectPoints(cp);
        // // add plot
        // vmc.addPlot(tempd);
        //
        // // display curve
        // String plotName = vmc.getName();
        // final javax.swing.JFrame jf =
        //         new javax.swing.JFrame("Weka Classifier Visualize: " + plotName);
        // jf.setSize(500, 400);
        // jf.getContentPane().setLayout(new BorderLayout());
        // jf.getContentPane().add(vmc, BorderLayout.CENTER);
        // jf.addWindowListener(new java.awt.event.WindowAdapter() {
        //     public void windowClosing(java.awt.event.WindowEvent e) {
        //         jf.dispose();
        //     }
        // });
        // jf.setVisible(true);

        // Instances test = toArff2(docs);
        // test.setClassIndex(test.numAttributes() - 1);
        //
        // Evaluation evals = new Evaluation(train);
        // evals.evaluateModel(cls, test);
        // System.out.println(evals.toSummaryString("\nResults\n======\n", false));
        // System.out.println(evals.toMatrixString());
        // System.out.println(evals.toClassDetailsString());
        // System.out.println(cls);
        // //System.out.println(toArff2(docs));
    } catch (Exception ex) {
        Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:machinelearningcw.MachineLearningCw.java
public static void perceptron(Instances data) throws Exception {
    perceptronClassifier p = new perceptronClassifier();
    data.setClassIndex(data.numAttributes() - 1);

    int numFolds = 10;
    EvaluationUtils eval = new EvaluationUtils();
    ArrayList<Prediction> preds = eval.getCVPredictions(p, data, numFolds);

    // Count correct predictions to compute cross-validated accuracy
    int correct = 0;
    int total = 0;
    for (Prediction pred : preds) {
        if (pred.predicted() == pred.actual()) {
            correct++;
        }
        total++;
    }
    double acc = ((double) correct / total);
    System.out.println(acc);
}
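As a point of comparison, Weka's Evaluation class can do the fold splitting and accuracy bookkeeping that the loop above performs by hand. The sketch below is illustrative only: the data path is a placeholder, and Logistic merely stands in for the coursework perceptronClassifier.

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.functions.Logistic;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidationSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder data file; any nominal-class ARFF works here.
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Evaluation handles the fold splits and the accuracy statistics itself.
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new Logistic(), data, 10, new Random(1));
        System.out.println("10-fold CV accuracy: " + eval.pctCorrect() + " %");
    }
}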
From source file:machinelearningproject.MachineLearningProject.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("D:\\spambase.arff");
    // DataSource source = new DataSource("D:\\weather-nominal.arff");
    Instances instances = source.getDataSet();
    int numAttr = instances.numAttributes();
    instances.setClassIndex(instances.numAttributes() - 1);

    int runs = 5;
    int seed = 15;
    for (int i = 0; i < runs; i++) {
        // randomize data
        seed = seed + 1; // the seed for randomizing the data
        Random rand = new Random(seed); // create seeded number generator
        Instances randData = new Instances(instances); // create copy of original data
        randData.randomize(rand); // shuffle with the seeded generator (the original used Collections.shuffle, which ignores the seed)

        Evaluation evalDTree = new Evaluation(randData);
        Evaluation evalRF = new Evaluation(randData);
        Evaluation evalSVM = new Evaluation(randData);

        int folds = 10;
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n, rand);
            Instances test = randData.testCV(folds, n);

            // instantiate classifiers
            DecisionTree dtree = new DecisionTree();
            RandomForest rf = new RandomForest(100);
            SMO svm = new SMO();
            RBFKernel rbfKernel = new RBFKernel();
            double gamma = 0.70;
            rbfKernel.setGamma(gamma);
            svm.setKernel(rbfKernel); // attach the RBF kernel to SMO (the original built the kernel but never set it)

            dtree.buildClassifier(train);
            rf.buildClassifier(train);
            svm.buildClassifier(train);

            evalDTree.evaluateModel(dtree, test);
            evalRF.evaluateModel(rf, test);
            evalSVM.evaluateModel(svm, test);
        }

        System.out.println("=== Decision Tree Evaluation ===");
        System.out.println(evalDTree.toSummaryString());
        System.out.println(evalDTree.toClassDetailsString());
        System.out.println(evalDTree.toMatrixString());

        System.out.println("=== Random Forest Evaluation ===");
        System.out.println(evalRF.toSummaryString());
        System.out.println(evalRF.toClassDetailsString());
        System.out.println(evalRF.toMatrixString());

        System.out.println("=== SVM Evaluation ===");
        System.out.println(evalSVM.toSummaryString());
        System.out.println(evalSVM.toClassDetailsString());
        System.out.println(evalSVM.toMatrixString());
    }
}
From source file:machinelearningproject.RFTree.java
@Override
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    // Choose a random fraction of roughly sqrt(numAttr) attributes, as in random forests.
    // Note: randomIdx is computed but the loop below indexes attributes 0..k-1 directly.
    int numAttr = instances.numAttributes();
    int k = (int) round(sqrt(numAttr));
    ArrayList<Integer> randomIdx = randomFraction(numAttr);

    for (int idx = 0; idx < k; idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        // pick the attribute with the largest information gain
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // collect the distinct values of the chosen attribute
            ArrayList<String> attrValues = new ArrayList();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            // recurse on the subset of instances matching each attribute value
            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());

                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }
    return tree;
}
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    for (int idx = 0; idx < instances.numAttributes(); idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        // pick the attribute with the largest information gain
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // collect the distinct values of the chosen attribute
            ArrayList<String> attrValues = new ArrayList();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            // recurse on the subset of instances matching each attribute value
            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());

                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }
    return tree;
}
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * This initial classifier builds a two-dimensional array of counts.
 *
 * @param ins the training instances
 * @throws Exception
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    ins.setClassIndex(ins.numAttributes() - 1);
    countData = ins.size();

    // assigns the class position of the instance
    classValueCounts = new int[ins.numClasses()];
    System.out.println(ins);
    if (laplace == true) {
        laplaceCorrection(ins);
    }

    // store the values
    for (Instance line : ins) {
        double classValue = line.classValue();
        classValueCounts[(int) classValue]++;
        for (int i = 0; i < line.numAttributes() - 1; i++) {
            double attributeValue = line.value(i);
            DataFound d = new DataFound(attributeValue, classValue, i);
            int index = data.indexOf(d);
            // if this value/class pair hasn't been seen yet, add it; otherwise increment its count
            if (index == -1) {
                data.add(d);
            } else {
                data.get(index).incrementCount();
            }
        }
    }
    System.out.println("");
    System.out.println(Arrays.toString(classValueCounts));
}
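Finally, a minimal end-to-end sketch of the workflow these examples rely on, training after setClassIndex and then classifying, using only standard Weka classes (NaiveBayes stands in for the project's BasicNaiveBayesV1, and the data path is a placeholder):

import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TrainAndPredictSketch {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("data/example.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1); // the class must be set before buildClassifier

        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(data);

        // Predict the class of the first instance and print its label.
        double label = nb.classifyInstance(data.instance(0));
        System.out.println("Predicted: " + data.classAttribute().value((int) label));
    }
}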