List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDataset(final Instances dataSet, final List<String> possibleValues, final List<Integer> valueForEachFeature) throws Exception { final int numInstances = dataSet.numInstances(); final ArrayList<Attribute> attrs = new ArrayList<Attribute>(numInstances + 2); attrs.add(new Attribute(FEATUREDESC_ATTRNAME, (java.util.List<String>) null)); for (int i = 0; i < numInstances; i++) { attrs.add(new Attribute(i + "_eval")); }//from w w w . j a v a 2s . c om attrs.add(new Attribute("__", possibleValues)); final Instances newds = new Instances("unsupervisedFeaturesEval", attrs, 0); final int numAttributes = dataSet.numAttributes(); for (int j = 0; j < numAttributes; j++) { double[] val = ArraysUtil.concat(dataSet.attributeToDoubleArray(j), new double[] { 0.0d }); val = ArraysUtil.concat(new double[] { 0.0d }, val); newds.add(new DenseInstance(1.0d, val)); } for (int j = 0; j < numAttributes; j++) { newds.instance(j).setValue(0, dataSet.attribute(j).name()); newds.instance(j).setValue(numInstances + 1, possibleValues.get(valueForEachFeature.get(j))); } newds.setClassIndex(numInstances + 1); return newds; }
From source file:lu.lippmann.cdb.datasetview.tabs.WeightedMapOfDecisionTreesTabView.java
License:Open Source License
/** * {@inheritDoc}// w w w.jav a2s . com */ @Override public void update0(final Instances dataSet) throws Exception { if (this.mp != null) this.panel.remove(this.mp); if (this.cl != null) this.slider.removeChangeListener(cl); //if (this.cl!=null) this.slider.removeChangeListener(cl); this.cl = new ChangeListener() { @Override public void stateChanged(final ChangeEvent e) { if (!slider.getValueIsAdjusting()) { dtFactory = new J48DecisionTreeFactory(slider.getValue() / 100d, false); update(dataSet); } } }; this.slider.addChangeListener(cl); final double frameWidth = this.panel.getSize().getWidth() * 0.95d; final double frameHeight = this.panel.getSize().getHeight() * 0.95d; final ListOrderedMap<JComponent, Integer> mapPanels = new ListOrderedMap<JComponent, Integer>(); final String oldSelected; if (this.attrSelectionCombo.getSelectedItem() == null) { oldSelected = dataSet.classAttribute().name(); } else { final Attribute oldAttr = dataSet.attribute(this.attrSelectionCombo.getSelectedItem().toString()); if (oldAttr != null) { oldSelected = oldAttr.name(); } else { oldSelected = dataSet.classAttribute().name(); } } final int idx = dataSet.attribute(oldSelected).index(); final Set<Object> presentValues = WekaDataStatsUtil.getNominalRepartition(dataSet, idx).keySet(); for (final Object o : presentValues) { final Instances part = WekaDataProcessingUtil.filterDataSetOnNominalValue(dataSet, idx, o.toString()); final DecisionTree dti = dtFactory.buildDecisionTree(part); final int ratio = 100 * part.numInstances() / dataSet.numInstances(); final GraphView myGraph = DecisionTreeToGraphViewHelper.buildGraphView(dti, eventPublisher, commandDispatcher); myGraph.hideSharedLabel(); myGraph.addMetaInfo("size=" + dti.getSize(), ""); myGraph.addMetaInfo("depth=" + dti.getDepth(), ""); myGraph.addMetaInfo("err=" + FormatterUtil.DECIMAL_FORMAT.format(100d * dti.getErrorRate()) + "%", ""); final JButton openInEditorButton = new JButton("Edit"); openInEditorButton.addActionListener(new 
ActionListener() { @Override public void actionPerformed(ActionEvent e) { GraphUtil.importDecisionTreeInEditor(dtFactory, part, applicationContext, eventPublisher, commandDispatcher); } }); myGraph.addMetaInfoComponent(openInEditorButton); myGraph.fitGraphToSubPanel(frameWidth - 10 * presentValues.size(), frameHeight - 10, ratio); mapPanels.put((JComponent) myGraph, ratio); } this.mp = new MultiPanel(mapPanels, (int) frameWidth, (int) frameHeight, this.withWeightCheckBox.isSelected()); this.panel.add(this.mp, BorderLayout.CENTER); if (this.attrSelectionCombo.getActionListeners().length > 0) { this.attrSelectionCombo.removeActionListener(attrSelectionComboListener); } if (this.withWeightCheckBox.getActionListeners().length > 0) { this.withWeightCheckBox.removeActionListener(attrSelectionComboListener); } this.attrSelectionCombo.removeAllItems(); for (final Attribute attr : WekaDataStatsUtil.getNominalAttributesList(dataSet)) { this.attrSelectionCombo.addItem(attr.name()); } this.attrSelectionCombo.setSelectedItem(oldSelected); this.attrSelectionComboListener = new ActionListener() { @Override public void actionPerformed(ActionEvent e) { update(dataSet); } }; this.attrSelectionCombo.addActionListener(attrSelectionComboListener); this.withWeightCheckBox.addActionListener(attrSelectionComboListener); }
From source file:lu.lippmann.cdb.datasetview.tasks.SetAttributeAsTimestampTask.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * <p>Asks the user to pick one of the numeric attributes and returns a copy of the dataset
 * with an extra trailing attribute "date" (format "dd-MM-yyyy HH:mm") whose value, for each
 * instance, is the value of the picked attribute. If the dialog yields no selection, the
 * input dataset is returned as is.
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    final Object[] numericNames = WekaDataStatsUtil.getNumericAttributesNames(dataSet).toArray();
    final String chosen = (String) JOptionPane.showInputDialog(null, "Select an attribute:\n",
            "Attribute selection", JOptionPane.PLAIN_MESSAGE, null, numericNames, "");
    if (chosen == null) {
        return dataSet;
    }

    final Instances withDate = new Instances(dataSet);
    withDate.insertAttributeAt(new Attribute("date", "dd-MM-yyyy HH:mm"), withDate.numAttributes());

    final int sourceIdx = withDate.attribute(chosen).index();
    final int dateIdx = withDate.numAttributes() - 1;
    final int rowCount = withDate.numInstances();
    for (int row = 0; row < rowCount; row++) {
        final double raw = withDate.instance(row).value(sourceIdx);
        withDate.instance(row).setValue(dateIdx, raw);
    }
    return withDate;
}
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDB.java
License:Open Source License
public static Instances findSimilarCases(final String attrname, final double x, final double y, final int year, final String season, final int gapSize, final int gapPosition, final boolean isDuringRising, final boolean hasDownstream, final boolean hasUpstream, final String flow) throws Exception { /* build the current case */ final StringBuilder newsb = new StringBuilder(DATABASE_AS_STRINGBUILDER); newsb.append(attrname).append(",").append(x).append(",").append(y).append(",") .append(gapSize).append(",").append(gapPosition).append(",") .append(season).append(",").append(year).append(",") .append(isDuringRising).append(",").append(flow).append(",") .append(hasDownstream).append(",").append(hasUpstream).append(",") .append("?").append(",").append("?").append(",").append("?").append(",").append("?").append(",") .append("?").append(",").append("?").append(",").append(0) // MAE .append(",").append(0) // RMSE .append(",").append(0) // RSR .append(",").append(0) // PBIAS .append(",").append(1) // NS .append(",").append(1) // IOA .append(",").append(true) // BEST SOLUTION .append("\n"); final Instances tmpDB = WekaDataAccessUtil.loadInstancesFromCSVString(newsb.toString(), false); final Instance newcase = tmpDB.instance(tmpDB.numInstances() - 1); /* compute NN for the current case */ final Instances knn = WekaMachineLearningUtil.computeNearestNeighbours(tmpDB, newcase, 10, "2,3,4,6,7,8,9,10,23"); knn.add(0, newcase);// w w w. java2s . com System.out.println(knn.toSummaryString()); return knn; }
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
/**
 * Counts the distinct (serieName, gapSize, gapPosition) combinations found in the dataset.
 *
 * @param newkdb dataset providing the "serieName", "gapSize" and "gapPosition" attributes
 * @return number of distinct combinations
 */
private static int getCountOfFictiveGaps(final Instances newkdb) {
    final Set<String> distinctGaps = new HashSet<String>();
    final int rowCount = newkdb.numInstances();
    int row = 0;
    while (row < rowCount) {
        // Attribute lookups stay inside the loop so an empty dataset never touches them.
        final String serie = newkdb.instance(row).stringValue(newkdb.attribute("serieName").index());
        final double size = newkdb.instance(row).value(newkdb.attribute("gapSize").index());
        final double position = newkdb.instance(row).value(newkdb.attribute("gapPosition").index());
        distinctGaps.add(serie + "-" + size + "-" + position);
        row++;
    }
    return distinctGaps.size();
}
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
/** * Main method.//from w w w . ja v a 2 s.c o m * @param args command line arguments */ public static void main(final String[] args) { try { HydroRunner.init(false); Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB()); System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb)); System.out.println(newkdb.toSummaryString()); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useDownstream").index(), "false"); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useUpstream").index(), "false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false"); //System.out.println(newkdb.toSummaryString()); Instances withGoodNashSutcliffe = new Instances(newkdb, 0); for (int i = 0; i < newkdb.numInstances(); i++) { if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) { withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray())); } } System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances()); final double perc = (double) getCountOfFictiveGaps(withGoodNashSutcliffe) / getCountOfFictiveGaps(newkdb); System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> " + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)"); WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe, new File("./withGoodNashSutcliffe.arff")); } catch (final Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java
License:Open Source License
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx, final boolean withLegend) { final XYSeriesCollection data = new XYSeriesCollection(); final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>(); final int classIndex = dataSet.classIndex(); if (classIndex < 0) { final XYSeries series = new XYSeries("Serie", false); for (int i = 0; i < dataSet.numInstances(); i++) { series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); }//from w ww.ja v a 2 s . com data.addSeries(series); } else { final Set<String> pvs = new TreeSet<String>( WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex)); int p = 0; for (final String pv : pvs) { final XYSeries series = new XYSeries(pv, false); for (int i = 0; i < dataSet.numInstances(); i++) { if (dataSet.instance(i).stringValue(classIndex).equals(pv)) { if (!filteredInstances.containsKey(p)) { filteredInstances.put(p, new ArrayList<Instance>()); } filteredInstances.get(p).add(dataSet.instance(i)); series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); } } data.addSeries(series); p++; } } final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title dataSet.attribute(xidx).name(), // x axis label dataSet.attribute(yidx).name(), // y axis label data, // data PlotOrientation.VERTICAL, withLegend, // include legend true, // tooltips false // urls ); final XYPlot xyPlot = (XYPlot) chart.getPlot(); xyPlot.setBackgroundImage(shapeImage); final XYItemRenderer renderer = xyPlot.getRenderer(); final XYToolTipGenerator gen = new XYToolTipGenerator() { @Override public String generateToolTip(XYDataset dataset, int series, int item) { if (classIndex < 0) { return InstanceFormatter.htmlFormat(dataSet.instance(item), true); } else { return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true); } } }; xyPlot.getRangeAxis().setVisible(false); 
xyPlot.getDomainAxis().setVisible(false); xyPlot.getRangeAxis().setLowerBound(60000); xyPlot.getRangeAxis().setUpperBound(135000); xyPlot.getDomainAxis().setLowerBound(45000); xyPlot.getDomainAxis().setUpperBound(110000); xyPlot.setDomainGridlinesVisible(false); xyPlot.setRangeGridlinesVisible(false); xyPlot.setBackgroundPaint(Color.white); int nbSeries; if (classIndex < 0) { nbSeries = 1; } else { nbSeries = filteredInstances.keySet().size(); } for (int i = 0; i < nbSeries; i++) { renderer.setSeriesToolTipGenerator(i, gen); } final XYItemLabelGenerator lg = new XYItemLabelGenerator() { @Override public String generateLabel(final XYDataset ds, final int series, final int item) { final Instance iii = filteredInstances.get(series).get(item); if (iii.stringValue(3).equals(SELECTED_STATUS)) { final String label = iii.stringValue(0); return label.substring(0, label.length() - 4); } else return null; } }; xyPlot.getRenderer().setBaseItemLabelGenerator(lg); xyPlot.getRenderer().setBaseItemLabelsVisible(true); xyPlot.getRenderer().setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12)); xyPlot.getRenderer().setSeriesPaint(1, Color.BLUE); xyPlot.getRenderer().setSeriesPaint(0, new Color(210, 210, 210)); xyPlot.getRenderer().setSeriesPaint(2, Color.DARK_GRAY); //System.out.println("shape -> "+xyPlot.getRenderer().getSeriesStroke(0)); final ChartPanel cp = new ChartPanel(chart); cp.setDomainZoomable(false); cp.setRangeZoomable(false); return cp; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Fills the gaps of the dataset via {@code fillGaps0}, temporarily replacing the first date
 * attribute by a plain numeric copy and restoring it as a date attribute afterwards.
 *
 * <p>The returned dataset has the restored date attribute (named {@code <datename>_new}) as
 * its last attribute and is sorted on it.
 *
 * @param ds dataset to process; it is copied, not modified
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if processing fails
 */
private Instances fillAllGaps(final Instances ds) throws Exception {
    // Validate before any attribute lookup: an index of -1 must produce the explicit
    // error below rather than a failure inside attribute(-1).
    final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }

    Instances newds = new Instances(ds);
    final String datename = newds.attribute(firstDateIdx).name();

    /* add a 'fake numerical' time field */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    final int fakeIdx = newds.numAttributes() - 1;
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(fakeIdx, newds.instance(i).value(firstDateIdx));
    }

    /* remove the 'true' time field */
    newds.deleteAttributeAt(firstDateIdx);

    /* process the dataset */
    newds = fillGaps0(newds);

    /* re-add the 'true' time field according to the 'fake numerical' time field */
    final String df = ds.attribute(firstDateIdx).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1,
                newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* delete the 'fake numerical' time field */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    newds.sort(newds.numAttributes() - 1);

    return newds;
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Fills the gaps of the dataset via {@code fillGaps0} after enriching it with discretized
 * time attributes, a numeric copy of the first date attribute, and binary versions of the
 * nominal attributes. Afterwards a date attribute named {@code <datename>_new} is rebuilt
 * from the numeric copy, the dataset is sorted on it, and the helper attributes (those whose
 * name starts with "t_" and the numeric copy) are removed.
 *
 * @param ds dataset to process; it is copied, not modified
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if processing fails
 */
private Instances fillAllGapsWithDiscretizedTime(final Instances ds) throws Exception {
    int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    // Validate before any attribute lookup: an index of -1 must produce the explicit
    // error below rather than a failure inside attribute(-1).
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = ds.attribute(firstDateIdx).name();

    Instances newds = new Instances(ds);

    /* add discretized time */
    newds = WekaTimeSeriesUtil.buildDataSetWithDiscretizedTime(newds);

    /* add fake numerical time */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove 'true' date */
    while (firstDateIdx != -1) {
        newds.deleteAttributeAt(firstDateIdx);
        firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    }

    /* transform nominal as binaries */
    for (int iidx : WekaDataStatsUtil.getNominalAttributesIndexes(newds)) {
        newds = WekaDataProcessingUtil.buildDataSetWithNominalAsBinary(newds, iidx);
    }

    /* rename attributes for which the name can occur issues in tree evaluation */
    for (int k = 0; k < newds.numAttributes(); k++) {
        String atn = newds.attribute(k).name();
        if (atn.contains("=")) {
            atn = atn.replaceAll("=", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains("<")) {
            atn = atn.replaceAll("<", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(">")) {
            atn = atn.replaceAll(">", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(".")) {
            // replace(), not replaceAll(): '.' must be taken literally, not as a regex
            atn = atn.replace(".", (int) (Math.random() * 1000) + "");
        }
        newds = WekaDataProcessingUtil.renameAttribute(newds, k, atn);
    }

    /* replace missing values */
    newds = fillGaps0(newds);

    /* reconstruct date according to discretized time */
    final String df = ds.attribute(WekaDataStatsUtil.getFirstDateAttributeIdx(ds)).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newfirstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    for (int i = 0; i < newds.numInstances(); i++) {
        final Instance inst = newds.instance(i);
        inst.setValue(newfirstDateIdx, newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* sort by date ! */
    newds.sort(newfirstDateIdx);

    /* remove discretized time */
    final Set<String> toRemove = new HashSet<String>();
    for (int i = 0; i < newds.numAttributes(); i++) {
        if (newds.attribute(i).name().startsWith("t_")) {
            toRemove.add(newds.attribute(i).name());
        }
    }
    for (final String tr : toRemove) {
        newds.deleteAttributeAt(newds.attribute(tr).index());
    }

    /* delete the fake attribute time */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    return newds;
}