List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
private static int getCountOfFictiveGaps(final Instances newkdb) { final Set<String> set = new HashSet<String>(); for (int i = 0; i < newkdb.numInstances(); i++) { final String key = newkdb.instance(i).stringValue(newkdb.attribute("serieName").index()) + "-" + newkdb.instance(i).value(newkdb.attribute("gapSize").index()) + "-" + newkdb.instance(i).value(newkdb.attribute("gapPosition").index()); set.add(key);//from ww w . j a v a 2s. c om } return set.size(); }
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
/** * Main method.//from ww w.j ava 2 s. c o m * @param args command line arguments */ public static void main(final String[] args) { try { HydroRunner.init(false); Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB()); System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb)); System.out.println(newkdb.toSummaryString()); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useDownstream").index(), "false"); newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb, newkdb.attribute("useUpstream").index(), "false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false"); //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false"); //System.out.println(newkdb.toSummaryString()); Instances withGoodNashSutcliffe = new Instances(newkdb, 0); for (int i = 0; i < newkdb.numInstances(); i++) { if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) { withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray())); } } System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances()); final double perc = (double) getCountOfFictiveGaps(withGoodNashSutcliffe) / getCountOfFictiveGaps(newkdb); System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> " + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)"); WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe, new File("./withGoodNashSutcliffe.arff")); } catch (final Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java
License:Open Source License
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx, final boolean withLegend) { final XYSeriesCollection data = new XYSeriesCollection(); final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>(); final int classIndex = dataSet.classIndex(); if (classIndex < 0) { final XYSeries series = new XYSeries("Serie", false); for (int i = 0; i < dataSet.numInstances(); i++) { series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); }// w w w . j a v a 2 s. co m data.addSeries(series); } else { final Set<String> pvs = new TreeSet<String>( WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex)); int p = 0; for (final String pv : pvs) { final XYSeries series = new XYSeries(pv, false); for (int i = 0; i < dataSet.numInstances(); i++) { if (dataSet.instance(i).stringValue(classIndex).equals(pv)) { if (!filteredInstances.containsKey(p)) { filteredInstances.put(p, new ArrayList<Instance>()); } filteredInstances.get(p).add(dataSet.instance(i)); series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx)); } } data.addSeries(series); p++; } } final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title dataSet.attribute(xidx).name(), // x axis label dataSet.attribute(yidx).name(), // y axis label data, // data PlotOrientation.VERTICAL, withLegend, // include legend true, // tooltips false // urls ); final XYPlot xyPlot = (XYPlot) chart.getPlot(); xyPlot.setBackgroundImage(shapeImage); final XYItemRenderer renderer = xyPlot.getRenderer(); final XYToolTipGenerator gen = new XYToolTipGenerator() { @Override public String generateToolTip(XYDataset dataset, int series, int item) { if (classIndex < 0) { return InstanceFormatter.htmlFormat(dataSet.instance(item), true); } else { return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true); } } }; xyPlot.getRangeAxis().setVisible(false); 
xyPlot.getDomainAxis().setVisible(false); xyPlot.getRangeAxis().setLowerBound(60000); xyPlot.getRangeAxis().setUpperBound(135000); xyPlot.getDomainAxis().setLowerBound(45000); xyPlot.getDomainAxis().setUpperBound(110000); xyPlot.setDomainGridlinesVisible(false); xyPlot.setRangeGridlinesVisible(false); xyPlot.setBackgroundPaint(Color.white); int nbSeries; if (classIndex < 0) { nbSeries = 1; } else { nbSeries = filteredInstances.keySet().size(); } for (int i = 0; i < nbSeries; i++) { renderer.setSeriesToolTipGenerator(i, gen); } final XYItemLabelGenerator lg = new XYItemLabelGenerator() { @Override public String generateLabel(final XYDataset ds, final int series, final int item) { final Instance iii = filteredInstances.get(series).get(item); if (iii.stringValue(3).equals(SELECTED_STATUS)) { final String label = iii.stringValue(0); return label.substring(0, label.length() - 4); } else return null; } }; xyPlot.getRenderer().setBaseItemLabelGenerator(lg); xyPlot.getRenderer().setBaseItemLabelsVisible(true); xyPlot.getRenderer().setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12)); xyPlot.getRenderer().setSeriesPaint(1, Color.BLUE); xyPlot.getRenderer().setSeriesPaint(0, new Color(210, 210, 210)); xyPlot.getRenderer().setSeriesPaint(2, Color.DARK_GRAY); //System.out.println("shape -> "+xyPlot.getRenderer().getSeriesStroke(0)); final ChartPanel cp = new ChartPanel(chart); cp.setDomainZoomable(false); cp.setRangeZoomable(false); return cp; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
private Instances fillAllGaps(final Instances ds) throws Exception { Instances newds = new Instances(ds); final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds); final String datename = newds.attribute(firstDateIdx).name(); if (firstDateIdx == -1) { throw new Exception("No date attribute in this dataset!"); }//from w w w . jav a 2 s . co m /* add a 'fake numerical' time field */ newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx)); } /* remove the 'true' time field */ newds.deleteAttributeAt(firstDateIdx); /* process the dataset */ newds = fillGaps0(newds); /* re-add the 'true' time field according to the 'fake numerical' time field */ final String df = ds.attribute(firstDateIdx).getDateFormat(); newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes()); for (int i = 0; i < newds.numInstances(); i++) { newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(newds.numAttributes() - 2)); } /* delete the 'fake numerical' time field */ newds.deleteAttributeAt(newds.numAttributes() - 2); newds.sort(newds.numAttributes() - 1); return newds; }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Fills the gaps of a dataset using discretized time attributes as additional
 * inputs.
 * <p>
 * Works on a copy of the input: discretized time attributes are added, every
 * date attribute is replaced by a numerical copy of the first one, nominal
 * attributes are turned into binary ones, and attribute names containing
 * {@code =}, {@code <}, {@code >} or {@code .} are rewritten before
 * {@code fillGaps0} is applied. Afterwards the date attribute is rebuilt as
 * {@code <original name>_new}, the instances are sorted on it, and the
 * attributes whose name starts with {@code t_} as well as the numerical time
 * copy are removed.
 *
 * @param ds the dataset to process
 * @return a new dataset processed by {@code fillGaps0} and sorted by date
 * @throws Exception if the dataset has no date attribute, or if one of the processing steps fails
 */
private Instances fillAllGapsWithDiscretizedTime(final Instances ds) throws Exception {
    final int originalDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    /* validate BEFORE using the index, otherwise attribute(-1) fails first and hides this message */
    if (originalDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = ds.attribute(originalDateIdx).name();

    Instances newds = new Instances(ds);

    /* add discretized time */
    newds = WekaTimeSeriesUtil.buildDataSetWithDiscretizedTime(newds);

    /* add fake numerical time */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(originalDateIdx));
    }

    /* remove 'true' date (every date attribute, one after the other) */
    int dateIdx = originalDateIdx;
    while (dateIdx != -1) {
        newds.deleteAttributeAt(dateIdx);
        dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    }

    /* transform nominal as binaries */
    /* NOTE(review): the indexes are computed once while newds is replaced in the loop -- confirm they stay valid */
    for (int iidx : WekaDataStatsUtil.getNominalAttributesIndexes(newds)) {
        newds = WekaDataProcessingUtil.buildDataSetWithNominalAsBinary(newds, iidx);
    }

    /* rename attributes for which the name can occur issues in tree evaluation */
    for (int k = 0; k < newds.numAttributes(); k++) {
        String atn = newds.attribute(k).name();
        if (atn.contains("=")) {
            atn = atn.replaceAll("=", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains("<")) {
            atn = atn.replaceAll("<", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(">")) {
            atn = atn.replaceAll(">", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(".")) {
            /* literal replace: '.' would match any character as a regex */
            atn = atn.replace(".", (int) (Math.random() * 1000) + "");
        }
        newds = WekaDataProcessingUtil.renameAttribute(newds, k, atn);
    }

    /* replace missing values */
    newds = fillGaps0(newds);

    /* reconstruct date according to discretized time */
    final String df = ds.attribute(originalDateIdx).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newfirstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    for (int i = 0; i < newds.numInstances(); i++) {
        final Instance inst = newds.instance(i);
        inst.setValue(newfirstDateIdx, newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* sort by date ! */
    newds.sort(newfirstDateIdx);

    /* remove discretized time (names collected first because deleting shifts the indexes) */
    final Set<String> toRemove = new HashSet<String>();
    for (int i = 0; i < newds.numAttributes(); i++) {
        if (newds.attribute(i).name().startsWith("t_")) {
            toRemove.add(newds.attribute(i).name());
        }
    }
    for (final String tr : toRemove) {
        newds.deleteAttributeAt(newds.attribute(tr).index());
    }

    /* delete the fake attribute time */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    return newds;
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
public final double evaluateMAEWithAFictiveGap(final Instances ds, final int begin, final int end, final int idx) throws Exception { /* build a new dataset with a new fictive gap */ final Instances newds = new Instances(ds); for (int i = Math.max(0, begin); i < Math.min(ds.numInstances() - 1, end); i++) { newds.instance(i).setMissing(idx); }// ww w . ja va 2 s.c o m /* fill the gap */ final Instances predictedds = fillGaps(newds); /* compute the MAE ;-) */ return mae(ds, predictedds, idx, begin, end); }
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Evaluates the gap filling on an artificial gap, using the root mean squared error.
 * <p>
 * A copy of the dataset is made, the attribute {@code idx} is set to missing
 * for the rows from {@code max(0, begin)} up to (excluding)
 * {@code min(numInstances - 1, end)}, the copy is passed to {@code fillGaps},
 * and the result is compared with the original dataset through {@code rmse}.
 *
 * @param ds    the reference dataset (left untouched)
 * @param begin index of the first row of the fictive gap
 * @param end   upper bound (exclusive) of the fictive gap
 * @param idx   index of the attribute in which the gap is created
 * @return the value returned by {@code rmse} for the filled dataset
 * @throws Exception if the gap filling fails
 */
public final double evaluateRMSEWithAFictiveGap(final Instances ds, final int begin, final int end,
        final int idx) throws Exception {
    /* build a new dataset with a new fictive gap */
    final Instances withGap = new Instances(ds);
    final int gapStop = Math.min(ds.numInstances() - 1, end);
    int row = Math.max(0, begin);
    while (row < gapStop) {
        withGap.instance(row).setMissing(idx);
        row++;
    }

    /* fill the gap */
    final Instances filled = fillGaps(withGap);

    /* compute the RMSE */
    return rmse(ds, filled, idx, begin, end);
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Evaluates the gap filling on an artificial gap, using the Nash-Sutcliffe coefficient.
 * <p>
 * A copy of the dataset is made, the attribute {@code idx} is set to missing
 * for the rows from {@code max(0, begin)} up to (excluding)
 * {@code min(numInstances - 1, end)}, the copy is passed to {@code fillGaps},
 * and the result is compared with the original dataset through
 * {@code nashSutcliffe}.
 *
 * @param ds    the reference dataset (left untouched)
 * @param begin index of the first row of the fictive gap
 * @param end   upper bound (exclusive) of the fictive gap
 * @param idx   index of the attribute in which the gap is created
 * @return the value returned by {@code nashSutcliffe} for the filled dataset
 * @throws Exception if the gap filling fails
 */
public final double evaluateNSWithAFictiveGap(final Instances ds, final int begin, final int end,
        final int idx) throws Exception {
    /* build a new dataset with a new fictive gap */
    final Instances withGap = new Instances(ds);
    final int gapStop = Math.min(ds.numInstances() - 1, end);
    int row = Math.max(0, begin);
    while (row < gapStop) {
        withGap.instance(row).setMissing(idx);
        row++;
    }

    /* fill the gap */
    final Instances filled = fillGaps(withGap);

    /* compute the NS */
    return nashSutcliffe(ds, filled, idx, begin, end);
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Computes the root mean squared error between two datasets for one attribute
 * over a range of rows.
 * <p>
 * Rows from {@code max(0, begin)} up to (excluding)
 * {@code min(numInstances - 1, end)} are considered; a row is skipped when the
 * expected instance has a missing value in any attribute. The result is NaN
 * when no row is taken into account.
 * <p>
 * TODO: refactor it in order to use MathsUtil
 *
 * @param expected  the reference dataset
 * @param predicted the dataset holding the predicted values
 * @param idx       index of the compared attribute
 * @param begin     index of the first row
 * @param end       upper bound (exclusive) of the row range
 * @return the root mean squared error over the rows taken into account
 */
public static double rmse(final Instances expected, final Instances predicted, final int idx,
        final int begin, final int end) {
    final int stop = Math.min(expected.numInstances() - 1, end);
    double squaredErrorSum = 0d;
    int counted = 0;
    for (int row = Math.max(0, begin); row < stop; row++) {
        if (!expected.instance(row).hasMissingValue()) {
            final double delta = expected.instance(row).value(idx) - predicted.instance(row).value(idx);
            squaredErrorSum += delta * delta;
            counted++;
        }
    }
    return Math.sqrt(squaredErrorSum / counted);
}
From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java
License:Open Source License
/**
 * Computes the mean absolute error between two datasets for one attribute over
 * a range of rows.
 * <p>
 * Rows from {@code max(0, begin)} up to (excluding)
 * {@code min(numInstances - 1, end)} are considered; a row is skipped when the
 * expected instance has a missing value in any attribute. The result is NaN
 * when no row is taken into account.
 * <p>
 * TODO: refactor it in order to use MathsUtil
 *
 * @param expected  the reference dataset
 * @param predicted the dataset holding the predicted values
 * @param idx       index of the compared attribute
 * @param begin     index of the first row
 * @param end       upper bound (exclusive) of the row range
 * @return the mean absolute error over the rows taken into account
 */
public static double mae(final Instances expected, final Instances predicted, final int idx,
        final int begin, final int end) {
    final int stop = Math.min(expected.numInstances() - 1, end);
    double absoluteErrorSum = 0d;
    int counted = 0;
    for (int row = Math.max(0, begin); row < stop; row++) {
        if (!expected.instance(row).hasMissingValue()) {
            absoluteErrorSum += Math.abs(expected.instance(row).value(idx) - predicted.instance(row).value(idx));
            counted++;
        }
    }
    return absoluteErrorSum / counted;
}