Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License:Open Source License

/**
 * Counts the distinct fictive gaps referenced by the given dataset.
 * Two rows refer to the same gap when their "serieName", "gapSize" and
 * "gapPosition" values produce the same "name-size-position" key.
 *
 * @param newkdb dataset holding the "serieName", "gapSize" and "gapPosition" attributes
 * @return number of distinct keys found; 0 for a dataset without instances
 */
private static int getCountOfFictiveGaps(final Instances newkdb) {
    final int rowCount = newkdb.numInstances();
    if (rowCount == 0) {
        // nothing to scan; also avoids touching the attributes of an empty set
        return 0;
    }

    // the attribute positions are loop-invariant: look them up by name only once
    final int serieNameIdx = newkdb.attribute("serieName").index();
    final int gapSizeIdx = newkdb.attribute("gapSize").index();
    final int gapPositionIdx = newkdb.attribute("gapPosition").index();

    final Set<String> set = new HashSet<String>();
    for (int i = 0; i < rowCount; i++) {
        final String key = newkdb.instance(i).stringValue(serieNameIdx) + "-"
                + newkdb.instance(i).value(gapSizeIdx) + "-"
                + newkdb.instance(i).value(gapPositionIdx);
        set.add(key);
    }
    return set.size();
}

From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java

License:Open Source License

/**
 * Main method. Loads the knowledge database, keeps the rows whose
 * "useDownstream" and "useUpstream" filters are applied with the value "false",
 * extracts the rows with a "NashSutcliffe" value above 0.5, prints a few
 * statistics and saves the extracted rows into "./withGoodNashSutcliffe.arff".
 *
 * @param args command line arguments (not used)
 */
public static void main(final String[] args) {
    try {
        HydroRunner.init(false);

        Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB());

        System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb));

        System.out.println(newkdb.toSummaryString());

        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useDownstream").index(), "false");
        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useUpstream").index(), "false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false");

        // empty dataset sharing the header of newkdb, filled with the good rows only
        Instances withGoodNashSutcliffe = new Instances(newkdb, 0);
        for (int i = 0; i < newkdb.numInstances(); i++) {
            if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) {
                withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray()));
            }
        }

        System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances());

        // count each set once and reuse the values for both the ratio and the message
        final int goodGaps = getCountOfFictiveGaps(withGoodNashSutcliffe);
        final int allGaps = getCountOfFictiveGaps(newkdb);
        // scaled to 0..100 because the value is printed with a '%' suffix;
        // reported as 0 when there is no gap at all instead of NaN
        final double perc = allGaps == 0 ? 0d : 100d * goodGaps / allGaps;
        System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> "
                + goodGaps + " (" + perc + "%)");

        WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe,
                new File("./withGoodNashSutcliffe.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}

From source file:lu.lippmann.cdb.ext.hydviga.data.StationsDataProvider.java

License:Open Source License

/**
 * Builds a non-zoomable scatter plot panel drawn over the background image
 * {@code shapeImage}, using two attributes of the dataset as coordinates.
 * Without a class attribute all instances go into a single series; otherwise
 * one series is created per present class value, in sorted order.
 *
 * @param dataSet    instances to plot
 * @param xidx       index of the attribute used for the x coordinate
 * @param yidx       index of the attribute used for the y coordinate
 * @param withLegend whether the chart shows a legend
 * @return the panel wrapping the chart
 */
private ChartPanel buildMapPanel(final Instances dataSet, final int xidx, final int yidx,
        final boolean withLegend) {
    final XYSeriesCollection data = new XYSeriesCollection();
    // series index -> instances of that series, in the order they were added to it
    final Map<Integer, java.util.List<Instance>> filteredInstances = new HashMap<Integer, java.util.List<Instance>>();
    final int classIndex = dataSet.classIndex();
    if (classIndex < 0) {
        // unsorted series: item k of the series is instance k of the dataset
        final XYSeries series = new XYSeries("Serie", false);
        for (int i = 0; i < dataSet.numInstances(); i++) {
            series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
        }
        data.addSeries(series);
    } else {
        final Set<String> pvs = new TreeSet<String>(
                WekaDataStatsUtil.getPresentValuesForNominalAttribute(dataSet, classIndex));
        int p = 0;
        for (final String pv : pvs) {
            final XYSeries series = new XYSeries(pv, false);
            for (int i = 0; i < dataSet.numInstances(); i++) {
                if (dataSet.instance(i).stringValue(classIndex).equals(pv)) {
                    if (!filteredInstances.containsKey(p)) {
                        filteredInstances.put(p, new ArrayList<Instance>());
                    }
                    filteredInstances.get(p).add(dataSet.instance(i));

                    series.add(dataSet.instance(i).value(xidx), dataSet.instance(i).value(yidx));
                }
            }
            data.addSeries(series);

            p++;
        }

    }

    final JFreeChart chart = ChartFactory.createScatterPlot(null, // chart title
            dataSet.attribute(xidx).name(), // x axis label
            dataSet.attribute(yidx).name(), // y axis label
            data, // data
            PlotOrientation.VERTICAL, withLegend, // include legend
            true, // tooltips
            false // urls
    );

    final XYPlot xyPlot = (XYPlot) chart.getPlot();
    xyPlot.setBackgroundImage(shapeImage);

    final XYItemRenderer renderer = xyPlot.getRenderer();
    final XYToolTipGenerator gen = new XYToolTipGenerator() {
        @Override
        public String generateToolTip(XYDataset dataset, int series, int item) {
            if (classIndex < 0) {
                return InstanceFormatter.htmlFormat(dataSet.instance(item), true);
            } else {
                return InstanceFormatter.htmlFormat(filteredInstances.get(series).get(item), true);
            }
        }
    };

    xyPlot.getRangeAxis().setVisible(false);
    xyPlot.getDomainAxis().setVisible(false);

    // fixed axis ranges; NOTE(review): presumably the extent covered by shapeImage - confirm
    xyPlot.getRangeAxis().setLowerBound(60000);
    xyPlot.getRangeAxis().setUpperBound(135000);
    xyPlot.getDomainAxis().setLowerBound(45000);
    xyPlot.getDomainAxis().setUpperBound(110000);

    xyPlot.setDomainGridlinesVisible(false);
    xyPlot.setRangeGridlinesVisible(false);

    xyPlot.setBackgroundPaint(Color.white);

    final int nbSeries;
    if (classIndex < 0) {
        nbSeries = 1;
    } else {
        nbSeries = filteredInstances.size();
    }

    for (int i = 0; i < nbSeries; i++) {
        renderer.setSeriesToolTipGenerator(i, gen);
    }

    final XYItemLabelGenerator lg = new XYItemLabelGenerator() {
        @Override
        public String generateLabel(final XYDataset ds, final int series, final int item) {
            // filteredInstances is only filled when a class attribute exists; without one
            // the single series maps directly onto the dataset (same rule as the tooltips)
            final Instance iii;
            if (classIndex < 0) {
                iii = dataSet.instance(item);
            } else {
                iii = filteredInstances.get(series).get(item);
            }
            if (iii.stringValue(3).equals(SELECTED_STATUS)) {
                final String label = iii.stringValue(0);
                // drop the last 4 characters; labels shorter than that become empty
                return label.substring(0, Math.max(0, label.length() - 4));
            } else {
                return null;
            }
        }
    };
    renderer.setBaseItemLabelGenerator(lg);
    renderer.setBaseItemLabelsVisible(true);
    renderer.setBaseItemLabelFont(new Font("Tahoma", Font.PLAIN, 12));

    renderer.setSeriesPaint(1, Color.BLUE);
    renderer.setSeriesPaint(0, new Color(210, 210, 210));
    renderer.setSeriesPaint(2, Color.DARK_GRAY);

    final ChartPanel cp = new ChartPanel(chart);
    cp.setDomainZoomable(false);
    cp.setRangeZoomable(false);

    return cp;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Fills the gaps of a copy of the given dataset via {@code fillGaps0}.
 * The first date attribute is replaced by a plain attribute carrying the same
 * values during the processing, then rebuilt as a date attribute (named
 * "&lt;original name&gt;_new", appended last) on which the result is sorted.
 *
 * @param ds dataset to process; it is copied before any change is made
 * @return the processed copy, sorted on the rebuilt date attribute
 * @throws Exception if the dataset has no date attribute, or if the processing fails
 */
private Instances fillAllGaps(final Instances ds) throws Exception {
    Instances newds = new Instances(ds);

    final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    // must be checked before the index is used to read the attribute
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = newds.attribute(firstDateIdx).name();

    /* add a 'fake numerical' time field */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    final int fakeIdx = newds.numAttributes() - 1;
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(fakeIdx, newds.instance(i).value(firstDateIdx));
    }

    /* remove the 'true' time field */
    newds.deleteAttributeAt(firstDateIdx);

    /* process the dataset */
    newds = fillGaps0(newds);

    /* re-add the 'true' time field according to the 'fake numerical' time field */
    final String df = ds.attribute(firstDateIdx).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newDateIdx = newds.numAttributes() - 1;
    for (int i = 0; i < newds.numInstances(); i++) {
        // the fake field sits just before the freshly appended date field
        newds.instance(i).setValue(newDateIdx, newds.instance(i).value(newDateIdx - 1));
    }

    /* delete the 'fake numerical' time field */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    newds.sort(newds.numAttributes() - 1);

    return newds;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Fills the gaps of a copy of the given dataset via {@code fillGaps0}, after
 * enriching it with discretized time attributes, removing every date
 * attribute, turning nominal attributes into binary ones and renaming the
 * attributes whose name contains '=', '&lt;', '&gt;' or '.'.
 * A date attribute named "&lt;original name&gt;_new" is rebuilt afterwards and the
 * result is sorted on it.
 *
 * @param ds dataset to process; it is copied before any change is made
 * @return the processed copy
 * @throws Exception if the dataset has no date attribute, or if the processing fails
 */
private Instances fillAllGapsWithDiscretizedTime(final Instances ds) throws Exception {
    int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(ds);
    // must be checked before the index is used to read the attribute
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = ds.attribute(firstDateIdx).name();

    Instances newds = new Instances(ds);

    /* add discretized time */
    newds = WekaTimeSeriesUtil.buildDataSetWithDiscretizedTime(newds);

    /* add fake numerical time */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove 'true' date */
    while (firstDateIdx != -1) {
        newds.deleteAttributeAt(firstDateIdx);
        firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    }

    /* transform nominal as binaries */
    for (int iidx : WekaDataStatsUtil.getNominalAttributesIndexes(newds)) {
        newds = WekaDataProcessingUtil.buildDataSetWithNominalAsBinary(newds, iidx);
    }

    /* rename attributes whose name can cause issues in tree evaluation */
    for (int k = 0; k < newds.numAttributes(); k++) {
        String atn = newds.attribute(k).name();
        // each problematic character is replaced by a random number below 1000
        if (atn.contains("=")) {
            atn = atn.replaceAll("=", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains("<")) {
            atn = atn.replaceAll("<", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(">")) {
            atn = atn.replaceAll(">", (int) (Math.random() * 1000) + "");
        }
        if (atn.contains(".")) {
            atn = atn.replace(".", (int) (Math.random() * 1000) + "");
        }
        newds = WekaDataProcessingUtil.renameAttribute(newds, k, atn);
    }

    /* replace missing values */
    newds = fillGaps0(newds);

    /* reconstruct date according to discretized time */
    final String df = ds.attribute(WekaDataStatsUtil.getFirstDateAttributeIdx(ds)).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    final int newfirstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    for (int i = 0; i < newds.numInstances(); i++) {
        final Instance inst = newds.instance(i);
        inst.setValue(newfirstDateIdx, newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* sort by date ! */
    newds.sort(newfirstDateIdx);

    /* remove discretized time */
    // names are collected first because deleting shifts the attribute indexes
    final Set<String> toRemove = new HashSet<String>();
    for (int i = 0; i < newds.numAttributes(); i++) {
        if (newds.attribute(i).name().startsWith("t_")) {
            toRemove.add(newds.attribute(i).name());
        }
    }
    for (final String tr : toRemove) {
        newds.deleteAttributeAt(newds.attribute(tr).index());
    }

    /* delete the fake attribute time */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    return newds;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Evaluates the gap filling on an artificial gap: values of attribute
 * {@code idx} are marked missing on a copy of the dataset, the copy is filled
 * with {@code fillGaps} and the result is scored against {@code ds} with
 * {@code mae}.
 * NOTE(review): the row {@code ds.numInstances() - 1} is never marked missing,
 * whatever {@code end} is - confirm this is intended.
 * NOTE(review): confirm that the copy does not share its Instance objects
 * with {@code ds}; otherwise the gap would also appear in {@code ds}.
 *
 * @param ds    reference dataset
 * @param begin first row of the gap (negative values are treated as 0)
 * @param end   row at which the gap stops (exclusive)
 * @param idx   index of the attribute in which the gap is created
 * @return the value computed by {@code mae}
 * @throws Exception if the gap filling fails
 */
public final double evaluateMAEWithAFictiveGap(final Instances ds, final int begin, final int end,
        final int idx) throws Exception {
    final Instances withGap = new Instances(ds);

    /* carve the fictive gap into the copy */
    final int firstRow = Math.max(0, begin);
    final int stopRow = Math.min(ds.numInstances() - 1, end);
    int row = firstRow;
    while (row < stopRow) {
        withGap.instance(row).setMissing(idx);
        row++;
    }

    /* fill the gap, then score the prediction */
    final Instances filled = fillGaps(withGap);
    return mae(ds, filled, idx, begin, end);
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Evaluates the gap filling on an artificial gap: values of attribute
 * {@code idx} are marked missing on a copy of the dataset, the copy is filled
 * with {@code fillGaps} and the result is scored against {@code ds} with
 * {@code rmse}.
 * NOTE(review): the row {@code ds.numInstances() - 1} is never marked missing,
 * whatever {@code end} is - confirm this is intended.
 * NOTE(review): confirm that the copy does not share its Instance objects
 * with {@code ds}; otherwise the gap would also appear in {@code ds}.
 *
 * @param ds    reference dataset
 * @param begin first row of the gap (negative values are treated as 0)
 * @param end   row at which the gap stops (exclusive)
 * @param idx   index of the attribute in which the gap is created
 * @return the value computed by {@code rmse}
 * @throws Exception if the gap filling fails
 */
public final double evaluateRMSEWithAFictiveGap(final Instances ds, final int begin, final int end,
        final int idx) throws Exception {
    final Instances withGap = new Instances(ds);

    /* carve the fictive gap into the copy */
    final int firstRow = Math.max(0, begin);
    final int stopRow = Math.min(ds.numInstances() - 1, end);
    int row = firstRow;
    while (row < stopRow) {
        withGap.instance(row).setMissing(idx);
        row++;
    }

    /* fill the gap, then score the prediction */
    final Instances filled = fillGaps(withGap);
    return rmse(ds, filled, idx, begin, end);
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Evaluates the gap filling on an artificial gap: values of attribute
 * {@code idx} are marked missing on a copy of the dataset, the copy is filled
 * with {@code fillGaps} and the result is scored against {@code ds} with
 * {@code nashSutcliffe}.
 * NOTE(review): the row {@code ds.numInstances() - 1} is never marked missing,
 * whatever {@code end} is - confirm this is intended.
 * NOTE(review): confirm that the copy does not share its Instance objects
 * with {@code ds}; otherwise the gap would also appear in {@code ds}.
 *
 * @param ds    reference dataset
 * @param begin first row of the gap (negative values are treated as 0)
 * @param end   row at which the gap stops (exclusive)
 * @param idx   index of the attribute in which the gap is created
 * @return the value computed by {@code nashSutcliffe}
 * @throws Exception if the gap filling fails
 */
public final double evaluateNSWithAFictiveGap(final Instances ds, final int begin, final int end, final int idx)
        throws Exception {
    final Instances withGap = new Instances(ds);

    /* carve the fictive gap into the copy */
    final int firstRow = Math.max(0, begin);
    final int stopRow = Math.min(ds.numInstances() - 1, end);
    int row = firstRow;
    while (row < stopRow) {
        withGap.instance(row).setMissing(idx);
        row++;
    }

    /* fill the gap, then score the prediction */
    final Instances filled = fillGaps(withGap);
    return nashSutcliffe(ds, filled, idx, begin, end);
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Computes the root mean squared error between two datasets for one attribute,
 * over the rows from {@code max(0, begin)} up to (excluding)
 * {@code min(expected.numInstances() - 1, end)}. Rows of {@code expected}
 * having any missing value are skipped. The result is NaN when no row is
 * taken into account.
 * TODO: refactor it in order to use MathsUtil
 *
 * @param expected  reference dataset
 * @param predicted dataset holding the predicted values
 * @param idx       index of the compared attribute
 * @param begin     first row of the range
 * @param end       row at which the range stops (exclusive)
 * @return the root mean squared error over the considered rows
 */
public static double rmse(final Instances expected, final Instances predicted, final int idx, final int begin,
        final int end) {
    final int firstRow = Math.max(0, begin);
    final int stopRow = Math.min(expected.numInstances() - 1, end);

    double squaredSum = 0d;
    int counted = 0;
    for (int row = firstRow; row < stopRow; row++) {
        if (!expected.instance(row).hasMissingValue()) {
            final double delta = expected.instance(row).value(idx) - predicted.instance(row).value(idx);
            squaredSum += delta * delta;
            counted++;
        }
    }
    return Math.sqrt(squaredSum / counted);
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Computes the mean absolute error between two datasets for one attribute,
 * over the rows from {@code max(0, begin)} up to (excluding)
 * {@code min(expected.numInstances() - 1, end)}. Rows of {@code expected}
 * having any missing value are skipped. The result is NaN when no row is
 * taken into account.
 * TODO: refactor it in order to use MathsUtil
 *
 * @param expected  reference dataset
 * @param predicted dataset holding the predicted values
 * @param idx       index of the compared attribute
 * @param begin     first row of the range
 * @param end       row at which the range stops (exclusive)
 * @return the mean absolute error over the considered rows
 */
public static double mae(final Instances expected, final Instances predicted, final int idx, final int begin,
        final int end) {
    final int firstRow = Math.max(0, begin);
    final int stopRow = Math.min(expected.numInstances() - 1, end);

    double absoluteSum = 0d;
    int counted = 0;
    for (int row = firstRow; row < stopRow; row++) {
        if (!expected.instance(row).hasMissingValue()) {
            absoluteSum += Math.abs(expected.instance(row).value(idx) - predicted.instance(row).value(idx));
            counted++;
        }
    }
    return absoluteSum / counted;
}