/*
 * Copyright (c) 2010 Pentaho Corporation. All rights reserved.
 *
 * This software was developed by Pentaho Corporation and is provided under the terms
 * of the GNU Lesser General Public License, Version 2.1. You may not use
 * this file except in compliance with the license. If you need a copy of the license,
 * please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Time Series
 * Forecasting. The Initial Developer is Pentaho Corporation.
 *
 * Software distributed under the GNU Lesser Public License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. Please refer to
 * the license for the specific language governing your rights and limitations.
 */

/*
 * TSEvaluation.java
 * Copyright (C) 2010 Pentaho Corporation
 */

package weka.classifiers.timeseries.eval;

import java.beans.BeanInfo;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.swing.JPanel;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.timeseries.AbstractForecaster;
import weka.classifiers.timeseries.TSForecaster;
import weka.classifiers.timeseries.core.OverlayForecaster;
import weka.classifiers.timeseries.core.TSLagMaker;
import weka.classifiers.timeseries.core.TSLagUser;
import weka.classifiers.timeseries.eval.graph.GraphDriver;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

/**
 * Main evaluation routines for time series forecasting models. Also provides a
 * command line interface for building and evaluating time series models.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 51352 $
 */
public class TSEvaluation {

  // Anomaly-detection state: values, indices and MAPE of flagged predictions
  public ArrayList<Double> anomaliesValuesPerParameter = new ArrayList<Double>();
  public ArrayList<Integer> anomaliesIndesPerParameter = new ArrayList<Integer>();
  public ArrayList<Double> anomaliesMapePerParameter = new ArrayList<Double>();
  public List<NumericPrediction> preds = new ArrayList<NumericPrediction>();
  public List<Double> MAPEperPred = new ArrayList<Double>();
  public DescriptiveStatistics MAPEstats = new DescriptiveStatistics();
  public double mape;
  public double std;
  public double median;
  public double treshold;
  public double upperPercentile;
  public int trainSize = 0;
  private boolean readTreshsFromFile = false;

  /**
   * Set whether the outlier threshold is a preset value (supplied via
   * SetTreahold()) or computed from the calibration heuristic in
   * CalcOutlierTreshold().
   *
   * @param readTreshFromFile true to use the preset threshold
   */
  public void SetReadTreaholdFromFileOrGetFromCalibFunc(boolean readTreshFromFile) {
    readTreshsFromFile = readTreshFromFile;
  }

  /**
   * Set a preset outlier threshold.
   *
   * @param tresh the threshold value to use
   */
  public void SetTreahold(double tresh) {
    treshold = tresh;
  }

  /**
   * Compute the outlier threshold from the distribution of per-prediction MAPE
   * values. The heuristic combines the mean, standard deviation, median and
   * 85th percentile of the MAPE distribution, with weights that depend on the
   * magnitude of the mean and standard deviation.
   *
   * @param MAPEstats descriptive statistics over the per-prediction MAPE values
   * @return the outlier threshold
   * @throws Exception if a problem occurs
   */
  public double CalcOutlierTreshold(DescriptiveStatistics MAPEstats) throws Exception {
    double tresh = 0;
    mape = MAPEstats.getMean();
    std = MAPEstats.getStandardDeviation();
    median = MAPEstats.getPercentile(50);
    upperPercentile = MAPEstats.getPercentile(85);

    if (!readTreshsFromFile) {
      if (mape < 1) {
        if (std < 5) {
          tresh = upperPercentile + 3 * std + mape + median;
        } else if (std >= 5 && std < 10) {
          tresh = upperPercentile + 1.5 * std + mape + median;
        } else if (std >= 10) {
          tresh = upperPercentile + 1 * std + mape + median;
        }
      } else if (mape >= 1 && mape < 2) {
        if (std < 5) {
          tresh = upperPercentile + 3 * std + mape + median;
        } else if (std >= 5 && std < 10) {
          tresh = upperPercentile + 2.5 * std + mape + median;
        } else if (std >= 10) {
          tresh = upperPercentile + 1 * std + mape + median;
        }
      } else if (mape >= 2 && mape < 5) {
        if (std < 5) {
          tresh = 2 * upperPercentile + std + mape + 1.25 * median;
        } else if (std >= 5 && std < 10) {
          tresh = 1 * upperPercentile + std + mape + 2.75 * median;
        } else if (std >= 10) {
          tresh = 0.5 * upperPercentile + std + mape + median;
        }
      } else if (mape >= 5 && mape < 10) {
        if (std < 5) {
          tresh = 1.5 * upperPercentile + std + mape + 1.25 * median;
        } else if (std >= 5 && std < 7.5) {
          tresh = 0.85 * upperPercentile + std + mape + 2.5 * median;
        } else if (std >= 7.5 && std < 10) {
          tresh = 0.85 * upperPercentile + std + mape + 2.5 * median;
        } else if (std >= 10) {
          tresh = 0.85 * upperPercentile + std + mape + 2 * median;
        }
      } else if (mape >= 10 && mape < 15) {
        if (std < 5) {
          tresh = 0.75 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 5 && std < 10) {
          tresh = 0.5 * upperPercentile + std + mape + 1.65 * median;
        } else if (std >= 10 && std < 15) {
          tresh = 2 * upperPercentile + std + mape + 2.5 * median;
        } else if (std >= 15 && std < 20) {
          tresh = 2 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 20) {
          tresh = 0.5 * upperPercentile + std + mape + 2 * median;
        }
      } else if (mape >= 15 && mape < 20) {
        if (std < 10) {
          tresh = 1 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 10 && std < 15) {
          tresh = 1 * upperPercentile + std + mape + 3 * median;
        } else if (std >= 15 && std < 20) {
          tresh = 0.5 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 20) {
          tresh = 1 * upperPercentile + std + mape + 2 * median;
        }
      } else if (mape >= 20 && mape < 30) {
        if (std < 10) {
          tresh = 1 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 10 && std < 20) {
          tresh = 1.25 * upperPercentile + std + mape + 2 * median;
        } else if (std >= 20) {
          tresh = 1 * upperPercentile + std + mape + 2 * median;
        }
        // if (tresh > (3.5 * mape))
        //   tresh = 3.5 * mape;
      } else {
        if (std < 10) {
          tresh = 1 * upperPercentile + std + mape + median;
        } else if (std >= 10 && std < 20) {
          tresh = 1 * upperPercentile + std + mape + median;
        } else if (std >= 20) {
          tresh = 0.5 * upperPercentile + std + mape + median;
        }
        // if (tresh > (3 * mape))
        //   tresh = 3 * mape;
      }
    } else {
      tresh = treshold;
    }
    return tresh;
  }

  /**
   * Flag predictions whose MAPE exceeds the outlier threshold. Flagged actual
   * values, indices and MAPE values are accumulated in the anomalies* lists.
   *
   * @param MAPEperPred the per-prediction MAPE values
   * @param preds the corresponding predictions
   * @throws Exception if a problem occurs
   */
  public void OutlierDetection(List<Double> MAPEperPred, List<NumericPrediction> preds) throws Exception {
    int cnt = 0;
    int lastInd = 0;
    treshold = CalcOutlierTreshold(MAPEstats);
    for (NumericPrediction p : preds) {
      if (!Utils.isMissingValue(p.error()) && Math.abs(p.actual()) > 0) {
        if (cnt < MAPEperPred.size()) {
          if (MAPEperPred.get(cnt) > treshold) {
            // if (cnt != (lastInd + 1)) { // in order to remove sequential anomalies
            anomaliesValuesPerParameter.add(p.actual());
            anomaliesIndesPerParameter.add(cnt);
            anomaliesMapePerParameter.add(MAPEperPred.get(cnt));
            lastInd = cnt;
            // }
          }
        }
      }
      cnt = cnt + 1;
    }
  }

  public ArrayList<Double> GetAnomaliesValues() throws Exception {
    return anomaliesValuesPerParameter;
  }

  public ArrayList<Integer> GetAnomaliesIndex() throws Exception {
    return anomaliesIndesPerParameter;
  }

  public ArrayList<Double> GetAnomaliesMape() throws Exception {
    return anomaliesMapePerParameter;
  }

  /** Return the standard deviation of the MAPE values, truncated to 2 decimals. */
  public Double Get_std_Measure() throws Exception {
    int truncateStd = (int) (std * 100);
    std = ((double) truncateStd) / 100;
    return std;
  }

  /** Return the median of the MAPE values, truncated to 2 decimals. */
  public Double Get_median_Measure() throws Exception {
    int truncateMedian = (int) (median * 100);
    median = ((double) truncateMedian) / 100;
    return median;
  }

  /** Return the outlier threshold, truncated to 2 decimals. */
  public Double Get_treshold_Measure() throws Exception {
    int truncateTreshold = (int) (treshold * 100);
    treshold = ((double) truncateTreshold) / 100;
    return treshold;
  }

  /** Return the mean MAPE, truncated to 2 decimals. */
  public Double Get_MAPE_Measure() throws Exception {
    int truncateMape = (int) (mape * 100);
    mape = ((double) truncateMape) / 100;
    return mape;
  }

  /** Return the 85th percentile of the MAPE values, truncated to 2 decimals. */
  public Double Get_upperPercentile_Measure() throws Exception {
    int truncateUpperPercentile = (int) (upperPercentile * 100);
    upperPercentile = ((double) truncateUpperPercentile) / 100;
    return upperPercentile;
  }
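  /*
   * Illustrative sketch (not part of the original API surface): driving the
   * anomaly detector after an evaluation run has populated the MAPE statistics.
   * The variable names "eval" and "forecaster" are assumptions for the example.
   *
   *   TSEvaluation eval = new TSEvaluation(trainingData, 0.3);
   *   eval.evaluateForecaster(forecaster);
   *   eval.CalculateAnomalies();                         // computes the threshold and flags outliers
   *   List<Integer> indices = eval.GetAnomaliesIndex();  // indices of flagged predictions
   *   List<Double> mapes = eval.GetAnomaliesMape();      // their MAPE values
   *   List<Double> actuals = eval.GetAnomaliesValues();  // the actual target values
   */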
  /**
   * Run outlier detection over the per-prediction MAPE values collected by the
   * evaluation modules for the training and/or test data.
   *
   * @throws Exception if a problem occurs
   */
  public void CalculateAnomalies() throws Exception {
    preds = null;
    MAPEperPred = null;
    MAPEstats = null;
    StringBuffer result = new StringBuffer();
    int maxWidth = "10-steps-ahead".length() + 1;
    int maxWidthForLeftCol = "Target".length();
    List<String> targetFieldNames = null;

    // update max width with respect to metrics for training data (if any)
    if (m_evaluateTrainingData) {
      Set<String> keys = m_metricsForTrainingData.keySet();
      for (String key : keys) {
        List<TSEvalModule> evalsForKey = m_metricsForTrainingData.get(key);
        for (TSEvalModule e : evalsForKey) {
          if (targetFieldNames == null) {
            targetFieldNames = e.getTargetFields();
          }
          double[] metricsForTargets = e.calculateMeasure();
          preds = e.preds;
          MAPEperPred = e.MAPEperPred;
          MAPEstats = e.MAPEstats;
          for (double m : metricsForTargets) {
            maxWidth = maxWidth(maxWidth, m);
          }
        }
      }
    }

    // update max width with respect to metrics for test data (if any)
    if (m_evaluateTestData) {
      Set<String> keys = m_metricsForTestData.keySet();
      for (String key : keys) {
        List<TSEvalModule> evalsForKey = m_metricsForTestData.get(key);
        for (TSEvalModule e : evalsForKey) {
          if (targetFieldNames == null) {
            targetFieldNames = e.getTargetFields();
          }
          double[] metricsForTargets = e.calculateMeasure();
          preds = e.preds;
          MAPEperPred = e.MAPEperPred;
          MAPEstats = e.MAPEstats;
          OutlierDetection(MAPEperPred, preds);
          for (double m : metricsForTargets) {
            maxWidth = maxWidth(maxWidth, m);
          }
        }
      }
    }
  }

  /**
   * Number of time steps to forecast out for in each forecasting iteration
   * <-horizon>
   */
  protected int m_horizon = 1;

  /**
   * The number of instances to prime the forecaster with prior to each
   * forecasting iteration. <-p>
   */
  protected int m_primeWindowSize = 1;

  /**
   * In the case where there is both training and test data available, setting
   * this to true will prime the forecaster with the first primeWindowSize
   * instances of the test data (instead of the last primeWindowSize instances
   * of the training data) before predicting for the test data. This should be
   * set to true if the test data does not follow immediately in time from the
   * training data. <-primeWithTest>
   */
  protected boolean m_primeForTestDataWithTestData = false;

  /**
   * If true, then the model will be rebuilt after each forecasting iteration on
   * the test data. I.e. if N is the training data and t is the first testing
   * data point, then after forecasting t, the model will be rebuilt/updated to
   * encompass N + t. Otherwise, the model remains static after N and only
   * lagged inputs will be updated using subsequent test data points. <-r>
   */
  protected boolean m_rebuildModelAfterEachTestForecastStep = false;

  /** Forecast future values beyond the end of the series <-f> */
  protected boolean m_forecastFuture = true;

  /** Evaluate on the training data (if supplied) <-a> */
  protected boolean m_evaluateTrainingData = true;

  /** Evaluate on the test data (if supplied) <-b> */
  protected boolean m_evaluateTestData = true;

  /**
   * A list of errors (and predictions) for the training data. The list is
   * indexed by step (i.e. the first element holds the errors/predictions that
   * are one-step-ahead, the second holds those that are two-steps-ahead etc.).
   */
  protected List<ErrorModule> m_predictionsForTrainingData;

  /**
   * A list of errors (and predictions) for the test data. The list is indexed
   * by step (i.e. the first element holds the errors/predictions that are
   * one-step-ahead, the second holds those that are two-steps-ahead etc.).
   */
  protected List<ErrorModule> m_predictionsForTestData;

  /** Predictions for time steps beyond the end of the training data */
  protected List<List<NumericPrediction>> m_trainingFuture;

  /** Predictions for time steps beyond the end of the test data */
  protected List<List<NumericPrediction>> m_testFuture;

  /**
   * A map (keyed by metric name) of metrics for the training data. The value
   * for a key is a List (indexed by step) of the associated metric.
   */
  protected Map<String, List<TSEvalModule>> m_metricsForTrainingData;

  /**
   * A map (keyed by metric name) of metrics for the test data. The value for a
   * key is a List (indexed by step) of the associated metric.
   */
  protected Map<String, List<TSEvalModule>> m_metricsForTestData;

  /** The evaluation modules to use */
  protected List<TSEvalModule> m_evalModules;

  /** The training data */
  protected Instances m_trainingData;

  /** The test data */
  protected Instances m_testData;

  protected Instances m_dataStructure;

  protected List<Integer> m_missingTargetListTestSet;
  protected List<Integer> m_missingTimeStampListTestSet;
  protected List<String> m_missingTimeStampTestSetRows;

  /**
   * Constructor.
   *
   * @param trainingData the training data
   * @param testSplitSize the number or percentage of instances to hold out from
   *          the end of the training data to be test data.
   * @throws Exception if a problem occurs.
   */
  public TSEvaluation(Instances trainingData, double testSplitSize) throws Exception {
    if (trainingData != null) {
      Instances train = new Instances(trainingData);
      Instances test = null;
      if (testSplitSize > 0) {
        if (trainingData.numInstances() < 2) {
          throw new Exception("Need at least 2 training instances to do hold out evaluation!");
        }
        int numToHoldOut = 0;
        trainSize = 0;
        if (testSplitSize >= 1) {
          numToHoldOut = (int) testSplitSize;
          trainSize = trainingData.numInstances() - numToHoldOut;
          if (trainSize <= 0) {
            throw new Exception("Can't hold out more instances than there are in the data!");
          }
        } else if (testSplitSize > 0) {
          double trainP = 1.0 - testSplitSize;
          trainSize = (int) Math.round(trainingData.numInstances() * trainP);
          numToHoldOut = trainingData.numInstances() - trainSize;
        }
        train = new Instances(trainingData, 0, trainSize);
        test = new Instances(trainingData, trainSize, numToHoldOut);
      } else if (testSplitSize < 0) {
        throw new Exception("Testing holdout size can't be less than zero!");
      }
      setTrainingData(train);
      setTestData(test);
    } else {
      setEvaluateOnTrainingData(false);
    }
    // default eval modules (automatically adds an error module too)
    setEvaluationModules("MAE,RMSE");
  }
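  /*
   * Example (illustrative; the ARFF file name is a placeholder):
   *
   *   Instances series = new Instances(new BufferedReader(new FileReader("airline.arff")));
   *   // hold out the last 20% of the series as test data
   *   TSEvaluation eval = new TSEvaluation(series, 0.2);
   *   // or hold out exactly 12 instances from the end
   *   TSEvaluation eval12 = new TSEvaluation(series, 12);
   */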
  /**
   * Constructor.
   *
   * @param trainingData the training data to use
   * @param testData the test data to use
   * @throws Exception if a problem occurs.
   */
  public TSEvaluation(Instances trainingData, Instances testData) throws Exception {
    // training or test data may be null (but not both)
    if (trainingData != null && testData != null && !trainingData.equalHeaders(testData)) {
      throw new Exception("Training and testing data are not compatible!");
    }
    if (trainingData == null && testData == null) {
      throw new Exception("Can't specify null for both training and test data");
    }
    setTrainingData(trainingData);
    setTestData(testData);
    // default eval modules (automatically adds an error module too)
    setEvaluationModules("MAE,RMSE");
  }

  /**
   * Set the training data to use.
   *
   * @param train the training data to use
   */
  public void setTrainingData(Instances train) {
    if (train == null || train.numInstances() > 0) {
      m_trainingData = train;
      if (m_trainingData == null) {
        m_evaluateTrainingData = false;
      }
    } else {
      m_dataStructure = train;
    }
  }

  /**
   * Set the test data to use.
   *
   * @param testData the test data to use
   */
  public void setTestData(Instances testData) {
    if (testData == null || testData.numInstances() > 0) {
      m_testData = testData;
      if (m_testData == null) {
        m_evaluateTestData = false;
      } else {
        m_evaluateTestData = true;
      }
    } else {
      m_dataStructure = testData;
      m_evaluateTestData = false;
    }
  }

  /**
   * Get the training data (if any).
   *
   * @return the training data or null if none is in use
   */
  public Instances getTrainingData() {
    return m_trainingData;
  }

  /**
   * Get the test data (if any).
   *
   * @return the test data or null if none is in use.
   */
  public Instances getTestData() {
    return m_testData;
  }

  /**
   * Set whether to perform evaluation on the training data.
   *
   * @param evalOnTraining true if evaluation is to be performed on the training
   *          data
   */
  public void setEvaluateOnTrainingData(boolean evalOnTraining) {
    m_evaluateTrainingData = evalOnTraining;
  }

  /**
   * Get whether evaluation is to be performed on the training data.
   *
   * @return true if evaluation is to be performed on the training data.
   */
  public boolean getEvaluateOnTrainingData() {
    return m_evaluateTrainingData;
  }

  /**
   * Set whether to perform evaluation on the test data.
   *
   * @param evalOnTest true if evaluation is to be performed on the test data
   */
  public void setEvaluateOnTestData(boolean evalOnTest) {
    m_evaluateTestData = evalOnTest;
  }

  /**
   * Get whether evaluation is to be performed on the test data.
   *
   * @return true if evaluation is to be performed on the test data.
   */
  public boolean getEvaluateOnTestData() {
    return m_evaluateTestData;
  }

  /**
   * Set the horizon - i.e. the number of steps to forecast into the future.
   *
   * @param horizon the number of steps to forecast into the future
   */
  public void setHorizon(int horizon) {
    m_horizon = horizon;
  }

  /**
   * Set the size of the priming window - i.e. the number of historical
   * instances to be presented to the forecaster before a forecast is requested.
   *
   * @param primeSize the number of instances to prime with
   */
  public void setPrimeWindowSize(int primeSize) {
    m_primeWindowSize = primeSize;
  }

  /**
   * Get the size of the priming window - i.e. the number of historical
   * instances that will be presented to the forecaster before a forecast is
   * requested.
   *
   * @return the size of the priming window.
   */
  public int getPrimeWindowSize() {
    return m_primeWindowSize;
  }

  /**
   * Set whether evaluation for test data should begin by priming with the first
   * x test data instances and then forecasting from step x + 1. This is the
   * only option if there is no training data and a model has been deserialized
   * from disk. If we have training data, and it occurs immediately before the
   * test data in time, then we can prime with the last x instances from the
   * training data.
   *
   * @param p true if we should start evaluation of the test data by priming
   *          with the first instances from the test data.
   */
  public void setPrimeForTestDataWithTestData(boolean p) {
    m_primeForTestDataWithTestData = p;
  }

  /**
   * Gets whether evaluation for the test data will begin by priming with the
   * first x instances from the test data and then forecasting from step x + 1.
   * This is the only option if there is no training data and a model has been
   * deserialized from disk. If we have training data, and it occurs immediately
   * before the test data in time, then we can prime with the last x instances
   * from the training data.
   *
   * @return true if evaluation of the test data will begin by priming with the
   *         first x instances from the test data.
   */
  public boolean getPrimeForTestDataWithTestData() {
    return m_primeForTestDataWithTestData;
  }

  /**
   * Set whether the forecasting model should be rebuilt after each forecasting
   * step on the test data using both the training data and test data up to the
   * current instance.
   *
   * @param r true if the forecasting model should be rebuilt after each
   *          forecasting step on the test data to take into account all data up
   *          to the current point in time
   */
  public void setRebuildModelAfterEachTestForecastStep(boolean r) {
    m_rebuildModelAfterEachTestForecastStep = r;
  }

  /**
   * Set whether we should generate a future forecast beyond the end of the
   * training and/or test data.
   *
   * @param future true if future forecasts beyond the end of training/test data
   *          should be generated.
   */
  public void setForecastFuture(boolean future) {
    m_forecastFuture = future;
  }

  /**
   * Get whether future forecasts beyond the end of the training and/or test
   * data will be generated.
   *
   * @return true if future forecasts will be generated.
   */
  public boolean getForecastFuture() {
    return m_forecastFuture;
  }

  /**
   * Set the evaluation modules to use.
   *
   * @param evalModNames a comma-separated list of evaluation module names.
   * @throws Exception if there are unknown evaluation modules requested.
   */
  public void setEvaluationModules(String evalModNames) throws Exception {
    String[] names = evalModNames.split(",");
    m_evalModules = new ArrayList<TSEvalModule>();
    // we always want an error module
    m_evalModules.add(new ErrorModule());
    for (String modName : names) {
      if (modName.length() > 0) {
        TSEvalModule mod = TSEvalModule.getModule(modName.trim());
        // skip an explicitly requested error module - one is always present
        if (!mod.getEvalName().equals("Error")) {
          m_evalModules.add(mod);
        }
      }
    }
  }
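  /*
   * Example (illustrative): selecting evaluation metrics by name. The module
   * names follow the ones used elsewhere in this class (MAE, RMSE, RAE, RRSE);
   * an error module is always added implicitly.
   *
   *   TSEvaluation eval = new TSEvaluation(train, 0.2);
   *   eval.setEvaluationModules("MAE,RMSE,RAE,RRSE");
   *   eval.setHorizon(12);         // forecast up to 12 steps ahead
   *   eval.setPrimeWindowSize(12); // prime with the 12 most recent instances
   */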
  /**
   * Get the evaluation modules in use.
   *
   * @return a list of the evaluation modules in use.
   */
  public List<TSEvalModule> getEvaluationModules() {
    return m_evalModules;
  }

  /**
   * Get predictions for all targets for the specified step number on the
   * training data.
   *
   * @param stepNumber number of the step into the future to return predictions
   *          for
   * @return the stepNumber step-ahead predictions for all targets
   * @throws Exception if there are no predictions available for the training
   *           data
   */
  public ErrorModule getPredictionsForTrainingData(int stepNumber) throws Exception {
    if (m_predictionsForTrainingData == null) {
      throw new Exception("No predictions for training data available!");
    }
    int numSteps = m_predictionsForTrainingData.size();
    if (stepNumber > numSteps) {
      throw new Exception("Only predictions up to " + numSteps
          + (numSteps > 1 ? "-steps" : "-step") + "-ahead are available");
    }
    return m_predictionsForTrainingData.get(stepNumber - 1);
  }

  /**
   * Get predictions for all targets for the specified step number on the test
   * data.
   *
   * @param stepNumber number of the step into the future to return predictions
   *          for
   * @return the stepNumber step-ahead predictions for all targets
   * @throws Exception if there are no predictions available for the test data
   */
  public ErrorModule getPredictionsForTestData(int stepNumber) throws Exception {
    if (m_predictionsForTestData == null) {
      throw new Exception("No predictions for test data available!");
    }
    int numSteps = m_predictionsForTestData.size();
    if (stepNumber > numSteps) {
      throw new Exception("Only predictions up to " + numSteps
          + (numSteps > 1 ? "-steps" : "-step") + "-ahead are available");
    }
    return m_predictionsForTestData.get(stepNumber - 1);
  }

  private void setupEvalModules(List<ErrorModule> predHolders,
      Map<String, List<TSEvalModule>> evalHolders, List<String> fieldsToForecast) {
    for (int i = 0; i < m_horizon; i++) {
      ErrorModule e = new ErrorModule();
      e.setTargetFields(fieldsToForecast);
      predHolders.add(e);
    }
    for (TSEvalModule m : m_evalModules) {
      if (!(m.getEvalName().equals("Error"))) {
        String key = m.getEvalName();
        List<TSEvalModule> evalForSteps = new ArrayList<TSEvalModule>();
        TSEvalModule firstMod = null;
        for (int i = 0; i < m_horizon; i++) {
          TSEvalModule newMod = TSEvalModule.getModule(key);
          newMod.setTargetFields(fieldsToForecast);
          if (i == 0) {
            firstMod = newMod;
          } else {
            if (newMod.getEvalName().equals("RRSE")) {
              // make relative to the one-step-ahead RRSE module
              ((RRSEModule) newMod).setRelativeRRSEModule((RRSEModule) firstMod);
            } else if (newMod.getEvalName().equals("RAE")) {
              ((RAEModule) newMod).setRelativeRAEModule((RAEModule) firstMod);
            }
          }
          evalForSteps.add(newMod);
        }
        evalHolders.put(key, evalForSteps);
      }
    }
  }

  private void updateEvalModules(List<ErrorModule> predHolders,
      Map<String, List<TSEvalModule>> evalHolders,
      List<List<NumericPrediction>> predsForSteps, int currentInstanceNum,
      Instances toPredict) throws Exception {
    // errors first
    for (int i = 0; i < m_horizon; i++) {
      // when using overlay data there will only be as many predictions as
      // there are instances to predict
      if (i < predsForSteps.size()) {
        List<NumericPrediction> predsForStepI = predsForSteps.get(i);
        if (currentInstanceNum + i < toPredict.numInstances()) {
          predHolders.get(i).evaluateForInstance(predsForStepI,
              toPredict.instance(currentInstanceNum + i));
        } else {
          predHolders.get(i).evaluateForInstance(predsForStepI, null);
        }
      }
    }
    // other evaluation metrics
    for (TSEvalModule m : m_evalModules) {
      if (!(m.getEvalName().equals("Error"))) {
        String key = m.getEvalName();
        List<TSEvalModule> evalForSteps = evalHolders.get(key);
        for (int i = 0; i < m_horizon; i++) {
          if (i < predsForSteps.size()) {
            List<NumericPrediction> predsForStepI = predsForSteps.get(i);
            if (currentInstanceNum + i < toPredict.numInstances()) {
              evalForSteps.get(i).evaluateForInstance(predsForStepI,
                  toPredict.instance(currentInstanceNum + i));
            } else {
              evalForSteps.get(i).evaluateForInstance(predsForStepI, null);
            }
          }
        }
      }
    }
  }

  /**
   * Evaluate the supplied forecaster. Trains the forecaster if a training set
   * has been configured.
   *
   * @param forecaster the forecaster to evaluate
   * @throws Exception if something goes wrong during evaluation
   */
  public void evaluateForecaster(TSForecaster forecaster, PrintStream... progress) throws Exception {
    evaluateForecaster(forecaster, true, progress);
  }

  /**
   * Creates overlay data to use during evaluation.
   *
   * @param forecaster the forecaster being evaluated
   * @param source the source instances
   * @param start the index of the first instance to be part of the overlay data
   * @param numSteps the number of steps to forecast (and hence the number of
   *          instances to make up the overlay data set)
   * @return the overlay instances.
   */
  protected Instances createOverlayForecastData(TSForecaster forecaster, Instances source,
      int start, int numSteps) {
    int toCopy = Math.min(numSteps, source.numInstances() - start);
    Instances overlay = new Instances(source, start, toCopy);
    // set all targets to missing
    List<String> fieldsToForecast = AbstractForecaster.stringToList(forecaster.getFieldsToForecast());
    for (int i = 0; i < overlay.numInstances(); i++) {
      Instance current = overlay.instance(i);
      for (String target : fieldsToForecast) {
        current.setValue(overlay.attribute(target), Utils.missingValue());
      }
    }
    return overlay;
  }
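  /*
   * Illustrative sketch of what createOverlayForecastData() produces. Given a
   * series with a target ("sales") and an overlay attribute ("temperature") -
   * both names assumed for the example - the returned instances keep the
   * overlay values but have every target value set to missing, so the
   * forecaster can condition on known future overlay data without seeing
   * future targets:
   *
   *   source:              overlay:
   *   sales  temperature   sales  temperature
   *   10     21.3          ?      21.3
   *   12     19.8          ?      19.8
   */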
  /**
   * Evaluate a forecaster on training and/or test data.
   *
   * @param forecaster the forecaster to evaluate
   * @param buildModel true if the model is to be built (given that there is a
   *          training data set to build it with)
   * @throws Exception if something goes wrong during evaluation
   */
  public void evaluateForecaster(TSForecaster forecaster, boolean buildModel,
      PrintStream... progress) throws Exception {
    m_predictionsForTrainingData = null;
    m_predictionsForTestData = null;
    m_trainingFuture = null;
    m_testFuture = null;

    // train the forecaster first (if necessary)
    if (m_trainingData != null && buildModel) {
      for (PrintStream p : progress) {
        p.println("Building forecaster...");
      }
      forecaster.buildForecaster(m_trainingData);
    }

    // We need to know if the forecaster's lag maker has inserted any entirely
    // missing rows into the training data
    if (forecaster instanceof TSLagUser) {
      // Check training data even if there is no evaluation on the training
      // data, as output for a future forecast beyond the end of the training
      // data will require updated training data too
      TSLagMaker lagMaker = ((TSLagUser) forecaster).getTSLagMaker();
      if (m_trainingData != null) {
        Instances trainMod = new Instances(m_trainingData);
        trainMod = weka.classifiers.timeseries.core.Utils.replaceMissing(trainMod,
            lagMaker.getFieldsToLag(), lagMaker.getTimeStampField(), false,
            lagMaker.getPeriodicity(), lagMaker.getSkipEntries());
        if (trainMod.numInstances() != m_trainingData.numInstances()) {
          m_trainingData = trainMod;
        }
      }
      if (m_evaluateTestData) {
        m_missingTimeStampTestSetRows = new ArrayList<String>();
        m_missingTargetListTestSet = new ArrayList<Integer>();
        m_missingTimeStampListTestSet = new ArrayList<Integer>();
        Instances testMod = new Instances(m_testData);
        testMod = weka.classifiers.timeseries.core.Utils.replaceMissing(testMod,
            lagMaker.getFieldsToLag(), lagMaker.getTimeStampField(), false,
            lagMaker.getPeriodicity(), lagMaker.getSkipEntries(),
            m_missingTargetListTestSet, m_missingTimeStampListTestSet,
            m_missingTimeStampTestSetRows);
        if (testMod.numInstances() != m_testData.numInstances()) {
          // m_testData = testMod;
          // disabled: replaceMissing() generates additional instances rather
          // than only interpolating/replacing missing ones.
          // TODO: fix this bug or handle it as a data preprocessing step
        }
      }
    }

    if (m_evaluateTrainingData) {
      for (PrintStream p : progress) {
        p.println("Evaluating on training set...");
      }
      // set up training set prediction and eval modules
      m_predictionsForTrainingData = new ArrayList<ErrorModule>();
      m_metricsForTrainingData = new HashMap<String, List<TSEvalModule>>();
      setupEvalModules(m_predictionsForTrainingData, m_metricsForTrainingData,
          AbstractForecaster.stringToList(forecaster.getFieldsToForecast()));

      Instances primeData = new Instances(m_trainingData, 0);
      if (forecaster instanceof TSLagUser) {
        // if an artificial time stamp is being used, make sure it is reset for
        // evaluating the training data
        if (((TSLagUser) forecaster).getTSLagMaker().isUsingAnArtificialTimeIndex()) {
          ((TSLagUser) forecaster).getTSLagMaker().setArtificialTimeStartValue(m_primeWindowSize);
        }
      }
      for (int i = 0; i < m_trainingData.numInstances(); i++) {
        Instance current = m_trainingData.instance(i);
        if (i < m_primeWindowSize) {
          primeData.add(current);
        } else {
          if (i % 10000 == 0) {
            for (PrintStream p : progress) {
              p.println("Evaluating on training set: processed " + i + " instances...");
            }
          }
          forecaster.primeForecaster(primeData);
          List<List<NumericPrediction>> forecast = null;
          if (forecaster instanceof OverlayForecaster
              && ((OverlayForecaster) forecaster).isUsingOverlayData()) {
            // can only generate forecasts for remaining training data that we
            // can use as overlay data
            if (current != null) {
              Instances overlay = createOverlayForecastData(forecaster, m_trainingData, i, m_horizon);
              forecast = ((OverlayForecaster) forecaster).forecast(m_horizon, overlay, progress);
            }
          } else {
            forecast = forecaster.forecast(m_horizon, progress);
          }
          updateEvalModules(m_predictionsForTrainingData, m_metricsForTrainingData,
              forecast, i, m_trainingData);
          // remove the oldest prime instance and add this one
          if (m_primeWindowSize > 0 && current != null) {
            primeData.remove(0);
            primeData.add(current);
            primeData.compactify();
          }
        }
      }
    }

    if (m_trainingData != null && m_forecastFuture) {
      // generate a forecast beyond the end of the training data
      for (PrintStream p : progress) {
        p.println("Generating future forecast for training data...");
      }
      Instances primeData = new Instances(m_trainingData,
          m_trainingData.numInstances() - m_primeWindowSize, m_primeWindowSize);
      forecaster.primeForecaster(primeData);
      // if overlay data is being used then the only way we can make a forecast
      // beyond the end of the training data is if we have a held-out test set
      // to use for the "future" overlay field values
      if (forecaster instanceof OverlayForecaster
          && ((OverlayForecaster) forecaster).isUsingOverlayData()) {
        if (m_testData != null) {
          Instances overlay = createOverlayForecastData(forecaster, m_testData, 0, m_horizon);
          m_trainingFuture = ((OverlayForecaster) forecaster).forecast(m_horizon, overlay, progress);
        } else {
          for (PrintStream p : progress) {
            p.println("WARNING: Unable to generate a future forecast beyond the end "
                + "of the training data because there is no future overlay "
                + "data available.");
          }
        }
      } else {
        m_trainingFuture = forecaster.forecast(m_horizon);
      }
    }

    if (m_evaluateTestData) {
      for (PrintStream p : progress) {
        p.println("Evaluating on test set...");
      }
      // set up test set prediction and eval modules
      m_predictionsForTestData = new ArrayList<ErrorModule>();
      m_metricsForTestData = new HashMap<String, List<TSEvalModule>>();
      setupEvalModules(m_predictionsForTestData, m_metricsForTestData,
          AbstractForecaster.stringToList(forecaster.getFieldsToForecast()));

      Instances primeData = null;
      Instances rebuildData = null;
      if (m_trainingData != null) {
        primeData = new Instances(m_trainingData, 0);
        if (forecaster instanceof TSLagUser) {
          // initialize the artificial time stamp value (if in use)
          if (((TSLagUser) forecaster).getTSLagMaker().isUsingAnArtificialTimeIndex()) {
            ((TSLagUser) forecaster).getTSLagMaker()
                .setArtificialTimeStartValue(m_trainingData.numInstances());
          }
        }
        if (m_rebuildModelAfterEachTestForecastStep) {
          rebuildData = new Instances(m_trainingData);
        }
      } else {
        primeData = new Instances(m_testData, 0);
      }

      int predictionOffsetForTestData = 0;
      if (m_trainingData == null || m_primeForTestDataWithTestData) {
        if (m_primeWindowSize >= m_testData.numInstances()) {
          throw new Exception("The test data needs to have at least as many "
              + "instances as the priming window size!");
        }
        predictionOffsetForTestData = m_primeWindowSize;
        if (predictionOffsetForTestData >= m_testData.numInstances()) {
          throw new Exception("Priming using test data requires more instances "
              + "than are available in the test data!");
        }
        primeData = new Instances(m_testData, 0, m_primeWindowSize);
        if (forecaster instanceof TSLagUser) {
          if (((TSLagUser) forecaster).getTSLagMaker().isUsingAnArtificialTimeIndex()) {
            double artificialTimeStampStart = 0;
            if (m_primeForTestDataWithTestData) {
              if (m_trainingData == null) {
                artificialTimeStampStart = ((TSLagUser) forecaster).getTSLagMaker()
                    .getArtificialTimeStartValue();
                artificialTimeStampStart += m_primeWindowSize;
                ((TSLagUser) forecaster).getTSLagMaker()
                    .setArtificialTimeStartValue(artificialTimeStampStart);
              } else {
                ((TSLagUser) forecaster).getTSLagMaker().setArtificialTimeStartValue(
                    m_trainingData.numInstances() + m_primeWindowSize);
              }
            }
          }
        }
      } else {
        // use the last primeWindowSize instances from the training data to
        // prime with
        predictionOffsetForTestData = 0;
        primeData = new Instances(m_trainingData,
            (m_trainingData.numInstances() - m_primeWindowSize), m_primeWindowSize);
      }

      for (int i = predictionOffsetForTestData; i < m_testData.numInstances(); i++) {
        Instance current = m_testData.instance(i);
        if (m_primeWindowSize > 0) {
          forecaster.primeForecaster(primeData);
        }
        if (i % 10000 == 0) {
          for (PrintStream p : progress) {
            p.println("Evaluating on test set: processed " + i + " instances...");
          }
        }
        List<List<NumericPrediction>> forecast = null;
        if (forecaster instanceof OverlayForecaster
            && ((OverlayForecaster) forecaster).isUsingOverlayData()) {
          // can only generate forecasts for remaining test data that we can
          // use as overlay data
          if (current != null) {
            Instances overlay = createOverlayForecastData(forecaster, m_testData, i, m_horizon);
            forecast = ((OverlayForecaster) forecaster).forecast(m_horizon, overlay, progress);
          }
        } else {
          forecast = forecaster.forecast(m_horizon, progress);
        }
        updateEvalModules(m_predictionsForTestData, m_metricsForTestData, forecast, i, m_testData);
        // remove the oldest prime instance and add this one
        if (m_primeWindowSize > 0 && current != null) {
          primeData.remove(0);
          primeData.add(current);
          primeData.compactify();
        }
        // add the current instance to the training data and rebuild the
        // forecaster (if requested and if there is training data).
        if (m_rebuildModelAfterEachTestForecastStep && rebuildData != null) {
          rebuildData.add(current);
          forecaster.buildForecaster(rebuildData);
        }
      }
    }

    if (m_testData != null && m_forecastFuture) {
      // generate a forecast beyond the end of the test data
      for (PrintStream p : progress) {
        p.println("Generating future forecast for test data...");
      }
      // If we need more priming data than is available in the test set then
      // use some of the training data (if available)
      Instances primeData = null;
      if (m_primeWindowSize > m_testData.numInstances()) {
        int difference = m_primeWindowSize - m_testData.numInstances();
        if (m_trainingData != null) {
          if (difference > m_trainingData.numInstances()) {
            primeData = new Instances(m_trainingData);
          } else {
            primeData = new Instances(m_trainingData,
                m_trainingData.numInstances() - difference, difference);
          }
        }
        // now add all the test data too
        for (int z = 0; z < m_testData.numInstances(); z++) {
          primeData.add(m_testData.instance(z));
        }
      } else {
        primeData = new Instances(m_testData,
            m_testData.numInstances() - m_primeWindowSize, m_primeWindowSize);
      }
      forecaster.primeForecaster(primeData);

      if (forecaster instanceof OverlayForecaster
          && ((OverlayForecaster) forecaster).isUsingOverlayData()) {
        // There is no way to produce a forecast beyond the end of the test
        // data because we have no future overlay data
        for (PrintStream p : progress) {
          p.println("WARNING: Unable to generate a future forecast beyond the end "
              + "of the test data because there is no future overlay "
              + "data available.");
        }
      } else {
        m_testFuture = forecaster.forecast(m_horizon, progress);
      }
    }
  }
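  /*
   * Example (illustrative end-to-end sketch; assumes a configured
   * weka.classifiers.timeseries.WekaForecaster and a target field named
   * "sales"):
   *
   *   WekaForecaster forecaster = new WekaForecaster();
   *   forecaster.setFieldsToForecast("sales");
   *   TSEvaluation eval = new TSEvaluation(train, 0.2); // hold out last 20%
   *   eval.setHorizon(6);
   *   eval.setPrimeWindowSize(12);
   *   eval.evaluateForecaster(forecaster, System.out);  // builds, then evaluates
   *   System.out.println(eval.toSummaryString());
   */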
  /**
   * Construct a string containing all general and forecaster-specific options.
   *
   * @param forecaster the forecaster to use
   * @param globalInfo true if global information (about info) on the forecaster
   *          is to be included in the string
   * @return a string containing all general and forecaster-specific options +
   *         possibly global about info
   */
  protected static String makeOptionString(TSForecaster forecaster, boolean globalInfo) {
    StringBuffer optionsText = new StringBuffer("");

    // General options
    optionsText.append("\n\tGeneral options:\n\n");
    optionsText.append("-h or -help\n");
    optionsText.append("\tOutput help information.\n");
    optionsText.append("-synopsis or -info\n");
    optionsText.append("\tOutput synopsis for forecaster (use in conjunction with -h)\n");
    optionsText.append("-t <name of training file>\n");
    optionsText.append("\tSets training file.\n");
    optionsText.append("-T <name of test file>\n");
    optionsText.append("\tSets test file.\n");
    optionsText.append("-holdout <#instances | percentage>\n");
    optionsText.append("\tSets the number of instances or percentage of the training\n"
        + "\tdata to hold out for testing.\n");
    optionsText.append("-horizon <#steps>\n");
    optionsText.append("\tThe maximum number of steps to forecast into the future.\n");
    optionsText.append("-prime <#instances>\n");
    optionsText.append("\tThe number of instances to prime the forecaster with before\n"
        + "\tgenerating a forecast.\n");
    optionsText.append("-metrics <list of metrics>\n");
    optionsText.append("\tA list of metrics to use for evaluation.\n\t(default: MAE,RMSE)\n");
    optionsText.append("-p <target:step#>\n");
    optionsText.append("\tOutput predictions for the specified target at the\n"
        + "\tspecified step-ahead level.\n");
    optionsText.append("-graph <file name:target list:step list>\n");
    optionsText.append("\tGenerate a PNG graph of predictions for the specified\n\t"
        + "target(s) at the specified step-ahead level(s) on the training\n\t"
        + "and/or test data. Note that only one of the targets or steps can\n\t"
        + "be a list - i.e. either a single target is plotted for multiple\n\t"
        + "step-ahead levels, or, multiple targets are plotted at a single\n\t"
        + "step-ahead level.\n");
    optionsText.append("-future\n");
    optionsText.append("\tOutput training/test data plus predictions for all\n\t"
        + "targets up to horizon steps into the future.\n");
    optionsText.append("-future-graph <file name>\n");
    optionsText.append("\tGenerate a PNG graph of future predictions (beyond the end\n\t"
        + "of the series) for the training and/or test data for all targets.\n");
    optionsText.append("-l <name of input file>\n");
    optionsText.append("\tSets model input file.\n");
    optionsText.append("-d <name of output file>\n");
    optionsText.append("\tSets model output file.\n");

    // Get scheme-specific options
    if (forecaster instanceof OptionHandler) {
      optionsText.append("\nOptions specific to " + forecaster.getClass().getName() + ":\n\n");
      Enumeration enu = ((OptionHandler) forecaster).listOptions();
      while (enu.hasMoreElements()) {
        Option option = (Option) enu.nextElement();
        optionsText.append(option.synopsis()).append("\n");
        optionsText.append(option.description()).append("\n");
      }
    }

    // Get global information (if available)
    if (globalInfo) {
      try {
        String gi = getGlobalInfo(forecaster);
        optionsText.append(gi);
      } catch (Exception ex) {
        // quietly ignore
      }
    }
    return optionsText.toString();
  }
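  /*
   * Example command line (illustrative; file and target names are
   * placeholders, and it assumes a forecaster implementation whose main()
   * delegates to TSEvaluation.evaluateForecaster, e.g. WekaForecaster):
   *
   *   java weka.classifiers.timeseries.WekaForecaster \
   *       -t airline.arff -holdout 12 -horizon 6 -prime 12 \
   *       -metrics MAE,RMSE -p passenger_numbers:1 -future
   */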
  /**
   * Return the global info (if it exists) for the supplied forecaster.
   *
   * @param forecaster the forecaster to get the global info for
   * @return the global info (synopsis) for the forecaster
   * @throws Exception if there is a problem reflecting on the forecaster
   */
  protected static String getGlobalInfo(TSForecaster forecaster) throws Exception {
    BeanInfo bi = Introspector.getBeanInfo(forecaster.getClass());
    MethodDescriptor[] methods = bi.getMethodDescriptors();
    Object[] args = {};
    String result = "\nSynopsis for " + forecaster.getClass().getName() + ":\n\n";
    for (int i = 0; i < methods.length; i++) {
      String name = methods[i].getDisplayName();
      Method meth = methods[i].getMethod();
      if (name.equals("globalInfo")) {
        String globalInfo = (String) (meth.invoke(forecaster, args));
        result += globalInfo;
        break;
      }
    }
    return result;
  }

  /**
   * Evaluate the supplied forecaster using the supplied command-line options.
   *
   * @param forecaster the forecaster to evaluate
   * @param options an array of command-line options
   * @throws Exception if a problem occurs during evaluation
   */
  public static void evaluateForecaster(TSForecaster forecaster, String[] options) throws Exception {
    if (Utils.getFlag('h', options) || Utils.getFlag("-help", options)) {
      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);
      throw new Exception("\nHelp requested." + makeOptionString(forecaster, globalInfo));
    }

    String trainingFileName = Utils.getOption('t', options);
    String testingFileName = Utils.getOption('T', options);
    String testSizeS = Utils.getOption("holdout", options);
    String loadForecasterName = Utils.getOption('l', options);
    String saveForecasterName = Utils.getOption('d', options);
    String horizonS = Utils.getOption("horizon", options);
    String primeWindowSizeS = Utils.getOption("prime", options);
    String evalModuleList = Utils.getOption("metrics", options);
    String outputPredictionsS = Utils.getOption('p', options);
    String saveGraphS = Utils.getOption("graph", options);
    boolean printFutureForecast = Utils.getFlag("future", options);
    String graphFutureForecastS = Utils.getOption("future-graph", options);

    Instances inputHeader = null;
    Instances trainingData = null;
    Instances testData = null;
    int testSize = 0;
    int horizon = 1;
    int primeWindowSize = 1;
    boolean primeWithTest = Utils.getFlag("primeWithTest", options);
    boolean rebuildModelAfterEachStep = Utils.getFlag('r', options);
    boolean noEvalForTrainingData = Utils.getFlag('v', options);

    if (loadForecasterName.length() > 0 && trainingFileName.length() > 0) {
      System.out.println("When a model is loaded from a file any training"
          + " data is only used for priming the model.");
    }
    if (loadForecasterName.length() > 0) {
      ObjectInputStream ois = new ObjectInputStream(new FileInputStream(loadForecasterName));
      forecaster = (TSForecaster) ois.readObject();
      // try and read the header
      try {
        inputHeader = (Instances) ois.readObject();
      } catch (Exception ex) {
        // quietly ignore - a header may not have been serialized with the model
      }
      ois.close();
      // TODO should we save the last primeWindowSize instances of the training
      // data for priming the model (probably not)?
    }
    if (testingFileName.length() > 0 && testSizeS.length() > 0) {
      throw new Exception("Can't specify both a test file and a holdout size.");
    }
    if (testingFileName.length() == 0 && primeWithTest) {
      throw new Exception("Can't prime the forecaster with data from the test "
          + "file if no test file is specified!");
    }
    if (testSizeS.length() > 0) {
      testSize = Integer.parseInt(testSizeS);
    }
    if (testingFileName.length() == 0 && testSize == 0 && rebuildModelAfterEachStep) {
      throw new Exception("Can't rebuild the model after each forecasting step on"
          + " the test data if there is no test data specified or no holdout"
          + " size specified!");
    }
    if (trainingFileName.length() > 0) {
      trainingData = new Instances(new BufferedReader(new FileReader(trainingFileName)));
    } else {
      noEvalForTrainingData = true;
    }
    if (testingFileName.length() > 0) {
      testData = new Instances(new BufferedReader(new FileReader(testingFileName)));
    }
    if (horizonS.length() > 0) {
      horizon = Integer.parseInt(horizonS);
    }
    if (primeWindowSizeS.length() > 0) {
      primeWindowSize = Integer.parseInt(primeWindowSizeS);
    } else {
      throw new Exception("Must specify the number of instances to prime the "
          + "forecaster with for generating predictions (-prime)");
    }

    // output predictions setup
    String outputPredsForTarget = null;
    int stepNum = 1;
    if (outputPredictionsS.length() > 0) {
      String[] parts = outputPredictionsS.split(":");
      outputPredsForTarget = parts[0].trim();
      if (parts.length > 1) {
        stepNum = Integer.parseInt(parts[1]);
      }
    }

    // save graph setup
    String saveGraphFileName = null;
    List<String> graphTargets = new ArrayList<String>();
    List<Integer> stepsToPlot = new ArrayList<Integer>();
    if (saveGraphS.length() > 0) {
      String[] parts = saveGraphS.split(":");
      saveGraphFileName = parts[0].trim();
      String targets = parts[1].trim();
      String[] targetParts = targets.split(",");
      for (String s : targetParts) {
        graphTargets.add(s.trim());
      }
      if (parts.length > 2) {
        String[] graphStepNums = parts[2].split(",");
        for (String s : graphStepNums) {
          Integer step = Integer.valueOf(s.trim());
          if (step < 1 || step > horizon) {
            throw new Exception("Can't specify a step to graph that is less "
                + "than 1 or greater than the selected horizon!");
          }
          stepsToPlot.add(step);
        }
      } else {
        stepsToPlot.add(1);
      }
    }
    if (graphTargets.size() > 1 && stepsToPlot.size() > 1) {
      throw new Exception("Can't specify multiple targets to plot and multiple steps. "
          + "Specify either multiple target names to plot for a single step, or, "
          + "a single target to plot at multiple steps.");
    }

    if (forecaster instanceof OptionHandler && loadForecasterName.length() == 0) {
      ((OptionHandler) forecaster).setOptions(options);
    }

    TSEvaluation eval = new TSEvaluation(trainingData, testSize);
    if (testData != null) {
      eval.setTestData(testData);
    }
    eval.setHorizon(horizon);
    eval.setPrimeWindowSize(primeWindowSize);
    eval.setPrimeForTestDataWithTestData(primeWithTest);
    eval.setRebuildModelAfterEachTestForecastStep(rebuildModelAfterEachStep);
    eval.setForecastFuture(printFutureForecast || graphFutureForecastS.length() > 0);
    if (evalModuleList.length() > 0) {
      eval.setEvaluationModules(evalModuleList);
    }
    eval.setEvaluateOnTrainingData(!noEvalForTrainingData);

    if (loadForecasterName.length() == 0) {
      forecaster.buildForecaster(eval.getTrainingData());
    }
    System.out.println(forecaster.toString());

    // save the forecaster if requested
    if (saveForecasterName.length() > 0) {
      ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(saveForecasterName));
      oos.writeObject(forecaster);
      oos.writeObject(new Instances(eval.getTrainingData(), 0));
      oos.close();
    }

    eval.evaluateForecaster(forecaster, false);

    if (outputPredsForTarget != null && trainingData != null && !noEvalForTrainingData) {
      System.out.println(eval.printPredictionsForTrainingData(
          "=== Predictions for training data: " + outputPredsForTarget + " ("
              + stepNum + (stepNum > 1 ? "-steps ahead)" : "-step ahead)") + " ===",
          outputPredsForTarget, stepNum, primeWindowSize));
    }
    if (outputPredsForTarget != null && (testData != null || testSize > 0)) {
      int instanceNumberOffset = (trainingData != null && testSize > 0)
          ? eval.getTrainingData().numInstances() : 0;
      if (primeWithTest) {
        instanceNumberOffset += primeWindowSize;
      }
      System.out.println(eval.printPredictionsForTestData(
          "=== Predictions for test data: " + outputPredsForTarget + " ("
              + stepNum + (stepNum > 1 ? "-steps ahead)" : "-step ahead)") + " ===",
          outputPredsForTarget, stepNum, instanceNumberOffset));
    }
    if (printFutureForecast) {
      if (trainingData != null && !noEvalForTrainingData) {
        System.out.println("=== Future predictions from end of training data ===\n");
        System.out.println(eval.printFutureTrainingForecast(forecaster));
      }
      if (testData != null || testSize > 0) {
        System.out.println("=== Future predictions from end of test data ===\n");
        System.out.println(eval.printFutureTestForecast(forecaster));
      }
    }
    System.out.println(eval.toSummaryString());

    if (saveGraphFileName != null && trainingData != null && !noEvalForTrainingData) {
      if (graphTargets.size() > 1 || (graphTargets.size() == 1 && stepsToPlot.size() == 1)) {
        JPanel result = eval.graphPredictionsForTargetsOnTraining(GraphDriver.getDefaultDriver(),
            forecaster, graphTargets, stepsToPlot.get(0), primeWindowSize);
        GraphDriver.getDefaultDriver().saveChartToFile(result, saveGraphFileName + "_train", 1000, 600);
      } else {
        JPanel result = eval.graphPredictionsForStepsOnTraining(GraphDriver.getDefaultDriver(),
            forecaster, graphTargets.get(0), stepsToPlot, primeWindowSize);
        GraphDriver.getDefaultDriver().saveChartToFile(result, saveGraphFileName + "_train", 1000, 600);
      }
    }
    if (saveGraphFileName != null && (testData != null || testSize > 0)) {
      int instanceOffset = primeWithTest ? primeWindowSize : 0;
      if (graphTargets.size() > 1 || (graphTargets.size() == 1 && stepsToPlot.size() == 1)) {
        JPanel result = eval.graphPredictionsForTargetsOnTesting(GraphDriver.getDefaultDriver(),
            forecaster, graphTargets, stepsToPlot.get(0), instanceOffset);
        GraphDriver.getDefaultDriver().saveChartToFile(result, saveGraphFileName + "_test", 1000, 600);
      } else {
        JPanel result = eval.graphPredictionsForStepsOnTesting(GraphDriver.getDefaultDriver(),
            forecaster, graphTargets.get(0), stepsToPlot, instanceOffset);
        GraphDriver.getDefaultDriver().saveChartToFile(result, saveGraphFileName + "_test", 1000, 600);
      }
    }
    if (graphFutureForecastS.length() > 0) {
      if (trainingData != null && !noEvalForTrainingData && eval.m_trainingFuture != null) {
        JPanel result = eval.graphFutureForecastOnTraining(GraphDriver.getDefaultDriver(),
            forecaster, AbstractForecaster.stringToList(forecaster.getFieldsToForecast()));
        GraphDriver.getDefaultDriver().saveChartToFile(result, graphFutureForecastS + "_train", 1000, 600);
      }
      if ((testData != null || testSize > 0) && eval.m_testFuture != null) {
        JPanel result = eval.graphFutureForecastOnTesting(GraphDriver.getDefaultDriver(),
            forecaster, AbstractForecaster.stringToList(forecaster.getFieldsToForecast()));
        GraphDriver.getDefaultDriver().saveChartToFile(result, graphFutureForecastS + "_test", 1000, 600);
      }
    }
  }
  /**
   * Print a textual future forecast. Prints the historical supplied targets
   * followed by the forecasted values.
   *
   * @param forecaster the forecaster used to generate the supplied predictions
   * @param futureForecast the future forecasted values as a list of list of
   *          NumericPrediction objects. The outer list is indexed by step, i.e.
   *          the first element contains the predictions for t+1, the second for
   *          t+2, .... The inner list is indexed by target.
   * @param insts the historical instances
   * @return a String containing the historical data and forecasted values
   * @throws Exception if a problem occurs.
   */
  protected String printFutureForecast(TSForecaster forecaster,
      List<List<NumericPrediction>> futureForecast, Instances insts) throws Exception {
    if (futureForecast == null) {
      if (forecaster instanceof OverlayForecaster
          && ((OverlayForecaster) forecaster).isUsingOverlayData()) {
        return "Unable to generate future forecast because there is no "
            + "future overlay data available.";
      } else {
        return "Unable to generate future forecast.";
      }
    }

    boolean timeIsInstanceNum = true;
    double timeStart = 1;
    double timeDelta = 1;
    String dateFormat = null;
    TSLagMaker lagMaker = null;
    if (forecaster instanceof TSLagUser) {
      lagMaker = ((TSLagUser) forecaster).getTSLagMaker();
      if (!lagMaker.isUsingAnArtificialTimeIndex() && lagMaker.getAdjustForTrends()) {
        // suss out the time stamp
        String timeStampName = lagMaker.getTimeStampField();
        if (insts.attribute(timeStampName).isDate()) {
          dateFormat = insts.attribute(timeStampName).getDateFormat();
        }
        timeStart = insts.instance(0).value(insts.attribute(timeStampName));
        timeDelta = lagMaker.getDeltaTime();
        timeIsInstanceNum = false;
      }
    }

    List<String> targets = AbstractForecaster.stringToList(forecaster.getFieldsToForecast());
    int maxColWidth = 1;
    for (String t : targets) {
      if (t.length() > maxColWidth) {
        maxColWidth = t.length();
      }
    }

    int maxTimeColWidth = "inst#".length();
    SimpleDateFormat format = null;
    if (dateFormat != null) {
      // the time stamp needs to be interpreted as a date
      maxTimeColWidth = "Time".length();
      format = new SimpleDateFormat(dateFormat);
      String formatted = format.format(new Date((long) timeStart));
      if (formatted.length() > maxTimeColWidth) {
        maxTimeColWidth = formatted.length();
      }
    } else {
      // covers instance numbers and numeric time stamps
      double maxTime = timeStart + ((insts.numInstances() + m_horizon) * timeDelta);
      maxTimeColWidth = maxWidth(maxTimeColWidth, maxTime);
    }

    for (int i = 0; i < insts.numInstances(); i++) {
      Instance current = insts.instance(i);
      for (String t : targets) {
        if (!current.isMissing(insts.attribute(t))) {
          maxColWidth = maxWidth(maxColWidth, current.value(insts.attribute(t)));
        }
      }
    }

    StringBuffer result = new StringBuffer();
    if (dateFormat != null) {
      result.append(pad("Time", " ", maxTimeColWidth, false)).append(" ");
    } else {
      result.append(pad("inst#", " ", maxTimeColWidth, false)).append(" ");
    }
    for (String t : targets) {
      result.append(pad(t, " ", maxColWidth, true)).append(" ");
    }
    result.append("\n");

    int instNum = (int) timeStart;
    long dateTime = (long) timeStart;
    for (int i = 0; i < insts.numInstances() + futureForecast.size(); i++) {
      // rows beyond the end of the supplied data are forecasts - mark with "*"
      String predMarker = (i < insts.numInstances()) ? "" : "*";
      if (timeIsInstanceNum) {
        result.append(pad("" + instNum + predMarker, " ", maxTimeColWidth, false)).append(" ");
        instNum += (int) timeDelta;
      } else if (dateFormat != null) {
        result.append(pad(format.format(new Date(dateTime)) + predMarker, " ", maxTimeColWidth, false))
            .append(" ");
        if (lagMaker != null) {
          dateTime = (long) lagMaker.advanceSuppliedTimeValue(dateTime);
        } else {
          dateTime += (long) timeDelta;
        }
      } else {
        result.append(pad(Utils.doubleToString(timeStart, 4) + predMarker, " ", maxTimeColWidth, false))
            .append(" ");
        timeStart += timeDelta;
      }

      if (i < insts.numInstances()) {
        Instance current = insts.instance(i);
        for (String t : targets) {
          if (!current.isMissing(insts.attribute(t))) {
            double value = current.value(insts.attribute(t));
            result.append(pad(Utils.doubleToString(value, 4), " ", maxColWidth, true)).append(" ");
          } else {
            result.append(pad("?", " ", maxColWidth, true)).append(" ");
          }
        }
      } else {
        int step = i - insts.numInstances();
        List<NumericPrediction> preds = futureForecast.get(step);
        // loop over the targets
        for (NumericPrediction p : preds) {
          result.append(pad(Utils.doubleToString(p.predicted(), 4), " ", maxColWidth, true))
              .append(" ");
        }
      }
      result.append("\n");
    }
    return result.toString();
  }

  /**
   * Print the target values from the test data followed by the future forecast
   * from the end of the test data.
   *
   * @param forecaster the forecaster in use
   * @return a String containing the test target values and the future
   *         forecasted values beyond the end of the test data
   * @throws Exception if something goes wrong.
   */
  public String printFutureTestForecast(TSForecaster forecaster) throws Exception {
    if (m_testData == null || m_testData.numInstances() == 0) {
      throw new Exception("Can't forecast beyond the end of the"
          + " test instances because no test instances have been supplied!");
    }
    return printFutureForecast(forecaster, m_testFuture, m_testData);
  }

  /**
   * Print the forecasted values (for all targets) beyond the end of the
   * training data.
   *
   * @param forecaster the forecaster
   * @return a String which contains the target values from the training data
   *         followed by predictions for all targets for 1 - horizon number of
   *         steps into the future.
   */
  public String printFutureTrainingForecast(TSForecaster forecaster) throws Exception {
    if (m_trainingData == null || m_trainingData.numInstances() == 0) {
      throw new Exception("Can't forecast beyond the end of the"
          + " training instances because no training instances have been supplied!");
    }
    // Assume that the client has called evaluateForecaster() first
    return printFutureForecast(forecaster, m_trainingFuture, m_trainingData);
  }
* * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param targetNames the names of the targets to graph * @param preds a list (indexed by step) of lists (indexed by target) of * predictions * @param history Instances containing historical target values * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ protected JPanel graphFutureForecast(GraphDriver driver, TSForecaster forecaster, List<String> targetNames, List<List<NumericPrediction>> preds, Instances history) throws Exception { JPanel result = driver.getPanelFutureForecast(forecaster, preds, targetNames, history); return result; } /** * Graph historical and future forecasted values using a graph driver for the * training data. * * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param targetNames the names of the targets to graph * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphFutureForecastOnTraining(GraphDriver driver, TSForecaster forecaster, List<String> targetNames) throws Exception { if (m_trainingData == null || m_trainingData.numInstances() == 0) { throw new Exception("Can't forecast beyond the end of the" + " training instances because no training instances have been " + "supplied!"); } if (m_trainingFuture != null) { return graphFutureForecast(driver, forecaster, targetNames, m_trainingFuture, m_trainingData); } throw new Exception("[TSEvaluation] can't graph future forecast for training" + " because there are no future predictions."); } /** * Graph historical and future forecasted values using a graph driver for the * test data. * * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param targetNames the names of the targets to graph * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphFutureForecastOnTesting(GraphDriver driver, TSForecaster forecaster, List<String> targetNames) throws Exception { if (m_testData == null || m_testData.numInstances() == 0) { throw new Exception("Can't forecast beyond the end of the" + " test instances because no test instances have been " + "supplied!"); } if (m_testFuture != null) { return graphFutureForecast(driver, forecaster, targetNames, m_testFuture, m_testData); } throw new Exception("[TSEvaluation] can't graph future forecast for test set" + " because there are no future predictions."); } /** * Graph predicted values at the given step-ahead levels for a single target * on the training data. * * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param targetName the target to plot * @param stepsToPlot a list of steps at which to plot the predicted target * values - e.g. 3,4,5 would plot 3-step-ahead, 4-step-ahead and * 5-step-ahead predictions * @param instanceNumberOffset the offset from the start of the training data * at which plotting begins * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphPredictionsForStepsOnTraining(GraphDriver driver, TSForecaster forecaster, String targetName, List<Integer> stepsToPlot, int instanceNumberOffset) throws Exception { JPanel result = driver.getGraphPanelSteps(forecaster, m_predictionsForTrainingData, targetName, stepsToPlot, instanceNumberOffset, getTrainingData()); return result; } /** * Graph predicted values at the given step-ahead levels for a single target * on the test data.
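* <p>
* For example, to overlay the 1-, 3- and 5-step-ahead predictions for a
* single target on the test data (a sketch; {@code eval}, {@code forecaster}
* and the target name are assumed to come from a prior evaluation run, and
* {@code GraphDriver.getDefaultDriver()} is assumed from the stock package):
* </p>
* <pre>{@code
* List<Integer> steps = java.util.Arrays.asList(1, 3, 5);
* JPanel panel = eval.graphPredictionsForStepsOnTesting(
*     GraphDriver.getDefaultDriver(), forecaster, "passenger_numbers", steps, 0);
* }</pre>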
* * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param targetName the target to plot * @param stepsToPlot a list of steps at which to plot the predicted target * values - e.g. 3,4,5 would plot 3-step-ahead, 4-step-ahead and * 5-step-ahead predictions * @param instanceNumberOffset the offset from the start of the test data at * which plotting begins * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphPredictionsForStepsOnTesting(GraphDriver driver, TSForecaster forecaster, String targetName, List<Integer> stepsToPlot, int instanceNumberOffset) throws Exception { JPanel result = driver.getGraphPanelSteps(forecaster, m_predictionsForTestData, targetName, stepsToPlot, instanceNumberOffset, getTestData()); return result; } /** * Graph predicted values at the given step-ahead level for the supplied * targets on the training data. * * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param graphTargets a list of targets to plot * @param graphStepNum the step at which to plot the targets - e.g. 4 would * result in a graph of the 4-step-ahead predictions for the targets * @param instanceNumberOffset the offset from the start of the training data * at which plotting begins * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphPredictionsForTargetsOnTraining(GraphDriver driver, TSForecaster forecaster, List<String> graphTargets, int graphStepNum, int instanceNumberOffset) throws Exception { ErrorModule preds = m_predictionsForTrainingData.get(graphStepNum - 1); JPanel result = driver.getGraphPanelTargets(forecaster, preds, graphTargets, graphStepNum, instanceNumberOffset, getTrainingData()); return result; } /** * Graph predicted values at the given step-ahead level for the supplied * targets on the test data. * * @param driver the graph driver to graph with * @param forecaster the forecaster in use * @param graphTargets a list of targets to plot * @param graphStepNum the step at which to plot the targets - e.g. 4 would * result in a graph of the 4-step-ahead predictions for the targets * @param primeWindowSize the offset (priming window size) from the start of * the test data at which plotting begins * @return a JPanel containing the graph * @throws Exception if a problem occurs during graphing */ public JPanel graphPredictionsForTargetsOnTesting(GraphDriver driver, TSForecaster forecaster, List<String> graphTargets, int graphStepNum, int primeWindowSize) throws Exception { ErrorModule preds = m_predictionsForTestData.get(graphStepNum - 1); JPanel result = driver.getGraphPanelTargets(forecaster, preds, graphTargets, graphStepNum, primeWindowSize, getTestData()); return result; } /** * Print the predictions for a given target at a given step-ahead level on the * training data. * * @param title a title for the output * @param targetName the name of the target to print predictions for * @param stepAhead the step-ahead level - e.g. 3 would print the 3-step-ahead * predictions * @return a String containing the predicted and actual values * @throws Exception if the predictions can't be printed for some reason. */ public String printPredictionsForTrainingData(String title, String targetName, int stepAhead) throws Exception { return printPredictionsForTrainingData(title, targetName, stepAhead, 0); } /** * Print the predictions for a given target at a given step-ahead level from a * given offset on the training data. * * @param title a title for the output * @param targetName the name of the target to print predictions for * @param stepAhead the step-ahead level - e.g.
3 would print the 3-step-ahead * predictions * @param instanceNumberOffset the offset from the start of the training data * from which to print actual and predicted values * @return a String containing the predicted and actual values * @throws Exception if the predictions can't be printed for some reason. */ public String printPredictionsForTrainingData(String title, String targetName, int stepAhead, int instanceNumberOffset) throws Exception { ErrorModule predsForStep = getPredictionsForTrainingData(stepAhead); List<NumericPrediction> preds = predsForStep.getPredictionsForTarget(targetName); return printPredictions(title, preds, stepAhead, instanceNumberOffset); } /** * Print the predictions for a given target at a given step-ahead level on the * test data. * * @param title a title for the output * @param targetName the name of the target to print predictions for * @param stepAhead the step-ahead level - e.g. 3 would print the 3-step-ahead * predictions * @return a String containing the predicted and actual values * @throws Exception if the predictions can't be printed for some reason. */ public String printPredictionsForTestData(String title, String targetName, int stepAhead) throws Exception { return printPredictionsForTestData(title, targetName, stepAhead, 0); } /** * Print the predictions for a given target at a given step-ahead level from a * given offset on the test data. * * @param title a title for the output * @param targetName the name of the target to print predictions for * @param stepAhead the step-ahead level - e.g. 3 would print the 3-step-ahead * predictions * @param instanceNumberOffset the offset from the start of the test data * from which to print actual and predicted values * @return a String containing the predicted and actual values * @throws Exception if the predictions can't be printed for some reason. */ public String printPredictionsForTestData(String title, String targetName, int stepAhead, int instanceNumberOffset) throws Exception { ErrorModule predsForStep = getPredictionsForTestData(stepAhead); List<NumericPrediction> preds = predsForStep.getPredictionsForTarget(targetName); return printPredictions(title, preds, stepAhead, instanceNumberOffset); } /** * Prints predictions at the supplied step-ahead level from the supplied * instance number offset.
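* <p>
* Clients normally reach this via the public wrappers defined above, e.g. (a
* sketch assuming {@code eval} has already evaluated a forecaster on test
* data containing a target named {@code passenger_numbers}):
* </p>
* <pre>{@code
* String out = eval.printPredictionsForTestData(
*     "=== 3-step-ahead predictions for passenger_numbers ===",
*     "passenger_numbers", 3);
* System.out.println(out);
* }</pre>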
* * @param title a title to use for the output * @param preds the predictions for a single target * @param stepAhead the step that the supplied predictions are at * @param instanceNumberOffset the offset, from the start of the data used to * generate the predictions, at which the predictions start * @return a String containing the formatted predictions */ protected String printPredictions(String title, List<NumericPrediction> preds, int stepAhead, int instanceNumberOffset) { StringBuffer temp = new StringBuffer(); boolean hasConfidenceIntervals = false; int maxColWidth = "predictions".length(); int maxConfWidth = "interval".length(); for (NumericPrediction p : preds) { if (!Utils.isMissingValue(p.actual())) { maxColWidth = maxWidth(maxColWidth, p.actual()); } if (!Utils.isMissingValue(p.predicted())) { maxColWidth = maxWidth(maxColWidth, p.predicted()); } double[][] conf = p.predictionIntervals(); if (conf.length > 0) { hasConfidenceIntervals = true; maxConfWidth = maxWidth(maxConfWidth, conf[0][0]); maxConfWidth = maxWidth(maxConfWidth, conf[0][1]); } } maxConfWidth = (maxConfWidth * 2) + 1; temp.append(title + "\n\n"); temp.append(pad("inst#", " ", maxColWidth, true) + " "); temp.append(pad("actual", " ", maxColWidth, true) + " "); temp.append(pad("predicted", " ", maxColWidth, true) + " "); if (hasConfidenceIntervals) { temp.append(pad("conf", " ", maxConfWidth, true) + " "); } temp.append(pad("error", " ", maxColWidth, true) + "\n"); for (int i = 0; i < preds.size(); i++) { NumericPrediction pred = preds.get(i); String instNum = pad("" + (instanceNumberOffset + i + stepAhead), " ", maxColWidth, true); temp.append(instNum + " "); double actual = pred.actual(); String actualS = null; if (Utils.isMissingValue(actual)) { actualS = pad("?", " ", maxColWidth, true); } else { actualS = Utils.doubleToString(actual, 4); actualS = pad(actualS, " ", maxColWidth, true); } temp.append(actualS + " "); double predicted = pred.predicted(); String predictedS = null; if (Utils.isMissingValue(predicted)) { predictedS = pad("?", " ", maxColWidth, true); } else { predictedS = Utils.doubleToString(predicted, 4); predictedS = pad(predictedS, " ", maxColWidth, true); } temp.append(predictedS + " "); if (hasConfidenceIntervals) { double[][] limits = pred.predictionIntervals(); double low = limits[0][0]; double high = limits[0][1]; String limitsS = Utils.doubleToString(low, 3) + ":" + Utils.doubleToString(high, 3); limitsS = pad(limitsS, " ", maxConfWidth, true); temp.append(limitsS).append(" "); } double error = pred.error(); String errorS = null; if (Utils.isMissingValue(error)) { errorS = pad("?", " ", maxColWidth, true); } else { errorS = Utils.doubleToString(error, 4); errorS = pad(errorS, " ", maxColWidth, true); } temp.append(errorS + "\n"); } return temp.toString(); } /** Left- or right-pads source with padChar to the given length. */ private static String pad(String source, String padChar, int length, boolean leftPad) { StringBuffer temp = new StringBuffer(); length = length - source.length(); if (length > 0) { if (leftPad) { for (int i = 0; i < length; i++) { temp.append(padChar); } temp.append(source); } else { temp.append(source); for (int i = 0; i < length; i++) { temp.append(padChar); } } } else { temp.append(source); } return temp.toString(); } /** Returns the larger of maxWidth and the printed width of value. */ private static int maxWidth(int maxWidth, double value) { double width = Math.log(Math.abs(value)) / Math.log(10); if (width < 0) { width = 1; } // decimal point + 4 decimal places + 1 leading digit width += 6.0; if ((int) width > maxWidth) { maxWidth = (int) width; } return maxWidth; } /** * Generates a String containing the results of evaluating the forecaster.
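* <p>
* A minimal sketch of the intended call sequence (the two-argument
* constructor and the {@code evaluateForecaster} signature are assumed to
* match the stock Weka TSEvaluation API):
* </p>
* <pre>{@code
* TSEvaluation eval = new TSEvaluation(train, 24); // hold out the last 24 rows
* eval.evaluateForecaster(forecaster, true);       // build, then evaluate
* System.out.println(eval.toSummaryString());
* }</pre>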
* * @return a String containing the results of evaluating the forecaster * @throws Exception if a problem occurs */ public String toSummaryString() throws Exception { StringBuffer result = new StringBuffer(); int maxWidth = "10-steps-ahead".length() + 1; int maxWidthForLeftCol = "Target".length(); List<String> targetFieldNames = null; // update max width with respect to metrics for training data (if any) if (m_evaluateTrainingData) { Set<String> keys = m_metricsForTrainingData.keySet(); for (String key : keys) { List<TSEvalModule> evalsForKey = m_metricsForTrainingData.get(key); for (TSEvalModule e : evalsForKey) { if (targetFieldNames == null) { targetFieldNames = e.getTargetFields(); } double[] metricsForTargets = e.calculateMeasure(); for (double m : metricsForTargets) { maxWidth = maxWidth(maxWidth, m); } } } } // update max width with respect to metrics for test data (if any) if (m_evaluateTestData) { Set<String> keys = m_metricsForTestData.keySet(); for (String key : keys) { List<TSEvalModule> evalsForKey = m_metricsForTestData.get(key); for (TSEvalModule e : evalsForKey) { if (targetFieldNames == null) { targetFieldNames = e.getTargetFields(); } double[] metricsForTargets = e.calculateMeasure(); for (double m : metricsForTargets) { maxWidth = maxWidth(maxWidth, m); } } } } // update the max width of the left-hand column with respect to the target // field names (targetFieldNames stays null if neither training nor test // data was evaluated, so guard against that) if (targetFieldNames != null) { for (String targetName : targetFieldNames) { if (targetName.length() > maxWidthForLeftCol) { maxWidthForLeftCol = targetName.length(); } } } // update the max width of the left-hand column with respect to // the metric names for (TSEvalModule mod : m_evalModules) { if (mod.getDescription().length() + 2 > maxWidthForLeftCol) { maxWidthForLeftCol = mod.getDescription().length() + 2; // we'll indent these 2 spaces } } if (m_missingTimeStampTestSetRows != null && m_missingTimeStampTestSetRows.size() > 0) { result.append("\n--------------------------------------------------\n" + "Instances were inserted in the test data for the following\n" + "time-stamps (target values set by interpolation):\n\n"); for (int i = 0; i < m_missingTimeStampTestSetRows.size(); i++) { if (i == 0) { result.append(" " + m_missingTimeStampTestSetRows.get(i)); } else { result.append(", " + m_missingTimeStampTestSetRows.get(i)); } } result.append("\n--------------------------------------------------\n"); } if (m_missingTargetListTestSet != null && m_missingTargetListTestSet.size() > 0) { Collections.sort(m_missingTargetListTestSet); result.append("\n---------------------------------------------------\n" + "The following test instances had missing values\n" + "imputed via interpolation.
Check source data as\n" + "this may affect forecasting performance:\n\n"); for (int i = 0; i < m_missingTargetListTestSet.size(); i++) { if (i == 0) { result.append(" " + m_missingTargetListTestSet.get(i)); } else if (!m_missingTargetListTestSet.get(i).equals(m_missingTargetListTestSet.get(i - 1))) { result.append("," + m_missingTargetListTestSet.get(i)); } } result.append("\n---------------------------------------------------\n"); } if (m_missingTimeStampListTestSet != null && m_missingTimeStampListTestSet.size() > 0) { Collections.sort(m_missingTimeStampListTestSet); result.append("\n--------------------------------------------------------\n" + "The following test instances had missing time stamps:\n\n"); for (int i = 0; i < m_missingTimeStampListTestSet.size(); i++) { if (i == 0) { result.append(" " + m_missingTimeStampListTestSet.get(i)); } else { result.append("," + m_missingTimeStampListTestSet.get(i)); } } result.append("\n-------------------------------------------------------\n"); } if (m_evaluateTrainingData) { String temp = summaryMetrics(maxWidthForLeftCol, maxWidth, targetFieldNames, m_metricsForTrainingData); result.append("=== Evaluation on training data ===\n"); result.append(temp).append("\n"); result.append("Total number of instances: " + m_trainingData.numInstances()).append("\n\n"); } if (m_evaluateTestData) { String temp = summaryMetrics(maxWidthForLeftCol, maxWidth, targetFieldNames, m_metricsForTestData); result.append("=== Evaluation on test data ===\n"); result.append(temp).append("\n"); result.append("Total number of instances: " + m_testData.numInstances()).append("\n\n"); } return result.toString(); } private String summaryMetrics(int maxWidthForLeftCol, int maxWidth, List<String> targetFieldNames, Map<String, List<TSEvalModule>> metricsToUse) throws Exception { StringBuffer temp = new StringBuffer(); temp.append(pad("Target", " ", maxWidthForLeftCol, false)); for (int i = 0; i < m_horizon; i++) { String stepS = (i == 0) ?
"step-ahead" : "steps-ahead"; temp.append(pad("" + (i + 1) + "-" + stepS, " ", maxWidth, true)); } temp.append("\n"); /* * String tempKey = m_evalModules.get(1).getEvalName(); * temp.append(pad(" ", " ", maxWidthForLeftCol, false)); for (int i = * 0; i < m_horizon; i++) { int n = metricsToUse.get(tempKey).get(i).getN } */ temp.append(pad("=", "=", maxWidthForLeftCol + (m_horizon * maxWidth), true)); temp.append("\n"); for (int i = 0; i < targetFieldNames.size(); i++) { temp.append(targetFieldNames.get(i) + "\n"); // print out the count (N) for each step ahead for each target if (m_evalModules.get(1) instanceof ErrorModule) { String tempKey = m_evalModules.get(1).getEvalName(); temp.append(pad(" N", " ", maxWidthForLeftCol, false)); List<TSEvalModule> metricForSteps = metricsToUse.get(tempKey); for (TSEvalModule m : metricForSteps) { double[] countsForTargets = ((ErrorModule) m).countsForTargets(); double countForTarget = countsForTargets[i]; temp.append(pad(Utils.doubleToString(countForTarget, 0), " ", maxWidth, true)); } temp.append("\n"); } Set<String> keys = metricsToUse.keySet(); for (String key : keys) { String metricName = " " + metricsToUse.get(key).get(0).getDescription(); List<TSEvalModule> metricForSteps = metricsToUse.get(key); temp.append(pad(metricName, " ", maxWidthForLeftCol, false)); for (TSEvalModule m : metricForSteps) { double[] metricsForTargets = m.calculateMeasure(); double metricForTargetI = metricsForTargets[i]; String result = (Utils.isMissingValue(metricForTargetI)) ? "N/A" : Utils.doubleToString(metricForTargetI, 4); temp.append(pad(result, " ", maxWidth, true)); } temp.append("\n"); } } return temp.toString(); } }