weka.classifiers.timeseries.WekaForecaster.java Source code

Java tutorial

Introduction

Here is the source code for weka.classifiers.timeseries.WekaForecaster.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    WekaForecaster.java
 *    Copyright (C) 2010-2016 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers.timeseries;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.functions.LinearRegression;
import weka.classifiers.timeseries.core.*;
import weka.filters.supervised.attribute.TSLagMaker;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.logging.Logger;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.RemoveType;

import java.io.PrintStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Vector;

/**
 * Class that implements time series forecasting using a Weka regression scheme.
 * Makes use of the TSLagMaker class to handle all lagged attribute creation,
 * periodic attributes etc.
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 52593 $
 */
public class WekaForecaster extends AbstractForecaster implements TSLagUser, ConfidenceIntervalForecaster,
        OverlayForecaster, IncrementallyPrimeable, OptionHandler, Serializable {

    /** Version identifier used by Java serialization. */
    private static final long serialVersionUID = 5562710925011828590L;

    /** The format (header) of the original incoming instances. */
    protected Instances m_originalHeader;

    /**
     * A temporary header used when updating base learners that implement
     * PrimingDataLearner.
     */
    protected Instances m_tempHeader;

    /**
     * A copy of the input data provided to primeForecaster(). Declared
     * transient, so it is not written out with the serialized forecaster.
     */
    protected transient Instances m_primedInput;

    /** The format (header) of the transformed data. */
    protected Instances m_transformedHeader;

    /**
     * The base regression scheme to use. Initialised to LinearRegression here;
     * note that setOptions() installs SMOreg when no -W option is supplied.
     */
    protected Classifier m_forecaster = new LinearRegression();

    /** The individual forecasters, one for each target. */
    protected List<SingleTargetForecaster> m_singleTargetForecasters;

    /** True if the forecaster has been built. */
    protected boolean m_modelBuilt = false;

    /** True if an artificial time index has been added to the data. */
    protected boolean m_useArtificialTimeIndex = false;

    /**
     * The estimator used for calculating confidence limits.
     */
    protected ErrorBasedConfidenceIntervalEstimator m_confidenceLimitEstimator;

    /**
     * Number of steps ahead to calculate confidence limits for (0 = don't
     * calculate confidence limits).
     */
    protected int m_calculateConfLimitsSteps = 0;

    /** Confidence level at which confidence limits are computed. */
    protected double m_confidenceLevel = 0.95;
    /**
     * For removing any date attributes (TSLagMaker will remap date timestamps to
     * numeric).
     */
    protected RemoveType m_dateRemover;
    /**
     * Holds a list of training instance indexes that contained missing target
     * values that were replaced via interpolation.
     */
    protected List<Integer> m_missingTargetList;
    /**
     * Holds a list of training instance indexes that contained missing date
     * values (if a date time stamp is being used).
     */
    protected List<Integer> m_missingTimeStampList;
    // NOTE(review): undocumented in the original; presumably a textual
    // description of the rows counted in m_missingTimeStampList - confirm
    // against the code that populates it.
    protected List<String> m_missingTimeStampRows;
    /**
     * Logging object.
     */
    protected Logger m_log;
    /**
     * The lag maker to use. Declared without an access modifier, so it is
     * package-private; external code should use getTSLagMaker()/setTSLagMaker().
     */
    TSLagMaker m_lagMaker = new TSLagMaker();
    // Transient bookkeeping used by the incremental priming method when
    // detecting missing values in targets/date. None of these fields is
    // serialized.
    private transient Instance m_previousPrimeInstance = null;
    private transient Instances m_missingBuffer = null;
    private transient boolean m_hadLeadingMissingPrime = false;
    private transient boolean m_first = false;
    private transient boolean m_atLeastOneNonMissingTimeStamp = false;

    /**
     * Command-line entry point. Creates a fresh forecaster and passes it,
     * together with the supplied arguments, to {@code runForecaster}. Any
     * exception raised along the way is printed as a stack trace rather than
     * propagated.
     *
     * @param args general and scheme-specific command line arguments
     */
    public static void main(String[] args) {
        try {
            WekaForecaster forecaster = new WekaForecaster();
            forecaster.runForecaster(forecaster, args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reports whether the configured base learner implements
     * {@code BaseModelSerializer}, i.e. needs its own (de)serialization step.
     *
     * @return true if the base learner is a BaseModelSerializer, false otherwise
     */
    public boolean baseModelHasSerializer() {
        final Classifier base = m_forecaster;
        return base instanceof BaseModelSerializer;
    }

    /**
     * Saves the wrapped classifier of every single-target forecaster. Does
     * nothing unless {@link #baseModelHasSerializer()} is true. The model for
     * target number {@code i} is written to {@code filepath + ".base" + i}.
     *
     * @param filepath the path prefix of the files to save the base models to
     * @throws Exception if a wrapped classifier fails to serialize its model
     */
    public void saveBaseModel(String filepath) throws Exception {
        if (!baseModelHasSerializer()) {
            return;
        }

        int index = 0;
        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            BaseModelSerializer serializer = (BaseModelSerializer) target.getWrappedClassifier();
            serializer.serializeModel(filepath + ".base" + index);
            index++;
        }
    }

    /**
     * Loads the wrapped classifier of every single-target forecaster. Does
     * nothing unless {@link #baseModelHasSerializer()} is true. The model for
     * target number {@code i} is read from {@code filepath + ".base" + i}.
     *
     * @param filepath the path prefix of the files to load the base models from
     * @throws Exception if a wrapped classifier fails to load its model
     */
    public void loadBaseModel(String filepath) throws Exception {
        if (!baseModelHasSerializer()) {
            return;
        }

        int index = 0;
        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            BaseModelSerializer serializer = (BaseModelSerializer) target.getWrappedClassifier();
            serializer.loadSerializedModel(filepath + ".base" + index);
            index++;
        }
    }

    /**
     * Serializes the state of every single-target forecaster's wrapped
     * classifier. Does nothing unless {@link #usesState()} is true. The state
     * for target number {@code i} is written to {@code filepath + ".state" + i}.
     *
     * @param filepath the path prefix of the files to save the model state to
     * @throws Exception if a wrapped classifier fails to serialize its state
     */
    public void serializeState(String filepath) throws Exception {
        if (!usesState()) {
            return;
        }

        int index = 0;
        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            StateDependentPredictor predictor = (StateDependentPredictor) target.getWrappedClassifier();
            predictor.serializeState(filepath + ".state" + index);
            index++;
        }
    }

    /**
     * Loads previously serialized state into every single-target forecaster's
     * wrapped classifier. Does nothing unless {@link #usesState()} is true. The
     * state for target number {@code i} is read from
     * {@code filepath + ".state" + i}.
     *
     * @param filepath the path prefix of the files to load the model state from
     * @throws Exception if a wrapped classifier fails to load its state
     */
    public void loadSerializedState(String filepath) throws Exception {
        if (!usesState()) {
            return;
        }

        int index = 0;
        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            StateDependentPredictor predictor = (StateDependentPredictor) target.getWrappedClassifier();
            predictor.loadSerializedState(filepath + ".state" + index);
            index++;
        }
    }

    /**
     * Reports whether the configured base learner implements
     * {@code StateDependentPredictor}, i.e. whether the state-handling methods
     * of this class have any effect.
     *
     * @return true if the base learner is a StateDependentPredictor, false
     *         otherwise
     */
    public boolean usesState() {
        final Classifier base = m_forecaster;
        return base instanceof StateDependentPredictor;
    }

    /**
     * Resets the state held by every single-target forecaster's wrapped
     * classifier. Does nothing unless {@link #usesState()} is true.
     */
    public void clearPreviousState() {
        if (!usesState()) {
            return;
        }

        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            StateDependentPredictor predictor = (StateDependentPredictor) target.getWrappedClassifier();
            predictor.clearPreviousState();
        }
    }

    /**
     * Loads state into the model. Does nothing unless {@link #usesState()} is
     * true; otherwise element {@code i} of the supplied list is handed to the
     * wrapped classifier of single-target forecaster number {@code i}.
     *
     * @param previousState one state object per single-target forecaster, in
     *          forecaster order
     */
    public void setPreviousState(List<Object> previousState) {
        if (!usesState()) {
            return;
        }

        int index = 0;
        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            StateDependentPredictor predictor = (StateDependentPredictor) target.getWrappedClassifier();
            predictor.setPreviousState(previousState.get(index));
            index++;
        }
    }

    /**
     * Collects the last set state of the model: one entry per single-target
     * forecaster, in forecaster order.
     *
     * @return the state of the model to be used in the next prediction; an
     *         empty list when {@link #usesState()} is false
     */
    public List<Object> getPreviousState() {
        final List<Object> states = new ArrayList<>();
        if (!usesState()) {
            return states;
        }

        for (SingleTargetForecaster target : m_singleTargetForecasters) {
            StateDependentPredictor predictor = (StateDependentPredictor) target.getWrappedClassifier();
            states.add(predictor.getPreviousState());
        }
        return states;
    }

    /**
     * Provides a short name that describes the underlying algorithm. The name
     * is the forecaster specification (class name plus options) with the
     * common Weka package fragments stripped out.
     *
     * @return a short description of this forecaster, or an empty string if no
     *         base forecaster is set
     */
    @Override
    public String getAlgorithmName() {
        if (m_forecaster == null) {
            return "";
        }

        // Removed in this order from every position at which they occur.
        final String[] packageFragments = { "weka.classifiers.", "functions.", "bayes.", "rules.", "trees.",
                "meta.", "lazy.", "supportVector." };

        String name = getForecasterSpec();
        for (String fragment : packageFragments) {
            name = name.replace(fragment, "");
        }
        return name;
    }

    /**
     * Returns the TSLagMaker in use. Options pertaining to lag creation,
     * periodic attributes etc. are configured through this object.
     *
     * @return the TSLagMaker that we are using.
     */
    @Override
    public TSLagMaker getTSLagMaker() {
        return this.m_lagMaker;
    }

    /**
     * Replaces the TSLagMaker in use. Options pertaining to lag creation,
     * periodic attributes etc. are configured through this object.
     *
     * @param lagMaker the TSLagMaker to use.
     */
    @Override
    public void setTSLagMaker(TSLagMaker lagMaker) {
        this.m_lagMaker = lagMaker;
    }

    /**
     * Returns an enumeration describing the available options.
     * <p>
     * Each option's name and argument count must agree with what
     * {@link #setOptions(String[])} parses: flags read with
     * {@code Utils.getFlag} are declared with zero arguments, and names are
     * given without the leading dash.
     *
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>();

        newVector.add(new Option("\tSet the fields to forecast.", "F", 1, "-F <comma separated list of names>"));
        newVector.add(new Option("\tSet the fields to be considered as overlay data.", "overlay", 1,
                "-overlay <comma separated list of names>"));

        newVector.add(new Option("\tSet the minimum lag length to generate.\n\t(default = 1)", "L", 1, "-L <num>"));
        newVector.add(new Option("\tSet the maximum lag length to generate.\n\t(default = 12)", "M", 1, "-M <num>"));
        newVector.add(new Option("\tRemove leading instances where the values of lagged variables are unknown",
                "trim-leading", 0, "-trim-leading"));
        newVector.add(new Option("\tFine tune selection of lags within min and max by specifying ranges", "R", 1,
                "-R <ranges>"));
        newVector.add(new Option("\tAverage consecutive long lags.", "A", 0, "-A"));
        newVector.add(new Option("\tAverage those lags longer than this number of "
                + "time steps.\n\tUse in conjunction with -A.\n\t(default = 2)", "B", 1, "-B <num>"));
        newVector.add(new Option(
                "\tAverage this many consecutive long lags.\n\tUse in conjunction with -B (default = 2)", "C", 1,
                "-C <num>"));
        newVector.add(new Option("\tDon't adjust for trends.", "Z", 0, "-Z"));
        // Both of the following are parsed as flags by setOptions(), so they
        // take no argument; the name is given without the leading dash.
        newVector.add(new Option("\tDon't include time lag products", "no-time-lag-products", 0,
                "-no-time-lag-products"));
        newVector.add(new Option("\tDon't include powers of time", "no-powers-of-time", 0, "-no-powers-of-time"));
        newVector.add(new Option("\tSpecify the name of the timestamp field", "G", 1, "-G <timestamp name>"));
        newVector.add(new Option("\tAdjust for variance.", "V", 0, "-V"));
        newVector.add(new Option("\tSpecify the primary periodic field, \n\tif one exists already in the data "
                + "(e.g. day, month, quarter etc.).\n\tIf there is more than "
                + "one such field, choose the one with the finest granularity.\n\tThis field must be "
                + "cyclic and declared as nominal.", "periodic", 1, "-periodic <field name>"));
        newVector.add(new Option(
                "\tCalculate confidence limits for predictions\n\t(based on errors) for up to, and including, "
                        + "the specified\n\tnumber of time steps "
                        + "into the future\n\t(default = 0 (don't compute conf. levels)).",
                "conf", 1, "-conf <num steps>"));
        newVector.add(new Option("\tConfidence level for computing confidence limits.\n\t"
                + "Use in conjunction with -conf.\n\t(default = 0.95).", "P", 1, "-P <confidence level>"));
        newVector.add(new Option("\tSpecify the base regression scheme to use.\n\t"
                + "Supply a fully qualified name, along with options, enclosed in\n\t"
                + "quotes (e.g. \"weka.classifiers.functions.SMOreg -R 0.5\")."
                + "\n\t(default = weka.classifiers.functions.SMOreg)", "W", 1, "-W <classifier spec>"));
        newVector.add(new Option("\tAdd an AM/PM indicator (requires a date timestamp)", "am-pm", 0, "-am-pm"));
        // Name must be "dayofweek" to match the synopsis and setOptions().
        newVector.add(new Option("\tAdd a day of the week field (requires a date timestamp)", "dayofweek", 0,
                "-dayofweek"));
        newVector.add(new Option("\tAdd a day of the month field (requires a date timestamp)", "dayofmonth", 0,
                "-dayofmonth"));
        newVector.add(new Option("\tAdd a number of days in the month field (requires a date timestamp)",
                "numdaysinmonth", 0, "-numdaysinmonth"));
        newVector.add(new Option("\tAdd a weekend indicator (requires a date timestamp)", "weekend", 0, "-weekend"));
        newVector.add(new Option("\tAdd a month field (requires a date timestamp)", "month", 0, "-month"));
        newVector.add(new Option("\tAdd a quarter of the year field (requires a date timestamp)", "quarter", 0,
                "-quarter"));

        newVector.add(new Option("\tAdd a custom date-derived boolean field ("
                + "requires a date timestamp).\n\tFormat: \"fieldName=Test Test|Test Test| ...\"\n\twhere "
                + "Test = OPERATORyear:month:week-of-yr:week-of-month:"
                + "day-of-yr:day-of-month:day-of-week:hour:min:second\n\te.g. "
                + "XmasHoliday=>*:dec:*:*:*:24:*:*:*:* <*:jan:*:*:*:3:*:*:*:*\n\t"
                + "Legal OPERATORs are =,>,<,>=,<=. For = operator only\n\t"
                + "one Test is needed rather than a pair.\n\tThis option may"
                + " be specified more than once on the command line\n\tin order to define multiple variables.",
                "custom", 1, "-custom"));
        newVector.add(new Option("\tAdd a comma-separated 'skip' list of dates that should not\n\t"
                + "be considered as a time step. Days of the week,\n\t"
                + "months of the year, 'weekend', integers (indicating day of year\n\t"
                + ", hour of day etc.) or specific dates are all valid entries.\n\t"
                + "E.g sat,sun,27-08-2011,28-08-2011", "skip", 1, "-skip"));

        return newVector.elements();
    }

    /**
     * Gets the current settings of this forecaster as an option array.
     * <p>
     * Every switch emitted here uses the spelling that
     * {@link #setOptions(String[])} parses, so the overlay fields and the
     * "no time lag products" flag are not silently dropped when the array is
     * fed back in.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        List<String> options = new ArrayList<String>();

        options.add("-F");
        options.add(getFieldsToForecast());

        String overlayFields = getOverlayFields();
        if (overlayFields != null && overlayFields.length() > 0) {
            // setOptions() reads "-overlay"; "-O" would be ignored on re-parse.
            options.add("-overlay");
            options.add(overlayFields);
        }

        if (m_lagMaker.getRemoveLeadingInstancesWithUnknownLagValues()) {
            options.add("-trim-leading");
        }

        options.add("-L");
        options.add("" + m_lagMaker.getMinLag());
        options.add("-M");
        options.add("" + m_lagMaker.getMaxLag());

        if (m_lagMaker.getLagRange().length() > 0) {
            options.add("-R");
            options.add(m_lagMaker.getLagRange());
        }

        if (m_lagMaker.getAverageConsecutiveLongLags()) {
            options.add("-A");
        }
        // -B is documented as being used in conjunction with -A, so it must be
        // emitted when -A is set as well; emitting it unconditionally keeps the
        // output for the non-averaging case unchanged.
        options.add("-B");
        options.add("" + m_lagMaker.getAverageLagsAfter());

        options.add("-C");
        options.add("" + m_lagMaker.getNumConsecutiveLongLagsToAverage());

        if (!m_lagMaker.getAdjustForTrends()) {
            options.add("-Z");
        }

        if (!m_lagMaker.getIncludeTimeLagProducts()) {
            // Hyphens throughout, matching the flag name read by setOptions().
            options.add("-no-time-lag-products");
        }

        if (!m_lagMaker.getIncludePowersOfTime()) {
            options.add("-no-powers-of-time");
        }

        if (m_lagMaker.getAdjustForVariance()) {
            options.add("-V");
        }

        String timeStampField = m_lagMaker.getTimeStampField();
        if (timeStampField != null && timeStampField.length() > 0) {
            options.add("-G");
            options.add(timeStampField);
        }

        if (m_lagMaker.getAddAMIndicator()) {
            options.add("-am-pm");
        }

        if (m_lagMaker.getAddDayOfWeek()) {
            options.add("-dayofweek");
        }

        if (m_lagMaker.getAddDayOfMonth()) {
            options.add("-dayofmonth");
        }

        if (m_lagMaker.getAddWeekendIndicator()) {
            options.add("-weekend");
        }

        if (m_lagMaker.getAddMonthOfYear()) {
            options.add("-month");
        }

        if (m_lagMaker.getAddNumDaysInMonth()) {
            options.add("-numdaysinmonth");
        }

        if (m_lagMaker.getAddQuarterOfYear()) {
            options.add("-quarter");
        }

        Map<String, ArrayList<CustomPeriodicTest>> customPeriodics = m_lagMaker.getCustomPeriodics();

        if (customPeriodics != null && customPeriodics.keySet().size() > 0) {
            for (String name : customPeriodics.keySet()) {
                List<CustomPeriodicTest> tests = customPeriodics.get(name);

                // One -custom entry per periodic: its tests joined with '|'
                // and wrapped in double quotes.
                options.add("-custom");
                StringBuilder tempBuff = new StringBuilder();
                tempBuff.append("\"");
                for (int i = 0; i < tests.size(); i++) {
                    tempBuff.append(tests.get(i).toString());
                    if (i < tests.size() - 1) {
                        tempBuff.append("|");
                    } else {
                        tempBuff.append("\"");
                    }
                }
                options.add(tempBuff.toString());
            }
        }

        String skipEntries = m_lagMaker.getSkipEntries();
        if (skipEntries != null && skipEntries.length() > 0) {
            options.add("-skip");
            options.add(skipEntries);
        }

        String primaryPeriodic = m_lagMaker.getPrimaryPeriodicFieldName();
        if (primaryPeriodic != null && primaryPeriodic.length() > 0) {
            options.add("-periodic");
            options.add(primaryPeriodic);
        }

        options.add("-conf");
        options.add("" + getCalculateConfIntervalsForForecasts());

        options.add("-P");
        options.add("" + getConfidenceLevel());

        options.add("-W");
        options.add(getForecasterSpec());

        return options.toArray(new String[0]);
    }

    /**
     * Set the options for the forecaster.
     * <p>
     * Lag-related settings are forwarded to the TSLagMaker. The -F option is
     * mandatory; if -W is absent the base forecaster is set to
     * {@code weka.classifiers.functions.SMOreg}.
     *
     * @param options an array of options
     * @throws Exception if -F is missing, a numeric option cannot be parsed or
     *           is out of range, or the base classifier cannot be instantiated
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        String fieldsToForecast = Utils.getOption('F', options);
        if (fieldsToForecast.length() == 0) {
            throw new Exception("Must specify the name of at least one field to forecast!");
        }
        setFieldsToForecast(fieldsToForecast);

        String overlayFields = Utils.getOption("overlay", options);
        if (overlayFields.length() > 0) {
            setOverlayFields(overlayFields);
        }

        m_lagMaker.setRemoveLeadingInstancesWithUnknownLagValues(Utils.getFlag("trim-leading", options));

        String minL = Utils.getOption('L', options);
        if (minL.length() > 0) {
            int mL = Integer.parseInt(minL);
            // Validate before storing so that a rejected value does not stay
            // set on the lag maker after the exception.
            if (mL < 1) {
                throw new Exception("Minimum lag can't be less than 1!");
            }
            m_lagMaker.setMinLag(mL);
        }

        String maxL = Utils.getOption('M', options);
        if (maxL.length() > 0) {
            int mL = Integer.parseInt(maxL);
            m_lagMaker.setMaxLag(mL);
        }

        if (m_lagMaker.getMaxLag() < m_lagMaker.getMinLag()) {
            throw new Exception("Can't have the maximum lag set lower than the minimum lag!");
        }

        String lagRange = Utils.getOption('R', options);
        if (lagRange.length() > 0) {
            m_lagMaker.setLagRange(lagRange);
        }

        boolean avLongLags = Utils.getFlag('A', options);
        m_lagMaker.setAverageConsecutiveLongLags(avLongLags);

        String avLongerThan = Utils.getOption('B', options);
        if (avLongerThan.length() > 0) {
            int avL = Integer.parseInt(avLongerThan);
            if (avL < m_lagMaker.getMinLag() || avL > m_lagMaker.getMaxLag()) {
                throw new Exception("Average consecutive long lags value can't "
                        + "be less than the minimum lag or greater than the " + "maximum lag!");
            }
            m_lagMaker.setAverageLagsAfter(avL);
        }

        String consecutiveLongLagS = Utils.getOption('C', options);
        if (consecutiveLongLagS.length() > 0) {
            int consecutive = Integer.parseInt(consecutiveLongLagS);
            // The upper bound is relative to the "average lags after" value;
            // the message reports that same bound.
            int maxConsecutive = m_lagMaker.getMaxLag() - m_lagMaker.getAverageLagsAfter();
            if (consecutive < 1 || consecutive > maxConsecutive) {
                throw new Exception("Number of consecutive long lags to average "
                        + "must be greater than 0 and no greater than " + maxConsecutive);
            }
            m_lagMaker.setNumConsecutiveLongLagsToAverage(consecutive);
        }

        boolean dontAdjTrends = Utils.getFlag('Z', options);
        m_lagMaker.setAdjustForTrends(!dontAdjTrends);

        boolean noTimeLagProds = Utils.getFlag("no-time-lag-products", options);
        m_lagMaker.setIncludeTimeLagProducts(!noTimeLagProds);

        boolean noPowersOfTime = Utils.getFlag("no-powers-of-time", options);
        m_lagMaker.setIncludePowersOfTime(!noPowersOfTime);

        boolean adjVariance = Utils.getFlag("V", options);
        m_lagMaker.setAdjustForVariance(adjVariance);

        String timeStampF = Utils.getOption('G', options);
        if (timeStampF.length() > 0) {
            m_lagMaker.setTimeStampField(timeStampF);
        }

        m_lagMaker.setAddAMIndicator(Utils.getFlag("am-pm", options));
        m_lagMaker.setAddDayOfWeek(Utils.getFlag("dayofweek", options));
        m_lagMaker.setAddWeekendIndicator(Utils.getFlag("weekend", options));
        m_lagMaker.setAddMonthOfYear(Utils.getFlag("month", options));
        m_lagMaker.setAddQuarterOfYear(Utils.getFlag("quarter", options));
        m_lagMaker.setAddDayOfMonth(Utils.getFlag("dayofmonth", options));
        m_lagMaker.setAddNumDaysInMonth(Utils.getFlag("numdaysinmonth", options));

        // Custom date-derived periodic fields: -custom may appear several
        // times, so keep reading until no further occurrence is found.
        String customPeriodic = Utils.getOption("custom", options);
        while (customPeriodic.length() > 0) {
            m_lagMaker.addCustomPeriodic(customPeriodic);
            customPeriodic = Utils.getOption("custom", options);
        }

        String primaryPeriodicN = Utils.getOption("periodic", options);
        if (primaryPeriodicN.length() > 0) {
            m_lagMaker.setPrimaryPeriodicFieldName(primaryPeriodicN);
        }

        String skipString = Utils.getOption("skip", options);
        if (skipString.length() > 0) {
            m_lagMaker.setSkipEntries(skipString);
        }

        String confSteps = Utils.getOption("conf", options);
        if (confSteps.length() > 0) {
            int numSteps = Integer.parseInt(confSteps);
            if (numSteps < 0) {
                throw new Exception("Number of steps must be >= 0");
            }
            setCalculateConfIntervalsForForecasts(numSteps);
        }

        String confLevel = Utils.getOption('P', options);
        if (confLevel.length() > 0) {
            double cL = Double.parseDouble(confLevel);
            if (cL < 0 || cL > 1) {
                throw new Exception("Confidence level must be between 0 and 1.");
            }
            setConfidenceLevel(cL);
        }

        String baseClassifierS = Utils.getOption('W', options);
        if (baseClassifierS.length() == 0) {
            baseClassifierS = "weka.classifiers.functions.SMOreg";
        }
        String[] classifierSpec = Utils.splitOptions(baseClassifierS);
        if (classifierSpec.length == 0) {
            throw new Exception("Invalid classifier specification.");
        }
        // First token is the class name; blank it so that only the
        // classifier's own options remain in the array.
        String classifierName = classifierSpec[0];
        classifierSpec[0] = "";
        setBaseForecaster(AbstractClassifier.forName(classifierName, classifierSpec));
    }

    /**
     * Set the name of the time stamp field
     *
     * @param name the name of the time stamp attribute
     */
    /*
     * public void setTimeStampField(String name) {
     * m_lagMaker.setTimeStampField(name); }
     */

    /**
     * Get the name of the time stamp attribute
     *
     * @return the name of the time stamp attribute or an empty string if none has
     *         been specified/is in use
     */
    /*
     * public String getTimeStampField() { return m_lagMaker.getTimeStampField();
     * }
     */

    /**
     * Set whether to include an AM binary indicator attribute.
     *
     * @param am true if a binary AM indicator attribute is to be generated. Only
     *          has an effect if a date time stamp is in use.
     */
    /*
     * public void setAddAMIndicator(boolean am) {
     * m_lagMaker.setAddAMIndicator(am); }
     */

    /**
     * Returns true if an AM binary indicator is to be generated.
     *
     * @return true if an AM binary indicator is to be generated.
     */
    /*
     * public boolean getAddAMIndicator() { return m_lagMaker.getAddAMIndicator();
     * }
     */

    /**
     * Set whether to include a day of the week attribute
     *
     * @param am true if a day of the week attribute is to be generated. Only has
     *          an effect if a date time stamp is in use.
     */
    /*
     * public void setAddDayOfWeek(boolean d) { m_lagMaker.setAddDayOfWeek(d); }
     */

    /**
     * Returns true if a day of the week attribute is to be generated.
     *
     * @return true if a day of the week attribute is to be generated.
     */
    /*
     * public boolean getAddDayOfWeek() { return m_lagMaker.getAddDayOfWeek(); }
     */

    /**
     * Set whether to include a weekend indicator attribute.
     *
     * @param am true if a binary weekend indicator attribute is to be generated.
     *          Only has an effect if a date time stamp is in use.
     */
    /*
     * public void setAddWeekendIndicator(boolean w) {
     * m_lagMaker.setAddWeekendIndicator(w); }
     */

    /**
     * Returns true if a weekend binary indicator attribute is to be generated.
     *
     * @return true if a weekend binary indicator attribute is to be generated.
     */
    /*
     * public boolean getAddWeekendIndicator() { return
     * m_lagMaker.getAddWeekendIndicator(); }
     */

    /**
     * Set whether to include a month of the year attribute.
     *
     * @param am true if a month of the year attribute is to be generated. Only
     *          has an effect if a date time stamp is in use.
     */
    /*
     * public void setAddMonthOfYear(boolean m) { m_lagMaker.setAddMonthOfYear(m);
     * }
     */

    /**
     * Returns true if a month of the year attribute is to be generated.
     *
     * @return true if a month of the year attribute is to be generated.
     */
    /*
     * public boolean getAddMonthOfYear() { return m_lagMaker.getAddMonthOfYear();
     * }
     */

    /**
     * Set whether to include a quarter of the year attribute.
     *
     * @param am true if a quarter of the year attribute is to be generated. Only
     *          has an effect if a date time stamp is in use.
     */
    /*
     * public void setAddQuarterOfYear(boolean q) {
     * m_lagMaker.setAddQuarterOfYear(q); }
     */

    /**
     * Return true if a quarter of the year attribute is to be generated.
     *
     * @return if a quarter of the year attribute is to be generated.
     */
    /*
     * public boolean getAddQuarterOfYear() { return
     * m_lagMaker.getAddQuarterOfYear(); }
     */

    /**
     * Set the name of the field to be considered the primary periodic field (if
     * any). This field is one which is not a date-based attribute but is periodic
     * and cyclic and declared as nominal. Each distinct value can only be
     * succeeded by a single value (so that it is possible to set the appropriate
     * values in successive future instances). Any secondary, higher-grained
     * periodic fields will automatically be detected once a primary field is
     * specified.
     *
     *
     * @param p the name of a primary periodic field (if any)
     */
    /*
     * public void setPrimaryPeriodicFieldName(String p) { //m_primaryPeriodicName
     * = p; m_lagMaker.setPrimaryPeriodicFieldName(p); }
     */

    /**
     * Get the name of the primary periodic field (if set).
     *
     * @return the name of the primary periodic field or an empty string if none
     *         has been set/exists.
     */
    /*
     * public String getPrimaryPeriodicFieldName() { return
     * m_lagMaker.getPrimaryPeriodicFieldName(); }
     */

    /**
     * Get the specification (scheme name + option settings) of the underlying
     * Weka classifier. Options are appended, separated from the class name by
     * a single space, only when the classifier is an OptionHandler.
     *
     * @return the scheme name and options of the underlying Weka classifier
     */
    protected String getForecasterSpec() {
        final Classifier base = getBaseForecaster();
        final String className = base.getClass().getName();

        if (!(base instanceof OptionHandler)) {
            return className;
        }

        final String[] baseOptions = ((OptionHandler) base).getOptions();
        return className + " " + Utils.joinOptions(baseOptions);
    }

    /**
     * Register a custom date-derived periodic attribute. The definition is
     * passed straight through to the lag maker.
     *
     * @param customPeriodic the string definition of the custom date derived
     *          periodic attribute to add
     */
    public void addCustomPeriodic(String customPeriodic) {
        this.m_lagMaker.addCustomPeriodic(customPeriodic);
    }

    /**
     * Clear the list of custom date-derived periodic attributes held by the
     * lag maker.
     */
    public void clearCustomPeriodics() {
        this.m_lagMaker.clearCustomPeriodics();
    }

    /**
     * Set the names of the fields/attributes in the data to forecast and
     * register the same fields with the lag maker as the fields to lag.
     *
     * @param fieldsToForecast a list of names of fields to forecast
     * @throws Exception if a field(s) can't be found, or if multiple fields are
     *           specified and this forecaster can't predict multiple fields.
     */
    @Override
    public void setFieldsToForecast(String fieldsToForecast) throws Exception {
        // the superclass call must come first: the lag maker is handed
        // m_fieldsToForecast, which is presumably populated by that call
        super.setFieldsToForecast(fieldsToForecast);
        this.m_lagMaker.setFieldsToLag(this.m_fieldsToForecast);
    }

    /**
     * Get a comma-separated list of the fields that are considered to be
     * overlay fields.
     *
     * @return a comma-separated list of field names, or an empty string if the
     *         lag maker has no overlay fields (null or empty list)
     */
    @Override
    public String getOverlayFields() {
        List<String> overlayF = m_lagMaker.getOverlayFields();

        // An empty (but non-null) list previously ended up calling
        // substring(0, -1) and failing with a StringIndexOutOfBoundsException.
        if (overlayF == null || overlayF.isEmpty()) {
            return "";
        }

        StringBuilder list = new StringBuilder();
        boolean first = true;
        for (String f : overlayF) {
            // separator goes between entries only, so nothing needs trimming
            if (!first) {
                list.append(',');
            }
            list.append(f);
            first = false;
        }

        return list.toString();
    }

    /**
     * Set the fields to consider as overlay fields. A null argument is passed
     * on to the lag maker as null; anything else is first converted to a list
     * of field names.
     *
     * @param overlayFields a comma-separated list of fieldnames
     * @throws Exception if there is a problem setting the overlay fields
     */
    @Override
    public void setOverlayFields(String overlayFields) throws Exception {
        if (overlayFields != null) {
            m_lagMaker.setOverlayFields(AbstractForecaster.stringToList(overlayFields));
            return;
        }

        m_lagMaker.setOverlayFields(null);
    }

    /**
     * Get the number of forecast steps for which confidence intervals will be
     * computed.
     *
     * @return the number of steps for which confidence intervals will be
     *         computed.
     */
    @Override
    public int getCalculateConfIntervalsForForecasts() {
        return this.m_calculateConfLimitsSteps;
    }

    /**
     * Set the number of steps for which confidence intervals are computed.
     * E.g. a value of 5 means that confidence bounds will be computed for
     * 1-step-ahead predictions, 2-step-ahead predictions, ..., 5-step-ahead
     * predictions.
     *
     * @param steps the number of steps for which to compute confidence
     *          intervals.
     */
    @Override
    public void setCalculateConfIntervalsForForecasts(int steps) {
        this.m_calculateConfLimitsSteps = steps;
    }

    /**
     * Indicates whether this forecaster computes confidence limits for some or
     * all of its future forecasts, i.e. whether
     * getCalculateConfIntervalsForForecasts() is greater than zero.
     *
     * @return true if confidence limits will be produced for some or all of its
     *         future forecasts.
     */
    @Override
    public boolean isProducingConfidenceIntervals() {
        final int confidenceSteps = getCalculateConfIntervalsForForecasts();
        return confidenceSteps > 0;
    }

    /**
     * Get the confidence level used when computing confidence intervals.
     *
     * @return the confidence level.
     */
    @Override
    public double getConfidenceLevel() {
        return this.m_confidenceLevel;
    }

    /**
     * Set the confidence level used when computing confidence intervals. The
     * value is stored as supplied; no range check is performed here.
     *
     * @param confLevel the confidence level to use.
     */
    @Override
    public void setConfidenceLevel(double confLevel) {
        this.m_confidenceLevel = confLevel;
    }

    /**
     * Get the base Weka regression scheme that is used to make forecasts.
     *
     * @return the base Weka regression scheme
     */
    public Classifier getBaseForecaster() {
        return this.m_forecaster;
    }

    /**
     * Set the base Weka regression scheme to use. The reference is stored as
     * supplied (no copy is taken here).
     *
     * @param f the base Weka regression scheme to use for forecasting.
     */
    public void setBaseForecaster(Classifier f) {
        this.m_forecaster = f;
    }

    /**
     * Indicates whether overlay fields are configured on the lag maker, in
     * which case overlay data is expected to be supplied for future time steps
     * when making a forecast.
     *
     * @return true if overlay data is expected.
     */
    @Override
    public boolean isUsingOverlayData() {
        final List<String> overlay = m_lagMaker.getOverlayFields();
        return overlay != null && !overlay.isEmpty();
    }

    /**
     * Reset the forecaster: marks the model as not built, resets the lag
     * maker, drops the date remover, primed input and confidence limit
     * estimator, and installs fresh (empty) lists for recording missing
     * targets and missing time stamps.
     */
    @Override
    public void reset() {
        this.m_modelBuilt = false;

        // lag maker and the filter that strips date attributes
        this.m_lagMaker.reset();
        this.m_dateRemover = null;

        // artifacts from any earlier prime/build cycle
        this.m_primedInput = null;
        this.m_confidenceLimitEstimator = null;

        // bookkeeping of missing values encountered in the training data
        this.m_missingTargetList = new ArrayList<Integer>();
        this.m_missingTimeStampList = new ArrayList<Integer>();
        this.m_missingTimeStampRows = new ArrayList<String>();
    }

    /**
     * Builds a new forecasting model using the supplied training data. The
     * instances in the data are assumed to be sorted in ascending order of time
     * and equally spaced in time.
     * <p>
     * Steps performed: the forecaster is reset; missing values are replaced on
     * a copy of the training data; the copy is transformed by the lag maker;
     * date attributes are removed; one SingleTargetForecaster (wrapping a copy
     * of the base forecaster) is built per field to forecast; and, if
     * requested, confidence interval offsets are computed.
     *
     * @param insts the training instances.
     * @param progress an optional varargs parameter supplying progress objects to
     *          report/log to
     * @throws Exception if the model can't be constructed for some reason.
     */
    @Override
    public void buildForecaster(Instances insts, PrintStream... progress) throws Exception {

        reset();
        m_originalHeader = new Instances(insts, 0);

        // missing value replacement operates on a copy of the supplied data set
        insts = weka.classifiers.timeseries.core.Utils.replaceMissing(new Instances(insts), m_fieldsToForecast,
                m_lagMaker.getTimeStampField(), false, m_lagMaker.getPeriodicity(), m_lagMaker.getSkipEntries(),
                m_missingTargetList, m_missingTimeStampList, m_missingTimeStampRows);

        for (PrintStream out : progress) {
            out.println("Transforming input data...");
        }

        // create lagged (and any other derived) attributes
        Instances transformed = m_lagMaker.getTransformedData(insts);

        // strip date attributes from the transformed data
        m_dateRemover = new RemoveType();
        m_dateRemover.setOptions(new String[] { "-T", "date" });
        m_dateRemover.setInputFormat(transformed);
        transformed = Filter.useFilter(transformed, m_dateRemover);
        m_transformedHeader = new Instances(transformed, 0);

        // one wrapped copy of the base scheme per target field
        m_singleTargetForecasters = new ArrayList<SingleTargetForecaster>();
        for (String target : m_fieldsToForecast) {
            SingleTargetForecaster targetForecaster = new SingleTargetForecaster();
            targetForecaster.setClassifier(AbstractClassifier.makeCopy(m_forecaster));
            targetForecaster.buildForecaster(transformed, target);
            m_singleTargetForecasters.add(targetForecaster);
        }

        // flagged as built before the confidence interval step, which is
        // handed this forecaster
        m_modelBuilt = true;

        if (m_calculateConfLimitsSteps > 0) {
            for (PrintStream out : progress) {
                out.println("Computing confidence intervals...");
            }

            // -1 indicates not using an artificial time index
            int artificialTimeStart = -1;
            if (m_lagMaker.isUsingAnArtificialTimeIndex()) {
                artificialTimeStart = 1;
            }

            ErrorBasedConfidenceIntervalEstimator estimator = new ErrorBasedConfidenceIntervalEstimator();
            estimator.calculateConfidenceOffsets(this, insts, m_lagMaker.getMaxLag(), artificialTimeStart,
                    m_calculateConfLimitsSteps, m_confidenceLevel, progress);
            // only published once the offsets have been calculated
            m_confidenceLimitEstimator = estimator;
        }
    }

    /**
     * Produce a textual description of the built forecaster: the attribute
     * names of the transformed training data, any time stamps for which
     * instances were inserted, the (sorted) indices of training instances with
     * imputed target values or missing time stamps, and finally the description
     * of each single-target forecaster.
     * <p>
     * This method does not modify the forecaster: the index lists are sorted
     * on private copies rather than in place.
     *
     * @return a description of the forecaster, or a short message if it has not
     *         been built yet
     */
    @Override
    public String toString() {
        if (!m_modelBuilt) {
            return "Forecaster has not been built yet!";
        }

        StringBuilder result = new StringBuilder();
        result.append("Transformed training data:\n\n");
        for (int i = 0; i < m_transformedHeader.numAttributes(); i++) {
            result.append("              ").append(m_transformedHeader.attribute(i).name()).append("\n");
        }

        if (m_missingTimeStampRows != null && m_missingTimeStampRows.size() > 0) {
            result.append("\n--------------------------------------------------------\n"
                    + "Instances were inserted in the training data for the\n"
                    + "following time-stamps (target values set by interpolation):\n\n");
            for (int i = 0; i < m_missingTimeStampRows.size(); i++) {
                result.append(i == 0 ? "              " : ", ").append(m_missingTimeStampRows.get(i));
            }

            result.append("\n--------------------------------------------------------\n");
        }

        if (m_missingTargetList != null && m_missingTargetList.size() > 0) {
            // sort a copy so that toString() has no side effect on the field
            List<Integer> sortedTargets = new ArrayList<Integer>(m_missingTargetList);
            Collections.sort(sortedTargets);
            result.append("\n---------------------------------------------------\n"
                    + "The following training instances had missing values\n"
                    + "imputed via interpolation. Check source data as\n"
                    + "this may affect forecasting performance:\n\n");
            for (int i = 0; i < sortedTargets.size(); i++) {
                if (i == 0) {
                    result.append("              ").append(sortedTargets.get(i));
                } else if (!sortedTargets.get(i).equals(sortedTargets.get(i - 1))) {
                    // the same index can be recorded more than once; since the
                    // list is sorted, duplicates are adjacent and printed once
                    result.append(",").append(sortedTargets.get(i));
                }
            }
            result.append("\n---------------------------------------------------\n");
        }

        if (m_missingTimeStampList != null && m_missingTimeStampList.size() > 0) {
            // sort a copy so that toString() has no side effect on the field
            List<Integer> sortedTimeStamps = new ArrayList<Integer>(m_missingTimeStampList);
            Collections.sort(sortedTimeStamps);
            result.append("\n--------------------------------------------------------\n"
                    + "The following training instances had missing time stamps:\n\n");
            for (int i = 0; i < sortedTimeStamps.size(); i++) {
                result.append(i == 0 ? "              " : ",").append(sortedTimeStamps.get(i));
            }
            result.append("\n-------------------------------------------------------\n");
        }

        for (int i = 0; i < m_singleTargetForecasters.size(); i++) {
            result.append("\n").append(m_singleTargetForecasters.get(i)).append("\n");
        }

        return result.toString();
    }

    /**
     * Push a single instance through the lag maker and return the result.
     *
     * @param source the instance to process
     * @param incrementArtificialTime passed through to the lag maker's
     *          processInstance()
     * @param setAnyPeriodic passed through to the lag maker's
     *          processInstance()
     * @return the instance produced by the lag maker
     * @throws Exception if the lag maker fails to process the instance
     */
    protected Instance applyFilters(Instance source, boolean incrementArtificialTime, boolean setAnyPeriodic)
            throws Exception {
        return m_lagMaker.processInstance(source, incrementArtificialTime, setAnyPeriodic);
    }

    /**
     * Supply the (potentially) trained model with enough historical data, up to
     * and including the current time point, in order to produce a forecast.
     * Instances are assumed to be sorted in ascending order of time and equally
     * spaced in time.
     * <p>
     * All priming state is re-initialised, the lag histories are cleared and
     * each supplied instance is then fed, in order, to
     * primeForecasterIncremental().
     *
     * @param insts the instances to prime the model with
     * @throws Exception if the model can't be primed for some reason.
     */
    @Override
    public void primeForecaster(Instances insts) throws Exception {

        // start a fresh batch of priming data
        m_primedInput = new Instances(insts);
        m_previousPrimeInstance = null; // only used by the incremental method
        m_missingBuffer = new Instances(insts, 0);
        m_hadLeadingMissingPrime = false;
        m_first = true;
        m_atLeastOneNonMissingTimeStamp = false;

        m_lagMaker.clearLagHistories();

        // Does the underlying forecaster learn/update on the raw (non-lagged)
        // priming data? The first target's wrapped classifier decides.
        final SingleTargetForecaster firstTarget = m_singleTargetForecasters.get(0);
        final boolean learnsFromRawPrimingData = firstTarget.getWrappedClassifier() instanceof PrimingDataLearner
                && !(firstTarget.getWrappedClassifier() instanceof TSLagUser);

        if (learnsFromRawPrimingData) {
            m_tempHeader = new Instances(insts, 0);
            for (int target = 0; target < m_fieldsToForecast.size(); target++) {
                PrimingDataLearner learner = (PrimingDataLearner) m_singleTargetForecasters.get(target)
                        .getWrappedClassifier();
                learner.reset();
            }
        }

        // prime one instance at a time, oldest first
        for (int row = 0; row < m_primedInput.numInstances(); row++) {
            primeForecasterIncremental(m_primedInput.instance(row));
            m_first = false;
        }
    }

    /**
     * Update the priming information incrementally, i.e. one instance at a time.
     * To indicate the start of a new batch of priming data an empty set of
     * instances must be passed to TSForecaster.primeForecaster() before the first
     * call to primeForecasterIncremental()
     * <p>
     * Processing of each instance:
     * <ul>
     * <li>if the first target's wrapped classifier is a PrimingDataLearner that
     * is not a TSLagUser, every target's learner is updated with the
     * (untransformed) target value of the instance;</li>
     * <li>if a real time stamp field is in use with trend adjustment, the time
     * stamp is checked to be strictly greater than that of the previous
     * priming instance;</li>
     * <li>an instance with a missing target value is held back in a buffer; a
     * missing time stamp is filled in from the previous instance when that is
     * possible;</li>
     * <li>when an instance that is not held back arrives while the buffer is
     * non-empty, it is appended to the buffer, missing values are replaced and
     * the whole buffer is pushed through the lag maker; otherwise the instance
     * is pushed through the lag maker directly.</li>
     * </ul>
     * Note that the supplied instance may be modified in place (its time stamp
     * value is set when it was missing and could be derived from the previous
     * instance).
     *
     * @param inst the instance to prime with.
     * @throws Exception if primeForecaster() has not been called first, if the
     *           time stamps are not in ascending order, or if something else
     *           goes wrong.
     */
    @Override
    public void primeForecasterIncremental(Instance inst) throws Exception {
        // m_primedInput is only set by primeForecaster()
        if (m_primedInput == null) {
            throw new Exception("WekaForecaster hasn't been initialized with " + "a call to primeForecaster()!!");
        }

        // Does the underlying forecaster learn/update on priming data?
        if (m_singleTargetForecasters.get(0).getWrappedClassifier() instanceof PrimingDataLearner) {
            if (!(m_singleTargetForecasters.get(0).getWrappedClassifier() instanceof TSLagUser)) {
                // underlying forecaster does not use lagged data, so we can
                // just update using the untransformed priming data
                for (int i = 0; i < m_fieldsToForecast.size(); i++) {
                    PrimingDataLearner l = (PrimingDataLearner) m_singleTargetForecasters.get(i)
                            .getWrappedClassifier();

                    // attach a copy to the temporary header and make target i the
                    // class attribute so that classValue() yields target i's value
                    Instance toUpdateWith = (Instance) inst.copy();
                    toUpdateWith.setDataset(m_tempHeader);
                    m_tempHeader.setClass(m_tempHeader.attribute(m_fieldsToForecast.get(i)));
                    l.updateForecaster(toUpdateWith.classValue());
                }
            }
        }

        // a real (non-artificial) time stamp field is in use and trends are
        // being adjusted for
        if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0) {

            // if we have a previous row (with a non-missing time stamp), then
            // check that time values are strictly increasing
            if (!m_first && m_previousPrimeInstance != null && !m_previousPrimeInstance
                    .isMissing(inst.dataset().attribute(m_lagMaker.getTimeStampField()))) {

                double previous = m_previousPrimeInstance
                        .value(inst.dataset().attribute(m_lagMaker.getTimeStampField()));
                double current = inst.value(inst.dataset().attribute(m_lagMaker.getTimeStampField()));

                if (current <= previous) {
                    throw new Exception("Priming instances do not appear to be in "
                            + "ascending order of the time stamp field (" + m_lagMaker.getTimeStampField() + ")! "
                            + m_previousPrimeInstance + " : " + inst);
                }
            }
        }

        // wasBuffered: inst has been added to m_missingBuffer
        // onlyTimeMissing: the time stamp is missing but no target is
        boolean wasBuffered = false;
        boolean onlyTimeMissing = false;

        if (inst.hasMissingValue()) {
            // first check to see if its a target or date
            // ok == true means that none of the target fields is missing
            boolean ok = true;
            for (String target : m_fieldsToForecast) {
                if (inst.isMissing(inst.dataset().attribute(target))) {
                    ok = false;
                    break;
                }
            }
            // check date
            if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                    && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0) {

                if (inst.isMissing(inst.dataset().attribute(m_lagMaker.getTimeStampField()))) {
                    onlyTimeMissing = ok;

                    // do we have a previous instance with non-missing time stamp?
                    if (m_previousPrimeInstance != null && !m_previousPrimeInstance
                            .isMissing(inst.dataset().attribute(m_lagMaker.getTimeStampField()))) {

                        // set the correct time stamp value by incrementing by the
                        // lag maker's delta time

                        // this handles trailing missing time stamp values
                        // (note: the supplied instance is modified in place)
                        double newValue = m_previousPrimeInstance
                                .value(inst.dataset().attribute(m_lagMaker.getTimeStampField()));
                        newValue = m_lagMaker.advanceSuppliedTimeValue(newValue);
                        inst.setValue(inst.dataset().attribute(m_lagMaker.getTimeStampField()), newValue);
                        // System.err.println("** " + inst);
                    }
                } else {
                    m_atLeastOneNonMissingTimeStamp = true;
                }
            }

            // at least one target value is missing: hold the instance back so
            // that the value can be interpolated once a later instance arrives
            if (!ok) {
                if (m_first) {
                    // can't do anything with leading missing values, unless its the
                    // time stamp that's missing

                    // leading missing time stamps will get filled in by the missing
                    // value replacement routine when the buffer gets flushed. Trailing
                    // missing time stamp values get handled above. Exception when
                    // *all* time stamp values are missing. Nothing can be done in this
                    // case

                    // NOTE(review): onlyTimeMissing is only ever assigned the value
                    // of ok, and ok is false in this branch, so this always sets
                    // m_hadLeadingMissingPrime to true - confirm that is intended.
                    // Also, an instance whose *only* missing value is a leading time
                    // stamp has ok == true and therefore is not buffered here, which
                    // appears to differ from the comment above - confirm.
                    m_hadLeadingMissingPrime = !onlyTimeMissing;
                    // ---
                    m_missingBuffer.add(inst);
                    // m_previousPrimeInstance = inst;
                    wasBuffered = true;
                } /*
                   * else if (m_missingBuffer.numInstances() == 0 &&
                   * m_previousPrimeInstance != null) { // first one with missing - need
                   * to add the previous instance
                   * m_missingBuffer.add(m_previousPrimeInstance);
                   * m_previousPrimeInstance = null;
                   *
                   * m_missingBuffer.add(inst); wasBuffered = true; }
                   */else /* if (m_missingBuffer.numInstances() > 0) */ {
                    m_missingBuffer.add(inst);
                    wasBuffered = true;
                    // m_previousPrimeInstance = inst;
                }
            }
        } else {
            if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                    && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0) {

                // we have at least one valid time stamp value - missing value routine
                // can increment/decrement from this to fill in missing time stamp
                // values
                // forward (increment) is done above; backward is done by
                // Utils.replaceMissing()
                m_atLeastOneNonMissingTimeStamp = true;
            }
        }

        // remembered for the ordering check and time stamp fill-in on the next call
        m_previousPrimeInstance = inst;

        if (!wasBuffered && m_missingBuffer.numInstances() > 0) {
            // this instance was not held back but earlier ones are pending:
            // add this one first
            m_missingBuffer.add(inst);
            wasBuffered = true;

            // interpolate missing and then flush the buffer
            Instances missingReplaced = weka.classifiers.timeseries.core.Utils.replaceMissing(m_missingBuffer,
                    m_fieldsToForecast, m_lagMaker.getTimeStampField(), false, m_lagMaker.getPeriodicity(),
                    m_lagMaker.getSkipEntries());

            // push every buffered instance (including the current one) into
            // the lag maker
            for (int i = 0; i < missingReplaced.numInstances(); i++) {
                applyFilters(missingReplaced.instance(i), false, false);
            }
            // start again with an empty buffer
            m_missingBuffer = new Instances(m_primedInput, 0);
            // m_previousPrimeInstance = inst;
        } else if (!wasBuffered) {
            // nothing pending - push this instance straight into the lag maker
            applyFilters(inst, false, false);
            // m_previousPrimeInstance = inst;
        }
        m_first = false;
    }

    /**
     * Make a one-step-ahead forecast for the supplied test instance by asking
     * the base forecaster (m_forecaster) to classify it.
     * <p>
     * NOTE(review): buildForecaster() trains copies of m_forecaster (one per
     * target) rather than m_forecaster itself - confirm that classifying with
     * the base scheme here is intended.
     *
     * @param transformed a test instance, corresponding to the next time step,
     *          that has been transformed using the lag maker
     *
     * @return a one-step-ahead forecast corresponding to the test instance.
     * @throws Exception if a problem occurs
     */
    protected double forecastOneStepAhead(Instance transformed) throws Exception {
        final double prediction = m_forecaster.classifyInstance(transformed);
        return prediction;
    }

    /**
     * Produce a forecast for the target field(s) without any overlay data.
     * Assumes that the model has been built and/or primed so that a forecast
     * can be generated. Delegates to the overlay-aware forecast method with a
     * null overlay.
     *
     * @param numSteps number of forecasted values to produce for each target.
     *          E.g. a value of 5 would produce a prediction for t+1, t+2, ...,
     *          t+5.
     * @param progress an optional varargs parameter supplying progress objects to
     *          report/log to
     * @return a List of Lists (one for each step) of forecasted values for each
     *         target
     * @throws Exception if the forecast can't be produced for some reason.
     */
    @Override
    public List<List<NumericPrediction>> forecast(int numSteps, PrintStream... progress) throws Exception {
        final Instances noOverlay = null;
        return forecast(numSteps, noOverlay, progress);
    }

    /**
     * Produce a forecast for the target field(s). Assumes that the model has been
     * built and/or primed so that a forecast can be generated.
     *
     * @param numSteps number of forecasted values to produce for each target.
     *          E.g. a value of 5 would produce a prediction for t+1, t+2, ...,
     *          t+5.
     * @param overlay optional overlay data for the period to be forecasted (may
     *          be null if no overlay data has been used during training)
     * @param progress an optional varargs parameter supplying progress objects to
     *          report/log to
     * @return a List of Lists (one for each step) of forecasted values for each
     *         target
     * @throws Exception if the forecast can't be produced for some reason.
     */
    @Override
    public List<List<NumericPrediction>> forecast(int numSteps, Instances overlay, PrintStream... progress)
            throws Exception {

        if (overlay != null) {
            if (m_lagMaker.getOverlayFields() == null || m_lagMaker.getOverlayFields().size() == 0) {
                throw new Exception("[WekaForecaster] overlay data has been supplied to the"
                        + " forecasting routine but no overlay data has been trained with.");
            }

            String message = m_originalHeader.equalHeadersMsg(overlay);
            if (message != null) {
                throw new Exception("[WekaForecaster] supplied overlay data does not "
                        + "have the same structure as the data used to learn " + "the model!");
            }
        } else {
            // check to see if we've been trained with overlay data
            if (m_lagMaker.getOverlayFields() != null && m_lagMaker.getOverlayFields().size() > 0) {
                throw new Exception("[WekaForecaster] was trained with overlay data but "
                        + "none has been supplied for making a forecast!");
            }
        }

        // we need to:
        // 1) input a new instance with ? for target into the filter chain in order
        // to push the most recent
        // known target value into the history
        // 2) output() from filter
        // 3) make the t + 1 prediction
        // 4) set the value of the target for the input instance (this instance, now
        // stored in
        // the history buffer of the TimeseriesTranslate filters, will now have the
        // predicted target
        // value - hopefully)

        // 4 won't work. Need to add the input instance (with prediction set) to the
        // end
        // of the primed input data set and then call primeForecaster() again

        // double[] finalForecast = new double[numSteps];

        // Check the incremental prime buffer to see if there are any pending
        // instances to prime. We won't be able to interpolate missing values for
        // the
        // remaining instances (since there wasn't a prime instance received with
        // non-missing
        // values to right-hand-side bracket the ones with missing values. So, we'll
        // just
        // have to flush this buffer (which means the missing values will go into
        // the history
        // list and the underlying predictor's missing value strategy will be
        // invoked). We
        // should warn to the progress/log though. Similarly, for leading prime
        // instances
        // with missing values (i.e. no left-hand-side non-missing bracketing
        // instance) we
        // should warn to the progress/log
        if (m_missingBuffer != null && m_missingBuffer.numInstances() > 0) {
            // make one more attempt to interpolate missing values. In the incremental
            // priming process, the missing value interpolation for currently buffered
            // leading instances is *only* triggered when receiving a priming instance
            // where *all* target values are not missing. If this never occurs, i.e.
            // every priming instance has at least one of the targets missing, then
            // it is still possible that some of the missing values for some targets
            // can be interpolated
            System.err.println("Here..... \n\n" + m_missingBuffer);
            Instances missingReplaced = weka.classifiers.timeseries.core.Utils.replaceMissing(m_missingBuffer,
                    m_fieldsToForecast, m_lagMaker.getTimeStampField(), false, m_lagMaker.getPeriodicity(),
                    m_lagMaker.getSkipEntries());

            for (int i = 0; i < m_missingBuffer.numInstances(); i++) {
                applyFilters(missingReplaced.instance(i), false, false);
            }

            for (PrintStream p : progress) {
                p.println("WARNING: priming data contained missing target/date values that could "
                        + "not be interpolated/replaced. Forecasting performance may be " + "adversely affected.");
            }
        }
        if (m_hadLeadingMissingPrime) {
            for (PrintStream p : progress) {
                p.println("WARNING: priming data contained missing target/date values that could "
                        + "not be interpolated/replaced. Forecasting performance may be " + "adversely affected.");
            }
        }

        if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0
                && !m_atLeastOneNonMissingTimeStamp) {

            throw new Exception("All values of the time stamp field (" + m_lagMaker.getTimeStampField()
                    + ") were missing in the priming " + "data!");

        }

        List<List<NumericPrediction>> forecastForSteps = new ArrayList<List<NumericPrediction>>();
        int stepsToDo = (overlay != null) ? overlay.numInstances() : numSteps;
        boolean setPeriodics = true, incrementTime = true;

        // check overlay fields (if present)
        if (overlay != null) {
            for (String field : m_lagMaker.getOverlayFields()) {
                Attribute overl = m_originalHeader.attribute(field);
                if (overl == null) {
                    throw new Exception(
                            "Unable to find overlay field '" + field + "' in the supplied overlay instances");
                }
            }
        }

        for (int i = 0; i < stepsToDo; i++) {
            incrementTime = true;

            // set the target to missing first
            double[] newVals = new double[m_originalHeader.numAttributes()];
            // set all to missing
            for (int j = 0; j < newVals.length; j++) {
                newVals[j] = Utils.missingValue();
            }

            // copy over any overlay fields and time (if present in overlay data)
            if (overlay != null) {
                Instance overlayI = overlay.instance(i);
                for (String field : m_lagMaker.getOverlayFields()) {
                    int index = m_originalHeader.attribute(field).index();
                    newVals[index] = overlayI.value(index);
                }

                // non missing time stamp?
                if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                        && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0) {

                    int timeStampIndex = m_originalHeader.attribute(m_lagMaker.getTimeStampField()).index();
                    if (!overlayI.isMissing(timeStampIndex)) {
                        newVals[timeStampIndex] = overlayI.value(timeStampIndex);

                        // want to store, rather than increment, time value since
                        // we've read a time value from the overlay data
                        incrementTime = false;
                    }
                }
            }

            // create the test instance (original format)
            Instance origTest = new DenseInstance(1.0, newVals);
            origTest.setDataset(m_originalHeader);

            // System.err.println("Original with periodic set " + origTest);

            Instance transformedWithDate = origTest;

            // do all the filters
            // System.err.println("--- " + transformedWithDate);

            // transformedWithDate = applyFilters(transformedWithDate, true, true);
            transformedWithDate = m_lagMaker.processInstancePreview(transformedWithDate, incrementTime,
                    setPeriodics);

            // the date time stamp (if exists) has now been remapped, so we can remove
            // the original
            m_dateRemover.input(transformedWithDate);
            Instance transformed = m_dateRemover.output();

            // System.err.println(transformedWithDate.dataset());
            // System.err.println(transformedWithDate);

            // System.err.println("Transformed: " + transformed);

            // get a prediction
            double[] preds = new double[m_singleTargetForecasters.size()];
            for (int j = 0; j < m_singleTargetForecasters.size(); j++) {
                preds[j] = m_singleTargetForecasters.get(j).forecastOneStepAhead(transformed);
            }

            // predictions at step i for all the targets (can only handle a single
            // target at
            // present)
            List<NumericPrediction> finalForecast = new ArrayList<NumericPrediction>();

            // add confidence limits (if applicable)
            for (int j = 0; j < m_fieldsToForecast.size(); j++) {
                if (m_confidenceLimitEstimator != null && i < m_calculateConfLimitsSteps) {
                    double[] limits = m_confidenceLimitEstimator
                            .getConfidenceLimitsForTarget(m_fieldsToForecast.get(j), preds[j], i + 1);
                    double[][] limitsToAdd = new double[1][];
                    limitsToAdd[0] = limits;
                    finalForecast.add(new NumericPrediction(Utils.missingValue(), preds[j], 1.0, limitsToAdd));
                } else {
                    finalForecast.add(new NumericPrediction(Utils.missingValue(), preds[j]));
                }
            }
            forecastForSteps.add(finalForecast);

            // set the value of the target in the original test instance
            for (int j = 0; j < m_fieldsToForecast.size(); j++) {
                int targetIndex = m_originalHeader.attribute(m_fieldsToForecast.get(j)).index();
                origTest.setValue(targetIndex, preds[j]);
            }

            // If we have a real time stamp, then set the incremented value in the
            // original
            // test instance (doesn't really need to be done if we've read a
            // non-missing
            // time value out of any supplied overlay data)
            if (!m_lagMaker.isUsingAnArtificialTimeIndex() && m_lagMaker.getAdjustForTrends()
                    && m_lagMaker.getTimeStampField() != null && m_lagMaker.getTimeStampField().length() > 0) {
                int timeIndex = m_originalHeader.attribute(m_lagMaker.getTimeStampField()).index();
                double timeValue = transformedWithDate
                        .value(transformedWithDate.dataset().attribute(m_lagMaker.getTimeStampField()));
                origTest.setValue(timeIndex, timeValue);
            }

            // now re-prime the forecaster. Incremental method will never buffer here
            // because we never have missing targets, since we've just forecasted
            // them!
            primeForecasterIncremental(origTest);
        }

        // TODO fix this - move to eval class?
        if (m_lagMaker.isUsingAnArtificialTimeIndex()) {
            m_lagMaker.incrementArtificialTimeValue(-(stepsToDo - 1));
            // -= (numSteps - 1);
        }

        // invalidate the primed input header
        m_primedInput = null;

        return forecastForSteps;
    }

    /**
     * Inner class implementing a forecaster for a single target. Wraps a Weka
     * classifier and, when more than one field is being forecasted, a Remove
     * filter that strips the other target fields before the classifier sees the
     * data.
     *
     * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
     *
     */
    protected class SingleTargetForecaster implements Serializable {

        /** for serialization */
        private static final long serialVersionUID = -4404412501006669036L;

        /** the underlying Weka classifier used to make forecasts */
        protected Classifier m_targetForecaster;

        /**
         * filter for removing the targets other than the one to be forecasted
         * (null when there are no other targets present in the training data)
         */
        private Remove m_otherTargetRemover;

        /** index of the target in the data supplied to buildForecaster() */
        private int m_classIndex;

        /** name of the target; null until buildForecaster() has succeeded */
        private String m_className;

        /**
         * Set the base classifier to use
         *
         * @param classifier the base classifier to use
         */
        public void setClassifier(Classifier classifier) {
            m_targetForecaster = classifier;
        }

        /**
         * Get the base classifier
         *
         * @return the base classifier
         */
        public Classifier getWrappedClassifier() {
            return m_targetForecaster;
        }

        /**
         * Builds the single target forecaster. Assumes that the training data has
         * already been transformed by the lag maker. The supplied data is copied
         * before the class index is set, so the caller's Instances object is not
         * modified.
         *
         * @param train the transformed training data
         * @param targetName the name of the target to forecast
         * @param progress an optional varargs parameter of PrintStream to report
         *          progress to
         * @throws Exception if the base classifier has not been set, the target
         *           can't be found in the data, the target is not numeric, or a
         *           problem occurs while filtering or building the classifier
         */
        public void buildForecaster(Instances train, String targetName, PrintStream... progress) throws Exception {

            if (m_targetForecaster == null) {
                throw new Exception("[SingleTargetForecaster] base classifier has" + " not been set!");
            }

            train = new Instances(train);

            // attribute(String) yields null for an unknown name, so test the
            // attribute itself rather than dereferencing it to get an index
            Attribute target = train.attribute(targetName);
            if (target == null) {
                throw new Exception("Can't find target field '" + targetName + "' in the data!");
            }

            if (!target.isNumeric()) {
                throw new Exception("[SingleTargetForecaster] target '" + targetName + "' is not numeric!");
            }

            m_classIndex = target.index();
            train.setClassIndex(m_classIndex);
            m_className = targetName;

            // discard any filter left over from a previous build so that a stale
            // remover is never applied at forecasting time
            m_otherTargetRemover = null;

            // collect the (1-based) indices of all other targets present in the data
            StringBuilder otherTargets = new StringBuilder();
            for (String n : m_fieldsToForecast) {
                if (!n.equals(targetName)) {
                    Attribute other = train.attribute(n);
                    if (other != null) {
                        if (otherTargets.length() > 0) {
                            otherTargets.append(',');
                        }
                        otherTargets.append(other.index() + 1);
                    }
                }
            }

            if (otherTargets.length() > 0) {
                m_otherTargetRemover = new Remove();
                m_otherTargetRemover.setAttributeIndices(otherTargets.toString());
                m_otherTargetRemover.setInputFormat(train);
                train = Filter.useFilter(train, m_otherTargetRemover);
            }

            if (progress != null) {
                for (PrintStream p : progress) {
                    p.println("Building forecaster for target: " + m_className);
                }
            }

            m_targetForecaster.buildClassifier(train);
        }

        /**
         * Makes a one-step-ahead forecast. Note that this sets the class index on
         * the dataset that the supplied instance belongs to.
         *
         * @param transformed the test instance for the next time step. This will
         *          have already been processed by the lag maker, and thus will
         *          contain lagged variables and other derived variables.
         *
         * @return the one-step-ahead forecast corresponding to the supplied test
         *         instance
         * @throws Exception if the base classifier has not been set or something
         *           goes wrong during the forecasting process
         */
        public double forecastOneStepAhead(Instance transformed) throws Exception {
            if (m_targetForecaster == null) {
                throw new Exception("[SingleTargetForecaster] base classifier has" + " not been set!");
            }

            transformed.dataset().setClassIndex(m_classIndex);

            // strip the other targets (if any) so the instance matches the
            // structure that the classifier was trained on
            if (m_otherTargetRemover != null) {
                m_otherTargetRemover.input(transformed);
                transformed = m_otherTargetRemover.output();
            }
            double pred = m_targetForecaster.classifyInstance(transformed);

            // undo the log if adjusting for variance
            if (m_lagMaker.getAdjustForVariance()) {
                pred = Math.exp(pred);
            }
            return pred;
        }

        /**
         * Returns the target name followed by the textual description of the
         * wrapped classifier, or a placeholder message if no classifier has been
         * set or buildForecaster() has not yet been run.
         *
         * @return a textual description of this single target forecaster
         */
        @Override
        public String toString() {
            if (m_targetForecaster == null || m_className == null) {
                return "SingleTargetForecaster: no model built yet!";
            }

            return m_className + ":\n" + m_targetForecaster.toString();
        }
    }
}