cerrla.Performance.java Source code


Here is the source code for cerrla.Performance.java
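
The Performance class tracks the CERRLA agent's episodic rewards for a particular goal: it keeps a sliding window of recent scores, computes their mean and standard deviation with Apache Commons Math, estimates training convergence, and writes elite-policy and raw performance files for each run. A short usage sketch follows the listing.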

Source

/*
 *    This file is part of the CERRLA algorithm
 *
 *    CERRLA is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    CERRLA is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with CERRLA. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    src/cerrla/Performance.java
 *    Copyright (C) 2012 Samuel Sarjant
 */
package cerrla;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.RandomAccessFile;
import java.io.Serializable;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;

import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;

import cerrla.modular.GoalCondition;
import rrlFramework.Config;
import rrlFramework.RRLExperiment;
import rrlFramework.RRLObservations;

/**
 * This object notes the performance of the agent with regard to a particular
 * goal.
 * 
 * @author Sam Sarjant
 */
public class Performance implements Serializable {
    private static final long serialVersionUID = -2959329608447253751L;

    /**
     * The fraction of standard deviation that the score is allowed to wander
     * (currently corresponds to a 5% +- difference).
     */
    private static final double CONVERGENCE_PERCENT_BUFFER = 0.13;

    /** Unicode symbol for +-. */
    public static final String SD_SYMBOL = "\u00b1";

    /** The recorded performance details, indexed by episode. */
    private SortedMap<Integer, Double[]> performanceDetails_;

    /** If the performance is frozen. */
    private transient boolean frozen_;
    /** A queue of recent reward deviations for each single policy. */
    private Queue<Double> internalSDs_;
    /** The minimum reward received in the current sample's rewards. */
    private double minEpisodeReward_;
    /** Notes the minimum and maximum reward received. */
    private double[] minMaxReward_;
    /** If this performance is for a modular generator. */
    private boolean modularPerformance_;
    /** A queue of the most recent episodic rewards. */
    private Queue<Double> recentScores_;
    /** The current run for this performance object. */
    private int runIndex_;
    /** The time at which this performance object was created. */
    private long trainingStartTime_;
    /** The time at which training ended (recorded when frozen). */
    private long trainingEndTime_;
    /** The final elite scores received for the best policy. */
    private ArrayList<Double> finalEliteScores_;

    /** A recording of performance scores for each episode, read from file. */
    private static SortedMap<Integer, Float[]> performanceMap_;
    /** The parsed runtime (in milliseconds) of the experiment. */
    private static long runTime_;

    /**
     * A new performance object for a module learner, so files are saved in the
     * module directory.
     * 
     * @param modulePerformance
     *            A throwaway boolean to denote modular performance.
     * @param run
     *            The current run number.
     */
    public Performance(boolean modulePerformance, int run) {
        this(run);
        modularPerformance_ = true;
    }

    /**
     * A constructor for a fresh performance object.
     * 
     * @param runIndex
     *            The run index to append to saved files.
     */
    public Performance(int runIndex) {
        performanceDetails_ = new TreeMap<Integer, Double[]>();
        finalEliteScores_ = new ArrayList<Double>();
        recentScores_ = new LinkedList<Double>();
        internalSDs_ = new LinkedList<Double>();
        minMaxReward_ = new double[2];
        minMaxReward_[0] = Float.MAX_VALUE;
        minMaxReward_[1] = -Float.MAX_VALUE;
        trainingStartTime_ = System.currentTimeMillis();
        runIndex_ = runIndex;
    }

    /**
     * Records performance scores using sliding windows of results.
     * 
     * @param currentEpisode
     *            The current episode.
     */
    public void recordPerformanceScore(int currentEpisode) {
        if (recentScores_.isEmpty())
            return;
        // Transform the queues into arrays
        double[] vals = new double[recentScores_.size()];
        int i = 0;
        for (Double val : recentScores_)
            vals[i++] = val.doubleValue();
        double[] envSDs = new double[internalSDs_.size()];
        i = 0;
        for (Double envSD : internalSDs_)
            envSDs[i++] = envSD.doubleValue();

        Mean m = new Mean();
        StandardDeviation sd = new StandardDeviation();
        double mean = m.evaluate(vals);
        double meanDeviation = sd.evaluate(envSDs) * CONVERGENCE_PERCENT_BUFFER;
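        // meanDeviation scales the spread of the recorded deviations by the
        // convergence buffer; it is printed as the +- range below.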

        Double[] details = new Double[PerformanceDetails.values().length];
        details[PerformanceDetails.EPISODE.ordinal()] = Double.valueOf(currentEpisode);
        details[PerformanceDetails.MEAN.ordinal()] = mean;
        details[PerformanceDetails.SD.ordinal()] = sd.evaluate(vals);
        performanceDetails_.put(currentEpisode, details);

        // Output current means
        if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && !frozen_) {
            DecimalFormat formatter = new DecimalFormat("#0.00");
            String meanString = formatter.format(mean);
            String sdString = formatter.format(meanDeviation);
            System.out.println("Average performance: " + meanString + " " + SD_SYMBOL + " " + sdString);
        }
        if (frozen_) {
            System.out.println(currentEpisode + ": " + details[PerformanceDetails.MEAN.ordinal()]);
        }
    }

    /**
     * Saves the elite policies to file.
     * 
     * @param elites
     *            The elite policies to save.
     * @param goal
     *            The goal of the behaviour.
     */
    private void saveElitePolicies(Collection<PolicyValue> elites, GoalCondition goal) throws Exception {
        File outputFile = new File(Config.TEMP_FOLDER, Config.getInstance().getElitesFile().getName());
        outputFile.createNewFile();
        FileWriter wr = new FileWriter(outputFile);
        BufferedWriter buf = new BufferedWriter(wr);

        Config.writeFileHeader(buf, goal);

        for (PolicyValue pv : elites) {
            buf.write(pv.getPolicy().toOnlyUsedString() + "\n");
            buf.write(pv.getValue() + "\n\n");
        }

        buf.close();
        wr.close();
    }

    /**
     * Saves the performance to file and outputs it. Saves to two files: one
     * with a breakdown of the generators, and another with purely episodic
     * performances.
     * 
     * @param policyGenerator
     *            The policy generator to save the distributions from.
     * @param perfFile
     *            The performance file to save to.
     * @param finalWrite
     *            If this write was the final write for the run.
     */
    private void savePerformance(PolicyGenerator policyGenerator, File perfFile, boolean finalWrite)
            throws Exception {
        // TODO May be memory leak around here somewhere.
        if (performanceDetails_.isEmpty())
            return;

        FileWriter wr = null;
        BufferedWriter buf = null;
        int lastKey = performanceDetails_.lastKey();
        Double[] lastDetails = performanceDetails_.get(lastKey);

        if (Config.getInstance().getGeneratorFile() == null) {
            // Create the file if it does not already exist
            boolean newFile = perfFile.createNewFile();

            wr = new FileWriter(perfFile, true);
            buf = new BufferedWriter(wr);

            // If the file is fresh, add the program args to the top
            if (newFile)
                Config.writeFileHeader(buf, policyGenerator.getGoalCondition());

            policyGenerator.saveGenerators(buf, finalWrite);
            buf.write("\n\n" + lastKey + "\t" + lastDetails[PerformanceDetails.MEAN.ordinal()] + "\n");
            buf.write("\n\n\n");

            if (finalWrite) {
                buf.write(Config.END_PERFORMANCE + "\n");
                buf.write("Total training time: "
                        + RRLExperiment.toTimeFormat(trainingEndTime_ - trainingStartTime_));
            }

            buf.close();
            wr.close();
        }

        // Writing the raw performance
        File rawNumbers = null;
        if (Config.getInstance().getGeneratorFile() == null)
            rawNumbers = new File(perfFile.getAbsoluteFile() + "raw");
        else
            rawNumbers = new File(perfFile.getAbsoluteFile() + "greedy");

        wr = new FileWriter(rawNumbers);
        buf = new BufferedWriter(wr);

        if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && policyGenerator.getGoalCondition().isMainGoal())
            System.out.println("Average episode scores:");

        if (finalWrite) {
            // Average the final elite scores
            Mean m = new Mean();
            double[] finalElites = new double[finalEliteScores_.size()];
            int i = 0;
            for (Double val : finalEliteScores_)
                finalElites[i++] = val;
            double meanBestVal = m.evaluate(finalElites);
            lastDetails[PerformanceDetails.ELITEMAX.ordinal()] = meanBestVal;
        }

        // Noting the raw numbers
        buf.write("Episode\tMean\tSD\tEliteMean\tEliteMax\tNumSlots\tNumRules\tN\tConvergence\n");
        for (Integer episode : performanceDetails_.keySet()) {
            Double[] details = performanceDetails_.get(episode);
            String performanceData = episode + "\t" + details[PerformanceDetails.MEAN.ordinal()] + "\t"
                    + details[PerformanceDetails.SD.ordinal()] + "\t"
                    + details[PerformanceDetails.ELITEMEAN.ordinal()] + "\t"
                    + details[PerformanceDetails.ELITEMAX.ordinal()] + "\t"
                    + details[PerformanceDetails.NUMSLOTS.ordinal()] + "\t"
                    + details[PerformanceDetails.NUMRULES.ordinal()] + "\t"
                    + details[PerformanceDetails.POPULATION.ordinal()] + "\t"
                    + details[PerformanceDetails.CONVERGENCE.ordinal()] + "\t" + "\n";
            buf.write(performanceData);

            if (ProgramArgument.SYSTEM_OUTPUT.booleanValue() && policyGenerator.getGoalCondition().isMainGoal()) {
                System.out.println(episode + "\t" + details[PerformanceDetails.MEAN.ordinal()] + "\t" + SD_SYMBOL
                        + "\t" + details[PerformanceDetails.SD.ordinal()]);
            }
        }

        buf.close();
        wr.close();

        // if (Config.getInstance().getGeneratorFile() == null) {
        //     // Writing the mutation tree
        //     File mutationTreeFile = new File(perfFile.getAbsoluteFile()
        //             + "mutation");
        //
        //     wr = new FileWriter(mutationTreeFile);
        //     buf = new BufferedWriter(wr);
        //     // policyGenerator.saveMutationTree(buf);
        //
        //     buf.close();
        //     wr.close();
        // }
    }

    /**
     * Outputs performance information and estimates convergence.
     * 
     * @param convergence
     *            The convergence as given by the rule distributions.
     * @param numElites
     *            The minimum number of elites.
     * @param elites
     *            The current elites.
     * @param numSlots
     *            The number of slots in the distribution.
     * @param goalCondition
     *            The goal condition this performance is concerned with.
     */
    public void estimateETA(double convergence, int numElites, SortedSet<PolicyValue> elites, int numSlots,
            GoalCondition goalCondition) {
        if (!ProgramArgument.SYSTEM_OUTPUT.booleanValue())
            return;

        boolean mainGoal = goalCondition.isMainGoal();

        if (mainGoal) {
            long currentTime = System.currentTimeMillis();
            long elapsedTime = currentTime - trainingStartTime_;
            String elapsed = "Elapsed: " + RRLExperiment.toTimeFormat(elapsedTime);
            System.out.println(elapsed);
        }

        boolean noUpdates = false;
        if (convergence == PolicyGenerator.NO_UPDATES_CONVERGENCE) {
            noUpdates = true;
            convergence = 0;
        }
        double totalRunComplete = (1.0 * runIndex_ + convergence) / Config.getInstance().getNumRepetitions();
        if (frozen_)
            totalRunComplete = 1.0 * (runIndex_ + 1) / Config.getInstance().getNumRepetitions();

        DecimalFormat formatter = new DecimalFormat("#0.0000");
        String modular = "";
        if (!goalCondition.isMainGoal())
            modular = "MODULAR: [" + goalCondition + "] ";
        // No updates yet, convergence unknown
        String percentStr = null;
        if (noUpdates) {
            percentStr = "Unknown convergence; No updates yet.";
        } else if (!frozen_) {
            percentStr = "~" + formatter.format(100 * convergence) + "% " + modular + "converged (" + numSlots
                    + " slots).";
        } else {
            if (convergence <= 1)
                percentStr = formatter.format(100 * convergence) + "% " + modular + "test complete.";
            else
                percentStr = "---FULLY CONVERGED---";
        }
        System.out.println(percentStr);

        if (!frozen_) {
            // Summarise the current elites: best value first, worst value last
            String best = (!elites.isEmpty()) ? "" + formatter.format(elites.first().getValue()) : "?";
            String worst = (!elites.isEmpty()) ? "" + formatter.format(elites.last().getValue()) : "?";
            String eliteString = "N_E: " + numElites + ", |E|: " + elites.size() + ", E_best: " + best
                    + ", E_worst: " + worst;
            System.out.println(eliteString);
        }

        if (mainGoal) {
            String totalPercentStr = formatter.format(100 * totalRunComplete) + "% experiment complete.";
            System.out.println(totalPercentStr);
        }
    }

    /**
     * Clears the recent performance data so that formal testing figures can be
     * recorded.
     * 
     * @param b
     *            True to freeze (begin testing), false to unfreeze.
     */
    public void freeze(boolean b) {
        frozen_ = b;
        if (frozen_)
            trainingEndTime_ = System.currentTimeMillis();
        recentScores_.clear();
        internalSDs_.clear();
    }

    /** Gets the minimum reward received so far. */
    public double getMinimumReward() {
        return minMaxReward_[0];
    }

    /**
     * Notes the average environmental reward received by the best policy,
     * adding it to the final elite scores.
     * 
     * @param policyRewards
     *            The rewards the policy received.
     * @return The average environmental reward.
     */
    public double noteBestPolicyValue(ArrayList<double[]> policyRewards) {
        double average = 0;
        for (double[] reward : policyRewards)
            average += reward[RRLObservations.ENVIRONMENTAL_INDEX];
        average /= policyRewards.size();
        finalEliteScores_.add(average);
        return average;
    }

    /**
     * Notes the mean and maximum elite values against an episode's recorded
     * details.
     * 
     * @param episode
     *            The episode the values correspond to.
     * @param meanEliteValue
     *            The mean value of the elites.
     * @param maxEliteValue
     *            The maximum value of the elites.
     */
    public void noteElitesReward(int episode, Double meanEliteValue, Double maxEliteValue) {
        if (meanEliteValue == null || maxEliteValue == null)
            return;

        Double[] details = performanceDetails_.get(episode);
        if (details != null) {
            details[PerformanceDetails.ELITEMEAN.ordinal()] = meanEliteValue;
            details[PerformanceDetails.ELITEMAX.ordinal()] = maxEliteValue;
        }
    }

    /**
     * Notes the generator's details (slots, rules, population, convergence)
     * against an episode's recorded details.
     * 
     * @param episode
     *            The episode the details correspond to.
     * @param generator
     *            The policy generator to read the details from.
     * @param population
     *            The sample population size.
     * @param convergence
     *            The current convergence value (floored at zero).
     */
    public void noteGeneratorDetails(int episode, PolicyGenerator generator, int population, double convergence) {
        Double[] details = performanceDetails_.get(episode);
        if (details != null) {
            details[PerformanceDetails.NUMSLOTS.ordinal()] = Double.valueOf(generator.size());
            double numRules = 0;
            for (Slot s : generator.getGenerator()) {
                numRules += s.size();
            }
            details[PerformanceDetails.NUMRULES.ordinal()] = numRules;
            details[PerformanceDetails.POPULATION.ordinal()] = Double.valueOf(population);
            details[PerformanceDetails.CONVERGENCE.ordinal()] = Math.max(0, convergence);
        }
    }

    /**
     * Notes the rewards the sample received.
     * 
     * @param policyRewards
     *            The rewards the sample received.
     * @param currentEpisode
     *            The current episode.
     * @return The computed average of the internal rewards.
     */
    public double noteSampleRewards(ArrayList<double[]> policyRewards, int currentEpisode) {
        // First pass through the rewards to determine min reward.
        double environmentAverage = 0;
        double internalAverage = 0;
        minEpisodeReward_ = Float.MAX_VALUE;
        for (double[] reward : policyRewards) {
            double internalReward = reward[RRLObservations.INTERNAL_INDEX];
            minEpisodeReward_ = Math.min(internalReward, minEpisodeReward_);
            minMaxReward_[0] = Math.min(internalReward, minMaxReward_[0]);
            minMaxReward_[1] = Math.max(internalReward, minMaxReward_[1]);
            internalAverage += internalReward;

            environmentAverage += reward[RRLObservations.ENVIRONMENTAL_INDEX];
        }
        internalAverage /= policyRewards.size();
        environmentAverage /= policyRewards.size();

        // Second pass: note each environmental reward's deviation from the
        // minimum episode reward in a bounded queue (estimating reward spread)
        for (double[] reward : policyRewards) {
            if (internalSDs_.size() == ProgramArgument.PERFORMANCE_TESTING_SIZE.intValue()
                    * ProgramArgument.POLICY_REPEATS.intValue())
                internalSDs_.poll();
            internalSDs_.add(reward[RRLObservations.ENVIRONMENTAL_INDEX] - minEpisodeReward_);
        }

        // Maintain a sliding window of recent scores while learning. When
        // frozen, the window is cleared by freeze() and test scores simply
        // accumulate.
        if (!frozen_
                && recentScores_.size() == ProgramArgument.PERFORMANCE_TESTING_SIZE.intValue())
            recentScores_.poll();
        recentScores_.add(environmentAverage);
        if (!frozen_)
            recordPerformanceScore(currentEpisode);

        return internalAverage;
    }

    /**
     * Saves performance and distributions to file.
     * 
     * @param distribution
     *            The current CEDistribution.
     * @param elites
     *            The current elites.
     * @param currentEpisode
     *            The current episode.
     * @param hasUpdated
     *            If the distribution has been updated, enabling the save.
     * @param finalWrite
     *            If this write is the final write for this generator.
     */
    public void saveFiles(LocalCrossEntropyDistribution distribution, SortedSet<PolicyValue> elites,
            int currentEpisode, boolean hasUpdated, boolean finalWrite) {
        // Determine the temp filenames
        File tempPerf = null;
        if (modularPerformance_) {
            File modTemps = LocalCrossEntropyDistribution.getModFolder(distribution.getGoalCondition().toString(),
                    runIndex_);
            tempPerf = new File(modTemps, distribution.getGoalCondition() + "performance.txt");
        } else {
            Config.TEMP_FOLDER.mkdir();
            tempPerf = new File(Config.TEMP_FOLDER,
                    Config.getInstance().getPerformanceFile().getName() + runIndex_);
        }

        // Remove any old file if this is the first run
        if (performanceDetails_.size() <= 1 && Config.getInstance().getSerializedFile() == null)
            tempPerf.delete();

        // Write the files
        try {
            if (hasUpdated) {
                saveElitePolicies(elites, distribution.getGoalCondition());
                // Output the episode averages
                if (finalWrite && Config.getInstance().getGeneratorFile() == null)
                    recordPerformanceScore(currentEpisode);
                savePerformance(distribution.getPolicyGenerator(), tempPerf, finalWrite);
            }

            if (Config.getInstance().getGeneratorFile() == null) {
                // Serialise the generator
                distribution.saveCEDistribution(
                        new File(tempPerf.getAbsolutePath() + LocalCrossEntropyDistribution.SERIALISED_SUFFIX),
                        !modularPerformance_, runIndex_);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Basic update of run
        if (!ProgramArgument.SYSTEM_OUTPUT.booleanValue() && !modularPerformance_) {
            long elapsedTime = System.currentTimeMillis() - trainingStartTime_;
            String elapsed = "Elapsed: " + RRLExperiment.toTimeFormat(elapsedTime);
            if (hasUpdated && !performanceDetails_.isEmpty()) {
                PolicyGenerator policyGenerator = distribution.getPolicyGenerator();
                DecimalFormat formatter = new DecimalFormat("#0.0000");
                String percentStr = "~" + formatter.format(100 * policyGenerator.getConvergenceValue()) + "% "
                        + "converged (" + policyGenerator.getGenerator().size() + " slots).";
                System.out.println("Run " + runIndex_ + ", learning: " + currentEpisode + ": "
                        + performanceDetails_.get(performanceDetails_.lastKey())[PerformanceDetails.MEAN.ordinal()]
                        + ", " + elapsed + ", " + percentStr);
                System.out.println("Learning...");
            } else
                System.out.println("Learning...");
        }
    }

    /** Gets the performance map read from a raw performance file. */
    public static SortedMap<Integer, Float[]> getPerformanceArray() {
        return performanceMap_;
    }

    /** Gets the parsed runtime (in milliseconds) of the experiment. */
    public static long getRunTime() {
        return runTime_;
    }

    /**
     * Reads a raw numerical performance file and stores the values in
     * statically accessible fields.
     * 
     * @param perfFile
     *            The performance file to read.
     * @param byEpisode
     *            If the values are indexed by episode (unused in this method).
     * @return True if the file was read successfully, false otherwise.
     */
    public static boolean readRawPerformanceFile(File perfFile, boolean byEpisode) throws Exception {
        if (Config.getInstance().getGeneratorFile() == null) {
            // First, read the last line of the normal file for the time
            RandomAccessFile raf = new RandomAccessFile(perfFile, "r");
            long pos = perfFile.length() - 1;
            StringBuffer line = new StringBuffer();
            char c;
            boolean foundIt = false;
            do {
                raf.seek(pos);
                c = (char) raf.read();
                foundIt |= Character.isDigit(c);
                line.append(c);
                pos--;
            } while (!foundIt || Character.isDigit(c) || c == ':');
            raf.close();
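            // The elapsed time is formatted as H:MM:SS; convert it to
            // milliseconds.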
            String time = line.reverse().toString().trim();
            String[] timeSplit = time.split(":");
            runTime_ = (Long.parseLong(timeSplit[2]) + 60 * Long.parseLong(timeSplit[1])
                    + 3600 * Long.parseLong(timeSplit[0])) * 1000;
        }

        if (Config.getInstance().getGeneratorFile() == null)
            perfFile = new File(perfFile.getPath() + "raw");
        else
            perfFile = new File(perfFile.getPath() + "greedy");
        performanceMap_ = new TreeMap<Integer, Float[]>();
        FileReader reader = new FileReader(perfFile);
        BufferedReader buf = new BufferedReader(reader);

        // For every value within the performance file
        String input = null;
        Float[] prevPerfs = null;
        while ((input = buf.readLine()) != null) {
            String[] vals = input.split("\t");
            if (vals[PerformanceDetails.EPISODE.ordinal()].equals("Episode"))
                continue;

            Float[] perfs = new Float[PerformanceDetails.values().length];
            int episode = 0;
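            // Parse each column. A "null" elite mean falls back to the elite
            // max (or the overall mean); other "null" values carry over from
            // the previous row.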
            for (PerformanceDetails detail : PerformanceDetails.values()) {
                if (vals.length > detail.ordinal()) {
                    if (!vals[detail.ordinal()].equals("null"))
                        perfs[detail.ordinal()] = Float.parseFloat(vals[detail.ordinal()]);
                    else if (detail.equals(PerformanceDetails.ELITEMEAN)
                            && !vals[PerformanceDetails.ELITEMAX.ordinal()].equals("null"))
                        perfs[detail.ordinal()] = Float.parseFloat(vals[PerformanceDetails.ELITEMAX.ordinal()]);
                    else if (detail.equals(PerformanceDetails.ELITEMEAN)
                            || detail.equals(PerformanceDetails.ELITEMAX))
                        perfs[detail.ordinal()] = Float.parseFloat(vals[PerformanceDetails.MEAN.ordinal()]);
                    else if (prevPerfs != null)
                        perfs[detail.ordinal()] = prevPerfs[detail.ordinal()];
                }

                if (detail.equals(PerformanceDetails.EPISODE))
                    episode = perfs[detail.ordinal()].intValue();
            }

            performanceMap_.put(episode, perfs);
            prevPerfs = perfs;
        }

        buf.close();
        reader.close();

        return true;
    }

    /** The details recorded by Performance. */
    public enum PerformanceDetails {
        EPISODE, MEAN, SD, ELITEMEAN, ELITEMAX, NUMSLOTS, NUMRULES, POPULATION, CONVERGENCE;
    }
}
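
Example usage

The sketch below is not part of the original file; it is a minimal, hypothetical driver showing how the sampling methods above might be exercised. It assumes that RRLObservations.INTERNAL_INDEX and RRLObservations.ENVIRONMENTAL_INDEX both fit inside a two-element reward array, and that the rest of the CERRLA framework (including ProgramArgument defaults) is available on the classpath.

package cerrla;

import java.util.ArrayList;

import rrlFramework.RRLObservations;

public class PerformanceSketch {
    public static void main(String[] args) {
        // Track performance for run 0.
        Performance performance = new Performance(0);

        // Rewards for one policy sample: each entry holds an internal and an
        // environmental reward (the two-element size is an assumption).
        ArrayList<double[]> policyRewards = new ArrayList<double[]>();
        for (int episode = 0; episode < 3; episode++) {
            double[] reward = new double[2];
            reward[RRLObservations.INTERNAL_INDEX] = -10 + episode;
            reward[RRLObservations.ENVIRONMENTAL_INDEX] = -12 + episode;
            policyRewards.add(reward);
        }
        double internalAverage = performance.noteSampleRewards(policyRewards, 3);
        System.out.println("Internal average: " + internalAverage);

        // Freeze learning for formal testing and note the best policy.
        performance.freeze(true);
        double bestValue = performance.noteBestPolicyValue(policyRewards);
        System.out.println("Best policy value: " + bestValue + ", minimum reward: "
                + performance.getMinimumReward());
    }
}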