edu.oregonstate.eecs.mcplan.sim.OptionSimulator.java Source code

Introduction

Here is the source code for edu.oregonstate.eecs.mcplan.sim.OptionSimulator.java
Source

/* LICENSE
Copyright (c) 2013-2016, Jesse Hostetler (jessehostetler@gmail.com)
All rights reserved.
    
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    
1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
    
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/**
 * 
 */
package edu.oregonstate.eecs.mcplan.sim;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.ListIterator;

import org.apache.commons.math3.random.MersenneTwister;
import org.apache.commons.math3.random.RandomGenerator;

import edu.oregonstate.eecs.mcplan.JointAction;
import edu.oregonstate.eecs.mcplan.Option;
import edu.oregonstate.eecs.mcplan.VirtualConstructor;
import edu.oregonstate.eecs.mcplan.util.ListUtil;
import edu.oregonstate.eecs.mcplan.util.WrapListIterator;
import gnu.trove.list.array.TIntArrayList;

/**
 * Adapts a simulator of primitive actions to simulate options over those
 * actions.
 * <p>
 * The takeAction() function sets the active option for the current player.
 * If all players have active options, it then calls takeAction() on the
 * base simulator using the policies from the active options until one or
 * more options terminate. It then returns.
 * <p>
 * The getTurn() function returns the index of the next player in the move
 * ordering who does not have an active option. This is not necessarily the
 * next player to move in the base simulator.
 */
public class OptionSimulator<S, A extends VirtualConstructor<A>> implements UndoSimulator<S, Option<S, A>> {
    private final UndoSimulator<S, A> base_;

    private final ArrayList<Option<S, A>> active_;
    private TIntArrayList turn_ = new TIntArrayList();
    private final int Nplayers_;
    private final RandomGenerator rng_;

    /**
     * Stores information needed to undo an action.
     */
    private class StackFrame {
        public final TIntArrayList turn;
        public final ArrayList<Option<S, A>> option;
        public int steps = 0;

        public StackFrame(final TIntArrayList turn, final ArrayList<Option<S, A>> option) {
            this.turn = turn;
            this.option = option;
        }
    }

    private final ArrayList<Option<S, A>> terminated_;
    private final Deque<StackFrame> history_ = new ArrayDeque<StackFrame>();

    private int action_count_ = 0;

    public OptionSimulator(final UndoSimulator<S, A> base, final long seed) {
        base_ = base;
        Nplayers_ = base_.nagents();
        active_ = new ArrayList<Option<S, A>>(Nplayers_);
        ListUtil.populateList(active_, null, Nplayers_);
        terminated_ = new ArrayList<Option<S, A>>(Nplayers_);
        ListUtil.populateList(terminated_, null, Nplayers_);
        rng_ = new MersenneTwister(seed);
    }

    //   public OptionSimulator( final UndoSimulator<S, A> base, final long seed, final int turn_shift )
    //   {
    //      this( base, seed );
    //      turn_ = turn_shift;
    //   }

    @Override
    public S state() {
        return base_.state();
    }

    private boolean terminate(final S s, final long t, final Option<S, A> o) {
        final double beta = o.terminate(s, t);
        if (beta == 1.0) {
            return true;
        } else if (beta == 0.0) {
            return false;
        } else {
            return rng_.nextDouble() < beta;
        }
    }

    private int nextTurn(final int current) {
        int next = current + 1;
        if (next == Nplayers_) {
            next = 0;
        }
        return next;
    }

    private int nextNullOption(final List<Option<S, A>> os, final int i) {
        final int k = 0; // TODO: debugging
        final WrapListIterator<Option<S, A>> itr = new WrapListIterator<Option<S, A>>(os, i);
        while (itr.hasNext()) {
            final Option<S, A> o = itr.next();
            //         System.out.println( "Option " + (itr.previousIndex()) + " is " + (o == null ? "NULL" : "not null") );
            if (o == null) {
                return itr.previousIndex();
            }
        }
        return -1;
    }

    private TIntArrayList nullOptions(final List<Option<S, A>> os) {
        final TIntArrayList result = new TIntArrayList();
        for (int i = 0; i < os.size(); ++i) {
            if (os.get(i) == null) {
                result.add(i);
            }
        }
        return result;
    }

    private void pushFrame(final TIntArrayList turn, final ArrayList<Option<S, A>> option) {
        history_.push(new StackFrame(turn, option));
    }

    @Override
    public void takeAction(final JointAction<Option<S, A>> j) {
        action_count_ += 1;
        //      System.out.println( "Setting option " + o + " for player " + turn_ );

        // Find all options that terminated
        final ArrayList<Option<S, A>> term = new ArrayList<Option<S, A>>();
        for (int i = 0; i < turn_.size(); ++i) {
            assert (active_.get(i) == null);
            term.add(terminated_.get(i));
            terminated_.set(i, null);
        }
        pushFrame(turn_, term);

        // Activate new options
        for (int i = 0; i < turn_.size(); ++i) {
            assert (j.get(i) != null);
            final Option<S, A> o = j.get(i);
            active_.set(i, o);
            o.start(base_.state(), base_.t());
        }

        assert (history_.size() == action_count_); // TODO: Debugging

        // Take actions according to active options until one or more
        // options terminates.
        final long told = base_.t();
        final long tnew = told;
        while (true) {
            final S s = base_.state();
            if (base_.isTerminalState()) {
                return;
            }

            // See if any options terminate
            final ListIterator<Option<S, A>> check_itr = active_.listIterator();
            final TIntArrayList next_turn = new TIntArrayList();
            while (check_itr.hasNext()) {
                final Option<S, A> ocheck = check_itr.next();
                if (terminate(s, base_.t(), ocheck)) {
                    //               System.out.println( "! Option " + (check_itr.previousIndex()) + " terminated" );
                    check_itr.set(null);
                    final int pidx = check_itr.previousIndex();
                    terminated_.set(pidx, ocheck);
                    next_turn.add(pidx);
                }
            }

            // If they do, wait for new options
            if (!next_turn.isEmpty()) {
                turn_ = next_turn;
                return;
            }

            // Construct a JointAction over primitive actions and execute it
            final JointAction.Builder<A> ab = new JointAction.Builder<A>(Nplayers_);
            for (int i = 0; i < Nplayers_; ++i) {
                final Option<S, A> o = active_.get(i);
                assert (o != null);
                o.setState(s, base_.t());
                ab.a(i, o.getAction());
            }
            base_.takeAction(ab.finish());

            //         System.out.println( "Take action " + base_.getTurn() + " " + a );
            history_.peek().steps += 1;
            // TODO: Give pi its reward
            // pi.actionResult( ??? );
        }
    }

    //   public void old_takeAction( final JointAction<Option<S, A>> o )
    //   {
    //      assert( active_.get( turn_ ) == null );
    //      action_count_ += 1;
    ////      System.out.println( "Setting option " + o + " for player " + turn_ );
    //      pushFrame( turn_, terminated_.get( turn_ ) );
    //      terminated_.set( turn_, null );
    //      active_.set( turn_, o );
    //      o.start( base_.state(), base_.t() );
    //
    //      assert( history_.size() == action_count_ ); // TODO: Debugging
    //
    //      // Do some players still need to choose an option?
    //      final int next_null = nextNullOption( active_, nextTurn( turn_ ) );
    //      if( next_null != -1 ) {
    //         turn_ = next_null;
    ////         System.out.println( "Player " + turn_ + " still needs an option" );
    //         return;
    //      }
    //
    ////      System.out.println( "Everyone has an option!" );
    //      // Take actions according to active options until one or more
    //      // options terminates.
    //      final long told = base_.t();
    //      final long tnew = told;
    //      while( true ) {
    //         final S s = base_.state();
    //         if( base_.isTerminalState( ) ) {
    ////            System.out.println( "! terminal" );
    //            return;
    //         }
    //
    //         // See if any options terminate
    ////         final WrapListIterator<Option<S, A>> check_itr
    ////            = new WrapListIterator<Option<S, A>>( active_, base_.getTurn() );
    //         final ListIterator<Option<S, A>> check_itr = active_.listIterator();
    //         int null_idx = Integer.MAX_VALUE;
    //         while( check_itr.hasNext() ) {
    //            final Option<S, A> ocheck = check_itr.next();
    //            if( terminate( s, base_.t(), ocheck ) ) {
    ////               System.out.println( "! Option " + (check_itr.previousIndex()) + " terminated" );
    //               check_itr.set( null );
    //               final int pidx = check_itr.previousIndex();
    //               terminated_.set( pidx, ocheck );
    //               if( pidx < null_idx ) {
    //                  null_idx = pidx;
    //               }
    //            }
    //         }
    //         // If they do, wait for new options
    //         if( null_idx != Integer.MAX_VALUE ) {
    ////            System.out.println( "! Next option choice " + null_idx );
    //            turn_ = null_idx;
    //            return;
    //         }
    //
    //         // Execute current option policy
    //         final Option<S, A> oi = active_.get( base_.turn() );
    //         oi.setState( s, base_.t() );
    //         final A a = oi.getAction();
    ////         System.out.println( "Take action " + base_.getTurn() + " " + a );
    //         base_.takeAction( a );
    //         history_.peek().steps += 1;
    //         // TODO: Give pi its reward
    //         // pi.actionResult( ??? );
    //         turn_ = nextTurn( turn_ );
    //      }
    //   }

    @Override
    public void untakeLastAction() {
        action_count_ -= 1;
        final StackFrame f = history_.pop();
        assert (action_count_ == history_.size());
        while (f.steps-- > 0) {
            base_.untakeLastAction();
        }
        turn_ = f.turn;
        for (int i = 0; i < turn_.size(); ++i) {
            final int one_turn = turn_.get(i);
            active_.set(one_turn, null);
            terminated_.set(one_turn, f.option.get(i));
        }
    }

    @Override
    public long depth() {
        // TODO: Do we actually want the Option depth instead?
        return base_.depth();
    }

    @Override
    public long t() {
        return base_.t();
    }

    @Override
    public int nagents() {
        return base_.nagents();
    }

    @Override
    public int[] turn() {
        //      final TIntArrayList null_options = nullOptions( active_ );
        //      final int base_turn = base_.getTurn();
        //      System.out.println( "" + base_turn + "; " + null_options );
        //      assert( null_options.contains( base_turn ) );
        //      return base_turn;
        return turn_.toArray();
    }

    @Override
    public double[] reward() {
        // TODO: How do we implement this?
        return new double[nagents()];
    }

    @Override
    public boolean isTerminalState() {
        return base_.isTerminalState();
    }

    @Override
    public long horizon() {
        return base_.horizon();
    }

    @Override
    public String detailString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("OptionSimulator[").append(base_.detailString()).append("]");
        return sb.toString();
    }

}