com.odiago.flumebase.exec.BucketedAggregationElement.java Source code


Introduction

Here is the source code for com.odiago.flumebase.exec.BucketedAggregationElement.java

Source

/**
 * Licensed to Odiago, Inc. under one or more contributor license
 * agreements.  See the NOTICE.txt file distributed with this work for
 * additional information regarding copyright ownership.  Odiago, Inc.
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the
 * License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package com.odiago.flumebase.exec;

import java.io.IOException;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import java.util.concurrent.PriorityBlockingQueue;

import org.apache.avro.Schema;

import org.apache.avro.generic.GenericData;

import org.apache.hadoop.conf.Configuration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.util.Pair;

import com.odiago.flumebase.exec.local.TimerFlowElemContext;

import com.odiago.flumebase.lang.TimeSpan;

import com.odiago.flumebase.parser.AliasedExpr;
import com.odiago.flumebase.parser.Expr;
import com.odiago.flumebase.parser.FnCallExpr;
import com.odiago.flumebase.parser.TypedField;
import com.odiago.flumebase.parser.WindowSpec;

import com.odiago.flumebase.plan.AggregateNode;
import com.odiago.flumebase.plan.PlanNode;

import com.odiago.flumebase.util.IterableIterator;
import com.odiago.flumebase.util.PairLeftRightComparator;

import com.odiago.flumebase.util.concurrent.SelectableQueue;

/**
 * Perform aggregation functions over time series data divided into
 * a fixed number of buckets over the aggregation time interval.
 */
@SuppressWarnings("rawtypes")
public class BucketedAggregationElement extends AvroOutputElementImpl {
    private static final Logger LOG = LoggerFactory.getLogger(BucketedAggregationElement.class.getName());

    /** Configuration key for the number of buckets that subdivide the aggregation time interval. */
    private static final String NUM_BUCKETS_KEY = "flumebase.aggregation.buckets";
    private static final int DEFAULT_NUM_BUCKETS = 100;

    /**
     * Configuration key specifying whether continuous output should be used.
     * If true, output should be generated for every bucket interval, even if no new data
     * is available in that bucket; if false, only generate output when the input
     * condition changes.
     */
    private static final String CONTINUOUS_OUTPUT_KEY = "flumebase.aggregation.continuous.output";
    private static final boolean DEFAULT_CONTINUOUS_OUTPUT = false;

    /**
     * Configuration key specifying the amount of slack time we tolerate between
     * events that should occur at the same time.
     */
    public static final String SLACK_INTERVAL_KEY = "flumebase.slack.time";
    public static final int DEFAULT_SLACK_INTERVAL = 200;

    /**
     * Configuration key specifying how far in the past we emit as output when
     * an insertion forces a close/emit of prior time windows.
     * If there's a massive stall, don't worry about data that is more than
     * this many milliseconds old.
     */
    private static final String MAX_PRIOR_EMIT_INTERVAL_KEY = "flumebase.aggregation.max.prior.interval";
    private static final long DEFAULT_MAX_PRIOR_EMIT_INTERVAL = 5000;
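
    // Illustrative override of the keys above (hypothetical values; setInt, setBoolean
    // and setLong are the standard Hadoop Configuration setters):
    //   conf.setInt("flumebase.aggregation.buckets", 50);
    //   conf.setBoolean("flumebase.aggregation.continuous.output", true);
    //   conf.setLong("flumebase.aggregation.max.prior.interval", 10000L);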

    /** The number of buckets that subdivide the aggregation time interval. */
    private final int mNumBuckets;

    /** Indicates whether continuous output is enabled. */
    private final boolean mContinuousOutput;

    /** How far into the past we will look for windows to close when catching up to the present. */
    private final long mMaxPriorEmitInterval;

    private final List<TypedField> mGroupByFields;

    /** The window specification over which we're aggregating. */
    private final WindowSpec mWindowSpec;

    /** The actual time interval over which we're aggregating. (Derived from mWindowSpec) */
    private final TimeSpan mTimeSpan;

    /**
     * The "width" of each bucket, in milliseconds. Specifies how we round off
     * events' true timestamps into the timestamps associated with buckets in
     * the time interval.
     */
    private final long mTimeModulus;

    /**
     * The maximum lateness (specified in milliseconds) we will tolerate for an
     * event.
     */
    private final long mSlackTime;

    /**
     * The set of aliased expressions describing the aggregation functions to run
     * over records we receive, and what alias to assign to their outputs.
     */
    private final List<AliasedExpr> mAggregateExprs;

    private final List<TypedField> mPropagateFields;

    /**
     * Map that returns a set of Bucket objects. Each bucket object
     * contains the state associated with a single aggregation function.
     * The key is a pair consisting of the timestamp (as a Long) and a HashedEvent:
     * an object that implements equals() and hashCode() based on a subset of the
     * fields of an EventWrapper.
     */
    private Map<Pair<Long, HashedEvent>, List<Bucket>> mBucketMap;
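    // Example shape (hypothetical field and function names): grouping by a field
    // "userid", the key (10200L, HashedEvent{userid="alice"}) would map to one
    // Bucket per aggregate expression, e.g. [bucket for COUNT(*), bucket for AVG(x)].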

    /**
     * The same set of buckets as mBucketMap, organized as time-ordered lists
     * arranged by the group-by columns.
     */
    private Map<HashedEvent, List<Pair<Long, List<Bucket>>>> mBucketsByGroup;

    /**
     * Timestamp associated with the newest buckets in the pipeline.
     * This is used for auto-closing old windows when newer ones arrive.
     */
    private long mHeadBucketTime = 0;

    /**
     * Timestamp associated with the oldest bucket that can act as a window head
     * in the pipeline.
     */
    private long mTailBucketTime = 0;

    /** Timestamp of the most recent wakeup call enqueued. */
    private long mLastEnqueuedWakeup = 0;

    /**
     * SelectableQueue for the downstream timer element, which our eviction thread
     * enqueues into.
     */
    private SelectableQueue<Object> mTimerQueue = null;

    private EvictionThread mEvictionThread;

    public BucketedAggregationElement(FlowElementContext ctxt, AggregateNode aggregateNode) {
        super(ctxt, (Schema) aggregateNode.getAttr(PlanNode.OUTPUT_SCHEMA_ATTR));

        Configuration conf = aggregateNode.getConf();
        assert null != conf;
        mNumBuckets = conf.getInt(NUM_BUCKETS_KEY, DEFAULT_NUM_BUCKETS);
        mContinuousOutput = conf.getBoolean(CONTINUOUS_OUTPUT_KEY, DEFAULT_CONTINUOUS_OUTPUT);
        mMaxPriorEmitInterval = conf.getLong(MAX_PRIOR_EMIT_INTERVAL_KEY, DEFAULT_MAX_PRIOR_EMIT_INTERVAL);
        int slackTime = conf.getInt(SLACK_INTERVAL_KEY, DEFAULT_SLACK_INTERVAL);
        if (slackTime < 0) {
            mSlackTime = DEFAULT_SLACK_INTERVAL;
        } else {
            mSlackTime = slackTime;
        }

        assert mMaxPriorEmitInterval > 0;
        assert mMaxPriorEmitInterval > mSlackTime;

        List<TypedField> groupByFields = aggregateNode.getGroupByFields();
        if (null == groupByFields) {
            mGroupByFields = Collections.emptyList();
        } else {
            mGroupByFields = groupByFields;
        }

        mAggregateExprs = aggregateNode.getAggregateExprs();
        assert mAggregateExprs != null;
        mPropagateFields = aggregateNode.getPropagateFields();

        Expr windowExpr = aggregateNode.getWindowExpr();
        assert windowExpr.isConstant();
        try {
            mWindowSpec = (WindowSpec) windowExpr.eval(new EmptyEventWrapper());
            assert mWindowSpec.getRangeSpec().isConstant();
            mTimeSpan = (TimeSpan) mWindowSpec.getRangeSpec().eval(new EmptyEventWrapper());
        } catch (IOException ioe) {
            // The only way this can be thrown is if the window expr isn't actually constant.
            // This should not happen due to the assert above.
            LOG.error("Got IOException when calculating window width: " + ioe);
            throw new RuntimeException(ioe);
        }

        mBucketMap = new HashMap<Pair<Long, HashedEvent>, List<Bucket>>(mNumBuckets);
        mBucketsByGroup = new HashMap<HashedEvent, List<Pair<Long, List<Bucket>>>>();

        // Calculate the width of each bucket.
        mTimeModulus = mTimeSpan.getWidth() / mNumBuckets;
        if (mTimeModulus * mNumBuckets != mTimeSpan.getWidth()) {
            LOG.warn("Aggregation time step does not cleanly divide the time interval; "
                    + "results may be inaccurate. Set " + NUM_BUCKETS_KEY + " to a better divisor.");
        }
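
        // Worked example (illustrative numbers only): a 30-second window with the
        // default 100 buckets gives mTimeModulus = 300 ms. A width of 1050 ms with
        // 100 buckets gives 10 ms with a remainder, which triggers the warning above.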
    }

    /** {@inheritDoc} */
    @Override
    public void open() throws IOException, InterruptedException {
        TimerFlowElemContext timerContext = (TimerFlowElemContext) getContext();
        // Start the auto-closing thread. Initialize the reference to the queue it populates
        // from our timer context.
        mTimerQueue = timerContext.getTimerQueue();
        mEvictionThread = new EvictionThread();
        mEvictionThread.start();
        super.open();
    }

    /** {@inheritDoc} */
    @Override
    public void close() throws IOException, InterruptedException {
        // We've got no new elements coming in; expire all buckets immediately.
        LOG.debug("Immediately expiring all buckets to mHeadBucketTime=" + mHeadBucketTime);
        closeUntil(mHeadBucketTime, mHeadBucketTime, getContext());
        mEvictionThread.finish();
        mEvictionThread = null;
        super.close();
    }

    /**
     * Initialize the list of Bucket entries that are associated with a new
     * timestamp -> bucket mapping. This is typically done just before inserting
     * a value in a new bucket at the head of a new time window.
     * @return the list of initialized Bucket objects for this time subrange.
     */
    private List<Bucket> initBuckets(Pair<Long, HashedEvent> bucketKey) {
        List<Bucket> newBuckets = new ArrayList<Bucket>(mAggregateExprs.size());
        for (int i = 0; i < mAggregateExprs.size(); i++) {
            // Put in a new bucket instance for each aggregation function we're going to run.
            newBuckets.add(new Bucket());
        }

        assert null == mBucketMap.get(bucketKey);
        mBucketMap.put(bucketKey, newBuckets);

        // Put this into the map organized by group, as well.
        // Get the set of (time, bucketlist) pairs for the group.
        List<Pair<Long, List<Bucket>>> bucketsByTime = mBucketsByGroup.get(bucketKey.getRight());
        if (null == bucketsByTime) {
            bucketsByTime = new LinkedList<Pair<Long, List<Bucket>>>();
            mBucketsByGroup.put(bucketKey.getRight(), bucketsByTime);
        }
        bucketsByTime.add(new Pair<Long, List<Bucket>>(bucketKey.getLeft(), newBuckets));

        // Return the initialized set of Bucket objects back to the caller.
        return newBuckets;
    }

    /**
     * @return a key into our group-by map that is composed of the bucket
     * timestamp for the event, and a HashedEvent that reads the fields
     * of the event necessary to group by those fields. If we are not grouping
     * by any fields, this component of the pair is null.
     */
    private Pair<Long, HashedEvent> getEventKey(EventWrapper e) {
        long eventTime = e.getEvent().getTimestamp();
        long remainder = eventTime % mTimeModulus;
        Long bucketTime;

        // If we're on an interval boundary (e.g., t=100) we go into that bucket.
        // If we're off-boundary (e.g., t=103), we go into the closest "previous" bucket (t=100).
        bucketTime = Long.valueOf(eventTime - remainder);
        HashedEvent hashedEvent = new HashedEvent(e, mGroupByFields);
        return new Pair<Long, HashedEvent>(bucketTime, hashedEvent);
    }

    /**
     * Given a set of time buckets associated with a given group,
     * iterate over the time buckets for a specific time interval,
     * for a particular aggregation function.
     */
    private static class BucketIterator<T> implements Iterator<Bucket<T>> {
        /**
         * Offset of the true Bucket object in the final List<Bucket> that
         * holds one bucket for each aggregation function we operate on.
         */
        private final int mFunctionId;

        /** Lowest timestamped bucket we return. */
        private final long mLoTime;

        /** Highest timestamped bucket we return. */
        private final long mHiTime;

        /**
         * Iterator over the outer list. We require this iterator
         * to return values in order.
         */
        private final Iterator<Pair<Long, List<Bucket>>> mIterator;

        /** The next value we return. */
        private Bucket<T> mNextBucket;

        /** Set to true if prepBucket() has been called, but not next(). */
        private boolean mIsReady;

        /** The number of buckets in the time interval that were returned by this iterator. */
        private int mYieldCount;

        public BucketIterator(int functionId, long loTime, long hiTime, List<Pair<Long, List<Bucket>>> inputList) {
            mFunctionId = functionId;
            mLoTime = loTime;
            mHiTime = hiTime;
            mIterator = inputList.iterator();
            mYieldCount = 0;
        }

        /**
         * Scan ahead in the underlying iterator until we find the next element.
         * Set mNextBucket to the next value that next() should return, or null
         * if we cannot yield any more values.
         * Sets mIsReady to true.
         */
        private void prepBucket() {
            assert !mIsReady; // This should not be called twice in a row.

            mIsReady = true;
            mNextBucket = null;

            while (mIterator.hasNext()) {
                Pair<Long, List<Bucket>> nextPair = mIterator.next();
                long timestamp = nextPair.getLeft();
                if (timestamp > mLoTime && timestamp <= mHiTime) {
                    // We found the next one to return.
                    mNextBucket = nextPair.getRight().get(mFunctionId);
                    return;
                }
            }
        }

        public Bucket<T> next() {
            if (!mIsReady) {
                prepBucket();
            }

            assert mIsReady;
            mIsReady = false;
            if (mNextBucket != null) {
                mYieldCount++;
            }
            return mNextBucket;
        }

        public boolean hasNext() {
            if (!mIsReady) {
                prepBucket();
            }

            assert mIsReady;
            return mNextBucket != null;
        }

        public void remove() {
            throw new RuntimeException("Not implemented.");
        }

        int getYieldCount() {
            return mYieldCount;
        }
    }

    /**
     * Close the window ending with the bucket for 'closeTime'.
     * Remove any buckets that are older than closeTime - aggregationIntervalWidth,
     * since they will no longer contribute to any open windows.
     */
    private void closeWindow(long closeTime, FlowElementContext context) throws IOException, InterruptedException {
        long loTime = closeTime - mTimeSpan.getWidth();
        Long closeBucketTimestamp = Long.valueOf(closeTime);

        LOG.debug("Closing window for range: " + loTime + " -> " + closeTime);

        // For each group, emit an output record containing the aggregate values over
        // the whole time window.
        for (Map.Entry<HashedEvent, List<Pair<Long, List<Bucket>>>> entry : mBucketsByGroup.entrySet()) {
            HashedEvent group = entry.getKey();

            // In non-continuous (demand-only) mode, check whether there's a bucket associated
            // with this window's closing time for this group.
            if (!mContinuousOutput
                    && mBucketMap.get(new Pair<Long, HashedEvent>(closeBucketTimestamp, group)) == null) {
                continue; // Nothing to do.
            }

            GenericData.Record record = new GenericData.Record(getOutputSchema());
            List<Pair<Long, List<Bucket>>> bucketsByTime = entry.getValue();

            int numBucketsInRangeForGroup = 0;
            // Execute each aggregation function over the applicable subset of buckets
            // in bucketsByTime.
            for (int i = 0; i < mAggregateExprs.size(); i++) {
                BucketIterator aggIterator = new BucketIterator(i, loTime, closeTime, bucketsByTime);
                AliasedExpr aliasExpr = mAggregateExprs.get(i);
                FnCallExpr fnCall = (FnCallExpr) aliasExpr.getExpr();
                Object result = fnCall.finishWindow(new IterableIterator(aggIterator));
                numBucketsInRangeForGroup += aggIterator.getYieldCount();
                record.put(aliasExpr.getAvroLabel(), result);
            }

            // If there are no buckets in bucketsByTime that are in our time range,
            // we should not emit anything for this group. Just silently continue.
            if (0 == numBucketsInRangeForGroup) {
                // Discard this output; we didn't actually calculate anything.
                continue;
            }

            // Copy the specified fields to propagate from the record used to define
            // the group, into the output record.
            EventWrapper groupWrapper = group.getEventWrapper();
            for (TypedField propagateField : mPropagateFields) {
                record.put(propagateField.getAvroName(), groupWrapper.getField(propagateField));
            }

            // Emit this as an output event!
            emitAvroRecord(record, groupWrapper.getEvent(), closeTime, context);
        }

        // Remove any buckets that are too old to be useful to any subsequent windows.
        // TODO(aaron): This is O(groups * num_buckets). We should actually use a TreeMap
        // instead of a list internally, so we can quickly cull the herd. That would be
        // O(groups * log(num_buckets)).
        Iterator<Map.Entry<HashedEvent, List<Pair<Long, List<Bucket>>>>> bucketsByGrpIter = mBucketsByGroup
                .entrySet().iterator();
        while (bucketsByGrpIter.hasNext()) {
            Map.Entry<HashedEvent, List<Pair<Long, List<Bucket>>>> entry = bucketsByGrpIter.next();
            HashedEvent group = entry.getKey();
            List<Pair<Long, List<Bucket>>> bucketsByTime = entry.getValue();
            Iterator<Pair<Long, List<Bucket>>> bucketsByTimeIter = bucketsByTime.iterator();
            while (bucketsByTimeIter.hasNext()) {
                Pair<Long, List<Bucket>> timedBucket = bucketsByTimeIter.next();
                Long timestamp = timedBucket.getLeft();
                if (timestamp.longValue() < loTime) {
                    bucketsByTimeIter.remove(); // Remove from bucketsByTime list.
                    Pair<Long, HashedEvent> key = new Pair<Long, HashedEvent>(timestamp, group);
                    mBucketMap.remove(key); // Remove from mBucketMap.
                }
            }

            if (bucketsByTime.size() == 0) {
                // We've removed the last time bucket for a given group from mBucketsByGroup.
                // Remove the group from that map.
                bucketsByGrpIter.remove();
            }
        }
    }

    /**
     * Close all open windows up to and including the window that ends with the bucket
     * for time 'lastWindow'.
     */
    private void closeUntil(long curBucketTime, long lastWindow, FlowElementContext context)
            throws IOException, InterruptedException {

        LOG.debug("Close until: cur=" + curBucketTime + ", lastWindow=" + lastWindow + ", mTailBucketTime="
                + mTailBucketTime + ", mTimeMod=" + mTimeModulus + ", mMaxPrior=" + mMaxPriorEmitInterval);
        if (lastWindow <= mTailBucketTime) {
            return; // We've already closed this window.
        }

        // If mHeadBucketTime is too far back from the current time,
        // do a mass expiration and throw out old data. closeTime is bounded by
        // mMaxPriorEmitInterval.
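        // Illustrative trace (hypothetical numbers): with mTailBucketTime = 0,
        // curBucketTime = 9000, mMaxPriorEmitInterval = 5000 and mTimeModulus = 300,
        // the loop starts at closeTime = 4000 and closes windows at 4000, 4300, ...
        // until closeTime exceeds lastWindow.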
        for (long closeTime = Math.max(mTailBucketTime,
                curBucketTime - mMaxPriorEmitInterval); closeTime <= lastWindow; closeTime += mTimeModulus) {
            LOG.debug("Close window: closeTime=" + closeTime);
            closeWindow(closeTime, context);
        }

        mTailBucketTime = lastWindow + mTimeModulus;
    }

    @Override
    public void takeEvent(EventWrapper e) throws IOException, InterruptedException {
        Pair<Long, HashedEvent> bucketKey = getEventKey(e);

        long curBucketTime = bucketKey.getLeft();
        LOG.debug("Handling event time=" + curBucketTime);
        if (curBucketTime > mHeadBucketTime) {
            // We've just received an event that is newer than any others we've yet
            // received. This advances the sliding window to match this event's timestamp.
            // Emit any output groups that are older than this one by at least the
            // slack time interval.
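            // For instance (illustrative values): with mSlackTime = 200 and
            // mTimeModulus = 300, an event that advances the head to bucket time t
            // closes every window ending at or before t - 500.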
            LOG.debug("New bucket: cur=" + curBucketTime + "; mHeadBucketTime=" + mHeadBucketTime);
            closeUntil(curBucketTime, curBucketTime - mSlackTime - mTimeModulus, getContext());
            // Since we've already handled these, remove their wake-up calls.
            mEvictionThread.discardUntil(mHeadBucketTime - mSlackTime);
            mHeadBucketTime = curBucketTime; // This insert advances our head bucket.
        } else if (curBucketTime < mHeadBucketTime - mMaxPriorEmitInterval) {
            // This event is too old -- ignore it.
            // TODO: Should this be mHeadBucketTime - mSlackTime?
            LOG.debug("Dropping late event arriving at aggregator; HeadBucketTime=" + mHeadBucketTime
                    + " and event is for bucket " + curBucketTime);
            return;
        }

        // Get the bucket for the (timestamp, group-by-fields) of this event.
        // Actually returns a list of Bucket objects, one per AggregateFunc to
        // execute.
        List<Bucket> buckets = mBucketMap.get(bucketKey);
        if (null == buckets) {
            // We're putting the first event into a new bucket.
            buckets = initBuckets(bucketKey);
        }

        // For each aggregation function we're performing, insert this event into
        // the bucket for the aggregate function.
        assert buckets.size() == mAggregateExprs.size();
        for (int i = 0; i < mAggregateExprs.size(); i++) {
            AliasedExpr aliasExpr = mAggregateExprs.get(i);
            Expr expr = aliasExpr.getExpr();
            assert expr instanceof FnCallExpr;
            FnCallExpr fnCall = (FnCallExpr) expr;
            Bucket bucket = buckets.get(i);
            fnCall.insertAggregate(e, bucket);
        }

        // Insert a callback into a queue to allow time to expire these windows.
        enqueueWakeup(curBucketTime);
    }

    /**
     * Enqueue a wakeup in the EvictionThread that closes the bucket with the
     * specified bucket timestamp.
     */
    private void enqueueWakeup(long bucketTime) {
        if (bucketTime <= mLastEnqueuedWakeup) {
            // We've already enqueued a wakeup to close this bucket.
            return;
        }

        long curTime = System.currentTimeMillis();
        long offset = mTimeModulus + mSlackTime;
        long closeTime = curTime + offset; // local time to close the bucket.
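        // e.g. (illustrative values): with mTimeModulus = 300 ms and mSlackTime = 200 ms,
        // the eviction thread is asked to wake up 500 ms of wall-clock time from now.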
        LOG.debug("Insert wakeup call: " + bucketTime + " at time offset=" + offset);
        mEvictionThread.insert(new Pair<Long, Long>(closeTime, bucketTime));
        mLastEnqueuedWakeup = bucketTime;
    }

    /**
     * Thread that sends notices to our coprocessor FlowElement when it is time to
     * close old windows based on elapsed local time.
     */
    private class EvictionThread extends Thread {
        private final Logger LOG = LoggerFactory.getLogger(EvictionThread.class.getName());

        /**
         * Set to true when it's time for the thread to go home. The thread
         * actually exits after this flag is set to true and the incoming queue
         * is empty.
         */
        private boolean mIsFinished;

        /**
         * Priority queue (heap) of times when we should insert expiry-times in
         * the coprocessor FlowElement's input queue.
         *
         * <p>The queue holds tuples of two long values. The first is a local
         * time when this thread should wake up; this is what the queue is
         * ordered on. The latter is the window time that should be expired.</p>
         */
        private PriorityBlockingQueue<Pair<Long, Long>> mQueue;

        // Maximum queue length == number of open windows + the newly-opening window
        //     + the currently-closing window.
        final long mMaxQueueLen = 2 + (mSlackTime / mTimeModulus);
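        // e.g. (illustrative values): with the default 200 ms slack and a 300 ms
        // bucket width, mMaxQueueLen = 2 + 200/300 = 2 (integer division).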

        public EvictionThread() {
            super("AggregatorEvictionThread");

            mQueue = new PriorityBlockingQueue<Pair<Long, Long>>((int) mMaxQueueLen,
                    new PairLeftRightComparator<Long, Long>());
        }

        /**
         * Add a wake-up call to the queue.
         */
        public void insert(Pair<Long, Long> wakeUpCall) {
            synchronized (this) {
                assert mQueue.size() < mMaxQueueLen; // This operation should never block.
                mQueue.put(wakeUpCall);
                this.notify();
            }

            // Interrupt any wait that's going on, in case we are asleep and should
            // actually immediately service this wake-up call.
            this.interrupt();
        }

        /**
         * Discard all wakeup calls up to time 'minTime'.
         * minTime is a 'bucket time', not a 'local time'.
         */
        public void discardUntil(long minTime) {
            synchronized (this) {
                LOG.debug("discardUntil: " + minTime);
                Iterator<Pair<Long, Long>> iterator = mQueue.iterator();
                while (iterator.hasNext()) {
                    Pair<Long, Long> wakeUpCall = iterator.next();
                    if (wakeUpCall.getRight() < minTime) {
                        LOG.debug("discard@ " + wakeUpCall);
                        iterator.remove();
                    }
                }

                this.notify();
            }
        }

        /**
         * Set the finished flag to true; try to get the thread to stop as
         * quickly as possible.
         */
        public void finish() {
            synchronized (this) {
                this.mIsFinished = true;
                this.notify();
            }
            this.interrupt(); // Interrupt any current sleep.
        }

        /**
         * Main loop of the thread.
         * Continually sleeps until the next timer event is ready to occur.
         */
        public void run() {
            while (true) {
                Pair<Long, Long> wakeUpCall = null;
                long curTime;
                long nextWakeUp;

                synchronized (this) {
                    while (mQueue.size() == 0) {
                        try {
                            if (this.mIsFinished) {
                                // Parent is finished and we have drained our input queue. Go home.
                                return;
                            }
                            this.wait();
                        } catch (InterruptedException ie) {
                            // Interrupted while waiting for another wake-up call to enter our queue.
                            // Try again, if we're not already finished.
                            continue;
                        }
                    }

                    assert mQueue.size() > 0;
                    wakeUpCall = mQueue.peek();
                }

                if (null == wakeUpCall) {
                    continue;
                }

                curTime = System.currentTimeMillis();
                nextWakeUp = wakeUpCall.getLeft();
                if (nextWakeUp <= curTime) {
                    // TODO(aaron): This section probably bears further deadlock analysis.
                    // The put() into the timer queue can block (it has fixed length
                    // LocalEnvironment.MAX_QUEUE_LEN) until the timer FE services its
                    // existing list.
                    // If we are interrupted doing this, it is because the main thread
                    // has just inserted another wakeup call while we were blocking.
                    // This thread's input queue must not block when being filled from
                    // the main aggregation FE. I believe mMaxQueueLen should be sufficient
                    // to guarantee this is the case, because before we call enqueueWakeup(),
                    // we will have had to call closeUntil() in BucketedAggElem.takeEvent()
                    // on enough windows to free up the slots in this queue.
                    try {
                        LOG.debug("Timer evicting at " + curTime + ": " + wakeUpCall);
                        // Service this by injecting the getRight() into our outbound queue.
                        mTimerQueue.put(new TimeoutEventWrapper(wakeUpCall.getRight()));
                    } catch (InterruptedException ie) {
                        // Not a problem. If we were interrupted doing the put into mTimerQueue,
                        // then we'll service this again on the next go-around of the loop.
                        // Just make sure we don't mark this as 'complete.'
                        continue;
                    }

                    synchronized (this) {
                        // Now actually remove this from the input queue.
                        if (mQueue.peek() == wakeUpCall) {
                            // O(1) fast path; no intervening push.
                            mQueue.remove();
                        } else {
                            // intervening push of an earlier wakeup (?). Slow path.
                            mQueue.remove(wakeUpCall);
                        }
                    }
                } else {
                    // If we're down here, we need to sleep until it is the next wake-up time.
                    long napTime = nextWakeUp - curTime;
                    try {
                        Thread.sleep(napTime);
                    } catch (InterruptedException ie) {
                        // We were awoken early... this is expected (there may have been a
                        // new enqueue, etc).
                    }
                }
            }
        }
    }

    /** EventWrapper used to deliver the expiry time payload to the TimeoutEvictionElement. */
    private static class TimeoutEventWrapper extends EmptyEventWrapper {
        /** The time window that should be expired. */
        private final Long mExpireWindow;

        public TimeoutEventWrapper(Long expire) {
            mExpireWindow = expire;
        }

        @Override
        public Object getField(TypedField field) {
            return mExpireWindow;
        }
    }

    /**
     * Separate FlowElement that handles notifications from the EvictionThread; this
     * operates in the main thread, closing windows that cannot receive new events
     * because they are past the slack time interval.
     */
    public class TimeoutEvictionElement extends AvroOutputElementImpl {
        private final Logger LOG = LoggerFactory.getLogger(TimeoutEvictionElement.class.getName());

        private TimeoutEvictionElement(FlowElementContext ctxt, Schema outSchema) {
            super(ctxt, outSchema);
        }

        public void takeEvent(EventWrapper e) throws IOException, InterruptedException {
            assert e instanceof TimeoutEventWrapper;
            Long expireTime = (Long) e.getField(null); // TimeoutEventWrapper returns a single Long val
            LOG.debug("Handling in eviction element - timeout to: " + expireTime);
            closeUntil(expireTime, expireTime, getContext());
        }
    }

    /**
     * Create a TimeoutEvictionElement coupled to this BucketedAggregationElement.
     */
    public TimeoutEvictionElement getTimeoutElement(FlowElementContext timeoutContext) {
        return this.new TimeoutEvictionElement(timeoutContext, getOutputSchema());
    }
}