org.archive.crawler.frontier.WorkQueueFrontier.java Source code

Java tutorial

Introduction

Here is the source code for org.archive.crawler.frontier.WorkQueueFrontier.java

Source

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.crawler.frontier;

import static org.archive.crawler.event.CrawlURIDispositionEvent.Disposition.DEFERRED_FOR_RETRY;
import static org.archive.crawler.event.CrawlURIDispositionEvent.Disposition.DISREGARDED;
import static org.archive.crawler.event.CrawlURIDispositionEvent.Disposition.FAILED;
import static org.archive.crawler.event.CrawlURIDispositionEvent.Disposition.SUCCEEDED;
import static org.archive.modules.fetcher.FetchStatusCodes.S_DEFERRED;
import static org.archive.modules.fetcher.FetchStatusCodes.S_RUNTIME_EXCEPTION;

import java.io.Closeable;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import org.apache.commons.collections.iterators.ObjectArrayIterator;
import org.archive.crawler.datamodel.UriUniqFilter;
import org.archive.crawler.event.CrawlURIDispositionEvent;
import org.archive.crawler.framework.ToeThread;
import org.archive.crawler.frontier.precedence.BaseQueuePrecedencePolicy;
import org.archive.crawler.frontier.precedence.QueuePrecedencePolicy;
import org.archive.crawler.util.TopNSet;
import org.archive.modules.CrawlURI;
import org.archive.spring.KeyedProperties;
import org.archive.util.ArchiveUtils;
import org.archive.util.ObjectIdentityCache;
import org.archive.util.ObjectIdentityMemCache;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.context.support.AbstractApplicationContext;

import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.DatabaseException;

/**
 * A common Frontier base using several queues to hold pending URIs. 
 * 
 * Uses in-memory map of all known 'queues' inside a single database.
 * Round-robins between all queues.
 *
 * @author Gordon Mohr
 * @author Christian Kohlschuetter
 */
public abstract class WorkQueueFrontier extends AbstractFrontier implements Closeable, ApplicationContextAware {
    @SuppressWarnings("unused")
    private static final long serialVersionUID = 570384305871965843L;

    /**
     * If we know that only a small amount of queues is held in memory,
     * we can avoid using a disk-based BigMap.
     * This only works efficiently if the WorkQueue does not hold its
     * entries in memory as well.
     */
    private static final int MAX_QUEUES_TO_HOLD_ALLQUEUES_IN_MEMORY = 3000;

    /**
     * When a snooze target for a queue is longer than this amount, the queue
     * will be "long snoozed" instead of "short snoozed".  A "long snoozed"
     * queue may be swapped to disk because it's not needed soon.  
     */
    protected long snoozeLongMs = 5L * 60L * 1000L;

    public long getSnoozeLongMs() {
        return snoozeLongMs;
    }

    public void setSnoozeLongMs(long snooze) {
        this.snoozeLongMs = snooze;
    }

    private static final Logger logger = Logger.getLogger(WorkQueueFrontier.class.getName());

    // ApplicationContextAware implementation, for eventing
    protected AbstractApplicationContext appCtx;

    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        this.appCtx = (AbstractApplicationContext) applicationContext;
    }

    /** amount to replenish budget on each activation (duty cycle) */
    {
        setBalanceReplenishAmount(3000);
    }

    public int getBalanceReplenishAmount() {
        return (Integer) kp.get("balanceReplenishAmount");
    }

    public void setBalanceReplenishAmount(int replenish) {
        kp.put("balanceReplenishAmount", replenish);
    }

    /** budget penalty for an error fetch */
    {
        setErrorPenaltyAmount(100);
    }

    public int getErrorPenaltyAmount() {
        return (Integer) kp.get("errorPenaltyAmount");
    }

    public void setErrorPenaltyAmount(int penalty) {
        kp.put("errorPenaltyAmount", penalty);
    }

    /** total expenditure to allow a queue before 'retiring' it  */
    {
        setQueueTotalBudget(-1L);
    }

    public long getQueueTotalBudget() {
        return (Long) kp.get("queueTotalBudget");
    }

    public void setQueueTotalBudget(long budget) {
        kp.put("queueTotalBudget", budget);
    }

    /** queue precedence assignment policy to use. */
    {
        setQueuePrecedencePolicy(new BaseQueuePrecedencePolicy());
    }

    public QueuePrecedencePolicy getQueuePrecedencePolicy() {
        return (QueuePrecedencePolicy) kp.get("queuePrecedencePolicy");
    }

    public void setQueuePrecedencePolicy(QueuePrecedencePolicy policy) {
        kp.put("queuePrecedencePolicy", policy);
    }

    /** precedence rank at or below which queues are not crawled */
    protected int precedenceFloor = 255;

    public int getPrecedenceFloor() {
        return this.precedenceFloor;
    }

    public void setPrecedenceFloor(int floor) {
        this.precedenceFloor = floor;
    }

    /** truncate reporting of queues at this large but not unbounded number */
    protected int maxQueuesPerReportCategory = 2000;

    public int getMaxQueuesPerReportCategory() {
        return this.maxQueuesPerReportCategory;
    }

    public void setMaxQueuesPerReportCategory(int max) {
        this.maxQueuesPerReportCategory = max;
    }

    /** All known queues.
     */
    protected ObjectIdentityCache<WorkQueue> allQueues = null;
    // of classKey -> ClassKeyQueue

    /**
     * All per-class queues whose first item may be handed out.
     * Linked-list of keys for the queues.
     */
    protected BlockingQueue<String> readyClassQueues;

    /** all per-class queues from whom a URI is outstanding */
    protected Set<WorkQueue> inProcessQueues = Collections
            .newSetFromMap(new ConcurrentHashMap<WorkQueue, Boolean>()); // of ClassKeyQueue

    /**
     * All per-class queues held in snoozed state, sorted by wake time.
     */
    transient protected DelayQueue<DelayedWorkQueue> snoozedClassQueues;
    protected StoredSortedMap<Long, DelayedWorkQueue> snoozedOverflow;
    protected AtomicInteger snoozedOverflowCount = new AtomicInteger(0);
    protected static int MAX_SNOOZED_IN_MEMORY = 10000;

    /** URIs scheduled to be re-enqueued at future date */
    protected StoredSortedMap<Long, CrawlURI> futureUris;

    /** remember keys of small number of largest queues for reporting */
    transient protected TopNSet largestQueues = new TopNSet(20);

    /** remember this many largest queues for reporting's sake; actual tracking
     *  can be somewhat approximate when some queues shrink before others' 
     *  sizes are again noted, or if the size is adjusted mid-crawl. */
    public int getLargestQueuesCount() {
        return largestQueues.getMaxSize();
    }

    public void setLargestQueuesCount(int count) {
        largestQueues.setMaxSize(count);
    }

    protected int highestPrecedenceWaiting = Integer.MAX_VALUE;

    /** The UriUniqFilter to use, tracking those UURIs which are 
     * already in-process (or processed), and thus should not be 
     * rescheduled. Also known as the 'alreadyIncluded' or
     * 'alreadySeen' structure */
    protected UriUniqFilter uriUniqFilter;

    public UriUniqFilter getUriUniqFilter() {
        return this.uriUniqFilter;
    }

    @Autowired
    public void setUriUniqFilter(UriUniqFilter uriUniqFilter) {
        this.uriUniqFilter = uriUniqFilter;
    }

    /**
     * Constructor.
     */
    public WorkQueueFrontier() {
        super();
    }

    public void start() {
        if (isRunning()) {
            return;
        }
        uriUniqFilter.setDestination(this);
        super.start();
        try {
            initInternalQueues();
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }

    }

    /**
     * Initializes internal queues.  May decide to keep all queues in memory based on
     * {@link QueueAssignmentPolicy#maximumNumberOfKeys}.  Otherwise invokes
     * {@link #initAllQueues()} to actually set up the queues.
     * 
     * Subclasses should invoke this method with recycle set to "true" in 
     * a private readObject method, to restore queues after a checkpoint.
     * 
     * @param recycle
     * @throws IOException
     * @throws DatabaseException
     */
    protected void initInternalQueues() throws IOException, DatabaseException {
        this.initOtherQueues();
        if (workQueueDataOnDisk() && preparer.getQueueAssignmentPolicy().maximumNumberOfKeys() >= 0 && preparer
                .getQueueAssignmentPolicy().maximumNumberOfKeys() <= MAX_QUEUES_TO_HOLD_ALLQUEUES_IN_MEMORY) {
            this.allQueues = new ObjectIdentityMemCache<WorkQueue>(701, .9f, 100);
        } else {
            this.initAllQueues();
        }
    }

    /**
     * Initialize the allQueues field in an implementation-appropriate
     * way.
     * @throws DatabaseException
     */
    protected abstract void initAllQueues() throws DatabaseException;

    /**
     * Initialize all other internal queues in an implementation-appropriate
     * way.
     * @throws DatabaseException
     */
    protected abstract void initOtherQueues() throws DatabaseException;

    /* (non-Javadoc)
     * @see org.archive.crawler.frontier.AbstractFrontier#stop()
     */
    @Override
    public void stop() {
        super.stop();
    }

    public void destroy() {
        // release resources and trigger end-of-frontier actions
        close();
    }

    /**
     * Release resources only needed when running
     */
    public void close() {
        ArchiveUtils.closeQuietly(uriUniqFilter);
        ArchiveUtils.closeQuietly(allQueues);
    }

    /**
     * Accept the given CrawlURI for scheduling, as it has
     * passed the alreadyIncluded filter. 
     * 
     * Choose a per-classKey queue and enqueue it. If this
     * item has made an unready queue ready, place that 
     * queue on the readyClassQueues queue. 
     * @param caUri CrawlURI.
     */
    protected void processScheduleAlways(CrawlURI curi) {
        //        assert Thread.currentThread() == managerThread;
        assert KeyedProperties.overridesActiveFrom(curi);

        prepForFrontier(curi);
        sendToQueue(curi);
    }

    /**
     * Arrange for the given CrawlURI to be visited, if it is not
     * already enqueued/completed. 
     * 
     * Differs from superclass in that it operates in calling thread, rather 
     * than deferring operations via in-queue to managerThread. TODO: settle
     * on either defer or in-thread approach after testing. 
     *
     * @see org.archive.crawler.framework.Frontier#schedule(org.archive.modules.CrawlURI)
     */
    @Override
    public void schedule(CrawlURI curi) {
        sheetOverlaysManager.applyOverlaysTo(curi);
        try {
            KeyedProperties.loadOverridesFrom(curi);
            if (curi.getClassKey() == null) {
                // remedial processing
                preparer.prepare(curi);
            }
            processScheduleIfUnique(curi);
        } finally {
            KeyedProperties.clearOverridesFrom(curi);
        }
    }

    /**
     * Arrange for the given CrawlURI to be visited, if it is not
     * already scheduled/completed.
     *
     * @see org.archive.crawler.framework.Frontier#schedule(org.archive.modules.CrawlURI)
     */
    protected void processScheduleIfUnique(CrawlURI curi) {
        //        assert Thread.currentThread() == managerThread;
        assert KeyedProperties.overridesActiveFrom(curi);

        // Canonicalization may set forceFetch flag.  See
        // #canonicalization(CrawlURI) javadoc for circumstance.
        String canon = curi.getCanonicalString();
        if (curi.forceFetch()) {
            uriUniqFilter.addForce(canon, curi);
        } else {
            uriUniqFilter.add(canon, curi);
        }
    }

    /**
     * Send a CrawlURI to the appropriate subqueue.
     * 
     * @param curi
     */
    protected void sendToQueue(CrawlURI curi) {
        //        assert Thread.currentThread() == managerThread;

        WorkQueue wq = getQueueFor(curi.getClassKey());
        synchronized (wq) {
            int originalPrecedence = wq.getPrecedence();
            wq.enqueue(this, curi);
            // always take budgeting values from current curi
            // (whose overlay settings should be active here)
            wq.setSessionBudget(getBalanceReplenishAmount());
            wq.setTotalBudget(getQueueTotalBudget());

            if (!wq.isRetired()) {
                incrementQueuedUriCount();
                int currentPrecedence = wq.getPrecedence();
                if (!wq.isManaged() || currentPrecedence < originalPrecedence) {
                    // queue newly filled or bumped up in precedence; ensure enqueuing
                    // at precedence level (perhaps duplicate; if so that's handled elsewhere)
                    deactivateQueue(wq);
                }
            }
        }
        // Update recovery log.
        doJournalAdded(curi);
        wq.makeDirty();
        largestQueues.update(wq.getClassKey(), wq.getCount());
    }

    /**
     * Put the given queue on the readyClassQueues queue
     * @param wq
     */
    protected void readyQueue(WorkQueue wq) {
        //        assert Thread.currentThread() == managerThread;

        try {
            readyClassQueues.put(wq.getClassKey());
            if (logger.isLoggable(Level.FINE)) {
                logger.log(Level.FINE, "queue readied: " + wq.getClassKey());
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
            System.err.println("unable to ready queue " + wq);
            // propagate interrupt up 
            throw new RuntimeException(e);
        }
    }

    /**
     * Put the given queue on the inactiveQueues queue
     * @param wq
     */
    protected void deactivateQueue(WorkQueue wq) {
        int precedence = wq.getPrecedence();

        synchronized (wq) {
            wq.noteDeactivated();
            inProcessQueues.remove(wq);
            if (wq.getCount() == 0) {
                System.err.println("deactivate empty queue?");
            }

            synchronized (getInactiveQueuesByPrecedence()) {
                getInactiveQueuesForPrecedence(precedence).add(wq.getClassKey());
                if (wq.getPrecedence() < highestPrecedenceWaiting) {
                    highestPrecedenceWaiting = wq.getPrecedence();
                }
            }

            if (logger.isLoggable(Level.FINE)) {
                logger.log(Level.FINE, "queue deactivated to p" + precedence + ": " + wq.getClassKey());
            }
        }
    }

    /**
     * Get the queue of inactive uri-queue names at the given precedence. 
     * 
     * @param precedence
     * @return queue of inacti
     */
    protected Queue<String> getInactiveQueuesForPrecedence(int precedence) {
        Map<Integer, Queue<String>> inactiveQueuesByPrecedence = getInactiveQueuesByPrecedence();
        Queue<String> candidate = inactiveQueuesByPrecedence.get(precedence);
        if (candidate == null) {
            candidate = createInactiveQueueForPrecedence(precedence);
            inactiveQueuesByPrecedence.put(precedence, candidate);
        }
        return candidate;
    }

    /**
     * Return a sorted map of all queues of WorkQueue keys, keyed by precedence
     * @return SortedMap<Integer, Queue<String>> of inactiveQueues
     */
    protected abstract SortedMap<Integer, Queue<String>> getInactiveQueuesByPrecedence();

    /**
     * Create an inactiveQueue to hold queue names at the given precedence
     * @param precedence
     * @return Queue<String> for names of inactive queues
     */
    protected abstract Queue<String> createInactiveQueueForPrecedence(int precedence);

    /**
     * Put the given queue on the retiredQueues queue
     * @param wq
     */
    protected void retireQueue(WorkQueue wq) {
        //        assert Thread.currentThread() == managerThread;

        inProcessQueues.remove(wq);
        getRetiredQueues().add(wq.getClassKey());
        decrementQueuedCount(wq.getCount());
        wq.setRetired(true);
        if (logger.isLoggable(Level.FINE)) {
            logger.log(Level.FINE, "queue retired: " + wq.getClassKey());
        }
    }

    /**
     * Return queue of all retired queue names.
     * 
     * @return Queue<String> of retired queue names
     */
    protected abstract Queue<String> getRetiredQueues();

    /** 
     * Accommodate any changes in retirement-determining settings (like
     * total-budget or force-retire changes/overlays. 
     * 
     * (Essentially, exists to be called from tools like the UI 
     * Scripting Console when the operator knows it's necessary.)
     */
    public void reconsiderRetiredQueues() {

        // The rules for a 'retired' queue may have changed; so,
        // unretire all queues to 'inactive'. If they still qualify
        // as retired/overbudget next time they come up, they'll
        // be re-retired; if not, they'll get a chance to become
        // active under the new rules.

        // TODO: Do this automatically, only when necessary.

        String key = getRetiredQueues().poll();
        while (key != null) {
            WorkQueue q = (WorkQueue) this.allQueues.get(key);
            if (q != null) {
                unretireQueue(q);
                q.makeDirty();
            }
            key = getRetiredQueues().poll();
        }
    }

    /**
     * Restore a retired queue to the 'inactive' state. 
     * 
     * @param q
     */
    private void unretireQueue(WorkQueue q) {
        //        assert Thread.currentThread() == managerThread;

        deactivateQueue(q);
        q.setRetired(false);
        incrementQueuedUriCount(q.getCount());
    }

    /**
     * Return the work queue for the given classKey, or null
     * if no such queue exists.
     * 
     * @param classKey key to look for
     * @return the found WorkQueue
     */
    protected abstract WorkQueue getQueueFor(String classKey);

    /**
     * Return the next CrawlURI eligible to be processed (and presumably
     * visited/fetched) by a a worker thread.
     *
     * Relies on the readyClassQueues having been loaded with
     * any work queues that are eligible to provide a URI. 
     *
     * @return next CrawlURI eligible to be processed, or null if none available
     *
     * @see org.archive.crawler.framework.Frontier#next()
     */
    protected CrawlURI findEligibleURI() {
        // wake any snoozed queues
        wakeQueues();
        // consider rescheduled URIS
        checkFutures();

        // find a non-empty ready queue, if any 
        // TODO: refactor to untangle these loops, early-exits, etc!
        WorkQueue readyQ = null;
        findauri: while (true) {
            findaqueue: do {
                String key = readyClassQueues.poll();
                if (key == null) {
                    // no ready queues; try to activate one
                    if (!getInactiveQueuesByPrecedence().isEmpty()
                            && highestPrecedenceWaiting < getPrecedenceFloor()) {
                        activateInactiveQueue();
                        continue findaqueue;
                    } else {
                        // nothing ready or readyable
                        break findaqueue;
                    }
                }
                readyQ = getQueueFor(key);
                if (readyQ == null) {
                    // readyQ key wasn't in all queues: unexpected
                    logger.severe("Key " + key + " in readyClassQueues but not allQueues");
                    break findaqueue;
                }
                if (readyQ.getCount() == 0) {
                    // readyQ is empty and ready: it's exhausted
                    readyQ.noteExhausted();
                    readyQ.makeDirty();
                    readyQ = null;
                    continue;
                }
                if (!inProcessQueues.add(readyQ)) {
                    // double activation; discard this and move on
                    // (this guard allows other enqueuings to ready or 
                    // the various inactive-by-precedence queues to 
                    // sometimes redundantly enqueue a queue key)
                    readyQ = null;
                    continue;
                }
                // queue has gone 'in process' 
                readyQ.considerActive();
                readyQ.setWakeTime(0); // clear obsolete wake time, if any

                // we know readyQ is not empty (getCount()!=0) so peek() shouldn't return null
                CrawlURI readyQUri = readyQ.peek(this);
                // see HER-1973 and HER-1946
                sheetOverlaysManager.applyOverlaysTo(readyQUri);
                try {
                    KeyedProperties.loadOverridesFrom(readyQUri);
                    readyQ.setSessionBudget(getBalanceReplenishAmount());
                    readyQ.setTotalBudget(getQueueTotalBudget());
                } finally {
                    KeyedProperties.clearOverridesFrom(readyQUri);
                }

                if (readyQ.isOverSessionBudget()) {
                    deactivateQueue(readyQ);
                    readyQ.makeDirty();
                    readyQ = null;
                    continue;
                }
                if (readyQ.isOverTotalBudget()) {
                    retireQueue(readyQ);
                    readyQ.makeDirty();
                    readyQ = null;
                    continue;
                }
            } while (readyQ == null);

            if (readyQ == null) {
                // no queues left in ready or readiable
                break findauri;
            }

            returnauri: while (true) { // loop left by explicit return or break on empty
                CrawlURI curi = null;
                curi = readyQ.peek(this);
                if (curi == null) {
                    // should not reach
                    logger.severe("No CrawlURI from ready non-empty queue " + readyQ.classKey + "\n"
                            + readyQ.shortReportLegend() + "\n" + readyQ.shortReportLine() + "\n");
                    break returnauri;
                }

                // from queues, override names persist but not map source
                curi.setOverlayMapsSource(sheetOverlaysManager);
                // TODO: consider optimizations avoiding this recalc of
                // overrides when not necessary
                sheetOverlaysManager.applyOverlaysTo(curi);
                // check if curi belongs in different queue
                String currentQueueKey;
                try {
                    KeyedProperties.loadOverridesFrom(curi);
                    currentQueueKey = getClassKey(curi);
                } finally {
                    KeyedProperties.clearOverridesFrom(curi);
                }
                if (currentQueueKey.equals(curi.getClassKey())) {
                    // curi was in right queue, emit
                    noteAboutToEmit(curi, readyQ);
                    return curi;
                }
                // URI's assigned queue has changed since it
                // was queued (eg because its IP has become
                // known). Requeue to new queue.
                // TODO: consider synchronization on readyQ
                readyQ.dequeue(this, curi);
                doJournalRelocated(curi);
                curi.setClassKey(currentQueueKey);
                decrementQueuedCount(1);
                curi.setHolderKey(null);
                sendToQueue(curi);
                if (readyQ.getCount() == 0) {
                    // readyQ is empty and ready: it's exhausted
                    // release held status, allowing any subsequent 
                    // enqueues to again put queue in ready
                    // FIXME: tiny window here where queue could 
                    // receive new URI, be readied, fail not-in-process?
                    inProcessQueues.remove(readyQ);
                    readyQ.noteExhausted();
                    readyQ.makeDirty();
                    readyQ = null;
                    continue findauri;
                }
            }
        }

        if (inProcessQueues.size() == 0) {
            // Nothing was ready or in progress or imminent to wake; ensure 
            // any piled-up pending-scheduled URIs are considered
            uriUniqFilter.requestFlush();
        }

        // if truly nothing ready, wait a moment before returning null
        // so that loop in surrounding next() has a chance of getting something
        // next time
        if (getTotalEligibleInactiveQueues() == 0) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // 
            }
        }

        // nothing eligible
        return null;
    }

    /**
     * Check for any future-scheduled URIs now eligible for reenqueuing
     */
    protected void checkFutures() {
        //        assert Thread.currentThread() == managerThread;
        // TODO: consider only checking this every set interval
        if (!futureUris.isEmpty()) {
            synchronized (futureUris) {
                Iterator<CrawlURI> iter = futureUris.headMap(System.currentTimeMillis()).values().iterator();
                while (iter.hasNext()) {
                    CrawlURI curi = iter.next();
                    curi.setRescheduleTime(-1); // unless again set elsewhere
                    iter.remove();
                    futureUriCount.decrementAndGet();
                    receive(curi);
                }
            }
        }
    }

    /**
     * Activate an inactive queue, if any are available. 
     */
    protected boolean activateInactiveQueue() {
        for (Entry<Integer, Queue<String>> entry : getInactiveQueuesByPrecedence().entrySet()) {
            int expectedPrecedence = entry.getKey();
            Queue<String> queueOfWorkQueueKeys = entry.getValue();

            while (true) {
                String workQueueKey;
                synchronized (getInactiveQueuesByPrecedence()) {
                    workQueueKey = queueOfWorkQueueKeys.poll();
                    if (workQueueKey == null) {
                        break;
                    }
                    updateHighestWaiting(expectedPrecedence);
                }

                WorkQueue candidateQ = (WorkQueue) this.allQueues.get(workQueueKey);
                if (candidateQ.getPrecedence() > expectedPrecedence) {
                    // queue demoted since placed; re-deactivate
                    deactivateQueue(candidateQ);
                    candidateQ.makeDirty();
                    continue;
                }

                try {
                    readyClassQueues.put(workQueueKey);
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }

                return true;
            }
        }

        return false;
    }

    /**
     * Recalculate the value of thehighest-precedence queue waiting
     * among inactive queues. 
     * 
     * @param startFrom start looking at this precedence value
     */
    protected void updateHighestWaiting(int startFrom) {
        // probe for new highestWaiting
        for (int precedenceKey : getInactiveQueuesByPrecedence().tailMap(startFrom).keySet()) {
            if (!getInactiveQueuesByPrecedence().get(precedenceKey).isEmpty()) {
                highestPrecedenceWaiting = precedenceKey;
                return;
            }
        }
        // nothing waiting
        highestPrecedenceWaiting = Integer.MAX_VALUE;
    }

    /**
     * Enqueue the given queue to either readyClassQueues or inactiveQueues,
     * as appropriate.
     * 
     * @param wq
     */
    protected void reenqueueQueue(WorkQueue wq) {
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("queue reenqueued: " + wq.getClassKey());
        }
        if (highestPrecedenceWaiting < wq.getPrecedence() || wq.getPrecedence() >= getPrecedenceFloor()) {
            // if still over budget, deactivate
            deactivateQueue(wq);
        } else {
            readyQueue(wq);
        }
    }

    /* (non-Javadoc)
     * @see org.archive.crawler.frontier.AbstractFrontier#getMaxInWait()
     */
    @Override
    protected long getMaxInWait() {
        Delayed next = snoozedClassQueues.peek();
        return next == null ? 60000 : next.getDelay(TimeUnit.MILLISECONDS);
    }

    /**
     * Utility method for advanced users/experimentation: force wake all snoozed
     * queues -- for example to kick a crawl where connectivity problems have
     * put all queues in slow-retry-snoozes back to busy-ness. 
     */
    public void forceWakeQueues() {
        Iterator<DelayedWorkQueue> iterSnoozed = snoozedClassQueues.iterator();
        while (iterSnoozed.hasNext()) {
            WorkQueue queue = iterSnoozed.next().getWorkQueue(WorkQueueFrontier.this);
            queue.setWakeTime(0);
            reenqueueQueue(queue);
            queue.makeDirty();
            iterSnoozed.remove();
        }
        Iterator<DelayedWorkQueue> iterOverflow = snoozedOverflow.values().iterator();
        while (iterOverflow.hasNext()) {
            WorkQueue queue = iterOverflow.next().getWorkQueue(WorkQueueFrontier.this);
            queue.setWakeTime(0);
            reenqueueQueue(queue);
            queue.makeDirty();
            iterOverflow.remove();
            snoozedOverflowCount.decrementAndGet();
        }
    }

    /**
     * Wake any queues sitting in the snoozed queue whose time has come.
     */
    protected void wakeQueues() {
        DelayedWorkQueue waked;
        while ((waked = snoozedClassQueues.poll()) != null) {
            WorkQueue queue = waked.getWorkQueue(this);
            queue.setWakeTime(0);
            queue.makeDirty();
            reenqueueQueue(queue);
        }
        // also consider overflow (usually empty)
        if (!snoozedOverflow.isEmpty()) {
            synchronized (snoozedOverflow) {
                Iterator<DelayedWorkQueue> iter = snoozedOverflow.headMap(System.currentTimeMillis()).values()
                        .iterator();
                while (iter.hasNext()) {
                    DelayedWorkQueue dq = iter.next();
                    iter.remove();
                    snoozedOverflowCount.decrementAndGet();
                    WorkQueue queue = dq.getWorkQueue(this);
                    queue.setWakeTime(0);
                    queue.makeDirty();
                    reenqueueQueue(queue);
                }
            }
        }
    }

    /**
     * Note that the previously emitted CrawlURI has completed
     * its processing (for now).
     *
     * The CrawlURI may be scheduled to retry, if appropriate,
     * and other related URIs may become eligible for release
     * via the next next() call, as a result of finished().
     *
     * TODO: make as many decisions about what happens to the CrawlURI
     * (success, failure, retry) and queue (retire, snooze, ready) as 
     * possible elsewhere, such as in DispositionProcessor. Then, break
     * this into simple branches or focused methods for each case. 
     *  
     * @see org.archive.crawler.framework.Frontier#finished(org.archive.modules.CrawlURI)
     */
    protected void processFinish(CrawlURI curi) {
        //        assert Thread.currentThread() == managerThread;

        long now = System.currentTimeMillis();

        curi.incrementFetchAttempts();
        logNonfatalErrors(curi);

        WorkQueue wq = (WorkQueue) curi.getHolder();
        // always refresh budgeting values from current curi
        // (whose overlay settings should be active here)
        wq.setSessionBudget(getBalanceReplenishAmount());
        wq.setTotalBudget(getQueueTotalBudget());

        assert (wq.peek(this) == curi) : "unexpected peek " + wq;

        int holderCost = curi.getHolderCost();

        if (needsReenqueuing(curi)) {
            // codes/errors which don't consume the URI, leaving it atop queue
            if (curi.getFetchStatus() != S_DEFERRED) {
                wq.expend(holderCost); // all retries but DEFERRED cost
            }
            long delay_ms = retryDelayFor(curi) * 1000;
            curi.processingCleanup(); // lose state that shouldn't burden retry
            wq.unpeek(curi);
            wq.update(this, curi); // rewrite any changes
            handleQueue(wq, curi.includesRetireDirective(), now, delay_ms);
            appCtx.publishEvent(new CrawlURIDispositionEvent(this, curi, DEFERRED_FOR_RETRY));
            doJournalReenqueued(curi);
            wq.makeDirty();
            return; // no further dequeueing, logging, rescheduling to occur
        }

        // Curi will definitely be disposed of without retry, so remove from queue
        wq.dequeue(this, curi);
        decrementQueuedCount(1);
        largestQueues.update(wq.getClassKey(), wq.getCount());
        log(curi);

        if (curi.isSuccess()) {
            // codes deemed 'success' 
            incrementSucceededFetchCount();
            totalProcessedBytes.addAndGet(curi.getRecordedSize());
            appCtx.publishEvent(new CrawlURIDispositionEvent(this, curi, SUCCEEDED));
            doJournalFinishedSuccess(curi);

        } else if (isDisregarded(curi)) {
            // codes meaning 'undo' (even though URI was enqueued, 
            // we now want to disregard it from normal success/failure tallies)
            // (eg robots-excluded, operator-changed-scope, etc)
            incrementDisregardedUriCount();
            appCtx.publishEvent(new CrawlURIDispositionEvent(this, curi, DISREGARDED));
            holderCost = 0; // no charge for disregarded URIs
            // TODO: consider reinstating forget-URI capability, so URI could be
            // re-enqueued if discovered again
            doJournalDisregarded(curi);

        } else {
            // codes meaning 'failure'
            incrementFailedFetchCount();
            appCtx.publishEvent(new CrawlURIDispositionEvent(this, curi, FAILED));
            // if exception, also send to crawlErrors
            if (curi.getFetchStatus() == S_RUNTIME_EXCEPTION) {
                Object[] array = { curi };
                loggerModule.getRuntimeErrors().log(Level.WARNING, curi.getUURI().toString(), array);
            }
            // charge queue any extra error penalty
            wq.noteError(getErrorPenaltyAmount());
            doJournalFinishedFailure(curi);

        }

        wq.expend(holderCost); // successes & failures charge cost to queue

        long delay_ms = curi.getPolitenessDelay();
        handleQueue(wq, curi.includesRetireDirective(), now, delay_ms);
        wq.makeDirty();

        if (curi.getRescheduleTime() > 0) {
            // marked up for forced-revisit at a set time
            curi.processingCleanup();
            curi.resetForRescheduling();
            futureUris.put(curi.getRescheduleTime(), curi);
            futureUriCount.incrementAndGet();
        } else {
            curi.stripToMinimal();
            curi.processingCleanup();
        }
    }

    /**
     * Send an active queue to its next state, based on the supplied 
     * parameters.
     * 
     * @param wq
     * @param forceRetire
     * @param now
     * @param delay_ms
     */
    protected void handleQueue(WorkQueue wq, boolean forceRetire, long now, long delay_ms) {
        inProcessQueues.remove(wq);
        if (forceRetire) {
            retireQueue(wq);
        } else if (delay_ms > 0) {
            snoozeQueue(wq, now, delay_ms);
        } else {
            getQueuePrecedencePolicy().queueReevaluate(wq);
            reenqueueQueue(wq);
        }
    }

    /**
     * Place the given queue into 'snoozed' state, ineligible to
     * supply any URIs for crawling, for the given amount of time. 
     * 
     * @param wq queue to snooze 
     * @param now time now in ms 
     * @param delay_ms time to snooze in ms
     */
    private void snoozeQueue(WorkQueue wq, long now, long delay_ms) {
        long nextTime = now + delay_ms;
        wq.setWakeTime(nextTime);
        DelayedWorkQueue dq = new DelayedWorkQueue(wq);
        if (snoozedClassQueues.size() < MAX_SNOOZED_IN_MEMORY) {
            snoozedClassQueues.add(dq);
        } else {
            synchronized (snoozedOverflow) {
                snoozedOverflow.put(nextTime, dq);
                snoozedOverflowCount.incrementAndGet();
            }
        }
    }

    /**
     * Forget the given CrawlURI. This allows a new instance
     * to be created in the future, if it is reencountered under
     * different circumstances.
     *
     * @param curi The CrawlURI to forget
     */
    protected void forget(CrawlURI curi) {
        logger.finer("Forgetting " + curi);
        uriUniqFilter.forget(curi.getCanonicalString(), curi);
    }

    /**  (non-Javadoc)
     * @see org.archive.crawler.framework.Frontier#discoveredUriCount()
     */
    public long discoveredUriCount() {
        return (this.uriUniqFilter != null) ? this.uriUniqFilter.count() : 0;
    }

    /**
     * @param match String to  match.
     * @return Number of items deleted.
     */
    public long deleteURIs(String queueRegex, String uriRegex) {
        long count = 0;
        Pattern queuePat = Pattern.compile(queueRegex);
        for (String qname : allQueues.keySet()) {
            if (queuePat.matcher(qname).matches()) {
                WorkQueue wq = getQueueFor(qname);
                wq.unpeek(null);
                long delCount = wq.deleteMatching(this, uriRegex);
                if (!wq.isRetired()) {
                    count += delCount;
                }
                wq.makeDirty();
            }
        }
        decrementQueuedCount(count);
        return count;
    }

    //
    // Reporter implementation
    //

    @Override
    public Map<String, Object> shortReportMap() {
        if (this.allQueues == null) {
            return null;
        }

        int allCount = allQueues.size();
        int inProcessCount = inProcessQueues.size();
        int readyCount = readyClassQueues.size();
        int snoozedCount = getSnoozedCount();
        int activeCount = inProcessCount + readyCount + snoozedCount;
        int inactiveCount = getTotalEligibleInactiveQueues();
        int ineligibleCount = getTotalIneligibleInactiveQueues();
        int retiredCount = getRetiredQueues().size();
        int exhaustedCount = allCount - activeCount - inactiveCount - retiredCount;

        Map<String, Object> map = new LinkedHashMap<String, Object>();
        map.put("totalQueues", allCount);
        map.put("inProcessQueues", inProcessCount);
        map.put("readyQueues", readyCount);
        map.put("snoozedQueues", snoozedCount);
        map.put("activeQueues", activeCount);
        map.put("inactiveQueues", inactiveCount);
        map.put("ineligibleQueues", ineligibleCount);
        map.put("retiredQueues", retiredCount);
        map.put("exhaustedQueues", exhaustedCount);
        map.put("lastReachedState", lastReachedState);

        return map;
    }

    /**
     * @param w Where to write to.
     */
    @Override
    public void shortReportLineTo(PrintWriter w) {
        if (!isRunning())
            return; //???

        if (this.allQueues == null) {
            return;
        }
        int allCount = allQueues.size();
        int inProcessCount = inProcessQueues.size();
        int readyCount = readyClassQueues.size();
        int snoozedCount = getSnoozedCount();
        int activeCount = inProcessCount + readyCount + snoozedCount;
        int inactiveCount = getTotalEligibleInactiveQueues();
        int ineligibleCount = getTotalIneligibleInactiveQueues();
        int retiredCount = getRetiredQueues().size();
        int exhaustedCount = allCount - activeCount - inactiveCount - retiredCount;
        State last = lastReachedState;
        w.print(last);
        w.print(" - ");
        w.print(allCount);
        w.print(" URI queues: ");
        w.print(activeCount);
        w.print(" active (");
        w.print(inProcessCount);
        w.print(" in-process; ");
        w.print(readyCount);
        w.print(" ready; ");
        w.print(snoozedCount);
        w.print(" snoozed); ");
        w.print(inactiveCount);
        w.print(" inactive; ");
        w.print(ineligibleCount);
        w.print(" ineligible; ");
        w.print(retiredCount);
        w.print(" retired; ");
        w.print(exhaustedCount);
        w.print(" exhausted");
        w.flush();
    }

    /**
     * Total of all URIs in inactive queues at all precedences
     * @return int total 
     */
    protected int getTotalInactiveQueues() {
        return tallyInactiveTotals(getInactiveQueuesByPrecedence());
    }

    /**
     * Total of all URIs in inactive queues at precedences above the floor
     * @return int total 
     */
    protected int getTotalEligibleInactiveQueues() {
        return tallyInactiveTotals(getInactiveQueuesByPrecedence().headMap(getPrecedenceFloor()));
    }

    /**
     * Total of all URIs in inactive queues at precedences at or below the floor
     * @return int total 
     */
    protected int getTotalIneligibleInactiveQueues() {
        return tallyInactiveTotals(getInactiveQueuesByPrecedence().tailMap(getPrecedenceFloor()));
    }

    /**
     * @param iqueue 
     * @return
     */
    private int tallyInactiveTotals(SortedMap<Integer, Queue<String>> iqueues) {
        int inactiveCount = 0;
        for (Queue<String> q : iqueues.values()) {
            inactiveCount += q.size();
        }
        return inactiveCount;
    }

    /* (non-Javadoc)
     * @see org.archive.util.Reporter#singleLineLegend()
     */
    @Override
    public String shortReportLegend() {
        return "total active in-process ready snoozed inactive retired exhausted";
    }

    /**
     * This method compiles a human readable report on the status of the frontier
     * at the time of the call.
     * @param name Name of report.
     * @param writer Where to write to.
     */
    @Override
    public synchronized void reportTo(PrintWriter writer) {
        int allCount = allQueues.size();
        int inProcessCount = inProcessQueues.size();
        int readyCount = readyClassQueues.size();
        int snoozedCount = getSnoozedCount();
        int activeCount = inProcessCount + readyCount + snoozedCount;
        int inactiveCount = getTotalInactiveQueues();
        int retiredCount = getRetiredQueues().size();
        int exhaustedCount = allCount - activeCount - inactiveCount - retiredCount;

        writer.print("Frontier report - ");
        writer.print(ArchiveUtils.get12DigitDate());
        writer.print("\n");
        writer.print(" Job being crawled: ");
        writer.print(controller.getMetadata().getJobName());
        writer.print("\n");
        writer.print("\n -----===== STATS =====-----\n");
        writer.print(" Discovered:    ");
        writer.print(Long.toString(discoveredUriCount()));
        writer.print("\n");
        writer.print(" Queued:        ");
        writer.print(Long.toString(queuedUriCount()));
        writer.print("\n");
        writer.print(" Finished:      ");
        writer.print(Long.toString(finishedUriCount()));
        writer.print("\n");
        writer.print("  Successfully: ");
        writer.print(Long.toString(succeededFetchCount()));
        writer.print("\n");
        writer.print("  Failed:       ");
        writer.print(Long.toString(failedFetchCount()));
        writer.print("\n");
        writer.print("  Disregarded:  ");
        writer.print(Long.toString(disregardedUriCount()));
        writer.print("\n");
        writer.print("\n -----===== QUEUES =====-----\n");
        writer.print(" Already included size:     ");
        writer.print(Long.toString(uriUniqFilter.count()));
        writer.print("\n");
        writer.print("               pending:     ");
        writer.print(Long.toString(uriUniqFilter.pending()));
        writer.print("\n");
        writer.print("\n All class queues map size: ");
        writer.print(Long.toString(allCount));
        writer.print("\n");
        writer.print("             Active queues: ");
        writer.print(activeCount);
        writer.print("\n");
        writer.print("                    In-process: ");
        writer.print(inProcessCount);
        writer.print("\n");
        writer.print("                         Ready: ");
        writer.print(readyCount);
        writer.print("\n");
        writer.print("                       Snoozed: ");
        writer.print(snoozedCount);
        writer.print("\n");
        writer.print("           Inactive queues: ");
        writer.print(inactiveCount);
        writer.print(" (");
        Map<Integer, Queue<String>> inactives = getInactiveQueuesByPrecedence();
        boolean betwixt = false;
        for (Integer k : inactives.keySet()) {
            if (betwixt) {
                writer.print("; ");
            }
            writer.print("p");
            writer.print(k);
            writer.print(": ");
            writer.print(inactives.get(k).size());
            betwixt = true;
        }
        writer.print(")\n");
        writer.print("            Retired queues: ");
        writer.print(retiredCount);
        writer.print("\n");
        writer.print("          Exhausted queues: ");
        writer.print(exhaustedCount);
        writer.print("\n");

        State last = lastReachedState;
        writer.print("\n             Last state: " + last);

        writer.print("\n -----===== MANAGER THREAD =====-----\n");
        ToeThread.reportThread(managerThread, writer);

        writer.print("\n -----===== " + largestQueues.size() + " LONGEST QUEUES =====-----\n");
        appendQueueReports(writer, "LONGEST", largestQueues.getEntriesDescending().iterator(), largestQueues.size(),
                largestQueues.size());

        writer.print("\n -----===== IN-PROCESS QUEUES =====-----\n");
        Collection<WorkQueue> inProcess = inProcessQueues;
        ArrayList<WorkQueue> copy = extractSome(inProcess, maxQueuesPerReportCategory);
        appendQueueReports(writer, "IN-PROCESS", copy.iterator(), copy.size(), maxQueuesPerReportCategory);

        writer.print("\n -----===== READY QUEUES =====-----\n");
        appendQueueReports(writer, "READY", this.readyClassQueues.iterator(), this.readyClassQueues.size(),
                maxQueuesPerReportCategory);

        writer.print("\n -----===== SNOOZED QUEUES =====-----\n");
        Object[] objs = snoozedClassQueues.toArray();
        DelayedWorkQueue[] qs = Arrays.copyOf(objs, objs.length, DelayedWorkQueue[].class);
        Arrays.sort(qs);
        appendQueueReports(writer, "SNOOZED", new ObjectArrayIterator(qs), getSnoozedCount(),
                maxQueuesPerReportCategory);

        writer.print("\n -----===== INACTIVE QUEUES =====-----\n");
        SortedMap<Integer, Queue<String>> sortedInactives = getInactiveQueuesByPrecedence();
        for (Integer prec : sortedInactives.keySet()) {
            Queue<String> inactiveQueues = sortedInactives.get(prec);
            appendQueueReports(writer, "INACTIVE-p" + prec, inactiveQueues.iterator(), inactiveQueues.size(),
                    maxQueuesPerReportCategory);
        }

        writer.print("\n -----===== RETIRED QUEUES =====-----\n");
        appendQueueReports(writer, "RETIRED", getRetiredQueues().iterator(), getRetiredQueues().size(),
                maxQueuesPerReportCategory);

        writer.flush();
    }

    /** Compact report of all nonempty queues (one queue per line)
     * 
     * @param writer
     */
    public void allNonemptyReportTo(PrintWriter writer) {
        ArrayList<WorkQueue> inProcessQueuesCopy;
        synchronized (this.inProcessQueues) {
            // grab a copy that will be stable against mods for report duration 
            Collection<WorkQueue> inProcess = this.inProcessQueues;
            inProcessQueuesCopy = new ArrayList<WorkQueue>(inProcess);
        }
        writer.print("\n -----===== IN-PROCESS QUEUES =====-----\n");
        queueSingleLinesTo(writer, inProcessQueuesCopy.iterator());

        writer.print("\n -----===== READY QUEUES =====-----\n");
        queueSingleLinesTo(writer, this.readyClassQueues.iterator());

        writer.print("\n -----===== SNOOZED QUEUES =====-----\n");
        queueSingleLinesTo(writer, this.snoozedClassQueues.iterator());
        queueSingleLinesTo(writer, this.snoozedOverflow.values().iterator());

        writer.print("\n -----===== INACTIVE QUEUES =====-----\n");
        for (Queue<String> inactiveQueues : getInactiveQueuesByPrecedence().values()) {
            queueSingleLinesTo(writer, inactiveQueues.iterator());
        }

        writer.print("\n -----===== RETIRED QUEUES =====-----\n");
        queueSingleLinesTo(writer, getRetiredQueues().iterator());
    }

    /** Compact report of all nonempty queues (one queue per line)
     * 
     * @param writer
     */
    public void allQueuesReportTo(PrintWriter writer) {
        queueSingleLinesTo(writer, allQueues.keySet().iterator());
    }

    /**
     * Writer the single-line reports of all queues in the
     * iterator to the writer 
     * 
     * @param writer to receive report
     * @param iterator over queues of interest.
     */
    private void queueSingleLinesTo(PrintWriter writer, Iterator<?> iterator) {
        Object obj;
        WorkQueue q;
        boolean legendWritten = false;
        while (iterator.hasNext()) {
            obj = iterator.next();
            if (obj == null) {
                continue;
            }
            if (obj instanceof WorkQueue) {
                q = (WorkQueue) obj;
            } else if (obj instanceof DelayedWorkQueue) {
                q = ((DelayedWorkQueue) obj).getWorkQueue(this);
            } else {
                try {
                    q = this.allQueues.get((String) obj);
                } catch (ClassCastException cce) {
                    logger.log(Level.SEVERE, "not convertible to workqueue:" + obj, cce);
                    q = null;
                }
            }

            if (q != null) {
                if (!legendWritten) {
                    writer.println(q.shortReportLegend());
                    legendWritten = true;
                }
                q.shortReportLineTo(writer);
            } else {
                writer.print(" ERROR: " + obj);
            }
        }
    }

    /**
     * Extract some of the elements in the given collection to an
     * ArrayList.  This method synchronizes on the given collection's
     * monitor.  The returned list will never contain more than the
     * specified maximum number of elements.
     * 
     * @param c    the collection whose elements to extract
     * @param max  the maximum number of elements to extract
     * @return  the extraction
     */
    private static <T> ArrayList<T> extractSome(Collection<T> c, int max) {
        // Try to guess a sane initial capacity for ArrayList
        // Hopefully given collection won't grow more than 10 items
        // between now and the synchronized block...
        int initial = Math.min(c.size() + 10, max);
        int count = 0;
        ArrayList<T> list = new ArrayList<T>(initial);
        synchronized (c) {
            Iterator<T> iter = c.iterator();
            while (iter.hasNext() && (count < max)) {
                list.add(iter.next());
                count++;
            }
        }
        return list;
    }

    /**
     * Append queue report to general Frontier report.
     * @param w StringBuffer to append to.
     * @param iterator An iterator over 
     * @param total
     * @param max
     */
    @SuppressWarnings("rawtypes")
    protected void appendQueueReports(PrintWriter w, String label, Iterator<?> iterator, int total, int max) {
        Object obj;
        WorkQueue q;
        int count;
        for (count = 0; iterator.hasNext() && (count < max); count++) {
            obj = iterator.next();
            if (obj == null) {
                continue;
            }
            if (obj instanceof WorkQueue) {
                q = (WorkQueue) obj;
            } else if (obj instanceof DelayedWorkQueue) {
                q = (WorkQueue) ((DelayedWorkQueue) obj).getWorkQueue(this);
            } else if (obj instanceof Map.Entry) {
                q = this.allQueues.get((String) ((Map.Entry) obj).getKey());
            } else {
                q = this.allQueues.get((String) obj);
            }
            if (q != null) {
                w.println(label + "#" + count + ":");
                q.reportTo(w);
            } else {
                w.print("WARNING: No report for queue " + obj);
            }
        }
        count++;
        if (count < total) {
            w.print("...and " + (total - count) + " more " + label + ".\n");
        }
    }

    /**
     * Force logging, etc. of operator- deleted CrawlURIs
     * 
     * @see org.archive.crawler.framework.Frontier#deleted(org.archive.modules.CrawlURI)
     */
    public void deleted(CrawlURI curi) {
        //treat as disregarded
        appCtx.publishEvent(new CrawlURIDispositionEvent(this, curi, DISREGARDED));
        log(curi);
        incrementDisregardedUriCount();
        curi.stripToMinimal();
        curi.processingCleanup();
    }

    public void considerIncluded(CrawlURI curi) {
        sheetOverlaysManager.applyOverlaysTo(curi);
        if (curi.getClassKey() == null) {
            // remedial processing
            preparer.prepare(curi);
        }
        this.uriUniqFilter.note(curi.getCanonicalString());
        try {
            KeyedProperties.loadOverridesFrom(curi);
            curi.setClassKey(getClassKey(curi));
            WorkQueue wq = getQueueFor(curi.getClassKey());
            wq.expend(curi.getHolderCost());
            wq.makeDirty();
        } finally {
            KeyedProperties.clearOverridesFrom(curi);
        }
    }

    /**
     * Returns <code>true</code> if the WorkQueue implementation of this
     * Frontier stores its workload on disk instead of relying
     * on serialization mechanisms.
     * 
     * TODO: rename! (this is a very misleading name) or kill (don't
     * see any implementations that return false)
     * 
     * @return a constant boolean value for this class/instance
     */
    protected abstract boolean workQueueDataOnDisk();

    public long averageDepth() {
        if (inProcessQueues == null || readyClassQueues == null || snoozedClassQueues == null) {
            return 0;
        }
        int inProcessCount = inProcessQueues.size();
        int readyCount = readyClassQueues.size();
        int snoozedCount = getSnoozedCount();
        int activeCount = inProcessCount + readyCount + snoozedCount;
        int inactiveCount = getTotalInactiveQueues();
        int totalQueueCount = (activeCount + inactiveCount);
        return (totalQueueCount == 0) ? 0 : queuedUriCount.get() / totalQueueCount;
    }

    protected int getSnoozedCount() {
        return snoozedClassQueues.size() + snoozedOverflowCount.get();
    }

    public float congestionRatio() {
        if (inProcessQueues == null || readyClassQueues == null || snoozedClassQueues == null) {
            return 0;
        }
        int inProcessCount = inProcessQueues.size();
        int readyCount = readyClassQueues.size();
        int snoozedCount = getSnoozedCount();
        int activeCount = inProcessCount + readyCount + snoozedCount;
        int eligibleInactiveCount = getTotalEligibleInactiveQueues();
        return (float) (activeCount + eligibleInactiveCount) / (inProcessCount + snoozedCount);
    }

    public long deepestUri() {
        return largestQueues.getTopSet().size() == 0 ? -1
                : largestQueues.getTopSet().get(largestQueues.getLargest());
    }

    /** 
     * Return whether frontier is exhausted: all crawlable URIs done (none
     * waiting or pending). Only gives precise answer inside managerThread.
     * 
     * @see org.archive.crawler.framework.Frontier#isEmpty()
     */
    public boolean isEmpty() {
        return queuedUriCount.get() == 0 && (uriUniqFilter == null || uriUniqFilter.pending() == 0)
                && futureUriCount.get() == 0;
    }

    /* (non-Javadoc)
     * @see org.archive.crawler.frontier.AbstractFrontier#getInProcessCount()
     */
    @Override
    protected int getInProcessCount() {
        return inProcessQueues.size();
    }

} // TODO: slim class! Suspect it should be < 800 lines, shedding budgeting/reporting