org.archive.crawler.framework.ToePool.java Source code

Java tutorial

Introduction

Here is the source code for org.archive.crawler.framework.ToePool.java

Source

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.crawler.framework;

import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;
import org.archive.crawler.reporting.AlertThreadGroup;
import org.archive.util.ArchiveUtils;
import org.archive.util.Histotable;
import org.archive.util.Reporter;

/**
 * A collection of ToeThreads. The class manages the ToeThreads currently
 * running. It offers methods for increasing and decreasing their 
 * number, keeping track of their state and (not necessarily safely)
 * killing hung threads.
 *
 * <p>The pool is itself the {@link ThreadGroup} its ToeThreads belong to;
 * every query works on a point-in-time snapshot obtained by enumerating
 * that group, so counts and reports are approximations while threads are
 * starting or finishing.
 *
 * @author Gordon Mohr
 * @author Kristinn Sigurdsson
 *
 * @see org.archive.crawler.framework.ToeThread
 */
public class ToePool extends ThreadGroup implements Reporter {
    /** run worker thread slightly lower than usual */
    public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;

    /** Controller of the current crawl; set to null by {@link #cleanup()}. */
    protected CrawlController controller;
    /** Serial number given to the next ToeThread this pool creates. */
    protected int nextSerialNumber = 1;
    /** Size most recently requested through {@link #setSize(int)}. */
    protected int targetSize = 0;

    /**
     * Constructor. Creates a pool of ToeThreads. 
     *
     * @param atg The parent thread group of this pool.
     * @param c A reference to the CrawlController for the current crawl.
     */
    public ToePool(AlertThreadGroup atg, CrawlController c) {
        super(atg, "ToeThreads");
        this.controller = c;
        setDaemon(true);
    }

    /**
     * Interrupts every thread currently in this group and drops the
     * reference to the controller.
     */
    public void cleanup() {
        // force all Toes waiting on queues, etc to proceed
        for (Thread toe : getToes()) {
            if (toe != null) {
                toe.interrupt();
            }
        }

        // see HER-2036
        this.controller = null;
    }

    /**
     * @return The number of ToeThreads that are not available (Approximation).
     */
    public int getActiveToeCount() {
        int active = 0;
        for (Thread candidate : getToes()) {
            if ((candidate instanceof ToeThread) && ((ToeThread) candidate).isActive()) {
                active++;
            }
        }
        return active;
    }

    /**
     * @return The number of ToeThreads. This may include killed ToeThreads
     *         that were not replaced.
     */
    public int getToeCount() {
        int total = 0;
        for (Thread candidate : getToes()) {
            if (candidate instanceof ToeThread) {
                total++;
            }
        }
        return total;
    }

    /**
     * Takes a snapshot of the threads in this group.
     *
     * <p>{@link ThreadGroup#activeCount()} is only an estimate and
     * {@link ThreadGroup#enumerate(Thread[])} silently drops threads that do
     * not fit, so the snapshot is retaken with a larger array whenever the
     * array comes back completely full.
     *
     * @return an array holding the enumerated threads; unused trailing slots
     *         are null, and entries are not necessarily ToeThreads
     */
    private Thread[] getToes() {
        int capacity = activeCount() + 10;
        while (true) {
            Thread[] toes = new Thread[capacity];
            int copied = this.enumerate(toes);
            if (copied < toes.length) {
                return toes;
            }
            // array was filled to the brim: some threads may have been left out
            capacity = toes.length * 2;
        }
    }

    /**
     * Change the number of ToeThreads.
     *
     * <p>If fewer ToeThreads exist than requested, new ones are started.
     * Otherwise the first {@code newsize} ToeThreads of the snapshot are
     * spared and {@link ToeThread#retire()} is called on all the others.
     *
     * @param newsize The new number of ToeThreads.
     */
    public void setSize(int newsize) {
        targetSize = newsize;
        int difference = newsize - getToeCount();
        if (difference > 0) {
            // must create threads
            for (int created = 0; created < difference; created++) {
                startNewThread();
            }
            return;
        }

        // must retire extra threads
        int stillToSpare = targetSize;
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            if (stillToSpare > 0) {
                stillToSpare--; // this toe is spared
                continue;
            }
            ((ToeThread) candidate).retire();
        }
    }

    /**
     * Kills specified thread. Killed thread can be optionally replaced with a
     * new thread.
     *
     * <p><b>WARNING:</b> This operation should be used with great care. It may
     * destabilize the crawler.
     *
     * @param threadNumber Thread to kill
     * @param replace If true then a new thread will be created to take the
     *           killed threads place. Otherwise the total number of threads
     *           will decrease by one.
     */
    public void killThread(int threadNumber, boolean replace) {
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread toe = (ToeThread) candidate;
            if (toe.getSerialNumber() == threadNumber) {
                toe.kill();
            }
        }

        // NOTE(review): a replacement is started even when no thread matched
        // threadNumber; behavior kept as-is, confirm this is intended.
        if (replace) {
            // Create a new toe thread to take its place. Replace toe
            startNewThread();
        }
    }

    /**
     * Creates and starts one ToeThread with the next serial number, running
     * at {@link #DEFAULT_TOE_PRIORITY}.
     */
    private synchronized void startNewThread() {
        ToeThread newThread = new ToeThread(this, nextSerialNumber++);
        newThread.setPriority(DEFAULT_TOE_PRIORITY);
        newThread.start();
    }

    /**
     * @return Instance of CrawlController, or null once {@link #cleanup()}
     *         has been called.
     */
    public CrawlController getController() {
        return controller;
    }

    //
    // Reporter implementation
    //

    /**
     * Writes a header (timestamp, job name, thread counts) followed by the
     * full report of every ToeThread in the pool.
     *
     * @param writer destination of the report
     */
    @Override
    public void reportTo(PrintWriter writer) {
        writer.print("Toe threads report - " + ArchiveUtils.get12DigitDate() + "\n");

        // cleanup() nulls the controller; read the field once and avoid a
        // NullPointerException if a report is requested afterwards.
        CrawlController current = this.controller;
        String jobName = (current == null)
                ? "(unavailable; pool has been cleaned up)"
                : current.getMetadata().getJobName();
        writer.print(" Job being crawled: " + jobName + "\n");
        writer.print(
                " Number of toe threads in pool: " + getToeCount() + " (" + getActiveToeCount() + " active)\n\n");

        for (Thread candidate : getToes()) {
            if (candidate instanceof ToeThread) {
                ((ToeThread) candidate).reportTo(writer);
            }
        }
    }

    /**
     * Writes the thread counts, then one short report line per ToeThread,
     * preceded by the ToeThread legend if there is at least one ToeThread.
     *
     * @param writer destination of the report
     */
    public void compactReportTo(PrintWriter writer) {
        writer.print(getToeCount() + " threads (" + getActiveToeCount() + " active)\n");

        boolean legendWritten = false;
        // TODO: sort by activity: those with curi the longest at front
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread tt = (ToeThread) candidate;
            if (!legendWritten) {
                writer.println(tt.shortReportLegend());
                legendWritten = true;
            }
            tt.shortReportLineTo(writer);
        }
    }

    /**
     * Builds a summary of the pool from a single snapshot of its threads.
     *
     * @return an insertion-ordered map with the keys {@code "toeCount"}
     *         (number of ToeThreads in the snapshot), {@code "steps"} and
     *         {@code "processors"} (each a {@code LinkedList<String>} of
     *         {@code "<count> <name>"} entries in the order given by
     *         {@code Histotable.getSortedByCounts()})
     */
    @Override
    public Map<String, Object> shortReportMap() {
        Histotable<Object> steps = new Histotable<Object>();
        Histotable<Object> processors = new Histotable<Object>();
        // counted from the same snapshot as the tallies so the numbers agree
        int toeCount = 0;
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread tt = (ToeThread) candidate;
            toeCount++;
            steps.tally(tt.getStep().toString());
            String currentProcessorName = tt.getCurrentProcessorName();
            if (StringUtils.isEmpty(currentProcessorName)) {
                currentProcessorName = "noActiveProcessor";
            }
            processors.tally(currentProcessorName);
        }

        Map<String, Object> data = new LinkedHashMap<String, Object>();

        data.put("toeCount", toeCount);

        LinkedList<String> unwound = new LinkedList<String>();
        for (Entry<?, Long> step : steps.getSortedByCounts()) {
            unwound.add(step.getValue() + " " + step.getKey());
        }
        data.put("steps", unwound);

        unwound = new LinkedList<String>();
        for (Entry<?, Long> proc : processors.getSortedByCounts()) {
            unwound.add(proc.getValue() + " " + proc.getKey());
        }
        data.put("processors", unwound);

        return data;
    }

    /**
     * Writes a one-line summary: the toe count, the first two step entries
     * (with ", etc..." if more exist), then every processor entry.
     * Nothing beyond the count prefix is written when there are no step
     * entries.
     *
     * @param w destination of the summary line
     */
    @SuppressWarnings("unchecked")
    @Override
    public void shortReportLineTo(PrintWriter w) {
        Map<String, Object> map = shortReportMap();
        w.print(map.get("toeCount"));
        w.print(" threads: ");

        // shortReportMap() stores LinkedList<String> under both keys
        LinkedList<String> sortedSteps = (LinkedList<String>) map.get("steps");
        Iterator<String> stepIter = sortedSteps.iterator();
        if (!stepIter.hasNext()) {
            return;
        }
        w.print(stepIter.next());
        if (stepIter.hasNext()) {
            w.print(", ");
            w.print(stepIter.next());
            if (stepIter.hasNext()) {
                w.print(", etc...");
            }
        }
        w.print("; ");

        LinkedList<String> sortedProcesses = (LinkedList<String>) map.get("processors");
        Iterator<String> procIter = sortedProcesses.iterator();
        if (procIter.hasNext()) {
            w.print(procIter.next());
            while (procIter.hasNext()) {
                w.print(", ");
                w.print(procIter.next());
            }
        }
    }

    /**
     * @return the legend describing the line written by
     *         {@link #shortReportLineTo(PrintWriter)}
     */
    @Override
    public String shortReportLegend() {
        return "total: mostCommonStateTotal secondMostCommonStateTotal";
    }

    /**
     * Polls once per second until no thread in a snapshot of this group
     * reports itself as not alive.
     *
     * <p>NOTE(review): the name suggests waiting for the threads to finish,
     * but the condition checked is that all enumerated threads are alive;
     * confirm the intended semantics with the callers.
     *
     * @throws IllegalStateException if the calling thread is interrupted
     *         while sleeping; the interrupt status is restored first
     */
    public void waitForAll() {
        while (true) {
            try {
                if (isAllAlive(getToes())) {
                    return;
                }
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // keep the interruption visible to code further up the stack
                Thread.currentThread().interrupt();
                throw new IllegalStateException(e);
            }
        }
    }

    /**
     * @param threads threads to check; null entries are skipped
     * @return false if any non-null thread is not alive, true otherwise
     */
    private static boolean isAllAlive(Thread[] threads) {
        for (Thread t : threads) {
            if ((t != null) && (!t.isAlive())) {
                return false;
            }
        }
        return true;
    }
}