com.cyberway.issue.crawler.Heritrix.java Source code

Java tutorial

Introduction

Here is the source code for com.cyberway.issue.crawler.Heritrix.java

Source

/* Heritrix
 *
 * $Id: Heritrix.java 6007 2008-10-20 20:17:14Z nlevitt $
 *
 * Created on May 15, 2003
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package com.cyberway.issue.crawler;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.LogManager;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeList;
import javax.management.AttributeNotFoundException;
import javax.management.DynamicMBean;
import javax.management.InstanceAlreadyExistsException;
import javax.management.InstanceNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanInfo;
import javax.management.MBeanNotificationInfo;
import javax.management.MBeanOperationInfo;
import javax.management.MBeanRegistration;
import javax.management.MBeanRegistrationException;
import javax.management.MBeanServer;
import javax.management.MBeanServerFactory;
import javax.management.MalformedObjectNameException;
import javax.management.NotCompliantMBeanException;
import javax.management.ObjectName;
import javax.management.ReflectionException;
import javax.management.RuntimeOperationsException;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.CompositeDataSupport;
import javax.management.openmbean.CompositeType;
import javax.management.openmbean.OpenDataException;
import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
import javax.management.openmbean.OpenMBeanConstructorInfoSupport;
import javax.management.openmbean.OpenMBeanInfoSupport;
import javax.management.openmbean.OpenMBeanOperationInfoSupport;
import javax.management.openmbean.OpenMBeanParameterInfo;
import javax.management.openmbean.OpenMBeanParameterInfoSupport;
import javax.management.openmbean.OpenType;
import javax.management.openmbean.SimpleType;
import javax.management.openmbean.TabularData;
import javax.management.openmbean.TabularDataSupport;
import javax.management.openmbean.TabularType;
import javax.naming.CompoundName;
import javax.naming.Context;
import javax.naming.NameNotFoundException;
import javax.naming.NamingException;
import javax.naming.NoInitialContextException;

import org.apache.commons.cli.Option;
import com.cyberway.issue.crawler.admin.CrawlJob;
import com.cyberway.issue.crawler.admin.CrawlJobErrorHandler;
import com.cyberway.issue.crawler.admin.CrawlJobHandler;
import com.cyberway.issue.crawler.datamodel.CredentialStore;
import com.cyberway.issue.crawler.datamodel.credential.Credential;
import com.cyberway.issue.crawler.event.CrawlStatusListener;
import com.cyberway.issue.crawler.framework.AlertManager;
import com.cyberway.issue.crawler.framework.CrawlController;
import com.cyberway.issue.crawler.framework.exceptions.FatalConfigurationException;
import com.cyberway.issue.crawler.framework.exceptions.InitializationException;
import com.cyberway.issue.crawler.selftest.SelfTestCrawlJobHandler;
import com.cyberway.issue.crawler.settings.XMLSettingsHandler;
import com.cyberway.issue.io.SinkHandler;
import com.cyberway.issue.io.SinkHandlerLogRecord;
import com.cyberway.issue.net.UURI;
import com.cyberway.issue.util.FileUtils;
import com.cyberway.issue.util.IoUtils;
import com.cyberway.issue.util.JmxUtils;
import com.cyberway.issue.util.JndiUtils;
import com.cyberway.issue.util.PropertyUtils;
import com.cyberway.issue.util.TextUtils;

import sun.net.www.protocol.file.FileURLConnection;

/**
 * Main class for Heritrix crawler.
 *
 * Heritrix is usually launched by a shell script that backgrounds heritrix
 * and redirects all stdout and stderr emitted by heritrix to a log file.  So
 * that startup messages emitted subsequent to the redirection of stdout and
 * stderr show on the console, this class prints usage or startup output
 * such as where the web UI can be found, etc., to a STARTLOG that the shell
 * script is waiting on.  As soon as the shell script sees output in this file,
 * it prints its content and breaks out of its wait.
 * See ${HERITRIX_HOME}/bin/heritrix.
 * 
 * <p>Heritrix can also be embedded or launched by webapp initialization or
 * by JMX bootstrapping.  So far I count 4 methods of instantiation:
 * <ol>
 * <li>From this class's main -- the method usually used;</li>
 * <li>From the Heritrix UI (The local-instances.jsp) page;</li>
 * <li>A creation by a JMX agent at the behest of a remote JMX client; and</li>
 * <li>A container such as tomcat or jboss.</li>
 * </ol>
 *
 * @author gojomo
 * @author Kristinn Sigurdsson
 * @author Stack
 */
public class Heritrix implements DynamicMBean, MBeanRegistration {
    /**
     * Heritrix logging instance.
     */
    private static final Logger logger = Logger.getLogger(Heritrix.class.getName());

    /**
     * Temporary directory: the <code>java.io.tmpdir</code> system property,
     * or <code>/tmp</code> if that property is not set.
     */
    private static final File TMPDIR = new File(System.getProperty("java.io.tmpdir", "/tmp"));

    /**
     * Name of the heritrix properties file.
     */
    private static final String PROPERTIES = "heritrix.properties";

    /**
     * Name of the key to use specifying alternate heritrix properties on
     * command line.  Same string as {@link #PROPERTIES}.
     */
    private static final String PROPERTIES_KEY = PROPERTIES;

    /**
     * Prefix used on our properties we'll add to the System.properties list.
     */
    private static final String HERITRIX_PROPERTIES_PREFIX = "heritrix.";

    /**
     * Prefix used on other properties we'll add to the System.properties 
     * list (after stripping this prefix). 
     */
    private static final String SYSTEM_PREFIX = "system.";

    /**
     * Instance of web server if one was started.
     */
    private static SimpleHttpServer httpServer = null;

    /**
     * CrawlJob handler. Manages multiple crawl jobs at runtime.
     * Supplied to the constructor and set back to null by {@link #destroy()}.
     */
    private CrawlJobHandler jobHandler = null;

    /**
     * Heritrix start log file.
     *
     * This file contains standard out produced by this main class for startup
     * only.  Used by heritrix shell script.  Name here MUST match that in the
     * <code>bin/heritrix</code> shell script.  This is a DEPENDENCY the shell
     * wrapper has on this here java heritrix.
     */
    private static final String STARTLOG = "heritrix_dmesg.log";

    /**
     * Default encoding.
     * 
     * Used for content when fetching if none specified.
     */
    public static final String DEFAULT_ENCODING = "ISO-8859-1";

    /**
     * Heritrix stderr/stdout log file.
     *
     * This file should have nothing in it except messages over which we have
     * no control (JVM stacktrace, 3rd-party lib emissions).  The wrapper
     * startup script directs stderr/stdout here. This is an INTERDEPENDENCY
     * this program has with the wrapper shell script.  Shell can actually
     * pass us an alternate to use for this file (see the
     * <code>heritrix.out</code> system property read by getHeritrixOut()).
     * Note that, despite the constant-style name, this field is not final.
     */
    private static String DEFAULT_HERITRIX_OUT = "heritrix_out.log";

    /**
     * Where to write this class's startup output.
     * 
     * This out should only be used if Heritrix is being run from the
     * command-line.  It is assigned in main() and is null otherwise.
     */
    private static PrintWriter out = null;

    /**
     * The com.cyberway.issue package
     */
    private static final String ARCHIVE_PACKAGE = "com.cyberway.issue.";

    /**
     * The crawler package: the fully-qualified name of this class with the
     * trailing simple class name (and its leading dot) removed.
     */
    private static final String CRAWLER_PACKAGE = Heritrix.class.getName().substring(0,
            Heritrix.class.getName().lastIndexOf('.'));

    /**
     * The root context for a webapp.
     */
    private static final String ROOT_CONTEXT = "/";

    /**
     * Set to true if application is started from command line.
     */
    private static boolean commandLine = false;

    /**
     * True if container initialization has been run.
     */
    private static boolean containerInitialized = false;

    /**
     * True if properties have been loaded.
     */
    private static boolean propertiesLoaded = false;

    /**
     * The <code>.jar</code> file-name suffix.
     */
    private static final String JAR_SUFFIX = ".jar";

    /**
     * Alert manager for this instance.  The constructor sets it to an adapter
     * that forwards every call to the global SinkHandler.
     */
    private AlertManager alertManager;

    /**
     * The context of the GUI webapp.  Default is root.
     */
    private static String adminContext = ROOT_CONTEXT;

    /**
     * True if we're to put up a GUI.
     * Cmdline processing can override.
     * NOTE(review): initialized at class-initialization time, i.e. before
     * loadProperties() has run; presumably only values already present in
     * System properties are seen here -- confirm against PropertyUtils.
     */
    private static boolean gui = !PropertyUtils.getBooleanProperty("heritrix.cmdline.nowui");

    /**
     * Port to put the GUI up on.
     * Cmdline processing can override.
     */
    private static int guiPort = SimpleHttpServer.DEFAULT_PORT;

    /**
     * A collection containing only localhost.  Used as default value
     * for guiHosts, and passed to SimpleHttpServer when doing selftest.
     * The collection is unmodifiable.
     */
    final private static Collection<String> LOCALHOST_ONLY = Collections
            .unmodifiableList(Arrays.asList(new String[] { "127.0.0.1" }));

    /**
     * Hosts to bind the GUI webserver to.
     * By default, only contains localhost.
     * Set to an empty collection to indicate that all available network
     * interfaces should be used for the webserver.
     */
    private static Collection<String> guiHosts = LOCALHOST_ONLY;

    /**
     * Web UI server, realm, context name.
     */
    private static String ADMIN = "admin";

    // OpenMBean support.
    /**
     * The MBean server we're registered with (May be null).
     */
    private MBeanServer mbeanServer = null;

    /**
     * MBean name we were registered as.
     */
    private ObjectName mbeanName = null;

    /**
     * Keep reference to all instances of Heritrix.
     * Used by the UI to figure which of the local Heritrice it should
     * be going against and to figure what to shutdown on the way out (If
     * there was always a JMX Agent, we wouldn't need to keep this list.  We
     * could always ask the JMX Agent for all instances. UPDATE: True we could
     * always ask the JMX Agent but we might keep around this local reference
     * because it will allow faster, less awkward -- think of marshalling the args
     * for JMX invoke operation -- access to local Heritrix instances.  A new
     * usage for this instances Map is in CrawlJob#preRegister to find the hosting
     * Heritrix instance).
     */
    private static Map<String, Heritrix> instances = new Hashtable<String, Heritrix>();

    /**
     * MBean description of this instance.  Built by buildMBeanInfo() in the
     * constructor and set back to null by destroy().
     */
    private OpenMBeanInfoSupport openMBeanInfo;

    // Attribute names.  Presumably these are the attribute names exposed
    // through the DynamicMBean interface -- the code that consumes them is
    // outside this section.
    private final static String STATUS_ATTR = "Status";
    private final static String VERSION_ATTR = "Version";
    private final static String ISRUNNING_ATTR = "IsRunning";
    private final static String ISCRAWLING_ATTR = "IsCrawling";
    private final static String ALERTCOUNT_ATTR = "AlertCount";
    private final static String NEWALERTCOUNT_ATTR = "NewAlertCount";
    private final static String CURRENTJOB_ATTR = "CurrentJob";

    /**
     * Fixed-size list of all the attribute names declared above.
     */
    private final static List ATTRIBUTE_LIST;
    static {
        ATTRIBUTE_LIST = Arrays.asList(new String[] { STATUS_ATTR, VERSION_ATTR, ISRUNNING_ATTR, ISCRAWLING_ATTR,
                ALERTCOUNT_ATTR, NEWALERTCOUNT_ATTR, CURRENTJOB_ATTR });
    }

    // Operation names.  Presumably these are the operation names exposed
    // through the DynamicMBean interface -- the code that consumes them is
    // outside this section.
    private final static String START_OPER = "start";
    private final static String STOP_OPER = "stop";
    private final static String DESTROY_OPER = "destroy";
    private final static String INTERRUPT_OPER = "interrupt";
    private final static String START_CRAWLING_OPER = "startCrawling";
    private final static String STOP_CRAWLING_OPER = "stopCrawling";
    private final static String ADD_CRAWL_JOB_OPER = "addJob";
    private final static String TERMINATE_CRAWL_JOB_OPER = "terminateCurrentJob";
    private final static String DELETE_CRAWL_JOB_OPER = "deleteJob";
    private final static String ALERT_OPER = "alert";
    private final static String ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon";
    private final static String PENDING_JOBS_OPER = "pendingJobs";
    private final static String COMPLETED_JOBS_OPER = "completedJobs";
    private final static String CRAWLEND_REPORT_OPER = "crawlendReport";
    private final static String SHUTDOWN_OPER = "shutdown";
    private final static String LOG_OPER = "log";
    private final static String REBIND_JNDI_OPER = "rebindJNDI";

    /**
     * Fixed-size list of all the operation names declared above (the list
     * order differs from the declaration order).
     */
    private final static List OPERATION_LIST;
    static {
        OPERATION_LIST = Arrays.asList(new String[] { START_OPER, STOP_OPER, INTERRUPT_OPER, START_CRAWLING_OPER,
                STOP_CRAWLING_OPER, ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER, DELETE_CRAWL_JOB_OPER,
                ALERT_OPER, PENDING_JOBS_OPER, COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER, LOG_OPER,
                DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER, REBIND_JNDI_OPER });
    }

    // Open-type descriptors and item keys; presumably used when reporting
    // jobs as JMX open data -- the code that assigns and uses them is outside
    // this section (TODO confirm).
    private CompositeType jobCompositeType = null;
    private TabularType jobsTabularType = null;
    private static final String[] JOB_KEYS = new String[] { "uid", "name", "status" };

    // Admin credentials.  Not assigned anywhere in this section; presumably
    // set when the embedded webserver is started (TODO confirm).
    private static String adminUsername;

    private static String adminPassword;

    /**
     * Constructor.
     * Does not register the created instance with JMX.  It is assumed this
     * constructor is used by something such as a JMX agent creating an instance
     * of Heritrix at the command of a remote client (in which case Heritrix
     * will be registered by the invoking agent).
     * @throws IOException
     */
    public Heritrix() throws IOException {
        // Default instance name (null) and no JMX self-registration.
        this((String) null, false);
    }

    /**
     * Constructor.
     * Brings up the default Heritrix instance (null name).
     * @param jmxregister True if we are to register this instance with JMX
     * agent.
     * @throws IOException
     */
    public Heritrix(final boolean jmxregister) throws IOException {
        this((String) null, jmxregister);
    }

    /**
     * Constructor.
     * @param name If null, we bring up the default Heritrix instance.
     * @param jmxregister True if we are to register this instance with JMX
     * agent.
     * @throws IOException
     */
    public Heritrix(final String name, final boolean jmxregister) throws IOException {
        // Uses a new CrawlJobHandler rooted at the jobs directory.  Note that
        // getJobsdir() loads the heritrix properties itself, because it runs
        // here before containerInitialization() has been invoked.
        this(name, jmxregister, new CrawlJobHandler(getJobsdir()));
    }

    /**
     * Constructor.
     * @param name If null, we bring up the default Heritrix instance.
     * @param jmxregister True if we are to register this instance with JMX
     * agent.
     * @param cjh CrawlJobHandler to use.
     * @throws IOException
     */
    public Heritrix(final String name, final boolean jmxregister, final CrawlJobHandler cjh) throws IOException {
        // One-time, process-wide setup; returns immediately if already done.
        containerInitialization();
        this.jobHandler = cjh;
        this.openMBeanInfo = buildMBeanInfo();

        // Alerts are backed by the SinkHandler, which is a global: it collects
        // alerts for every Heritrix instance running in this JVM.  (Possible
        // follow-up: record the originating instance name on the SinkRecord.)
        final SinkHandler sink = SinkHandler.getInstance();
        if (sink == null) {
            throw new NullPointerException("SinkHandler not found.");
        }

        // Thin adapter: every AlertManager call is forwarded to the sink.
        // Alert IDs arrive as strings and are parsed to longs for the sink.
        this.alertManager = new AlertManager() {
            public int getCount() {
                return sink.getCount();
            }

            public int getNewCount() {
                return sink.getUnreadCount();
            }

            public Vector getAll() {
                return sink.getAll();
            }

            public Vector getNewAll() {
                return sink.getAllUnread();
            }

            public SinkHandlerLogRecord get(String alertID) {
                final long id = Long.parseLong(alertID);
                return sink.get(id);
            }

            public void add(SinkHandlerLogRecord record) {
                sink.publish(record);
            }

            public void read(String alertID) {
                final long id = Long.parseLong(alertID);
                sink.read(id);
            }

            public void remove(String alertID) {
                final long id = Long.parseLong(alertID);
                sink.remove(id);
            }
        };

        // Registration failures are surfaced as unchecked exceptions with the
        // original JMX exception preserved as the cause.
        try {
            Heritrix.registerHeritrix(this, name, jmxregister);
        } catch (MalformedObjectNameException badName) {
            throw new RuntimeException(badName);
        } catch (NotCompliantMBeanException notCompliant) {
            throw new RuntimeException(notCompliant);
        } catch (MBeanRegistrationException registrationFailed) {
            throw new RuntimeException(registrationFailed);
        } catch (InstanceAlreadyExistsException alreadyThere) {
            throw new RuntimeException(alreadyThere);
        }
    }

    /**
     * Run setup tasks for this 'container'. Idempotent.
     * 
     * @throws IOException
     */
    protected static void containerInitialization() throws IOException {
        if (!Heritrix.containerInitialized) {
            // The flag is raised before any work is done, so a later call
            // returns immediately even if one of the steps below threw.
            Heritrix.containerInitialized = true;

            // loadProperties publishes heritrix properties as system
            // properties so they are reachable through System.getProperty.
            // Both loadProperties and patchLogging act JVM-wide, which may
            // matter when hosted inside a container such as tomcat or jboss.
            Heritrix.loadProperties();
            Heritrix.patchLogging();
            Heritrix.configureTrustStore();

            // The hook fires on SIGTERM (not on SIGKILL) and lets us clean up
            // after ourselves (deregister from JMX and JNDI).
            final Thread shutdownHook = Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook");
            Runtime.getRuntime().addShutdownHook(shutdownHook);

            // Register this heritrix 'container' with JNDI, even though we
            // may ourselves be running inside another container.  Failure is
            // logged and otherwise ignored.
            try {
                registerContainerJndi();
            } catch (Exception jndiFailure) {
                logger.log(Level.WARNING, "Failed jndi container registration.", jndiFailure);
            }
        }
    }

    /**
     * Do inverse of construction. Used by anyone who does a 'new Heritrix' when
     * they want to cleanup the instance.
     * Of note, there may be Heritrix threads still hanging around after the
     * call to destroy completes.  They'll eventually go down after they've
     * finished their cleanup routines.  In particular, if you are watching
     * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister
     * ahead of the CrawlJob JMX bean that it's hosting.
     */
    public void destroy() {
        stop();

        // Unregistration is best-effort: a failure is reported on stderr and
        // teardown carries on regardless.
        try {
            Heritrix.unregisterHeritrix(this);
        } catch (NullPointerException npe) {
            npe.printStackTrace();
        } catch (MBeanRegistrationException registrationProblem) {
            registrationProblem.printStackTrace();
        } catch (InstanceNotFoundException notRegistered) {
            notRegistered.printStackTrace();
        }

        // Drop the references that were set up by the constructor.
        this.openMBeanInfo = null;
        this.jobHandler = null;
    }

    /**
     * Launch program.
     * Optionally will launch a web server to host UI.  Will also register
     * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM
     * Agent).
     * 
     * @param args Command line arguments.
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        Heritrix.commandLine = true;

        // Force GMT as the default timezone.  Only done on this code path
        // because changing it would be problematic when hosted in a container.
        TimeZone.setDefault(TimeZone.getTimeZone("GMT"));

        // Startup output goes to the console when in development mode and to
        // the STARTLOG file under the heritrix home directory otherwise.
        final File startLog = new File(getHeritrixHome(), STARTLOG);
        final PrintStream startupSink;
        if (isDevelopment()) {
            startupSink = System.out;
        } else {
            startupSink = new PrintStream(new FileOutputStream(startLog));
        }
        Heritrix.out = new PrintWriter(startupSink);

        try {
            containerInitialization();
            final String status = doCmdLineArgs(args);
            if (status != null) {
                Heritrix.out.println(status);
            }
        } catch (Exception e) {
            // Make sure any exception shows up in the startup output too.
            e.printStackTrace(Heritrix.out);
            throw e;
        } finally {
            // The development check is deliberately re-evaluated here rather
            // than cached from above.
            if (isDevelopment()) {
                // Output is probably a console: flush it, do not close it.
                if (Heritrix.out != null) {
                    Heritrix.out.flush();
                }
            } else {
                // Closing the file signals the wrapper script that we have
                // started.
                if (Heritrix.out != null) {
                    Heritrix.out.close();
                }
                System.out.println("Heritrix version: " + Heritrix.getVersion());
            }
        }
    }

    /**
     * Process the command line and launch what it asks for: a selftest, a
     * single crawl without web UI, or the embedded webserver plus a Heritrix
     * instance.
     *
     * Defaults for the admin context, GUI port, admin login:password, order
     * file and run mode are read from properties first; command-line options
     * then override them.
     *
     * @param args Command line arguments.
     * @return Status message produced by whatever was launched (may be null).
     * @throws Exception
     */
    protected static String doCmdLineArgs(final String[] args) throws Exception {
        // Get defaults for commandline arguments from the properties file.
        String tmpStr = PropertyUtils.getPropertyOrNull("heritrix.context");
        if (tmpStr != null) {
            Heritrix.adminContext = tmpStr;
        }
        tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port");
        if (tmpStr != null) {
            Heritrix.guiPort = Integer.parseInt(tmpStr);
        }
        tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin");
        String adminLoginPassword = (tmpStr == null) ? "" : tmpStr;
        String crawlOrderFile = PropertyUtils.getPropertyOrNull("heritrix.cmdline.order");
        boolean runMode = PropertyUtils.getBooleanProperty("heritrix.cmdline.run");
        boolean selfTest = false;
        String selfTestName = null;
        CommandLineParser clp = new CommandLineParser(args, Heritrix.out, Heritrix.getVersion());
        List arguments = clp.getCommandLineArguments();
        Option[] options = clp.getCommandLineOptions();

        // Check passed argument.  Only one argument, the ORDER_FILE is allowed.
        // If one argument, make sure exists and xml suffix.
        if (arguments.size() > 1) {
            clp.usage(1);
        } else if (arguments.size() == 1) {
            crawlOrderFile = (String) arguments.get(0);
            if (!(new File(crawlOrderFile).exists())) {
                clp.usage("ORDER.XML <" + crawlOrderFile + "> specified does not exist.", 1);
            }
            // Must end with '.xml' (case-insensitive).  regionMatches returns
            // false for names shorter than the suffix, so short names such as
            // "abc" are rejected too instead of slipping through unchecked.
            final String xmlSuffix = ".xml";
            if (!crawlOrderFile.regionMatches(true, crawlOrderFile.length() - xmlSuffix.length(), xmlSuffix, 0,
                    xmlSuffix.length())) {
                clp.usage("ORDER.XML <" + crawlOrderFile + "> does not have required '.xml' suffix.", 1);
            }
        }

        // Now look at options passed.
        for (int i = 0; i < options.length; i++) {
            switch (options[i].getId()) {
            case 'h':
                clp.usage();
                break;

            case 'a':
                adminLoginPassword = options[i].getValue();
                break;

            case 'n':
                if (crawlOrderFile == null) {
                    clp.usage("You must specify an ORDER_FILE with" + " '--nowui' option.", 1);
                }
                Heritrix.gui = false;
                break;

            case 'b':
                Heritrix.guiHosts = parseHosts(options[i].getValue());
                break;

            case 'p':
                try {
                    Heritrix.guiPort = Integer.parseInt(options[i].getValue());
                } catch (NumberFormatException e) {
                    clp.usage("Failed parse of port number: " + options[i].getValue(), 1);
                }
                if (Heritrix.guiPort <= 0) {
                    clp.usage("Nonsensical port number: " + options[i].getValue(), 1);
                }
                break;

            case 'r':
                runMode = true;
                break;

            case 's':
                selfTestName = options[i].getValue();
                selfTest = true;
                break;

            default:
                assert false : options[i].getId();
            }
        }

        // Ok, we should now have everything to launch the program.
        String status = null;
        if (selfTest) {
            // If more than just '--selftest' and '--port' passed, then
            // there is confusion on what is being asked of us.  Print usage
            // rather than proceed.
            for (int i = 0; i < options.length; i++) {
                if (options[i].getId() != 'p' && options[i].getId() != 's') {
                    clp.usage(1);
                }
            }

            if (arguments.size() > 0) {
                // No arguments accepted by selftest.
                clp.usage(1);
            }
            status = selftest(selfTestName, Heritrix.guiPort);
        } else {
            if (!isValidLoginPasswordString(adminLoginPassword)) {
                clp.usage("Invalid admin login:password value, or none " + "specified. ", 1);
            }

            if (!Heritrix.gui) {
                if (options.length > 1) {
                    // If more than just '--nowui' passed, then there is
                    // confusion on what is being asked of us. Print usage
                    // rather than proceed.
                    clp.usage(1);
                }
                Heritrix h = new Heritrix(true);
                status = h.doOneCrawl(crawlOrderFile);
            } else {
                // Bring the web UI up first, then the Heritrix instance; the
                // launch status (if any) is appended on its own line.
                status = startEmbeddedWebserver(Heritrix.guiHosts, Heritrix.guiPort, adminLoginPassword);
                Heritrix h = new Heritrix(true);

                String tmp = h.launch(crawlOrderFile, runMode);
                if (tmp != null) {
                    status += ('\n' + tmp);
                }
            }
        }
        return status;
    }

    /**
    * @return The file we dump stdout and stderr into.
    */
    public static String getHeritrixOut() {
        // The heritrix.out system property wins when it is set and non-empty;
        // otherwise fall back to the built-in default file name.
        final String configured = System.getProperty("heritrix.out");
        final boolean unset = configured == null || configured.length() == 0;
        return unset ? Heritrix.DEFAULT_HERITRIX_OUT : configured;
    }

    /**
     * Exploit <code>-Dheritrix.home</code> if available to us.
     * Is current working dir if no heritrix.home property supplied.
     * @return Heritrix home directory.
     * @throws IOException
     */
    protected static File getHeritrixHome() throws IOException {
        final String home = System.getProperty("heritrix.home");
        if (home == null || home.length() == 0) {
            // No explicit home supplied: use the absolute form of the
            // current working directory.
            return new File(new File("").getAbsolutePath());
        }
        // An explicitly configured home has to exist.
        final File configuredHome = new File(home);
        if (!configuredHome.exists()) {
            throw new IOException("HERITRIX_HOME <" + home + "> does not exist.");
        }
        return configuredHome;
    }

    /**
     * @return The directory into which we put jobs.  If the system property
     * 'heritrix.jobsdir' is set, we will use its value in place of the default
     * 'jobs' directory in the current working directory.
     * @throws IOException
     */
    public static File getJobsdir() throws IOException {
        // Properties must be loaded here because this method is invoked from
        // a constructor argument, before container initialization has run.
        Heritrix.loadProperties();
        final String configured = System.getProperty("heritrix.jobsdir", "jobs");
        final File candidate = new File(configured);
        if (candidate.isAbsolute()) {
            return candidate;
        }
        // Relative paths are resolved against the heritrix home directory.
        return new File(getHeritrixHome(), configured);
    }

    /**
     * Get and check for existence of expected subdir.
     *
     * If development flag set, then look for dir under src dir.
     *
     * @param subdirName Dir to look for.
     * @return The extant subdir.  Otherwise null if we're running
     * in a webapp context where there is no conf directory available.
     * @throws IOException if unable to find expected subdir.
     */
    protected static File getSubDir(String subdirName) throws IOException {
        // Delegate to the two-argument form, insisting the directory exists.
        final boolean failIfMissing = true;
        return getSubDir(subdirName, failIfMissing);
    }

    /**
     * Get and optionally check for existence of subdir.
     *
     * If development flag set, then look for dir under src dir.
     *
     * @param subdirName Dir to look for.
     * @param fail True if we are to fail if directory does not
     * exist; false if we are to return null if the directory does not exist.
     * @return The extant subdir.  Otherwise null if we're running
     * in a webapp context where there is no subdir directory available.
     * @throws IOException if unable to find expected subdir.
     */
    protected static File getSubDir(String subdirName, boolean fail) throws IOException {
        // In development mode the directory is looked for under 'src'.
        final String relativePath;
        if (isDevelopment()) {
            relativePath = "src" + File.separator + subdirName;
        } else {
            relativePath = subdirName;
        }

        final File candidate = new File(getHeritrixHome(), relativePath);
        if (candidate.exists()) {
            return candidate;
        }
        // Missing directory: either complain or report it as null.
        if (fail) {
            throw new IOException("Cannot find subdir: " + subdirName);
        }
        return null;
    }

    /**
     * Test string is valid login/password string.
     *
     * A valid login/password string has the login and password compounded
     * w/ a ':' delimiter.
     *
     * @param str String to test.
     * @return True if valid password/login string.
     */
    protected static boolean isValidLoginPasswordString(String str) {
        // A missing value is simply not valid; without this guard the
        // tokenizer below would throw a NullPointerException.
        if (str == null) {
            return false;
        }
        // Exactly two ':'-separated tokens are required.  StringTokenizer
        // skips empty tokens, so "", "login" and "login:" all fail here.
        StringTokenizer tokenizer = new StringTokenizer(str, ":");
        if (tokenizer.countTokens() != 2) {
            return false;
        }
        String login = tokenizer.nextToken().trim();
        String password = tokenizer.nextToken().trim();
        // Neither part may be blank once surrounding whitespace is removed.
        return login.length() > 0 && password.length() > 0;
    }

    protected static boolean isDevelopment() {
        // Development mode is signalled purely by the presence of the
        // heritrix.development system property; its value is not inspected.
        final String developmentFlag = System.getProperty("heritrix.development");
        return developmentFlag != null;
    }

    /**
     * Load the heritrix.properties file.
     * 
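     * Adds any property that starts with
     * <code>HERITRIX_PROPERTIES_PREFIX</code>
     * or <code>ARCHIVE_PACKAGE</code>
     * into system properties (except logging '.level' directives).
     * Properties that start with <code>SYSTEM_PREFIX</code> are added too,
     * with that prefix stripped.  An existing, non-empty system property is
     * never overwritten.
     * @return Loaded properties on the first call; the system properties on
     * any subsequent call.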
     * @throws IOException
     */
    protected static Properties loadProperties() throws IOException {
        // Only the first call does any work; later calls hand back the
        // system properties.
        if (Heritrix.propertiesLoaded) {
            return System.getProperties();
        }
        Heritrix.propertiesLoaded = true;

        Properties properties = new Properties();
        // Properties.load leaves the stream open after it returns, so close
        // it here whether or not the load succeeds.
        InputStream in = getPropertiesInputStream();
        try {
            properties.load(in);
        } finally {
            if (in != null) {
                in.close();
            }
        }

        // Any property that begins with ARCHIVE_PACKAGE, make it
        // into a system property. While iterating, check to see if anything
        // defined on command-line, and if so, it overrules whats in
        // heritrix.properties.
        for (Enumeration e = properties.keys(); e.hasMoreElements();) {
            // Values are looked up with the key exactly as stored; the
            // trimmed form is only used for prefix tests and as the system
            // property name.  Looking up with the trimmed key could miss
            // (and then fail on the null value) if the stored key carried
            // escaped surrounding whitespace.
            String rawKey = (String) e.nextElement();
            String key = rawKey.trim();
            if (key.startsWith(ARCHIVE_PACKAGE) || key.startsWith(HERITRIX_PROPERTIES_PREFIX)) {
                // Don't add the heritrix.properties entries that are
                // changing the logging level of particular classes.
                String value = properties.getProperty(rawKey).trim();
                if (key.indexOf(".level") < 0) {
                    copyToSystemProperty(key, value);
                }
            } else if (key.startsWith(SYSTEM_PREFIX)) {
                // 'system.'-prefixed entries are published with the prefix
                // stripped.
                String value = properties.getProperty(rawKey).trim();
                copyToSystemProperty(key.substring(SYSTEM_PREFIX.length()), value);
            }
        }
        return properties;
    }

    /**
     * Copy the given key-value into System properties, as long as there
     * is no existing value. 
     * @param key property key 
     * @param value property value
     */
    protected static void copyToSystemProperty(String key, String value) {
        final String existing = System.getProperty(key);
        // A non-empty value that is already set wins over the supplied one.
        if (existing != null && existing.length() != 0) {
            return;
        }
        System.setProperty(key, value);
    }

    /**
     * Open the heritrix properties for reading.
     *
     * Lookup order: the file named by the <code>PROPERTIES_KEY</code> system
     * property, then the <code>PROPERTIES</code> file under the conf
     * directory, then a resource on the CLASSPATH.
     *
     * @return An open stream on the properties.
     * @throws IOException If the properties cannot be located or opened.
     */
    protected static InputStream getPropertiesInputStream() throws IOException {
        File candidate = null;
        // A properties file named on the cmd-line is tried first.
        final String override = System.getProperty(PROPERTIES_KEY);
        if (override != null && override.length() > 0) {
            candidate = new File(override);
        }
        // Nothing usable from the cmd-line: try the conf directory, if one
        // is available.
        final boolean stillLooking = candidate == null || !candidate.exists();
        if (stillLooking && getConfdir(false) != null) {
            candidate = new File(getConfdir(), PROPERTIES);
            if (!candidate.exists()) {
                // Reset so that we go looking on the classpath below.
                candidate = null;
            }
        }
        // Last resort is the CLASSPATH (Classpath file separator is always
        // '/', whatever the platform).
        InputStream stream;
        if (candidate != null) {
            stream = new FileInputStream(candidate);
        } else {
            stream = Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY);
        }
        if (stream == null) {
            throw new IOException("Failed to load properties file from" + " filesystem or from classpath.");
        }
        return stream;
    }

    /**
     * If the user hasn't altered the default logging parameters, tighten them
     * up somewhat: some of our libraries are way too verbose at the INFO or
     * WARNING levels.
     * 
     * This might be a problem running inside in someone else's
     * container.  Container's seem to prefer commons logging so we
     * ain't messing them doing the below.
     *
     * @throws IOException
     * @throws SecurityException
     */
    protected static void patchLogging() throws SecurityException, IOException {
        // Leave logging alone if the user configured it explicitly, either
        // via a config class or a config file.
        if (System.getProperty("java.util.logging.config.class") != null) {
            return;
        }

        if (System.getProperty("java.util.logging.config.file") != null) {
            return;
        }

        // No user-set logging properties established; use defaults
        // from distribution-packaged 'heritrix.properties'.  Close the
        // stream when done so that the file handle is not leaked.
        InputStream is = getPropertiesInputStream();
        try {
            LogManager.getLogManager().readConfiguration(is);
        } finally {
            is.close();
        }
    }

    /**
     * Configure our trust store.
     *
     * If system property is defined, then use it for our truststore.  Otherwise
     * use the heritrix truststore under conf directory if it exists.
     * 
     * <p>If we're not launched from the command-line, we will not be able
     * to find our truststore.  The truststore is not normally used, so this
     * should rarely be a problem (In the case where we don't find our trust
     * store, we'll use the 'default' -- either the JVM's or the container's).
     */
    protected static void configureTrustStore() {
        // Below must be defined in jsse somewhere but can't find it.
        final String TRUSTSTORE_KEY = "javax.net.ssl.trustStore";
        String store = System.getProperty(TRUSTSTORE_KEY);
        File confdir = null;
        try {
            confdir = getConfdir(false);
        } catch (IOException e) {
            logger.log(Level.WARNING, "Failed to get confdir.", e);
        }
        final boolean storeUnset = store == null || store.length() == 0;
        if (storeUnset && confdir != null) {
            // Fall back on the heritrix store, but only if it is on disk.
            final File heritrixStore = new File(confdir, "heritrix.cacerts");
            if (heritrixStore.exists()) {
                store = heritrixStore.getAbsolutePath();
            }
        }

        // Nothing to configure; the default trust store stays in effect.
        if (store == null || store.length() == 0) {
            return;
        }
        System.setProperty(TRUSTSTORE_KEY, store);
    }

    /**
     * Run the selftest
     *
     * @param oneSelfTestName Name of a test if we are to run one only rather
     * than the default running all tests.
     * @param port Port number to use for web UI.
     *
     * @exception Exception
     * @return Status of how selftest startup went.
     */
    protected static String selftest(final String oneSelfTestName, final int port) throws Exception {
        // Put up the webserver w/ the root and selftest webapps only.
        final String SELFTEST = "selftest";
        Heritrix.httpServer = new SimpleHttpServer(SELFTEST, Heritrix.adminContext, LOCALHOST_ONLY, port, true);
        // Set up digest auth for a section of the server so selftest can run
        // auth tests.  Looks like can only set one login realm going by the
        // web.xml dtd.  Otherwise, would be nice to selftest basic and digest.
        // Have login, password and role all be SELFTEST.  Must match what is
        // in the selftest order.xml file.
        Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext, SELFTEST, SELFTEST, SELFTEST);
        Heritrix.httpServer.startServer();
        // Get the order file from the CLASSPATH unless we're running in dev
        // environment.
        // NOTE(review): the non-development branch builds a path rooted at
        // File.separator; presumably it is resolved against the classpath
        // further down the line -- confirm.
        File selftestDir = (isDevelopment()) ? new File(getConfdir(), SELFTEST)
                : new File(File.separator + SELFTEST);
        File crawlOrderFile = new File(selftestDir, "order.xml");
        // Create a job based off the selftest order file.  Then use this as
        // a template to pass jobHandler.newJob().  Doing this gets our
        // selftest output to show under the jobs directory.
        // Pass as a seed a pointer to the webserver we just put up.
        final String ROOTURI = "127.0.0.1:" + Integer.toString(port);
        String selfTestUrl = "http://" + ROOTURI + '/';
        // When a single test is named, its name is appended to the seed URL
        // as a path segment.
        if (oneSelfTestName != null && oneSelfTestName.length() > 0) {
            selfTestUrl += (oneSelfTestName + '/');
        }
        CrawlJobHandler cjh = new SelfTestCrawlJobHandler(getJobsdir(), oneSelfTestName, selfTestUrl);
        Heritrix h = new Heritrix("Selftest", true, cjh);
        CrawlJob job = createCrawlJob(cjh, crawlOrderFile, "Template");
        job = h.getJobHandler().newJob(job, null, SELFTEST, "Integration self test", selfTestUrl,
                CrawlJob.PRIORITY_AVERAGE);
        h.getJobHandler().addJob(job);
        // Before we start, need to change some items in the settings file.
        // Every credential in the order's credential store gets ROOTURI
        // (the host:port of the server put up above) as credential domain.
        CredentialStore cs = (CredentialStore) job.getSettingsHandler().getOrder()
                .getAttribute(CredentialStore.ATTR_NAME);
        for (Iterator i = cs.iterator(null); i.hasNext();) {
            ((Credential) i.next()).setCredentialDomain(null, ROOTURI);
        }
        h.getJobHandler().startCrawler();
        // Assemble the human-readable status that is returned to the caller.
        StringBuffer buffer = new StringBuffer();
        buffer.append("Heritrix " + Heritrix.getVersion() + " selftest started.");
        buffer.append("\nSelftest first crawls " + selfTestUrl + " and then runs an analysis.");
        buffer.append("\nResult of analysis printed to " + getHeritrixOut() + " when done.");
        buffer.append("\nSelftest job directory for logs and arcs:\n" + job.getDirectory().getAbsolutePath());
        return buffer.toString();
    }

    /**
     * Launch the crawler without a web UI and run the passed crawl only.
     * 
     * Specialized version of {@link #launch()}.
     *
     * @param crawlOrderFile The crawl order to crawl.
     * @throws InitializationException
     * @throws InvalidAttributeValueException
     * @return Status string.
     */
    protected String doOneCrawl(String crawlOrderFile)
            throws InitializationException, InvalidAttributeValueException {
        // This variant registers no crawl status listener.
        final CrawlStatusListener noListener = null;
        return doOneCrawl(crawlOrderFile, noListener);
    }

    /**
     * Launch the crawler without a web UI and run passed crawl only.
     * 
     * Specialized version of {@link #launch()}.
     *
     * @param crawlOrderFile The crawl order to crawl.
     * @param listener Register this crawl status listener before starting
     * crawl (You can use this listener to notice end-of-crawl).
     * @throws InitializationException
     * @throws InvalidAttributeValueException
     * @return Status string.
     */
    protected String doOneCrawl(String crawlOrderFile, CrawlStatusListener listener)
            throws InitializationException, InvalidAttributeValueException {
        // Read the settings out of the passed order file.
        final File orderFile = new File(crawlOrderFile);
        final XMLSettingsHandler settings = new XMLSettingsHandler(orderFile);
        settings.initialize();

        // Build a controller on those settings and, if asked, hook up the
        // listener before the crawl is requested to start.
        final CrawlController crawl = new CrawlController();
        crawl.initialize(settings);
        if (null != listener) {
            crawl.addCrawlStatusListener(listener);
        }
        crawl.requestCrawlStart();

        final String status = "Crawl started using " + crawlOrderFile + ".";
        return status;
    }

    /**
     * Launch the crawler for a web UI.
     *
     * Crawler hangs around waiting on jobs.
     *
     * @exception Exception
     * @return A status string describing how the launch went.
     * @throws Exception
     */
    public String launch() throws Exception {
        // No order file to autolaunch and run mode switched off.
        final String noOrderFile = null;
        final boolean runMode = false;
        return launch(noOrderFile, runMode);
    }

    /**
     * Launch the crawler for a web UI.
     *
     * Crawler hangs around waiting on jobs.
     * 
     * @param crawlOrderFile File to crawl.  May be null.
     * @param runMode Whether crawler should be set to run mode.
     *
     * @exception Exception
     * @return A status string describing how the launch went.
     */
    public String launch(String crawlOrderFile, boolean runMode) throws Exception {
        if (crawlOrderFile == null) {
            if (!runMode) {
                // Nothing to add and nothing to start: no status to report.
                return null;
            }
            // The use case is that jobs are to be run on a schedule and that
            // if the crawler is in run mode, then the scheduled job will be
            // run at appropriate time.  Otherwise, not.
            this.jobHandler.startCrawler();
            return "Crawler set to run mode.";
        }

        // An order file was passed: queue a job for it first.
        addCrawlJob(crawlOrderFile, "Autolaunched", "", "");
        if (!runMode) {
            return "Crawl job ready and pending: " + crawlOrderFile;
        }
        this.jobHandler.startCrawler();
        return "Job being crawled: " + crawlOrderFile;
    }

    /**
     * Start up the embedded Jetty webserver instance.
     * This is done when we're run from the command-line.
     * @param port Port number to use for web UI.
     * @param adminLoginPassword Compound of login and password.
     * @throws Exception
     * @return Status on webserver startup.
     * @deprecated  Use startEmbeddedWebserver(hosts, port, adminLoginPassword)
     */
    protected static String startEmbeddedWebserver(final int port, final boolean lho,
            final String adminLoginPassword) throws Exception {
        // Translate the 'localhost only' flag into a host list: loopback
        // when set, otherwise an empty list.
        final List<String> bindHosts = new ArrayList<String>();
        if (lho) {
            bindHosts.add("127.0.0.1");
        }
        return startEmbeddedWebserver(bindHosts, port, adminLoginPassword);
    }

    /**
     * Parses a list of host names.
     * 
     * <p>If the given string is <code>/</code>, then an empty
     * collection is returned.  This indicates that all available network
     * interfaces should be used.
     * 
     * <p>Otherwise, the string must contain a comma-separated list of 
     * IP addresses or host names.  The parsed list is then returned.
     * 
     * @param hosts  the string to parse
     * @return  the parsed collection of hosts 
     */
    private static Collection<String> parseHosts(String hosts) {
        final String spec = hosts.trim();
        // A lone '/' stands for 'all interfaces': answer an empty collection.
        if ("/".equals(spec)) {
            return new ArrayList<String>(1);
        }
        // Otherwise split on commas and trim each entry in place.
        final String[] names = spec.split(",");
        int index = 0;
        for (String name : names) {
            names[index++] = name.trim();
        }
        return Arrays.asList(names);
    }

    /**
     * Start up the embedded Jetty webserver instance.
     * This is done when we're run from the command-line.
     * 
     * @param hosts  a list of IP addresses or hostnames to bind to, or an
     *               empty collection to bind to all available network 
     *               interfaces
     * @param port Port number to use for web UI.
     * @param adminLoginPassword Compound of login and password.
     * @throws Exception
     * @return Status on webserver startup.
     */
    protected static String startEmbeddedWebserver(Collection<String> hosts, int port, String adminLoginPassword)
            throws Exception {
        // Split on the first ':' only: everything after it is the password.
        final int separator = adminLoginPassword.indexOf(":");
        adminUsername = adminLoginPassword.substring(0, separator);
        adminPassword = adminLoginPassword.substring(separator + 1);
        Heritrix.httpServer = new SimpleHttpServer("admin", Heritrix.adminContext, hosts, port, false);

        final String DOTWAR = ".war";
        final String SELFTEST = "selftest";

        // Look for additional WAR files beyond 'selftest' and 'admin'.
        // File.listFiles() answers null when the directory is missing or
        // cannot be read; treat that the same as 'no extra webapps'.
        File[] wars = getWarsdir().listFiles();
        if (wars != null) {
            for (File war : wars) {
                if (!war.isFile()) {
                    continue;
                }
                final String warName = war.getName();
                final String warNameNC = warName.toLowerCase();
                if (warNameNC.endsWith(DOTWAR) && !warNameNC.equals(ADMIN + DOTWAR)
                        && !warNameNC.equals(SELFTEST + DOTWAR)) {
                    // Strip only the trailing '.war' so that a name with
                    // embedded dots (e.g. 'my.app.war') is not truncated at
                    // its first dot.
                    final String webappName = warName.substring(0, warName.length() - DOTWAR.length());
                    Heritrix.httpServer.addWebapp(webappName, null, true);
                }
            }
        }

        // Name of passed 'realm' must match what is in configured in web.xml.
        // We'll use ROLE for 'realm' and 'role'.
        final String ROLE = ADMIN;
        Heritrix.httpServer.setAuthentication(ROLE, Heritrix.adminContext, adminUsername, adminPassword, ROLE);
        Heritrix.httpServer.startServer();
        // Assemble the status: version, one console URL per bound host and
        // the login in effect.
        StringBuffer buffer = new StringBuffer();
        buffer.append("Heritrix " + Heritrix.getVersion() + " is running.");
        for (String host : httpServer.getHosts()) {
            buffer.append("\nWeb console is at: http://");
            buffer.append(host).append(':').append(port);
        }
        buffer.append("\nWeb console login and password: " + adminUsername + "/" + adminPassword);
        return buffer.toString();
    }

    /**
     * Replace existing administrator login info with new info.
     * 
     * @param newUsername new administrator login username
     * @param newPassword new administrator login password
     */
    public static void resetAuthentication(String newUsername, String newPassword) {
        // Swap the login registered with the web server under the current
        // admin username, then remember the new credentials.
        Heritrix.httpServer.resetAuthentication(ADMIN, adminUsername, newUsername, newPassword);
        adminUsername = newUsername;
        adminPassword = newPassword;
        // Record who the administrator now is, but never write the
        // password itself into the log.
        logger.info("administrative login changed to " + newUsername);
    }

    /**
     * Create (but do not add) a crawl job from the passed order file.
     *
     * @param handler Handler asked for the new job's UID.
     * @param crawlOrderFile Order file the job settings are read from; its
     * parent directory is handed to the job as well.
     * @param name Name to use for the job.
     * @return The newly created job.
     * @throws InvalidAttributeValueException
     */
    protected static CrawlJob createCrawlJob(CrawlJobHandler handler, File crawlOrderFile, String name)
            throws InvalidAttributeValueException {
        final XMLSettingsHandler orderSettings = new XMLSettingsHandler(crawlOrderFile);
        orderSettings.initialize();
        final CrawlJobErrorHandler errorHandler = new CrawlJobErrorHandler(Level.SEVERE);
        final File orderDir = crawlOrderFile.getAbsoluteFile().getParentFile();
        return new CrawlJob(handler.getNextJobUID(), name, orderSettings, errorHandler,
                CrawlJob.PRIORITY_HIGH, orderDir);
    }

    /**
     * This method is called when we have an order file to hand that we want
     * to base a job on.  It leaves the order file in place and just starts up
     * a job that uses all the order points to for locations for logs, etc.
     * @param orderPathOrUrl Path to an order file or to a seeds file.
     * @param name Name to use for this job.
     * @param description 
     * @param seeds 
     * @return A status string.
     * @throws IOException 
     * @throws FatalConfigurationException 
     */
    public String addCrawlJob(String orderPathOrUrl, String name, String description, String seeds)
            throws IOException, FatalConfigurationException {
        // Without a scheme, assume its a file path.
        if (!UURI.hasScheme(orderPathOrUrl)) {
            return addCrawlJob(new File(orderPathOrUrl), name, description, seeds);
        }

        // Otherwise, must be an URL.
        final URL orderUrl = new URL(orderPathOrUrl);

        // Handle http and file only for now (Tried to handle JarUrlConnection
        // but too awkward undoing jar stream.  Rather just look for URLs that
        // end in '.jar').
        final URLConnection conn = orderUrl.openConnection();
        if (conn instanceof HttpURLConnection) {
            return addCrawlJob(orderUrl, (HttpURLConnection) conn, name, description, seeds);
        }
        if (conn instanceof FileURLConnection) {
            return addCrawlJob(new File(orderUrl.getPath()), name, description, seeds);
        }
        throw new UnsupportedOperationException("No support for " + conn);
    }

    /**
     * Bring an order file (or a jar holding one) down over HTTP into a
     * local temporary file and add a crawl job based on it.
     *
     * @param url URL the order was requested from.
     * @param connection Connection on <code>url</code>; connected and
     * disconnected by this method.
     * @param name Name to use for the new job.
     * @param description Passed through to the job creation.
     * @param seeds Passed through to the job creation.
     * @return What adding the job from the local copy returned, or null if
     * an IOException interrupted the download or the job creation.
     * @throws IOException If the temporary file cannot be created or the
     * connection cannot be established.
     * @throws FatalConfigurationException
     */
    protected String addCrawlJob(final URL url, final HttpURLConnection connection, final String name,
            final String description, final String seeds) throws IOException, FatalConfigurationException {
        // Look see if its a jar file.  If it is undo it.
        boolean isJar = url.getPath() != null && url.getPath().toLowerCase().endsWith(JAR_SUFFIX);
        // If http url connection, bring down the resource local.
        File localFile = File.createTempFile(Heritrix.class.getName(), isJar ? JAR_SUFFIX : null, TMPDIR);
        try {
            connection.connect();
        } catch (IOException ioe) {
            // Don't leave the (still empty) temporary file behind when we
            // cannot even connect.
            localFile.delete();
            throw ioe;
        }
        String result = null;
        try {
            IoUtils.readFullyToFile(connection.getInputStream(), localFile);
            result = addCrawlJob(localFile, name, description, seeds);
        } catch (IOException ioe) {
            // The caller is told of this failure by way of a null return;
            // record the cause rather than dropping it on the floor.
            logger.log(Level.WARNING, "Failed to add crawl job from " + url, ioe);
            // Cleanup if an Exception.
            localFile.delete();
            localFile = null;
        } finally {
            connection.disconnect();
            // If its a jar file, then we made a job based on the jar contents.
            // Its no longer needed.  Remove it.  If not a jar file, then leave
            // the file around because the job depends on it.
            if (isJar && localFile != null && localFile.exists()) {
                localFile.delete();
            }
        }
        return result;
    }

    /**
     * Add a crawl job based on a local order file or on a jar that bundles
     * an order.
     *
     * @param order Order file, or a jar file (name ends in
     * <code>JAR_SUFFIX</code>) to expand and base the job on.
     * @param name Name to use for the new job.
     * @param description Used for jar-based jobs only.
     * @param seeds Used for jar-based jobs only.
     * @return UID of the added job, or null if the handler returned no job.
     * @throws FatalConfigurationException If the order holds an invalid
     * attribute value.
     * @throws IOException
     */
    protected String addCrawlJob(final File order, final String name, final String description, final String seeds)
            throws FatalConfigurationException, IOException {
        CrawlJob addedJob = null;
        if (this.jobHandler == null) {
            throw new NullPointerException("Heritrix jobhandler is null.");
        }
        try {
            if (order.getName().toLowerCase().endsWith(JAR_SUFFIX)) {
                return addCrawlJobBasedonJar(order, name, description, seeds);
            }
            addedJob = this.jobHandler.addJob(createCrawlJob(this.jobHandler, order, name));
        } catch (InvalidAttributeValueException e) {
            FatalConfigurationException fce = new FatalConfigurationException(
                    "Converted InvalidAttributeValueException on " + order.getAbsolutePath() + ": "
                            + e.getMessage());
            fce.setStackTrace(e.getStackTrace());
            // The converted exception has to be thrown; building it and
            // then falling through hid the failure behind a null return.
            throw fce;
        }
        return addedJob != null ? addedJob.getUID() : null;
    }

    /**
     * Undo jar file and use as basis for a new job.
     * @param jarFile Pointer to file that holds jar.
     * @param name Name to use for new job.
     * @param description 
     * @param seeds 
     * @return Message.
     * @throws IOException
     * @throws FatalConfigurationException
     */
    protected String addCrawlJobBasedonJar(final File jarFile, final String name, final String description,
            final String seeds) throws IOException, FatalConfigurationException {
        // Check for null separately: the message below dereferences jarFile.
        if (jarFile == null) {
            throw new FileNotFoundException("No jar file passed (null).");
        }
        if (!jarFile.exists()) {
            throw new FileNotFoundException(jarFile.getAbsolutePath());
        }
        // Create a directory with a tmp name.  Do it by first creating file,
        // removing it, then creating the directory. There is a hole during
        // which the OS may put a file of same exact name in our way but
        // unlikely.
        File dir = File.createTempFile(Heritrix.class.getName(), ".expandedjar", TMPDIR);
        dir.delete();
        if (!dir.mkdir()) {
            // Fail here with a clear message rather than later on, inside
            // the unzip, with a confusing one.
            throw new IOException("Failed to create directory: " + dir.getAbsolutePath());
        }
        try {
            com.cyberway.issue.crawler.util.IoUtils.unzip(jarFile, dir);
            // Expect to find an order file at least.
            File orderFile = new File(dir, "order.xml");
            if (!orderFile.exists()) {
                throw new IOException("Missing order: " + orderFile.getAbsolutePath());
            }
            CrawlJob job = createCrawlJobBasedOn(orderFile, name, description, seeds);
            // Copy into place any seeds and settings directories before we
            // add job to Heritrix to crawl.
            File seedsFile = new File(dir, "seeds.txt");
            if (seedsFile.exists()) {
                FileUtils.copyFiles(seedsFile, new File(job.getDirectory(), seedsFile.getName()));
            }
            addCrawlJob(job);
            return job.getUID();
        } finally {
            // After job has been added, no more need of expanded content.
            // (Let the caller be responsible for cleanup of jar. Sometimes
            // it should be deleted -- when its a local copy of a jar pulled
            // across the net -- whereas other times, if its a jar passed
            // in w/ a 'file' scheme, it shouldn't be deleted.)
            com.cyberway.issue.util.FileUtils.deleteDir(dir);
        }
    }

    /**
     * Add a new crawl job that is based on an existing job or profile.
     *
     * @param jobUidOrProfile UID of an existing job or name of a profile.
     * @param name Name to use for the new job.
     * @param description Passed through to the job creation.
     * @param seeds Passed through to the job creation.
     * @return UID of the new job or, on any failure, a message that
     * describes the exception.
     */
    public String addCrawlJobBasedOn(String jobUidOrProfile, String name, String description, String seeds) {
        try {
            final CrawlJob template = getJobHandler().getJob(jobUidOrProfile);
            if (null == template) {
                // Thrown here only to be reported by the catch below.
                throw new InvalidAttributeValueException(
                        jobUidOrProfile + " is not a job UID or profile name (Job UIDs are "
                                + " usually the 14 digit date portion of job name).");
            }
            final File templateOrder = template.getSettingsHandler().getOrderFile();
            return addCrawlJobBasedOn(templateOrder, name, description, seeds).getUID();
        } catch (Exception e) {
            e.printStackTrace();
            return "Exception on " + jobUidOrProfile + ": " + e.getMessage();
        }
    }

    /**
     * Create a job based on the passed order file and add it to the job
     * handler.
     *
     * @return What adding the created job returned.
     * @throws FatalConfigurationException
     */
    protected CrawlJob addCrawlJobBasedOn(final File orderFile, final String name, final String description,
            final String seeds) throws FatalConfigurationException {
        final CrawlJob created = createCrawlJobBasedOn(orderFile, name, description, seeds);
        return addCrawlJob(created);
    }

    /**
     * Create (but do not add) a job based on the passed order file.
     *
     * @return The job as returned by
     * <code>CrawlJobHandler.ensureNewJobWritten</code>.
     * @throws FatalConfigurationException
     */
    protected CrawlJob createCrawlJobBasedOn(final File orderFile, final String name, final String description,
            final String seeds) throws FatalConfigurationException {
        // Ask the handler for a new job, then pass it on to be written.
        return CrawlJobHandler.ensureNewJobWritten(
                getJobHandler().newJob(orderFile, name, description, seeds), name, description);
    }

    /**
     * Hand the passed job to the job handler.
     *
     * @param job Job to add.
     * @return Whatever the handler's <code>addJob</code> returns.
     */
    protected CrawlJob addCrawlJob(final CrawlJob job) {
        final CrawlJobHandler handler = getJobHandler();
        return handler.addJob(job);
    }

    /**
     * Ask the job handler to start the crawler.
     *
     * @throws NullPointerException If there is no job handler.
     */
    public void startCrawling() {
        final CrawlJobHandler handler = getJobHandler();
        if (null == handler) {
            throw new NullPointerException("Heritrix jobhandler is null.");
        }
        handler.startCrawler();
    }

    /**
     * Ask the job handler to stop the crawler.
     *
     * @throws NullPointerException If there is no job handler.
     */
    public void stopCrawling() {
        final CrawlJobHandler handler = getJobHandler();
        if (null == handler) {
            throw new NullPointerException("Heritrix jobhandler is null.");
        }
        handler.stopCrawler();
    }

    /**
     * Get the heritrix version.
     *
     * @return The heritrix version.  May be null.
     */
    public static String getVersion() {
        // The version is published as a system property; null when unset.
        final String version = System.getProperty("heritrix.version");
        return version;
    }

    /**
     * Get the job handler
     *
     * @return The CrawlJobHandler being used.
     */
    public CrawlJobHandler getJobHandler() {
        // Plain accessor for this instance's handler.
        return jobHandler;
    }

    /**
     * Get the configuration directory.
     * @return The conf directory under HERITRIX_HOME or null if none can
     * be found.
     * @throws IOException
     */
    public static File getConfdir() throws IOException {
        // This variant asks for an IOException when the directory is missing.
        final boolean failIfMissing = true;
        return getConfdir(failIfMissing);
    }

    /**
     * Get the configuration directory.
     * @param fail Throw IOE if can't find directory if true, else just
     * return null.
     * @return The conf directory under HERITRIX_HOME or null (or an IOE) if
     * can't be found.
     * @throws IOException
     */
    public static File getConfdir(final boolean fail) throws IOException {
        final String key = "heritrix.conf";
        // Look to see if heritrix.conf property passed on the cmd-line.
        final String configured = System.getProperty(key);
        // If not, fall back to the default 'conf' subdirectory.
        if (configured == null || configured.length() == 0) {
            return getSubDir("conf", fail);
        }
        final File dir = new File(configured);
        if (dir.exists()) {
            return dir;
        }
        // The configured directory is missing: either complain loudly or
        // warn and go with the default.
        if (fail) {
            throw new IOException("Cannot find conf dir: " + configured);
        }
        logger.log(Level.WARNING, "Specified " + key + " dir does not exist.  Falling back on default");
        return getSubDir("conf", fail);
    }

    /**
     * @return Returns the httpServer. May be null if one was not started.
     */
    public static SimpleHttpServer getHttpServer() {
        // Plain accessor for the shared web server.
        return httpServer;
    }

    /**
     * @throws IOException
     * @return Returns the directory under which reside the WAR files
     * we're to load into the servlet container.
     */
    public static File getWarsdir() throws IOException {
        // WAR files are looked for in the 'webapps' subdirectory.
        final File webapps = getSubDir("webapps");
        return webapps;
    }

    /**
     * Prepares for program shutdown. This method does its best to prepare the
     * program so that it can exit normally. It will kill the httpServer and
     * terminate any running job.<br>
     * It is advisable to wait a few (~1000) millisec after calling this method
     * and before calling performHeritrixShutDown() to allow as many threads as
     * possible to finish what they are doing.
     */
    public static void prepareHeritrixShutDown() {
        // Stop and destroy all running Heritrix instances.
        // Work off a snapshot of the key set so that a destroy which
        // removes its instance from Heritrix.instances does not disturb
        // the iteration.
        final Object[] names = Heritrix.instances.keySet().toArray();
        for (Object name : names) {
            ((Heritrix) Heritrix.instances.get(name)).destroy();
        }

        try {
            deregisterJndi(getJndiContainerName());
        } catch (NameNotFoundException e) {
            // We were probably unbound already. Ignore.
            logger.log(Level.WARNING, "deregistration of jndi", e);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // No web server was started: nothing more to do.
        if (Heritrix.httpServer == null) {
            return;
        }
        // Shut down the web access.
        try {
            Heritrix.httpServer.stopServer();
        } catch (InterruptedException e) {
            // Generally this can be ignored, but we'll print a stack trace
            // just in case.
            e.printStackTrace();
        } finally {
            Heritrix.httpServer = null;
        }
    }

    /**
     * Exit program. Recommended that prepareHeritrixShutDown() be invoked
     * prior to this method.
     */
    public static void performHeritrixShutDown() {
        // Exit with code zero.
        final int normalExit = 0;
        performHeritrixShutDown(normalExit);
    }

    /**
     * Exit program. Recommended that prepareHeritrixShutDown() be invoked
     * prior to this method.
     *
     * @param exitCode Code to pass System.exit.
     *
     */
    public static void performHeritrixShutDown(int exitCode) {
        // Terminates the running JVM; does not return normally.
        Runtime.getRuntime().exit(exitCode);
    }

    /**
     * Shutdown all running heritrix instances and the JVM.
     * Assumes stop has already been called.
    * @param exitCode Exit code to pass system exit.
    */
    public static void shutdown(final int exitCode) {
        // Do the shutdown, JVM exit included, on a thread of its own.
        final Thread worker = getShutdownThread(true, exitCode, "Heritrix shutdown");
        worker.start();
    }

    /**
     * Build (but do not start) a daemon thread that shuts Heritrix down.
     *
     * @param sysexit True if the thread is to exit the JVM after the
     * shutdown preparation.
     * @param exitCode Exit code to use when <code>sysexit</code> is true.
     * @param name Name to give the thread.
     * @return The unstarted daemon thread.
     */
    protected static Thread getShutdownThread(final boolean sysexit, final int exitCode, final String name) {
        final Runnable shutdownTask = new Runnable() {
            public void run() {
                Heritrix.prepareHeritrixShutDown();
                if (!sysexit) {
                    return;
                }
                Heritrix.performHeritrixShutDown(exitCode);
            }
        };
        final Thread shutdownThread = new Thread(shutdownTask, name);
        shutdownThread.setDaemon(true);
        return shutdownThread;
    }

    /**
     * Shutdown all running heritrix instances and the JVM, using zero as
     * the exit code.
     */
    public static void shutdown() {
        final int normalExit = 0;
        shutdown(normalExit);
    }

    /**
     * Register Heritrix with JNDI, JMX, and with the static hashtable of all
     * Heritrix instances known to this JVM.
     * 
     * If launched from cmdline, register Heritrix MBean if an agent to register
     * ourselves with. Usually this method will only have effect if we're
     * running in a 1.5.0 JDK and command line options such as
     * '-Dcom.sun.management.jmxremote.port=8082
     * -Dcom.sun.management.jmxremote.authenticate=false
     * -Dcom.sun.management.jmxremote.ssl=false' are supplied.
     * See <a href="http://java.sun.com/j2se/1.5.0/docs/guide/management/agent.html">Monitoring
     * and Management Using JMX</a>
     * for more on the command line options and how to connect to the
     * Heritrix bean using the JDK 1.5.0 jconsole tool.  We register currently
     * with first server we find (TODO: Make configurable).
     * 
     * <p>If we register successfully with a JMX agent, then part of the
     * registration will include our registering ourselves with JNDI.
     * 
     * <p>Finally, add the heritrix instance to the hashtable of all the
     * Heritrix instances floating in the current VM.  This latter registration
     * happens whether or not there is a JMX agent to register with.  This is
     * a list we keep out of convenience so it's easy to iterate over
     * all instances calling stop when the main application is going down.
     * 
     * @param h Instance of heritrix to register.
     * @param name Name to use for this Heritrix instance.
     * @param jmxregister True if we are to register this instance with JMX.
     * @throws NullPointerException
     * @throws MalformedObjectNameException
     * @throws NotCompliantMBeanException 
     * @throws MBeanRegistrationException 
     * @throws InstanceAlreadyExistsException 
     */
    protected static void registerHeritrix(final Heritrix h, final String name, final boolean jmxregister)
            throws MalformedObjectNameException, InstanceAlreadyExistsException, MBeanRegistrationException,
            NotCompliantMBeanException {
        final MBeanServer server = getMBeanServer();
        if (server == null) {
            // JMX ain't available. Put this instance into the list of Heritrix
            // instances so findable by the UI (Normally this is done in the
            // JMX postRegister routine below).  When no JMX, can only have
            // one instance of Heritrix so no need to do the deregisteration.
            Heritrix.instances.put(h.getNoJmxName(), h);
            return;
        }
        // Are we to manage the jmx registration?  Or is it being done for
        // us by an external process: e.g. This instance was created by
        // MBeanAgent.
        if (!jmxregister) {
            return;
        }
        final boolean unnamed = name == null || name.length() <= 0;
        final ObjectName objName = unnamed ? getJmxObjectName() : getJmxObjectName(name);
        registerMBean(server, h, objName);
    }

    /**
     * Unregister the passed Heritrix instance.
     *
     * <p>If an MBeanServer is found, the instance's <code>mbeanName</code> is
     * unregistered from it.  Otherwise the instance is removed from the
     * instances table under its no-JMX name.
     *
     * @param h Instance of heritrix to unregister.
     * @throws InstanceNotFoundException
     * @throws MBeanRegistrationException
     * @throws NullPointerException
     */
    protected static void unregisterHeritrix(final Heritrix h)
            throws InstanceNotFoundException, MBeanRegistrationException, NullPointerException {
        final MBeanServer agent = getMBeanServer();
        if (agent == null) {
            // No JMX agent, so no postDeregister callback will run; drop the
            // instance from the table ourselves.
            Heritrix.instances.remove(h.getNoJmxName());
            return;
        }
        agent.unregisterMBean(h.mbeanName);
    }

    /**
     * Get MBeanServer.
     * Currently returns the first non-null MBeanServer reported by
     * <code>MBeanServerFactory.findMBeanServer(null)</code>.  This will
     * definitely not always be what is wanted.  TODO: Make which server
     * settable.  Also, if none, put up our own MBeanServer.
     * @return An MBeanServer to register with or null if none was found.
     */
    public static MBeanServer getMBeanServer() {
        final List candidates = MBeanServerFactory.findMBeanServer(null);
        if (candidates == null) {
            return null;
        }
        for (Object candidate : candidates) {
            final MBeanServer server = (MBeanServer) candidate;
            if (server != null) {
                // First usable server wins.
                return server;
            }
        }
        return null;
    }

    /**
     * Register the passed object with the MBeanServer returned by
     * {@link #getMBeanServer()}, if there is one.
     *
     * @param objToRegister Object to register.
     * @param name Name to register the object under.
     * @param type Type to register the object under.
     * @return The MBeanServer used, or null if none was found (in which case
     * nothing is registered).
     * @throws InstanceAlreadyExistsException
     * @throws MBeanRegistrationException
     * @throws NotCompliantMBeanException
     */
    public static MBeanServer registerMBean(final Object objToRegister, final String name, final String type)
            throws InstanceAlreadyExistsException, MBeanRegistrationException, NotCompliantMBeanException {
        final MBeanServer agent = getMBeanServer();
        if (agent == null) {
            return null;
        }
        return registerMBean(agent, objToRegister, name, type);
    }

    /**
     * Register the passed object with the passed MBeanServer under an
     * ObjectName built from <code>name</code> and <code>type</code>.
     *
     * <p>The ObjectName is built by
     * {@link #getJmxObjectName(String, String)}, the same helper that
     * {@link #unregisterMBean(MBeanServer, String, String)} uses, so that
     * register and unregister agree on the name.  If the name cannot be
     * built, the failure is logged and nothing is registered.
     *
     * @param server MBeanServer to register with.
     * @param objToRegister Object to register.
     * @param name Value for the name key property.
     * @param type Value for the type key property.
     * @return The passed <code>server</code>, whether or not the registration
     * took place.
     * @throws InstanceAlreadyExistsException
     * @throws MBeanRegistrationException
     * @throws NotCompliantMBeanException
     */
    public static MBeanServer registerMBean(final MBeanServer server, final Object objToRegister, final String name,
            final String type)
            throws InstanceAlreadyExistsException, MBeanRegistrationException, NotCompliantMBeanException {
        try {
            registerMBean(server, objToRegister, getJmxObjectName(name, type));
        } catch (MalformedObjectNameException e) {
            // Keep the historical best-effort contract (no exception to the
            // caller) but leave a proper log record instead of a bare stack
            // trace on stderr.
            logger.log(Level.WARNING,
                    "Failed building JMX name from name=" + name + ", type=" + type + "; nothing registered", e);
        }
        return server;
    }

    /**
     * Register the passed object with the passed MBeanServer under the
     * passed ObjectName.
     *
     * @param server MBeanServer to register with.
     * @param objToRegister Object to register.
     * @param objName Name to register the object under.
     * @return The passed <code>server</code>.
     * @throws InstanceAlreadyExistsException
     * @throws MBeanRegistrationException
     * @throws NotCompliantMBeanException
     */
    public static MBeanServer registerMBean(final MBeanServer server, final Object objToRegister,
            final ObjectName objName)
            throws InstanceAlreadyExistsException, MBeanRegistrationException, NotCompliantMBeanException {
        final MBeanServer target = server;
        // The ObjectInstance handed back by the server is not needed here.
        target.registerMBean(objToRegister, objName);
        return target;
    }

    /**
     * Unregister the bean registered under the ObjectName built from
     * <code>name</code> and <code>type</code>.
     *
     * <p>Does nothing when <code>server</code> is null.  A malformed name
     * is reported via a stack trace and otherwise ignored.
     *
     * @param server MBeanServer to unregister from (may be null).
     * @param name Value for the name key property.
     * @param type Value for the type key property.
     */
    public static void unregisterMBean(final MBeanServer server, final String name, final String type) {
        if (server != null) {
            try {
                final ObjectName target = getJmxObjectName(name, type);
                unregisterMBean(server, target);
            } catch (MalformedObjectNameException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Unregister the named bean from the passed MBeanServer, logging at INFO
     * on success.
     *
     * <p>InstanceNotFoundException, MBeanRegistrationException and
     * NullPointerException are not propagated; each is reported via a stack
     * trace.
     *
     * @param server MBeanServer to unregister from.
     * @param name Name of the bean to unregister.
     */
    public static void unregisterMBean(final MBeanServer server, final ObjectName name) {
        Exception failure = null;
        try {
            server.unregisterMBean(name);
            logger.info("Unregistered bean " + name.getCanonicalName());
        } catch (InstanceNotFoundException e) {
            failure = e;
        } catch (MBeanRegistrationException e) {
            failure = e;
        } catch (NullPointerException e) {
            failure = e;
        }
        // Single reporting point for the three tolerated failure kinds.
        if (failure != null) {
            failure.printStackTrace();
        }
    }

    /**
     * @return Name to use when no JMX agent available: the runtime class
     * name of this instance.
     */
    protected String getNoJmxName() {
        final Class<?> runtimeType = getClass();
        return runtimeType.getName();
    }

    /**
     * @return ObjectName built from the default name "Heritrix" and the
     * service type.
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     */
    public static ObjectName getJmxObjectName() throws MalformedObjectNameException, NullPointerException {
        final String defaultName = "Heritrix";
        return getJmxObjectName(defaultName, JmxUtils.SERVICE);
    }

    /**
     * @param name Value for the name key property.
     * @return ObjectName built from the passed name and the service type.
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     */
    public static ObjectName getJmxObjectName(final String name)
            throws MalformedObjectNameException, NullPointerException {
        final String serviceType = JmxUtils.SERVICE;
        return getJmxObjectName(name, serviceType);
    }

    /**
     * Build an ObjectName in the crawler package domain that carries the
     * passed name and type as key properties.
     *
     * @param name Value for the name key property.
     * @param type Value for the type key property.
     * @return The new ObjectName.
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     */
    public static ObjectName getJmxObjectName(final String name, final String type)
            throws MalformedObjectNameException, NullPointerException {
        final Hashtable<String, String> keyProperties = new Hashtable<String, String>();
        keyProperties.put(JmxUtils.NAME, name);
        keyProperties.put(JmxUtils.TYPE, type);
        final ObjectName built = new ObjectName(CRAWLER_PACKAGE, keyProperties);
        return built;
    }

    /**
     * @return Returns true if Heritrix was launched from the command line.
     * (When launched from command line, we do stuff like put up a web server
     * to manage our web interface and we register ourselves with the first
     * available jmx agent).
     */
    public static boolean isCommandLine() {
        final boolean launchedFromCommandLine = Heritrix.commandLine;
        return launchedFromCommandLine;
    }

    /**
     * @return True if heritrix has been started, which here means the
     * <code>jobHandler</code> field is non-null.
     */
    public boolean isStarted() {
        if (this.jobHandler == null) {
            return false;
        }
        return true;
    }

    /**
     * Build a one-line summary of this instance's state.
     *
     * <p>When a job handler is present, the result is a space-separated run
     * of <code>key=value</code> pairs: isRunning, isCrawling, alertCount,
     * newAlertCount and -- only while crawling -- currentJob.
     *
     * @return Status string; empty if <code>getJobHandler()</code> returns
     * null.
     */
    public String getStatus() {
        final StringBuilder status = new StringBuilder();
        if (this.getJobHandler() == null) {
            return status.toString();
        }
        status.append("isRunning=").append(this.getJobHandler().isRunning());
        status.append(" isCrawling=").append(this.getJobHandler().isCrawling());
        status.append(" alertCount=").append(getAlertsCount());
        status.append(" newAlertCount=").append(getNewAlertsCount());
        if (this.getJobHandler().isCrawling()) {
            status.append(" currentJob=").append(this.getJobHandler().getCurrentJob().getJmxJobName());
        }
        return status.toString();
    }

    // Alert methods.
    /**
     * @return The count reported by the alert manager.
     */
    public int getAlertsCount() {
        final int total = this.alertManager.getCount();
        return total;
    }

    /**
     * @return The new-alert count reported by the alert manager.
     */
    public int getNewAlertsCount() {
        final int unread = this.alertManager.getNewCount();
        return unread;
    }

    /**
     * @return Whatever the alert manager's <code>getAll()</code> returns.
     */
    public Vector getAlerts() {
        final Vector everything = this.alertManager.getAll();
        return everything;
    }

    /**
     * @return Whatever the alert manager's <code>getNewAll()</code> returns.
     */
    public Vector getNewAlerts() {
        final Vector fresh = this.alertManager.getNewAll();
        return fresh;
    }

    /**
     * Look up a single alert through the alert manager.
     *
     * @param id Identifier handed to the alert manager's
     * <code>get</code>.
     * @return The record the alert manager returns for <code>id</code>.
     */
    public SinkHandlerLogRecord getAlert(final String id) {
        final SinkHandlerLogRecord record = this.alertManager.get(id);
        return record;
    }

    /**
     * Delegate to the alert manager's <code>read</code> for the passed id.
     *
     * @param id Identifier of the alert.
     */
    public void readAlert(final String id) {
        alertManager.read(id);
    }

    /**
     * Delegate to the alert manager's <code>remove</code> for the passed id.
     *
     * @param id Identifier of the alert.
     */
    public void removeAlert(final String id) {
        alertManager.remove(id);
    }

    /**
     * Start Heritrix.
     * 
     * Used by JMX and webapp initialization for starting Heritrix.
     * Not by the cmdline launched Heritrix. Idempotent: a second call on an
     * already started instance does nothing.
     * If start is called by JMX, then new instance of Heritrix is automatically
     * registered w/ JMX Agent.  If started by webapp, need to register the new
     * Heritrix instance.
     *
     * <p>Any exception thrown by the launch is reported via a stack trace and
     * not propagated.
     */
    public void start() {
        // Nothing to do when launched from the command line or when
        // already started.
        if (Heritrix.isCommandLine() || isStarted()) {
            return;
        }
        try {
            final String launchMessage = launch();
            logger.info(launchMessage);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Stop Heritrix.
     * 
     * Used by JMX and webapp initialization for stopping Heritrix.
     * Does nothing if there is no job handler.
     */
    public void stop() {
        if (this.jobHandler == null) {
            return;
        }
        this.jobHandler.stop();
    }

    /**
     * Send an interrupt to the first thread found whose name equals the
     * passed name.
     *
     * <p>The search starts from the root thread group (reached by walking
     * up from the current thread's group) and recurses into subgroups.
     *
     * @param threadName Name of the thread to interrupt.
     * @return A message saying either that the interrupt was sent or that
     * no such thread was found.
     */
    public String interrupt(String threadName) {
        final String notFound = "Thread " + threadName + " not found";
        ThreadGroup root = Thread.currentThread().getThreadGroup();
        if (root == null) {
            return notFound;
        }
        // Climb to the top-most thread group before enumerating.
        for (ThreadGroup up = root.getParent(); up != null; up = root.getParent()) {
            root = up;
        }
        // activeCount is only an estimate; allocate double to leave slack.
        final int capacity = root.activeCount() * 2;
        final Thread[] candidates = new Thread[capacity];
        final int found = root.enumerate(candidates, true);
        if (found >= capacity) {
            logger.info("Some threads not found...array too small: " + capacity);
        }
        for (int idx = 0; idx < found; idx++) {
            final Thread candidate = candidates[idx];
            if (candidate.getName().equals(threadName)) {
                candidate.interrupt();
                return "Interrupt sent to " + threadName;
            }
        }
        return notFound;
    }

    // OpenMBean implementation.

    /**
     * Build up the MBean info for Heritrix main.
     *
     * <p>Describes the read-only attributes, the single no-arg constructor
     * and the operations of this OpenMBean.  As a side effect this also
     * creates and stores the composite and tabular types used for job
     * listings (<code>jobCompositeType</code>, <code>jobsTabularType</code>).
     * The attribute and operation arrays are sized from
     * <code>ATTRIBUTE_LIST</code> and <code>OPERATION_LIST</code>; entries
     * are filled in sequentially.
     *
     * @return Return created mbean info instance.
     */
    protected OpenMBeanInfoSupport buildMBeanInfo() {
        final OpenMBeanAttributeInfoSupport[] attrs =
                new OpenMBeanAttributeInfoSupport[Heritrix.ATTRIBUTE_LIST.size()];
        final OpenMBeanConstructorInfoSupport[] ctors = new OpenMBeanConstructorInfoSupport[1];
        final OpenMBeanOperationInfoSupport[] opers =
                new OpenMBeanOperationInfoSupport[Heritrix.OPERATION_LIST.size()];
        final MBeanNotificationInfo[] notifs = new MBeanNotificationInfo[0];
        // Signature used for every operation that takes no parameters.
        final OpenMBeanParameterInfo[] noArgs = null;

        // Attributes: all readable, none writable, none "is"-style.
        int a = 0;
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.STATUS_ATTR, "Short basic status message",
                SimpleType.STRING, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.VERSION_ATTR, "Heritrix version",
                SimpleType.STRING, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.ISRUNNING_ATTR, "Whether the crawler is running",
                SimpleType.BOOLEAN, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.ISCRAWLING_ATTR,
                "Whether the crawler is crawling", SimpleType.BOOLEAN, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.ALERTCOUNT_ATTR, "The number of alerts",
                SimpleType.INTEGER, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.NEWALERTCOUNT_ATTR, "The number of new alerts",
                SimpleType.INTEGER, true, false, false);
        attrs[a++] = new OpenMBeanAttributeInfoSupport(Heritrix.CURRENTJOB_ATTR,
                "The name of the job currently being crawled", SimpleType.STRING, true, false, false);

        // Constructors.
        ctors[0] = new OpenMBeanConstructorInfoSupport("HeritrixOpenMBean",
                "Constructs Heritrix OpenMBean instance ", new OpenMBeanParameterInfoSupport[0]);

        // Operations.
        int o = 0;
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.START_OPER, "Start Heritrix instance", noArgs,
                SimpleType.VOID, MBeanOperationInfo.ACTION);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.STOP_OPER, "Stop Heritrix instance", noArgs,
                SimpleType.VOID, MBeanOperationInfo.ACTION);

        final OpenMBeanParameterInfo[] interruptArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("threadName", "Name of thread to send interrupt",
                        SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.INTERRUPT_OPER,
                "Send thread an interrupt (Used debugging)", interruptArgs, SimpleType.STRING,
                MBeanOperationInfo.ACTION_INFO);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.START_CRAWLING_OPER,
                "Set Heritrix instance into crawling mode", noArgs, SimpleType.VOID, MBeanOperationInfo.ACTION);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.STOP_CRAWLING_OPER,
                "Unset Heritrix instance  crawling mode", noArgs, SimpleType.VOID, MBeanOperationInfo.ACTION);

        final OpenMBeanParameterInfo[] addJobArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("pathOrURL", "Path/URL to order or jar of order+seed",
                        SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("name", "Basename for new job", SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("description", "Description to save with new job",
                        SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("seeds", "Initial seed(s)", SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.ADD_CRAWL_JOB_OPER, "Add new crawl job",
                addJobArgs, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);

        final OpenMBeanParameterInfo[] addJobBasedOnArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("uidOrName", "Job UID or profile name", SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("name", "Basename for new job", SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("description", "Description to save with new job",
                        SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("seeds", "Initial seed(s)", SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.ADD_CRAWL_JOB_BASEDON_OPER,
                "Add a new crawl job based on passed Job UID or profile", addJobBasedOnArgs, SimpleType.STRING,
                MBeanOperationInfo.ACTION_INFO);

        final OpenMBeanParameterInfo[] deleteJobArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("UID", "Job UID", SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(DELETE_CRAWL_JOB_OPER, "Delete/stop this crawl job",
                deleteJobArgs, SimpleType.VOID, MBeanOperationInfo.ACTION);

        final OpenMBeanParameterInfo[] alertArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("index", "Zero-based index into array of alerts",
                        SimpleType.INTEGER) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.ALERT_OPER, "Return alert at passed index",
                alertArgs, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);

        // Open types describing a row (one job) and the table of jobs,
        // indexed by the "uid" item.
        try {
            this.jobCompositeType = new CompositeType("job", "Job attributes", JOB_KEYS,
                    new String[] { "Job unique ID", "Job name", "Job status" },
                    new OpenType[] { SimpleType.STRING, SimpleType.STRING, SimpleType.STRING });
            this.jobsTabularType = new TabularType("jobs", "List of jobs", this.jobCompositeType,
                    new String[] { "uid" });
        } catch (OpenDataException e) {
            // This should never happen.
            throw new RuntimeException(e);
        }
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.PENDING_JOBS_OPER,
                "List of pending jobs (or null if none)", noArgs, this.jobsTabularType, MBeanOperationInfo.INFO);
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.COMPLETED_JOBS_OPER,
                "List of completed jobs (or null if none)", noArgs, this.jobsTabularType,
                MBeanOperationInfo.INFO);

        final OpenMBeanParameterInfo[] reportArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("uid", "Job unique ID", SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("name", "Report name (e.g. crawl-report, etc.)",
                        SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.CRAWLEND_REPORT_OPER, "Return crawl-end report",
                reportArgs, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.SHUTDOWN_OPER, "Shutdown container", noArgs,
                SimpleType.VOID, MBeanOperationInfo.ACTION);

        final OpenMBeanParameterInfo[] logArgs = new OpenMBeanParameterInfoSupport[] {
                new OpenMBeanParameterInfoSupport("level", "Log level: e.g. SEVERE, WARNING, etc.",
                        SimpleType.STRING),
                new OpenMBeanParameterInfoSupport("message", "Log message", SimpleType.STRING) };
        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.LOG_OPER, "Add a log message", logArgs,
                SimpleType.VOID, MBeanOperationInfo.ACTION);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.DESTROY_OPER, "Destroy Heritrix instance",
                noArgs, SimpleType.VOID, MBeanOperationInfo.ACTION);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.TERMINATE_CRAWL_JOB_OPER,
                "Returns false if no current job", noArgs, SimpleType.BOOLEAN, MBeanOperationInfo.ACTION);

        opers[o++] = new OpenMBeanOperationInfoSupport(Heritrix.REBIND_JNDI_OPER,
                "Rebinds this Heritrix with JNDI.", noArgs, SimpleType.VOID, MBeanOperationInfo.ACTION);

        // Build the info object.
        return new OpenMBeanInfoSupport(this.getClass().getName(), "Heritrix Main OpenMBean", attrs, ctors,
                opers, notifs);
    }

    /**
     * Return the value of the named MBean attribute.
     *
     * <p>The job-handler-backed attributes (IsRunning, IsCrawling and
     * CurrentJob) are answered without touching the job handler when
     * <code>getJobHandler()</code> returns null -- the same guard
     * {@link #getStatus()} applies -- so reading them on an instance that
     * has no job handler yields <code>false</code> / <code>null</code>
     * rather than a NullPointerException.
     *
     * @param attribute_name Name of the attribute to read.
     * @return Attribute value.  The current-job attribute is null when not
     * crawling.
     * @throws AttributeNotFoundException If the name is not in
     * <code>ATTRIBUTE_LIST</code> or has no handler here.
     * @throws RuntimeOperationsException If <code>attribute_name</code> is
     * null.
     */
    public Object getAttribute(String attribute_name) throws AttributeNotFoundException {
        if (attribute_name == null) {
            throw new RuntimeOperationsException(new IllegalArgumentException("Attribute name cannot be null"),
                    "Cannot call getAttribute with null attribute name");
        }
        if (!Heritrix.ATTRIBUTE_LIST.contains(attribute_name)) {
            throw new AttributeNotFoundException("Attribute " + attribute_name + " is unimplemented.");
        }
        // The pattern in the below is to match an attribute and when found
        // do a return out of if clause.  Doing it this way, I can fall
        // on to the AttributeNotFoundException for case where we've an
        // attribute but no handler.
        if (attribute_name.equals(STATUS_ATTR)) {
            return getStatus();
        }
        if (attribute_name.equals(VERSION_ATTR)) {
            return getVersion();
        }

        // NOTE(review): presumably getJobHandler() is null until the
        // instance has been started (isStarted() tests the jobHandler
        // field for null) -- confirm.
        final boolean haveJobHandler = this.getJobHandler() != null;
        if (attribute_name.equals(ISRUNNING_ATTR)) {
            if (!haveJobHandler) {
                return Boolean.FALSE;
            }
            return Boolean.valueOf(this.getJobHandler().isRunning());
        }
        if (attribute_name.equals(ISCRAWLING_ATTR)) {
            if (!haveJobHandler) {
                return Boolean.FALSE;
            }
            return Boolean.valueOf(this.getJobHandler().isCrawling());
        }
        if (attribute_name.equals(ALERTCOUNT_ATTR)) {
            return Integer.valueOf(getAlertsCount());
        }
        if (attribute_name.equals(NEWALERTCOUNT_ATTR)) {
            return Integer.valueOf(getNewAlertsCount());
        }
        if (attribute_name.equals(CURRENTJOB_ATTR)) {
            if (haveJobHandler && this.getJobHandler().isCrawling()) {
                return this.getJobHandler().getCurrentJob().getJmxJobName();
            }
            return null;
        }
        throw new AttributeNotFoundException("Attribute " + attribute_name + " not found.");
    }

    /**
     * Always fails: this MBean exposes no writable attributes.
     *
     * @param attribute Ignored.
     * @throws AttributeNotFoundException Always.
     */
    public void setAttribute(Attribute attribute) throws AttributeNotFoundException {
        final String message = "No attribute can be set in this MBean";
        throw new AttributeNotFoundException(message);
    }

    /**
     * Read several attributes at once.
     *
     * <p>Each name is resolved through {@link #getAttribute(String)}.  A
     * name whose lookup throws is reported via a stack trace and left out of
     * the result.
     *
     * @param attributeNames Names of the attributes to read.
     * @return List of the attributes that could be read, in request order.
     * @throws RuntimeOperationsException If <code>attributeNames</code> is
     * null.
     */
    public AttributeList getAttributes(String[] attributeNames) {
        if (attributeNames == null) {
            throw new RuntimeOperationsException(
                    new IllegalArgumentException("attributeNames[] cannot be null"),
                    "Cannot call getAttributes with null attribute names");
        }
        final AttributeList collected = new AttributeList();
        for (String attributeName : attributeNames) {
            try {
                final Object value = getAttribute(attributeName);
                collected.add(new Attribute(attributeName, value));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return collected;
    }

    /**
     * No attribute is settable, so nothing is ever set.
     *
     * @param attributes Ignored.
     * @return A new, always empty, list.
     */
    public AttributeList setAttributes(AttributeList attributes) {
        final AttributeList nothingSet = new AttributeList();
        return nothingSet;
    }

    /**
     * Dispatch a JMX operation by name.
     *
     * <p>The operation name is matched against the known operation
     * constants; the parameter count is checked with
     * <code>JmxUtils.checkParamsCount</code> before the matching handler
     * runs.  When INFO logging is enabled each invoke is logged together
     * with its parameters, each rendered as <code>[value]</code> (a null
     * parameter array or null element is tolerated by the logging).
     *
     * @param operationName Name of the operation to run.
     * @param params Operation parameters.
     * @param signature Parameter type names (unused here).
     * @return Operation result, or null for operations that return nothing.
     * @throws ReflectionException Wrapping a NoSuchMethodException if the
     * operation name is not recognized.
     * @throws RuntimeOperationsException If <code>operationName</code> is
     * null, or wrapping a checked failure of the operation itself.
     */
    public Object invoke(final String operationName, final Object[] params, final String[] signature)
            throws ReflectionException {
        if (operationName == null) {
            throw new RuntimeOperationsException(new IllegalArgumentException("Operation name cannot be null"),
                    "Cannot call invoke with null operation name");
        }
        // INFO logging of JMX invokes: [#HER-907]
        if (logger.isLoggable(Level.INFO)) {
            // Strings are immutable, so the text has to be accumulated in a
            // builder; String.valueOf keeps a null element from throwing.
            final StringBuilder paramsString = new StringBuilder();
            if (params != null) {
                for (Object o : params) {
                    paramsString.append('[').append(String.valueOf(o)).append(']');
                }
            }
            logger.info("JMX invoke: " + operationName + " [" + paramsString + "]");
        }
        // The pattern in the below is to match an operation and when found
        // do a return out of if clause.  Doing it this way, I can fall
        // on to the MethodNotFoundException for case where we've an
        // attribute but no handler.
        if (operationName.equals(START_OPER)) {
            JmxUtils.checkParamsCount(START_OPER, params, 0);
            start();
            return null;
        }
        if (operationName.equals(STOP_OPER)) {
            JmxUtils.checkParamsCount(STOP_OPER, params, 0);
            stop();
            return null;
        }
        if (operationName.equals(DESTROY_OPER)) {
            JmxUtils.checkParamsCount(DESTROY_OPER, params, 0);
            destroy();
            return null;
        }
        if (operationName.equals(TERMINATE_CRAWL_JOB_OPER)) {
            JmxUtils.checkParamsCount(TERMINATE_CRAWL_JOB_OPER, params, 0);
            return Boolean.valueOf(this.jobHandler.terminateCurrentJob());
        }
        if (operationName.equals(REBIND_JNDI_OPER)) {
            JmxUtils.checkParamsCount(REBIND_JNDI_OPER, params, 0);
            try {
                registerContainerJndi();
            } catch (MalformedObjectNameException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            } catch (UnknownHostException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            } catch (NamingException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
            return null;
        }
        if (operationName.equals(SHUTDOWN_OPER)) {
            JmxUtils.checkParamsCount(SHUTDOWN_OPER, params, 0);
            Heritrix.shutdown();
            return null;
        }
        if (operationName.equals(LOG_OPER)) {
            JmxUtils.checkParamsCount(LOG_OPER, params, 2);
            logger.log(Level.parse((String) params[0]), (String) params[1]);
            return null;
        }
        if (operationName.equals(INTERRUPT_OPER)) {
            JmxUtils.checkParamsCount(INTERRUPT_OPER, params, 1);
            return interrupt((String) params[0]);
        }
        if (operationName.equals(START_CRAWLING_OPER)) {
            JmxUtils.checkParamsCount(START_CRAWLING_OPER, params, 0);
            startCrawling();
            return null;
        }
        if (operationName.equals(STOP_CRAWLING_OPER)) {
            JmxUtils.checkParamsCount(STOP_CRAWLING_OPER, params, 0);
            stopCrawling();
            return null;
        }
        if (operationName.equals(ADD_CRAWL_JOB_OPER)) {
            JmxUtils.checkParamsCount(ADD_CRAWL_JOB_OPER, params, 4);
            try {
                return addCrawlJob((String) params[0], (String) params[1],
                        checkForEmptyPlaceHolder((String) params[2]), checkForEmptyPlaceHolder((String) params[3]));
            } catch (IOException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            } catch (FatalConfigurationException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }
        if (operationName.equals(DELETE_CRAWL_JOB_OPER)) {
            JmxUtils.checkParamsCount(DELETE_CRAWL_JOB_OPER, params, 1);
            this.jobHandler.deleteJob((String) params[0]);
            return null;
        }

        if (operationName.equals(ADD_CRAWL_JOB_BASEDON_OPER)) {
            JmxUtils.checkParamsCount(ADD_CRAWL_JOB_BASEDON_OPER, params, 4);
            return addCrawlJobBasedOn((String) params[0], (String) params[1],
                    checkForEmptyPlaceHolder((String) params[2]), checkForEmptyPlaceHolder((String) params[3]));
        }
        if (operationName.equals(ALERT_OPER)) {
            JmxUtils.checkParamsCount(ALERT_OPER, params, 1);
            SinkHandlerLogRecord slr = null;
            if (this.alertManager.getCount() > 0) {
                // This is creating a vector of all alerts just so I can then
                // use passed index into resultant vector -- needs to be
                // improved.
                slr = (SinkHandlerLogRecord) this.alertManager.getAll().get(((Integer) params[0]).intValue());
            }
            return (slr != null) ? slr.toString() : null;
        }

        if (operationName.equals(PENDING_JOBS_OPER)) {
            JmxUtils.checkParamsCount(PENDING_JOBS_OPER, params, 0);
            try {
                return makeJobsTabularData(getJobHandler().getPendingJobs());
            } catch (OpenDataException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }

        if (operationName.equals(COMPLETED_JOBS_OPER)) {
            JmxUtils.checkParamsCount(COMPLETED_JOBS_OPER, params, 0);
            try {
                return makeJobsTabularData(getJobHandler().getCompletedJobs());
            } catch (OpenDataException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }

        if (operationName.equals(CRAWLEND_REPORT_OPER)) {
            JmxUtils.checkParamsCount(CRAWLEND_REPORT_OPER, params, 2);
            try {
                return getCrawlendReport((String) params[0], (String) params[1]);
            } catch (IOException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }

        throw new ReflectionException(new NoSuchMethodException(operationName),
                "Cannot find the operation " + operationName);
    }

    /**
     * Return named crawl end report for job with passed uid.
     * Crawler makes reports when it has finished its crawl.  Use this method
     * to get a String version of one of these files.  The report is read
     * from the job's directory, from the file named
     * <code>reportName</code> plus '.txt'.
     * @param jobUid The unique ID for the job whose reports you want to see
     * (Must be a completed job).
     * @param reportName Name of report minus '.txt' (e.g. crawl-report).
     * @return String version of the on-disk report.
     * @throws IOException If there is no job with the passed uid, or (as a
     * FileNotFoundException) if the report file does not exist.
     */
    protected String getCrawlendReport(String jobUid, String reportName) throws IOException {
        final CrawlJob owner = getJobHandler().getJob(jobUid);
        if (owner == null) {
            throw new IOException("No such job: " + jobUid);
        }
        final File reportFile = new File(owner.getDirectory(), reportName + ".txt");
        if (reportFile.exists()) {
            return FileUtils.readFileAsString(reportFile);
        }
        throw new FileNotFoundException(reportFile.getAbsolutePath());
    }

    /**
     * Convert a list of jobs into open-type tabular data, one row per job
     * holding its UID, name and status.
     *
     * @param jobs List of CrawlJob instances (may be null).
     * @return Table of jobs, or null if <code>jobs</code> is null or empty.
     * @throws OpenDataException
     */
    protected TabularData makeJobsTabularData(List jobs) throws OpenDataException {
        if (jobs == null || jobs.isEmpty()) {
            return null;
        }
        final TabularData table = new TabularDataSupport(this.jobsTabularType);
        for (Object element : jobs) {
            final CrawlJob job = (CrawlJob) element;
            final String[] values = new String[] { job.getUID(), job.getJobName(), job.getStatus() };
            final CompositeData row = new CompositeDataSupport(this.jobCompositeType, JOB_KEYS, values);
            table.put(row);
        }
        return table;
    }

    /**
     * If passed str is a placeholder for the empty string, return the empty
     * string else return original.
     * Dumb jmx clients can't pass empty string so they'll pass a representation
     * of empty string such as ' ' or '-'.  Convert such strings to empty
     * string.  The placeholder test is the regex <code>-| +</code>: a single
     * dash, or one or more spaces.
     * @param str String to check.
     * @return Original <code>str</code> or empty string if <code>str</code>
     * is a placeholder for the empty-string (e.g. '-', or ' ').
     */
    protected String checkForEmptyPlaceHolder(String str) {
        if (TextUtils.matches("-| +", str)) {
            return "";
        }
        return str;
    }

    /**
     * @return The MBean info held in the <code>openMBeanInfo</code> field.
     */
    public MBeanInfo getMBeanInfo() {
        final MBeanInfo info = this.openMBeanInfo;
        return info;
    }

    /**
     * @return Name this instance registered in JMX (Only available after JMX
     * registration; the field is assigned in <code>preRegister</code>).
     */
    public ObjectName getMBeanName() {
        final ObjectName registeredName = this.mbeanName;
        return registeredName;
    }

    /**
     * JMX registration callback run before this bean is registered.
     *
     * <p>Remembers the server, requires the passed name to carry a name key
     * property, defaults the type key property to the service type when
     * absent, then decorates the name via <code>addVitals</code> and
     * <code>addGuiPort</code>.  The final name is stored in
     * <code>mbeanName</code> and this instance is added to the instances
     * table keyed by the name's canonical key property list string.
     *
     * @param server MBeanServer this bean is being registered with.
     * @param name Name proposed for this bean.
     * @return Name this bean is to be registered under.
     * @throws IllegalArgumentException If <code>name</code> has no name key
     * property.
     * @throws Exception
     */
    public ObjectName preRegister(MBeanServer server, ObjectName name) throws Exception {
        this.mbeanServer = server;
        @SuppressWarnings("unchecked")
        Hashtable<String, String> ht = name.getKeyPropertyList();
        if (!ht.containsKey(JmxUtils.NAME)) {
            // Separator added so the offending name does not run into the
            // message text.
            throw new IllegalArgumentException("Name property required: " + name.getCanonicalName());
        }
        if (!ht.containsKey(JmxUtils.TYPE)) {
            ht.put(JmxUtils.TYPE, JmxUtils.SERVICE);
            name = new ObjectName(name.getDomain(), ht);
        }
        this.mbeanName = addGuiPort(addVitals(name));
        Heritrix.instances.put(this.mbeanName.getCanonicalKeyPropertyListString(), this);
        return this.mbeanName;
    }

    /**
     * Add vital stats to passed in ObjectName.
     *
     * <p>Adds the local host name when the name has no host key property,
     * and the value of the <code>com.sun.management.jmxremote.port</code>
     * system property when the name has no jmx-port key property and that
     * system property is set to a non-empty value.
     * @param name ObjectName to add to.
     * @return name with host and jmxport added where they were missing.
     * @throws UnknownHostException
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     */
    protected static ObjectName addVitals(ObjectName name)
            throws UnknownHostException, MalformedObjectNameException, NullPointerException {
        @SuppressWarnings("unchecked")
        Hashtable<String, String> props = name.getKeyPropertyList();
        ObjectName result = name;
        if (!props.containsKey(JmxUtils.HOST)) {
            props.put(JmxUtils.HOST, InetAddress.getLocalHost().getHostName());
            result = new ObjectName(result.getDomain(), props);
        }
        if (props.containsKey(JmxUtils.JMX_PORT)) {
            return result;
        }
        // Add jdk jmx-port. This will be present if we've attached
        // ourselves to the jdk jmx agent.  Otherwise, we've been
        // deployed in a j2ee container with its own jmx agent.  In
        // this case we won't know how to get jmx port.
        final String port = System.getProperty("com.sun.management.jmxremote.port");
        if (port != null && port.length() > 0) {
            props.put(JmxUtils.JMX_PORT, port);
            result = new ObjectName(result.getDomain(), props);
        }
        return result;
    }

    /**
     * Add the gui port to the passed ObjectName if the name does not carry
     * one already and this instance was started with a gui.
     *
     * @param name ObjectName to add to.
     * @return name with the gui port added, or the passed name unchanged.
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     */
    protected static ObjectName addGuiPort(ObjectName name)
            throws MalformedObjectNameException, NullPointerException {
        @SuppressWarnings("unchecked")
        Hashtable<String, String> props = name.getKeyPropertyList();
        if (props.containsKey(JmxUtils.GUI_PORT)) {
            return name;
        }
        if (!Heritrix.gui) {
            return name;
        }
        props.put(JmxUtils.GUI_PORT, Integer.toString(Heritrix.guiPort));
        return new ObjectName(name.getDomain(), props);
    }

    /**
     * JMX registration callback run after the registration attempt.
     *
     * <p>Logs the outcome at INFO and then binds this bean's name in JNDI.
     * The JNDI binding is attempted whatever the value of
     * <code>registrationDone</code>; a failure there is logged at SEVERE and
     * not propagated.
     *
     * @param registrationDone Whether the bean was registered.
     */
    public void postRegister(Boolean registrationDone) {
        if (logger.isLoggable(Level.INFO)) {
            final String outcome = JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(),
                    this.mbeanServer, registrationDone.booleanValue());
            logger.info(outcome);
        }
        try {
            registerJndi(this.mbeanName);
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Failed jndi registration", e);
        }
    }

    /**
     * JMX callback run before this bean is unregistered: removes this
     * bean's name from JNDI.
     *
     * @throws Exception
     */
    public void preDeregister() throws Exception {
        final ObjectName registeredName = this.mbeanName;
        deregisterJndi(registeredName);
    }

    /**
     * JMX callback run after this bean has been unregistered: removes this
     * instance from the instances table and logs the unregistration at
     * INFO.
     */
    public void postDeregister() {
        final String instanceKey = this.mbeanName.getCanonicalKeyPropertyListString();
        Heritrix.instances.remove(instanceKey);
        if (!logger.isLoggable(Level.INFO)) {
            return;
        }
        logger.info(JmxUtils.getLogUnregistrationMsg(this.mbeanName.getCanonicalName(), this.mbeanServer));
    }

    /**
     * Binds the container name, as built by
     * {@link #getJndiContainerName()}, into jndi.
     *
     * @throws MalformedObjectNameException If the container name cannot be
     * built.
     * @throws NullPointerException If the container name cannot be built.
     * @throws UnknownHostException If the container name cannot be built.
     * @throws NamingException If the jndi bind fails.
     */
    protected static void registerContainerJndi()
            throws MalformedObjectNameException, NullPointerException, UnknownHostException, NamingException {
        final ObjectName containerName = getJndiContainerName();
        registerJndi(containerName);
    }

    /**
     * Binds the passed name into the crawler's jndi context.  Does nothing
     * when no jndi context is available.
     *
     * @param name Name to bind.
     * @throws NullPointerException
     * @throws NamingException If the context lookup or the bind fails.
     */
    protected static void registerJndi(final ObjectName name) throws NullPointerException, NamingException {
        final Context crawlerContext = getJndiContext();
        if (crawlerContext != null) {
            final CompoundName boundKey = JndiUtils.bindObjectName(crawlerContext, name);
            if (logger.isLoggable(Level.FINE)) {
                final String message = "Bound '" + boundKey + "' to '"
                        + JndiUtils.getCompoundName(crawlerContext.getNameInNamespace()).toString()
                        + "' jndi context";
                logger.fine(message);
            }
        }
    }

    /**
     * Unbinds the passed name from the crawler's jndi context.  Does
     * nothing when no jndi context is available.
     *
     * @param name Name to unbind.
     * @throws NullPointerException
     * @throws NamingException If the context lookup or the unbind fails.
     */
    protected static void deregisterJndi(final ObjectName name) throws NullPointerException, NamingException {
        final Context crawlerContext = getJndiContext();
        if (crawlerContext != null) {
            final CompoundName unboundKey = JndiUtils.unbindObjectName(crawlerContext, name);
            if (logger.isLoggable(Level.FINE)) {
                final String message = "Unbound '" + unboundKey + "' from '"
                        + JndiUtils.getCompoundName(crawlerContext.getNameInNamespace()).toString()
                        + "' jndi context";
                logger.fine(message);
            }
        }
    }

    /**
     * Looks up the jndi subcontext named by CRAWLER_PACKAGE.
     *
     * @return Jndi context for the crawler, or null if there is no initial
     * jndi context (that case is logged at FINE level).
     * @throws NamingException On any other jndi failure during the lookup.
     */
    protected static Context getJndiContext() throws NamingException {
        try {
            return JndiUtils.getSubContext(CRAWLER_PACKAGE);
        } catch (NoInitialContextException noContext) {
            logger.fine("No JNDI Context: " + noContext.toString());
            return null;
        }
    }

    /**
     * Builds the jndi container name: the name used for the 'container'
     * that can host zero or more heritrix instances.  A JMX ObjectName is
     * used so that jndi naming stays in sync with JMX naming and so that
     * ObjectName's parsing can be reused.
     *
     * @return ObjectName in the CRAWLER_PACKAGE domain with key
     * <code>type=container</code>, passed through <code>addVitals</code>.
     * @throws MalformedObjectNameException
     * @throws NullPointerException
     * @throws UnknownHostException
     */
    protected static ObjectName getJndiContainerName()
            throws MalformedObjectNameException, NullPointerException, UnknownHostException {
        return addVitals(new ObjectName(CRAWLER_PACKAGE, "type", "container"));
    }

    /**
     * @return The map of all registered Heritrix instances (there is
     * rarely more than one).  This is the map held by this class itself,
     * not a copy.
     */
    public static Map getInstances() {
        final Map registered = Heritrix.instances;
        return registered;
    }

    /**
     * @return True if exactly one Heritrix instance is registered; false
     * if the instance map is null or holds any other number of entries.
     */
    public static boolean isSingleInstance() {
        if (Heritrix.instances == null) {
            return false;
        }
        return Heritrix.instances.size() == 1;
    }

    /**
     * @return The one registered Heritrix instance, or null if there are
     * no instances or more than one.
     */
    public static Heritrix getSingleInstance() {
        if (!isSingleInstance()) {
            return null;
        }
        // Exactly one entry: fetch it through its (only) key.
        final Object onlyKey = Heritrix.instances.keySet().iterator().next();
        return (Heritrix) Heritrix.instances.get(onlyKey);
    }
}