com.cyberway.issue.crawler.admin.CrawlJob.java Source code

Introduction

Here is the source code for com.cyberway.issue.crawler.admin.CrawlJob.java
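
A minimal usage sketch (not part of the original source) may help orient the reader before the listing: it builds a CrawlJob against an existing crawl order and prints its display name. The job directory, the order.xml path, and the UID "1" are illustrative assumptions; UIDs are normally issued by a CrawlJobHandler, and a null error handler is documented by the constructor as "none is set".

import java.io.File;

import com.cyberway.issue.crawler.admin.CrawlJob;
import com.cyberway.issue.crawler.settings.XMLSettingsHandler;

public class CrawlJobExample {
    public static void main(String[] args) throws Exception {
        // Assumption: jobs/example-job/order.xml is an existing, valid crawl order.
        File jobDir = new File("jobs/example-job");
        XMLSettingsHandler settings = new XMLSettingsHandler(new File(jobDir, "order.xml"));
        settings.initialize();

        // Null error handler means "none is set"; PRIORITY_AVERAGE matches the
        // default used by the profile constructor.
        CrawlJob job = new CrawlJob("1", "example-job", settings, null,
                CrawlJob.PRIORITY_AVERAGE, jobDir);

        System.out.println(job.getDisplayName()); // e.g. "example-job [1]"
        System.out.println("new job? " + job.isNew());
    }
}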

Source

/* CrawlJob
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package com.cyberway.issue.crawler.admin;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EventObject;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeList;
import javax.management.AttributeNotFoundException;
import javax.management.DynamicMBean;
import javax.management.InstanceAlreadyExistsException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanAttributeInfo;
import javax.management.MBeanException;
import javax.management.MBeanInfo;
import javax.management.MBeanNotificationInfo;
import javax.management.MBeanOperationInfo;
import javax.management.MBeanParameterInfo;
import javax.management.MBeanRegistration;
import javax.management.MBeanRegistrationException;
import javax.management.MBeanServer;
import javax.management.NotCompliantMBeanException;
import javax.management.Notification;
import javax.management.NotificationBroadcasterSupport;
import javax.management.ObjectName;
import javax.management.ReflectionException;
import javax.management.RuntimeOperationsException;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.CompositeDataSupport;
import javax.management.openmbean.CompositeType;
import javax.management.openmbean.OpenDataException;
import javax.management.openmbean.OpenMBeanAttributeInfo;
import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
import javax.management.openmbean.OpenMBeanConstructorInfoSupport;
import javax.management.openmbean.OpenMBeanInfoSupport;
import javax.management.openmbean.OpenMBeanOperationInfo;
import javax.management.openmbean.OpenMBeanOperationInfoSupport;
import javax.management.openmbean.OpenMBeanParameterInfo;
import javax.management.openmbean.OpenMBeanParameterInfoSupport;
import javax.management.openmbean.SimpleType;

import org.apache.commons.httpclient.URIException;
import org.apache.commons.io.IOUtils;

import com.cyberway.core.utils.ServiceLocator;
import com.cyberway.crawl.jobs.service.JobsManagerService;
import com.cyberway.issue.crawler.Heritrix;
import com.cyberway.issue.crawler.datamodel.CandidateURI;
import com.cyberway.issue.crawler.datamodel.Checkpoint;
import com.cyberway.issue.crawler.datamodel.CrawlOrder;
import com.cyberway.issue.crawler.event.CrawlStatusListener;
import com.cyberway.issue.crawler.framework.CrawlController;
import com.cyberway.issue.crawler.framework.FrontierMarker;
import com.cyberway.issue.crawler.framework.StatisticsTracking;
import com.cyberway.issue.crawler.framework.exceptions.InitializationException;
import com.cyberway.issue.crawler.framework.exceptions.InvalidFrontierMarkerException;
import com.cyberway.issue.crawler.frontier.AbstractFrontier;
import com.cyberway.issue.crawler.settings.ComplexType;
import com.cyberway.issue.crawler.settings.ModuleAttributeInfo;
import com.cyberway.issue.crawler.settings.TextField;
import com.cyberway.issue.crawler.settings.XMLSettingsHandler;
import com.cyberway.issue.crawler.util.CheckpointUtils;
import com.cyberway.issue.crawler.util.IoUtils;
import com.cyberway.issue.util.ArchiveUtils;
import com.cyberway.issue.util.FileUtils;
import com.cyberway.issue.util.JEMBeanHelper;
import com.cyberway.issue.util.JmxUtils;
import com.cyberway.issue.util.iterator.LineReadingIterator;
import com.cyberway.issue.util.iterator.RegexpLineIterator;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;

/**
 * A CrawlJob encapsulates a 'crawl order' with any and all information and
 * methods needed by a CrawlJobHandler to accept and execute it.
 *
 * <p>A given crawl job may also be a 'profile' for a crawl. In that case it
 * should not be executed as a crawl but can be edited and used as a template
 * for creating new CrawlJobs.
 *
 * <p>Most of its constructors are protected since new CrawlJobs should
 * normally only be constructed by a CrawlJobHandler.
 *
 * @author Kristinn Sigurdsson
 *
 * @see com.cyberway.issue.crawler.admin.CrawlJobHandler#newJob(CrawlJob, String,
 * String, String, String, int)
 * @see com.cyberway.issue.crawler.admin.CrawlJobHandler#newProfile(CrawlJob,
 *  String, String, String)
 */

public class CrawlJob extends NotificationBroadcasterSupport
        implements DynamicMBean, MBeanRegistration, CrawlStatusListener, Serializable {
    /**
     * Eclipse generated serial number.
     */
    private static final long serialVersionUID = 3411161000452525856L;

    private static final Logger logger = Logger.getLogger(CrawlJob.class.getName());
    /*
     * Possible values for Priority
     */
    /** lowest */
    public static final int PRIORITY_MINIMAL = 0;
    /** low */
    public static final int PRIORITY_LOW = 1;
    /** average */
    public static final int PRIORITY_AVERAGE = 2;
    /** high */
    public static final int PRIORITY_HIGH = 3;
    /** highest */
    public static final int PRIORITY_CRITICAL = 4;

    /*
     * Possible states for a Job.
     */
    /** Initial value. The job may be incomplete and not yet ready to run. */
    public static final String STATUS_CREATED = "Created";
    /** Job has been successfully submitted to a CrawlJobHandler */
    public static final String STATUS_PENDING = "Pending";
    /** Job is being crawled */
    public static final String STATUS_RUNNING = "Running";
    /** Job was deleted by user, will not be displayed in UI. */
    public static final String STATUS_DELETED = "Deleted";
    /** Job was terminated by user input while crawling */
    public static final String STATUS_ABORTED = "Finished - Ended by operator";
    /** Something went very wrong */
    public static final String STATUS_FINISHED_ABNORMAL = "Finished - Abnormal exit from crawling";
    /** Job finished normally having completed its crawl. */
    public static final String STATUS_FINISHED = "Finished";
    /** Job finished normally when the specified timelimit was hit. */
    public static final String STATUS_FINISHED_TIME_LIMIT = "Finished - Timelimit hit";
    /** Job finished normally when the specified amount of
     * data (MB) had been downloaded */
    public static final String STATUS_FINISHED_DATA_LIMIT = "Finished - Maximum amount of data limit hit";
    /** Job finished normally when the specified number of documents had been
     * fetched.
     */
    public static final String STATUS_FINISHED_DOCUMENT_LIMIT = "Finished - Maximum number of documents limit hit";
    /** Job is going to be temporarily stopped after active threads are finished. */
    public static final String STATUS_WAITING_FOR_PAUSE = "Pausing - " + "Waiting for threads to finish";
    /** Job was temporarily stopped. State is kept so it can be resumed. */
    public static final String STATUS_PAUSED = "Paused";
    /**
     * Job is being checkpointed.  When checkpointing finishes, the job is set
     * back to STATUS_PAUSED (the job must first be paused before checkpointing
     * will run).
     */
    public static final String STATUS_CHECKPOINTING = "Checkpointing";
    /** Job could not be launched due to an InitializationException */
    public static final String STATUS_MISCONFIGURED = "Could not launch job " + "- Fatal InitializationException";
    /** Job is actually a profile */
    public static final String STATUS_PROFILE = "Profile";

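    /** Job is being prepared to run. */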
    public static final String STATUS_PREPARING = "Preparing";

    // Class variables
    private String UID; //A UID issued by the CrawlJobHandler.
    private String name;
    private String status;
    private boolean isReadOnly = false;
    private boolean isNew = true;
    private boolean isProfile = false;
    private boolean isRunning = false;
    private int priority;
    private int numberOfJournalEntries = 0;

    private String statisticsFileSave = "";

    private String errorMessage = null;

    private File jobDir = null;

    private transient CrawlJobErrorHandler errorHandler = null;

    protected transient XMLSettingsHandler settingsHandler;

    private transient CrawlController controller = null;

    private static final String RECOVERY_JOURNAL_STYLE = "recoveryJournal";
    private static final String CRAWL_LOG_STYLE = "crawlLog";

    // OpenMBean support.

    /**
     * Server we registered with. May be null.
     */
    private transient MBeanServer mbeanServer = null;
    private transient ObjectName mbeanName = null;
    private static final String CRAWLJOB_JMXMBEAN_TYPE = JmxUtils.SERVICE + ".Job";
    private transient JEMBeanHelper bdbjeMBeanHelper = null;
    private transient List<String> bdbjeAttributeNameList = null;
    private transient List<String> bdbjeOperationsNameList = null;

    /**
     * This job's open MBean info (may be null
     * throughout the life of Heritrix).
     */
    private transient OpenMBeanInfoSupport openMBeanInfo;

    private final static String NAME_ATTR = "Name";
    private final static String UID_ATTR = "UID";
    private final static String STATUS_ATTR = "Status";
    private final static String FRONTIER_SHORT_REPORT_ATTR = "FrontierShortReport";
    private final static String THREADS_SHORT_REPORT_ATTR = "ThreadsShortReport";
    private final static String TOTAL_DATA_ATTR = "TotalData";
    private final static String CRAWL_TIME_ATTR = "CrawlTime";
    private final static String DOC_RATE_ATTR = "DocRate";
    private final static String CURRENT_DOC_RATE_ATTR = "CurrentDocRate";
    private final static String KB_RATE_ATTR = "KbRate";
    private final static String CURRENT_KB_RATE_ATTR = "CurrentKbRate";
    private final static String THREAD_COUNT_ATTR = "ThreadCount";
    private final static String DOWNLOAD_COUNT_ATTR = "DownloadedCount";
    private final static String DISCOVERED_COUNT_ATTR = "DiscoveredCount";
    private final static String[] ATTRIBUTE_ARRAY = { NAME_ATTR, UID_ATTR, STATUS_ATTR, FRONTIER_SHORT_REPORT_ATTR,
            THREADS_SHORT_REPORT_ATTR, TOTAL_DATA_ATTR, CRAWL_TIME_ATTR, DOC_RATE_ATTR, CURRENT_DOC_RATE_ATTR,
            KB_RATE_ATTR, CURRENT_KB_RATE_ATTR, THREAD_COUNT_ATTR, DOWNLOAD_COUNT_ATTR, DISCOVERED_COUNT_ATTR };
    private final static List ATTRIBUTE_LIST = Arrays.asList(ATTRIBUTE_ARRAY);

    private final static String IMPORT_URI_OPER = "importUri";
    private final static String IMPORT_URIS_OPER = "importUris";
    private final static String DUMP_URIS_OPER = "dumpUris";
    private final static String PAUSE_OPER = "pause";
    private final static String RESUME_OPER = "resume";
    private final static String FRONTIER_REPORT_OPER = "frontierReport";
    private final static String THREADS_REPORT_OPER = "threadsReport";
    private final static String SEEDS_REPORT_OPER = "seedsReport";
    private final static String CHECKPOINT_OPER = "startCheckpoint";
    private final static String PROGRESS_STATISTICS_OPER = "progressStatistics";
    private final static String PROGRESS_STATISTICS_LEGEND_OPER = "progressStatisticsLegend";

    private final static String PROG_STATS = "progressStatistics";

    // Same as JEMBeanHelper.OP_DB_STAT
    private final static String OP_DB_STAT = "getDatabaseStats";

    /**
     * Don't add the following crawl-order items.
     */
    private final static List ORDER_EXCLUDE;
    static {
        ORDER_EXCLUDE = Arrays.asList(
                new String[] { "bdb-cache-percent", "extract-processors", "DNS", "uri-included-structure" });
    }

    /**
     * Sequence number for jmx notifications.
     */
    private static int notificationsSequenceNumber = 1;

    /**
     * A shutdown Constructor.
     */
    protected CrawlJob() {
        super();
    }

    /**
     * A constructor for jobs.
     *
     * <p>Creates jobs that are ready to crawl.
     * @param UID A unique ID for this job. Typically emitted by the
     *            CrawlJobHandler.
     * @param name The name of the job
     * @param settingsHandler The associated settings
     * @param errorHandler The crawl jobs settings error handler.
     *           <tt>null</tt> means none is set
     * @param priority job priority.
     * @param dir The directory that is considered this job's working directory.
     */
    public CrawlJob(final String UID, final String name, final XMLSettingsHandler settingsHandler,
            final CrawlJobErrorHandler errorHandler, final int priority, final File dir) {
        this(UID, name, settingsHandler, errorHandler, priority, dir, null, false, true);
    }

    /**
     * A constructor for profiles.
     *
     * <p> Any job created with this constructor will be
     * considered a profile. Profiles are not stored on disk (only their
     * settings files are stored on disk). This is because their data is
     * predictable given the settings files.
     * @param UIDandName A unique ID for this job. For profiles this is the same
     *           as name
     * @param settingsHandler The associated settings
     * @param errorHandler The crawl jobs settings error handler.
     *           <tt>null</tt> means none is set
     */
    protected CrawlJob(final String UIDandName, final XMLSettingsHandler settingsHandler,
            final CrawlJobErrorHandler errorHandler) {
        this(UIDandName, UIDandName, settingsHandler, errorHandler, PRIORITY_AVERAGE, null, STATUS_PROFILE, true,
                false);
    }

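    /**
     * Fully-specified constructor, used directly and by the other
     * constructors.
     *
     * @param UID A unique ID for this job.
     * @param name The name of the job.
     * @param settingsHandler The associated settings.
     * @param errorHandler The crawl job's settings error handler;
     *           <tt>null</tt> means none is set.
     * @param priority Job priority.
     * @param dir The directory considered this job's working directory.
     * @param status Initial status (see the STATUS_ constants).
     * @param isProfile True if this job is a profile.
     * @param isNew True if this job is considered new.
     */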
    public CrawlJob(final String UID, final String name, final XMLSettingsHandler settingsHandler,
            final CrawlJobErrorHandler errorHandler, final int priority, final File dir, final String status,
            final boolean isProfile, final boolean isNew) {
        super();
        this.UID = UID;
        this.name = name;
        this.settingsHandler = settingsHandler;
        this.errorHandler = errorHandler;
        this.status = status;
        this.isProfile = isProfile;
        this.isNew = isNew;
        this.jobDir = dir;
        this.priority = priority;
    }

    /**
     * A constructor for reloading jobs from disk. Jobs (not profiles) have
     * their data written to persistent storage in the file system. This method
     * is used to load the job from such storage. This is done by the
     * <code>CrawlJobHandler</code>.
     * <p>
     * Proper structure of a job file (TODO: Maybe one day make this an XML file)
     * Line 1. UID <br>
     * Line 2. Job name (string) <br>
     * Line 3. Job status (string) <br>
     * Line 4. is job read only (true/false) <br>
     * Line 5. is job running (true/false) <br>
     * Line 6. job priority (int) <br>
     * Line 7. number of journal entries <br>
     * Line 8. setting file (with path) <br>
     * Line 9. statistics tracker file (with path) <br>
     * Line 10-?. error message (String, empty for null), can be many lines <br>
     * @param jobFile
     *            a file containing information about the job to load.
     * @param errorHandler The crawl jobs settings error handler.
     *            null means none is set
     * @throws InvalidJobFileException
     *            if the specified file does not refer to a valid job file.
     * @throws IOException
     *            if io operations fail
     */
    protected CrawlJob(final File jobFile, final CrawlJobErrorHandler errorHandler)
            throws InvalidJobFileException, IOException {
        this(null, null, null, errorHandler, PRIORITY_AVERAGE, null, null, false, true);
        this.jobDir = jobFile.getParentFile();

        // Check for corrupt job.state files (can be corrupt if we crash).
        if (jobFile.length() == 0) {
            throw new InvalidJobFileException(jobFile.getCanonicalPath() + " is corrupt (length is zero)");
        }

        // Open file. Read data and set up class variables accordingly...
        BufferedReader jobReader = new BufferedReader(new FileReader(jobFile), 4096);
        try {
        // UID
        this.UID = jobReader.readLine();
        // name
        this.name = jobReader.readLine();
        // status
        this.status = jobReader.readLine();
        if (status.equals(STATUS_ABORTED) == false && status.equals(STATUS_CREATED) == false
                && status.equals(STATUS_DELETED) == false && status.equals(STATUS_FINISHED) == false
                && status.equals(STATUS_FINISHED_ABNORMAL) == false
                && status.equals(STATUS_FINISHED_DATA_LIMIT) == false
                && status.equals(STATUS_FINISHED_DOCUMENT_LIMIT) == false
                && status.equals(STATUS_FINISHED_TIME_LIMIT) == false
                && status.equals(STATUS_MISCONFIGURED) == false && status.equals(STATUS_PAUSED) == false
                && status.equals(STATUS_CHECKPOINTING) == false && status.equals(STATUS_PENDING) == false
                && status.equals(STATUS_RUNNING) == false && status.equals(STATUS_WAITING_FOR_PAUSE) == false
                && status.equals(STATUS_PREPARING) == false) {
            // status is invalid. Must be one of the above
            throw new InvalidJobFileException("Status (line 3) in job file " + "is not valid: '" + status + "'");
        }
        // isReadOnly
        String tmp = jobReader.readLine();
        if (tmp.equals("true")) {
            isReadOnly = true;
        } else if (tmp.equals("false")) {
            isReadOnly = false;
        } else {
            throw new InvalidJobFileException("isReadOnly (line 4) in job" + " file '" + jobFile.getAbsolutePath()
                    + "' is not " + "valid: '" + tmp + "'");
        }
        // isRunning
        tmp = jobReader.readLine();
        if (tmp.equals("true")) {
            this.isRunning = true;
        } else if (tmp.equals("false")) {
            this.isRunning = false;
        } else {
            throw new InvalidJobFileException("isRunning (line 5) in job " + "file '" + jobFile.getAbsolutePath()
                    + "' is not valid: " + "'" + tmp + "'");
        }
        // priority
        tmp = jobReader.readLine();
        try {
            this.priority = Integer.parseInt(tmp);
        } catch (NumberFormatException e) {
            throw new InvalidJobFileException("priority (line 6) in job " + "file '" + jobFile.getAbsolutePath()
                    + "' is not valid: " + "'" + tmp + "'");
        }
        // numberOfJournalEntries
        tmp = jobReader.readLine();
        try {
            this.numberOfJournalEntries = Integer.parseInt(tmp);
        } catch (NumberFormatException e) {
            throw new InvalidJobFileException("numberOfJournalEntries " + "(line 7) in job file '"
                    + jobFile.getAbsolutePath() + "' is not valid: " + "'" + tmp + "'");
        }
        // settingsHandler
        tmp = jobReader.readLine();
        try {
            File f = new File(tmp);
            this.settingsHandler = new XMLSettingsHandler((f.isAbsolute()) ? f : new File(jobDir, f.getName()));
            if (this.errorHandler != null) {
                this.settingsHandler.registerValueErrorHandler(errorHandler);
            }
            this.settingsHandler.initialize();
        } catch (InvalidAttributeValueException e1) {
            throw new InvalidJobFileException("Problem reading from settings " + "file (" + tmp
                    + ") specified in job file '" + jobFile.getAbsolutePath() + "'\n" + e1.getMessage());
        }
        // Statistics tracker file path (read but currently discarded).
        jobReader.readLine();
        // errorMessage
        // TODO: Multilines
        tmp = jobReader.readLine();
        errorMessage = "";
        while (tmp != null) {
            errorMessage += tmp + '\n';
            tmp = jobReader.readLine();
        }
        if (errorMessage.length() == 0) {
            // Empty error message should be null
            errorMessage = null;
        }
        // TODO: Load stattrack if needed.
        } finally {
            jobReader.close();
        }
    }

    /**
     * Cause the job to be written to persistent storage.
     * This will also save the statistics tracker if it is not null and the
     * job status is finished (regardless of how it finished).
     */
    private void writeJobFile() {
        if (isProfile) {
            return;
        }

        final String jobDirAbsolute = jobDir.getAbsolutePath();
        if (!jobDir.exists() || !jobDir.canWrite()) {
            logger.warning("Can't update status on " + jobDirAbsolute + " because the directory does not"
                    + " exist (or is unwritable)");
            return;
        }
        File f = new File(jobDirAbsolute, "state.job");

        String settingsFile = getSettingsDirectory();
        // Make settingsFile's path relative if order.xml is somewhere in the
        // job's directory tree
        if (settingsFile.startsWith(jobDirAbsolute.concat(File.separator))) {
            settingsFile = settingsFile.substring(jobDirAbsolute.length() + 1);
        }
        try {
            FileWriter jobWriter = new FileWriter(f, false);
            try {
                jobWriter.write(UID + "\n");
                jobWriter.write(name + "\n");
                jobWriter.write(status + "\n");
                jobWriter.write(isReadOnly + "\n");
                jobWriter.write(isRunning + "\n");
                jobWriter.write(priority + "\n");
                jobWriter.write(numberOfJournalEntries + "\n");
                jobWriter.write(settingsFile + "\n");
                jobWriter.write(statisticsFileSave + "\n"); // TODO: Is this right?
                // The error message can span multiple lines, so we keep it last.
                if (errorMessage != null) {
                    jobWriter.write(errorMessage + "\n");
                }
            } finally {
                jobWriter.close();
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, "An IOException occurred saving job " + name + " (" + UID + ")", e);
        }
    }

    /**
     * Returns this job's unique ID (UID) that was issued by the
     * CrawlJobHandler when this job was first created.
     *
     * @return This job's UID.
     * @see CrawlJobHandler#getNextJobUID()
     */
    public String getUID() {
        return UID;
    }

    /**
     * Returns this job's 'name'. The name comes from the settings for this job,
     * need not be unique, and may change. For a unique identifier use
     * {@link #getUID() getUID()}.
     * <p>
     * The name corresponds to the value of the 'name' tag in the 'meta' section
     * of the settings file.
     *
     * @return This job's 'name'
     */
    public String getJobName() {
        return name;
    }

    /**
     * Return the combination of given name and UID most commonly
     * used in the administrative interface.
     *
     * @return Job's name with UID notation
     */
    public String getDisplayName() {
        return getJobName() + " [" + getUID() + "]";
    }

    /**
     * Set this job's level of priority.
     *
     * @param priority The level of priority
     *
     * @see #getJobPriority()
     * @see #PRIORITY_MINIMAL
     * @see #PRIORITY_LOW
     * @see #PRIORITY_AVERAGE
     * @see #PRIORITY_HIGH
     * @see #PRIORITY_CRITICAL
     */
    public void setJobPriority(int priority) {
        this.priority = priority;
    }

    /**
     * Get this job's level of priority.
     *
     * @return this job's priority
     * @see #setJobPriority(int)
     * @see #PRIORITY_MINIMAL
     * @see #PRIORITY_LOW
     * @see #PRIORITY_AVERAGE
     * @see #PRIORITY_HIGH
     * @see #PRIORITY_CRITICAL
     */
    public int getJobPriority() {
        return priority;
    }

    /**
     * Once called, no changes can be made to the settings for this job.
     * Typically this is done once a crawl is completed and further changes
     * to the crawl order are therefore meaningless.
     */
    public void setReadOnly() {
        isReadOnly = true;
        writeJobFile(); //Save changes
    }

    /**
     * Is job read only?
     * @return false until setReadOnly has been invoked, after that it returns true.
     */
    public boolean isReadOnly() {
        return isReadOnly;
    }

    /**
     * Set the status of this CrawlJob.
     *
     * @param status Current status of CrawlJob
     *         (see constants defined here beginning with STATUS)
     */
    public void setStatus(String status) {
        this.status = status;
        try {
            JobsManagerService jobsManagerService = (JobsManagerService) ServiceLocator
                    .getBean("jobsManagerService");
            jobsManagerService.updateStatusByJobUID(UID, status);
        } catch (Exception e) {
            e.printStackTrace();
        }
        writeJobFile(); //Save changes
        // TODO: If job finished, save StatisticsTracker!
    }

    /**
     * @return Status of the crawler (Used by JMX).
     */
    public String getCrawlStatus() {
        return this.controller != null ? this.controller.getState().toString() : "Illegal State";
    }

    /**
     * Get the current status of this CrawlJob
     *
     * @return The current status of this CrawlJob
     *         (see constants defined here beginning with STATUS)
     */
    public String getStatus() {
        return this.status;
    }

    /**
     * Returns the settings handler for this job. It will have been initialized.
     * @return the settings handler for this job.
     */
    public XMLSettingsHandler getSettingsHandler() {
        return this.settingsHandler;
    }

    /**
     * Is this a new job?
     * @return True if is new.
     */
    public boolean isNew() {
        return isNew;
    }

    /**
     * Is the job considered to be a profile?
     * @return True if this job is a profile.
     */
    public boolean isProfile() {
        return isProfile;
    }

    /**
     * Set if the job is considered a new job or not.
     * @param b Is the job considered to be new.
     */
    public void setNew(boolean b) {
        isNew = b;
        writeJobFile(); //Save changes
    }

    /**
     * Returns true if the job is being crawled.
     * @return true if the job is being crawled
     */
    public boolean isRunning() {
        return isRunning;
    }

    /**
     * Set if job is being crawled.
     * @param b Is job being crawled.
     */
    protected void setRunning(boolean b) {
        isRunning = b;
        writeJobFile(); // Save changes
        //TODO: Job ending -> Save statistics tracker.
        //TODO: This is likely to happen as the CrawlEnding event occurs,
        // need to ensure that the StatisticsTracker is saved to disk on
        // CrawlEnded. Maybe move responsibility for this into the
        // StatisticsTracker?
    }

    protected void unregisterMBean() {
        // Unregister current job from JMX agent, if there is one.
        if (this.mbeanServer == null) {
            return;
        }
        try {
            this.mbeanServer.unregisterMBean(this.mbeanName);
            this.mbeanServer = null;
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Failed with " + this.mbeanName, e);
        }
    }

    /**
     * Subclass of CrawlController that unregisters beans when stopped.
     * Done as a subclass so CrawlController doesn't get any JMX (or 'CrawlJob')
     * pollution, so that CrawlJob is reliably unregistered from JMX, and so any
     * listeners on the CrawlJob get a chance to get the crawl-ended message
     * (these latter notifications may not actually be getting through -- TBD).
     * <p>TODO: This override dirties the data model since CC knows about CJs.
     * The facility provided by this class, emitting events and statistics so
     * they can be read by JMX, needs to go back into CC.  Probably best to
     * register the CC in JMX, rather than the CJ.  Let's do this in Heritrix 2.0
     * since it means changing the JMX API some.
     */
    public class MBeanCrawlController extends CrawlController implements Serializable {
        private static final long serialVersionUID = -4608537998168407222L;
        private CrawlJob cj = null;
        private CompositeType ct = null;

        public CrawlJob getCrawlJob() {
            return this.cj;
        }

        public void setCrawlJob(CrawlJob cj) {
            this.cj = cj;
        }

        public void progressStatisticsEvent(final EventObject e) {
            super.progressStatisticsEvent(e);
            if (this.cj.getMbeanName() == null) {
                // Can be null around job startup.  Return w/o doing anything.
                return;
            }

            Map s = ((StatisticsTracking) e.getSource()).getProgressStatistics();
            // Convert the statistics to OpenType CompositeData and add as
            // user data to Notification.
            CompositeData cd = null;
            try {
                if (this.ct == null) {
                    this.ct = JmxUtils.createCompositeType(s, PROG_STATS,
                            PROG_STATS + " for " + this.cj.getMbeanName());
                }
                cd = new CompositeDataSupport(this.ct, s);
            } catch (OpenDataException ode) {
                ode.printStackTrace();
            }
            if (cd != null) {
                Notification n = new Notification(PROG_STATS, this.cj.getMbeanName(),
                        getNotificationsSequenceNumber(),
                        ((StatisticsTracking) e.getSource()).getProgressStatisticsLine());
                n.setUserData(cd);
                this.cj.sendNotification(n);
            }
        }

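        /** On full stop, also unregister the owning CrawlJob's MBean. */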
        protected void completeStop() {
            try {
                super.completeStop();
            } finally {
                if (this.cj != null) {
                    this.cj.unregisterMBean();
                }
                this.cj = null;
            }
        }
    }

    protected CrawlController setupCrawlController() throws InitializationException {
        CrawlController controller = null;

        // Check if we're to do a checkpoint recover.  If so, deserialize
        // the checkpoint's CrawlController and use that in place of a new
        // CrawlController instance.
        Checkpoint cp = CrawlController.getCheckpointRecover(getSettingsHandler().getOrder());
        if (cp != null) {
            try {
                controller = (MBeanCrawlController) CheckpointUtils.readObjectFromFile(MBeanCrawlController.class,
                        cp.getDirectory());
            } catch (FileNotFoundException e) {
                throw new InitializationException(e);
            } catch (IOException e) {
                throw new InitializationException(e);
            } catch (ClassNotFoundException e) {
                throw new InitializationException(e);
            }
        } else {
            controller = new MBeanCrawlController();
        }
        return controller;
    }

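    /**
     * @return A fresh MBeanCrawlController (no checkpoint recovery).
     */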
    protected CrawlController createCrawlController() {
        return new MBeanCrawlController();
    }

    public void setupForCrawlStart() throws InitializationException {
        try {
            this.controller = setupCrawlController();
            // Register as listener to get job finished notice.
            this.controller.addCrawlStatusListener(this);
            this.controller.initialize(getSettingsHandler());
            // Set the crawl job this MBeanCrawlController needs to worry about.
            ((MBeanCrawlController) this.controller).setCrawlJob(this);
            // Create our mbean description and register our crawljob.
            this.openMBeanInfo = buildMBeanInfo();
            try {
                Heritrix.registerMBean(this, getJmxJobName(), CRAWLJOB_JMXMBEAN_TYPE);
            } catch (InstanceAlreadyExistsException e) {
                throw new InitializationException(e);
            } catch (MBeanRegistrationException e) {
                throw new InitializationException(e);
            } catch (NotCompliantMBeanException e) {
                throw new InitializationException(e);
            }
        } catch (InitializationException e) {
            // Can't load current job since it is misconfigured.
            setStatus(CrawlJob.STATUS_MISCONFIGURED);
            setErrorMessage("A fatal InitializationException occurred when " + "loading job:\n" + e.getMessage());
            // Log to stdout so it's seen in the logs as well as in the UI.
            e.printStackTrace();
            this.controller = null;
            throw e;
        }
        setStatus(CrawlJob.STATUS_RUNNING);
        setRunning(true);
    }

    public void stopCrawling() {
        if (this.controller != null) {
            this.controller.requestCrawlStop();
        }
    }

    /**
     * @return One-line Frontier report.
     */
    public String getFrontierOneLine() {
        if (this.controller == null || this.controller.getFrontier() == null) {
            return "Crawler not running";
        }
        return this.controller.getFrontier().singleLineReport();
    }

    /**
     * @param reportName Name of report to write.
     * @return A report of the frontier's status.
     */
    public String getFrontierReport(final String reportName) {
        if (this.controller == null || this.controller.getFrontier() == null) {
            return "Crawler not running";
        }
        return ArchiveUtils.writeReportToString(this.controller.getFrontier(), reportName);
    }

    /**
     * Write the requested frontier report to the given PrintWriter
     * @param reportName Name of report to write.
     * @param writer Where to write to.
     */
    public void writeFrontierReport(String reportName, PrintWriter writer) {
        if (this.controller == null || this.controller.getFrontier() == null) {
            writer.println("Crawler not running.");
            return;
        }
        this.controller.getFrontier().reportTo(reportName, writer);
    }

    /**
     * @return One-line threads report.
     */
    public String getThreadOneLine() {
        if (this.controller == null) {
            return "Crawler not running";
        }
        return this.controller.oneLineReportThreads();
    }

    /**
     * Get the CrawlController's ToeThreads report for the running crawl.
     * @return The CrawlController's ToeThreads report
     */
    public String getThreadsReport() {
        if (this.controller == null) {
            return "Crawler not running";
        }
        return ArchiveUtils.writeReportToString(this.controller.getToePool(), null);
    }

    /**
     * Write the requested threads report to the given PrintWriter
     * @param reportName Name of report to write.
     * @param writer Where to write to.
     */
    public void writeThreadsReport(String reportName, PrintWriter writer) {
        if (this.controller == null || this.controller.getFrontier() == null) {
            writer.println("Crawler not running.");
            return;
        }
        this.controller.getToePool().reportTo(reportName, writer);
    }

    /**
     * Kills a thread. For details see
     * {@link com.cyberway.issue.crawler.framework.ToePool#killThread(int, boolean)
     * ToePool.killThread(int, boolean)}.
     * @param threadNumber Thread to kill.
     * @param replace Should thread be replaced.
     * @see com.cyberway.issue.crawler.framework.ToePool#killThread(int, boolean)
     */
    public void killThread(int threadNumber, boolean replace) {
        if (this.controller == null) {
            return;
        }
        this.controller.killThread(threadNumber, replace);
    }

    /**
     * Get the Processors report for the running crawl.
     * @return The Processors report for the running crawl.
     */
    public String getProcessorsReport() {
        if (this.controller == null) {
            return "Crawler not running";
        }
        return ArchiveUtils.writeReportToString(this.controller, CrawlController.PROCESSORS_REPORT);
    }

    /**
     * Returns the path of this job's settings (order) file, from which the
     * locations of the job's configuration files are derived.
     *
     * @return the path of this job's order file
     */
    public String getSettingsDirectory() {
        return settingsHandler.getOrderFile().getPath();
    }

    /**
     * Returns the path of the job's base directory. For profiles this is always
     * equal to <code>new File(getSettingsDirectory())</code>.
     * @return the path of the job's base directory.
     */
    public File getDirectory() {
        return isProfile ? new File(getSettingsDirectory()) : jobDir;
    }

    /**
     * Get the error message associated with this job. Will return null if there
     * is no error message.
     * @return the error message associated with this job
     */
    public String getErrorMessage() {
        return errorMessage;
    }

    /**
     * Set an error message for this job. Generally this only occurs if the job
     * is misconfigured.
     * @param string the error message associated with this job
     */
    public void setErrorMessage(String string) {
        errorMessage = string;
        writeJobFile(); //Save changes
    }

    /**
     * @return Returns the number of journal entries.
     */
    public int getNumberOfJournalEntries() {
        return numberOfJournalEntries;
    }

    /**
     * @param numberOfJournalEntries The number of journal entries to set.
     */
    public void setNumberOfJournalEntries(int numberOfJournalEntries) {
        this.numberOfJournalEntries = numberOfJournalEntries;
        writeJobFile();
    }

    /**
     * @return Returns the error handler for this crawl job
     */
    public CrawlJobErrorHandler getErrorHandler() {
        return errorHandler;
    }

    /**
     * Read all the checkpoints found in the job's checkpoints
     * directory into Checkpoint instances
     * @return Collection containing list of all checkpoints.
     */
    public Collection scanCheckpoints() {
        File checkpointsDirectory = settingsHandler.getOrder().getCheckpointsDirectory();
        File[] perCheckpointDirs = checkpointsDirectory.listFiles();
        Collection<Checkpoint> checkpoints = new ArrayList<Checkpoint>();
        if (perCheckpointDirs != null) {
            for (int i = 0; i < perCheckpointDirs.length; i++) {
                Checkpoint cp = new Checkpoint(perCheckpointDirs[i]);
                checkpoints.add(cp);
            }
        }
        return checkpoints;
    }

    /**
     * Returns the absolute path of the specified log.
     * Note: If crawl has not begun, this file may not exist.
     * @param log
     * @return the absolute path for the specified log.
     * @throws AttributeNotFoundException
     * @throws ReflectionException
     * @throws MBeanException
     */
    public String getLogPath(String log) throws AttributeNotFoundException, MBeanException, ReflectionException {
        String logsPath = (String) settingsHandler.getOrder().getAttribute(CrawlOrder.ATTR_LOGS_PATH);
        CrawlOrder order = settingsHandler.getOrder();
        String diskPath = (String) order.getAttribute(null, CrawlOrder.ATTR_DISK_PATH);
        File disk = settingsHandler.getPathRelativeToWorkingDirectory(diskPath);
        File f = new File(logsPath, log);
        if (!f.isAbsolute()) {
            f = new File(disk.getPath(), f.getPath());
        }
        return f.getAbsolutePath();
    }

    // OpenMBean implementation.

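    /** Request a crawl pause (noop if there is no controller or it is already paused). */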
    protected void pause() {
        if (this.controller != null && this.controller.isPaused() == false) {
            this.controller.requestCrawlPause();
        }
    }

    protected void resume() {
        if (this.controller != null) {
            this.controller.requestCrawlResume();
        }
    }

    /**
     * @throws IllegalStateException Thrown if crawl is not paused.
     */
    protected void checkpoint() throws IllegalStateException {
        if (this.controller != null) {
            this.controller.requestCrawlCheckpoint();
        }
    }

    /**
     * @return True if checkpointing.
     */
    public boolean isCheckpointing() {
        return this.controller != null ? this.controller.isCheckpointing() : false;
    }

    /**
     * If it's a HostQueuesFrontier, it needs to be flushed for the queued URIs.
     */
    protected void flush() {
        // Nothing to do.
    }

    /**
     * Delete any URIs from the frontier of the current (paused) job that match
     * the specified regular expression. If the current job is not paused (or
     * there is no current job) nothing will be done.
     * @param regexpr Regular expression to delete URIs by.
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String regexpr) {
        return deleteURIsFromPending(regexpr, null);
    }

    /**
     * Delete any URIs from the frontier of the current (paused) job that match
     * the specified URI pattern, optionally restricted to queues matching the
     * specified queue pattern. If the current job is not paused (or there is
     * no current job) nothing will be done.
     * @param uriPattern Regular expression to delete URIs by.
     * @param queuePattern Regular expression selecting queues to delete from;
     * null means all queues.
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String uriPattern, String queuePattern) {
        return (this.controller != null && this.controller.getFrontier() != null && this.controller.isPaused())
                ? this.controller.getFrontier().deleteURIs(uriPattern, queuePattern)
                : 0;
    }

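    /**
     * Import URIs, with forceRevisit passed as a String (e.g. from JMX);
     * only the literal "true" enables force revisiting.
     */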
    public String importUris(String file, String style, String force) {
        return importUris(file, style, "true".equals(force));
    }

    public String importUris(final String fileOrUrl, final String style, final boolean forceRevisit) {
        return importUris(fileOrUrl, style, forceRevisit, false);
    }

    /**
     * @param fileOrUrl Name of file w/ seeds.
     * @param style What style of seeds -- crawl log, recovery journal, or
     * seeds file.
     * @param forceRevisit Should we revisit even if seen before?
     * @param areSeeds Is the file exclusively seeds?
     * @return A display string that has a count of all added.
     */
    public String importUris(final String fileOrUrl, final String style, final boolean forceRevisit,
            final boolean areSeeds) {
        InputStream is = IoUtils.getInputStream(this.controller.getDisk(), fileOrUrl);
        String message = null;
        // Do we have an inputstream?
        if (is == null) {
            message = "Failed to get inputstream from " + fileOrUrl;
            logger.severe(message);
        } else {
            int addedCount = importUris(is, style, forceRevisit, areSeeds);
            message = Integer.toString(addedCount) + " URIs added from " + fileOrUrl;
        }
        return message;
    }

    protected int importUris(InputStream is, String style, boolean forceRevisit) {
        return importUris(is, style, forceRevisit, false);
    }

    /**
     * Import URIs.
     * @param is Stream to use as URI source.
     * @param style Style in which URIs are rendered.  Currently supported are
     * <code>recoveryJournal</code>, <code>crawlLog</code>, and seeds-file
     * format (i.e. <code>default</code>), where <code>default</code> style is
     * one UURI per line (comments allowed).
     * @param forceRevisit Whether we should revisit this URI even if we've
     * visited it previously.
     * @param areSeeds Are the imported URIs seeds?
     * @return Count of added URIs.
     */
    protected int importUris(InputStream is, String style, boolean forceRevisit, final boolean areSeeds) {
        // Figure the regex to use parsing each line of input stream.
        String extractor;
        String output;
        if (CRAWL_LOG_STYLE.equals(style)) {
            // Skip first 3 fields
            extractor = "\\S+\\s+\\S+\\s+\\S+\\s+(\\S+\\s+\\S+\\s+\\S+\\s+).*";
            output = "$1";
        } else if (RECOVERY_JOURNAL_STYLE.equals(style)) {
            // Skip the begin-of-line directive
            extractor = "\\S+\\s+((\\S+)(?:\\s+\\S+\\s+\\S+)?)\\s*";
            output = "$1";
        } else {
            extractor = RegexpLineIterator.NONWHITESPACE_ENTRY_TRAILING_COMMENT;
            output = RegexpLineIterator.ENTRY;
        }

        controller.installThreadContextSettingsHandler();

        // Read the input stream.
        BufferedReader br = null;
        int addedCount = 0;
        try {
            br = new BufferedReader(new InputStreamReader(is));
            Iterator iter = new RegexpLineIterator(new LineReadingIterator(br), RegexpLineIterator.COMMENT_LINE,
                    extractor, output);
            while (iter.hasNext()) {
                try {
                    importUri((String) iter.next(), forceRevisit, areSeeds, false);
                    addedCount++;
                } catch (URIException e) {
                    e.printStackTrace();
                }
            }
            br.close();
            flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return addedCount;
    }

    /**
     * Schedule a uri.
     * @param uri Uri to schedule.
     * @param forceFetch Should it be forcefetched.
     * @param isSeed True if seed.
     * @throws URIException
     */
    public void importUri(final String uri, final boolean forceFetch, final boolean isSeed) throws URIException {
        importUri(uri, forceFetch, isSeed, true);
    }

    /**
     * Schedule a uri.
     * @param str String that can be: 1. a UURI, 2. a snippet of the
     * crawl.log line, or 3. a snippet from recover log.  See
     * {@link #importUris(InputStream, String, boolean)} for how it subparses
     * the lines from crawl.log and recover.log.
     * @param forceFetch Should it be forcefetched.
     * @param isSeed True if seed.
     * @param isFlush If true, flush the frontier IF it implements
     * flushing.
     * @throws URIException
     */
    public void importUri(final String str, final boolean forceFetch, final boolean isSeed, final boolean isFlush)
            throws URIException {
        CandidateURI caUri = CandidateURI.fromString(str);
        caUri.setForceFetch(forceFetch);
        if (isSeed) {
            caUri.setIsSeed(isSeed);
            if (caUri.getVia() == null || caUri.getVia().length() <= 0) {
                // Danger of double-add of seeds because of this code here.
                // Only call addSeed if no via.  If a via, the schedule will
                // take care of updating scope.
                this.controller.getScope().addSeed(caUri);
            }
        }
        this.controller.getFrontier().schedule(caUri);
        if (isFlush) {
            flush();
        }
    }

    /**
     * @return Our mbean info (Needed for CrawlJob to qualify as a
     * DynamicMBean).
     */
    public MBeanInfo getMBeanInfo() {
        return this.openMBeanInfo;
    }

    /**
     * Build up the MBean info for this crawl job.
     * @return Return created mbean info instance.
     * @throws InitializationException 
     */
    protected OpenMBeanInfoSupport buildMBeanInfo() throws InitializationException {
        // Start adding my attributes.
        List<OpenMBeanAttributeInfo> attributes = new ArrayList<OpenMBeanAttributeInfo>();

        // Attributes.
        attributes.add(new OpenMBeanAttributeInfoSupport(NAME_ATTR, "Crawl job name", SimpleType.STRING, true,
                false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(STATUS_ATTR, "Short basic status message",
                SimpleType.STRING, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(FRONTIER_SHORT_REPORT_ATTR, "Short frontier report",
                SimpleType.STRING, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(THREADS_SHORT_REPORT_ATTR, "Short threads report",
                SimpleType.STRING, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(UID_ATTR, "Crawl job UID", SimpleType.STRING, true, false,
                false));
        attributes.add(new OpenMBeanAttributeInfoSupport(TOTAL_DATA_ATTR, "Total data received", SimpleType.LONG,
                true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(CRAWL_TIME_ATTR, "Crawl time", SimpleType.LONG, true,
                false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(CURRENT_DOC_RATE_ATTR, "Current crawling rate (Docs/sec)",
                SimpleType.DOUBLE, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(CURRENT_KB_RATE_ATTR, "Current crawling rate (Kb/sec)",
                SimpleType.LONG, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(THREAD_COUNT_ATTR, "Active thread count",
                SimpleType.INTEGER, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(DOC_RATE_ATTR, "Crawling rate (Docs/sec)",
                SimpleType.DOUBLE, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(KB_RATE_ATTR, "Current crawling rate (Kb/sec)",
                SimpleType.LONG, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(DOWNLOAD_COUNT_ATTR, "Count of downloaded documents",
                SimpleType.LONG, true, false, false));
        attributes.add(new OpenMBeanAttributeInfoSupport(DISCOVERED_COUNT_ATTR, "Count of discovered documents",
                SimpleType.LONG, true, false, false));

        // Add in the crawl order attributes.
        addCrawlOrderAttributes(this.getController().getOrder(), attributes);

        // Add the bdbje attributes.  Convert to open mbean attributes.
        // First do bdbje setup.  Then add a subset of the bdbje attributes.
        // Keep around the list of names as a convenience for when it comes
        // time to test if attribute is supported.
        Environment env = this.controller.getBdbEnvironment();
        try {
            this.bdbjeMBeanHelper = new JEMBeanHelper(env.getConfig(), env.getHome(), true);
        } catch (DatabaseException e) {
            e.printStackTrace();
            InitializationException ie = new InitializationException(e.getMessage());
            ie.setStackTrace(e.getStackTrace());
            throw ie;
        }
        this.bdbjeAttributeNameList = Arrays.asList(new String[] { JEMBeanHelper.ATT_ENV_HOME,
                JEMBeanHelper.ATT_OPEN, JEMBeanHelper.ATT_IS_READ_ONLY, JEMBeanHelper.ATT_IS_TRANSACTIONAL,
                JEMBeanHelper.ATT_CACHE_SIZE, JEMBeanHelper.ATT_CACHE_PERCENT, JEMBeanHelper.ATT_LOCK_TIMEOUT,
                JEMBeanHelper.ATT_IS_SERIALIZABLE, JEMBeanHelper.ATT_SET_READ_ONLY, });
        addBdbjeAttributes(attributes, this.bdbjeMBeanHelper.getAttributeList(env), this.bdbjeAttributeNameList);

        // Operations.
        List<OpenMBeanOperationInfo> operations = new ArrayList<OpenMBeanOperationInfo>();
        OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[3];
        args[0] = new OpenMBeanParameterInfoSupport("url", "URL to add to the frontier", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("forceFetch", "True if URL is to be force fetched",
                SimpleType.BOOLEAN);
        args[2] = new OpenMBeanParameterInfoSupport("seed", "True if URL is a seed", SimpleType.BOOLEAN);
        operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URI_OPER, "Add passed URL to the frontier", args,
                SimpleType.VOID, MBeanOperationInfo.ACTION));

        args = new OpenMBeanParameterInfoSupport[4];
        args[0] = new OpenMBeanParameterInfoSupport("pathOrUrl", "Path or URL to file of URLs", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("style", "Format: default|crawlLog|recoveryJournal",
                SimpleType.STRING);
        args[2] = new OpenMBeanParameterInfoSupport("forceFetch", "True if URLs are to be force fetched",
                SimpleType.BOOLEAN);
        args[3] = new OpenMBeanParameterInfoSupport("seed", "True if all URIs are seeds.", SimpleType.BOOLEAN);
        operations.add(new OpenMBeanOperationInfoSupport(IMPORT_URIS_OPER,
                "Add file of passed URLs to the frontier", args, SimpleType.STRING, MBeanOperationInfo.ACTION));

        args = new OpenMBeanParameterInfoSupport[4];
        args[0] = new OpenMBeanParameterInfoSupport("filename", "File to print to", SimpleType.STRING);
        args[1] = new OpenMBeanParameterInfoSupport("regexp", "Regular expression URLs must match",
                SimpleType.STRING);
        args[2] = new OpenMBeanParameterInfoSupport("numberOfMatches", "Maximum number of matches to return",
                SimpleType.INTEGER);
        args[3] = new OpenMBeanParameterInfoSupport("verbose", "Should they be verbose descriptions",
                SimpleType.BOOLEAN);
        operations.add(new OpenMBeanOperationInfoSupport(DUMP_URIS_OPER,
                "Dump pending URIs from frontier to a file", args, SimpleType.VOID, MBeanOperationInfo.ACTION));

        operations.add(new OpenMBeanOperationInfoSupport(PAUSE_OPER, "Pause crawling (noop if already paused)",
                null, SimpleType.VOID, MBeanOperationInfo.ACTION));

        operations.add(new OpenMBeanOperationInfoSupport(RESUME_OPER, "Resume crawling (noop if already resumed)",
                null, SimpleType.VOID, MBeanOperationInfo.ACTION));

        args = new OpenMBeanParameterInfoSupport[1];
        args[0] = new OpenMBeanParameterInfoSupport("name", "Name of report ('all', 'standard', etc.).",
                SimpleType.STRING);
        operations.add(new OpenMBeanOperationInfoSupport(FRONTIER_REPORT_OPER, "Full frontier report", args,
                SimpleType.STRING, MBeanOperationInfo.INFO));

        operations.add(new OpenMBeanOperationInfoSupport(THREADS_REPORT_OPER, "Full thread report", null,
                SimpleType.STRING, MBeanOperationInfo.INFO));

        operations.add(new OpenMBeanOperationInfoSupport(SEEDS_REPORT_OPER, "Seeds report", null, SimpleType.STRING,
                MBeanOperationInfo.INFO));

        operations.add(new OpenMBeanOperationInfoSupport(PROGRESS_STATISTICS_OPER,
                "Progress statistics at time of invocation", null, SimpleType.STRING, MBeanOperationInfo.INFO));

        operations.add(new OpenMBeanOperationInfoSupport(PROGRESS_STATISTICS_LEGEND_OPER,
                "Progress statistics legend", null, SimpleType.STRING, MBeanOperationInfo.INFO));

        operations.add(new OpenMBeanOperationInfoSupport(CHECKPOINT_OPER, "Start a checkpoint", null,
                SimpleType.VOID, MBeanOperationInfo.ACTION));

        // Add bdbje operations. Add a subset only. Keep the list around so we
        // have it to hand when figuring which operations are supported. Use
        // actual Strings because they are not accessible from JEMBeanHelper.
        this.bdbjeOperationsNameList = Arrays.asList(new String[] { "cleanLog", "evictMemory", "checkpoint", "sync",
                "getEnvironmentStatsToString", "getLockStatsToString", "getDatabaseNames", OP_DB_STAT });
        addBdbjeOperations(operations, this.bdbjeMBeanHelper.getOperationList(env), this.bdbjeOperationsNameList);

        // Register notifications
        List<MBeanNotificationInfo> notifications = new ArrayList<MBeanNotificationInfo>();
        notifications.add(new MBeanNotificationInfo(
                new String[] { "crawlStarted", "crawlEnding", "crawlPaused", "crawlResuming", PROG_STATS },
                this.getClass().getName() + ".notifications",
                "CrawlStatusListener events and progress statistics as " + "notifications"));
        MBeanNotificationInfo[] notificationsArray = new MBeanNotificationInfo[notifications.size()];
        notifications.toArray(notificationsArray);

        // Build the info object.
        OpenMBeanAttributeInfoSupport[] attributesArray = new OpenMBeanAttributeInfoSupport[attributes.size()];
        attributes.toArray(attributesArray);
        OpenMBeanOperationInfoSupport[] operationsArray = new OpenMBeanOperationInfoSupport[operations.size()];
        operations.toArray(operationsArray);
        return new OpenMBeanInfoSupport(this.getClass().getName(), "Current Crawl Job as OpenMBean",
                attributesArray, new OpenMBeanConstructorInfoSupport[] {}, operationsArray, notificationsArray);
    }
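
    /**
     * Illustrative sketch added for this page, not part of the original
     * class: a caller holding the MBeanServer this job registers with (see
     * preRegister below) could list the operations assembled above through
     * the standard DynamicMBean contract. The method and variable names here
     * are assumptions for the example.
     */
    protected void exampleDescribeOperations(final MBeanServer server, final ObjectName jobName) throws Exception {
        MBeanInfo info = server.getMBeanInfo(jobName);
        for (MBeanOperationInfo op : info.getOperations()) {
            // Prints e.g. the pause/resume/report operations registered above.
            System.out.println(op.getName() + ": " + op.getDescription());
        }
    }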

    protected void addBdbjeAttributes(final List<OpenMBeanAttributeInfo> attributes,
            final List<MBeanAttributeInfo> bdbjeAttributes, final List<String> bdbjeNamesToAdd) {
        for (MBeanAttributeInfo info : bdbjeAttributes) {
            if (bdbjeNamesToAdd.contains(info.getName())) {
                attributes.add(JmxUtils.convertToOpenMBeanAttribute(info));
            }
        }
    }

    protected void addBdbjeOperations(final List<OpenMBeanOperationInfo> operations,
            final List<MBeanOperationInfo> bdbjeOperations, final List<String> bdbjeNamesToAdd) {
        for (MBeanOperationInfo info : bdbjeOperations) {
            if (bdbjeNamesToAdd.contains(info.getName())) {
                OpenMBeanOperationInfo omboi = null;
                if (info.getName().equals(OP_DB_STAT)) {
                    // Db stats needs special handling. The published
                    // signature is wrong and its return type is awkward.
                    // Handle it.
                    omboi = JmxUtils.convertToOpenMBeanOperation(info, null, SimpleType.STRING);
                    MBeanParameterInfo[] params = omboi.getSignature();
                    OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[params.length + 1];
                    for (int ii = 0; ii < params.length; ii++) {
                        args[ii] = (OpenMBeanParameterInfo) params[ii];
                    }
                    args[params.length] = new OpenMBeanParameterInfoSupport("name", "Database name",
                            SimpleType.STRING);
                    omboi = new OpenMBeanOperationInfoSupport(omboi.getName(), omboi.getDescription(), args,
                            omboi.getReturnOpenType(), omboi.getImpact());
                } else {
                    omboi = JmxUtils.convertToOpenMBeanOperation(info);
                }
                operations.add(omboi);
            }
        }
    }

    protected void addCrawlOrderAttributes(final ComplexType type, final List<OpenMBeanAttributeInfo> attributes) {
        for (final Iterator i = type.getAttributeInfoIterator(null); i.hasNext();) {
            ModuleAttributeInfo info = (ModuleAttributeInfo) i.next();
            if (ORDER_EXCLUDE.contains(info.getName())) {
                // Skip.
                continue;
            }
            String absoluteName = type.getAbsoluteName() + "/" + info.getName();
            if (JmxUtils.isOpenType(info.getType())) {
                String description = info.getDescription();
                if (description == null || description.length() <= 0) {
                    // Description can't be empty.
                    description = info.getName();
                }
                attributes.add(new OpenMBeanAttributeInfoSupport(absoluteName, description,
                        JmxUtils.getOpenType(info.getType()), true, true, false));
            } else if (info.isComplexType()) {
                try {
                    ComplexType c = (ComplexType) type.getAttribute(info.getName());
                    addCrawlOrderAttributes(c, attributes);
                } catch (AttributeNotFoundException e) {
                    logger.log(Level.SEVERE, "Failed get of attribute", e);
                } catch (MBeanException e) {
                    logger.log(Level.SEVERE, "Failed get of attribute", e);
                } catch (ReflectionException e) {
                    logger.log(Level.SEVERE, "Failed get of attribute", e);
                }
            } else if (info.getType().equals(TextField.class.getName())) {
                // Special handling for TextField.  Use the STRING OpenType.
                attributes.add(new OpenMBeanAttributeInfoSupport(absoluteName, info.getDescription(),
                        SimpleType.STRING, true, true, false));
            } else {
                // Looks like the only type we don't currently handle is
                // StringList. Figure out how to do it. Add as an AttributeList?
                logger.fine(info.getType());
            }
        }
    }
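
    /*
     * Illustrative note, an assumption rather than original source: the
     * recursion above publishes crawl-order settings under slash-separated
     * absolute names rooted at the order's absolute name, e.g. (hypothetical
     * path)
     *
     *   /crawl-order/max-toe-threads
     *
     * getCrawlOrderAttribute() and setCrawlOrderAttribute() below take these
     * names apart again, one path segment per recursion step.
     */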

    public Object getAttribute(String attribute_name) throws AttributeNotFoundException {
        if (attribute_name == null) {
            throw new RuntimeOperationsException(new IllegalArgumentException("Attribute name cannot be null"),
                    "Cannot call getAttribute with null attribute name");
        }

        // If no controller, we can't do any work in here.
        if (this.controller == null) {
            throw new RuntimeOperationsException(new NullPointerException("Controller is null"),
                    "Controller is null");
        }

        // Is it a bdbje attribute?
        if (this.bdbjeAttributeNameList.contains(attribute_name)) {
            try {
                return this.bdbjeMBeanHelper.getAttribute(this.controller.getBdbEnvironment(), attribute_name);
            } catch (MBeanException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }

        // Is it a crawl-order attribute?
        if (attribute_name.startsWith(this.controller.getOrder().getAbsoluteName())) {
            return getCrawlOrderAttribute(attribute_name);
        }

        if (!ATTRIBUTE_LIST.contains(attribute_name)) {
            throw new AttributeNotFoundException("Attribute " + attribute_name + " is unimplemented.");
        }

        // The pattern below is to match an attribute and, when found, return
        // out of the if clause. Doing it this way, we fall through to the
        // AttributeNotFoundException for the case where we have an attribute
        // name but no handler.
        if (attribute_name.equals(STATUS_ATTR)) {
            return getCrawlStatus();
        }
        if (attribute_name.equals(NAME_ATTR)) {
            return getJobName();
        }
        if (attribute_name.equals(UID_ATTR)) {
            return getUID();
        }
        if (attribute_name.equals(TOTAL_DATA_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().totalBytesWritten());
        }
        if (attribute_name.equals(CRAWL_TIME_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().getCrawlerTotalElapsedTime() / 1000);
        }
        if (attribute_name.equals(CURRENT_DOC_RATE_ATTR)) {
            return new Double(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().currentProcessedDocsPerSec());
        }
        if (attribute_name.equals(DOC_RATE_ATTR)) {
            return new Double(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().processedDocsPerSec());
        }
        if (attribute_name.equals(KB_RATE_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().processedKBPerSec());
        }
        if (attribute_name.equals(CURRENT_KB_RATE_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().currentProcessedKBPerSec());
        }
        if (attribute_name.equals(THREAD_COUNT_ATTR)) {
            return new Integer(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().activeThreadCount());
        }
        if (attribute_name.equals(FRONTIER_SHORT_REPORT_ATTR)) {
            return getFrontierOneLine();
        }
        if (attribute_name.equals(THREADS_SHORT_REPORT_ATTR)) {
            return getThreadOneLine();
        }
        if (attribute_name.equals(DISCOVERED_COUNT_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().totalCount());
        }
        if (attribute_name.equals(DOWNLOAD_COUNT_ATTR)) {
            return new Long(this.controller == null || this.controller.getStatistics() == null ? 0
                    : this.controller.getStatistics().successfullyFetchedCount());
        }

        throw new AttributeNotFoundException("Attribute " + attribute_name + " not found.");
    }

    protected Object getCrawlOrderAttribute(final String attribute_name) {
        CrawlOrder order = this.getController().getOrder();
        Object result = null;
        try {
            result = getCrawlOrderAttribute(attribute_name.substring(order.getAbsoluteName().length()), order);
        } catch (NullPointerException e) {
            logger.log(Level.SEVERE, "Failed get of " + attribute_name, e);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, "Failed get of " + attribute_name, e);
        } catch (MBeanException e) {
            logger.log(Level.SEVERE, "Failed get of " + attribute_name, e);
        } catch (ReflectionException e) {
            logger.log(Level.SEVERE, "Failed get of " + attribute_name, e);
        }
        return result;
    }

    protected Object getCrawlOrderAttribute(final String attribute_name, final ComplexType ct)
            throws AttributeNotFoundException, MBeanException, ReflectionException {
        String subName = attribute_name.startsWith("/") ? attribute_name.substring(1) : attribute_name;
        int index = subName.indexOf("/");
        if (index <= 0) {
            MBeanAttributeInfo info = ct.getAttributeInfo(subName);
            // Special handling for TextField.
            return info.getType().equals(TextField.class.getName()) ? ct.getAttribute(subName).toString()
                    : ct.getAttribute(subName);
        }
        return getCrawlOrderAttribute(subName.substring(index + 1),
                (ComplexType) ct.getAttribute(subName.substring(0, index)));
    }

    public AttributeList getAttributes(String[] attributeNames) {
        if (attributeNames == null) {
            throw new RuntimeOperationsException(
                    new IllegalArgumentException("attributeNames[] cannot be null"),
                    "Cannot call getAttributes with null attribute names");
        }

        // If no controller, we can't do any work in here.
        if (this.controller == null) {
            throw new RuntimeOperationsException(new NullPointerException("Controller is null"),
                    "Controller is null");
        }

        AttributeList resultList = new AttributeList();
        if (attributeNames.length == 0) {
            return resultList;
        }
        for (int i = 0; i < attributeNames.length; i++) {
            try {
                Object value = getAttribute(attributeNames[i]);
                resultList.add(new Attribute(attributeNames[i], value));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return (resultList);
    }
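
    /**
     * Illustrative sketch, not part of the original class: reads two of the
     * statistics attributes dispatched by getAttribute(String) above in one
     * bulk call. The constants are the ones used above; the method itself is
     * an assumption added for this page.
     */
    protected AttributeList exampleReadCounts(final MBeanServer server, final ObjectName jobName) throws Exception {
        // Each name is resolved by this DynamicMBean's getAttribute(String).
        return server.getAttributes(jobName, new String[] { DISCOVERED_COUNT_ATTR, DOWNLOAD_COUNT_ATTR });
    }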

    public void setAttribute(Attribute attribute) throws AttributeNotFoundException {
        // Is it a crawl order attribute?
        CrawlOrder order = this.getController().getOrder();
        String attName = attribute.getName();
        if (attName.startsWith(order.getAbsoluteName())) {
            try {
                setCrawlOrderAttribute(attribute.getName().substring(order.getAbsoluteName().length()), order,
                        attribute);
            } catch (NullPointerException e) {
                logger.log(Level.SEVERE, "Failed set of " + attName, e);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed set of " + attName, e);
            } catch (MBeanException e) {
                logger.log(Level.SEVERE, "Failed set of " + attName, e);
            } catch (ReflectionException e) {
                logger.log(Level.SEVERE, "Failed set of " + attName, e);
            } catch (InvalidAttributeValueException e) {
                logger.log(Level.SEVERE, "Failed set of " + attName, e);
            }
            return;
        }

        // Is it a bdbje attribute?
        if (this.bdbjeAttributeNameList.contains(attName)) {
            try {
                this.bdbjeMBeanHelper.setAttribute(this.controller.getBdbEnvironment(), attribute);
            } catch (AttributeNotFoundException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            } catch (InvalidAttributeValueException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
            return;
        }

        // Else, we don't know how to handle this attribute.
        throw new AttributeNotFoundException("Attribute " + attName + " can not be set.");
    }

    protected void setCrawlOrderAttribute(final String attribute_name, final ComplexType ct,
            final Attribute attribute)
            throws AttributeNotFoundException, InvalidAttributeValueException, MBeanException, ReflectionException {
        String subName = attribute_name.startsWith("/") ? attribute_name.substring(1) : attribute_name;
        int index = subName.indexOf("/");
        if (index <= 0) {
            ct.setAttribute(new Attribute(subName, attribute.getValue()));
            return;
        }
        setCrawlOrderAttribute(subName.substring(index + 1),
                (ComplexType) ct.getAttribute(subName.substring(0, index)), attribute);
    }
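
    /**
     * Illustrative sketch, not part of the original class: updates one
     * crawl-order setting through setAttribute(Attribute) above. The leaf
     * name "some-setting" is hypothetical; real names depend on the loaded
     * crawl order.
     */
    protected void exampleSetOrderSetting(final Object newValue) throws AttributeNotFoundException {
        String path = getController().getOrder().getAbsoluteName() + "/some-setting";
        // Routed to setCrawlOrderAttribute() by the prefix check above.
        setAttribute(new Attribute(path, newValue));
    }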

    public AttributeList setAttributes(AttributeList attributes) {
        if (attributes == null) {
            throw new RuntimeOperationsException(
                    new IllegalArgumentException("AttributeList cannot be null"),
                    "Cannot call setAttributes with a null attribute list");
        }

        AttributeList resultList = new AttributeList();
        if (attributes.size() == 0) {
            return resultList;
        }
        for (int i = 0; i < attributes.size(); i++) {
            try {
                Attribute attr = (Attribute) attributes.get(i);
                setAttribute(attr);
                String an = attr.getName();
                Object newValue = getAttribute(an);
                resultList.add(new Attribute(an, newValue));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return resultList;
    }

    public Object invoke(String operationName, Object[] params, String[] signature) throws ReflectionException {
        if (operationName == null) {
            throw new RuntimeOperationsException(new IllegalArgumentException("Operation name cannot be null"),
                    "Cannot call invoke with null operation name");
        }

        controller.installThreadContextSettingsHandler();

        if (this.bdbjeOperationsNameList.contains(operationName)) {
            try {
                Object o = this.bdbjeMBeanHelper.invoke(this.controller.getBdbEnvironment(), operationName, params,
                        signature);
                // If OP_DB_STAT, return the String version of the result.
                if (operationName.equals(OP_DB_STAT)) {
                    return o.toString();
                }
                return o;
            } catch (MBeanException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
        }

        // TODO: Exploit passed signature.

        // The pattern below is to match an operation and, when found, return
        // out of the if clause. Doing it this way, we fall through to the
        // final ReflectionException for the case where no handler exists for
        // the named operation.
        if (operationName.equals(IMPORT_URI_OPER)) {
            JmxUtils.checkParamsCount(IMPORT_URI_OPER, params, 3);
            mustBeCrawling();
            try {
                importUri((String) params[0], ((Boolean) params[1]).booleanValue(),
                        ((Boolean) params[2]).booleanValue());
            } catch (URIException e) {
                throw new RuntimeOperationsException(new RuntimeException(e));
            }
            return null;
        }

        if (operationName.equals(IMPORT_URIS_OPER)) {
            JmxUtils.checkParamsCount(IMPORT_URIS_OPER, params, 4);
            mustBeCrawling();
            return importUris((String) params[0], (String) params[1],
                    ((Boolean) params[2]).booleanValue(), ((Boolean) params[3]).booleanValue());
        }

        if (operationName.equals(DUMP_URIS_OPER)) {
            JmxUtils.checkParamsCount(DUMP_URIS_OPER, params, 4);
            mustBeCrawling();
            if (!this.controller.isPaused()) {
                throw new RuntimeOperationsException(new IllegalArgumentException("Must be paused"),
                        "Cannot dump URIs from a running job.");
            }
            dumpUris((String) params[0], (String) params[1], ((Integer) params[2]).intValue(),
                    ((Boolean) params[3]).booleanValue());
            return null;
        }

        if (operationName.equals(PAUSE_OPER)) {
            JmxUtils.checkParamsCount(PAUSE_OPER, params, 0);
            mustBeCrawling();
            pause();
            return null;
        }

        if (operationName.equals(RESUME_OPER)) {
            JmxUtils.checkParamsCount(RESUME_OPER, params, 0);
            mustBeCrawling();
            resume();
            return null;
        }

        if (operationName.equals(FRONTIER_REPORT_OPER)) {
            JmxUtils.checkParamsCount(FRONTIER_REPORT_OPER, params, 1);
            mustBeCrawling();
            return getFrontierReport((String) params[0]);
        }

        if (operationName.equals(THREADS_REPORT_OPER)) {
            JmxUtils.checkParamsCount(THREADS_REPORT_OPER, params, 0);
            mustBeCrawling();
            return getThreadsReport();
        }

        if (operationName.equals(SEEDS_REPORT_OPER)) {
            JmxUtils.checkParamsCount(SEEDS_REPORT_OPER, params, 0);
            mustBeCrawling();
            StringWriter sw = new StringWriter();
            if (getStatisticsTracking() instanceof StatisticsTracker) {
                ((StatisticsTracker) getStatisticsTracking()).writeSeedsReportTo(new PrintWriter(sw));
            } else {
                sw.write("Unsupported");
            }
            return sw.toString();
        }

        if (operationName.equals(CHECKPOINT_OPER)) {
            JmxUtils.checkParamsCount(CHECKPOINT_OPER, params, 0);
            mustBeCrawling();
            try {
                checkpoint();
            } catch (IllegalStateException e) {
                throw new RuntimeOperationsException(e);
            }
            return null;
        }

        if (operationName.equals(PROGRESS_STATISTICS_OPER)) {
            JmxUtils.checkParamsCount(PROGRESS_STATISTICS_OPER, params, 0);
            mustBeCrawling();
            return getStatisticsTracking().getProgressStatisticsLine();
        }

        if (operationName.equals(PROGRESS_STATISTICS_LEGEND_OPER)) {
            JmxUtils.checkParamsCount(PROGRESS_STATISTICS_LEGEND_OPER, params, 0);
            return getStatisticsTracking().progressStatisticsLegend();
        }

        throw new ReflectionException(new NoSuchMethodException(operationName),
                "Cannot find the operation " + operationName);
    }
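
    /**
     * Illustrative sketch, not part of the original class: drives the
     * invoke() dispatch above through a local MBeanServer, pausing the job
     * and then requesting a frontier report. The method itself is an
     * assumption added for this page.
     */
    protected String examplePauseAndReport(final MBeanServer server, final ObjectName jobName) throws Exception {
        server.invoke(jobName, PAUSE_OPER, new Object[0], new String[0]);
        // "all" is one of the report names accepted by FRONTIER_REPORT_OPER above.
        return (String) server.invoke(jobName, FRONTIER_REPORT_OPER, new Object[] { "all" },
                new String[] { String.class.getName() });
    }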

    public void mustBeCrawling() {
        if (!isCrawling()) {
            throw new RuntimeOperationsException(
                    new IllegalArgumentException("Not crawling (shouldn't ever be the case)"),
                    "Not the current crawling job?");
        }
    }

    public boolean isCrawling() {
        return this.controller != null;
    }

    /**
     * Utility method to get the stored list of ignored seed items (if any),
     * from the last time the seeds were imported to the frontier.
     * 
     * @return String of all ignored seed items, or null if none
     */
    public String getIgnoredSeeds() {
        File ignoredFile = new File(getDirectory(), AbstractFrontier.IGNORED_SEEDS_FILENAME);
        if (!ignoredFile.exists()) {
            return null;
        }
        try {
            return FileUtils.readFileAsString(ignoredFile);
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Failed read of ignored seeds file", e);
            return null;
        }
    }

    /**
     * Forward a 'kick' update to current controller if any.
     * @see CrawlController#kickUpdate()
     */
    public void kickUpdate() {
        if (this.controller != null) {
            this.controller.kickUpdate();
        }
    }

    /**
     * Returns a URIFrontierMarker for the current, paused job. If there is no
     * current job, or it is not paused, null will be returned.
     *
     * @param regexpr A regular expression that each URI must match in order to
     * be considered 'within' the marker.
     * @param inCacheOnly Limit marker scope to 'cached' URIs.
     * @return a URIFrontierMarker for the current job.
     * @see #getPendingURIsList(FrontierMarker, int, boolean)
     * @see com.cyberway.issue.crawler.framework.Frontier#getInitialMarker(String,
     *      boolean)
     * @see com.cyberway.issue.crawler.framework.FrontierMarker
     */
    public FrontierMarker getInitialMarker(String regexpr, boolean inCacheOnly) {
        return (this.controller != null && this.controller.isPaused())
                ? this.controller.getFrontier().getInitialMarker(regexpr, inCacheOnly)
                : null;
    }

    /**
     * Returns the frontier's URI list based on the provided marker. This
     * method will return null if there is no current job or if the current
     * job is not paused; only a paused current job yields a URI list.
     *
     * @param marker URIFrontier marker
     * @param numberOfMatches Maximum number of matches to return
     * @param verbose Should detailed info be provided on each URI?
     * @return the frontier's URI list based on the provided marker
     * @throws InvalidFrontierMarkerException
     *             When marker is inconsistent with the current state of the
     *             frontier.
     * @see #getInitialMarker(String, boolean)
     * @see com.cyberway.issue.crawler.framework.FrontierMarker
     */
    public ArrayList<String> getPendingURIsList(FrontierMarker marker, int numberOfMatches, boolean verbose)
            throws InvalidFrontierMarkerException {
        return (this.controller != null && this.controller.isPaused())
                ? this.controller.getFrontier().getURIsList(marker, numberOfMatches, verbose)
                : null;
    }
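
    /**
     * Illustrative sketch, not part of the original class: walks the paused
     * frontier in batches using the marker pair above. The batch size of 100
     * mirrors dumpUris() below; the method itself is an assumption added for
     * this page.
     */
    protected List<String> exampleWalkPendingUris(final String regexp, final int max)
            throws InvalidFrontierMarkerException {
        List<String> all = new ArrayList<String>();
        FrontierMarker marker = getInitialMarker(regexp, false);
        if (marker == null) {
            return all; // No current job, or the job is not paused.
        }
        while (all.size() < max) {
            int want = Math.min(100, max - all.size());
            ArrayList<String> batch = getPendingURIsList(marker, want, false);
            if (batch == null) {
                break; // Job unpaused or went away mid-walk.
            }
            all.addAll(batch);
            if (batch.size() < want) {
                break; // Frontier exhausted.
            }
        }
        return all;
    }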

    public void dumpUris(String filename, String regexp, int numberOfMatches, boolean verbose) {
        PrintWriter out = null;
        try {
            out = new PrintWriter(filename);
            FrontierMarker marker = controller.getFrontier().getInitialMarker(regexp, false);
            int matchesDumped = 0;

            while (matchesDumped < numberOfMatches) {
                int batchMatches = Math.min(100, numberOfMatches - matchesDumped);

                ArrayList<String> batchOfUris = getPendingURIsList(marker, batchMatches, verbose);
                if (batchOfUris == null) {
                    // Job unpaused or went away; nothing more to dump.
                    break;
                }
                for (String uriLine : batchOfUris) {
                    out.write(uriLine);
                    out.write("\n");
                    matchesDumped++;
                }
                if (batchOfUris.size() < batchMatches) {
                    // Frontier must be exhausted; we're finished.
                    break;
                }
            }
        } catch (FileNotFoundException e) {
            logger.log(Level.SEVERE, "Failed dumpUris write", e);
        } catch (InvalidFrontierMarkerException e) {
            logger.log(Level.SEVERE, "Failed dumpUris", e);
        } finally {
            // Close on all paths, including failures above.
            IOUtils.closeQuietly(out);
        }
    }
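
    /*
     * Usage sketch, an assumption rather than original code: DUMP_URIS_OPER
     * requires a paused job, so a caller would typically pause first and wait
     * for the paused state before dumping, e.g.
     *
     *   job.pause();
     *   // ... wait until the job reports it is paused ...
     *   job.dumpUris("pending-uris.txt", ".*", 1000, false);
     */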

    public void crawlStarted(String message) {
        if (this.mbeanName != null) {
            // Can be null around job startup.
            sendNotification(
                    new Notification("crawlStarted", this.mbeanName, getNotificationsSequenceNumber(), message));
        }
    }

    public void crawlEnding(String sExitMessage) {
        setRunning(false);
        setStatus(sExitMessage);
        setReadOnly();
        if (this.mbeanName != null) {
            sendNotification(new Notification("crawlEnding", this.mbeanName, getNotificationsSequenceNumber(),
                    sExitMessage));
        }
    }

    public void crawlEnded(String sExitMessage) {
        // Let the settings handler be cleaned up by the crawl controller
        // completeStop. Just let go of our reference in here.
        // if (this.settingsHandler != null) {
        //    this.settingsHandler.cleanup();
        // }

        // We used to zero-out data members, but that is no longer needed now
        // that CrawlJobs do not persist after completion. (They used to be
        // kept around in a list so the operator could view a CrawlJob's
        // finish state and reports -- but we now dump the actual job and
        // create a new uninitialized CrawlJob that points at the old
        // CrawlJob's data.)
    }

    public void crawlPausing(String statusMessage) {
        setStatus(statusMessage);
    }

    public void crawlPaused(String statusMessage) {
        setStatus(statusMessage);
        if (this.mbeanName != null) {
            // Can be null around job startup.
            sendNotification(new Notification("crawlPaused", this.mbeanName, getNotificationsSequenceNumber(),
                    statusMessage));
        }
    }

    public void crawlResuming(String statusMessage) {
        setStatus(statusMessage);
        if (this.mbeanName != null) {
            // Can be null around job startup.
            sendNotification(new Notification("crawlResuming", this.mbeanName, getNotificationsSequenceNumber(),
                    statusMessage));
        }
    }

    public void crawlCheckpoint(File checkpointDir) throws Exception {
        setStatus(CrawlJob.STATUS_CHECKPOINTING);
    }
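
    /**
     * Illustrative sketch, not part of the original class: subscribes to the
     * crawl-status notifications emitted by the listener methods above. The
     * anonymous listener simply prints each event; the method itself is an
     * assumption added for this page.
     */
    protected void exampleListenForStatus(final MBeanServer server, final ObjectName jobName) throws Exception {
        server.addNotificationListener(jobName, new javax.management.NotificationListener() {
            public void handleNotification(final Notification n, final Object handback) {
                // Types are e.g. "crawlStarted", "crawlPaused", "crawlResuming".
                System.out.println(n.getType() + ": " + n.getMessage());
            }
        }, null, null);
    }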

    public CrawlController getController() {
        return this.controller;
    }

    public ObjectName preRegister(final MBeanServer server, ObjectName on) throws Exception {
        this.mbeanServer = server;
        @SuppressWarnings("unchecked")
        Hashtable<String, String> ht = on.getKeyPropertyList();
        if (!ht.containsKey(JmxUtils.NAME)) {
            throw new IllegalArgumentException("Name property required" + on.getCanonicalName());
        }
        // Now append key/values from hosting heritrix JMX ObjectName so it can be
        // found just by examination of the CrawlJob JMX ObjectName.  Add heritrix
        // name attribute as 'mother' attribute.
        Heritrix h = getHostingHeritrix();
        if (h == null || h.getMBeanName() == null) {
            throw new IllegalArgumentException(
                    "Hosting heritrix not found or not registered with JMX: " + on.getCanonicalName());
        }
        @SuppressWarnings("unchecked")
        Map<String, String> hht = h.getMBeanName().getKeyPropertyList();
        ht.put(JmxUtils.MOTHER, hht.get(JmxUtils.NAME));
        String port = hht.get(JmxUtils.JMX_PORT);
        if (port != null) {
            ht.put(JmxUtils.JMX_PORT, port);
        }
        ht.put(JmxUtils.HOST, hht.get(JmxUtils.HOST));
        if (!ht.containsKey(JmxUtils.TYPE)) {
            ht.put(JmxUtils.TYPE, CRAWLJOB_JMXMBEAN_TYPE);
        }
        this.mbeanName = new ObjectName(on.getDomain(), ht);
        return this.mbeanName;
    }
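
    /*
     * Illustrative note, an assumption about the resulting name rather than
     * original text: after the rewrite above, the job's ObjectName also
     * carries its hosting Heritrix coordinates, roughly of the form
     *
     *   <domain>:name=<job>,type=<CRAWLJOB_JMXMBEAN_TYPE>,mother=<heritrix>,host=<host>[,jmxport=<port>]
     *
     * so a job can be tied back to its Heritrix by name inspection alone.
     */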

    public void postRegister(Boolean registrationDone) {
        if (logger.isLoggable(Level.INFO)) {
            logger.info(JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(), this.mbeanServer,
                    registrationDone.booleanValue()));
        }
    }

    public void preDeregister() throws Exception {
        // Nothing to do.
    }

    public void postDeregister() {
        if (mbeanName == null) {
            return;
        }
        if (logger.isLoggable(Level.INFO)) {
            logger.info(JmxUtils.getLogUnregistrationMsg(this.mbeanName.getCanonicalName(), this.mbeanServer));
        }
        this.mbeanName = null;
    }

    /**
     * @return Heritrix that is hosting this job.
     */
    protected Heritrix getHostingHeritrix() {
        Heritrix hostingHeritrix = null;
        Map heritrice = Heritrix.getInstances();
        for (final Iterator i = heritrice.keySet().iterator(); i.hasNext();) {
            Heritrix h = (Heritrix) heritrice.get(i.next());
            if (h.getJobHandler().getCurrentJob() == this) {
                hostingHeritrix = h;
                break;
            }
        }
        return hostingHeritrix;
    }

    /**
     * @return Unique name for the job that is safe to use in JMX (like the
     * display name, but without spaces).
     */
    public String getJmxJobName() {
        return getJobName() + "-" + getUID();
    }

    /**
     * @return Notification sequence number (increments after each access).
     */
    protected static int getNotificationsSequenceNumber() {
        return notificationsSequenceNumber++;
    }

    protected ObjectName getMbeanName() {
        return this.mbeanName;
    }

    /**
     * @return the statistics tracking instance (or null if none yet available).
     */
    public StatisticsTracking getStatisticsTracking() {
        return this.controller == null ? null : this.controller.getStatistics();
    }
}