dk.netarkivet.harvester.harvesting.controller.BnfHeritrixController.java Source code

Introduction

Here is the source code for dk.netarkivet.harvester.harvesting.controller.BnfHeritrixController.java
Source

/* File:       $Id$
 * Revision:   $Revision$
 * Author:     $Author$
 * Date:       $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package dk.netarkivet.harvester.harvesting.controller;

import dk.netarkivet.common.exceptions.*;
import dk.netarkivet.common.utils.*;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.harvesting.HeritrixFiles;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceInfo;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceJobInfo;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlStatus;
import dk.netarkivet.harvester.harvesting.frontier.FullFrontierReport;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.archive.crawler.framework.CrawlController;
import org.archive.util.JmxUtils;

import javax.management.*;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.TabularData;
import javax.management.remote.JMXConnector;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * This implementation of the HeritrixController interface starts Heritrix as a
 * separate process and uses JMX to communicate with it. Each instance executes
 * exactly one process that runs exactly one crawl job.
 */
public class BnfHeritrixController extends AbstractJMXHeritrixController {

    /** The logger for this class. */
    private static final Log log = LogFactory.getLog(BnfHeritrixController.class);

    /**
     * The below commands and attributes are copied from the attributes and
     * operations exhibited by the Heritrix MBeans of type CrawlJob and
     * CrawlService.Job, as they appear in JConsole.
     *
     * Only operations and attributes used in NAS are listed.
     */
    private static enum CrawlServiceAttribute {
        /** The number of alerts raised by Heritrix. */
        AlertCount,
        /** True if Heritrix is currently crawling, false otherwise. */
        IsCrawling,
        /** The ID of the job being currently crawled by Heritrix. */
        CurrentJob;

        /**
         * Returns the {@link CrawlServiceAttribute} enum value matching the
         * given name. Throws {@link UnknownID} if no match is found.
         *
         * @param name
         *            the attribute name
         * @return the corresponding {@link CrawlServiceAttribute} enum value.
         */
        public static CrawlServiceAttribute fromString(String name) {
            for (CrawlServiceAttribute att : values()) {
                if (att.name().equals(name)) {
                    return att;
                }
            }
            throw new UnknownID(name + " : unknown CrawlServiceAttribute !");
        }
    }

    /**
     * 
     * Enum listing the different job attributes available.
     *
     */
    private static enum CrawlServiceJobAttribute {
        /** The time in seconds elapsed since the crawl began. */
        CrawlTime,
        /** The current download rate in URI/s. */
        CurrentDocRate,
        /** The current download rate in kB/s. */
        CurrentKbRate,
        /** The number of URIs discovered by Heritrix. */
        DiscoveredCount,
        /** The average download rate in URI/s. */
        DocRate,
        /** The number of URIs downloaded by Heritrix. */
        DownloadedCount,
        /** A string summarizing the Heritrix frontier. */
        FrontierShortReport,
        /** The average download rate in kB/s. */
        KbRate,
        /** The job status (Heritrix status). */
        Status,
        /** The number of active toe threads. */
        ThreadCount;

        /**
         * Returns the {@link CrawlServiceJobAttribute} enum value matching the
         * given name. Throws {@link UnknownID} if no match is found.
         *
         * @param name
         *            the attribute name
         * @return the corresponding {@link CrawlServiceJobAttribute} enum
         *         value.
         */
        public static CrawlServiceJobAttribute fromString(String name) {
            for (CrawlServiceJobAttribute att : values()) {
                if (att.name().equals(name)) {
                    return att;
                }
            }
            throw new UnknownID(name + " : unknown CrawlServiceJobAttribute !");
        }
    }

    /**
     * Enum class defining the general operations available to 
     * the Heritrix operator.
     */
    private static enum CrawlServiceOperation {
        /** Adds a new job to an Heritrix instance. */
        addJob,
        /** Fetches the identifiers of pending jobs. */
        pendingJobs,
        /** Fetches the identifiers of completed jobs. */
        completedJobs,
        /** Shuts down an Heritrix instance. */
        shutdown,
        /** Instructs an Heritrix instance to starts crawling jobs. */
        startCrawling,
        /** Instructs an Heritrix instance to terminate the current job. */
        terminateCurrentJob;
    }

    /**
     * Enum class defining the Job-operations available to 
     * the Heritrix operator.
     */
    private static enum CrawlServiceJobOperation {
        /** Fetches the progress statistics string from an Heritrix instance. */
        progressStatistics,
        /**
         * Fetches the progress statistics legend string from an Heritrix
         * instance.
         */
        progressStatisticsLegend,
        /** Fetches the frontier report. */
        frontierReport;
    }

    /**
     * Shall we abort, if we lose the connection to Heritrix.
     */
    private static final boolean ABORT_IF_CONN_LOST = Settings
            .getBoolean(HarvesterSettings.ABORT_IF_CONNECTION_LOST);

    /**
     * The part of the Job MBean name that designates the unique id. For some
     * reason, this is not included in the normal Heritrix definitions in
     * JmxUtils, otherwise we wouldn't have to define it. I have committed a
     * feature request: http://webteam.archive.org/jira/browse/HER-1618
     */
    private static final String UID_PROPERTY = "uid";

    /**
     * The name that Heritrix gives to the job we ask it to create. This is part
     * of the name of the MBean for that job, but we can only retrieve the name
     * after the MBean has been created.
     */
    private String jobName;

    /** The header line (legend) for the statistics report. */
    private String progressStatisticsLegend;

    /**
     * The connector to the Heritrix MBeanServer.
     */
    private JMXConnector jmxConnector;

    /**
     * Max tries for a JMX operation.
     */
    private final int jmxMaxTries = JMXUtils.getMaxTries();

    /**
     * The name of the MBean for the submitted job.
     */
    private String crawlServiceJobBeanName;

    /**
     * The name of the main Heritrix MBean.
     */
    private String crawlServiceBeanName;

    /**
     * Create a BnfHeritrixController object.
     *
     * @param files
     *            Files that are used to set up Heritrix.
     */
    public BnfHeritrixController(HeritrixFiles files) {
        super(files);
    }

    /**
     * Initialize the JMXconnection to the Heritrix.
     * @throws IOFailure
     *             If Heritrix dies before initialization, or we encounter any
     *             problems during the initialization.
     * @see HeritrixController#initialize()
     */
    @Override
    public void initialize() {
        if (processHasExited()) {
            String errMsg = "Heritrix process of " + this + " died before initialization";
            log.warn(errMsg);
            throw new IOFailure(errMsg);
        }

        log.info("Abort, if we lose the connection " + "to Heritrix, is " + ABORT_IF_CONN_LOST);
        initJMXConnection();

        log.info("JMX connection initialized successfully");

        crawlServiceBeanName = "org.archive.crawler:" + JmxUtils.NAME + "=Heritrix," + JmxUtils.TYPE
                + "=CrawlService," + JmxUtils.JMX_PORT + "=" + getJmxPort() + "," + JmxUtils.GUI_PORT + "="
                + getGuiPort() + "," + JmxUtils.HOST + "=" + getHostName();

        // We want to be sure there are no jobs when starting, in case we got
        // an old Heritrix or somebody added jobs behind our back.
        TabularData doneJobs = (TabularData) executeMBeanOperation(CrawlServiceOperation.completedJobs);
        TabularData pendingJobs = (TabularData) executeMBeanOperation(CrawlServiceOperation.pendingJobs);
        if (doneJobs != null && doneJobs.size() > 0 || pendingJobs != null && pendingJobs.size() > 0) {
            throw new IllegalState(
                    "This Heritrix instance is in a illegalState! " + "This instance has either old done jobs ("
                            + doneJobs + "), or old pending jobs (" + pendingJobs + ").");
        }
        // From here on, we can assume there's only the one job we make.
        // We'll use the arc file prefix to name the job, since the prefix
        // already contains the harvest id and job id.
        HeritrixFiles files = getHeritrixFiles();
        executeMBeanOperation(CrawlServiceOperation.addJob, files.getOrderXmlFile().getAbsolutePath(),
                files.getArchiveFilePrefix(), getJobDescription(), files.getSeedsTxtFile().getAbsolutePath());

        jobName = getJobName();

        crawlServiceJobBeanName = "org.archive.crawler:" + JmxUtils.NAME + "=" + jobName + "," + JmxUtils.TYPE
                + "=CrawlService.Job," + JmxUtils.JMX_PORT + "=" + getJmxPort() + "," + JmxUtils.MOTHER
                + "=Heritrix," + JmxUtils.HOST + "=" + getHostName();
    }

    @Override
    public void requestCrawlStart() {
        executeMBeanOperation(CrawlServiceOperation.startCrawling);
    }

    @Override
    public void requestCrawlStop(String reason) {
        executeMBeanOperation(CrawlServiceOperation.terminateCurrentJob);
    }

    /**
     * Return the URL for monitoring this instance.
     *
     * @return the URL for monitoring this instance.
     */
    public String getHeritrixConsoleURL() {
        return "http://" + SystemUtils.getLocalHostName() + ":" + getGuiPort();
    }

    /**
     * Cleanup after an Heritrix process. This entails sending the shutdown
     * command to the Heritrix process, and killing it forcefully, if it is
     * still alive after waiting the period of time specified by the
     * CommonSettings.PROCESS_TIMEOUT setting.
     * @param crawlDir the crawldir to cleanup
     * @see HeritrixController#cleanup()
     */
    public void cleanup(File crawlDir) {
        // Before cleaning up, we need to wait for the reports to be generated
        waitForReportGeneration(crawlDir);

        try {
            executeMBeanOperation(CrawlServiceOperation.shutdown);
        } catch (IOFailure e) {
            log.error("JMX error while cleaning up Heritrix controller", e);
        }

        closeJMXConnection();

        waitForHeritrixProcessExit();
    }

    /**
     * Return the URL for monitoring this instance.
     *
     * @return the URL for monitoring this instance.
     */
    public String getAdminInterfaceUrl() {
        return "http://" + SystemUtils.getLocalHostName() + ":" + getGuiPort();
    }

    /**
     * Gets a message that stores the information summarizing the crawl
     * progress.
     *
     * @return a message that stores the information summarizing the crawl
     *         progress.
     */
    public CrawlProgressMessage getCrawlProgress() {

        HeritrixFiles files = getHeritrixFiles();
        CrawlProgressMessage cpm = new CrawlProgressMessage(files.getHarvestID(), files.getJobID(),
                progressStatisticsLegend);

        cpm.setHostUrl(getHeritrixConsoleURL());

        getCrawlServiceAttributes(cpm);

        if (cpm.crawlIsFinished()) {
            cpm.setStatus(CrawlStatus.CRAWLING_FINISHED);
            // No need to go further, CrawlService.Job bean does not exist
            return cpm;
        }

        fetchCrawlServiceJobAttributes(cpm);

        return cpm;
    }

    /**
     * Retrieve the values of the crawl service attributes and
     * add them to the CrawlProgressMessage being put together.
     * @param cpm the crawlProgress message being prepared
     */
    private void getCrawlServiceAttributes(CrawlProgressMessage cpm) {
        List<Attribute> heritrixAtts = getMBeanAttributes(
                new CrawlServiceAttribute[] { CrawlServiceAttribute.AlertCount, CrawlServiceAttribute.IsCrawling,
                        CrawlServiceAttribute.CurrentJob });

        CrawlServiceInfo hStatus = cpm.getHeritrixStatus();
        for (Attribute att : heritrixAtts) {
            Object value = att.getValue();
            CrawlServiceAttribute crawlServiceAttribute = CrawlServiceAttribute.fromString(att.getName());
            switch (crawlServiceAttribute) {
            case AlertCount:
                Integer alertCount = -1;
                if (value != null) {
                    alertCount = (Integer) value;
                }
                hStatus.setAlertCount(alertCount);
                break;
            case CurrentJob:
                String newCurrentJob = "";
                if (value != null) {
                    newCurrentJob = (String) value;
                }
                hStatus.setCurrentJob(newCurrentJob);
                break;
            case IsCrawling:
                Boolean newCrawling = false;
                if (value != null) {
                    newCrawling = (Boolean) value;
                }
                hStatus.setCrawling(newCrawling);
                break;
            default:
                log.debug("Unhandled attribute: " + crawlServiceAttribute);
            }
        }
    }

    /**
     * Retrieve the values of the crawl service job attributes and
     * add them to the CrawlProgressMessage being put together.
     * @param cpm the crawlProgress message being prepared
     */
    private void fetchCrawlServiceJobAttributes(CrawlProgressMessage cpm) {
        String progressStats = (String) executeMBeanOperation(CrawlServiceJobOperation.progressStatistics);
        CrawlServiceJobInfo jStatus = cpm.getJobStatus();
        String newProgressStats = "?";
        if (progressStats != null) {
            newProgressStats = progressStats;
        }
        jStatus.setProgressStatistics(newProgressStats);

        if (progressStatisticsLegend == null) {
            progressStatisticsLegend = (String) executeMBeanOperation(
                    CrawlServiceJobOperation.progressStatisticsLegend);
        }

        List<Attribute> jobAtts = getMBeanAttributes(CrawlServiceJobAttribute.values());

        for (Attribute att : jobAtts) {
            Object value = att.getValue();
            CrawlServiceJobAttribute aCrawlServiceJobAttribute = CrawlServiceJobAttribute.fromString(att.getName());
            switch (aCrawlServiceJobAttribute) {
            case CrawlTime:
                Long elapsedSeconds = -1L;
                if (value != null) {
                    elapsedSeconds = (Long) value;
                }
                jStatus.setElapsedSeconds(elapsedSeconds);
                break;
            case CurrentDocRate:
                Double processedDocsPerSec = new Double(-1L);
                if (value != null) {
                    processedDocsPerSec = (Double) value;
                }
                jStatus.setCurrentProcessedDocsPerSec(processedDocsPerSec);
                break;
            case CurrentKbRate:
                // NB Heritrix seems to store the average value in
                // KbRate instead of CurrentKbRate...
                // Inverse of doc rates.
                Long processedKBPerSec = -1L;
                if (value != null) {
                    processedKBPerSec = (Long) value;
                }
                jStatus.setProcessedKBPerSec(processedKBPerSec);
                break;
            case DiscoveredCount:
                Long discoveredCount = -1L;
                if (value != null) {
                    discoveredCount = (Long) value;
                }
                jStatus.setDiscoveredFilesCount(discoveredCount);
                break;
            case DocRate:
                Double docRate = new Double(-1L);
                if (value != null) {
                    docRate = (Double) value;
                }
                jStatus.setProcessedDocsPerSec(docRate);
                break;
            case DownloadedCount:
                Long downloadedCount = -1L;
                if (value != null) {
                    downloadedCount = (Long) value;
                }
                jStatus.setDownloadedFilesCount(downloadedCount);
                break;
            case FrontierShortReport:
                String frontierShortReport = "?";
                if (value != null) {
                    frontierShortReport = (String) value;
                }
                jStatus.setFrontierShortReport(frontierShortReport);
                break;
            case KbRate:
                // NB Heritrix seems to store the average value in
                // KbRate instead of CurrentKbRate...
                // Inverse of doc rates.
                Long kbRate = -1L;
                if (value != null) {
                    kbRate = (Long) value;
                }
                jStatus.setCurrentProcessedKBPerSec(kbRate);
                break;
            case Status:
                String newStatus = "?";
                if (value != null) {
                    newStatus = (String) value;
                }
                jStatus.setStatus(newStatus);
                if (value != null) {
                    String status = (String) value;
                    if (CrawlController.PAUSING.equals(status)) {
                        cpm.setStatus(CrawlStatus.CRAWLER_PAUSING);
                    } else if (CrawlController.PAUSED.equals(status)) {
                        cpm.setStatus(CrawlStatus.CRAWLER_PAUSED);
                    } else {
                        cpm.setStatus(CrawlStatus.CRAWLER_ACTIVE);
                    }
                }
                break;
            case ThreadCount:
                Integer currentActiveToecount = -1;
                if (value != null) {
                    currentActiveToecount = (Integer) value;
                }
                jStatus.setActiveToeCount(currentActiveToecount);
                break;
            default:
                log.debug("Unhandled attribute: " + aCrawlServiceJobAttribute);
            }
        }
    }

    /**
     * Generates a full frontier report.
     * @return a Full frontier report.
     */
    public FullFrontierReport getFullFrontierReport() {

        return FullFrontierReport.parseContentsAsString(jobName,
                (String) executeOperationNoRetry(crawlServiceJobBeanName,
                        CrawlServiceJobOperation.frontierReport.name(), "all"));
    }

    /**
     * Get the name of the one job we let this Heritrix run. The handling of
     * done jobs depends on Heritrix not being in crawl. This call may take
     * several seconds to finish.
     *
     * @return The name of the one job that Heritrix has.
     * @throws IOFailure
     *             if the job created failed to initialize or didn't appear in
     *             time.
     * @throws IllegalState
     *             if more than one job in done list, or more than one pending
     *             job
     */
    private String getJobName() {
        /*
         * This is called just after we've told Heritrix to create a job. It may
         * take a while before the job is actually created, so we have to wait
         * around a bit.
         */
        TabularData pendingJobs = null;
        TabularData doneJobs;
        int retries = 0;
        final int maxJmxRetries = JMXUtils.getMaxTries();
        while (retries++ < maxJmxRetries) {
            // If the job turns up in Heritrix' pending jobs list, it's ready
            pendingJobs = (TabularData) executeMBeanOperation(CrawlServiceOperation.pendingJobs);
            if (pendingJobs != null && pendingJobs.size() > 0) {
                break; // It's ready, we can move on.
            }

            // If there's an error in the job configuration, the job will be put
            // in Heritrix' completed jobs list.
            doneJobs = (TabularData) executeMBeanOperation(CrawlServiceOperation.completedJobs);
            if (doneJobs != null && doneJobs.size() >= 1) {
                // Since we haven't allowed Heritrix to start any crawls yet,
                // the only way the job could have ended and then put into
                // the list of completed jobs is by error.
                if (doneJobs.size() > 1) {
                    throw new IllegalState("More than one job in done list: " + doneJobs);
                } else {
                    CompositeData job = JMXUtils.getOneCompositeData(doneJobs);
                    throw new IOFailure(
                            "Job " + job + " failed: " + job.get(CrawlServiceJobAttribute.Status.name()));
                }
            }
            if (retries < maxJmxRetries) {
                TimeUtils.exponentialBackoffSleep(retries);
            }
        }
        // If all went well, we now have exactly one job in the pending
        // jobs list.
        if (pendingJobs == null || pendingJobs.size() == 0) {
            throw new IOFailure("Heritrix has not created a job after "
                    + (Math.pow(2, maxJmxRetries) / TimeUtils.SECOND_IN_MILLIS) + " seconds, giving up.");
        } else if (pendingJobs.size() > 1) {
            throw new IllegalState("More than one pending job: " + pendingJobs);
        } else {
            // Note that we may actually get through to here even if the job
            // is malformed. The job will then die as soon as we tell it to
            // start crawling.
            CompositeData job = JMXUtils.getOneCompositeData(pendingJobs);
            String name = job.get(JmxUtils.NAME) + "-" + job.get(UID_PROPERTY);
            log.info("Heritrix created a job with name " + name);
            return name;
        }
    }

    /**
     * Periodically scans the crawl dir to see if Heritrix has finished
     * generating the crawl reports. The time to wait is bounded by
     * {@link HarvesterSettings#WAIT_FOR_REPORT_GENERATION_TIMEOUT}.
     *
     * @param crawlDir
     *            the crawl directory to scan.
     */
    private void waitForReportGeneration(File crawlDir) {

        log.info("Started waiting for Heritrix report generation.");

        long currentTime = System.currentTimeMillis();
        long waitSeconds = Settings.getLong(HarvesterSettings.WAIT_FOR_REPORT_GENERATION_TIMEOUT);
        long waitDeadline = currentTime + TimeUtils.SECOND_IN_MILLIS * waitSeconds;

        // While the deadline is not attained, periodically perform the
        // following checks:
        //    1) Verify that the crawl job MBean still exists. If not then
        //       the job is over, no need to wait more and exit the loop.
        //    2) Read the job(s status. Since Heritrix 1.14.4, a FINISHED status
        //       guarantees that all reports have been generated. In this case
        //       exit the loop.
        while (currentTime <= waitDeadline) {

            currentTime = System.currentTimeMillis();

            boolean crawlServiceJobExists = false;
            try {
                if (crawlServiceJobBeanName != null) {
                    crawlServiceJobExists = getMBeanServerConnection()
                            .isRegistered(JMXUtils.getBeanName(crawlServiceJobBeanName));
                } else {
                    // An error occurred when initializing the controller
                    // Simply log a warning for the record
                    log.warn("crawlServiceJobBeanName is null, earlier "
                            + "initialization of controller did not complete.");
                }
            } catch (IOException e) {
                log.warn(e);
                continue;
            }

            if (!crawlServiceJobExists) {
                log.info(crawlServiceJobBeanName + " MBean not found, report generation is finished."
                        + " Exiting wait loop.");
                break;
            }

            String status = "";
            try {
                List<Attribute> atts = getAttributesNoRetry(crawlServiceJobBeanName,
                        new String[] { CrawlServiceJobAttribute.Status.name() });
                status = (String) atts.get(0).getValue();
            } catch (IOFailure e) {
                log.warn(e);
                continue;
            } catch (IndexOutOfBoundsException e) {
                // sometimes the array is empty TODO find out why
                log.warn(e);
                continue;
            }

            if (CrawlController.FINISHED.equals(status)) {
                log.info(crawlServiceJobBeanName + " status is FINISHED, report generation is complete."
                        + " Exiting wait loop.");
                return;
            }

            try {
                // Wait 20 seconds
                Thread.sleep(20 * TimeUtils.SECOND_IN_MILLIS);
            } catch (InterruptedException e) {
                log.trace("Received InterruptedException", e);
            }
        }
        log.info("Waited " + StringUtils.formatDuration(waitSeconds)
                + " for report generation. Will proceed with cleanup.");
    }

    /**
     * Execute a single CrawlServiceOperation.
     *
     * @param operation the operation to execute
     * @param arguments any arguments needed by the operation  
     * @return Whatever the command returned.
     */
    private Object executeMBeanOperation(CrawlServiceOperation operation, String... arguments) {
        return executeOperation(crawlServiceBeanName, operation.name(), arguments);
    }

    /**
     * Execute a single CrawlServiceOperation.
     *
     * @param operation the operation to execute
     * @param arguments any arguments needed by the operation         
     * @return Whatever the command returned.
     */
    private Object executeMBeanOperation(CrawlServiceJobOperation operation, String... arguments) {
        return executeOperation(crawlServiceJobBeanName, operation.name(), arguments);
    }

    /**
     * Get the value of several attributes.
     *
     * @param attributes
     *            The attributes to get.
     * @return Whatever the command returned.
     */
    private List<Attribute> getMBeanAttributes(CrawlServiceJobAttribute[] attributes) {

        String[] attNames = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            attNames[i] = attributes[i].name();
        }

        return getAttributes(crawlServiceJobBeanName, attNames);
    }

    /**
     * Get the value of several attributes.
     *
     * @param attributes
     *            The attributes to get.
     * @return Whatever the command returned.
     */
    private List<Attribute> getMBeanAttributes(CrawlServiceAttribute[] attributes) {

        String[] attNames = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            attNames[i] = attributes[i].name();
        }

        return getAttributes(crawlServiceBeanName, attNames);
    }

    /**
     * Execute a command on a bean.
     *
     * @param beanName
     *            Name of the bean.
     * @param operation
     *            Command to execute.
     * @param args
     *            Arguments to the command. Only string arguments are possible
     *            at the moment.
     * @return The return value of the executed command.
     */
    private Object executeOperation(String beanName, String operation, String... args) {
        return jmxCall(beanName, true, true, new String[] { operation }, args);
    }

    /**
     * Execute a command on a bean, does not retry if fails.
     *
     * @param beanName
     *            Name of the bean.
     * @param operation
     *            Command to execute.
     * @param args
     *            Arguments to the command. Only string arguments are possible
     *            at the moment.
     * @return The return value of the executed command.
     */
    private Object executeOperationNoRetry(String beanName, String operation, String... args) {
        return jmxCall(beanName, false, true, new String[] { operation }, args);
    }

    /**
     * Get the value of several attributes from a bean.
     *
     * @param beanName
     *            Name of the bean to get an attribute for.
     * @param attributes
     *            Name of the attributes to get.
     * @return Value of the attribute.
     */
    @SuppressWarnings("unchecked")
    private List<Attribute> getAttributes(String beanName, String[] attributes) {
        return (List<Attribute>) jmxCall(beanName, true, false, attributes);
    }

    /**
     * Get the value of several attributes from a bean, but does not retry if
     * the fetch fails.
     *
     * @param beanName
     *            Name of the bean to get an attribute for.
     * @param attributes
     *            Name of the attributes to get.
     * @return Value of the attribute.
     */
    @SuppressWarnings("unchecked")
    private List<Attribute> getAttributesNoRetry(String beanName, String[] attributes) {
        return (List<Attribute>) jmxCall(beanName, false, false, attributes);
    }

    /**
     * Executes a JMX call (attribute read or single operation) on a given bean.
     *
     * @param beanName
     *            the MBean name.
     * @param retry
     *            if true, will retry a number of times if the operation fails.
     * @param isOperation
     *            true if the call is an operation, false if it's an attribute
     *            read.
     * @param names
     *            name of operation or name of attributes
     * @param args
     *            optional arguments for operations
     * @return the object returned by the distant MBean
     */
    private Object jmxCall(String beanName, boolean retry, boolean isOperation, String[] names, String... args) {

        MBeanServerConnection connection = getMBeanServerConnection();

        int maxTries = 1;
        if (retry) {
            maxTries = jmxMaxTries;
        }
        int tries = 0;
        Throwable lastException;
        do {
            tries++;
            try {
                if (isOperation) {
                    final String[] signature = new String[args.length];
                    Arrays.fill(signature, String.class.getName());
                    return connection.invoke(JMXUtils.getBeanName(beanName), names[0], args, signature);
                } else {
                    return connection.getAttributes(JMXUtils.getBeanName(beanName), names).asList();
                }
            } catch (IOException e) {
                lastException = e;
            } catch (ReflectionException e) {
                lastException = e;
            } catch (InstanceNotFoundException e) {
                lastException = e;
            } catch (MBeanException e) {
                lastException = e;
            }
            log.debug("Attempt " + tries + " out of " + maxTries + " attempts to make this jmxCall failed ");
            if (tries < maxTries) {
                TimeUtils.exponentialBackoffSleep(tries);
            }

        } while (tries < maxTries);

        String msg = "";
        if (isOperation) {
            msg = "Failed to execute " + names[0] + " with args " + Arrays.toString(args) + " on " + beanName;
        } else {
            msg = "Failed to read attributes " + Arrays.toString(names) + " of " + beanName;
        }

        if (lastException != null) {
            msg += ", last exception was " + lastException.getClass().getName();
        }
        msg += " after " + tries + " attempts";
        throw new IOFailure(msg, lastException);
    }

    /**
     * Initializes the JMX connection.
     */
    private void initJMXConnection() {

        // Initialize the connection to Heritrix' MBeanServer
        this.jmxConnector = JMXUtils.getJMXConnector(SystemUtils.LOCALHOST, getJmxPort(),
                Settings.get(HarvesterSettings.HERITRIX_JMX_USERNAME),
                Settings.get(HarvesterSettings.HERITRIX_JMX_PASSWORD));
    }

    /**
     * Closes the JMX connection.
     */
    private void closeJMXConnection() {
        // Close the connection to the MBean Server
        try {
            jmxConnector.close();
        } catch (IOException e) {
            log.error("JMX error while closing connection to Heritrix", e);
        }
    }

    /** 
     * @return aMBeanServerConnection to Heritrix
     */
    private MBeanServerConnection getMBeanServerConnection() {
        MBeanServerConnection connection = null;
        int tries = 0;
        IOException ioe = null;
        while (tries < jmxMaxTries && connection == null) {
            tries++;
            try {
                connection = jmxConnector.getMBeanServerConnection();
                log.debug("Got a MBeanserverconnection at attempt #" + tries);
                return connection;
            } catch (IOException e) {
                ioe = e;
                log.info("IOException while getting MBeanServerConnection." + " Attempt " + tries + " out of "
                        + jmxMaxTries + ". Will try to renew the JMX connection to Heritrix");
                // When an IOException is raised in RMIConnector, a terminated
                // flag is set to true, even if the underlying connection is
                // not closed. This seems to be part of a mechanism to prevent
                // deadlocks, but can cause trouble for us.
                // So if this happens, we close and reinitialize
                // the JMX connector itself.
                closeJMXConnection();
                try {
                    initJMXConnection();
                    log.info("Successfully renewed JMX connection");
                } catch (IOFailure e1) {
                    log.debug("Renewal of JMXConnection failed at retry #" + tries + " with exception: ", e1);
                }
            }
            if (tries < jmxMaxTries) {
                TimeUtils.exponentialBackoffSleep(tries);
            }
        }

        if (ABORT_IF_CONN_LOST) {
            log.debug("Connection to Heritrix seems to be lost. " + "Trying to abort ...");
            throw new HarvestingAbort("Failed to connect to MBeanServer", ioe);
        } else {
            throw new IOFailure("Failed to connect to MBeanServer", ioe);
        }
    }

    @Override
    public boolean atFinish() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public void beginCrawlStop() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public void cleanup() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public boolean crawlIsEnded() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public int getActiveToeCount() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public int getCurrentProcessedKBPerSec() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public String getHarvestInformation() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public String getProgressStats() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public long getQueuedUriCount() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public boolean isPaused() {
        throw new NotImplementedException("Not implemented");
    }

}