dk.netarkivet.harvester.scheduler.HarvestSchedulerMonitorServer.java Source code

Java tutorial

Introduction

Here is the source code for dk.netarkivet.harvester.scheduler.HarvestSchedulerMonitorServer.java

Source

/* File:        $Id$
 * Revision:    $Revision$
 * Author:      $Author$
 * Date:        $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

package dk.netarkivet.harvester.scheduler;

import java.util.Date;

import javax.jms.MessageListener;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import dk.netarkivet.common.distribute.Channels;
import dk.netarkivet.common.distribute.JMSConnectionFactory;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.lifecycle.ComponentLifeCycle;
import dk.netarkivet.common.utils.NotificationType;
import dk.netarkivet.common.utils.NotificationsFactory;
import dk.netarkivet.harvester.datamodel.HarvestDefinition;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionDAO;
import dk.netarkivet.harvester.datamodel.Job;
import dk.netarkivet.harvester.datamodel.JobDAO;
import dk.netarkivet.harvester.datamodel.JobStatus;
import dk.netarkivet.harvester.distribute.HarvesterMessageHandler;
import dk.netarkivet.harvester.distribute.IndexReadyMessage;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage;
import dk.netarkivet.harvester.harvesting.distribute.CrawlStatusMessage;
import dk.netarkivet.harvester.harvesting.distribute.JobEndedMessage;
import dk.netarkivet.harvester.harvesting.report.HarvestReport;

/**
 * Submitted harvesting jobs are registered with this singleton. The class
 * listens for CrawlStatusMessages on the THE_SCHED queue and processes
 * completed harvests.
 *
 */
/**
 * Submitted harvesting jobs are registered with this singleton. The class
 * listens for CrawlStatusMessages on the THE_SCHED queue and processes
 * completed harvests.
 */
public class HarvestSchedulerMonitorServer extends HarvesterMessageHandler
        implements MessageListener, ComponentLifeCycle {
    /**
     * The JobDAO used for reading and updating persistent job state.
     */
    private final JobDAO jobDAO = JobDAO.getInstance();

    /** The private logger for this class. */
    private final Log log = LogFactory.getLog(getClass().getName());

    /**
     * Registers this instance as JMS listener on the THE_SCHED channel.
     */
    @Override
    public void start() {
        JMSConnectionFactory.getInstance().setListener(Channels.getTheSched(), this);
    }

    /**
     * Updates the job status from the current status to that specified in
     * message if it is legal to do so. Logs a warning if messages arrive out of
     * order.
     *
     * @param cmsg The CrawlStatusMessage received
     * @throws ArgumentNotValid if the message tries to set the status to NEW,
     * SUBMITTED or RESUBMITTED — statuses owned by the scheduler, which a
     * harvester must never report.
     */
    private void processCrawlStatusMessage(CrawlStatusMessage cmsg) throws ArgumentNotValid {
        long jobID = cmsg.getJobID();
        JobStatus newStatus = cmsg.getStatusCode();
        Job job = jobDAO.read(Long.valueOf(jobID));
        JobStatus oldStatus = job.getStatus();

        // A NEW job should never receive a message — harvesters only see jobs
        // after submission. Log a warning, then fall through to normal handling.
        if (oldStatus == JobStatus.NEW) {
            String msg = "CrawlStatusMessage received on new job: " + job;
            log.warn(msg);
        }

        switch (newStatus) {
        case RESUBMITTED:
        case SUBMITTED:
        case NEW:
            // Crawl status should never update to new/submitted/resubmitted,
            // because these statuses should not be used by the harvesters!
            String msg = "CrawlStatusMessage tried to update job " + job + " to status " + newStatus;
            log.warn(msg);
            throw new ArgumentNotValid(msg);
        case STARTED:
            if (oldStatus == JobStatus.SUBMITTED || oldStatus == JobStatus.NEW) {
                if (oldStatus == JobStatus.NEW) {
                    log.warn("CrawlStatusMessage updated job in unexpected " + "state " + oldStatus + " to "
                            + newStatus + "\n" + job.toString());
                }
                // The usual case submitted -> started
                job.setStatus(newStatus);

                // Send the initial progress message so the HarvestMonitor
                // starts tracking this job.
                JMSConnectionFactory.getInstance()
                        .send(new CrawlProgressMessage(job.getOrigHarvestDefinitionID(), job.getJobID()));

                log.info("Job #" + job.getJobID() + " has been started by the harvester.");
                jobDAO.update(job);
            } else {
                // Must not change status back to STARTED from a later state.
                log.warn("CrawlStatusMessage tried to update job status" + " for job " + job.getJobID() + " from "
                        + oldStatus + " to " + newStatus + ". Ignoring.");
            }
            break;
        case DONE:
        case FAILED:
            if (oldStatus == JobStatus.STARTED || oldStatus == JobStatus.SUBMITTED
                    || oldStatus == JobStatus.RESUBMITTED || oldStatus == JobStatus.NEW) {
                // Received done or failed on non-ended job - okay
                if (oldStatus != JobStatus.STARTED) {
                    // We expect "started" first, but it's not serious. Just log.
                    log.warn("CrawlStatusMessage updated job in unexpected " + "state " + oldStatus + " to "
                            + newStatus + "\n" + job.toString());
                }
                if (newStatus == JobStatus.FAILED) {
                    String errors = "HarvestErrors = " + cmsg.getHarvestErrors() + "\nHarvestErrorDetails = "
                            + cmsg.getHarvestErrorDetails() + "\nUploadErrors = " + cmsg.getUploadErrors()
                            + "\nUploadErrorDetails = " + cmsg.getUploadErrorDetails();

                    log.warn("Job " + jobID + " failed: " + errors);
                } else {
                    log.info("Job " + jobID + " successfully completed");
                }
                job.setStatus(newStatus);
                job.appendHarvestErrors(cmsg.getHarvestErrors());
                job.appendHarvestErrorDetails(cmsg.getHarvestErrorDetails());
                job.appendUploadErrors(cmsg.getUploadErrors());
                job.appendUploadErrorDetails(cmsg.getUploadErrorDetails());
                jobDAO.update(job);
            } else {
                // Received done or failed on an already-ended job. Bad!
                String message = "CrawlStatusMessage tried to update job status from " + oldStatus
                        + " to " + newStatus + ". Marking job FAILED";
                log.warn(message);
                job.setStatus(JobStatus.FAILED);
                job.appendHarvestErrors(cmsg.getHarvestErrors());
                job.appendHarvestErrors(message);
                // BUGFIX: previously appended getHarvestErrors() into the
                // details field; use the actual error-detail text instead.
                job.appendHarvestErrorDetails(cmsg.getHarvestErrorDetails());
                job.appendHarvestErrorDetails(message);
                log.warn("Job " + jobID + " failed: " + job.getHarvestErrorDetails());
                jobDAO.update(job);
            }
            // Always process the harvest report, regardless of job outcome!
            processCrawlData(job, cmsg.getDomainHarvestReport());

            // Send message to notify HarvestMonitor that
            // it should stop monitoring this job
            JMSConnectionFactory.getInstance().send(new JobEndedMessage(job.getJobID(), newStatus));

            break;
        default:
            log.warn("CrawlStatusMessage tried to update job status to " + "unsupported status " + newStatus);
            break;
        }
    }

    /**
     * Takes the crawl report from the job and updates the domain information
     * with harvesting history.
     * If the crawler was unable to generate a {@link HarvestReport},
     * it will do nothing.
     *
     * @param job the completed job
     * @param dhr the domain harvest report, or null if none available.
     * @throws ArgumentNotValid if job is null
     */
    private void processCrawlData(Job job, HarvestReport dhr) throws ArgumentNotValid {
        ArgumentNotValid.checkNotNull(job, "job");

        // If the crawler was unable to generate a HarvestReport,
        // we will do nothing.
        if (dhr == null) {
            return;
        }

        // Post-process the report (updates domain harvest history).
        dhr.postProcess(job);
    }

    /**
     * Processes an incoming CrawlStatusMessage.
     *
     * @param msg a given CrawlStatusMessage
     * @throws ArgumentNotValid if msg is null
     * @see dk.netarkivet.harvester.distribute.HarvesterMessageHandler#visit(
     * dk.netarkivet.harvester.harvesting.distribute.CrawlStatusMessage)
     */
    @Override
    public void visit(CrawlStatusMessage msg) {
        ArgumentNotValid.checkNotNull(msg, "msg");
        processCrawlStatusMessage(msg);
    }

    /**
     * Removes the HarvestSchedulerMonitorServer as listener
     * to the JMS scheduler Channel.
     */
    @Override
    public void shutdown() {
        // FIXME This command fail when shutting down properly. (kill $PID)
        // instead of kill -9 $PID. See NAS-1976
        //JMSConnectionFactory.getInstance().removeListener(
        //        Channels.getTheSched(), this);
    }

    /**
     * Processes an incoming IndexReadyMessage.
     *
     * @param msg the message
     * @throws ArgumentNotValid if msg is null
     */
    @Override
    public void visit(IndexReadyMessage msg) {
        ArgumentNotValid.checkNotNull(msg, "msg");
        processIndexReadyMessage(msg);
    }

    /**
     * Process an incoming IndexReadyMessage. For snapshot harvests, records
     * whether the index is ready; if index generation failed, the harvest
     * definition is deactivated and an error notification is sent. Messages
     * for selective harvests are ignored.
     *
     * @param msg the message
     */
    private void processIndexReadyMessage(IndexReadyMessage msg) {
        // Set isindexready to true if Indexisready is true
        Long harvestId = msg.getHarvestId();
        boolean indexisready = msg.getIndexOK();
        HarvestDefinitionDAO dao = HarvestDefinitionDAO.getInstance();
        if (dao.isSnapshot(harvestId)) {
            dao.setIndexIsReady(harvestId, indexisready);
            if (indexisready) {
                log.info("Got message from the IndexServer, that the index is ready for" + " harvest # "
                        + harvestId);
            } else {
                String errMsg = "Got message from IndexServer, that it failed to generate index for" + " harvest # "
                        + harvestId + ". Deactivating harvest";
                log.warn(errMsg);
                // Deactivate the harvest and record why in its comments,
                // then alert operations via the notification mechanism.
                HarvestDefinition hd = dao.read(harvestId);
                hd.setActive(false);
                StringBuilder commentsBuf = new StringBuilder(hd.getComments());
                commentsBuf.append("\n" + (new Date())
                        + ": Deactivated by the system because indexserver failed to generate index");
                hd.setComments(commentsBuf.toString());
                dao.update(hd);
                NotificationsFactory.getInstance().notify(errMsg, NotificationType.ERROR);
            }
        } else {
            log.debug("Ignoring IndexReadyMessage sent on behalf of " + "selective harvest w/id " + harvestId);
        }
    }
}