dk.netarkivet.harvester.datamodel.JobDBDAO.java Source code

Introduction

Here is the source code for dk.netarkivet.harvester.datamodel.JobDBDAO.java

Source

/* File:        $Id$
 * Revision:    $Revision$
 * Author:      $Author$
 * Date:        $Date$
 *
 * The Netarchive Suite - Software to harvest and preserve websites
 * Copyright 2004-2012 The Royal Danish Library, the Danish State and
 * University Library, the National Library of France and the Austrian
 * National Library.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

package dk.netarkivet.harvester.datamodel;

import java.sql.Clob;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.DBUtils;
import dk.netarkivet.common.utils.ExceptionUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.StringUtils;
import dk.netarkivet.common.utils.XmlUtils;
import dk.netarkivet.harvester.webinterface.HarvestStatus;
import dk.netarkivet.harvester.webinterface.HarvestStatusQuery;
import dk.netarkivet.harvester.webinterface.HarvestStatusQuery.SORT_ORDER;

/**
 * A database-based implementation of the JobDAO class.
 * The statements to create the tables are now in
 * scripts/sql/createfullhddb.sql
 */
public class JobDBDAO extends JobDAO {
    /** The logger for this class. */
    private final Log log = LogFactory.getLog(getClass());

    /**
     * Create a new JobDBDAO implemented using a database.
     * This constructor also tries to upgrade the jobs and job_configs tables
     * in the current database.
     * @throws IllegalState if it is impossible to make the necessary updates.
     */
    protected JobDBDAO() {
        Connection connection = HarvestDBConnection.get();
        try {
            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.JOBS);
            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.JOBCONFIGS);
        } finally {
            HarvestDBConnection.release(connection);
        }
    }

    /**
     * Creates an instance in persistent storage of the given job.
     * If the job doesn't have an ID, one is generated for it.
     *
     * @param job a given job to add to persistent storage
     * @throws UnknownID        If the origHarvestDefinitionID of the job does
     *                          not refer to an existing harvest definition
     * @throws PermissionDenied If a job already exists in persistent storage
     *                          with the same id as the given job
     * @throws IOFailure        If some IOException occurs while
     *                          writing the job to persistent storage
     */
    public synchronized void create(Job job) {
        ArgumentNotValid.checkNotNull(job, "Job job");
        // Check that job.getOrigHarvestDefinitionID() refers to
        // existing harvestdefinition
        Long harvestId = job.getOrigHarvestDefinitionID();
        if (!HarvestDefinitionDAO.getInstance().exists(harvestId)) {
            throw new UnknownID("No harvestdefinition with ID=" + harvestId);
        }

        Connection connection = HarvestDBConnection.get();
        if (job.getJobID() != null) {
            log.warn("The jobId for the job is already set. " + "This should probably never happen.");
        } else {
            job.setJobID(generateNextID(connection));
        }

        if (job.getCreationDate() != null) {
            log.warn("The creation time for the job is already set. " + "This should probably never happen.");
        } else {
            job.setCreationDate(new Date());
        }

        log.debug("Creating " + job.toString());

        PreparedStatement statement = null;
        try {
            connection.setAutoCommit(false);
            statement = connection
                    .prepareStatement("INSERT INTO jobs " + "(job_id, harvest_id, status, channel, forcemaxcount, "
                            + "forcemaxbytes, forcemaxrunningtime, orderxml, " + "orderxmldoc, seedlist, "
                            + "harvest_num, startdate, enddate, submitteddate, creationdate, "
                            + "num_configs, edition, resubmitted_as_job, harvestname_prefix, snapshot) "
                            + "VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?," + "?, ?, ?, ?, ?, ?)");

            statement.setLong(1, job.getJobID());
            statement.setLong(2, job.getOrigHarvestDefinitionID());
            statement.setInt(3, job.getStatus().ordinal());
            statement.setString(4, job.getChannel());
            statement.setLong(5, job.getForceMaxObjectsPerDomain());
            statement.setLong(6, job.getMaxBytesPerDomain());
            statement.setLong(7, job.getMaxJobRunningTime());
            DBUtils.setStringMaxLength(statement, 8, job.getOrderXMLName(), Constants.MAX_NAME_SIZE, job,
                    "order.xml name");
            final String orderreader = job.getOrderXMLdoc().asXML();
            DBUtils.setClobMaxLength(statement, 9, orderreader, Constants.MAX_ORDERXML_SIZE, job, "order.xml");
            DBUtils.setClobMaxLength(statement, 10, job.getSeedListAsString(),
                    Constants.MAX_COMBINED_SEED_LIST_SIZE, job, "seedlist");
            statement.setInt(11, job.getHarvestNum());
            DBUtils.setDateMaybeNull(statement, 12, job.getActualStart());
            DBUtils.setDateMaybeNull(statement, 13, job.getActualStop());
            DBUtils.setDateMaybeNull(statement, 14, job.getSubmittedDate());
            DBUtils.setDateMaybeNull(statement, 15, job.getCreationDate());

            // The size of the configuration map == number of configurations
            statement.setInt(16, job.getDomainConfigurationMap().size());
            long initialEdition = 1;
            statement.setLong(17, initialEdition);
            DBUtils.setLongMaybeNull(statement, 18, job.getResubmittedAsJob());
            statement.setString(19, job.getHarvestFilenamePrefix());
            statement.setBoolean(20, job.isSnapshot());
            statement.executeUpdate();
            createJobConfigsEntries(connection, job);
            connection.commit();
            job.setEdition(initialEdition);
        } catch (SQLException e) {
            String message = "SQL error creating job " + job + " in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        } finally {
            DBUtils.rollbackIfNeeded(connection, "create job", job);
            HarvestDBConnection.release(connection);
        }
    }
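
    /**
     * A minimal usage sketch, assuming the Job was built elsewhere with a
     * valid origHarvestDefinitionID; callers normally reach this DAO through
     * the JobDAO.getInstance() factory rather than instantiating it directly.
     */
    private void exampleCreateUsage(Job job) {
        create(job); // assigns a job ID and creation date if they are unset
        assert exists(job.getJobID());
    }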

    /** Create the entries in the job_configs table for this job.
     * Since some jobs have up to 10000 configs, this must be optimized.
     * The entries are only created if job.configsChanged is true.
     *
     * @param dbconnection A connection to work on
     * @param job The job to store entries for
     * @throws SQLException If any problems occur during creation of the
     * new entries in the job_configs table.
     */
    private void createJobConfigsEntries(Connection dbconnection, Job job) throws SQLException {
        if (job.configsChanged) {
            PreparedStatement statement = null;
            String tmpTable = null;
            Long jobID = job.getJobID();
            try {
                statement = dbconnection.prepareStatement("DELETE FROM job_configs WHERE job_id = ?");
                statement.setLong(1, jobID);
                statement.executeUpdate();
                statement.close();
                tmpTable = DBSpecifics.getInstance().getJobConfigsTmpTable(dbconnection);
                final Map<String, String> domainConfigurationMap = job.getDomainConfigurationMap();
                statement = dbconnection.prepareStatement(
                        "INSERT INTO " + tmpTable + " ( domain_name, config_name ) " + " VALUES ( ?, ?)");
                for (Map.Entry<String, String> entry : domainConfigurationMap.entrySet()) {
                    statement.setString(1, entry.getKey());
                    statement.setString(2, entry.getValue());
                    statement.executeUpdate();
                    statement.clearParameters();
                }
                statement.close();
                // Now we have a temp table with all the domains and configs
                statement = dbconnection.prepareStatement("INSERT INTO job_configs " + "( job_id, config_id ) "
                        + "SELECT ?, configurations.config_id " + "  FROM domains, configurations, " + tmpTable
                        + " WHERE domains.name = " + tmpTable + ".domain_name"
                        + "   AND domains.domain_id = configurations.domain_id" + "   AND configurations.name = "
                        + tmpTable + ".config_name");
                statement.setLong(1, jobID);
                int rows = statement.executeUpdate();
                if (rows != domainConfigurationMap.size()) {
                    log.debug("Domain or configuration in table for " + job + " missing: Should have "
                            + domainConfigurationMap.size() + ", got " + rows);
                }
                dbconnection.commit();
            } finally {
                if (tmpTable != null) {
                    DBSpecifics.getInstance().dropJobConfigsTmpTable(dbconnection, tmpTable);
                }
                job.configsChanged = false;
            }
        }
    }

    /** Check whether a particular job exists.
     *
     * @param jobID Id of the job.
     * @return true if the job exists in any state.
     */
    @Override
    public boolean exists(Long jobID) {
        ArgumentNotValid.checkNotNull(jobID, "Long jobID");

        Connection c = HarvestDBConnection.get();
        try {
            return exists(c, jobID);
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /** Check whether a particular job exists.
     *
     * @param c an open connection to the harvestDatabase
     * @param jobID Id of the job.
     * @return true if the job exists in any state.
     */
    private boolean exists(Connection c, Long jobID) {
        return 1 == DBUtils.selectLongValue(c, "SELECT COUNT(*) FROM jobs WHERE job_id = ?", jobID);
    }

    /**
     * Generates the next job ID.
     * @param c an open connection to the harvestDatabase
     * @return the next available job ID
     */
    private Long generateNextID(Connection c) {
        // Normally zero, but it can be set after an admin machine breakdown
        // and is then used as the point of reference.
        Long restoreId = Settings.getLong(Constants.NEXT_JOB_ID);

        Long maxVal = DBUtils.selectLongValue(c, "SELECT MAX(job_id) FROM jobs");
        if (maxVal == null) {
            maxVal = 0L;
        }
        // Return the larger of the two: the NEXT_JOB_ID declared in settings,
        // or one more than the highest job_id used in the jobs table. E.g.
        // with NEXT_JOB_ID=0 and MAX(job_id)=42 this yields 43; with
        // NEXT_JOB_ID=100 and MAX(job_id)=42 it yields 100.
        return ((restoreId > maxVal) ? restoreId : maxVal + 1L);
    }

    /**
     * Update a Job in persistent storage.
     *
     * @param job The Job to update
     * @throws ArgumentNotValid If the Job is null
     * @throws UnknownID If the Job doesn't exist in the DAO
     * @throws IOFailure If writing the job to persistent storage fails
     * @throws PermissionDenied If the job has been updated behind our backs
     */
    @Override
    public synchronized void update(Job job) {
        ArgumentNotValid.checkNotNull(job, "job");

        Connection connection = HarvestDBConnection.get();
        // Not done as a transaction as it's awfully big.
        // TODO Make sure that a failed job update does... what?
        PreparedStatement statement = null;
        try {
            final Long jobID = job.getJobID();
            if (!exists(connection, jobID)) {
                throw new UnknownID("Job id " + jobID + " is not known in persistent storage");
            }

            connection.setAutoCommit(false);
            statement = connection.prepareStatement("UPDATE jobs SET " + "harvest_id = ?, status = ?, channel = ?, "
                    + "forcemaxcount = ?, forcemaxbytes = ?, " + "forcemaxrunningtime = ?," + "orderxml = ?, "
                    + "orderxmldoc = ?, seedlist = ?, " + "harvest_num = ?, harvest_errors = ?, "
                    + "harvest_error_details = ?, upload_errors = ?, " + "upload_error_details = ?, startdate = ?,"
                    + "enddate = ?, num_configs = ?, edition = ?, " + "submitteddate = ?, creationdate = ?, "
                    + "resubmitted_as_job = ?, harvestname_prefix = ?," + "snapshot = ?"
                    + " WHERE job_id = ? AND edition = ?");
            statement.setLong(1, job.getOrigHarvestDefinitionID());
            statement.setInt(2, job.getStatus().ordinal());
            statement.setString(3, job.getChannel());
            statement.setLong(4, job.getForceMaxObjectsPerDomain());
            statement.setLong(5, job.getMaxBytesPerDomain());
            statement.setLong(6, job.getMaxJobRunningTime());
            DBUtils.setStringMaxLength(statement, 7, job.getOrderXMLName(), Constants.MAX_NAME_SIZE, job,
                    "order.xml name");
            final String orderreader = job.getOrderXMLdoc().asXML();
            DBUtils.setClobMaxLength(statement, 8, orderreader, Constants.MAX_ORDERXML_SIZE, job, "order.xml");
            DBUtils.setClobMaxLength(statement, 9, job.getSeedListAsString(), Constants.MAX_COMBINED_SEED_LIST_SIZE,
                    job, "seedlist");
            statement.setInt(10, job.getHarvestNum()); // Not in job yet
            DBUtils.setStringMaxLength(statement, 11, job.getHarvestErrors(), Constants.MAX_ERROR_SIZE, job,
                    "harvest_error");
            DBUtils.setStringMaxLength(statement, 12, job.getHarvestErrorDetails(), Constants.MAX_ERROR_DETAIL_SIZE,
                    job, "harvest_error_details");
            DBUtils.setStringMaxLength(statement, 13, job.getUploadErrors(), Constants.MAX_ERROR_SIZE, job,
                    "upload_error");
            DBUtils.setStringMaxLength(statement, 14, job.getUploadErrorDetails(), Constants.MAX_ERROR_DETAIL_SIZE,
                    job, "upload_error_details");
            long edition = job.getEdition() + 1;
            DBUtils.setDateMaybeNull(statement, 15, job.getActualStart());
            DBUtils.setDateMaybeNull(statement, 16, job.getActualStop());
            statement.setInt(17, job.getDomainConfigurationMap().size());
            statement.setLong(18, edition);
            DBUtils.setDateMaybeNull(statement, 19, job.getSubmittedDate());
            DBUtils.setDateMaybeNull(statement, 20, job.getCreationDate());
            DBUtils.setLongMaybeNull(statement, 21, job.getResubmittedAsJob());
            statement.setString(22, job.getHarvestFilenamePrefix());
            statement.setBoolean(23, job.isSnapshot());
            statement.setLong(24, job.getJobID());
            statement.setLong(25, job.getEdition());
            final int rows = statement.executeUpdate();
            if (rows == 0) {
                String message = "Edition " + job.getEdition() + " has expired, not updating";
                log.debug(message);
                throw new PermissionDenied(message);
            }
            createJobConfigsEntries(connection, job);
            connection.commit();
            job.setEdition(edition);
        } catch (SQLException e) {
            String message = "SQL error updating job " + job + " in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        } finally {
            DBUtils.rollbackIfNeeded(connection, "update job", job);
            HarvestDBConnection.release(connection);
        }
    }
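
    /**
     * A minimal sketch of the optimistic-concurrency pattern that the edition
     * column enforces; the modification shown is only an example, and a real
     * caller would reapply its own changes after each re-read.
     */
    private void exampleUpdateWithRetry(Long jobId) {
        while (true) {
            Job job = read(jobId); // fetches the current edition
            job.appendUploadErrors("example error"); // some modification
            try {
                update(job); // bumps the edition on success
                return;
            } catch (PermissionDenied e) {
                // The edition expired: another writer updated the job first.
                // Re-read and retry.
            }
        }
    }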

    /** Read a single job from the job database.
     *
     * @param jobID ID of the job.
     * @return A Job object
     * @throws UnknownID if the job id does not exist.
     * @throws IOFailure if there was some problem talking to the database.
     */
    @Override
    public Job read(Long jobID) {
        ArgumentNotValid.checkNotNull(jobID, "jobID");
        Connection connection = HarvestDBConnection.get();
        try {
            return read(connection, jobID);
        } finally {
            HarvestDBConnection.release(connection);
        }
    }

    /** Read a single job from the job database.
     *
     * @param jobID ID of the job.
     * @param connection an open connection to the harvestDatabase
     * @return A Job object
     * @throws UnknownID if the job id does not exist.
     * @throws IOFailure if there was some problem talking to the database.
     */
    private synchronized Job read(Connection connection, Long jobID) {
        if (!exists(connection, jobID)) {
            throw new UnknownID("Job id " + jobID + " is not known in persistent storage");
        }
        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement("SELECT " + "harvest_id, status, channel, "
                    + "forcemaxcount, forcemaxbytes, " + "forcemaxrunningtime, orderxml, "
                    + "orderxmldoc, seedlist, harvest_num," + "harvest_errors, harvest_error_details, "
                    + "upload_errors, upload_error_details, " + "startdate, enddate, submitteddate, creationdate, "
                    + "edition, resubmitted_as_job, continuationof, harvestname_prefix, snapshot "
                    + "FROM jobs WHERE job_id = ?");
            statement.setLong(1, jobID);
            ResultSet result = statement.executeQuery();
            result.next();
            long harvestID = result.getLong(1);
            JobStatus status = JobStatus.fromOrdinal(result.getInt(2));
            String channel = result.getString(3);
            long forceMaxCount = result.getLong(4);
            long forceMaxBytes = result.getLong(5);
            long forceMaxRunningTime = result.getLong(6);
            String orderxml = result.getString(7);

            Document orderXMLdoc = null;

            boolean useClobs = DBSpecifics.getInstance().supportsClob();
            if (useClobs) {
                Clob clob = result.getClob(8);
                orderXMLdoc = getOrderXMLdocFromClob(clob);
            } else {
                orderXMLdoc = XmlUtils.documentFromString(result.getString(8));
            }
            String seedlist = "";
            if (useClobs) {
                Clob clob = result.getClob(9);
                seedlist = clob.getSubString(1, (int) clob.length());
            } else {
                seedlist = result.getString(9);
            }

            int harvestNum = result.getInt(10);
            String harvestErrors = result.getString(11);
            String harvestErrorDetails = result.getString(12);
            String uploadErrors = result.getString(13);
            String uploadErrorDetails = result.getString(14);
            Date startdate = DBUtils.getDateMaybeNull(result, 15);
            Date stopdate = DBUtils.getDateMaybeNull(result, 16);
            Date submittedDate = DBUtils.getDateMaybeNull(result, 17);
            Date creationDate = DBUtils.getDateMaybeNull(result, 18);
            Long edition = result.getLong(19);
            Long resubmittedAsJob = DBUtils.getLongMaybeNull(result, 20);
            Long continuationOfJob = DBUtils.getLongMaybeNull(result, 21);
            String harvestnamePrefix = result.getString(22);
            boolean snapshot = result.getBoolean(23);
            statement.close();
            // IDs should match up in a natural join
            // The following if-block is an attempt to fix Bug 1856, an
            // unexplained derby deadlock, by making this statement a dirty
            // read.
            String domainStatement = "SELECT domains.name, configurations.name "
                    + "FROM domains, configurations, job_configs " + "WHERE job_configs.job_id = ?"
                    + "  AND job_configs.config_id = configurations.config_id"
                    + "  AND domains.domain_id = configurations.domain_id";
            if (Settings.get(CommonSettings.DB_SPECIFICS_CLASS).contains(CommonSettings.DB_IS_DERBY_IF_CONTAINS)) {
                statement = connection.prepareStatement(domainStatement + " WITH UR");
            } else {
                statement = connection.prepareStatement(domainStatement);
            }
            statement.setLong(1, jobID);
            result = statement.executeQuery();
            Map<String, String> configurationMap = new HashMap<String, String>();
            while (result.next()) {
                String domainName = result.getString(1);
                String configName = result.getString(2);
                configurationMap.put(domainName, configName);
            }
            final Job job = new Job(harvestID, configurationMap, channel, snapshot, forceMaxCount, forceMaxBytes,
                    forceMaxRunningTime, status, orderxml, orderXMLdoc, seedlist, harvestNum, continuationOfJob);
            job.appendHarvestErrors(harvestErrors);
            job.appendHarvestErrorDetails(harvestErrorDetails);
            job.appendUploadErrors(uploadErrors);
            job.appendUploadErrorDetails(uploadErrorDetails);
            if (startdate != null) {
                job.setActualStart(startdate);
            }
            if (stopdate != null) {
                job.setActualStop(stopdate);
            }

            if (submittedDate != null) {
                job.setSubmittedDate(submittedDate);
            }

            if (creationDate != null) {
                job.setCreationDate(creationDate);
            }

            job.configsChanged = false;
            job.setJobID(jobID);
            job.setEdition(edition);

            if (resubmittedAsJob != null) {
                job.setResubmittedAsJob(resubmittedAsJob);
            }
            if (harvestnamePrefix == null) {
                job.setDefaultHarvestNamePrefix();
            } else {
                job.setHarvestFilenamePrefix(harvestnamePrefix);
            }

            return job;
        } catch (SQLException e) {
            String message = "SQL error reading job " + jobID + " in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        } catch (DocumentException e) {
            String message = "XML error reading job " + jobID + " in database";
            log.warn(message, e);
            throw new IOFailure(message, e);
        }
    }

    /** Try to extract an orderxmldoc from a given Clob.
     * This method is used by the read() method, which catches the
     * thrown DocumentException.
     * @param clob a given Clob returned from the database
     * @return a Document object based on the data in the Clob
     * @throws SQLException If data from the clob cannot be fetched.
     * @throws DocumentException If unable to create a Document object based on
     * the data in the Clob
     */
    private Document getOrderXMLdocFromClob(Clob clob) throws SQLException, DocumentException {
        Document doc;
        try {
            SAXReader reader = new SAXReader();
            doc = reader.read(clob.getCharacterStream());
        } catch (DocumentException e) {
            log.warn("Failed to read the contents of the clob as XML:" + clob.getSubString(1, (int) clob.length()));
            throw e;
        }
        return doc;
    }

    /**
     * Return an iterator over all jobs with the given status, ordered by ID.
     *
     * @param status A given status.
     * @return An iterator over all jobs with the given status
     */
    @Override
    public synchronized Iterator<Job> getAll(JobStatus status) {
        ArgumentNotValid.checkNotNull(status, "JobStatus status");

        Connection c = HarvestDBConnection.get();
        try {
            List<Long> idList = DBUtils.selectLongList(c,
                    "SELECT job_id FROM jobs WHERE status = ? " + "ORDER BY job_id", status.ordinal());
            List<Job> orderedJobs = new LinkedList<Job>();
            for (Long jobId : idList) {
                orderedJobs.add(read(c, jobId));
            }
            return orderedJobs.iterator();
        } finally {
            HarvestDBConnection.release(c);
        }
    }
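
    /**
     * A minimal sketch of iterating over jobs in a given state; JobStatus.NEW
     * is just one example of the status values used by this class.
     */
    private void exampleListJobs() {
        Iterator<Job> jobs = getAll(JobStatus.NEW);
        while (jobs.hasNext()) {
            Job job = jobs.next();
            System.out.println("Job " + job.getJobID() + " belongs to harvest " + job.getOrigHarvestDefinitionID());
        }
    }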

    /**
     * Return an iterator over the IDs of all jobs with the given status,
     * ordered by job ID.
     *
     * @param status A given status.
     * @return An iterator over the IDs of all jobs with the given status
     * @throws ArgumentNotValid If the given status is not one of the
     *                          valid statuses defined in JobStatus.
     */
    @Override
    public Iterator<Long> getAllJobIds(JobStatus status) {
        ArgumentNotValid.checkNotNull(status, "JobStatus status");

        Connection c = HarvestDBConnection.get();
        try {
            List<Long> idList = DBUtils.selectLongList(c,
                    "SELECT job_id FROM jobs WHERE status = ? " + "ORDER BY job_id", status.ordinal());
            return idList.iterator();
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Return an iterator over the IDs of all jobs with the given status and
     * harvest channel, ordered by job ID.
     *
     * @param status A given status.
     * @param channel A given harvest channel.
     * @return An iterator over the IDs of the matching jobs
     */
    @Override
    public Iterator<Long> getAllJobIds(JobStatus status, HarvestChannel channel) {
        ArgumentNotValid.checkNotNull(status, "JobStatus status");
        ArgumentNotValid.checkNotNull(channel, "Channel");

        Connection c = HarvestDBConnection.get();
        try {
            List<Long> idList = DBUtils.selectLongList(c,
                    "SELECT job_id FROM jobs WHERE status = ? AND channel = ? " + "ORDER BY job_id",
                    status.ordinal(), channel.getName());
            return idList.iterator();
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Return an iterator over all jobs, ordered by job ID.
     *
     * @return An iterator over all jobs
     */
    @Override
    public synchronized Iterator<Job> getAll() {
        Connection c = HarvestDBConnection.get();
        try {
            List<Long> idList = DBUtils.selectLongList(c, "SELECT job_id FROM jobs ORDER BY job_id");
            List<Job> orderedJobs = new LinkedList<Job>();
            for (Long jobId : idList) {
                orderedJobs.add(read(c, jobId));
            }
            return orderedJobs.iterator();
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Return an iterator over all job IDs, ordered by job ID.
     * @return An iterator over all job IDs
     */
    public Iterator<Long> getAllJobIds() {
        Connection c = HarvestDBConnection.get();
        try {
            List<Long> idList = DBUtils.selectLongList(c, "SELECT job_id FROM jobs ORDER BY job_id");
            return idList.iterator();
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Get a list of small and immediately usable status information objects
     * for the given status, in the given order. Used by the getStatusInfo
     * methods in order to share code (and SQL).
     * TODO should also include a given harvest run
     * @param connection an open connection to the harvestDatabase
     * @param jobStatusCode code for job status, -1 for all statuses
     * @param asc true if the result is to be sorted in ascending order,
     *        false if it is to be sorted in descending order
     * @return List of JobStatusInfo objects for the matching jobs.
     * @throws ArgumentNotValid for an invalid jobStatusCode
     * @throws IOFailure on trouble getting data from the database
     */
    private List<JobStatusInfo> getStatusInfo(Connection connection, int jobStatusCode, boolean asc) {
        // Validate jobStatusCode
        // Throws ArgumentNotValid if it is an invalid job status
        if (jobStatusCode != JobStatus.ALL_STATUS_CODE) {
            JobStatus.fromOrdinal(jobStatusCode);
        }

        StringBuffer sqlBuffer = new StringBuffer("SELECT jobs.job_id, status, jobs.harvest_id, "
                + "harvestdefinitions.name, harvest_num, harvest_errors,"
                + " upload_errors, orderxml, num_configs, submitteddate, creationdate, startdate,"
                + " enddate, resubmitted_as_job" + " FROM jobs, harvestdefinitions "
                + " WHERE harvestdefinitions.harvest_id = jobs.harvest_id ");

        if (jobStatusCode != JobStatus.ALL_STATUS_CODE) {
            sqlBuffer.append(" AND status = ").append(jobStatusCode);
        }
        sqlBuffer.append(" ORDER BY jobs.job_id");
        if (!asc) { // Assume default is ASCENDING
            sqlBuffer.append(" " + HarvestStatusQuery.SORT_ORDER.DESC.name());
        }

        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement(sqlBuffer.toString());
            ResultSet res = statement.executeQuery();
            return makeJobStatusInfoListFromResultset(res);
        } catch (SQLException e) {
            String message = "SQL error asking for job status list in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        }
    }

    /**
     * Get a list of small and immediately usable status information for given
     * job status.
     *
     * @param status The status asked for.
     * @return List of JobStatusInfo objects for all jobs with given job status
     * @throws ArgumentNotValid for invalid jobStatus
     * @throws IOFailure on trouble getting data from database
     */
    @Override
    public List<JobStatusInfo> getStatusInfo(JobStatus status) {
        ArgumentNotValid.checkNotNull(status, "status");
        Connection c = HarvestDBConnection.get();
        try {
            return getStatusInfo(c, status.ordinal(), true);
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Get the harvest status matching the given query, including the total
     * number of matching jobs regardless of pagination.
     *
     * @param query the user query
     * @return a HarvestStatus object for the matching jobs
     * @throws IOFailure on trouble getting data from database
     */
    @Override
    public HarvestStatus getStatusInfo(HarvestStatusQuery query) {
        log.debug("Constructing Harveststatus based on given query.");
        PreparedStatement s = null;
        Connection c = HarvestDBConnection.get();

        try {
            // Obtain total count without limit
            // NB this will be a performance bottleneck if the table gets big
            long totalRowsCount = 0;

            s = buildSqlQuery(query, true).getPopulatedStatement(c);
            ResultSet res = s.executeQuery();
            res.next();
            totalRowsCount = res.getLong(1);

            s = buildSqlQuery(query, false).getPopulatedStatement(c);
            res = s.executeQuery();
            List<JobStatusInfo> jobs = makeJobStatusInfoListFromResultset(res);

            if (log.isDebugEnabled()) {
                log.debug("Harveststatus constructed based on given query.");
            }
            return new HarvestStatus(totalRowsCount, jobs);

        } catch (SQLException e) {
            String message = "SQL error asking for job status list in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    /**
     * Calculate all jobIDs to use for duplication reduction.
     *
     * More precisely, this method calculates the following: If the job ID
     * corresponds to a partial harvest, all jobIDs from the previous scheduled
     * harvest are returned, or the empty list if this harvest hasn't been
     * scheduled before.
     *
     * If the job ID corresponds to a full harvest, all jobIDs from the entire
     * chain of harvests this one is based on, and from the previous chain of
     * full harvests, are returned.
     *
     * This method is synchronized to avoid DB locking.
     *
     * @param jobID The job ID to find duplicate reduction data for.
     * @return A list of job IDs (possibly empty) of potential previous harvests
     *         of this job, to use for duplicate reduction.
     * @throws UnknownID if job ID is unknown
     * @throws IOFailure on trouble querying database
     */
    public synchronized List<Long> getJobIDsForDuplicateReduction(long jobID) throws UnknownID {

        Connection connection = HarvestDBConnection.get();
        List<Long> jobs;
        //Select the previous harvest from the same harvestdefinition
        try {
            if (!exists(connection, jobID)) {
                throw new UnknownID("Job ID '" + jobID + "' does not exist in database");
            }

            jobs = DBUtils.selectLongList(connection,
                    "SELECT jobs.job_id FROM jobs, jobs AS original_jobs" + " WHERE original_jobs.job_id=?"
                            + " AND jobs.harvest_id=original_jobs.harvest_id"
                            + " AND jobs.harvest_num=original_jobs.harvest_num-1",
                    jobID);
            List<Long> harvestDefinitions = getPreviousFullHarvests(connection, jobID);
            if (!harvestDefinitions.isEmpty()) {
                //Select all jobs from a given list of harvest definitions
                jobs.addAll(DBUtils.selectLongList(connection, "SELECT jobs.job_id FROM jobs"
                        + " WHERE jobs.harvest_id IN (" + StringUtils.conjoin(",", harvestDefinitions) + ")"));
            }
            return jobs;
        } finally {
            HarvestDBConnection.release(connection);
        }
    }
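
    /**
     * A minimal sketch of a caller using the duplicate-reduction IDs; how the
     * IDs feed into index generation is assumed and not shown here.
     */
    private void exampleDuplicateReduction(long jobId) {
        List<Long> previousJobs = getJobIDsForDuplicateReduction(jobId);
        // Empty on the first run of a harvest; otherwise these job IDs
        // identify the earlier crawls to deduplicate against.
        System.out.println("Deduplicate against jobs: " + previousJobs);
    }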

    /**
     * Find the harvest definition ids from this chain of snapshot harvests and
     * the previous chain of snapshot harvests.
     * @param connection an open connection to the harvestDatabase
     * @param jobID The ID of the job
     * @return A (possibly empty) list of harvest definition ids
     */
    private List<Long> getPreviousFullHarvests(Connection connection, long jobID) {
        List<Long> results = new ArrayList<Long>();
        //Find the job's fullharvest id
        Long thisHarvest = DBUtils.selectFirstLongValueIfAny(connection,
                "SELECT jobs.harvest_id FROM jobs, fullharvests" + " WHERE jobs.harvest_id=fullharvests.harvest_id"
                        + " AND jobs.job_id=?",
                jobID);

        if (thisHarvest == null) {
            //Not a full harvest
            return results;
        }

        //Follow the chain of originating IDs back
        for (Long originatingHarvest = thisHarvest; originatingHarvest != null; originatingHarvest = DBUtils
                .selectFirstLongValueIfAny(connection,
                        "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?",
                        originatingHarvest)) {
            if (!originatingHarvest.equals(thisHarvest)) {
                results.add(originatingHarvest);
            }
        }

        //Find the first harvest in the chain
        Long firstHarvest = thisHarvest;
        if (!results.isEmpty()) {
            firstHarvest = results.get(results.size() - 1);
        }

        //Find the last harvest in the chain before this one
        Long olderHarvest = DBUtils.selectFirstLongValueIfAny(connection,
                "SELECT fullharvests.harvest_id" + " FROM fullharvests, harvestdefinitions,"
                        + "  harvestdefinitions AS currenthd" + " WHERE currenthd.harvest_id=?"
                        + " AND fullharvests.harvest_id" + "=harvestdefinitions.harvest_id"
                        + " AND harvestdefinitions.submitted<currenthd.submitted"
                        + " ORDER BY harvestdefinitions.submitted " + HarvestStatusQuery.SORT_ORDER.DESC.name(),
                firstHarvest);
        //Follow the chain of originating IDs back
        for (Long originatingHarvest = olderHarvest; originatingHarvest != null; originatingHarvest = DBUtils
                .selectFirstLongValueIfAny(connection,
                        "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?",
                        originatingHarvest)) {
            results.add(originatingHarvest);
        }
        return results;
    }

    /**
     * Returns the number of existing jobs.
     *
     * @return Number of jobs in 'jobs' table
     */
    @Override
    public int getCountJobs() {
        Connection c = HarvestDBConnection.get();
        try {
            return DBUtils.selectIntValue(c, "SELECT COUNT(*) FROM jobs");
        } finally {
            HarvestDBConnection.release(c);
        }
    }

    @Override
    public synchronized long rescheduleJob(long oldJobID) {
        Connection connection = HarvestDBConnection.get();
        long newJobID = generateNextID(connection);
        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement("SELECT status FROM jobs WHERE job_id = ?");
            statement.setLong(1, oldJobID);
            ResultSet res = statement.executeQuery();
            if (!res.next()) {
                throw new UnknownID("No job with ID " + oldJobID + " to resubmit");
            }
            final JobStatus currentJobStatus = JobStatus.fromOrdinal(res.getInt(1));
            if (currentJobStatus != JobStatus.SUBMITTED && currentJobStatus != JobStatus.FAILED) {
                throw new IllegalState("Job " + oldJobID + " is not ready to be copied.");
            }

            // Now do the actual copying.
            // Note that startdate and enddate are not copied;
            // they must be null in JobStatus NEW.
            statement.close();
            connection.setAutoCommit(false);

            statement = connection.prepareStatement("INSERT INTO jobs "
                    + " (job_id, harvest_id, channel, snapshot, status,"
                    + "  forcemaxcount, forcemaxbytes, orderxml," + "  orderxmldoc, seedlist, harvest_num,"
                    + "  num_configs, edition, continuationof) " + " SELECT ?, harvest_id, channel, snapshot, ?,"
                    + "  forcemaxcount, forcemaxbytes, orderxml," + "  orderxmldoc, seedlist, harvest_num,"
                    + " num_configs, ?, ?" + " FROM jobs WHERE job_id = ?");
            statement.setLong(1, newJobID);
            statement.setLong(2, JobStatus.NEW.ordinal());
            long initialEdition = 1;
            statement.setLong(3, initialEdition);
            Long continuationOf = null;
            // In case we want to try to continue using the Heritrix recover log
            if (currentJobStatus == JobStatus.FAILED) {
                continuationOf = oldJobID;
            }
            DBUtils.setLongMaybeNull(statement, 4, continuationOf);

            statement.setLong(5, oldJobID);

            statement.executeUpdate();
            statement.close();
            statement = connection.prepareStatement("INSERT INTO job_configs " + "( job_id, config_id ) "
                    + "SELECT ?, config_id " + "  FROM job_configs" + " WHERE job_id = ?");
            statement.setLong(1, newJobID);
            statement.setLong(2, oldJobID);
            statement.executeUpdate();
            statement.close();
            statement = connection
                    .prepareStatement("UPDATE jobs SET status = ?, resubmitted_as_job = ? " + " WHERE job_id = ?");
            statement.setInt(1, JobStatus.RESUBMITTED.ordinal());
            statement.setLong(2, newJobID);
            statement.setLong(3, oldJobID);
            statement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            String message = "SQL error rescheduling job #" + oldJobID + " in database" + "\n"
                    + ExceptionUtils.getSQLExceptionCause(e);
            log.warn(message, e);
            throw new IOFailure(message, e);
        } finally {
            DBUtils.closeStatementIfOpen(statement);
            DBUtils.rollbackIfNeeded(connection, "resubmit job", oldJobID);
            HarvestDBConnection.release(connection);
        }
        log.info("Job # " + oldJobID + " successfully as job # " + newJobID);
        return newJobID;
    }
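
    /**
     * A minimal sketch of resubmitting a failed job; rescheduleJob throws
     * IllegalState unless the job is SUBMITTED or FAILED.
     */
    private long exampleReschedule(long failedJobId) {
        long newJobId = rescheduleJob(failedJobId);
        // The old job is now RESUBMITTED and records newJobId in
        // resubmitted_as_job; a FAILED original is also recorded as
        // continuationof, so the Heritrix recover log can be reused.
        return newJobId;
    }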

    /** Helper-method that constructs a list of JobStatusInfo objects
     * from the given resultset.
     * @param res a given resultset
     * @return a list of JobStatusInfo objects
     * @throws SQLException If any problem occurs while accessing the data in
     * the ResultSet
     */
    private List<JobStatusInfo> makeJobStatusInfoListFromResultset(ResultSet res) throws SQLException {
        List<JobStatusInfo> joblist = new ArrayList<JobStatusInfo>();
        while (res.next()) {
            final long jobId = res.getLong(1);
            joblist.add(
                    new JobStatusInfo(jobId, JobStatus.fromOrdinal(res.getInt(2)), res.getLong(3), res.getString(4),
                            res.getInt(5), res.getString(6), res.getString(7), res.getString(8), res.getInt(9),

                            DBUtils.getDateMaybeNull(res, 10), DBUtils.getDateMaybeNull(res, 11),
                            DBUtils.getDateMaybeNull(res, 12), DBUtils.getDateMaybeNull(res, 13),
                            DBUtils.getLongMaybeNull(res, 14)));
        }
        return joblist;
    }

    /**
     * Internal utility class to build a SQL query using a prepared statement.
     */
    private class HarvestStatusQueryBuilder {
        /** The sql string. */
        private String sqlString;
        // from java.sql.Types
        /** list of parameter classes. */
        private LinkedList<Class<?>> paramClasses = new LinkedList<Class<?>>();
        /** list of parameter values. */
        private LinkedList<Object> paramValues = new LinkedList<Object>();

        /**
         * Constructor.
         */
        HarvestStatusQueryBuilder() {
            super();
        }

        /**
         * @param sqlString the sqlString to set
         */
        void setSqlString(String sqlString) {
            this.sqlString = sqlString;
        }

        /**
         * Add the given class and given value to the list of paramClasses and 
         * paramValues respectively.
         * @param clazz a given class.
         * @param value a given value
         */
        void addParameter(Class<?> clazz, Object value) {
            paramClasses.addLast(clazz);
            paramValues.addLast(value);
        }

        /**
         * Prepare a statement for the database that uses
         * the sqlString, and the paramClasses, and paramValues.
         * Only Integer, Long, String, and Date values accepted.
         * @param c an open connection to the harvestDatabase
         * @return the prepared statement 
         * @throws SQLException If unable to prepare the statement
         * @throws UnknownID If one of the parameter classes is unexpected 
         */
        PreparedStatement getPopulatedStatement(Connection c) throws SQLException {
            PreparedStatement stm = c.prepareStatement(sqlString);

            Iterator<Class<?>> pClasses = paramClasses.iterator();
            Iterator<Object> pValues = paramValues.iterator();
            int pIndex = 0;
            while (pClasses.hasNext()) {
                pIndex++;
                Class<?> pClass = pClasses.next();
                Object pVal = pValues.next();

                if (Integer.class.equals(pClass)) {
                    stm.setInt(pIndex, (Integer) pVal);
                } else if (Long.class.equals(pClass)) {
                    stm.setLong(pIndex, (Long) pVal);
                } else if (String.class.equals(pClass)) {
                    stm.setString(pIndex, (String) pVal);
                } else if (java.sql.Date.class.equals(pClass)) {
                    stm.setDate(pIndex, (java.sql.Date) pVal);
                } else {
                    throw new UnknownID("Unexpected parameter class " + pClass);
                }
            }
            return stm;
        }

    }
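
    /**
     * A minimal sketch of how buildSqlQuery (below) drives the builder: set
     * the SQL string, register typed parameters in order, then bind them all
     * into a PreparedStatement.
     */
    private PreparedStatement exampleBuilderUsage(Connection c) throws SQLException {
        HarvestStatusQueryBuilder sq = new HarvestStatusQueryBuilder();
        sq.setSqlString("SELECT count(*) FROM jobs WHERE status = ?");
        sq.addParameter(Integer.class, JobStatus.NEW.ordinal());
        return sq.getPopulatedStatement(c);
    }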

    /**
     * Builds a query to fetch jobs according to selection criteria.
     * @param query the selection criteria.
     * @param count build a count query instead of selecting columns.
     * @return the proper SQL query.
     */
    private HarvestStatusQueryBuilder buildSqlQuery(HarvestStatusQuery query, boolean count) {

        HarvestStatusQueryBuilder sq = new HarvestStatusQueryBuilder();
        StringBuffer sql = new StringBuffer("SELECT");
        if (count) {
            sql.append(" count(*)");
        } else {
            sql.append(" jobs.job_id, status, jobs.harvest_id,");
            sql.append(" harvestdefinitions.name, harvest_num,");
            sql.append(" harvest_errors, upload_errors, orderxml,");
            sql.append(" num_configs, submitteddate, creationdate, startdate, enddate,");
            sql.append(" resubmitted_as_job");
        }

        sql.append(" FROM jobs, harvestdefinitions ");
        sql.append(" WHERE harvestdefinitions.harvest_id = jobs.harvest_id ");

        JobStatus[] jobStatuses = query.getSelectedJobStatuses();
        if (jobStatuses.length > 0) {
            if (jobStatuses.length == 1) {
                int statusOrdinal = jobStatuses[0].ordinal();
                sql.append(" AND status = ?");
                sq.addParameter(Integer.class, statusOrdinal);
            } else {
                // Note: the first status is inlined as an integer literal;
                // the remaining ones are bound as parameters.
                sql.append("AND (status = ");
                sql.append(jobStatuses[0].ordinal());
                for (int i = 1; i < jobStatuses.length; i++) {
                    sql.append(" OR status = ?");
                    sq.addParameter(Integer.class, jobStatuses[i].ordinal());
                }
                sql.append(")");
            }
        }

        String harvestName = query.getHarvestName();
        boolean caseSensitiveHarvestName = query.getCaseSensitiveHarvestName();
        if (!harvestName.isEmpty()) {
            if (caseSensitiveHarvestName) {
                if (harvestName.indexOf(HarvestStatusQuery.HARVEST_NAME_WILDCARD) == -1) {
                    // No wildcard, exact match
                    sql.append(" AND harvestdefinitions.name = ?");
                    sq.addParameter(String.class, harvestName);
                } else {
                    String harvestNamePattern = harvestName.replaceAll("\\*", "%");
                    sql.append(" AND harvestdefinitions.name LIKE ?");
                    sq.addParameter(String.class, harvestNamePattern);
                }
            } else {
                harvestName = harvestName.toUpperCase();
                if (harvestName.indexOf(HarvestStatusQuery.HARVEST_NAME_WILDCARD) == -1) {
                    // No wildcard, exact match
                    sql.append(" AND UPPER(harvestdefinitions.name) = ?");
                    sq.addParameter(String.class, harvestName);
                } else {
                    String harvestNamePattern = harvestName.replaceAll("\\*", "%");
                    sql.append(" AND UPPER(harvestdefinitions.name) " + " LIKE ?");
                    sq.addParameter(String.class, harvestNamePattern);
                }
            }
        }

        Long harvestRun = query.getHarvestRunNumber();
        if (harvestRun != null) {
            sql.append(" AND jobs.harvest_num = ?");
            sq.addParameter(Long.class, harvestRun);
        }

        Long harvestId = query.getHarvestId();
        if (harvestId != null) {
            sql.append(" AND harvestdefinitions.harvest_id = ?");
            sq.addParameter(Long.class, harvestId);
        }

        long startDate = query.getStartDate();
        if (startDate != HarvestStatusQuery.DATE_NONE) {
            sql.append(" AND startdate >= ?");
            sq.addParameter(java.sql.Date.class, new java.sql.Date(startDate));
        }

        long endDate = query.getEndDate();
        if (endDate != HarvestStatusQuery.DATE_NONE) {
            sql.append(" AND enddate < ?");
            // The end date must be set one day forward, at midnight, so the
            // selected day is included. Calendar.add (unlike roll) carries
            // over into the year field at year boundaries.
            Calendar cal = Calendar.getInstance();
            cal.setTimeInMillis(endDate);
            cal.add(Calendar.DAY_OF_YEAR, 1);
            sq.addParameter(java.sql.Date.class, new java.sql.Date(cal.getTimeInMillis()));
        }

        if (!count) {
            sql.append(" ORDER BY jobs.job_id");
            if (!query.isSortAscending()) {
                sql.append(" " + SORT_ORDER.DESC.name());
            } else {
                sql.append(" " + SORT_ORDER.ASC.name());
            }

            long pagesize = query.getPageSize();
            if (pagesize != HarvestStatusQuery.PAGE_SIZE_NONE) {
                sql.append(" " + DBSpecifics.getInstance().getOrderByLimitAndOffsetSubClause(pagesize,
                        (query.getStartPageIndex() - 1) * pagesize));
            }
        }

        sq.setSqlString(sql.toString());
        return sq;
    }

    /**
     * Get the JobStatus for the job with the given ID.
     * @param jobID A given job ID
     * @return the JobStatus for the job with the given ID.
     * @throws UnknownID if no job exists with id jobID
     */
    public JobStatus getJobStatus(Long jobID) {
        ArgumentNotValid.checkNotNull(jobID, "Long jobID");

        Connection c = HarvestDBConnection.get();
        try {
            Integer statusAsInteger = DBUtils.selectIntValue(c, "SELECT status FROM jobs WHERE job_id = ?", jobID);
            if (statusAsInteger == null) {
                throw new UnknownID("No known job with id=" + jobID);
            }
            return JobStatus.fromOrdinal(statusAsInteger);
        } finally {
            HarvestDBConnection.release(c);
        }
    }
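
    /**
     * A minimal sketch of a status check built on getJobStatus; polling and
     * scheduling are left to the caller.
     */
    private boolean exampleJobIsDone(Long jobId) {
        return getJobStatus(jobId) == JobStatus.DONE;
    }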

}