eionet.cr.harvest.BaseHarvest.java Source code

Java tutorial

Introduction

Here is the source code for eionet.cr.harvest.BaseHarvest.java

Source

/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Content Registry 3
 *
 * The Initial Owner of the Original Code is European Environment
 * Agency. Portions created by Zero Technologies are Copyright
 * (C) European Environment Agency.  All Rights Reserved.
 *
 * Contributor(s):
 *        Jaanus Heinlaid
 */

package eionet.cr.harvest;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLConnection;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.mail.MessagingException;
import javax.mail.internet.AddressException;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.xml.sax.SAXException;

import eionet.cr.common.Predicates;
import eionet.cr.config.GeneralConfig;
import eionet.cr.dao.DAOException;
import eionet.cr.dao.DAOFactory;
import eionet.cr.dao.HarvestDAO;
import eionet.cr.dao.HarvestMessageDAO;
import eionet.cr.dao.HarvestSourceDAO;
import eionet.cr.dao.HelperDAO;
import eionet.cr.dao.PostHarvestScriptDAO;
import eionet.cr.dto.HarvestDTO;
import eionet.cr.dto.HarvestMessageDTO;
import eionet.cr.dto.HarvestSourceDTO;
import eionet.cr.dto.ObjectDTO;
import eionet.cr.dto.PostHarvestScriptDTO;
import eionet.cr.dto.PostHarvestScriptDTO.TargetType;
import eionet.cr.dto.SubjectDTO;
import eionet.cr.harvest.load.ContentLoader;
import eionet.cr.harvest.load.FeedFormatLoader;
import eionet.cr.harvest.load.RDFFormatLoader;
import eionet.cr.harvest.util.HarvestMessageType;
import eionet.cr.harvest.util.RDFMediaTypes;
import eionet.cr.util.EMailSender;
import eionet.cr.util.FileDeletionJob;
import eionet.cr.util.Util;
import eionet.cr.util.sesame.SesameUtil;
import eionet.cr.util.sql.SingleObjectReader;
import eionet.cr.web.action.admin.postHarvest.PostHarvestScriptParser;
import eionet.cr.web.security.CRUser;

/**
 *
 * @author Jaanus Heinlaid
 */
public abstract class BaseHarvest implements Harvest {

    /** Logger of this class. */
    private static final Logger LOGGER = Logger.getLogger(BaseHarvest.class);

    /** Default harvesting timeout (36 hours = 129600000 ms), used if no last harvest duration could be detected. */
    protected static final int DEFAULT_HARVEST_TIMEOUT = 129600000;

    /** Minimum possible harvest timeout (10 min = 600000 ms). */
    protected static final int MINIMUM_HARVEST_TIMEOUT = 600000;

    /** A harvest is expected to take no more than the duration of the last harvest multiplied by this constant. */
    protected static final double HARVEST_TIMEOUT_MULTIPLIER = 1.2;

    /** Text/plain content type. */
    protected static final String CONTENT_TYPE_TEXT = "text/plain";

    /** Turtle file extension. */
    protected static final String EXT_TTL = "ttl";

    /** N3 file extension. */
    protected static final String EXT_N3 = "n3";

    /** Container for the DTOs of redirected harvest sources. */
    protected final List<HarvestSourceDTO> redirectedHarvestSources = new ArrayList<HarvestSourceDTO>();

    /** DAO for helper operations (e.g. adding and deleting triples of source metadata). */
    private final HelperDAO helperDAO = DAOFactory.get().getDao(HelperDAO.class);
    /** DAO for harvest records. */
    private final HarvestDAO harvestDAO = DAOFactory.get().getDao(HarvestDAO.class);
    /** DAO for harvest source records. */
    private final HarvestSourceDAO harvestSourceDAO = DAOFactory.get().getDao(HarvestSourceDAO.class);
    /** DAO for harvest message records. */
    private final HarvestMessageDAO harvestMessageDAO = DAOFactory.get().getDao(HarvestMessageDAO.class);

    /** The currently harvested URL. In case of redirections, this is the current redirected-to-URL that is being handled. */
    private String contextUrl;

    /** The harvest source DTO object matching the {@link #contextUrl}. */
    private HarvestSourceDTO contextSourceDTO;

    /** The metadata of the currently harvested source, as it is to be saved into the triple store. */
    private SubjectDTO sourceMetadata;

    /** The current harvest's ID, assigned at the harvest's start. */
    private int harvestId;

    /** List of messages collected during the harvest and saved into the DB. */
    private List<HarvestMessageDTO> harvestMessages = new ArrayList<HarvestMessageDTO>();

    /** If true, all previously present harvest source metadata should be purged from the triple store. */
    private boolean cleanAllPreviousSourceMetadata;

    /** The number of triples stored during this harvest. This does NOT include the generated harvest source metadata! */
    private int storedTriplesCount;

    /** True if the current harvest was initiated by the user (as opposed to the batch harvester in the background). */
    protected boolean isOnDemandHarvest;

    /** The user who initiated the current harvest (if this is an on-demand harvest). */
    private String harvestUser;

    /** Last harvest duration in milliseconds. */
    private long lastHarvestDuration;

    /** True if a fatal error occurred during this harvest, otherwise false. */
    protected boolean isFatalErrorOccured = false;

    /** HTTP response code returned from the harvest source. */
    protected int httpResponseCode;

    /** The timeout value of this harvest. Initialized at first access to the getter. */
    private Integer timeout;

    /**
     * Creates a harvest for the harvest source identified by the given URL. The source is looked up from the database
     * and must exist there.
     *
     * Like the constructor taking a {@link HarvestSourceDTO}, this one also detects the duration of the source's last
     * harvest, so that the harvest timeout can be derived from it instead of always falling back to the default one.
     *
     * @param contextUrl URL of the harvest source to harvest; must not be null or empty
     * @throws HarvestException if the source lookup fails or no source with such URL exists in the database
     */
    protected BaseHarvest(String contextUrl) throws HarvestException {

        changeContext(contextUrl);

        // changeContext() guarantees a non-null context source at this point; failures of the duration lookup are
        // handled inside calculateLastHarvestDuration() by falling back to 0 (i.e. default timeout).
        this.lastHarvestDuration = calculateLastHarvestDuration(this.contextSourceDTO);
    }

    /**
     * Creates a harvest for the given, already loaded harvest source. Also detects the duration of the source's last
     * harvest.
     *
     * @param contextSourceDTO the harvest source to harvest; neither the DTO nor its URL may be null or empty
     */
    protected BaseHarvest(HarvestSourceDTO contextSourceDTO) {

        boolean isUsable = contextSourceDTO != null && !StringUtils.isEmpty(contextSourceDTO.getUrl());
        if (!isUsable) {
            throw new IllegalArgumentException("Context source and its URL must not be null or empty!");
        }

        this.contextSourceDTO = contextSourceDTO;
        this.contextUrl = contextSourceDTO.getUrl();
        this.lastHarvestDuration = calculateLastHarvestDuration(contextSourceDTO);
    }

    /**
     * Executes the harvest: creates the started-harvest record, runs {@link #doHarvest()}, finishes the harvest and finally
     * calls {@link #afterFinish()}.
     *
     * Whatever {@link #doHarvest()} throws is propagated to the caller. In that case the finishing step is asked not to throw
     * its own {@link HarvestException}, so that the original failure is not masked. {@link #afterFinish()} is executed even
     * if the finishing step itself fails.
     *
     * @see eionet.cr.harvest.Harvest#execute()
     * @throws HarvestException if starting, performing or finishing the harvest fails
     */
    @Override
    public final void execute() throws HarvestException {

        startHarvest();

        // Flipped only when doHarvest() returns normally, hence every kind of failure (incl. Errors) is detected.
        boolean harvestSucceeded = false;
        try {
            doHarvest();
            harvestSucceeded = true;
        } finally {
            try {
                finishHarvest(!harvestSucceeded);
            } finally {
                // Must run as the very last thing, even when finishing the harvest has thrown an exception.
                afterFinish();
            }
        }
    }

    /**
     * Logs the start of the harvest and inserts a started-harvest record into the database. The id of the inserted record
     * is remembered in {@link #harvestId}.
     *
     * @throws HarvestException if inserting the started-harvest record fails
     */
    private void startHarvest() throws HarvestException {

        LOGGER.debug("                                                                   ");
        LOGGER.debug(loggerMsg("Starting harvest"));

        // The context source is assumed to exist here, hence no null check.
        final int contextSourceId = contextSourceDTO.getSourceId();

        // When no harvest user has been set, the harvest is attributed to the application's own user.
        final String initiator;
        if (harvestUser != null) {
            initiator = harvestUser;
        } else {
            initiator = CRUser.APPLICATION.getUserName();
        }

        // Persist the started-harvest record.
        try {
            harvestId = getHarvestDAO().insertStartedHarvest(contextSourceId, getHarvestType(), initiator);
        } catch (DAOException e) {
            throw new HarvestException(e.getMessage(), e);
        }
        LOGGER.debug("Saved started-harvest record with id=" + harvestId);
    }

    /**
     * Performs the actual harvesting work. Implemented by extending classes and invoked by {@link #execute()} after the
     * started-harvest record has been created and before the harvest is finished.
     *
     * @throws HarvestException as raised by the implementation when the harvest cannot be performed
     */
    protected abstract void doHarvest() throws HarvestException;

    /**
     * Finishes the harvest. In this order: runs post-harvest scripts, sends harvest messages, updates the harvest source
     * record(s), closes the harvest record and persists the messages, saves source metadata, derives new harvest sources,
     * deletes old harvest history, updates the inference rule and deletes sources in permanent error state.
     *
     * @param dontThrowException if true, problems of the finishing step are logged (or silently skipped in case of a
     *            missing harvest id) instead of being thrown as {@link HarvestException}
     * @throws HarvestException if finishing fails and {@code dontThrowException} is false
     */
    private void finishHarvest(boolean dontThrowException) throws HarvestException {

        try {
            runPostHarvestScripts();
            sendHarvestMessages();

            // A harvest without an id was never started, so there is nothing to close.
            if (harvestId == 0) {
                if (!dontThrowException) {
                    throw new HarvestException("Cannot close an un-started harvest: missing harvest id");
                }
                return;
            }

            // The order of the following steps matters: a DAOException in any of them skips the remaining ones.
            updateHarvestSourceFinished();
            updateHarvestAndMessagesClosed();
            finishSourceMetadata();
            deriveNewHarvestSources();
            housekeepOldHarvests();
            addIntoInferenceRule();
            deleteErroneousSources();

        } catch (DAOException e) {

            if (!dontThrowException) {
                if (isSendNotifications()) {
                    LOGGER.debug(loggerMsg("Sending message about harvest finishing error"));
                    sendFinishingError(e);
                }
                throw new HarvestException(e.getMessage(), e);
            }
            LOGGER.error("Error when finishing harvest: ", e);
        } finally {
            LOGGER.debug(loggerMsg("Harvest finished"));
            LOGGER.debug("                                                                   ");
        }
    }

    /**
     * Hook invoked by {@link #execute()} as the very last thing, after {@link #finishHarvest(boolean)}. This is an abstract
     * method that extending classes must implement.
     */
    protected abstract void afterFinish();

    /**
     * Runs all active post-harvest scripts relevant for this harvest: first the all-source scripts, then the scripts of this
     * particular source and finally the scripts of the types found in the harvested content. Nothing is run if no triples
     * were stored by this harvest.
     *
     * All scripts run in one repository transaction. Any exception rolls the transaction back and is recorded as an error
     * harvest message; it is not propagated to the caller.
     */
    private void runPostHarvestScripts() {

        if (getStoredTriplesCount() <= 0) {
            LOGGER.debug(loggerMsg("Ignoring post-harvest scripts, as no triples were harvested!"));
            return;
        }

        LOGGER.debug(loggerMsg("Running post-harvest scripts"));

        RepositoryConnection repoConn = null;
        try {
            repoConn = SesameUtil.getRepositoryConnection();
            repoConn.setAutoCommit(false);
            PostHarvestScriptDAO scriptDao = DAOFactory.get().getDao(PostHarvestScriptDAO.class);

            // Step 1: scripts that apply to all sources.
            List<PostHarvestScriptDTO> allSourceScripts = scriptDao.listActive(null, null);
            int foundScriptsCount = allSourceScripts.size();
            runScripts(allSourceScripts, repoConn);

            // Step 2: scripts that apply to this source only.
            List<PostHarvestScriptDTO> sourceScripts = scriptDao.listActive(TargetType.SOURCE, getContextUrl());
            foundScriptsCount = foundScriptsCount + sourceScripts.size();
            runScripts(sourceScripts, repoConn);

            // Step 3: scripts that apply to the rdf:type values present in the freshly harvested graph.
            SingleObjectReader<String> typesReader = new SingleObjectReader<String>();
            String typesQuery = "select distinct ?type from <" + getContextUrl() + "> where {?s a ?type}";
            SesameUtil.executeQuery(typesQuery, typesReader, repoConn);
            List<String> harvestedTypes = typesReader.getResultList();
            boolean hasHarvestedTypes = harvestedTypes != null && !harvestedTypes.isEmpty();
            if (hasHarvestedTypes) {

                List<PostHarvestScriptDTO> typeScripts = scriptDao.listActiveForTypes(harvestedTypes);
                foundScriptsCount = foundScriptsCount + typeScripts.size();
                runScripts(typeScripts, repoConn);
            }

            if (foundScriptsCount == 0) {
                LOGGER.debug(loggerMsg("No active post-harvest scripts were found relevant for this source"));
            }

            repoConn.commit();
        } catch (Exception e) {
            SesameUtil.rollback(repoConn);
            String errorText = "Error when running post-harvest scripts: " + e.getMessage();
            addHarvestMessage(errorText, HarvestMessageType.ERROR, Util.getStackTrace(e));
            LOGGER.error(loggerMsg(errorText), e);
        } finally {
            SesameUtil.close(repoConn);
        }
    }

    /**
     * Runs the given post-harvest scripts one by one, in the order of the list. A null or empty list is silently ignored.
     *
     * @param scriptDtos the scripts to run, may be null
     * @param conn the repository connection to run the scripts on
     */
    private void runScripts(List<PostHarvestScriptDTO> scriptDtos, RepositoryConnection conn) {

        boolean hasScripts = scriptDtos != null && !scriptDtos.isEmpty();
        if (hasScripts) {
            for (PostHarvestScriptDTO script : scriptDtos) {
                runScript(script, conn);
            }
        }
    }

    /**
     * Runs a single post-harvest script on the given connection. Unless the script is flagged as run-once, it is re-run
     * (at most 100 more times) for as long as it keeps reporting a positive update count.
     *
     * Exceptions thrown by the execution are logged and recorded as a warning harvest message; they are not propagated.
     *
     * @param scriptDto the script to run
     * @param conn the repository connection to run the script on
     */
    private void runScript(PostHarvestScriptDTO scriptDto, RepositoryConnection conn) {

        TargetType scriptTarget = scriptDto.getTargetType();
        String scriptTargetUrl = scriptDto.getTargetUrl();
        String rawScript = scriptDto.getScript();
        String scriptTitle = scriptDto.getTitle();

        String scriptKind;
        if (scriptTarget == null) {
            scriptKind = "all-source";
        } else {
            scriptKind = scriptTarget.toString().toLowerCase() + "-specific";
        }

        // Only type-specific scripts have an associated type to be passed to the parser.
        String typeUri = null;
        if (scriptTarget != null && scriptTarget.equals(TargetType.TYPE)) {
            typeUri = scriptTargetUrl;
        }
        String executableScript = PostHarvestScriptParser.parseForExecution(rawScript, getContextUrl(), typeUri);

        try {
            LOGGER.debug(MessageFormat.format("Executing {0} script titled \"{1}\":\n{2}", scriptKind, scriptTitle,
                    executableScript));

            int updateCount = SesameUtil.executeSPARUL(executableScript, conn);
            if (updateCount <= 0 || scriptDto.isRunOnce()) {
                LOGGER.debug("Script's update count was " + updateCount);
            } else {
                LOGGER.debug("Script's update count was " + updateCount
                        + ", running it until the count becomes 0, or no more than 100 times ...");

                int repeatedRuns = 0;
                int updateCountSum = updateCount;
                while (updateCount > 0 && repeatedRuns < 100) {
                    updateCount = SesameUtil.executeSPARUL(executableScript, conn, getContextUrl());
                    updateCountSum += updateCount;
                    repeatedRuns++;
                }
                // +1 accounts for the initial run made before the loop.
                LOGGER.debug("Script was run for a total of " + (repeatedRuns + 1) + " times, total update count = "
                        + updateCountSum);
            }
        } catch (Exception e) {
            String warning = MessageFormat.format(
                    "Got exception *** {0} *** when executing the following {1} post-harvest script titled \"{2}\":\n{3}",
                    e.toString(), scriptKind, scriptTitle, executableScript);
            LOGGER.warn(warning);
            addHarvestMessage(warning, HarvestMessageType.WARNING);
        }
    }

    /**
     * Asks the harvest DAO to delete the old harvests of the context source.
     *
     * @throws DAOException if the deletion fails
     */
    private void housekeepOldHarvests() throws DAOException {
        LOGGER.debug(loggerMsg("Deleting old harvests history"));
        HarvestSourceDTO contextSource = getContextSourceDTO();
        getHarvestDAO().deleteOldHarvests(contextSource.getSourceId());
    }

    /**
     * Adds the context source into the inference rule, if the source is reported to be in the inference rule. (It is done
     * because the rule set must be updated after the harvest is done.)
     *
     * @throws DAOException if checking or updating the inference rule fails
     */
    private void addIntoInferenceRule() throws DAOException {
        boolean isInInferenceRule = getHarvestSourceDAO().isSourceInInferenceRule(getContextUrl());
        if (!isInInferenceRule) {
            return;
        }
        LOGGER.debug(loggerMsg("Adding source into inference rule"));
        getHarvestSourceDAO().addSourceIntoInferenceRule(getContextUrl());
    }

    /**
     * Deletes sources that are in a permanent error state, or have been unavailable at least 5 times, after batch
     * harvesting. On-demand harvests never delete anything.
     *
     * Only the current (i.e. last redirected-to) source is checked. If it is to be deleted, the sources that redirected
     * to it are deleted as well. Priority sources are never deleted.
     *
     * @throws DAOException
     *             if deleting fails
     */
    private void deleteErroneousSources() throws DAOException {
        LOGGER.debug(loggerMsg("Checking sources that need removal"));

        // Sources are removed only as a result of batch harvesting.
        if (isOnDemandHarvest) {
            return;
        }

        HarvestSourceDTO contextSource = getContextSourceDTO();
        HashSet<String> sourcesToDelete = new HashSet<String>();

        // Detect why (if at all) the context source qualifies for deletion.
        String deletionReason = null;
        if (contextSource.isPermanentError()) {
            deletionReason = "with permanent error";
        } else if (contextSource.getCountUnavail() >= 5) {
            deletionReason = "with unavailability >= 5";
        }

        if (deletionReason != null && !contextSource.isPrioritySource()) {
            LOGGER.debug(contextSource.getUrl() + "  will be deleted as a non-priority source " + deletionReason);
            sourcesToDelete.add(contextSource.getUrl());

            // The failed source drags along the sources that redirected to it, except the priority ones.
            for (HarvestSourceDTO dto : redirectedHarvestSources) {
                if (!dto.isPrioritySource()) {
                    // Logged only for sources that really get deleted.
                    LOGGER.debug(dto.getUrl() + "  is a redirected source will be deleted.");
                    sourcesToDelete.add(dto.getUrl());
                }
            }
        }

        LOGGER.debug(loggerMsg("sources to be removed count=" + sourcesToDelete.size()));

        // No point in bothering the database when there is nothing to delete.
        if (!sourcesToDelete.isEmpty()) {
            getHarvestSourceDAO().removeHarvestSources(sourcesToDelete);
        }
    }

    /**
     * Completes the harvest source metadata with the number of harvested statements and saves it. Depending on
     * {@link #cleanAllPreviousSourceMetadata}, either all previous metadata of the source is deleted first, or only the
     * predicates that are about to be saved (i.e. a replace is done).
     *
     * @throws DAOException if reading the statement count, or deleting or saving the metadata fails
     */
    private void finishSourceMetadata() throws DAOException {
        if (sourceMetadata == null) {
            sourceMetadata = new SubjectDTO(getContextUrl(), false);
        }

        // Record the number of triples in the context graph under cr:harvestedStatements.
        int harvestedStatements = getHelperDAO().getHarvestedStatements(getContextUrl());
        ObjectDTO statementsLiteral = ObjectDTO.createLiteral(String.valueOf(harvestedStatements), XMLSchema.INTEGER);
        addSourceMetadata(Predicates.CR_HARVESTED_STATEMENTS, statementsLiteral);

        StringBuilder logText = new StringBuilder("Saving ");
        logText.append(sourceMetadata.getTripleCount()).append(" triples of harvest source metadata");
        if (cleanAllPreviousSourceMetadata) {
            logText.append(", cleaning all previous metadata first");
        }
        LOGGER.debug(loggerMsg(logText.toString()));

        if (!cleanAllPreviousSourceMetadata) {
            // Replace mode: delete only those predicates that are about to be saved.
            List<String> subjects = Collections.singletonList(getContextUrl());
            Set<String> predicates = sourceMetadata.getPredicateUris();
            List<String> sources = Collections.singletonList(GeneralConfig.HARVESTER_URI);
            getHelperDAO().deleteSubjectPredicates(subjects, predicates, sources);
        } else {
            // Clean mode: purge everything the harvester has previously said about this source.
            getHarvestSourceDAO().deleteSubjectTriplesInSource(getContextUrl(), GeneralConfig.HARVESTER_URI);
        }
        getHelperDAO().addTriples(sourceMetadata);
    }

    /**
     * Closes the harvest record (storing the source's statement count and the HTTP response code) and inserts all collected
     * harvest messages into the database.
     *
     * @throws DAOException if updating the harvest record or inserting a message fails
     */
    private void updateHarvestAndMessagesClosed() throws DAOException {

        LOGGER.debug(loggerMsg("Updating harvest record, saving harvest messages"));
        Integer sourceStatements = getContextSourceDTO().getStatements();
        // A missing statement count is stored as 0.
        int statementsToSave = sourceStatements != null ? sourceStatements : 0;

        LOGGER.debug("Saving finished-harvest record with id=" + harvestId);
        getHarvestDAO().updateFinishedHarvest(harvestId, statementsToSave, httpResponseCode);

        for (HarvestMessageDTO collectedMessage : harvestMessages) {
            getHarvestMessageDAO().insertHarvestMessage(collectedMessage);
        }
    }

    /**
     * Marks the context source, and then every redirected source, as harvested by the current harvest and saves them
     * through the harvest source DAO.
     *
     * @throws DAOException if updating any of the source records fails
     */
    private void updateHarvestSourceFinished() throws DAOException {
        LOGGER.debug(loggerMsg("Updating harvest source record"));
        HarvestSourceDTO contextSource = getContextSourceDTO();
        contextSource.setLastHarvestId(harvestId);
        getHarvestSourceDAO().updateSourceHarvestFinished(contextSource);

        // The sources that redirected to the context source get the very same harvest id.
        for (HarvestSourceDTO redirectedSource : redirectedHarvestSources) {
            String redirectedUrl = redirectedSource.getUrl();
            LOGGER.debug(loggerMsg("Updating redirected harvest source record [" + redirectedUrl + "]"));
            redirectedSource.setLastHarvestId(harvestId);
            getHarvestSourceDAO().updateSourceHarvestFinished(redirectedSource);
        }
    }

    /**
     * Returns the type of this harvest. The value is stored into the started-harvest record when the harvest starts.
     *
     * @return the harvest type, as defined by the extending class
     */
    protected abstract String getHarvestType();

    /**
     * Formats the given date by delegating to {@link Util#virtuosoDateToString(Date)}.
     *
     * @param date the date to format
     * @return the formatted date
     */
    protected String formatDate(Date date) {
        String formattedDate = Util.virtuosoDateToString(date);
        return formattedDate;
    }

    /**
     * Builds a log message that is suffixed with the current context URL of this harvest.
     *
     * @param messageObject the object whose string representation is the message
     * @return the message followed by the context URL in square brackets
     */
    protected String loggerMsg(Object messageObject) {
        return BaseHarvest.loggerMsg(messageObject, this.contextUrl);
    }

    /**
     * Builds a log message that is suffixed with the given context graph URI.
     *
     * @param messageObject the object whose string representation is the message
     * @param contextGraphUri the URI to append to the message
     * @return the message followed by a space and the given URI in square brackets
     */
    public static String loggerMsg(Object messageObject, String contextGraphUri) {
        StringBuilder text = new StringBuilder();
        text.append(messageObject);
        text.append(" [").append(contextGraphUri).append("]");
        return text.toString();
    }

    /**
     * Returns the currently harvested URL.
     *
     * @see eionet.cr.harvest.Harvest#getContextUrl()
     * @return the context URL
     */
    @Override
    public String getContextUrl() {
        return this.contextUrl;
    }

    /**
     * Returns the DAO used for harvest records.
     *
     * @return the harvestDAO
     */
    protected HarvestDAO getHarvestDAO() {
        return this.harvestDAO;
    }

    /**
     * Returns the DAO used for harvest source records.
     *
     * @return the harvestSourceDAO
     */
    protected HarvestSourceDAO getHarvestSourceDAO() {
        return this.harvestSourceDAO;
    }

    /**
     * Returns the DAO used for harvest message records.
     *
     * @return the harvestMessageDAO
     */
    protected HarvestMessageDAO getHarvestMessageDAO() {
        return this.harvestMessageDAO;
    }

    /**
     * Returns the DAO used for helper operations.
     *
     * @return the helperDAO
     */
    protected HelperDAO getHelperDAO() {
        return this.helperDAO;
    }

    /**
     * Looks up the harvest source with the given URL through the harvest source DAO.
     *
     * @param url URL of the harvest source to look up
     * @return whatever the DAO returns for the given URL
     * @throws DAOException if the lookup fails
     */
    protected HarvestSourceDTO getHarvestSource(String url) throws DAOException {

        HarvestSourceDAO sourceDao = getHarvestSourceDAO();
        return sourceDao.getHarvestSourceByUrl(url);
    }

    /**
     * Switches this harvest to the given context URL and loads the matching harvest source from the database.
     *
     * @param contextUrl the new context URL; must not be null or empty
     * @throws HarvestException if the source lookup fails or no source with such URL exists in the database
     */
    private void changeContext(String contextUrl) throws HarvestException {

        if (StringUtils.isEmpty(contextUrl)) {
            throw new IllegalArgumentException("Context URL must not be empty!");
        }

        // The URL is switched before the lookup, i.e. it stays switched even if the lookup below fails.
        this.contextUrl = contextUrl;

        HarvestSourceDTO resolvedSource;
        try {
            resolvedSource = getHarvestSource(contextUrl);
        } catch (DAOException e) {
            throw new HarvestException(e.getMessage(), e);
        }
        this.contextSourceDTO = resolvedSource;

        if (resolvedSource == null) {
            throw new HarvestException("Context source must exist in the database!");
        }
    }

    /**
     * Switches this harvest to the given context URL and starts a harvest for it: the matching harvest source is loaded from
     * the database and a new started-harvest record is inserted, whose id replaces the current {@link #harvestId}.
     *
     * @param contextUrl the new context URL; must not be null or empty, and a source with this URL must exist
     * @throws HarvestException if the source cannot be loaded or the started-harvest record cannot be inserted
     * @throws DAOException declared in the signature; NOTE(review): nothing visible in this method throws it directly
     */
    protected void startWithNewContext(String contextUrl) throws HarvestException, DAOException {

        changeContext(contextUrl);
        startHarvest();
    }

    /**
     * Returns the id of the current harvest. It is 0 until a started-harvest record has been inserted.
     *
     * @return the harvestId
     */
    protected int getHarvestId() {
        return this.harvestId;
    }

    /**
     * Adds the given predicate-object pair to the metadata of the context source. The metadata container is created on
     * first use, and the object's source URI is set to {@link GeneralConfig#HARVESTER_URI}.
     *
     * @param predicate the predicate URI
     * @param objectDTO the object to add; must not be null
     */
    protected void addSourceMetadata(String predicate, ObjectDTO objectDTO) {

        SubjectDTO metadata = sourceMetadata;
        if (metadata == null) {
            metadata = new SubjectDTO(getContextUrl(), false);
            sourceMetadata = metadata;
        }
        objectDTO.setSourceUri(GeneralConfig.HARVESTER_URI);
        metadata.addObject(predicate, objectDTO);
    }

    /**
     * Returns the harvest source DTO matching the current context URL.
     *
     * @return the contextSourceDTO
     */
    protected HarvestSourceDTO getContextSourceDTO() {
        return this.contextSourceDTO;
    }

    /**
     * Records a harvest message that has no stack trace.
     *
     * @param message the message text
     * @param messageType the type of the message
     */
    protected void addHarvestMessage(String message, HarvestMessageType messageType) {
        final String noStackTrace = null;
        addHarvestMessage(message, messageType, noStackTrace);
    }

    /**
     * Records a harvest message. The message is bound to the harvest id known at the time of the call and is kept in memory
     * in {@link #harvestMessages}.
     *
     * @param message the message text
     * @param messageType the type of the message
     * @param stackTrace the stack trace to attach to the message, may be null
     */
    protected void addHarvestMessage(String message, HarvestMessageType messageType, String stackTrace) {

        List<HarvestMessageDTO> collected = harvestMessages;
        if (collected == null) {
            collected = new ArrayList<HarvestMessageDTO>();
            harvestMessages = collected;
        }

        HarvestMessageDTO harvestMessage = HarvestMessageDTO.create(message, messageType, stackTrace);
        harvestMessage.setHarvestId(harvestId);
        collected.add(harvestMessage);
    }

    /**
     * Derives new harvest sources from the stored content, unless no triples were stored by this harvest. A failure is only
     * logged as a warning; it is not propagated to the caller.
     */
    private void deriveNewHarvestSources() {

        if (storedTriplesCount > 0) {
            LOGGER.debug(loggerMsg("Deriving new harvest sources"));

            try {
                int derivedCount = getHarvestSourceDAO().deriveNewHarvestSources(getContextUrl());
                LOGGER.debug(loggerMsg(derivedCount + " new harvest sources found and inserted"));
            } catch (DAOException e) {
                LOGGER.warn("Failure when extracting new harvest sources", e);
            }
        }
    }

    /**
     * Returns the number of triples stored during this harvest.
     *
     * @see eionet.cr.harvest.Harvest#getStoredTriplesCount()
     * @return the storedTriplesCount
     */
    @Override
    public int getStoredTriplesCount() {
        return this.storedTriplesCount;
    }

    /**
     * Sets whether all previously present metadata of the harvest source should be deleted before the new metadata is saved
     * at the end of the harvest.
     *
     * @param cleanAllPreviousSourceMetadata
     *            the cleanAllPreviousSourceMetadata to set
     */
    protected void setCleanAllPreviousSourceMetadata(boolean cleanAllPreviousSourceMetadata) {
        this.cleanAllPreviousSourceMetadata = cleanAllPreviousSourceMetadata;
    }

    /**
     * Sets the user who initiated this harvest. If left unset (null), the application's own user name is recorded when the
     * harvest starts.
     *
     * @see eionet.cr.harvest.Harvest#setHarvestUser(java.lang.String)
     * @param harvestUser the user name to set
     */
    @Override
    public void setHarvestUser(String harvestUser) {
        this.harvestUser = harvestUser;
    }

    /**
     * Returns the user who initiated this harvest, as set via {@link #setHarvestUser(String)}.
     *
     * @return the harvestUser, may be null
     */
    protected String getHarvestUser() {
        return this.harvestUser;
    }

    /**
     * Sends an error notification about the given throwable that was raised while finishing the harvest. Does nothing if the
     * throwable is null.
     *
     * @param throwable the error to notify about, may be null
     */
    private void sendFinishingError(Throwable throwable) {

        if (throwable == null) {
            return;
        }

        String notificationBody = "The following error happened while finishing the harvest of\n" + contextUrl
                + "\n\n---\n\n" + Util.getStackTrace(throwable);
        sendErrorMessage(notificationBody);
    }

    /**
     * Sends one notification containing all collected harvest messages whose type is known and is not INFO. Nothing is sent
     * if notifications are turned off, or if there are no such messages.
     */
    private void sendHarvestMessages() {

        if (!isSendNotifications() || harvestMessages.isEmpty()) {
            return;
        }
        LOGGER.debug(loggerMsg("Sending harvest messages"));

        // Created lazily, so that it stays null when there is nothing to notify about.
        StringBuilder notificationBody = null;

        for (HarvestMessageDTO collectedMessage : harvestMessages) {

            String typeName = collectedMessage.getType();
            if (typeName == null) {
                continue;
            }

            // Only error-like messages are notified, i.e. the message type must be known and must not be INFO.
            HarvestMessageType parsedType = HarvestMessageType.parseFrom(typeName);
            if (parsedType == null || parsedType.equals(HarvestMessageType.INFO)) {
                continue;
            }

            if (notificationBody == null) {
                notificationBody = new StringBuilder("The following error(s) happened while harvesting\n");
                notificationBody.append(contextUrl);
            }
            notificationBody.append("\n\n---\n\n");

            boolean hasText = StringUtils.isNotBlank(collectedMessage.getMessage());
            boolean hasStackTrace = StringUtils.isNotBlank(collectedMessage.getStackTrace());
            if (!hasText && !hasStackTrace) {
                notificationBody.append("No error message could be found!");
                continue;
            }
            if (hasText) {
                notificationBody.append(collectedMessage.getMessage());
            }
            if (hasStackTrace) {
                notificationBody.append("\n").append(collectedMessage.getStackTrace());
            }
        }

        if (notificationBody != null) {
            sendErrorMessage(notificationBody.toString());
        }

    }

    /**
     * Sends the given error message by e-mail: first to the receivers listed for the context source (if any), then to the
     * system administrators. E-mail failures are logged only; a failure of the first sending does not prevent the second.
     *
     * @param messageBody the body of the e-mail
     */
    private void sendErrorMessage(String messageBody) {

        final String mailSubject = "Error(s) when harvesting " + contextUrl;

        // First the receivers listed in the e-mails list of this source.
        try {
            String[] sourceReceivers = getContextSourceEmailReceivers();
            boolean hasSourceReceivers = sourceReceivers != null && sourceReceivers.length != 0;
            if (hasSourceReceivers) {
                EMailSender.send(sourceReceivers, mailSubject, messageBody, false);
            }
        } catch (AddressException addressException) {
            LOGGER.error("E-mail address formatting error: " + addressException.getMessage());
        } catch (MessagingException messagingException) {
            LOGGER.error("E-mail sending error", messagingException);
        }

        // Then the system administrators.
        try {
            EMailSender.sendToSysAdmin(mailSubject, messageBody);
        } catch (AddressException addressException) {
            LOGGER.error("E-mail address formatting error: " + addressException.getMessage());
        } catch (MessagingException messagingException) {
            LOGGER.error("E-mail sending error", messagingException);
        }
    }

    /**
     * Returns the e-mail addresses to which the error notifications of the context source should be sent. Does *NOT*
     * include the "default" list provided in system configuration.
     *
     * The source's e-mails string is split on semicolons, commas and whitespace.
     *
     * @return the addresses, or null if the source has no (non-blank) e-mails string
     */
    private String[] getContextSourceEmailReceivers() {

        String rawEmails = getContextSourceDTO().getEmails();
        if (StringUtils.isBlank(rawEmails)) {
            return null;
        }
        return StringUtils.split(rawEmails, ";,\t\n\r ");
    }

    /**
     * Returns true if harvest errors should be sent as notifications to selected addresses. Otherwise returns false.
     *
     * {@link BaseHarvest} always returns false for this method, as default behavior. Extending classes can override it.
     *
     * @return true if error notifications should be sent, false otherwise; always false in this base implementation
     */
    protected boolean isSendNotifications() {
        return false;
    }

    /**
     * Sets the number of triples stored during this harvest. A value of 0 or less makes the finishing step skip the
     * post-harvest scripts and the derivation of new harvest sources.
     *
     * @param storedTriplesCount
     *            the storedTriplesCount to set
     */
    protected void setStoredTriplesCount(int storedTriplesCount) {
        this.storedTriplesCount = storedTriplesCount;
    }

    /**
     * Tells whether the given URL is the one currently being handled by this harvest, i.e. whether it equals the current
     * context URL.
     *
     * @see eionet.cr.harvest.Harvest#isBeingHarvested(java.lang.String)
     * @param url the URL to check, may be null
     * @return true if the URL is not null and equals the context URL, otherwise false
     */
    @Override
    public boolean isBeingHarvested(String url) {

        if (url == null) {
            return false;
        }
        return url.equals(contextUrl);
    }

    /**
     * Calculates the duration of the given source's last harvest. If the last harvest failed, the default harvest timeout is
     * returned instead. If the duration cannot be detected (no last harvest, missing start or finish time, or a database
     * error), the method returns 0.
     *
     * @param harvestSource
     *            The source in question.
     * @return The duration of the given source's last harvest in milliseconds, or one of the fallbacks described above.
     */
    private long calculateLastHarvestDuration(HarvestSourceDTO harvestSource) {

        // A failed last harvest tells nothing about the expected duration.
        if (harvestSource.isLastHarvestFailed()) {
            return DEFAULT_HARVEST_TIMEOUT;
        }

        final long undetected = 0;
        try {
            HarvestDTO previousHarvest = harvestDAO.getLastRealHarvestBySourceId(harvestSource.getSourceId());
            if (previousHarvest == null) {
                return undetected;
            }

            Date started = previousHarvest.getDatetimeStarted();
            Date finished = previousHarvest.getDatetimeFinished();
            if (started == null || finished == null) {
                return undetected;
            }
            return finished.getTime() - started.getTime();
        } catch (DAOException e) {
            LOGGER.error(
                    loggerMsg("Failed getting the last harvest, last harvest duration fallback to 0 ms. " + e));
            return undetected;
        }

    }

    /**
     * Returns the harvest timeout, calculating it on the first call and reusing the stored value afterwards.
     *
     * The calculation starts from DEFAULT_HARVEST_TIMEOUT. If the last harvest duration is greater than zero, the timeout
     * becomes that duration multiplied by HARVEST_TIMEOUT_MULTIPLIER. If the outcome is below MINIMUM_HARVEST_TIMEOUT, the
     * minimum is used instead. Note that this method itself applies no upper limit to the calculated value.
     *
     * @return timeout in milliseconds
     */
    protected int getTimeout() {

        // Already calculated by an earlier call, nothing more to do.
        if (timeout != null) {
            return timeout;
        }

        // The default stays in effect unless a usable last harvest duration is available.
        timeout = Integer.valueOf(DEFAULT_HARVEST_TIMEOUT);

        // Assuming lastHarvestDuration was properly set at construction-time.
        String logText;
        if (lastHarvestDuration > 0) {
            int scaled = (int) (lastHarvestDuration * HARVEST_TIMEOUT_MULTIPLIER);
            timeout = Integer.valueOf(scaled);
            logText = "Timeout set to " + timeout + " ms (last harvest duration * " + HARVEST_TIMEOUT_MULTIPLIER + ")";
        } else {
            logText = "Timeout set to the maximum " + DEFAULT_HARVEST_TIMEOUT
                    + " ms, last harvest duration could not be detected";
        }

        // A very quick last harvest must not result in a timeout below the minimum.
        boolean belowMinimum = timeout.intValue() < MINIMUM_HARVEST_TIMEOUT;
        if (belowMinimum) {
            timeout = Integer.valueOf(MINIMUM_HARVEST_TIMEOUT);
            logText = "Timeout set to the minimum " + MINIMUM_HARVEST_TIMEOUT + " ms, last harvest duration was "
                    + lastHarvestDuration + " ms";
        }

        LOGGER.debug(loggerMsg(logText));
        return timeout;
    }

    /**
     * Raises the fatal error flag if the given throwable is a {@code TimeoutException}. Any other throwable leaves the flag
     * untouched. The method is null-safe, because {@code instanceof} evaluates to false for a null reference.
     *
     * @param t
     *            Throwable that occurred during harvesting, may be null.
     */
    protected void checkAndSetFatalExceptionFlag(Throwable t) {
        if (t instanceof TimeoutException) {
            isFatalErrorOccured = true;
        }
    }

    /**
     * Stores the given value in this harvest's {@code isOnDemandHarvest} field.
     *
     * @param isOnDemandHarvest
     *            the isOnDemandHarvest parameter to set
     */
    public void setOnDemandHarvest(boolean isOnDemandHarvest) {
        this.isOnDemandHarvest = isOnDemandHarvest;
    }

    /**
     * Harvests a file located in the local filestore, working directly on the given file object.
     *
     * If a content loader can be determined for the file as it is, the file is loaded straight away. Otherwise the file is
     * first run through {@link FileToRdfProcessor}, and the processing result is loaded if an RDF format could be established
     * for it. A processed file that is a different file than the input is registered for deletion in any case.
     *
     * @param file Given file
     * @param contentType content type saved in earlier harvest
     * @return number of triples loaded, 0 if the file could not be processed into RDF format
     * @throws IOException if error in I/O
     * @throws DAOException if DAO call fails.
     * @throws SAXException if parsing fails
     * @throws RDFHandlerException if error in RDF handler
     * @throws RDFParseException if error in RDF parsing
     */
    protected int processLocalContent(File file, String contentType)
            throws IOException, DAOException, SAXException, RDFHandlerException, RDFParseException {

        // Simple case first: the file can be loaded as it is.
        ContentLoader directLoader = getLocalFileContentloader(file, contentType);
        if (directLoader != null) {
            directLoader.setTimeout(getTimeout());
            LOGGER.debug(loggerMsg("Filestore file is in RDF or web feed format"));
            return loadFile(file, directLoader);
        }

        // Otherwise try to turn the file into RDF and load the outcome of that.
        LOGGER.debug(loggerMsg("Filestore file is not in RDF or web feed format, processing the file further"));
        File convertedFile = null;
        try {
            // The file could be a zipped RDF, an XML with an RDF conversion, N3, or actually a completely valid RDF
            // that simply wasn't declared in the server-returned content type.
            FileToRdfProcessor processor = new FileToRdfProcessor(file, getContextUrl());
            convertedFile = processor.process();

            if (convertedFile == null || processor.getRdfFormat() == null) {
                LOGGER.debug(loggerMsg("File couldn't be processed into RDF format"));
                return 0;
            }

            LOGGER.debug(loggerMsg("File processed into RDF format"));
            ContentLoader rdfLoader = new RDFFormatLoader(processor.getRdfFormat());
            rdfLoader.setTimeout(getTimeout());
            return loadFile(convertedFile, rdfLoader);
        } finally {
            // Never register the input file itself for deletion, only a separate processing result.
            boolean isSeparateFile = convertedFile != null && !file.getPath().equals(convertedFile.getPath());
            if (isSeparateFile) {
                FileDeletionJob.register(convertedFile);
            }
        }
    }

    /**
     * Loads the given file into the triple store through the harvest source DAO, using this harvest's context URL.
     *
     * @param file object in file system.
     * @param contentLoader does the actual loading of triples.
     * @return number of triples, as reported by the DAO.
     * @throws DAOException database exception
     */
    protected int loadFile(File file, ContentLoader contentLoader) throws DAOException {

        String loaderName = contentLoader.getClass().getSimpleName();
        LOGGER.debug(loggerMsg("Loading file into triple store, loader class is " + loaderName));

        return getHarvestSourceDAO().loadContent(file, contentLoader, getContextUrl());
    }

    /**
     * Returns content loader for local files.
     *
     * The content type is resolved in this order: the given value, the media type of the context source, a guess made from
     * the beginning of the file's content. Based on the resolved content type:
     * <ul>
     * <li>RSS and Atom content types get a {@link FeedFormatLoader};</li>
     * <li>for the content type equal to CONTENT_TYPE_TEXT the RDF format is derived from the file name extension (the part
     * after the last dot), recognizing EXT_TTL and EXT_N3 case-insensitively;</li>
     * <li>any other content type is mapped to an RDF format by {@link RDFMediaTypes#toRdfFormat(String)}.</li>
     * </ul>
     *
     * @param file File to re-harvest
     * @param contentType content type originally stored, may be null
     * @return ContentLoader, or {@code null} if the content type could not be resolved or no RDF format matches it
     */
    private ContentLoader getLocalFileContentloader(File file, String contentType) {

        if (contentType == null) {
            contentType = getContextSourceDTO().getMediaType();
        }

        // try to guess contentType
        if (contentType == null) {
            InputStream is = null;
            try {
                is = new BufferedInputStream(new FileInputStream(file));
                contentType = URLConnection.guessContentTypeFromStream(is);
            } catch (Exception e) {
                // Guessing is best effort only, so carry on without a content type, but keep the cause in the log.
                LOGGER.warn(loggerMsg("Error getting content type for " + file.getPath()), e);
            } finally {
                IOUtils.closeQuietly(is);
            }
        }

        if (contentType == null) {
            return null;
        }

        // content type is not null
        if (contentType.startsWith("application/rss+xml") || contentType.startsWith("application/atom+xml")) {
            return new FeedFormatLoader();
        }

        RDFFormat rdfFormat = null;
        if (contentType.equals(CONTENT_TYPE_TEXT)) {
            // Only what follows the last dot counts as the extension. A name without any dot has no extension at all,
            // so a file named just "ttl" or "n3" is not mistaken for Turtle or N3.
            String fileName = file.getName();
            int lastDot = fileName.lastIndexOf('.');
            if (lastDot >= 0) {
                String ext = fileName.substring(lastDot + 1);
                if (ext.equalsIgnoreCase(EXT_TTL)) {
                    rdfFormat = RDFFormat.TURTLE;
                } else if (ext.equalsIgnoreCase(EXT_N3)) {
                    rdfFormat = RDFFormat.N3;
                }
            }
        } else {
            rdfFormat = RDFMediaTypes.toRdfFormat(contentType);
        }

        return rdfFormat == null ? null : new RDFFormatLoader(rdfFormat);
    }
}