org.fao.geonet.component.harvester.csw.Harvest.java Source code

Java tutorial

Introduction

Here is the source code for org.fao.geonet.component.harvester.csw.Harvest.java

Source

//=============================================================================
//===   Copyright (C) 2001-2007 Food and Agriculture Organization of the
//===   United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===   and United Nations Environment Programme (UNEP)
//===
//===   This program is free software; you can redistribute it and/or modify
//===   it under the terms of the GNU General Public License as published by
//===   the Free Software Foundation; either version 2 of the License, or (at
//===   your option) any later version.
//===
//===   This program is distributed in the hope that it will be useful, but
//===   WITHOUT ANY WARRANTY; without even the implied warranty of
//===   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===   General Public License for more details.
//===
//===   You should have received a copy of the GNU General Public License
//===   along with this program; if not, write to the Free Software
//===   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===   Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===   Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.component.harvester.csw;

import com.google.common.base.Function;
import jeeves.server.context.ServiceContext;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPReply;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.fao.geonet.Constants;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.csw.common.Csw;
import org.fao.geonet.csw.common.exceptions.CatalogException;
import org.fao.geonet.csw.common.exceptions.InvalidParameterValueEx;
import org.fao.geonet.csw.common.exceptions.MissingParameterValueEx;
import org.fao.geonet.csw.common.exceptions.NoApplicableCodeEx;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.kernel.csw.CatalogService;
import org.fao.geonet.kernel.csw.CswHarvesterResponseExecutionService;
import org.fao.geonet.kernel.csw.services.AbstractOperation;
import org.fao.geonet.kernel.csw.services.SupportedResourceType;
import org.fao.geonet.kernel.harvest.Common.OperResult;
import org.fao.geonet.kernel.harvest.HarvestManager;
import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester;
import org.fao.geonet.kernel.setting.SettingManager;
import org.fao.geonet.kernel.setting.Settings;
import org.fao.geonet.lib.Lib;
import org.fao.geonet.services.harvesting.Util;
import org.fao.geonet.util.ISOPeriod;
import org.fao.geonet.util.MailSender;
import org.fao.geonet.utils.GeonetHttpRequestFactory;
import org.fao.geonet.utils.Log;
import org.fao.geonet.utils.Xml;
import org.jdom.Element;
import org.springframework.context.ApplicationContext;
import org.springframework.http.HttpStatus;
import org.springframework.http.client.ClientHttpResponse;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.*;

/**
 * CSW Harvest operation.
 * <p>
 * OGC 07-006: "This is the pull mechanism that 'pulls' data into the catalogue. That is, this
 * operation only references the data to be inserted or updated in the catalogue, and it is the job
 * of the catalogue service to resolve the reference, fetch that data, and process it into the
 * catalogue."
 *
 * @author heikki doeleman
 */
public class Harvest extends AbstractOperation implements CatalogService {

    /** Name of this CSW operation, as returned by {@link #getName()}. */
    static final String NAME = "Harvest";
    // Taken from the ServiceContext at the start of every execute() call.
    private ApplicationContext applicationContext;
    // Starts out as NAME; execute() overwrites it with a generated per-request id, which
    // createHarvestNode() then uses as the name of the harvester site.
    private String operationId = NAME;
    // Result of Protocol.validate() on the ResponseHandler url, stored by checkResponseHandler().
    // NOTE(review): these three fields hold per-request values in instance state; if one Harvest
    // instance serves concurrent requests they could overwrite each other - confirm the bean scope.
    private Protocol protocol;

    /**
     * Creates the operation. No initialisation happens here; the per-request fields are filled in
     * by {@code execute}.
     */
    public Harvest() {
    }

    /**
     * Returns the name of this CSW operation.
     *
     * @return the constant {@link #NAME}, i.e. {@code "Harvest"}
     */
    public String getName() {
        return NAME;
    }

    /**
     * Executes a CSW Harvest request, see OGC 07-006 section 10.12.
     * <p>
     * The request parameters are validated first (service, version, ResourceType, ResourceFormat,
     * Source and, when present, ResponseHandler). Only after all of them passed is a harvester node
     * created and stored, so that a rejected request does not leave a harvester behind. The harvester
     * is then run synchronously (no ResponseHandler) or asynchronously (ResponseHandler present).
     *
     * @param request        - the Harvest request
     * @param serviceContext - service context of the current request
     * @return the HarvestResponse in synchronous mode, the acknowledgement in asynchronous mode
     * @throws CatalogException validation errors are rethrown unchanged; any other failure is
     *                          reported as a {@link NoApplicableCodeEx}
     */
    public Element execute(Element request, ServiceContext serviceContext) throws CatalogException {
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest execute, request is:\n" + Xml.getString(request));
        }
        try {
            this.applicationContext = serviceContext.getApplicationContext();
            checkService(request);
            checkVersion(request);
            String resourceType = checkResourceType(request);
            checkResourceFormat(request);
            String source = checkSource(request);

            //
            // OGC 07-006 10.12.4.4 :
            // The ResponseHandler parameter is a flag that indicates how the Harvest operation should be processed by
            // a CSW server.
            //
            String responseHandler = request.getChildText("ResponseHandler", Csw.NAMESPACE_CSW);
            boolean asynchronous = !StringUtils.isEmpty(responseHandler);
            if (asynchronous) {
                // Validate before createHarvestNode() stores the harvester: an unsupported protocol must reject
                // the request without leaving a saved harvester behind.
                checkResponseHandler(responseHandler);
            }

            // Define an id for the operation to be used for harvester node name identification and response handler info.
            operationId = "CSW.HarvestOperation:" + UUID.randomUUID().toString();

            // create a new harvester node
            Element node = createHarvestNode(request, resourceType, source, serviceContext);

            Element response;

            //
            // no response handler requested: synchronous execution. Note that client can ask for synchronous execution
            // of a periodic harvester; in that case only the result of the first run are returned synchronously, and
            // results of future runs cannot be tracked by client.
            //
            // OGC 07-006 10.12.4.4 :
            // If the parameter is not present, then the Harvest operation is processed synchronously meaning that the
            // client sends the Harvest request to a CSW and then waits to receive a HarvestResponse or exception
            // message as described in Subclause 10.3.7. The CSW immediately processes the Harvest request, while the
            // client waits for a response. The problem with this mode of operation is that the client may timeout
            // waiting for the server to process the request.
            //
            if (!asynchronous) {
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "CSW Harvest executes synchronously");
                }
                response = doHarvest(node, serviceContext, Mode.SYNCHRONOUS);
            }
            //
            // response handler requested: asynchronous execution.
            //
            // OGC 07-006 10.12.4.4 :
            // If the parameter is present, the Harvest operation is processed asynchronously. In this case, the server
            // responds immediately to a client's request with an acknowledgement message as defined in Subclause
            // 10.8.4.13. The acknowledgment message echoes the clients request, using the <EchoedRequest> element,
            // and may include an optionally generated request identifier using the <RequestId> element. The
            // acknowledgement message tells the client that the request has been received and notification of
            // completion will be send to the URL specified as the value of the ResponseHandler parameter. The Harvest
            // request may then be processed at some later time taking as much time as is required to complete the
            // operation. When the operation is completed, a HarvestResponse message or exception message per Subclause
            // 10.3.7 (if a problem was encountered) is sent to the URL specified as the value of the ResponseHandler
            // parameter.
            else {
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "CSW Harvest executes asynchronously");
                }
                // Immediate acknowledgement answer.
                response = createAcknowledgeResponse(request);
                // run harvester
                doHarvest(node, serviceContext, Mode.ASYNCHRONOUS);
                // deal with results asynchronously
                asynchronousHarvestResponse(node, responseHandler, serviceContext);
            }
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "CSW Harvest execute returns:\n" + Xml.getString(response));
            }
            return response;
        } catch (CatalogException x) {
            // Log through the application log (with stack trace) instead of printing to stderr.
            Log.error(Geonet.CSW_HARVEST, x.getMessage(), x);
            throw x;
        } catch (Exception x) {
            Log.error(Geonet.CSW_HARVEST, x.getMessage(), x);
            throw new NoApplicableCodeEx("ERROR: " + x.getMessage());
        }
    }

    /**
     * Validates the protocol of the response handler url and remembers it.
     * <p>
     * The outcome of {@code Protocol.validate} is stored in the {@code protocol} field; a
     * {@code null} outcome is treated as an unsupported protocol.
     *
     * @param responseHandler - url to send results to
     * @throws InvalidParameterValueEx when the url does not use a supported protocol
     */
    private void checkResponseHandler(String responseHandler) throws InvalidParameterValueEx {
        Protocol detected = Protocol.validate(responseHandler);
        this.protocol = detected;
        if (detected != null) {
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "CSW Harvest checkResponseHandler: OK");
            }
            return;
        }
        String reason = "Unsupported protocol in responseHandler " + responseHandler
                + ". Supported protocols are: ftp://, http://, and mailto:";
        throw new InvalidParameterValueEx("ResponseHandler", reason);
    }

    /**
     * Schedules the delivery of the harvest result to the response handler.
     * <p>
     * An {@code AsyncHarvestResponse} task is handed to the executor of
     * {@code CswHarvesterResponseExecutionService} with a delay of one minute, so this method
     * returns immediately and does not block {@code execute()}. According to the original
     * documentation the task polls until the harvester has finished and then sends a
     * HarvestResponse to the url; that logic is not part of this method.
     *
     * @param harvester       - the harvester
     * @param responseHandler - url to send results to
     * @param serviceContext  - service context of the current request
     */
    private void asynchronousHarvestResponse(Element harvester, String responseHandler,
            ServiceContext serviceContext) {
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            String announcement =
                    "Asynchronous harvest run requested for " + responseHandler + ", starting in 1 minute";
            Log.debug(Geonet.CSW_HARVEST, announcement);
        }
        final long delayInMinutes = 1;
        AsyncHarvestResponse task = new AsyncHarvestResponse(harvester, responseHandler, serviceContext);
        CswHarvesterResponseExecutionService.getExecutionService()
                .schedule(task, delayInMinutes, TimeUnit.MINUTES);
    }

    /**
     * Reads the ResourceType child of the request and verifies that it is supported.
     * <p>
     * OGC 07-006 10.12.4.2 : The ResourceType parameter references a document that defines the
     * structure of the resource being harvested. For high interoperability, this resource should be
     * an XML document, and the ResourceType parameter string value should be a URI that references
     * the structure of that XML document (i.e., its XML Schema namespace identifier URI). If a
     * server can harvest resources in the schema of an information model it supports, the
     * ResourceType URI should be the same as the outputSchema parameter URI defined for the
     * GetRecords operation.
     *
     * @param request - the request
     * @return the requested ResourceType, unchanged, when it is supported
     * @throws MissingParameterValueEx when the request has no ResourceType child
     * @throws InvalidParameterValueEx when {@code SupportedResourceType.fromString} does not know
     *                                 the value
     */
    private String checkResourceType(Element request) throws MissingParameterValueEx, InvalidParameterValueEx {
        final String requested = request.getChildText("ResourceType", Csw.NAMESPACE_CSW);

        // ResourceType is mandatory
        if (requested == null) {
            throw new MissingParameterValueEx("ResourceType");
        }

        // a null lookup result means the type is not one this catalog supports
        if (SupportedResourceType.fromString(requested) == null) {
            throw new InvalidParameterValueEx("ResourceType", "ResourceType not supported: " + requested);
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest checkResourceType OK, returns: " + requested);
        }
        return requested;
    }

    /**
     * Verifies that the ResourceFormat child of the request, when given, is
     * {@code application/xml}.
     * <p>
     * OGC 07-006 10.12.4.3 : The ResourceFormat parameter is used to indicate the encoding used for
     * the resource being harvested. This parameter is included to support the harvesting of
     * metadata resources available in various formats such as plain text, XML or HTML. The values
     * of this parameter shall be a MIME type. If the parameter is not specified then the default
     * value of application/xml shall be assumed.
     *
     * @param request - the request
     * @throws InvalidParameterValueEx when a non-empty format other than {@code application/xml}
     *                                 is requested
     */
    private void checkResourceFormat(Element request) throws InvalidParameterValueEx {
        final String format = request.getChildText("ResourceFormat", Csw.NAMESPACE_CSW);

        // absent or empty means the default (application/xml) applies
        boolean accepted = StringUtils.isEmpty(format) || "application/xml".equals(format);
        if (!accepted) {
            String reason = "ResourceFormat not supported: " + format
                    + ". This catalog only supports XML metadata.";
            throw new InvalidParameterValueEx("ResourceFormat", reason);
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest checkResourceFormat: OK");
        }
    }

    /**
     * Reads the Source child of the request and verifies that it parses as a URL.
     * <p>
     * OGC 07-006 10.12.4.1 : The Source parameter is used to specify a URI reference to the
     * metadata resource to be harvested.
     *
     * @param request - the request
     * @return the harvesting target url, exactly as given in the request
     * @throws MissingParameterValueEx when the request has no Source child
     * @throws InvalidParameterValueEx when {@link URL} rejects the value as malformed
     */
    private String checkSource(Element request) throws MissingParameterValueEx, InvalidParameterValueEx {
        final String target = request.getChildText("Source", Csw.NAMESPACE_CSW);

        // Source is mandatory
        if (target == null) {
            throw new MissingParameterValueEx("Source");
        }

        // The URL object is only constructed for its syntax check; the instance itself is discarded.
        try {
            new URL(target);
        } catch (MalformedURLException malformed) {
            String reason = "Invalid source URL:" + target + " - " + malformed.getMessage();
            throw new InvalidParameterValueEx("Source", reason);
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest checkSource OK, returns: " + target);
        }
        return target;
    }

    /**
     * Creates the XML form of a Harvest request from KVP GET request parameters.
     * <p>
     * See OGC 07-006 10.12.2.
     * <p>
     * {@code service} and {@code version} become attributes of the request element. The harvest
     * parameters (Source, ResourceType, ResourceFormat, HarvestInterval, ResponseHandler) become
     * child elements in the CSW namespace, because that is where the validation and harvesting code
     * of this class reads them from ({@code request.getChildText(name, Csw.NAMESPACE_CSW)}).
     * Parameters that are absent from the map produce no child element.
     *
     * @param params - KVP parameters of the GET request
     * @return the equivalent Harvest request element
     */
    public Element adaptGetRequest(Map<String, String> params) {

        Element request = new Element(getName(), Csw.NAMESPACE_CSW);

        setAttrib(request, "service", params.get("service"));
        setAttrib(request, "version", params.get("version"));

        // Listed in the order of the csw:Harvest content model (CSW-publication.xsd).
        String[] harvestParameters = {
            "Source", "ResourceType", "ResourceFormat", "HarvestInterval", "ResponseHandler"
        };
        for (String parameter : harvestParameters) {
            String value = params.get(parameter);
            if (value != null) {
                request.addContent(new Element(parameter, Csw.NAMESPACE_CSW).setText(value));
            }
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest adaptGetRequest returns:\n" + Xml.getString(request));
        }
        return request;
    }

    /**
     * Returns domain values information for specific parameters.
     * <p>
     * Not implemented for the Harvest operation: this method ignores its argument and always
     * returns {@code null}.
     *
     * @param parameterName - name of param (unused)
     * @return always {@code null}
     * @throws CatalogException declared by the signature; never thrown by this implementation
     */

    public Element retrieveValues(String parameterName) throws CatalogException {
        return null;
    }

    /**
     * Creates a CSW harvester configuration and saves it through the {@link HarvestManager}.
     * <p>
     * The configuration consists of a site (named after the current operation id and pointing at
     * {@code source}), the run options derived from the HarvestInterval of the request, an empty
     * search list and - when the {@code Settings.SYSTEM_CSW_METADATA_PUBLIC} setting is enabled -
     * privileges for group 1. After saving, the node carries the {@code uuid} and {@code id} of the
     * new harvester and an empty {@code info} child.
     * <p>
     * No update of an existing harvester is attempted: harvesters cannot be identified from their
     * target url, so every call saves a new harvester.
     *
     * @param request      - the request
     * @param resourceType - type of resource to harvest (currently unused, see TODO below)
     * @param source       - where to harvest from
     * @param context      - service context of the current request
     * @return the harvester node
     * @throws Exception when the interval cannot be converted or the harvester cannot be saved or
     *                   retrieved
     */
    private Element createHarvestNode(Element request, String resourceType, String source, ServiceContext context)
            throws Exception {
        // TODO use resource type in OutputSchema. Current CSW harvesting client does not allow for setting that.
        Element node = new Element("node");
        // Only CSW harvester could be configured here.
        node.setAttribute("type", "csw");

        node.addContent(buildCswSiteElement(source));
        node.addContent(buildIntervalOptionsElement(request.getChildText("HarvestInterval", Csw.NAMESPACE_CSW)));

        // no search criteria supported in csw harvest, leave empty
        node.addContent(new Element("searches"));

        // no privileges settings supported in csw harvest; use GN-specific setting (if enabled, make metadata public)
        GeonetContext geonetContext = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SettingManager sm = geonetContext.getBean(SettingManager.class);
        if (sm.getValueAsBool(Settings.SYSTEM_CSW_METADATA_PUBLIC, false)) {
            node.addContent(buildPublicPrivilegesElement());
        }

        HarvestManager hm = geonetContext.getBean(HarvestManager.class);
        String uuid = hm.addHarvesterReturnUUID(node);
        node.setAttribute("uuid", uuid);
        node.addContent(new Element("info"));
        AbstractHarvester harvester = hm.getHarvester(uuid);
        if (harvester == null) {
            // Fail with a readable message instead of a NullPointerException on the next line.
            throw new IllegalStateException("Harvester " + uuid + " could not be retrieved after it was added");
        }
        String id = harvester.getID();
        node.setAttribute("id", id);

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "** CSW Harvest createHarvestNode returns:\n" + Xml.getString(node));
        }
        return node;
    }

    /** Builds the site element: operation id as name, source as capabilities url, no account in use. */
    private Element buildCswSiteElement(String source) {
        Element site = new Element("site");
        site.addContent(new Element("name").addContent(operationId));
        site.addContent(new Element("capabilitiesUrl").addContent(source));
        site.addContent(new Element("icon").addContent("csw.gif"));

        // if CSW node is protected by HTTP/BA
        // heikki: then it can't be harvested with csw harvest !
        Element account = new Element("account");
        account.addContent(new Element("use").addContent("false"));
        account.addContent(new Element("username"));
        account.addContent(new Element("password"));
        site.addContent(account);
        return site;
    }

    /** Builds the options element: one run only for an empty or zero interval, periodic otherwise. */
    private static Element buildIntervalOptionsElement(String harvestInterval) throws Exception {
        Element every = new Element("every");
        Element oneRunOnly = new Element("oneRunOnly");
        if (StringUtils.isEmpty(harvestInterval) || harvestInterval.equals(ISOPeriod.ZERO_DURATION)) {
            // single run harvester
            // heikki: why 90 ? it's one run only. Could be empty ?
            every.addContent("90");
            oneRunOnly.addContent("true");
        } else {
            // periodic harvester
            int intervalInMinutes = ISOPeriod.iso8601Period2Minutes(harvestInterval);
            every.addContent(Integer.toString(intervalInMinutes));
            oneRunOnly.addContent("false");
        }
        Element options = new Element("options");
        options.addContent(every);
        options.addContent(oneRunOnly);
        return options;
    }

    /** Builds the privileges element granting view, dynamic and featured to group 1. */
    private static Element buildPublicPrivilegesElement() {
        // <privileges>
        //   <group id="1">
        //     <operation name="view" />
        //     <operation name="dynamic" />
        //     <operation name="featured" />
        //   </group>
        // </privileges>
        Element group = new Element("group");
        group.setAttribute("id", "1");
        for (String operationName : new String[] {"view", "dynamic", "featured"}) {
            Element operation = new Element("operation");
            operation.setAttribute("name", operationName);
            group.addContent(operation);
        }
        Element privileges = new Element("privileges");
        privileges.addContent(group);
        return privileges;
    }

    /**
     * Creates a HarvestResponse from the outcome recorded on the harvester node.
     * <p>
     * When the node has an {@code error} child, the response contains an exception report built
     * from it. Otherwise the {@code added}, {@code updated} and {@code removed} values found under
     * {@code info/result} are reported as a TransactionSummary.
     *
     * @param harvester - the harvester node
     * @param context   - service context of the current request (not used by this method)
     * @return the HarvestResponse element
     * @throws Exception when the node carries neither an error nor an {@code info/result} element
     */
    private Element createHarvestResponse(Element harvester, ServiceContext context) throws Exception {

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "createHarvestResponse for harvester:\n" + Xml.getString(harvester));
        }

        // http://schemas.opengis.net/csw/2.0.2/CSW-publication.xsd#HarvestResponse :
        // The content of the response varies depending on the presence of the ResponseHandler element. If present, then
        // the catalogue should verify the request and respond immediately with an csw:Acknowledgement element in the
        // response. The catalogue must then attempt to harvest the resource at some later time and send the response
        // message to the location specified by the value of the ResponseHandler element using the indicated protocol
        // (e.g. ftp, mailto, http). If the ResponseHandler element is absent, then the catalogue must attempt to
        // harvest the resource immediately and include a TransactionResponse element in the response. In any case, if
        // the harvest attempt is successful the response shall include summary representations of the newly created
        // catalogue item(s).
        Element harvestResponse = new Element("HarvestResponse", Csw.NAMESPACE_CSW);

        Element error = harvester.getChild("error");
        // successful harvesting run
        if (error == null) {
            Element info = harvester.getChild("info");
            Element result = info == null ? null : info.getChild("result");
            if (result == null) {
                // Without this check the missing element surfaced as a NullPointerException, which the
                // caller reported to the client as "ERROR: null".
                throw new IllegalStateException(
                        "Harvester node contains neither an error nor an info/result element; "
                                + "cannot create a HarvestResponse");
            }

            Element transactionResponse = new Element("TransactionResponse", Csw.NAMESPACE_CSW);

            // Reports the total number of catalogue items modified by a transaction request (i.e, inserted, updated,
            // deleted). If the client did not specify a requestId, the server may assign one (a URI value).
            Element transactionSummary = new Element("TransactionSummary", Csw.NAMESPACE_CSW);
            transactionSummary.addContent(
                    new Element("totalInserted", Csw.NAMESPACE_CSW).setText(result.getChildText("added")));
            transactionSummary.addContent(
                    new Element("totalUpdated", Csw.NAMESPACE_CSW).setText(result.getChildText("updated")));
            transactionSummary.addContent(
                    new Element("totalDeleted", Csw.NAMESPACE_CSW).setText(result.getChildText("removed")));
            transactionResponse.addContent(transactionSummary);
            // Returns a "brief" view of any newly created catalogue records. The handle attribute may reference a
            // particular statement in the corresponding transaction request.
            // TODO: impossible to implement with current harvesters because they do not return a list of UUIDs of the inserted metadata.
            // Element insertResult = new Element("InsertResult", Csw.NAMESPACE_CSW);
            // transactionResponse.addContent(insertResult);
            harvestResponse.addContent(transactionResponse);
        }
        // unsuccessful harvesting run
        else {
            harvestResponse.addContent(createExceptionReport(error));
        }
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "createHarvestResponse returns:\n" + Xml.getString(harvestResponse));
        }
        return harvestResponse;
    }

    /**
     * Builds an OWS ExceptionReport from the error element of a harvester node.
     * <p>
     * OGC 07-006 section 10.3.7 : In the event that a catalogue service encounters an error while
     * processing a request or receives an unrecognised request, it shall generate an XML document
     * indicating that an error has occurred. The format of the XML error response is specified by,
     * and shall validate against, the exception response schema defined in clause 8 of the OWS
     * Common Implementation Specification [OGC 05-008c1].
     * <p>
     * The report has version {@code 2.0.2} and holds a single Exception with code
     * {@code TransactionFailure}; its ExceptionText is the {@code class} and {@code message} child
     * texts of the error element joined by {@code ": "}.
     *
     * @param error error element from harvester node
     * @return exception report
     */
    private Element createExceptionReport(Element error) {
        // ExceptionText: free text describing the error; here "<class>: <message>" of the harvester error.
        String description = error.getChildText("class") + ": " + error.getChildText("message");
        Element text = new Element("ExceptionText", Csw.NAMESPACE_OWS);
        text.setText(description);

        // Exception: one detected error, identified by its exceptionCode.
        Element failure = new Element("Exception", Csw.NAMESPACE_OWS);
        failure.addContent(text);
        failure.setAttribute("exceptionCode", "TransactionFailure");

        // ExceptionReport: the message returned to the client; version is an x.y.z specification version.
        Element report = new Element("ExceptionReport", Csw.NAMESPACE_OWS);
        report.setAttribute("version", "2.0.2");
        report.addContent(failure);
        return report;
    }

    /**
     * Creates the Acknowledgement response for asynchronous CSW requests.
     * <p>
     * OGC 07-006 section 10.8.4.14 : The acknowledgment message shall echo the exact XML text of
     * the client's request, using the {@code <EchoedRequest>} element, and may include an optionally
     * generated request identifier using the {@code <RequestId>} element. The echoed request is used
     * to correlate the acknowledgement message with the originating request.
     * <p>
     * The request is echoed as a copy, so the element passed in is neither modified nor moved into
     * the response, and a request that is still attached to a document or parent can be echoed.
     *
     * @param asyncRequest - the request
     * @return acknowledgement response
     */
    private Element createAcknowledgeResponse(Element asyncRequest) {

        Element response = new Element(getName() + "Response", Csw.NAMESPACE_CSW);
        Element acknowledgement = new Element("Acknowledgement", Csw.NAMESPACE_CSW);

        String timeStamp = new ISODate().toString();
        acknowledgement.setAttribute("timeStamp", timeStamp);

        Element echoedRequest = new Element("EchoedRequest", Csw.NAMESPACE_CSW);
        // JDOM refuses to add content that already has a parent, and adding the original would
        // re-parent the caller's element; echo a detached deep copy instead.
        echoedRequest.addContent((Element) asyncRequest.clone());
        acknowledgement.addContent(echoedRequest);

        Element requestId = new Element("RequestId", Csw.NAMESPACE_CSW);
        requestId.addContent("urn:uuid:" + UUID.randomUUID().toString());
        acknowledgement.addContent(requestId);

        response.addContent(acknowledgement);

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST,
                    "CSW Harvest createAcknowledgeResponse returns:\n" + Xml.getString(response));
        }
        return response;
    }

    /**
     * Starts and runs the harvester identified by the {@code id} attribute of the node.
     * <p>
     * The run is issued through {@code Util.exec}. In synchronous mode the method then waits for the
     * harvester to finish and returns a HarvestResponse built from the node; in any other mode it
     * returns the element produced by {@code Util.exec}.
     *
     * @param harvester - the harvester node
     * @param context   - service context of the current request
     * @param mode      - sync or async
     * @return result of harvest
     * @throws Exception when starting, running or waiting for the harvester fails
     */
    private Element doHarvest(Element harvester, ServiceContext context, Mode mode) throws Exception {
        final boolean debug = Log.isDebugEnabled(Geonet.CSW_HARVEST);
        if (debug) {
            Log.debug(Geonet.CSW_HARVEST, "doHarvest start in mode " + mode.toString());
        }

        // request of the form <request><id>...</id></request>
        String harvesterId = harvester.getAttributeValue("id");
        Element idElement = new Element("id");
        idElement.addContent(harvesterId);
        Element runRequest = new Element("request");
        runRequest.addContent(idElement);

        // run
        Util.Job startAndRun = new Util.Job() {

            public OperResult execute(HarvestManager hm, String id) throws Exception {
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "doHarvest starting harvester job");
                }
                hm.start(id);
                return hm.run(id);
            }
        };
        Element outcome = Util.exec(runRequest, context, startAndRun);

        if (mode == Mode.SYNCHRONOUS) {
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "doHarvest waiting for harvester to finish");
            }
            waitForHarvesterToFinish(harvester, context);
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "doHarvest finished waiting for harvester to finish");
            }
            // the synchronous caller gets the harvest summary rather than the raw exec result
            outcome = createHarvestResponse(harvester, context);
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "doHarvest returns\n" + Xml.getString(outcome));
        }
        return outcome;
    }

    /**
     * Reports whether the given harvester is still running.
     * <p>
     * The harvester found under the element's {@code uuid} attribute is asked to add its info to
     * the passed element, so calling this method modifies {@code harvester}.
     *
     * @param harvester - the harvester element; must carry a {@code uuid} attribute
     * @param context   - service context used to reach the HarvestManager
     * @return {@code false} if the HarvestManager returns no harvester for that uuid or if any
     * {@code <running>} element below {@code <info>} reads "false"; {@code true} otherwise
     * @throws Exception if looking up the harvester or adding its info fails
     */
    private boolean isRunning(Element harvester, ServiceContext context) throws Exception {
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "isRunning harvester:\n" + Xml.getString(harvester));
        }

        GeonetContext geonetContext = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        String harvesterUuid = harvester.getAttribute("uuid").getValue();
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "uuid: " + harvesterUuid);
        }

        AbstractHarvester registered = geonetContext.getBean(HarvestManager.class).getHarvester(harvesterUuid);
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "abstractHarvester: " + registered);
        }
        if (registered == null) {
            return false;
        }

        registered.addInfo(harvester);
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "abstractHarvester added info: " + Xml.getString(harvester));
        }

        // Every addInfo() call appends another <running> element instead of overwriting the
        // previous one (a GeoNetwork bug), so reading a single <running> child is not reliable.
        // The harvester counts as stopped as soon as any of the collected elements says "false".
        @SuppressWarnings({ "unchecked" })
        List<Element> runningFlags = harvester.getChild("info").getChildren("running");
        for (int i = 0; i < runningFlags.size(); i++) {
            if ("false".equals(runningFlags.get(i).getText())) {
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "isRunning returns: false");
                }
                return false;
            }
        }

        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "isRunning returns: true");
        }
        return true;
    }

    /**
     * Blocks the calling thread until {@code isRunning} reports that the harvester has stopped.
     * The state is polled every 30 seconds, and the first poll happens only after one full
     * interval has passed.
     *
     * @param harvester - the harvester
     * @param context   - service context handed to {@code isRunning}
     * @throws Exception if polling fails or the sleeping thread is interrupted
     */
    private void waitForHarvesterToFinish(Element harvester, ServiceContext context) throws Exception {
        final long pollIntervalMillis = 30000;
        for (;;) {
            // sleep first, then poll
            Thread.sleep(pollIntervalMillis);
            if (!isRunning(harvester, context)) {
                break;
            }
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "CSW Harvest waitForHarvesterToFinish: harvester still running");
            }
        }
        if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
            Log.debug(Geonet.CSW_HARVEST, "CSW Harvest waitForHarvesterToFinish: harvester no longer running");
        }
    }

    /**
     * Modes of operation for a Harvest request, as consumed by {@code doHarvest}.
     */
    private enum Mode {
        /**
         * Synchronous mode: {@code doHarvest} waits for the harvester to finish and returns the
         * harvest response.
         */
        SYNCHRONOUS,
        /**
         * Asynchronous mode: {@code doHarvest} returns without waiting for the harvester to
         * finish.
         */
        ASYNCHRONOUS
    }

    /**
     * Protocols a ResponseHandler may use. The {@link #toString()} of each constant is the
     * prefix a responseHandler has to start with in order to select that protocol.
     */
    private enum Protocol {
        /**
         * File Transfer Protocol.
         */
        FTP("ftp://"),
        /**
         * Hypertext Transfer Protocol.
         */
        HTTP("http://"),
        /**
         * Electronic mail.
         */
        EMAIL("mailto:");

        /**
         * Leading part of a responseHandler that identifies this protocol.
         */
        private final String prefix;

        Protocol(String prefix) {
            this.prefix = prefix;
        }

        /**
         * Returns the responseHandler prefix of this protocol.
         *
         * @return the prefix, e.g. "ftp://"
         */
        @Override
        public String toString() {
            return prefix;
        }

        /**
         * Finds the protocol whose prefix the given string starts with. Constants are tried in
         * declaration order and the comparison is case-sensitive.
         *
         * @param string - string to match, typically a responseHandler; may be null or empty
         * @return matching protocol, or null if the string is null, empty or matches none
         */
        public static Protocol validate(String string) {
            if (StringUtils.isEmpty(string)) {
                return null;
            }
            for (Protocol candidate : values()) {
                if (string.startsWith(candidate.toString())) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Waits for a harvester to finish and then delivers the resulting HarvestResponse to the
     * responseHandler of the request, using email, FTP or HTTP POST.
     * <p>
     * Only the {@link #run()} part of {@code RunnableFuture} is implemented. The {@code Future}
     * methods are stubs: the task cannot be cancelled, {@link #isDone()} always returns
     * {@code false} and both {@code get} variants return {@code null} without waiting.
     */
    private class AsyncHarvestResponse implements RunnableFuture<Object> {
        /**
         * Receives one permit when {@link #run()} has delivered the response without an exception.
         */
        Semaphore ready = new Semaphore(0);
        private Element harvester;
        private String responseHandler;
        private ServiceContext serviceContext;

        /**
         * Constructor checks if the responseHandler uses a supported protocol.
         *
         * @param harvester       - the harvester
         * @param responseHandler - url to send result to
         * @param serviceContext  - service context used while waiting and sending
         * @throws ExceptionInInitializerError if {@code checkResponseHandler} rejects the
         *                                     responseHandler
         */
        AsyncHarvestResponse(Element harvester, String responseHandler, ServiceContext serviceContext) {
            try {
                checkResponseHandler(responseHandler);
            } catch (InvalidParameterValueEx x) {
                throw new ExceptionInInitializerError("WARNING: unsupported protocol in responseHandler "
                        + responseHandler + ", failed to create AsyncHarvestResponse");
            }
            this.harvester = harvester;
            this.responseHandler = responseHandler;
            this.serviceContext = serviceContext;
        }

        /**
         * Sends Harvest response using email. The mail server settings are read from the
         * SettingManager; the recipient is the responseHandler without its "mailto:" prefix.
         *
         * @param harvestResponse response to send
         */
        private void sendByEmail(String harvestResponse) {
            GeonetContext geonetContext = (GeonetContext) serviceContext.getHandlerContext(Geonet.CONTEXT_NAME);
            SettingManager settingManager = geonetContext.getBean(SettingManager.class);
            String host = settingManager.getValue(Settings.SYSTEM_FEEDBACK_MAILSERVER_HOST);
            String port = settingManager.getValue(Settings.SYSTEM_FEEDBACK_MAILSERVER_PORT);
            String to = responseHandler.substring(Protocol.EMAIL.toString().length());
            MailSender sender = new MailSender(serviceContext);
            sender.send(host, Integer.parseInt(port),
                    settingManager.getValue(Settings.SYSTEM_FEEDBACK_MAILSERVER_USERNAME),
                    settingManager.getValue(Settings.SYSTEM_FEEDBACK_MAILSERVER_PASSWORD),
                    settingManager.getValueAsBool(Settings.SYSTEM_FEEDBACK_MAILSERVER_SSL),
                    settingManager.getValueAsBool(Settings.SYSTEM_FEEDBACK_MAILSERVER_TLS),
                    settingManager
                            .getValueAsBool(Settings.SYSTEM_FEEDBACK_MAILSERVER_IGNORE_SSL_CERTIFICATE_ERRORS),
                    settingManager.getValue(Settings.SYSTEM_FEEDBACK_EMAIL), "GeoNetwork CSW Server", to, null,
                    "Asynchronous CSW Harvest results delivery", harvestResponse);
        }

        /**
         * Sends Harvest response using FTP. The response is stored as "CSW.Harvest.result" in
         * the directory named by the path of the responseHandler URI. Credentials are taken from
         * the user-info part of the URI ("user:password" or just "user"); without user-info an
         * anonymous login is attempted.
         * <p>
         * Delivery is best effort: failures are logged as warnings and never propagated.
         *
         * @param harvestResponse response to send
         */
        private void sendByFTP(String harvestResponse) {
            FTPClient ftpClient = null;
            try {
                ftpClient = new FTPClient();

                // parse ftp uri
                URI ftpUri = new URI(responseHandler);
                String host = ftpUri.getHost();
                int port = ftpUri.getPort();
                String path = ftpUri.getPath();
                String userInfo = ftpUri.getUserInfo();
                String user = null;
                String password = null;
                if (StringUtils.isNotEmpty(userInfo)) {
                    // the password part is optional: "user" alone logs in with an empty password
                    int separator = userInfo.indexOf(':');
                    if (separator < 0) {
                        user = userInfo;
                        password = "";
                    } else {
                        user = userInfo.substring(0, separator);
                        password = userInfo.substring(separator + 1);
                    }
                }
                if (port > 0) {
                    ftpClient.connect(host, port);
                } else {
                    ftpClient.connect(host);
                }
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST))
                    Log.debug(Geonet.CSW_HARVEST, "Connected to " + host + ".");
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST))
                    Log.debug(Geonet.CSW_HARVEST, ftpClient.getReplyString());
                // check if connection is OK
                int reply = ftpClient.getReplyCode();
                if (!FTPReply.isPositiveCompletion(reply)) {
                    ftpClient.disconnect();
                    Log.warning(Geonet.CSW_HARVEST,
                            "Warning: FTP server refused connection. Not sending asynchronous CSW Harvest results to "
                                    + responseHandler);
                    return;
                }
                // send a keep-alive on the control connection every 300 seconds during the transfer
                ftpClient.setControlKeepAliveTimeout(300);

                // login
                boolean loggedIn;
                if (user != null && password != null) {
                    loggedIn = ftpClient.login(user, password);
                } else {
                    loggedIn = ftpClient.login("anonymous", "");
                }
                if (!loggedIn) {
                    // the host is logged instead of the responseHandler, which may contain the password
                    Log.warning(Geonet.CSW_HARVEST, "Warning: FTP login on " + host + " failed (reply code "
                            + ftpClient.getReplyCode() + "). Not sending asynchronous CSW Harvest results.");
                    return;
                }
                // cd to directory
                if (StringUtils.isNotEmpty(path)) {
                    if (!ftpClient.changeWorkingDirectory(path)) {
                        Log.warning(Geonet.CSW_HARVEST, "Warning: could not change to FTP directory " + path
                                + " on " + host + " (reply code " + ftpClient.getReplyCode()
                                + "). Storing asynchronous CSW Harvest results in the current directory.");
                    }
                }

                //
                // transfer file
                //
                String filename = "CSW.Harvest.result";
                InputStream is = new ByteArrayInputStream(harvestResponse.getBytes(Constants.ENCODING));
                boolean stored;
                try {
                    stored = ftpClient.storeFile(filename, is);
                } finally {
                    is.close();
                }
                if (!stored) {
                    Log.warning(Geonet.CSW_HARVEST, "Warning: FTP server " + host + " did not accept " + filename
                            + " (reply code " + ftpClient.getReplyCode()
                            + "). Asynchronous CSW Harvest results were not delivered.");
                }
                ftpClient.logout();
            }
            // never mind, just log it
            catch (IOException x) {
                Log.warning(Geonet.CSW_HARVEST, "WARNING: " + x.getMessage() + " (this exception is swallowed)");
                x.printStackTrace();
            }
            // never mind, just log it
            catch (URISyntaxException x) {
                Log.warning(Geonet.CSW_HARVEST, "WARNING: " + x.getMessage() + " (this exception is swallowed)");
                x.printStackTrace();
            } finally {
                if (ftpClient != null && ftpClient.isConnected()) {
                    try {
                        ftpClient.disconnect();
                    }
                    // never mind, just log it
                    catch (IOException x) {
                        Log.warning(Geonet.CSW_HARVEST,
                                "WARNING: " + x.getMessage() + " (this exception is swallowed)");
                        x.printStackTrace();
                    }
                }
            }
        }

        /**
         * Sends Harvest response using HTTP POST. The body is encoded with
         * {@code Constants.ENCODING}. Any status other than 200 OK and any I/O failure is logged
         * as a warning and otherwise ignored.
         *
         * @param harvestResponse response to send
         */
        private void sendByHTTP(String harvestResponse) {
            HttpPost method = new HttpPost(responseHandler);
            try {
                RequestConfig.Builder config = RequestConfig.custom();
                // name the charset explicitly: without it StringEntity encodes as ISO-8859-1
                method.setEntity(new StringEntity(harvestResponse, Constants.ENCODING));
                config.setAuthenticationEnabled(false);
                method.setConfig(config.build());

                final String requestHost = method.getURI().getHost();
                final ClientHttpResponse httpResponse = applicationContext.getBean(GeonetHttpRequestFactory.class)
                        .execute(method, new Function<HttpClientBuilder, Void>() {
                            @Nullable
                            @Override
                            public Void apply(@Nonnull HttpClientBuilder input) {
                                SettingManager settingManager = applicationContext.getBean(SettingManager.class);
                                Lib.net.setupProxy(settingManager, input, requestHost);
                                input.setRetryHandler(new DefaultHttpRequestRetryHandler());
                                return null;
                            }
                        });
                try {
                    if (httpResponse.getStatusCode() != HttpStatus.OK) {
                        // never mind, just log it
                        Log.warning(Geonet.CSW_HARVEST, "WARNING: Failed to send HarvestResponse to responseHandler "
                                + responseHandler + ", HTTP status is " + httpResponse.getStatusText());
                    }
                } finally {
                    // free whatever resources are held by the response
                    httpResponse.close();
                }
            } catch (IOException x) {
                // never mind, just log it
                Log.warning(Geonet.CSW_HARVEST, "WARNING: " + x.getMessage() + " (this exception is swallowed)");
                x.printStackTrace();
            } finally {
                method.releaseConnection();
            }
        }

        /**
         * Sends a HarvestResponse to the destination specified in responseHandler. Supports http,
         * email and ftp.
         * <p>
         * OGC 07-006 10.12.5: .. send it to the URI specified by the ResponseHandler parameter
         * using the protocol encoded therein. Common protocols are ftp for sending the response to
         * a ftp server and mailto which may be used to send the response to an email address.
         *
         * @param harvestResponse - the response to send
         */
        private void send(Element harvestResponse) {
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "AsyncHarvestResponse send started");
            }

            String harvestResponseString = Xml.getString(harvestResponse);
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "Sending HarvestResponse to " + responseHandler);
            }

            switch (protocol) {
            case EMAIL:
                sendByEmail(harvestResponseString);
                break;
            case FTP:
                sendByFTP(harvestResponseString);
                break;
            case HTTP:
                sendByHTTP(harvestResponseString);
                break;
            default:
                // shouldn't happen
                Log.warning(Geonet.CSW_HARVEST, "WARNING: unsupported protocol for responseHandler "
                        + responseHandler + ". " + "HarvestResponse is not sent.");
            }
            if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                Log.debug(Geonet.CSW_HARVEST, "AsyncHarvestResponse send finished");
            }
        }

        /**
         * Polls periodically whether this harvester is still running and when it has finished
         * creates a HarvestResponse and sends it to the url in responseHandler.
         * <p>
         * Exceptions are logged and not propagated. If the thread is interrupted while waiting,
         * its interrupt status is restored before returning.
         */

        public void run() {
            try {
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "AsyncHarvestResponse run started");
                }
                waitForHarvesterToFinish(harvester, serviceContext);
                Element harvestResponse = createHarvestResponse(harvester, serviceContext);
                send(harvestResponse);
                ready.release();
                if (Log.isDebugEnabled(Geonet.CSW_HARVEST)) {
                    Log.debug(Geonet.CSW_HARVEST, "AsyncHarvestResponse run finished");
                }
            } catch (Exception x) {
                if (x instanceof InterruptedException) {
                    // keep the interruption visible to whoever owns this thread
                    Thread.currentThread().interrupt();
                }
                Log.error(Geonet.CSW_HARVEST,
                        ("ERROR: AsyncHarvestResponse " + x.getMessage() + " (this exception is swallowed)"));
                x.printStackTrace();
            }
        }

        /**
         * Cancellation is not supported by this task.
         *
         * @param mayInterruptIfRunning ignored
         * @return always <tt>false</tt>
         */

        public boolean cancel(boolean mayInterruptIfRunning) {
            return false;
        }

        /**
         * This task can not be cancelled.
         *
         * @return always <tt>false</tt>
         */

        public boolean isCancelled() {
            return false;
        }

        /**
         * Completion is not tracked by this stub implementation.
         *
         * @return always <tt>false</tt>, even after {@link #run()} has returned
         */

        public boolean isDone() {
            return false;
        }

        /**
         * Stub: does not wait for {@link #run()} and has no result to offer.
         *
         * @return always <tt>null</tt>
         * @throws java.util.concurrent.ExecutionException declared by the interface, never thrown
         * @throws InterruptedException                    declared by the interface, never thrown
         */

        public Object get() throws InterruptedException, ExecutionException {
            return null;
        }

        /**
         * Stub: does not wait for {@link #run()} and has no result to offer.
         *
         * @param timeout ignored
         * @param unit    ignored
         * @return always <tt>null</tt>
         * @throws java.util.concurrent.ExecutionException declared by the interface, never thrown
         * @throws InterruptedException                    declared by the interface, never thrown
         * @throws java.util.concurrent.TimeoutException   declared by the interface, never thrown
         */

        public Object get(long timeout, TimeUnit unit)
                throws InterruptedException, ExecutionException, TimeoutException {
            return null;
        }
    }

}