org.gbif.harvest.tapir.TapirMetadataHandler.java Source code

Introduction

Here is the source code for org.gbif.harvest.tapir.TapirMetadataHandler.java
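Before the listing itself, here is a minimal usage sketch (not part of the original source). It assumes the handler's collaborators (TemplateUtils, RequestUtils, FileUtils, and so on) have already been constructed and wired by the surrounding harvester application; the parameter values shown are purely illustrative.

import java.util.HashMap;
import java.util.Map;

import org.gbif.harvest.exception.HarvesterException;
import org.gbif.harvest.tapir.TapirMetadataHandler;

public class TapirMetadataExample {

    // 'handler' is assumed to be fully constructed elsewhere, e.g. by the
    // harvester's dependency-injection context
    public static void run(TapirMetadataHandler handler) throws HarvesterException {
        Map<String, String> params = new HashMap<String, String>();
        params.put("name", "Example Provider");                 // BioDatasource name (illustrative)
        params.put("url", "http://example.org/tapir/endpoint"); // TAPIR access point URL (illustrative)
        params.put("uddiKey", "an-example-registry-uuid");      // registry service UUID (illustrative)
        params.put("directory", "example_provider");            // working directory under Constants.BASE_DIR

        // issues the capabilities request, picks the content namespace and
        // mapping file, creates or updates the BioDatasource, and updates
        // its target count and contact metadata
        handler.issueMetadata(params);
    }
}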

Source

/*******************************************************************************
 * Copyright (C) 2008 Global Biodiversity Information Facility Secretariat.
 * All Rights Reserved.
 * 
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 ******************************************************************************/
package org.gbif.harvest.tapir;

import org.gbif.harvest.AbstractHarvester;
import org.gbif.harvest.core.AbstractSynchroniserFactory;
import org.gbif.harvest.core.Constants;
import org.gbif.harvest.exception.HarvesterException;
import org.gbif.harvest.exception.OperationStoppedException;
import org.gbif.harvest.log.CommonGBIFLogEvent;
import org.gbif.harvest.log.I18nLog;
import org.gbif.harvest.log.I18nLogFactory;
import org.gbif.harvest.model.BioDatasource;
import org.gbif.harvest.service.BioDatasourceManager;
import org.gbif.harvest.util.FileUtils;
import org.gbif.harvest.util.GbifLogger;
import org.gbif.harvest.util.JSONUtils;
import org.gbif.harvest.util.RequestUtils;
import org.gbif.harvest.util.TemplateUtils;
import org.gbif.harvest.xml.DigesterUtils;
import org.gbif.util.BioDatasourceUtils;

import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.digester.Digester;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Level;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.DefaultDocument;
import org.dom4j.xpath.DefaultXPath;
import org.xml.sax.SAXException;

/**
 * This is a special handler for TAPIR that creates a new BioDatasource
 * for each resource at a TAPIR endpoint and updates its target count. Typically,
 * there will only ever be a single resource per endpoint. But in the case that
 * the records are represented in ABCD, there are multiple datasets, and the concept
 * dataset title is searchable, each individual dataset becomes its own BioDatasource.
 * In the event that a BioDatasource already exists, its metadata is updated.
 *
 * @author timrobertson
 * @author kbraak
 */
public class TapirMetadataHandler extends AbstractHarvester {

    // commons logging
    protected I18nLog log = I18nLogFactory.getLog(this.getClass());

    protected static final String BASE_LOCATION = "org/gbif/harvest/tapir";
    protected static final String MAPPING_DIRECTORY_NAME = "mapping";
    protected static final String TEMPLATE_DIRECTORY_NAME = "template";

    // the template for the capabilities request
    protected static final String CAPABILITIES_TEMPLATE_FILENAME = "capabilities";

    // the template for the search request
    protected static final String SEARCH_TEMPLATE_FILENAME = "search";

    // the TAPIR template for the metadata request
    protected static final String METADATA_TEMPLATE_FILENAME = "metadata";

    protected static final String METADATA_MAPPING_FILE_NAME = "metadataMapping";

    // the mapping file used to determine which index mapping file to use
    protected static final String CONCEPTUAL_MAPPING_FILENAME = "conceptualMapping";

    // the mapping file to determine which outputModel to use
    protected static final String OUTPUT_MODEL_MAPPING_FILENAME = "outputModelMapping";

    // element for determining the namespace
    protected static final String namespaceResponseXPathElement = "*/schema";
    protected static final String supportedNamespaceAttributeName = "namespace";

    // element for determining the count
    protected static final String COUNT_RESPONSE_XPATH_ELEMENT = "*/summary@totalMatched";
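    // (the '@' separates the element path from the attribute name; see returnCount below)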

    // default constants corresponding to Tapir 1.0, DwC 1.4
    protected static final String DEFAULT_CONTENT_NAMESPACE = "http://rs.tdwg.org/dwc/dwcore/";
    protected static final String DEFAULT_OUTPUT_MODEL = "http://rs.tdwg.org/tapir/cs/dwc/1.4/model/dw_core_geo_cur.xml";

    protected static final String DEFAULT_MAPPING_FILE = "indexMapping_dwc_1_4";
    protected static final String DEFAULT_CONCEPTUAL_SCHEMA = "tapir_1_0";

    protected static final String languageAttributeName = "xml:lang";

    // complete list of Tapir's supported namespaces
    protected static List<String> supported_namespaces = new LinkedList<String>();

    // The names of the keys used in the repeating elements / xpath map.
    // These MUST be the same as the keys in the metadataMapping properties file
    // that identify the repeating elements' XPath expressions
    protected static final String RELATEDENTITY_REPEATING_ELEMENT_NAME = "reXPath";
    protected static final String HASCONTACT_REPEATING_ELEMENT_NAME = "hcXPath";

    // File writers
    protected static BufferedWriter relatedEntityBW = null;
    protected static BufferedWriter hasContactBW = null;

    // Maps of element of interest name / String XPath expression
    protected static Map<String, String> harvestedRelatedEntityElementsOfInterest = new HashMap<String, String>();
    protected static Map<String, String> harvestedHasContactElementsOfInterest = new HashMap<String, String>();

    // Map of repeating element name / XPath expression key/value pairs
    protected static Map<String, String> settingsElementsOfInterest = new HashMap<String, String>();
    protected static Map<String, String> metadataElementsOfInterest = new HashMap<String, String>();
    protected Map<String, DefaultXPath> metadataRepeatingElementsXpath = new HashMap<String, DefaultXPath>();

    // key names: these names must be used in all metadata mapping files
    protected static String resourceNameKeyName = "dataResourceName";
    protected static String resourceDisplayNameKeyName = "dataResourceDisplayName";

    protected static String englishLanguageCode = "en";

    // for use in setting xpaths
    protected Map<String, String> namespaceMap = new HashMap<String, String>();

    private int lineNumber;

    private TemplateUtils templateUtils;
    private FileUtils fileUtils;
    private RequestUtils requestUtils;
    private DigesterUtils digesterUtils;
    private GbifLogger gbifLogger;
    private BioDatasourceManager bioDatasourceManager;
    private List<AbstractSynchroniserFactory> synchroniserFactories = new LinkedList<AbstractSynchroniserFactory>();

    public TapirMetadataHandler(TemplateUtils templateUtils, RequestUtils requestUtils, FileUtils fileUtils,
            DigesterUtils digesterUtils, GbifLogger gbifLogger, BioDatasourceManager bioDatasourceManager,
            List<AbstractSynchroniserFactory> synchroniserFactories) {
        this.templateUtils = templateUtils;
        this.requestUtils = requestUtils;
        this.fileUtils = fileUtils;
        this.digesterUtils = digesterUtils;
        this.gbifLogger = gbifLogger;
        this.bioDatasourceManager = bioDatasourceManager;
        this.synchroniserFactories = synchroniserFactories;
        init();
    }

    /**
     * Defaults supportsTitle to false and constructs the resource name (code).
     *
     * @param name             of BioDatasource
     * @param url              access point URL
     * @param uddiKey          registry service UUID
     * @param params           map of Biodatasource params
     * @param contentNamespace contentNamespace
     * @param mappingFile      mappingFile
     * @param protocol         name
     * @param settings         settings
     *
     * @return Biodatasource id
     *
     * @throws HarvesterException thrown if method fails
     * @see org.gbif.harvest.tapir.TapirMetadataHandler#createOrUpdateBioDatasource(String, String, String, String,
     *      java.util.Map, String, String, String, java.util.Map, String, String)
     */
    private Long createOrUpdateBioDatasource(String name, String url, String uddiKey, Map<String, Object> params,
            String contentNamespace, String mappingFile, String protocol, Map<String, String> settings)
            throws HarvesterException {

        // construct the resource name (last name in url)
        String resourceName = parseNameFromUrl(url);

        Long id = -1L;
        if (StringUtils.isNotBlank(resourceName)) {
            id = createOrUpdateBioDatasource(name, url, resourceName, uddiKey, params, contentNamespace,
                    mappingFile, protocol, settings, "false", (String) params.get("directory"));
        } else {
            log.error("error.resourceName", new String[] { url, name });
        }

        return id;
    }

    /**
     * Construct a new BioDatasource, or update an existing one.
     * Uniqueness is based on the name and uddi key.
     * Return the created/updated BioDatasource's id - later used in
     * updating its target count
     *
     * @param name                of Biodatasource
     * @param url                 access point URL
     * @param resourceName        resource name
     * @param uddiKey             registry service UUID
     * @param params              map of Biodatasource params
     * @param contentNamespace    contentNamespace
     * @param mappingFile         name
     * @param protocol            name
     * @param settings            settings
     * @param supportsTitle       boolean
     * @param parentDirectoryName parent directory name
     *
     * @return id of Biodatasource
     *
     * @throws HarvesterException thrown if method fails
     */
    private Long createOrUpdateBioDatasource(String name, String url, String resourceName, String uddiKey,
            Map<String, Object> params, String contentNamespace, String mappingFile, String protocol,
            Map<String, String> settings, String supportsTitle, String parentDirectoryName)
            throws HarvesterException {

        // Whether we're creating/updating, we always need to update params:
        params.put("url", url);
        params.put("resource_name", resourceName);
        params.put("contentNamespace", contentNamespace);
        params.put("mappingFile", mappingFile);
        params.put("protocol", protocol);
        params.put("supportsTitle", supportsTitle);
        params.put("harvesterFactory", Constants.TAPIR_HARVESTER_FACTORY);

        // construct BioDatasource's name
        String newName = BioDatasourceUtils.constructBioDatasourceName(name, resourceName);
        params.put("name", newName);

        // construct the new, validated directory name
        String newValidDirectoryName = BioDatasourceUtils.constructBioDatasourceOperatorDirectoryName(resourceName,
                parentDirectoryName);
        params.put("directory", newValidDirectoryName);

        // get country name
        String country = null;
        if (params.containsKey("country")) {
            country = (String) params.get("country");
            // "country":null is converted to "country":"\"null\""
            if (StringUtils.equalsIgnoreCase(country, "\"null\"")) {
                country = null;
            }
        }

        // get provider name
        String dataProviderName = null;
        if (params.containsKey("providerName")) {
            dataProviderName = params.get("providerName").toString();
        }

        // add the settings info to params
        if (settings.containsKey("minQueryTermLength")) {
            params.put("minQueryTermLength", settings.get("minQueryTermLength"));
        } else {
            params.put("minQueryTermLength", "0");
        }
        if (settings.containsKey("maxResponseSize")) {
            params.put("maxResponseSize", settings.get("maxResponseSize"));
        } else {
            params.put("maxResponseSize", "0");
        }

        // add synchroniserFactories list to params
        synchroniserFactories = getSynchroniserFactories();
        List<String> factories = new LinkedList<String>();
        for (AbstractSynchroniserFactory factory : synchroniserFactories) {
            factories.add(factory.getClass().getName());
        }
        params.put("synchroniserFactories", factories);

        // check if the retrieved entity has already been saved as a bioDatasource
        Long id = -1L;
        // get resource uuid
        String resourceUuid = null;
        if (params.containsKey("resourceUuid")) {
            resourceUuid = params.get("resourceUuid").toString();
        }
        // if it belongs to a resource, check using resourceKey in case resource name has changed
        if (StringUtils.isNotBlank(resourceUuid)) {
            id = bioDatasourceManager.checkIfBioDatasourceExists(resourceUuid, uddiKey,
                    Constants.TAPIR_HARVESTER_FACTORY);
        } else {
            id = bioDatasourceManager.checkIfBioDatasourceExists(newName, uddiKey);
        }

        int defaultCount = 0;
        try {
            // if this is a new BioDatasource
            if (id.compareTo(-1L) == 0) {

                // update params
                Map<String, Object> newParams = new HashMap<String, Object>();
                newParams.putAll(params);

                // newParams.put("directory", newValidDirectoryName);
                newParams.put("uddiKey", uddiKey);
                newParams.put("targetCount", defaultCount);
                String parametersAsJson = JSONUtils.jsonFromMap(newParams);

                // create new BioDatasource
                BioDatasource datasource = new BioDatasource(newName, dataProviderName,
                        Constants.TAPIR_HARVESTER_FACTORY, parametersAsJson, uddiKey, country, url);

                BioDatasource bioDatasource = bioDatasourceManager.save(datasource);
                log.info("createBioDatasource", newName);

                return bioDatasource.getId();

            } else {
                BioDatasource bioDatasource = bioDatasourceManager.get(id);

                // update params
                Map<String, Object> oldParams = JSONUtils.mapFromJSON(bioDatasource.getParametersAsJSON());
                oldParams.putAll(params);
                bioDatasource.setParametersAsJSON(JSONUtils.jsonFromMap(oldParams));

                // in case the name got changed
                bioDatasource.setName(BioDatasourceUtils.prepareStringForUI(newName));

                // in case the url has changed
                bioDatasource.setUrl(BioDatasourceUtils.prepareStringForUI(url));

                // in case the country has changed
                bioDatasource.setCountry(BioDatasourceUtils.prepareStringForUI(country));

                // in case the provider name has changed
                bioDatasource.setProviderName(BioDatasourceUtils.prepareStringForUI(dataProviderName));

                bioDatasourceManager.save(bioDatasource);
                log.info("createBioDatasource.exists", bioDatasource.getName());
            }
        } catch (Exception e) {
            log.error("error.createBioDatasource", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        return id;
    }

    /**
     * @return the bioDatasourceManager
     */
    public BioDatasourceManager getBioDatasourceManager() {
        return bioDatasourceManager;
    }

    /**
     * Executes a capabilities request, saves the response to a file,
     * and returns the response as a ByteArrayInputStream.
     *
     * @param destination     access point URL
     * @param outputDirectory directory to write to
     * @param protocol        name
     *
     * @return capabilities response as ByteArrayInputStream
     *
     * @throws HarvesterException thrown if method fails
     */
    public ByteArrayInputStream getCapabilities(String destination, String outputDirectory, String protocol)
            throws HarvesterException {
        log.info("tapirmetadatahandler.start.getCapabilities");

        // build the parameters required for the template into a map
        Map<String, String> templateParams = new HashMap<String, String>();
        templateParams.put("destination", destination);

        // Prepare directory
        File directory = new File(outputDirectory);
        log.debug("tapirmetadatahandler.start.getCapabilities.prepareDirectory");
        if (directory.isDirectory()) {
            try {
                // remove all capabilities requests and responses
                fileUtils.prepareDirectory(outputDirectory, Constants.CAPABILITIES_PREFIX);
                log.debug("tapirmetadatahandler.end.getCapabilities.prepareDirectory");
            } catch (Exception e) {
                log.error("tapirmetadatahandler.error.getCapabilities.prepareDirectory", e.getMessage(), e);
                throw new HarvesterException(e.getMessage(), e);
            }
        }

        // build the TAPIR capabilities request
        // NOTE: here we use the default protocol, as we don't
        // actually know the protocol yet. Ideally though, the capabilities
        // request would not change through different versions of TAPIR
        String query;
        String request;

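        // e.g. org/gbif/harvest/tapir/tapir_1_0/template/capabilities.vm
        // (illustrative; assumes the default protocol and a ".vm" velocity extension)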
        String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
                .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(CAPABILITIES_TEMPLATE_FILENAME)
                .concat(Constants.VELOCITY_FILENAME_EXTENSION);

        try {
            query = templateUtils.getAndMerge(templateLocation, templateParams);
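            // buildURL presumably appends the query as a single "request"
            // parameter, e.g. destination?request=<url-encoded query> (assumption
            // based on the parameter name)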
            request = requestUtils.buildURL(destination, "request", query);
        } catch (Exception e) {
            log.error("tapirmetadatahandler.error.getCapabilities.buildUrl", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // save the request
        try {
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.CAPABILITIES_REQUEST_FILENAME,
                    query.getBytes());
        } catch (IOException e) {
            log.warn("tapirmetadatahandler.error.getCapabilities.writeRequest", e.getMessage(), e);
        }

        // fire the request
        ByteArrayInputStream is;
        byte[] array;
        try {
            // get response as byte array
            log.debug("tapirmetadatahandler.getCapabilities.execute");
            array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory,
                    destination);

            // save the response as gzipped file
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.CAPABILITIES_RESPONSE_FILENAME,
                    array);

            // convert byte array into inputStream
            is = new ByteArrayInputStream(array);
        }
        // was the operation stopped?
        catch (OperationStoppedException e) {
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getCapabilities.writeResponse", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        log.info("tapirmetadatahandler.end.getCapabilities");
        return is;
    }

    /**
     * Parse the search response for the count information.
     *
     * @param inputStream search response as ByteArrayInputStream
     *
     * @return count
     *
     * @throws HarvesterException thrown if method fails
     */
    public String getCount(ByteArrayInputStream inputStream) throws HarvesterException {
        log.info("tapirmetadatahandler.start.getCount");

        // retrieve the count
        String count;
        try {
            count = returnCount(inputStream, COUNT_RESPONSE_XPATH_ELEMENT);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getCount.parsing", e.getMessage());
            throw new HarvesterException(e.getMessage(), e);
        } catch (SAXException e) {
            log.error("tapirmetadatahandler.error.getCount.parsing", e.getMessage());
            throw new HarvesterException(e.getMessage(), e);
        }

        log.info("tapirmetadatahandler.end.getCount");
        return count;
    }

    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * Determine the index mapping file.
     * If there is a problem loading the file, or no match exists for the
     * contentNamespace, the default is used.
     *
     * @param contentNamespace contentNamespace
     *
     * @return mapping file name
     *
     * @throws HarvesterException thrown if method fails
     */
    private String getMappingFile(String contentNamespace) throws HarvesterException {

        // Initially, set the mapping file to the default
        String mappingFile = DEFAULT_MAPPING_FILE;

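        // conceptualMapping.properties maps content namespaces to index mapping
        // file names, e.g. (illustrative line, matching the class defaults):
        //   http\://rs.tdwg.org/dwc/dwcore/ = indexMapping_dwc_1_4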
        Properties mapping = new Properties();
        String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
                CONCEPTUAL_MAPPING_FILENAME);
        InputStream is = null;
        try {
            is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
            mapping.load(is);
            for (Object key : mapping.keySet()) {
                // match on contentnamespace determines mapping file
                if (StringUtils.equals(contentNamespace, (String) key)) {
                    mappingFile = mapping.getProperty((String) key);
                }
            }
        } catch (NullPointerException e) {
            log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getMappingFile", e.getMessage(), e);
            log.debug("tapirmetadatahandler.default.getMappingFile", mappingFile);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.error(
                            "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(),
                            e);
                }
            }
        }

        return mappingFile;
    }

    /**
     * Get the most prioritised content namespace.
     * In the event the capabilities response cannot be parsed,
     * the default content namespace is used.
     *
     * @param inputStream capabilities response as ByteArrayInputStream
     * @param directory   as String
     *
     * @return most prioritised content namespace
     *
     * @throws HarvesterException thrown if method fails
     */
    private String getNamespace(ByteArrayInputStream inputStream, String directory) throws HarvesterException {
        log.info("tapirmetadatahandler.start.getNamespace");

        // Initially, set the namespace to the default
        String newestNamespace = DEFAULT_CONTENT_NAMESPACE;

        // reset the stream, as we're reading it a second time
        if (inputStream != null) {
            inputStream.reset();
        }

        // retrieve the list of supported namespaces
        try {
            // namespaces = returnNamespace(fis, NAMESPACE_RESPONSE_XPATH_ELEMENT);
            Set<String> namespaces = digesterUtils.xmlToListOfAttributeValuesForSingleElement(inputStream,
                    TapirMetadataHandler.namespaceResponseXPathElement,
                    TapirMetadataHandler.supportedNamespaceAttributeName);

            // Iterate through the ordered list of supported namespaces and
            // return the first (i.e. most prioritised) one that is present
            // in the set of namespaces retrieved from the capabilities response
            for (String supportedNamespace : supported_namespaces) {
                if (namespaces.contains(supportedNamespace)) {
                    newestNamespace = supportedNamespace;
                    log.debug("tapirmetadatahandler.getNamespace.chooseNamespace", newestNamespace);
                    log.info("tapirmetadatahandler.end.getNamespace");
                    return newestNamespace;
                }
            }
            // if not found, alert operator
            log.error("tapirmetadatahandler.default.conceptualMappingNotFound", namespaces.toString());
            // and write GBIF Log Message
            gbifLogger.openAndWriteToGbifLogMessageFile(directory,
                    CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getName(),
                    CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getValue(), Level.ERROR_INT,
                    "None of the namespace(s) " + namespaces.toString()
                            + " was not found in the TAPIR conceptualMapping.properties file. Please update this file with valid namespace(s) and try again. Defaulting to namespace http://rs.tdwg.org/dwc/dwcore/",
                    1, false);

        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getNamespace.parsing", e.getMessage(), e);
            log.debug("tapirmetadatahandler.default.getNamespace.chooseNamespace", newestNamespace);
            // throw new HarvesterException(e.getMessage(), e);
        } catch (SAXException e) {
            log.error("tapirmetadatahandler.error.getNamespace.parsing", e.getMessage(), e);
            log.debug("tapirmetadatahandler.default.getNamespace.chooseNamespace", newestNamespace);
            // throw new HarvesterException(e.getMessage(), e);
        }

        // close inputStream
        try {
            if (inputStream != null) {
                inputStream.close();
            }
        } catch (Exception e) {
            // do nothing
        }

        log.info("tapirmetadatahandler.end.getNamespace");
        return newestNamespace;
    }

    /**
     * Determine the outputModel from the appropriate mapping file.
     * If there is a problem loading the file, or no match exists for the
     * contentNamespace, the default is used.
     *
     * @param contentNamespace contentNamespace
     *
     * @return mapping file name
     *
     * @throws HarvesterException thrown if method fails
     */
    private String getOutputModel(String contentNamespace) throws HarvesterException {

        // Initially, set the outputModel to the default
        String outputModel = DEFAULT_OUTPUT_MODEL;

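        // outputModelMapping.properties maps content namespaces to output model
        // URLs, e.g. (illustrative line, matching the class defaults):
        //   http\://rs.tdwg.org/dwc/dwcore/ = http://rs.tdwg.org/tapir/cs/dwc/1.4/model/dw_core_geo_cur.xml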
        Properties mapping = new Properties();
        String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
                OUTPUT_MODEL_MAPPING_FILENAME);
        boolean found = false;
        InputStream is = null;
        try {
            is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
            mapping.load(is);
            for (Object key : mapping.keySet()) {
                if (StringUtils.equals(contentNamespace, (String) key)) {
                    outputModel = mapping.getProperty((String) key);
                    found = true;
                }
            }
            // if not found, alert operator
            if (!found) {
                log.error("digirmetadatahandler.default.outputModelMappingNotFound", contentNamespace);
            }
        } catch (NullPointerException e) {
            log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getOutputModel", e.getMessage(), e);
            log.debug("tapirmetadatahandler.default.getOutputModel", outputModel);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.error(
                            "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(),
                            e);
                }
            }
        }

        return outputModel;
    }

    /**
     * Executes a search request, saves the response to a file,
     * and returns the response as a ByteArrayInputStream.
     *
     * @param destination      access point URL
     * @param outputDirectory  directory to which the response will be saved
     * @param outputModel      outputModel
     * @param resourceName     resource name
     * @param datasetTitlePath dataset title path
     * @param protocol         name
     *
     * @return search response as ByteArrayInputStream
     *
     * @throws HarvesterException thrown if method fails
     */
    public ByteArrayInputStream getSearch(String destination, String outputDirectory, String outputModel,
            String resourceName, String datasetTitlePath, String protocol) throws HarvesterException {
        log.info("tapirmetadatahandler.start.getSearch");

        // build the parameters required for the template into a map
        Map<String, String> templateParams = new HashMap<String, String>();
        templateParams.put("outputModel", outputModel);

        // if the dataset title name is not null, and the dataset title path is
        // not null, add a filter by title name (using the title path)
        if (StringUtils.isNotBlank(resourceName) && StringUtils.isNotBlank(datasetTitlePath)) {
            templateParams.put("datasetTitle", resourceName);
            templateParams.put("datasetTitlePath", datasetTitlePath);
        }

        // Prepare directory
        File directory = new File(outputDirectory);
        log.debug("tapirmetadatahandler.start.getSearch.prepareDirectory");
        if (directory.isDirectory()) {
            try {
                // remove all metadata requests and responses
                fileUtils.prepareDirectory(outputDirectory, Constants.OTHER_METADATA_PREFIX);
                log.debug("tapirmetadatahandler.end.getSearch.prepareDirectory");
            } catch (Exception e) {
                log.error("tapirmetadatahandler.error.getSearch.prepareDirectory", e.getMessage(), e);
                throw new HarvesterException(e.getMessage(), e);
            }
        }

        // build the TAPIR search request
        String query;
        String request;

        String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
                .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(SEARCH_TEMPLATE_FILENAME)
                .concat(Constants.VELOCITY_FILENAME_EXTENSION);

        try {
            query = templateUtils.getAndMerge(templateLocation, templateParams);
            request = requestUtils.buildURL(destination, "request", query);
        } catch (Exception e) {
            log.error("tapirmetadatahandler.error.getSearch.buildUrl", e.getMessage());
            throw new HarvesterException(e.getMessage(), e);
        }

        // save the request
        try {
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.OTHER_METADATA_REQUEST_FILENAME,
                    query.getBytes());
        } catch (IOException e) {
            log.warn("tapirmetadatahandler.error.getSearch.writeRequest", e.getMessage());
        }

        // fire the request
        ByteArrayInputStream is;
        byte[] array;
        try {
            log.debug("tapirmetadatahandler.getSearch.execute");
            array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory,
                    destination);

            // save the response as a gzipped file
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.OTHER_METADATA_RESPONSE_FILENAME,
                    array);

            is = new ByteArrayInputStream(array);
        }
        // was the operation stopped?
        catch (OperationStoppedException e) {
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.getSearch.writeResponse", e.getMessage());
            throw new HarvesterException(e);
        }

        log.info("tapirmetadatahandler.end.getSearch");
        return is;
    }

    /**
     * Collect settings information from the capabilities response.
     *
     * @param inputStream capabilities response as ByteArrayInputStream
     *
     * @return settings
     *
     * @throws HarvesterException thrown if method fails
     */
    public Map<String, String> getSettings(ByteArrayInputStream inputStream) throws HarvesterException {
        log.info("tapirmetadatahandler.start.getSettings");

        Map<String, String> settings = new HashMap<String, String>();
        // retrieve settings information
        try {
            settings = digesterUtils.parseElementsOfInterest(inputStream, settingsElementsOfInterest, true);
        } catch (Exception e) {
            log.warn("tapirmetadatahandler.error.getSettings.parsing", e.getMessage(), e);
        }

        log.info("tapirmetadatahandler.end.getSettings");
        return settings;
    }

    /**
     * @return the synchroniserFactories
     */
    public List<AbstractSynchroniserFactory> getSynchroniserFactories() {
        return synchroniserFactories;
    }

    private void init() {
        // with default (tapir 1.0) values as placeholders
        metadataRepeatingElementsXpath = new HashMap<String, DefaultXPath>();
        metadataRepeatingElementsXpath.put(RELATEDENTITY_REPEATING_ELEMENT_NAME,
                new DefaultXPath("//vcard:relatedEntity"));
        metadataRepeatingElementsXpath.put(HASCONTACT_REPEATING_ELEMENT_NAME,
                new DefaultXPath("//vcard:hasContact"));

        // when more versions of TAPIR become available, these can no longer be hard-coded.
        namespaceMap = new HashMap<String, String>();
        namespaceMap.put("tapir_1_0", "http://rs.tdwg.org/tapir/1.0");
        namespaceMap.put("vcard", "http://www.w3.org/2001/vcard-rdf/3.0#");
    }

    /**
     * The entry point required for the user interface integration.
     *
     * @param params map of the datasource to whom the operation belongs
     *
     * @throws HarvesterException thrown if method fails
     */
    public void issueMetadata(Map<String, String> params) throws HarvesterException {
        Map<String, Object> paramsCopy = new HashMap<String, Object>();
        paramsCopy.putAll(params);
        issueMetadata(params.get("name"), params.get("url"), params.get("uddiKey"),
                Constants.BASE_DIR.concat(File.separator).concat(params.get("directory")), paramsCopy);
    }

    /**
     * Issues a capabilities request to a Tapir access point.
     * Determines the highest priority content namespace.
     * In the event that the content namespace is ABCD, the possibility
     * exists that there may be several datasets behind it. Therefore,
     * a new BioDatasource is created for each dataset, with its
     * name, and other attributes set accordingly. In the event that there is only a single dataset behind it,
     * a single new BioDatasource is created.
     * Typically, however, a Tapir access point only has a single
     * resource.
     * In order to set the dataset's count, a search request must be sent
     * with no filter and the totalMatched attribute retrieved.
     * Where there are multiple datasets to gather counts
     * for, additional requests are sent, filtering by dataset title.
     * If an endpoint does not support searching by dataset title,
     * this information is logged.
     *
     * @param name      of the datasource
     * @param url       of the datasource
     * @param uddiKey   of the datasource
     * @param directory to save files to
     * @param params    map of the datasource
     *
     * @throws HarvesterException thrown if method fails
     */
    public void issueMetadata(String name, String url, String uddiKey, String directory, Map<String, Object> params)
            throws HarvesterException {
        log.info("start.issueMetadata");

        // Determine the protocol
        // For now use default protocol as this is the only one
        String protocol = DEFAULT_CONCEPTUAL_SCHEMA;

        // populate element of interest maps from the mapping file's properties
        populateElementOfInterestsMapsFromMappingFile(METADATA_MAPPING_FILE_NAME, protocol);

        // send capabilities request and get response as ByteArrayInputStream
        ByteArrayInputStream capabilitiesResponse = getCapabilities(url, directory, protocol);

        // Determine the settings, i.e. maxResponseSize
        Map<String, String> settings = getSettings(capabilitiesResponse);

        // load list of supported namespaces
        loadSupportedNamespaces();

        // Determine the content namespace
        String contentNamespace = getNamespace(capabilitiesResponse, directory);

        // Determine the mapping file
        String mappingFile = getMappingFile(contentNamespace);

        // create a single BioDatasource using the same datasource name
        Long id = createOrUpdateBioDatasource(name, url, uddiKey, params, contentNamespace, mappingFile, protocol,
                settings);

        // update the BioDatasource's target count, other metadata, and contact info
        if (id > 0) {
            updateMetadata(id, url);
        }

        log.info("end.issueMetadata");
    }

    /**
     * Load supported namespaces into list.
     * If there is a problem loading the file, or no match exists for the
     * contentNamespace, the list will remain empty.
     *
     * @throws HarvesterException thrown if method fails
     */
    private void loadSupportedNamespaces() throws HarvesterException {

        Properties mapping = new Properties();
        String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
                CONCEPTUAL_MAPPING_FILENAME);
        InputStream is = null;
        try {
            is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
            mapping.load(is);
            for (Object key : mapping.keySet()) {
                // add content namespace to list of supported namespaces
                supported_namespaces.add((String) key);
            }
        } catch (NullPointerException e) {
            log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.loadSupportedNamespaces", e.getMessage(), e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.error(
                            "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(),
                            e);
                }
            }
        }
    }

    /**
     * Executes a metadata request for the purpose of retrieving additional information
     * about the contacts, dataset, etc. The response is returned as a ByteArrayInputStream.
     *
     * @param destination     of the TAPIR access point to request against
     * @param outputDirectory to which the response will be saved
     * @param protocol        name
     *
     * @return response as ByteArrayInputStream
     *
     * @throws HarvesterException thrown if method fails.
     */
    public ByteArrayInputStream metadataRequest(String destination, String outputDirectory, String protocol)
            throws HarvesterException {
        log.info("start.metadataRequest");

        // build the parameters required for the template into a map
        Map<String, String> templateParams = new HashMap<String, String>();
        templateParams.put("destination", destination);

        // Prepare directory
        File directory = new File(outputDirectory);
        log.debug("start.metadataRequest.prepareDirectory");
        if (directory.isDirectory()) {
            try {
                // remove all other metadata requests and responses
                fileUtils.prepareDirectory(outputDirectory, Constants.METADATA_PREFIX);
                log.debug("end.metadataRequest.prepareDirectory");
            } catch (Exception e) {
                log.error("error.metadataRequest.prepareDirectory", e.getMessage(), e);
                throw new HarvesterException(e.getMessage(), e);
            }
        }

        // build the TAPIR metadata request
        String query;
        String request;

        String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
                .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(METADATA_TEMPLATE_FILENAME)
                .concat(Constants.VELOCITY_FILENAME_EXTENSION);

        try {
            query = templateUtils.getAndMerge(templateLocation, templateParams);
            request = requestUtils.buildURL(destination, "request", query);
        } catch (Exception e) {
            log.error("tapirmetadatahandler.error.getCapabilities.buildUrl", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // save the request
        try {
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_REQUEST_FILENAME,
                    query.getBytes());
        } catch (IOException e) {
            log.warn("error.metadataRequest.writeRequest", e.getMessage());
        }

        // fire the request
        ByteArrayInputStream is;
        byte[] array;
        try {
            // execute request and return response as byte array
            log.debug("tapirmetadatahandler.metadataRequest.execute");
            array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory,
                    destination);

            // save the response as gzipped file
            fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_RESPONSE_FILENAME,
                    array);

            // convert byte array into inputStream
            is = new ByteArrayInputStream(array);
        }
        // was the operation stopped?
        catch (OperationStoppedException e) {
            throw new HarvesterException(e.getMessage(), e);
        } catch (IOException e) {
            log.error("error.metadataRequest.writeResponse", e.getMessage());
            throw new HarvesterException(e.getMessage(), e);
        }

        log.info("end.metadataRequest");
        return is;
    }

    /**
     * Parse the resource name (code) from the url
     *
     * @param url access point URL
     *
     * @return resource name
     */
    private String parseNameFromUrl(String url) {

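        // Illustrative: for "http://example.org/tapir.php/mydb" the greedy (.*)
        // consumes up to the last '/' (or '$'), so group(2) captures "mydb";
        // a trailing '/' would leave group(2) empty and no name is parsed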
        Pattern namePattern = Pattern.compile("(.*)[$//]([\\S]*)");
        Matcher matcher = namePattern.matcher(url);

        String resourceName = null;
        if (matcher.matches()) {
            resourceName = matcher.group(2);
            log.info("Resource name (code) parsed from url = " + resourceName);
        }
        return resourceName;
    }

    /**
     * Parse the response file and write the parsed values to their
     * appropriate file.
     *
     * @param stream file representing harvested xml response as ByteArrayInputStream
     *
     * @throws DocumentException thrown if parsing errors occur
     * @throws IOException       thrown
     */
    private void parseResponseFile(ByteArrayInputStream stream) throws DocumentException, IOException {

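        // Illustrative response shape (an assumption, based on the XPaths set in init()):
        //   <metadata xmlns:vcard="http://www.w3.org/2001/vcard-rdf/3.0#">
        //     <vcard:relatedEntity>
        //       <vcard:hasContact>...</vcard:hasContact>
        //     </vcard:relatedEntity>
        //   </metadata>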
        // create a DOM4J tree, reading a Document from the given File
        SAXReader reader = new SAXReader();
        reader.setEncoding("UTF-8");
        Document document = reader.read(stream);
        document.setXMLEncoding("UTF-8");

        // get all relatedEntity Elements
        List<Node> relatedEntities = (metadataRepeatingElementsXpath.get(RELATEDENTITY_REPEATING_ELEMENT_NAME))
                .selectNodes(document);
        // iterate over dataset Elements
        for (Node relatedEntity : relatedEntities) {

            // Detach relatedEntity Element and create a new Document with it
            DefaultDocument doc1 = new DefaultDocument();
            doc1.setRootElement((Element) relatedEntity.detach());

            // get all hasContact Elements
            List<Node> hasContacts = (metadataRepeatingElementsXpath.get(HASCONTACT_REPEATING_ELEMENT_NAME))
                    .selectNodes(doc1);
            // iterate over hasContact Elements
            for (Node hasContact : hasContacts) {

                // Detach hasContact Element and create a new Document with it
                DefaultDocument doc2 = new DefaultDocument();
                doc2.setRootElement((Element) hasContact.detach());

                // write hasContact elements-of-interest to file
                fileUtils.writeValuesToFile(hasContactBW, harvestedHasContactElementsOfInterest.values(), doc2,
                        namespaceMap, String.valueOf(getLineNumber()));
            }
            // write relatedEntity elements-of-interest to file
            fileUtils.writeValuesToFile(relatedEntityBW, harvestedRelatedEntityElementsOfInterest.values(), doc1,
                    namespaceMap, String.valueOf(getLineNumber()));

            setLineNumber(getLineNumber() + 1);
        }
    }

    /**
     * Iterates over the metadata mapping file (properties file), populating the
     * various elements-of-interest maps.
     * In most cases, regular expressions divide the mapping file's properties
     * into the appropriate element-of-interest map.
     * Where some properties actually represent repeating elements in a metadata
     * xml response, the standardised set of repeating element names is
     * held in a static map. Each repeating element name matches a key name
     * in the metadata mapping properties file and is used to get at its XPath
     * expression.
     * Note: The mapping file's properties are in the following format:
     * [element-of-interest categoriser] + [property name] = [XPath expression]
     * The regular expression matches according to the [element-of-interest
     * categoriser]
     * The corresponding element-of-interest map is then populated with: key =
     * [property name] & value = [XPath expression]
     *
     * @param mappingFile name
     * @param protocol    name
     *
     * @throws HarvesterException thrown if method fails
     */
    private void populateElementOfInterestsMapsFromMappingFile(String mappingFile, String protocol)
            throws HarvesterException {

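        // A mapping line such as "metadataTitle=//dc:title" (hypothetical example)
        // would land in metadataElementsOfInterest as key "Title" with the XPath
        // "//dc:title" as its value; the prefix patterns below do the categorising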
        // Create regex patterns
        // contact-related patterns
        Pattern relatedEntityKeyPattern = Pattern.compile("relatedEntity([\\S]*)");
        Pattern hasContactKeyPattern = Pattern.compile("hasContact([\\S]*)");

        // non-contact, metadata related pattern
        Pattern metadataKeyPattern = Pattern.compile("metadata([\\S]*)");

        // non-contact, non-metadata, settings related pattern
        Pattern settingKeyPattern = Pattern.compile("setting([\\S]*)");

        // properties we harvest are read from file
        Properties mapping = new Properties();
        String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, protocol, MAPPING_DIRECTORY_NAME,
                mappingFile);
        InputStream is = null;
        try {
            is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
            mapping.load(is);

            // Divide the mapping properties into various element-of-interest maps
            for (Object key : mapping.keySet()) {
                boolean matched = false;
                // Matchers matching keys belonging to repeating element groups
                Matcher relatedEntityKeyMatcher = relatedEntityKeyPattern.matcher((String) key);

                if (relatedEntityKeyMatcher.matches()) {
                    String property = relatedEntityKeyMatcher.group(1);
                    harvestedRelatedEntityElementsOfInterest.put(property, mapping.getProperty((String) key));
                    matched = true;
                }
                if (!matched) {
                    Matcher hasContactKeyMatcher = hasContactKeyPattern.matcher((String) key);
                    if (hasContactKeyMatcher.matches()) {
                        String property = hasContactKeyMatcher.group(1);
                        harvestedHasContactElementsOfInterest.put(property, mapping.getProperty((String) key));
                        matched = true;
                    }
                    if (!matched) {
                        Matcher contactKeyMatcher = metadataKeyPattern.matcher((String) key);
                        if (contactKeyMatcher.matches()) {
                            String property = contactKeyMatcher.group(1);
                            metadataElementsOfInterest.put(property, mapping.getProperty((String) key));
                            matched = true;
                        }
                        if (!matched) {
                            Matcher settingKeyMatcher = settingKeyPattern.matcher((String) key);
                            if (settingKeyMatcher.matches()) {
                                String property = settingKeyMatcher.group(1);
                                settingsElementsOfInterest.put(property, mapping.getProperty((String) key));
                                matched = true;
                            }
                            if (!matched) {
                                // Determines the XPath expressions used to isolate repeating elements in a
                                // metadata xml response.
                                if (metadataRepeatingElementsXpath.containsKey(key)) {
                                    // construct an XPath expression for repeating Element
                                    DefaultXPath xpath = new DefaultXPath(mapping.getProperty((String) key));
                                    xpath.setNamespaceURIs(namespaceMap);
                                    metadataRepeatingElementsXpath.put((String) key, xpath);
                                }
                            }
                        }
                    }
                }
            }
        } catch (NullPointerException e) {
            log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
            throw new HarvesterException(e.getMessage(), e);
        } catch (Exception e) {
            log.error("error.populateElementOfInterestsMapsFromMappingFile",
                    new String[] { mappingFile, e.getMessage() }, e);
            throw new HarvesterException(e.getMessage(), e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    log.error(
                            "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(),
                            e);
                }
            }
        }
    }

    /**
     * Processes the metadata response by a particular element of interest
     * XPath, and outputs all possible language alternatives listed for it.
     * A single Map.Entry will consist of: key=language, value=element value
     * For example:
     * <dc:title xml:lang="en">National Taiwan University</dc:title>
     * <dc:title xml:lang="zh-TW">BlaBla</dc:title>
     * Here, the output would be two Map.Entries: <"en", "National Taiwan University">
     * and <"zh-TW", "BlaBla">
     *
     * @param stream            metadata response as ByteArrayInputStream
     * @param elementOfInterest XPath
     *
     * @return all possible language alternatives for term
     *
     * @throws HarvesterException thrown if method fails
     */
    private Map<String, String> processAllLanguageAlternativesForAParticularElementOfInterest(
            ByteArrayInputStream stream, String elementOfInterest) throws HarvesterException {

        Map<String, String> processed = new HashMap<String, String>();
        try {
            processed = digesterUtils.xmlToMapForSingleElement(stream, elementOfInterest,
                    TapirMetadataHandler.languageAttributeName);
        } catch (Exception e) {
            log.warn("error.processMetadata.parsing", new String[] { elementOfInterest, e.getMessage() }, e);
        }

        return processed;
    }

    /**
     * Processes the metadata response by the mapping file
     * that corresponds to a particular protocol.
     *
     * @param stream metadata response as ByteArrayInputStream
     *
     * @return map with concept-name/value key/value pairs
     *
     * @throws HarvesterException thrown if the method fails
     */
    public Map<String, String> processMetadata(ByteArrayInputStream stream) throws HarvesterException {
        log.info("start.processMetadata");

        Map<String, String> processed = new HashMap<String, String>();
        // retrieve settings information
        try {
            processed = digesterUtils.parseElementsOfInterest(stream, metadataElementsOfInterest, true);
        } catch (Exception e) {
            log.warn("error.processMetadata.parsing", e.getMessage(), e);
        }

        log.info("end.processMetadata");
        return processed;
    }

    /**
     * Process the metadata response for the contact information by the mapping
     * file that corresponds to the particular protocol.
     * Because there may be several contacts, and a single contact can contain
     * several different attributes, they are saved to file(s) versus being
     * saved into JSON format like the other metadata extracted in the
     * processMetadata method.
     * The output is written to two tab delimited files, each with column header
     * definition lines:
     * relatedEntity.txt
     * contact.txt
     *
     * @param stream          metadataResponse as ByteArrayInputStream
     * @param outputDirectory directory to write to
     *
     * @throws HarvesterException thrown if method fails
     */
    public void processMetadataForContacts(ByteArrayInputStream stream, String outputDirectory)
            throws HarvesterException {
        log.info("start.processMetadataForContacts");
        // create the output directory
        File directory = new File(outputDirectory);

        // Prepare directory
        log.debug("tapirmetadatahandler.start.processMetadataForContacts.prepareDirectory");
        if (directory.isDirectory()) {
            try {
                // remove all pre-existing contact tab files
                fileUtils.prepareDirectory(outputDirectory, Constants.RELATED_ENTITY_FILENAME);
                fileUtils.prepareDirectory(outputDirectory, Constants.CONTACT_FILENAME);
                log.debug("tapirmetadatahandler.end.processMetadataForContacts.prepareDirectory");
            } catch (Exception e) {
                log.error("tapirmetadatahandler.error.processMetadataForContacts.prepareDirectory", e.getMessage(),
                        e);
                throw new HarvesterException(e.getMessage(), e);
            }
        }

        // get the various files
        File relatedEntityFile = new File(directory,
                Constants.RELATED_ENTITY_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));
        File hasContactFile = new File(directory,
                Constants.CONTACT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));

        // ensure that they exist anew
        try {
            relatedEntityFile.createNewFile();
            hasContactFile.createNewFile();
        } catch (IOException e) {
            log.error("tapirmetadatahandler.error.processMetadataForContacts.createFiles", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // create file writers for each file
        try {
            relatedEntityBW = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(relatedEntityFile, true), "UTF8"));
            hasContactBW = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(hasContactFile, true), "UTF8"));
        } catch (IOException e) {
            log.error("error.createBWs", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // write header column line for each file
        try {
            // The header line is derived from the names of the properties
            fileUtils.writeHeaderLine(relatedEntityBW, harvestedRelatedEntityElementsOfInterest.keySet(), true);

            // an identification number column name is also written
            fileUtils.writeHeaderLine(hasContactBW, harvestedHasContactElementsOfInterest.keySet(), true);
        } catch (IOException e) {
            log.error("error.writeHeaders", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // parse metadata for contacts
        setLineNumber(1);
        try {
            parseResponseFile(stream);
        } catch (Exception e) {
            log.error("error.metadataRequest.parsing", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }

        // close the buffered writers, and log the written files so that
        // they appear in the console
        try {
            relatedEntityBW.close();
            hasContactBW.close();

            log.info("Writing to file: " + relatedEntityFile.getAbsolutePath());
            log.info("Writing to file: " + hasContactFile.getAbsolutePath());
        } catch (IOException e) {
            log.error("error.closeBWs", e.getMessage(), e);
            throw new HarvesterException(e.getMessage(), e);
        }
        log.info("end.processMetadataForContacts");
    }
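
    /*
     * Sketch of the expected output, assuming illustrative column names (the
     * real headers are the key sets of harvestedRelatedEntityElementsOfInterest
     * and harvestedHasContactElementsOfInterest). Both files are tab delimited
     * with one header line, for example:
     *
     *   relatedEntity.txt:  id<TAB>name<TAB>address
     *   contact.txt:        id<TAB>name<TAB>email
     */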

    /**
     * Returns the value of the English entry in a map of alternative
     * language entries, if one exists.
     * It is assumed the key is the language code, and the value is the
     * corresponding text in that language.
     *
     * @param map alternative language entries, keyed by language code
     *
     * @return the English value, or null if there is no English entry
     */
    private String retrieveValueForEnglishEntry(Map<String, String> map) {
        for (Map.Entry<String, String> entry : map.entrySet()) {
            // the language comparison is case insensitive, e.g. "EN" matches "en"
            if (StringUtils.equalsIgnoreCase(entry.getKey(), TapirMetadataHandler.englishLanguageCode)) {
                return entry.getValue();
            }
        }
        return null;
    }
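
    /*
     * Minimal sketch of the lookup above, with made-up values and assuming
     * englishLanguageCode is "en" (the comparison is case insensitive):
     *
     *   Map<String, String> titles = new HashMap<String, String>();
     *   titles.put("fr", "Collection de poissons");
     *   titles.put("EN", "Fish collection");
     *   retrieveValueForEnglishEntry(titles); // returns "Fish collection"
     *   retrieveValueForEnglishEntry(new HashMap<String, String>()); // returns null
     *
     * With no English entry the method returns null, which callers must check
     * (see updateMetadata below).
     */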

    /**
     * Parse the search response for the record count.
     *
     * @param inputStream    search response as ByteArrayInputStream
     * @param itemOfInterest element path to the count value, optionally
     *                       suffixed with @attributeName
     *
     * @return count, or "0" if a single count value could not be extracted
     *
     * @throws IOException  thrown if reading the response fails
     * @throws SAXException thrown if parsing the response fails
     */
    private String returnCount(ByteArrayInputStream inputStream, String itemOfInterest)
            throws IOException, SAXException {

        List<String> records = new LinkedList<String>();
        Digester digester = new Digester();
        digester.setNamespaceAware(true);
        digester.push(records);
        if (itemOfInterest.contains("@")) {
            String[] parts = itemOfInterest.split("@");
            digester.addCallMethod(parts[0], "add", 1);
            // digester.addObjectParam(parts[0], 0, parts[0] + "@" + parts[1]);
            digester.addCallParam(parts[0], 0, parts[1]);
        } else {
            digester.addCallMethod(itemOfInterest, "add", 1);
            // digester.addObjectParam(itemOfInterest, 0, itemOfInterest);
            digester.addCallParam(itemOfInterest, 0);
        }
        digester.parse(inputStream);

        // close inputStream
        try {
            if (inputStream != null) {
                inputStream.close();
            }
        } catch (Exception e) {
            // do nothing
        }

        if (records.size() != 1) {
            return "0";
        }

        return records.get(0);
    }
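
    /*
     * Example of the two Digester rule shapes above, assuming a TAPIR-style
     * search summary (the element and attribute names are illustrative). With
     * itemOfInterest = "response/search/summary@totalMatched", the rule pair
     * reads the attribute value, so a response containing
     *
     *   <response><search><summary totalMatched="1523"/></search></response>
     *
     * yields "1523". Without an "@", the element's character data is collected
     * instead. Anything other than exactly one collected value falls back to "0".
     */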

    /**
     * @param bioDatasourceManager the bioDatasourceManager to set
     */
    public void setBioDatasourceManager(BioDatasourceManager bioDatasourceManager) {
        this.bioDatasourceManager = bioDatasourceManager;
    }

    public void setLineNumber(int lineNumber) {
        this.lineNumber = lineNumber;
    }

    /**
     * @param synchroniserFactories the synchroniserFactories to set
     */
    public void setSynchroniserFactories(List<AbstractSynchroniserFactory> synchroniserFactories) {
        this.synchroniserFactories = synchroniserFactories;
    }

    /**
     * Defaults dataset title and dataset title path to null.
     *
     * @param id  Biodatasource id
     * @param url access point URL
     *
     * @throws HarvesterException thrown if method fails
     * @see org.gbif.harvest.tapir.TapirMetadataHandler#updateMetadata(Long, String, String, String)
     */
    public void updateMetadata(Long id, String url) throws HarvesterException {
        updateMetadata(id, url, null, null);
    }

    /**
     * Update a BioDatsource's target count, and their other metadata.
     *
     * @param id               Biodatasource id
     * @param url              access point URL
     * @param datasetTitle     dataset title
     * @param datasetTitlePath dataset title path
     *
     * @throws HarvesterException thrown if method fails
     */
    public void updateMetadata(Long id, String url, String datasetTitle, String datasetTitlePath)
            throws HarvesterException {
        log.debug("start.updateMetadata");

        // retrieve the BioDatasource
        BioDatasource bioDatasource = bioDatasourceManager.get(id);

        // retrieve the BioDatasource's directory
        Map<String, Object> params = JSONUtils.mapFromJSON(bioDatasource.getParametersAsJSON());
        String bioDatasourceDirectory = Constants.BASE_DIR.concat(File.separator)
                .concat((String) params.get("directory"));
        String protocol = (String) params.get("protocol");

        // determine the outputModel from the appropriate mapping file
        String contentNamespace = params.get("contentNamespace").toString();
        String outputModel = getOutputModel(contentNamespace);

        // get the record count as a String
        String resource_count = null;
        try {
            // send search request and get response as ByteArrayInputStream
            ByteArrayInputStream searchResponse = getSearch(url, bioDatasourceDirectory, outputModel, datasetTitle,
                    datasetTitlePath, protocol);

            // parse the response for the count information
            resource_count = getCount(searchResponse);
        } catch (HarvesterException e) {
            log.error("error.gettingCount", bioDatasource.getName(), e);
        }

        // check count is proper integer value, then set it as targetCount
        if (StringUtils.trimToNull(resource_count) != null) {
            int targetCount = 0;
            try {
                targetCount = Integer.valueOf(resource_count);
            } catch (NumberFormatException e) {
                log.warn("Problem occurred converting resource count: " + resource_count);
            } finally {
                params.put("targetCount", String.valueOf(targetCount));
                // update the BioDatasource's target count attribute
                bioDatasource.setTargetCount(targetCount);
                log.info("updateCount", String.valueOf(targetCount));
            }
        } else {
            // update the BioDatasource's params with the default count
            params.put("targetCount", "0");
            bioDatasource.setTargetCount(0);
        }

        ByteArrayInputStream metadataResponse = metadataRequest(url, bioDatasourceDirectory, protocol);

        Map<String, String> processed = null;
        Map<String, String> processedDataResourceNames;
        try {
            // for all parameters with no alternative languages
            processed = processMetadata(metadataResponse);

            // for all parameters with alternative languages
            // 1. data resource title
            String dataResourceNameXPath = metadataElementsOfInterest.get(resourceNameKeyName);
            // remember to reset inputStream first
            metadataResponse.reset();
            processedDataResourceNames = processAllLanguageAlternativesForAParticularElementOfInterest(
                    metadataResponse, dataResourceNameXPath);

            // get the English dataResourceName
            String englishDataResourceName = retrieveValueForEnglishEntry(processedDataResourceNames);
            if (StringUtils.isNotBlank(englishDataResourceName)) {
                processed.put(resourceNameKeyName, englishDataResourceName);
                processed.put(resourceDisplayNameKeyName, englishDataResourceName);
            }

            // for all contact related metadata
            // remember to reset inputStream first
            metadataResponse.reset();
            processMetadataForContacts(metadataResponse, bioDatasourceDirectory);
        } catch (HarvesterException e) {
            // swallow here; an error is logged below if no metadata was processed
        }

        // update other metadata
        if (processed != null && processed.size() > 0) {
            if (processed.containsKey(resourceNameKeyName)) {
                String dataResourceName = StringUtils.trimToNull(processed.get(resourceNameKeyName));
                if (StringUtils.isBlank(dataResourceName)
                        || StringUtils.equalsIgnoreCase(dataResourceName, "NULL")) {
                    log.error("tapirmetadatahandler.error.updateMetadata.dataResourceName",
                            bioDatasource.getName());
                    // fall back to the BioDatasource's name, writing it into processed so
                    // that the putAll into params below does not overwrite the fallback
                    // with the blank value still held in processed
                    processed.put(resourceNameKeyName, bioDatasource.getName());
                    processed.put(resourceDisplayNameKeyName, bioDatasource.getName());
                }
            }
            }
        } else {
            log.error("tapirmetadatahandler.error.updateMetadata.metadataRequest", bioDatasource.getName());
        }

        // add all extracted metadata to params (guard against a failed request)
        if (processed != null) {
            params.putAll(processed);
        }
        // re-save params
        bioDatasource.setParametersAsJSON(JSONUtils.jsonFromMap(params));

        // save the BioDatasource
        bioDatasourceManager.save(bioDatasource);
        log.debug("end.updateMetadata");
    }
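
    /*
     * End-to-end sketch (hypothetical id and URL, on a configured
     * TapirMetadataHandler): a plain single-resource endpoint uses the
     * two-argument overload, which defaults the dataset title and title path
     * to null; ABCD endpoints with multiple searchable datasets pass the
     * dataset title and its path to the four-argument form instead.
     *
     *   handler.updateMetadata(123L, "http://example.org/tapir");
     *   handler.updateMetadata(123L, "http://example.org/tapir", "Fish collection", datasetTitlePath);
     */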
}