// GBIF DiGIR metadata handler (source; the stray "Java tutorial" heading was not valid Java)
/******************************************************************************* * Copyright (C) 2008 Global Biodiversity Information Facility Secretariat. * All Rights Reserved. * * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. ******************************************************************************/ package org.gbif.harvest.digir; import org.gbif.harvest.AbstractHarvester; import org.gbif.harvest.core.AbstractSynchroniserFactory; import org.gbif.harvest.core.Constants; import org.gbif.harvest.exception.HarvesterException; import org.gbif.harvest.exception.OperationStoppedException; import org.gbif.harvest.log.CommonGBIFLogEvent; import org.gbif.harvest.log.I18nLog; import org.gbif.harvest.log.I18nLogFactory; import org.gbif.harvest.model.BioDatasource; import org.gbif.harvest.service.BioDatasourceManager; import org.gbif.harvest.util.FileUtils; import org.gbif.harvest.util.GbifLogger; import org.gbif.harvest.util.JSONUtils; import org.gbif.harvest.util.RequestUtils; import org.gbif.harvest.util.TemplateUtils; import org.gbif.util.BioDatasourceUtils; import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.RandomAccessFile; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.regex.Matcher; import 
java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Level;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.DefaultDocument;
import org.dom4j.xpath.DefaultXPath;

/**
 * This is a special handler for DiGIR where it will issue a metadata request
 * against a DiGIR endpoint, and create a new BioDatasource for each resource
 * behind it having a recognized schemaLocation.
 * In the event that a BioDatasource already exists, its metadata is updated.
 *
 * @author timrobertson
 * @author kbraak
 */
public class DigirMetadataHandler extends AbstractHarvester {

  // commons logging
  private I18nLog log = I18nLogFactory.getLog(this.getClass());

  // classpath root for DiGIR templates and mapping files
  private static final String BASE_LOCATION = "org/gbif/harvest/digir";
  private static final String MAPPING_DIRECTORY_NAME = "mapping";
  private static final String TEMPLATE_DIRECTORY_NAME = "template";
  // the DiGIR template for the metadata request
  private static final String REQUEST_TEMPLATE_FILENAME = "metadata";
  private static final String METADATA_MAPPING_FILE_NAME = "metadataMapping";
  // the mapping file to determine which index mapping file to use
  private static final String SCHEMA_LOCATION_MAPPING_FILENAME = "schemaLocationMapping";
  // the mapping file to determine which protocol to use
  private static final String PROTOCOL_MAPPING_FILENAME = "protocolMapping";
  // base name of the per-provider "resource name + record count" output file
  private static final String RESOURCES_WITH_COUNT_FILENAME = "resources_with_count";
  // fallbacks used when the schemaLocation cannot be matched in the mapping files
  private static final String DEFAULT_MAPPING_FILE = "indexMapping_dwc_1_0";
  private static final String DEFAULT_PROTOCOL = "digir_1_0";

  // Maps with xpaths to those elements we're interested in processing/extracting
  private static Map<String, String> metadataElementsOfInterest = new HashMap<String, String>();
  // Maps with xpaths to those contact related elements we're interested in processing/extracting
  private static Map<String, String> metadataResourceContactElementsOfInterest = new HashMap<String, String>();
  // NOTE: currently provider contacts not being gathered - taken from UDDI instead
  private static Map<String, String> metadataProviderContactElementsOfInterest = new HashMap<String, String>();

  // pre-compiled splitter for the tab-delimited metadata files written below
  private static Pattern tabPattern = Pattern.compile("\t");

  // key names: these names must be used in all metadata mappings files
  private static String resourceNameKeyName = "code";
  private static String schemaLocationKeyName = "schemaLocation";
  private static String minQueryTermLengthKeyName = "minQueryTermLength";
  private static String maxInventoryResponseKeyName = "maxInventoryResponse";
  private static String maxSearchResponseKeyName = "maxSearchResponse";
  private static String recordCountKeyName = "recordCount";
  private static String conceptualSchemaKeyName = "conceptualSchema";

  // File writers
  // NOTE(review): static mutable writers shared across instances make this class
  // non-thread-safe; two concurrent metadata harvests would interleave output.
  // Confirm the harvester runs these single-threaded.
  private static BufferedWriter resourceContactsBW = null;
  private static BufferedWriter resourcesBW = null;

  // The name of the keys used in the repeating elements / xpath map.
// These MUST be the same as the keys in the metadataMapping properties file // that identify the repeating elements' XPath expressions private static final String resourceEntityRepeatingElementName = "reXPath"; private static final String contactEntityRepeatingElementName = "ceXPath"; private Map<String, DefaultXPath> metadataRepeatingElementsXpath; // for use in setting xpaths private Map<String, String> namespaceMap; private Set<String> conceptualSchemaWhitelist; private int lineNumber; private TemplateUtils templateUtils; private RequestUtils requestUtils; private FileUtils fileUtils; private GbifLogger gbifLogger; private BioDatasourceManager bioDatasourceManager; private List<AbstractSynchroniserFactory> synchroniserFactories = new LinkedList<AbstractSynchroniserFactory>(); public DigirMetadataHandler(TemplateUtils templateUtils, RequestUtils requestUtils, FileUtils fileUtils, GbifLogger gbifLogger, BioDatasourceManager bioDatasourceManager, List<AbstractSynchroniserFactory> synchroniserFactories) { this.templateUtils = templateUtils; this.requestUtils = requestUtils; this.fileUtils = fileUtils; this.gbifLogger = gbifLogger; this.bioDatasourceManager = bioDatasourceManager; this.synchroniserFactories = synchroniserFactories; init(); } /** * Construct a new BioDatasource, or update a pre-existing one. 
*
 * @param name of BioDatasource
 * @param url access point URL
 * @param resourceName code
 * @param resourceCount count
 * @param uddiKey registry service UUID
 * @param params map of BioDatasource params
 * @param contentNamespace contentNamespace
 * @param mappingFile name
 * @param protocol name
 * @param parentDirectoryName parent directory name
 *
 * @throws HarvesterException thrown if method fails
 */
private void createOrUpdateBioDatasource(String name, String url, String resourceName, String resourceCount,
  String uddiKey, Map<String, Object> params, String contentNamespace, String mappingFile, String protocol,
  String parentDirectoryName) throws HarvesterException {
  // Whether we're creating/updating, we always need to update params:
  params.put("url", url);
  params.put("resource_name", resourceName);
  params.put("contentNamespace", contentNamespace);
  params.put("mappingFile", mappingFile);
  params.put("protocol", protocol);
  params.put("harvesterFactory", Constants.DIGIR_HARVESTER_FACTORY);

  // construct the new, validated directory name
  String newValidDirectoryName =
    BioDatasourceUtils.constructBioDatasourceOperatorDirectoryName(resourceName, parentDirectoryName);
  params.put("directory", newValidDirectoryName);

  // get country name
  String country = null;
  if (params.containsKey("country")) {
    country = (String) params.get("country");
    // "country":null is converted to "country":"\"null\""
    if (StringUtils.equalsIgnoreCase(country, "\"null\"")) {
      country = null;
    }
  }

  // get provider name
  String dataProviderName = null;
  if (params.containsKey("providerName")) {
    dataProviderName = params.get("providerName").toString();
  }

  // add synchroniserFactories list to params (stored as fully-qualified class names)
  synchroniserFactories = getSynchroniserFactories();
  List<String> factories = new LinkedList<String>();
  Iterator<AbstractSynchroniserFactory> iter = synchroniserFactories.iterator();
  while (iter.hasNext()) {
    // NOTE(review): raw Class type; Class<?> would avoid an unchecked-warning here
    Class cls = (iter.next().getClass());
    String clsName = cls.getName();
    factories.add(clsName);
  }
  params.put("synchroniserFactories", factories);

  // construct BioDatasource's name
  String newName = BioDatasourceUtils.constructBioDatasourceName(name, resourceName);
  // returns -1 when no BioDatasource with this name/uddiKey exists yet
  Long id = bioDatasourceManager.checkIfBioDatasourceExists(newName, uddiKey);
  try {
    // if this is a new BioDatasource
    if (id.compareTo(-1L) == 0) {
      // default count to 0 when resourceCount is not a parseable integer
      int count = 0;
      try {
        count = Integer.valueOf(resourceCount);
      } catch (NumberFormatException e) {
        count = 0;
        log.info("defaultCount", String.valueOf(count));
      }
      // update params
      Map<String, Object> newParams = new HashMap<String, Object>();
      newParams.putAll(params);
      newParams.put("name", newName);
      newParams.put("uddiKey", uddiKey);
      newParams.put("targetCount", String.valueOf(count));
      String parametersAsJson = JSONUtils.jsonFromMap(newParams);
      // create new BioDatasource
      BioDatasource datasource = new BioDatasource(newName, dataProviderName, Constants.DIGIR_HARVESTER_FACTORY,
        parametersAsJson, count, uddiKey, country, url);
      bioDatasourceManager.save(datasource);
      log.info("createBioDatasource", newName);
      log.info("setCount", resourceCount);
    } else {
      // pre-existing BioDatasource: merge the new params over the stored ones
      BioDatasource bioDatasource = bioDatasourceManager.get(id);
      // update params
      Map<String, Object> oldParams = JSONUtils.mapFromJSON(bioDatasource.getParametersAsJSON());
      oldParams.putAll(params);
      // update its target count
      oldParams.put("targetCount", resourceCount);
      bioDatasource.setParametersAsJSON(JSONUtils.jsonFromMap(oldParams));
      // NOTE(review): unlike the creation branch, a non-numeric resourceCount here
      // throws NumberFormatException and is reported as a HarvesterException below
      bioDatasource.setTargetCount(Integer.parseInt(resourceCount));
      // in case the url has changed
      bioDatasource.setUrl(url);
      // in case the country has changed
      bioDatasource.setCountry(country);
      // in case the provider name has changed
      bioDatasource.setProviderName(BioDatasourceUtils.prepareStringForUI(dataProviderName));
      bioDatasourceManager.save(bioDatasource);
      log.info("createBioDatasource.exists", bioDatasource.getName());
      log.info("updateCount", resourceCount);
    }
  } catch (Exception e) {
    log.error("error.createBioDatasource", e.getMessage(), e);
    throw new HarvesterException(e.getMessage(), e);
  }
}

/**
 * @return the current parse-line counter
 */
public int getLineNumber() {
  return lineNumber;
}

/**
 * Determine the mapping file.
 * If there is a problem loading the file, or no match exists for the
 * contentNamespace, the default is used.
 *
 * @param contentNamespace contentNamespace
 * @param directory as String
 * @param resourceName code
 *
 * @return mappingFile name
 *
 * @throws HarvesterException thrown if method fails
 */
private String getMappingFile(String contentNamespace, String directory, String resourceName)
  throws HarvesterException {
  // Initially, set the mapping file to the default
  String mappingFile = DEFAULT_MAPPING_FILE;
  if (StringUtils.isNotBlank(contentNamespace)) {
    Properties mapping = new Properties();
    String mappingFilePath =
      fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME, SCHEMA_LOCATION_MAPPING_FILENAME);
    InputStream is = null;
    try {
      // getResourceAsStream returns null when the file is missing, which surfaces
      // as the NullPointerException caught below
      is = DigirMetadataHandler.class.getResourceAsStream(mappingFilePath);
      mapping.load(is);
      boolean found = false;
      for (Object key : mapping.keySet()) {
        if (StringUtils.equals(contentNamespace, (String) key)) {
          mappingFile = mapping.getProperty((String) key);
          found = true;
        }
      }
      // if not found, alert operator
      if (!found) {
        log.error("digirmetadatahandler.default.conceptualMappingNotFound",
          new String[] { resourceName, contentNamespace });
        // and write GBIF Log Message
        gbifLogger.openAndWriteToGbifLogMessageFile(directory,
          CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getName(),
          CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getValue(), Level.ERROR_INT,
          "For resource=" + resourceName + ": the schemaLocation " + contentNamespace
            + " was not found in the DiGIR conceptualMapping.properties file. If this is a valid schemaLocation, please update this file and try again. Defaulting to DwC 1.0",
          1, false);
      }
    } catch (NullPointerException e) {
      log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      // unreadable mapping file: fall back to the default mapping, do not abort
      log.error("digirmetadatahandler.error.getMappingFile", e.getMessage(), e);
      log.error("digirmetadatahandler.default.getMappingFile", mappingFile);
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
        }
      }
    }
  } else {
    log.error("No schemaLocation attribute was specified in element conceptualSchema: defaulting to DwC 1.0");
  }
  return mappingFile;
}

/**
 * Determine the protocol.
 * If there is a problem loading the file, or no match exists for the
 * contentNamespace, the default is used.
 *
 * @param contentNamespace contentNamespace
 *
 * @return protocol name
 *
 * @throws HarvesterException thrown if method fails
 */
private String getProtocol(String contentNamespace) throws HarvesterException {
  // Initially, set the protocol to the default
  String protocol = DEFAULT_PROTOCOL;
  Properties mapping = new Properties();
  String mappingFilePath =
    fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME, PROTOCOL_MAPPING_FILENAME);
  InputStream is = null;
  try {
    is = DigirMetadataHandler.class.getResourceAsStream(mappingFilePath);
    mapping.load(is);
    boolean found = false;
    for (Object key : mapping.keySet()) {
      if (StringUtils.equals(contentNamespace, (String) key)) {
        protocol = mapping.getProperty((String) key);
        found = true;
      }
    }
    // if not found, alert operator
    if (!found) {
      log.error("digirmetadatahandler.default.protocolMappingNotFound", contentNamespace);
    }
  } catch (NullPointerException e) {
    log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
    throw new HarvesterException(e.getMessage(), e);
  } catch (IOException e) {
    log.error("digirmetadatahandler.error.getProtocol",
e.getMessage(), e);
    // unreadable mapping file: fall back to the default protocol, do not abort
    log.debug("digirmetadatahandler.default.getProtocol", protocol);
  } finally {
    if (is != null) {
      try {
        is.close();
      } catch (IOException e) {
        log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
      }
    }
  }
  return protocol;
}

/**
 * @return the synchroniserFactories
 */
public List<AbstractSynchroniserFactory> getSynchroniserFactories() {
  return synchroniserFactories;
}

/**
 * Seeds the repeating-element XPaths, the namespace map and the
 * conceptualSchema whitelist with DiGIR 1.0 defaults.
 */
private void init() {
  // with default (digir 1.0) values as place holders
  metadataRepeatingElementsXpath = new HashMap<String, DefaultXPath>();
  metadataRepeatingElementsXpath.put(resourceEntityRepeatingElementName, new DefaultXPath("//digir_1_0:resource"));
  metadataRepeatingElementsXpath
    .put(contactEntityRepeatingElementName, new DefaultXPath("//digir_1_0:resource/digir_1_0:contact"));
  // when more versions of DiGIR become available, these can no longer be hard-coded.
  namespaceMap = new HashMap<String, String>();
  namespaceMap.put("digir_1_0", "http://digir.net/schema/protocol/2003/1.0");
  // load conceptualSchema whiteList list
  conceptualSchemaWhitelist = new HashSet<String>();
  conceptualSchemaWhitelist.add("http://digir.net/schema/conceptual/darwin/2003/1.0");
  conceptualSchemaWhitelist.add("http://www.iobis.org/obis");
  conceptualSchemaWhitelist.add("OBIS Schema Version 1.0");
}

/**
 * The entry point required for the user interface integration.
 *
 * @param params map of the datasource to whom the operation belongs
 *
 * @throws HarvesterException thrown if method fails
 */
public void issueMetadata(Map<String, String> params) throws HarvesterException {
  Map<String, Object> paramsCopy = new HashMap<String, Object>();
  paramsCopy.putAll(params);
  // delegate with the individual values unpacked; the directory is made
  // absolute by prefixing the harvester base dir
  issueMetadata(params.get("name"), params.get("url"), params.get("uddiKey"),
    Constants.BASE_DIR.concat(File.separator).concat(params.get("directory")), paramsCopy);
}

/**
 * Issues a metadata request to a DiGIR provider.
* It then collects metadata
 * about the different resources located behind that provider's access
 * point.
 * The collected metadata for each resource, like the name (code), count,
 * etc. is written to a file.
 * Iterating over this file, a new BioDatasource is created for each
 * resource, with its name, count, and other attributes all set accordingly.
 * Note that the name of each new BioDatasource is the concatenation of the
 * Provider and resource code.
 *
 * @param name of the datasource
 * @param url of the datasource
 * @param uddiKey of the datasource
 * @param directory to save files to
 * @param params map of the datasource
 *
 * @throws HarvesterException thrown if method fails
 */
public void issueMetadata(String name, String url, String uddiKey, String directory, Map<String, Object> params)
  throws HarvesterException {
  log.info("start.issueMetadata");

  // Determine the protocol
  // For now use default protocol as this is the only one needed at metadata level
  String protocol = DEFAULT_PROTOCOL;

  // populate element of interest maps from the mapping file's properties
  populateElementOfInterestsMapsFromMappingFile(METADATA_MAPPING_FILE_NAME, protocol);

  // send metadata request and get response as ByteArrayInputStream
  ByteArrayInputStream metadataResponse = metadataRequest(url, directory, protocol);

  // collect resources metadata, including contact metadata, into
  // separate output files
  processAllMetadata(metadataResponse, directory);

  // recover our metadata files
  File resourcesFile =
    new File(directory, DigirMetadataHandler.RESOURCES_WITH_COUNT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));
  File contactsFile = new File(directory, Constants.CONTACT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));

  // Iterate over resource metadata file, and resource contact metadata file
  // For each resource's contacts, write a new contact metadata file
  // For each resource create a new BioDatasource
  // NOTE: it is in the BioDatasource's directory that the contact metadata
  // file is saved
  RandomAccessFile contactsRaf = null;
  RandomAccessFile resourcesRaf = null;
  try {
    contactsRaf = new RandomAccessFile(contactsFile, "r");
    contactsRaf.seek(0L);
    String contactLine = fileUtils.readUTFLine(contactsRaf);
    // put the header column properties into a list (minus line number)
    List<String> contactPropertiesList = retrieveStringListFromLine(contactLine);
    contactLine = fileUtils.readUTFLine(contactsRaf);
    // line-number of the first contact record; -1 when there are no contacts
    int contactLineNumber = -1;
    if (StringUtils.isNotBlank(contactLine)) {
      contactLineNumber = Integer.valueOf(fileUtils.getDelimitedPart(contactLine, "\t", 0));
    }

    // Open a file cursor to the resources and contacts files
    resourcesRaf = new RandomAccessFile(resourcesFile, "r");
    resourcesRaf.seek(0L);
    String resourceLine = fileUtils.readUTFLine(resourcesRaf);
    int resourceLineNumber = 1;
    // put the header column properties into an array
    String[] resourceProperties = tabPattern.split(resourceLine);
    // remove all line breaking characters
    resourceProperties = fileUtils.removeLineBreakingCharacters(resourceProperties);

    while ((resourceLine = fileUtils.readUTFLine(resourcesRaf)) != null) {
      // set the position of the cursor (column 0 holds the record's line number)
      resourceLineNumber = Integer.valueOf(fileUtils.getDelimitedPart(resourceLine, "\t", 0));
      Map<String, Object> newParams = new HashMap<String, Object>();
      newParams.putAll(params);
      String schemaLocation = null;
      String conceptualSchema = "";
      String recordCount = "0";
      // copy each column value into newParams keyed by its header name
      for (int columnIndex = 1; columnIndex < resourceProperties.length; columnIndex++) {
        String property = resourceProperties[columnIndex];
        String value = fileUtils.getDelimitedPart(resourceLine, "\t", columnIndex);
        // NOTE(review): debug leftover - should use log.debug instead of stdout
        System.out.println("the value for property: " + property + " is: " + value);
        // ignore the value if it's null
        // NOTE(review): with '||' this condition is true for every value this loop
        // can see (a value cannot be both blank and equal to "null"), so the
        // else-if defaults below are unreachable; '&&' looks intended - confirm
        // before changing, as downstream consumers may rely on current behaviour
        if (StringUtils.isNotBlank(value) || !StringUtils.equalsIgnoreCase(value, "null")) {
          newParams.put(property, value);
        } else if (StringUtils.isBlank(value) && property.equals(minQueryTermLengthKeyName)) {
          newParams.put(property, "0");
        } else if (StringUtils.isBlank(value) && property.equals(maxInventoryResponseKeyName)) {
          newParams.put(property, "0");
        } else if (StringUtils.isBlank(value) && property.equals(maxSearchResponseKeyName)) {
          newParams.put(property, "0");
        }
      }

      // Determine the schema location
      if (newParams.containsKey(schemaLocationKeyName)) {
        schemaLocation = (String) newParams.get(schemaLocationKeyName);
      }
      // Get the resourceName
      String resourceName = null;
      if (newParams.containsKey(resourceNameKeyName)) {
        resourceName = (String) newParams.get(resourceNameKeyName);
      }
      String mappingFile = getMappingFile(schemaLocation, directory, resourceName);
      // Determine the protocol
      protocol = getProtocol(schemaLocation);
      // Determine the count
      if (newParams.containsKey(recordCountKeyName)) {
        recordCount = (String) newParams.get(recordCountKeyName);
      }
      // ensure resource relates to a recognized conceptualSchema
      boolean recognizedConceptualSchema = false;
      if (newParams.containsKey(conceptualSchemaKeyName)) {
        conceptualSchema = (String) newParams.get(conceptualSchemaKeyName);
        if (StringUtils.isNotBlank(conceptualSchema)) {
          recognizedConceptualSchema = conceptualSchemaWhitelist.contains(conceptualSchema);
        }
      }

      // only proceed with biodatasource creation if we have a resourceName
      if (StringUtils.isNotBlank(StringUtils.trimToNull(resourceName)) && recognizedConceptualSchema) {
        // Get location where we'll save the contact file
        String validatedResourceName = fileUtils.validateDirectoryName(resourceName);
        // create new directory if necessary
        File resourceDirectory = new File(directory, validatedResourceName);
        if (!resourceDirectory.exists()) {
          log.debug("Creating new directory: " + resourceDirectory.getAbsolutePath());
          resourceDirectory.mkdirs(); // including parents
        }
        // delete pre-existing contact file
        fileUtils.prepareDirectory(resourceDirectory.getAbsolutePath(), Constants.CONTACT_FILENAME);
        // create new contact file
        File newContactsFile =
          new File(resourceDirectory + "/" + Constants.CONTACT_FILENAME + Constants.TEXT_FILENAME_EXTENSION);
        newContactsFile.createNewFile();
        // create bufferedWriter on contact file
        BufferedWriter newContactsBW =
          new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newContactsFile, true), "UTF8"));
        // write header line
        fileUtils.writeValuesToFile(newContactsBW, contactPropertiesList);

        // Move over contacts as far as position of current cursor
        while (contactLineNumber <= resourceLineNumber && StringUtils.isNotBlank(contactLine)) {
          // retrieve all values, minus line number
          List<String> contactValues = retrieveStringListFromLine(contactLine);
          // write values to file
          fileUtils.writeValuesToFile(newContactsBW, contactValues);
          // move to next line
          contactLine = fileUtils.readUTFLine(contactsRaf);
          if (StringUtils.isNotBlank(contactLine)) {
            // NOTE(review): local 'lineNumber' shadows the instance field of the same name
            int lineNumber = Integer.valueOf(fileUtils.getDelimitedPart(contactLine, "\t", 0));
            if (lineNumber > contactLineNumber) {
              contactLineNumber = lineNumber;
            }
          }
        }
        // close BW
        newContactsBW.close();
        // log having written the files so that they appear in the console
        log.info("Writing to file: " + newContactsFile.getAbsolutePath());

        // Construct the new BioDatasource
        createOrUpdateBioDatasource(name, url, resourceName, recordCount, uddiKey, newParams, schemaLocation,
          mappingFile, protocol, (String) params.get("directory"));
      } else if (StringUtils.isBlank(StringUtils.trimToNull(resourceName))) {
        log.error("error.issueMetadata.noName");
      } else if (!recognizedConceptualSchema) {
        log.error("conceptualSchema (" + conceptualSchema + ") was not recognized for resource=" + resourceName
          + " therefore NO BioDatasource will be created");
      }
    }
  } catch (IOException e) {
    log.error("An IOException occurred during issueMetadata(): " + e.getMessage(), e);
  } finally {
    // close both cursors independently so one failure doesn't leak the other
    try {
      if (contactsRaf != null) {
        contactsRaf.close();
      }
    } catch (IOException e) {
      log.error("digirmetadatahandler.error.issueMetadata.closingCursors");
    }
    try {
      if (resourcesRaf != null) {
        resourcesRaf.close();
      }
    } catch (IOException e) {
      log.error("digirmetadatahandler.error.issueMetadata.closingCursors");
    }
  }
  log.info("end.issueMetadata");
}

/**
 * Executes a metadata request that retrieves information about the
 * resources behind a given access point and saves it in the output
 * directory.
 *
 * @param destination of the DiGIR access point to request against
 * @param outputDirectory to which the response will be saved
 * @param protocol name
 *
 * @return metadata response as ByteArrayInputStream
 *
 * @throws HarvesterException thrown if method fails
 */
public ByteArrayInputStream metadataRequest(String destination, String outputDirectory, String protocol)
  throws HarvesterException {
  log.info("start.metadataRequest");
  // build the parameters required for the template into a map
  Map<String, String> templateParams = new HashMap<String, String>();
  templateParams.put("destination", destination);

  // Prepare directory
  File directory = new File(outputDirectory);
  log.debug("start.metadataRequest.prepareDirectory");
  if (directory.isDirectory()) {
    try {
      // remove all metadata requests and responses
      fileUtils.prepareDirectory(outputDirectory, Constants.METADATA_PREFIX);
      fileUtils.prepareDirectory(outputDirectory, DigirMetadataHandler.RESOURCES_WITH_COUNT_FILENAME);
      log.debug("end.metadataRequest.prepareDirectory");
    } catch (Exception e) {
      log.error("error.metadataRequest.prepareDirectory", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }
  }

  // build the DiGIR metadata request from the velocity template
  String query;
  String request;
  String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/").concat(TEMPLATE_DIRECTORY_NAME)
    .concat("/").concat(REQUEST_TEMPLATE_FILENAME).concat(Constants.VELOCITY_FILENAME_EXTENSION);
  try {
    query = templateUtils.getAndMerge(templateLocation, templateParams);
    request = requestUtils.buildURL(destination, "request", query);
  } catch (Exception e) {
    log.error("error.metadataRequest.buildUrl", e.getMessage());
    throw new HarvesterException(e.getMessage(), e);
  }

  // save the request
  try {
    // NOTE(review): query.getBytes() uses the platform default charset - confirm
    // UTF-8 is intended here as it is elsewhere in this class
    fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_REQUEST_FILENAME,
      query.getBytes());
  } catch (IOException e) {
    // request archiving is best-effort; the harvest itself continues
    log.warn("error.metadataRequest.writeRequest", e.getMessage());
  }

  // fire the request
  ByteArrayInputStream is;
  byte[] array;
  try {
    // get response as byte array
    array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory, destination);
    // save the response and return the newly created file
    fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_RESPONSE_FILENAME, array);
    // set input stream
    is = new ByteArrayInputStream(array);
  }
  // was the operation stopped?
  catch (OperationStoppedException e) {
    throw new HarvesterException(e.getMessage(), e);
  } catch (IOException e) {
    log.error("error.metadataRequest.writeResponse", e.getMessage());
    throw new HarvesterException(e.getMessage(), e);
  }
  log.info("end.metadataRequest");
  return is;
}

/**
 * Parse the response file and write the parsed values to their
 * appropriate file.
*
 * @param inputStream representing harvested xml response
 *
 * @throws DocumentException thrown if parsing error occurred
 * @throws IOException thrown
 */
private void parseResponseFile(ByteArrayInputStream inputStream) throws DocumentException, IOException {
  // create a DOM4J tree, reading a Document from the given File
  // NOTE(review): SAXReader is used with default settings; if responses can come
  // from untrusted endpoints, external entity resolution (XXE) should be disabled
  SAXReader reader = new SAXReader();
  reader.setEncoding("UTF-8");
  Document document = reader.read(inputStream);
  document.setXMLEncoding("UTF-8");

  // get all resource Elements
  List<Node> resourceEntities =
    (metadataRepeatingElementsXpath.get(resourceEntityRepeatingElementName)).selectNodes(document);
  // iterate over resource Elements
  for (Node resourceEntity : resourceEntities) {
    // Detach resource Element and create new Document with it
    DefaultDocument doc1 = new DefaultDocument();
    doc1.setRootElement((Element) resourceEntity.detach());

    // get all resource contact Elements
    List<Node> resourceContacts =
      (metadataRepeatingElementsXpath.get(contactEntityRepeatingElementName)).selectNodes(doc1);
    // iterate over contact Elements
    for (Node resourceContact : resourceContacts) {
      // Detach relatedEntity Element and create new Document with it
      DefaultDocument doc2 = new DefaultDocument();
      doc2.setRootElement((Element) resourceContact.detach());
      // write hasContact elements-of-interest to file, tagged with the current
      // line number so contacts can later be matched back to their resource
      fileUtils.writeValuesToFile(resourceContactsBW, metadataResourceContactElementsOfInterest.values(), doc2,
        namespaceMap, String.valueOf(getLineNumber()));
    }
    // write relatedEntity elements-of-interest to file
    fileUtils.writeValuesToFile(resourcesBW, metadataElementsOfInterest.values(), doc1, namespaceMap,
      String.valueOf(getLineNumber()));
    setLineNumber(getLineNumber() + 1);
  }
}

/**
 * Iterates over the metadata mapping file, populating the various
 * elements-of-interest maps. Regular expressions divide the mapping file's
 * properties into the appropriate element-of-interest map.
* Note: The mapping file's properties are in the following format:
 * [element-of-interest name] + [property name] = [XPath expression]
 * The regular expression matches according to the [element-of-interest
 * name]
 * The corresponding element-of-interest map is then populated with: key =
 * [property name] & value = [XPath expression]
 *
 * @param mappingFile name
 * @param protocol name
 *
 * @throws HarvesterException thrown if method fails
 */
private void populateElementOfInterestsMapsFromMappingFile(String mappingFile, String protocol)
  throws HarvesterException {
  // Create regex patterns
  // we're interested in all non-contact related properties
  Pattern metadataKeyPattern = Pattern.compile("metadata([\\S]*)");
  Pattern providerContactKeyPattern = Pattern.compile("providerContact([\\S]*)");
  Pattern resourceContactKeyPattern = Pattern.compile("resourceContact([\\S]*)");

  // properties we harvest are read from file
  Properties mapping = new Properties();
  String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, protocol, MAPPING_DIRECTORY_NAME,
    mappingFile);
  InputStream is = null;
  try {
    // a missing mapping file surfaces as the NullPointerException caught below
    is = DigirMetadataHandler.class.getResourceAsStream(mappingFilePath);
    mapping.load(is);

    // Divide the mapping properties into various element-of-interest maps
    for (Object key : mapping.keySet()) {
      // NOTE(review): boxed Boolean where primitive boolean would do
      Boolean matched = false;
      // Matchers matching keys belonging to repeating element groups
      Matcher metadataKeyMatcher = metadataKeyPattern.matcher((String) key);
      if (metadataKeyMatcher.matches()) {
        String property = metadataKeyMatcher.group(1);
        metadataElementsOfInterest.put(property, mapping.getProperty((String) key));
        matched = true;
      }
      if (!matched) {
        Matcher providerContactKeyMatcher = providerContactKeyPattern.matcher((String) key);
        if (providerContactKeyMatcher.matches()) {
          String property = providerContactKeyMatcher.group(1);
          metadataProviderContactElementsOfInterest.put(property, mapping.getProperty((String) key));
          matched = true;
        }
        if (!matched) {
          Matcher resourceContactKeyMatcher = resourceContactKeyPattern.matcher((String) key);
          if (resourceContactKeyMatcher.matches()) {
            String property = resourceContactKeyMatcher.group(1);
            metadataResourceContactElementsOfInterest.put(property, mapping.getProperty((String) key));
            matched = true;
          }
          if (!matched) {
            // Determines the XPath expressions used to isolate repeating elements in a
            // metadata xml response.
            if (metadataRepeatingElementsXpath.keySet().contains(key)) {
              // construct an XPath expression for repeating Element
              DefaultXPath xpath = new DefaultXPath(mapping.getProperty((String) key));
              xpath.setNamespaceURIs(namespaceMap);
              metadataRepeatingElementsXpath.put((String) key, xpath);
            }
          }
        }
      }
    }
  } catch (NullPointerException e) {
    log.info("error.mappingFileExists", new String[] { mappingFilePath, e.getMessage() }, e);
    throw new HarvesterException(e.getMessage(), e);
  } catch (Exception e) {
    log.error("error.populateElementOfInterestsMapsFromMappingFile", new String[] { mappingFile, e.getMessage() }, e);
    throw new HarvesterException(e.getMessage(), e);
  } finally {
    if (is != null) {
      try {
        is.close();
      } catch (IOException e) {
        log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
      }
    }
  }
}

/**
 * Collect resource metadata, including resource contact metadata.
 * Resources metadata is output to file.
* Resource contact metadata is also outputted to file * * @param metadataResponse as ByteArrayInputStream * @param outputDirectory directory to write to * * @throws HarvesterException thrown if method fails */ private void processAllMetadata(ByteArrayInputStream metadataResponse, String outputDirectory) throws HarvesterException { log.info("start.processAllMetadata"); // create the output directory File directory = new File(outputDirectory); // Prepare directory log.debug("digirmetadatahandler.start.processAllMetadata.prepareDirectory"); if (directory.isDirectory()) { try { // remove all pre-existing contact tab files fileUtils.prepareDirectory(outputDirectory, DigirMetadataHandler.RESOURCES_WITH_COUNT_FILENAME); fileUtils.prepareDirectory(outputDirectory, Constants.CONTACT_FILENAME); log.debug("digirmetadatahandler.end.processAllMetadata.prepareDirectory"); } catch (Exception e) { log.error("digirmetadatahandler.error.processAllMetadata.prepareDirectory", e.getMessage(), e); throw new HarvesterException(e.getMessage(), e); } } // create the output files File resourcesFile = new File(directory, DigirMetadataHandler.RESOURCES_WITH_COUNT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION)); File resourceContactsFile = new File(directory, Constants.CONTACT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION)); // ensure that they exist anew try { resourcesFile.createNewFile(); resourceContactsFile.createNewFile(); } catch (IOException e) { log.error("digirmetadatahandler.error.processAllMetadata.createFiles", e.getMessage(), e); throw new HarvesterException(e.getMessage(), e); } // create file writers for each file try { resourcesBW = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(resourcesFile, true), "UTF8")); resourceContactsBW = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(resourceContactsFile, true), "UTF8")); } catch (IOException e) { log.error("error.createBWs", e.getMessage(), e); throw new HarvesterException(e.getMessage(), 
e); } // write header column line for each file try { // The header line is derived from the names of the properties fileUtils.writeHeaderLine(resourcesBW, metadataElementsOfInterest.keySet(), true); // an identification number column name is also written fileUtils.writeHeaderLine(resourceContactsBW, metadataResourceContactElementsOfInterest.keySet(), true); } catch (IOException e) { log.error("error.writeHeaders", e.getMessage(), e); throw new HarvesterException(e.getMessage(), e); } // parse metadata setLineNumber(1); try { parseResponseFile(metadataResponse); } catch (Exception e) { log.error("error.metadataRequest.parsing", e.getMessage(), e); throw new HarvesterException(e.getMessage(), e); } // close the buffer writers and inputStream, and log having written the files so that // they appear in the console try { resourcesBW.close(); resourceContactsBW.close(); log.info("Writing to file: " + resourcesFile.getAbsolutePath()); log.info("Writing to file: " + resourceContactsFile.getAbsolutePath()); // close inputStream metadataResponse.close(); } catch (IOException e) { log.error("error.closeBWs", e.getMessage(), e); throw new HarvesterException(e.getMessage(), e); } log.info("end.processAllMetadata"); } /** * Populates a list from the tab delimited Stings taken * from a String. 
* * @param line tab delimited String * * @return list of tab delimited Strings taken from input String */ private List<String> retrieveStringListFromLine(String line) { // put the header column properties into an array String[] array = tabPattern.split(line); // remove line breaking characters array = fileUtils.removeLineBreakingCharacters(array); // add properties to a list, excluding line number (index 0) List<String> list = new LinkedList<String>(); for (int i = 1; i < array.length; i++) { list.add(array[i]); } return list; } public void setBioDatasourceManager(BioDatasourceManager bioDatasourceManager) { this.bioDatasourceManager = bioDatasourceManager; } public void setLineNumber(int lineNumber) { this.lineNumber = lineNumber; } /** * @param synchroniserFactories the synchroniserFactories to set */ public void setSynchroniserFactories(List<AbstractSynchroniserFactory> synchroniserFactories) { this.synchroniserFactories = synchroniserFactories; } }