/*******************************************************************************
 * Copyright (C) 2008 Global Biodiversity Information Facility Secretariat.
 * All Rights Reserved.
 *
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 ******************************************************************************/
package org.gbif.harvest.tapir;

import org.gbif.harvest.AbstractHarvester;
import org.gbif.harvest.core.AbstractSynchroniserFactory;
import org.gbif.harvest.core.Constants;
import org.gbif.harvest.exception.HarvesterException;
import org.gbif.harvest.exception.OperationStoppedException;
import org.gbif.harvest.log.CommonGBIFLogEvent;
import org.gbif.harvest.log.I18nLog;
import org.gbif.harvest.log.I18nLogFactory;
import org.gbif.harvest.model.BioDatasource;
import org.gbif.harvest.service.BioDatasourceManager;
import org.gbif.harvest.util.FileUtils;
import org.gbif.harvest.util.GbifLogger;
import org.gbif.harvest.util.JSONUtils;
import org.gbif.harvest.util.RequestUtils;
import org.gbif.harvest.util.TemplateUtils;
import org.gbif.harvest.xml.DigesterUtils;
import org.gbif.util.BioDatasourceUtils;

import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.digester.Digester;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Level;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.DefaultDocument;
import org.dom4j.xpath.DefaultXPath;
import org.xml.sax.SAXException;

/**
 * This is a special handler for Tapir that creates a new BioDatasource
 * for each resource at a Tapir endpoint, and updates its target count.
 * Typically, there will only ever be a single resource per endpoint. But in
 * the case that the records are represented in ABCD, there are multiple
 * datasets, and the concept dataset title is searchable, each individual
 * dataset becomes a new BioDatasource.
 * In the event that a BioDatasource already exists, its metadata is updated.
 *
 * @author timrobertson
 * @author kbraak
 */
public class TapirMetadataHandler extends AbstractHarvester {

  // commons logging
  protected I18nLog log = I18nLogFactory.getLog(this.getClass());

  protected static final String BASE_LOCATION = "org/gbif/harvest/tapir";
  protected static final String MAPPING_DIRECTORY_NAME = "mapping";
  protected static final String TEMPLATE_DIRECTORY_NAME = "template";
  // the template for the capabilities request
  protected static final String CAPABILITIES_TEMPLATE_FILENAME = "capabilities";
  // the template for the search request
  protected static final String SEARCH_TEMPLATE_FILENAME = "search";
  // the TAPIR template for the metadata request
  protected static final String METADATA_TEMPLATE_FILENAME = "metadata";
  protected static final String METADATA_MAPPING_FILE_NAME = "metadataMapping";
  // the mapping file to determine which mapping file to use
  protected static final String CONCEPTUAL_MAPPING_FILENAME = "conceptualMapping";
  // the mapping file to determine which outputModel to use
  protected static final String OUTPUT_MODEL_MAPPING_FILENAME = "outputModelMapping";
  // element for determining the namespace
  protected static final String namespaceResponseXPathElement = "*/schema";
  protected static final String supportedNamespaceAttributeName = "namespace";
  // element for determining the count
  protected static final String COUNT_RESPONSE_XPATH_ELEMENT = "*/summary@totalMatched";
  // default constants corresponding to Tapir 1.0, DwC 1.4
  protected static final String DEFAULT_CONTENT_NAMESPACE = "http://rs.tdwg.org/dwc/dwcore/";
  protected static final String DEFAULT_OUTPUT_MODEL = "http://rs.tdwg.org/tapir/cs/dwc/1.4/model/dw_core_geo_cur.xml";
  protected static final String DEFAULT_MAPPING_FILE = "indexMapping_dwc_1_4";
  protected static final String DEFAULT_CONCEPTUAL_SCHEMA = "tapir_1_0";
  protected static final String languageAttributeName = "xml:lang";
  // complete list of Tapir's supported namespaces
  protected static List<String> supported_namespaces = new LinkedList<String>();
  // The names of the keys used in the repeating elements / xpath map.
  // These MUST be the same as the keys in the metadataMapping properties file
  // that identify the repeating elements' XPath expressions
  protected static final String RELATEDENTITY_REPEATING_ELEMENT_NAME = "reXPath";
  protected static final String HASCONTACT_REPEATING_ELEMENT_NAME = "hcXPath";

  // File writers
  protected static BufferedWriter relatedEntityBW = null;
  protected static BufferedWriter hasContactBW = null;

  // Maps of element-of-interest name / String XPath expression
  protected static Map<String, String> harvestedRelatedEntityElementsOfInterest = new HashMap<String, String>();
  protected static Map<String, String> harvestedHasContactElementsOfInterest = new HashMap<String, String>();

  // Maps of repeating element name / XPath expression key/value pairs
  protected static Map<String, String> settingsElementsOfInterest = new HashMap<String, String>();
  protected static Map<String, String> metadataElementsOfInterest = new HashMap<String, String>();
  protected Map<String, DefaultXPath> metadataRepeatingElementsXpath = new HashMap<String, DefaultXPath>();

  // key names: these names must be used in all metadata mapping files
  protected static String resourceNameKeyName = "dataResourceName";
  protected static String resourceDisplayNameKeyName = "dataResourceDisplayName";
  protected static String englishLanguageCode = "en";

  // for use in setting xpaths
  protected Map<String, String> namespaceMap = new HashMap<String, String>();

  private int lineNumber;
  private TemplateUtils templateUtils;
  private FileUtils fileUtils;
  private RequestUtils requestUtils;
  private DigesterUtils digesterUtils;
  private GbifLogger gbifLogger;
  private BioDatasourceManager bioDatasourceManager;
  private List<AbstractSynchroniserFactory> synchroniserFactories = new LinkedList<AbstractSynchroniserFactory>();

  public TapirMetadataHandler(TemplateUtils templateUtils, RequestUtils requestUtils, FileUtils fileUtils,
      DigesterUtils digesterUtils, GbifLogger gbifLogger, BioDatasourceManager bioDatasourceManager,
      List<AbstractSynchroniserFactory> synchroniserFactories) {
    this.templateUtils = templateUtils;
    this.requestUtils = requestUtils;
    this.fileUtils = fileUtils;
    this.digesterUtils = digesterUtils;
    this.gbifLogger = gbifLogger;
    this.bioDatasourceManager = bioDatasourceManager;
    this.synchroniserFactories = synchroniserFactories;
    init();
  }
  /**
   * Defaults supportsTitle to false and constructs the resource name (code).
   *
   * @param name             name of BioDatasource
   * @param url              access point URL
   * @param uddiKey          registry service UUID
   * @param params           map of BioDatasource params
   * @param contentNamespace contentNamespace
   * @param mappingFile      mappingFile
   * @param protocol         protocol name
   * @param settings         settings
   *
   * @return BioDatasource id
   *
   * @throws HarvesterException thrown if method fails
   * @see org.gbif.harvest.tapir.TapirMetadataHandler#createOrUpdateBioDatasource(String, String, String, String,
   *      java.util.Map, String, String, String, java.util.Map, String, String)
   */
  private Long createOrUpdateBioDatasource(String name, String url, String uddiKey, Map<String, Object> params,
      String contentNamespace, String mappingFile, String protocol, Map<String, String> settings)
      throws HarvesterException {
    // construct the resource name (last name in url)
    String resourceName = parseNameFromUrl(url);
    Long id = -1L;
    if (StringUtils.isNotBlank(StringUtils.trimToNull(resourceName))) {
      id = createOrUpdateBioDatasource(name, url, resourceName, uddiKey, params, contentNamespace, mappingFile,
          protocol, settings, "false", (String) params.get("directory"));
    } else {
      log.error("error.resourceName", new String[] {url, name});
    }
    return id;
  }

  /**
   * Constructs a new BioDatasource, or updates an existing one.
   * Uniqueness is based on the name and UDDI key.
   * Returns the created/updated BioDatasource's id, which is later used when
   * updating its target count.
   *
   * @param name                name of BioDatasource
   * @param url                 access point URL
   * @param resourceName        resource name
   * @param uddiKey             registry service UUID
   * @param params              map of BioDatasource params
   * @param contentNamespace    contentNamespace
   * @param mappingFile         mapping file name
   * @param protocol            protocol name
   * @param settings            settings
   * @param supportsTitle       boolean
   * @param parentDirectoryName parent directory name
   *
   * @return id of BioDatasource
   *
   * @throws HarvesterException thrown if method fails
   */
  private Long createOrUpdateBioDatasource(String name, String url, String resourceName, String uddiKey,
      Map<String, Object> params, String contentNamespace, String mappingFile, String protocol,
      Map<String, String> settings, String supportsTitle, String parentDirectoryName) throws HarvesterException {
    // Whether we're creating or updating, we always need to update params:
    params.put("url", url);
    params.put("resource_name", resourceName);
    params.put("contentNamespace", contentNamespace);
    params.put("mappingFile", mappingFile);
    params.put("protocol", protocol);
    params.put("supportsTitle", supportsTitle);
    params.put("harvesterFactory", Constants.TAPIR_HARVESTER_FACTORY);

    // construct the BioDatasource's name
    String newName = BioDatasourceUtils.constructBioDatasourceName(name, resourceName);
    params.put("name", newName);

    // construct the new, validated directory name
    String newValidDirectoryName =
        BioDatasourceUtils.constructBioDatasourceOperatorDirectoryName(resourceName, parentDirectoryName);
    params.put("directory", newValidDirectoryName);

    // get country name
    String country = null;
    if (params.containsKey("country")) {
      country = (String) params.get("country");
      // "country":null is converted to "country":"\"null\""
      if (StringUtils.equalsIgnoreCase(country, "\"null\"")) {
        country = null;
      }
    }

    // get provider name
    String dataProviderName = null;
    if (params.containsKey("providerName")) {
      dataProviderName = params.get("providerName").toString();
    }

    // add the settings info to params
    if (settings.containsKey("minQueryTermLength")) {
      params.put("minQueryTermLength", settings.get("minQueryTermLength"));
    } else {
      params.put("minQueryTermLength", "0");
    }
    if (settings.containsKey("maxResponseSize")) {
      params.put("maxResponseSize", settings.get("maxResponseSize"));
    } else {
      params.put("maxResponseSize", "0");
    }

    // add synchroniserFactories list to params
    synchroniserFactories = getSynchroniserFactories();
    List<String> factories = new LinkedList<String>();
    Iterator<AbstractSynchroniserFactory> iter = synchroniserFactories.iterator();
    while (iter.hasNext()) {
      Class cls = (iter.next().getClass());
      String clsName = cls.getName();
      factories.add(clsName);
    }
    params.put("synchroniserFactories", factories);

    // check if the retrieved entity has already been saved as a bioDatasource
    Long id = -1L;
    // get resource uuid
    String resourceUuid = null;
    if (params.containsKey("resourceUuid")) {
      resourceUuid = params.get("resourceUuid").toString();
    }
    // if it belongs to a resource, check using resourceKey in case the resource name has changed
    if (StringUtils.isNotBlank(resourceUuid)) {
      id = bioDatasourceManager.checkIfBioDatasourceExists(resourceUuid, uddiKey, Constants.TAPIR_HARVESTER_FACTORY);
    } else {
      id = bioDatasourceManager.checkIfBioDatasourceExists(newName, uddiKey);
    }

    int defaultCount = 0;
    try {
      // if this is a new BioDatasource
      if (id.compareTo(-1L) == 0) {
        // update params
        Map<String, Object> newParams = new HashMap<String, Object>();
        newParams.putAll(params);
        // newParams.put("directory", newValidDirectoryName);
        newParams.put("uddiKey", uddiKey);
        newParams.put("targetCount", defaultCount);
        String parametersAsJson = JSONUtils.jsonFromMap(newParams);

        // create new BioDatasource
        BioDatasource datasource = new BioDatasource(newName, dataProviderName, Constants.TAPIR_HARVESTER_FACTORY,
            parametersAsJson, uddiKey, country, url);
        BioDatasource bioDatasource = bioDatasourceManager.save(datasource);
        log.info("createBioDatasource", newName);
        return bioDatasource.getId();
      } else {
        BioDatasource bioDatasource = bioDatasourceManager.get(id);
        // update params
        Map<String, Object> oldParams = JSONUtils.mapFromJSON(bioDatasource.getParametersAsJSON());
        oldParams.putAll(params);
        bioDatasource.setParametersAsJSON(JSONUtils.jsonFromMap(oldParams));
        // in case the name got changed
        bioDatasource.setName(BioDatasourceUtils.prepareStringForUI(newName));
        // in case the url has changed
        bioDatasource.setUrl(BioDatasourceUtils.prepareStringForUI(url));
        // in case the country has changed
        bioDatasource.setCountry(BioDatasourceUtils.prepareStringForUI(country));
        // in case the provider name has changed
        bioDatasource.setProviderName(BioDatasourceUtils.prepareStringForUI(dataProviderName));
        bioDatasourceManager.save(bioDatasource);
        log.info("createBioDatasource.exists", bioDatasource.getName());
      }
    } catch (Exception e) {
      log.error("error.createBioDatasource", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }
    return id;
  }

  /**
   * @return the bioDatasourceManager
   */
  public BioDatasourceManager getBioDatasourceManager() {
    return bioDatasourceManager;
  }
  /**
   * Executes a capabilities request, saves the response as a file,
   * and returns that file.
   *
   * @param destination     access point URL
   * @param outputDirectory directory to write to
   * @param protocol        protocol name
   *
   * @return capabilities response as ByteArrayInputStream
   *
   * @throws HarvesterException thrown if method fails
   */
  public ByteArrayInputStream getCapabilities(String destination, String outputDirectory, String protocol)
      throws HarvesterException {
    log.info("tapirmetadatahandler.start.getCapabilities");

    // build the parameters required for the template into a map
    Map<String, String> templateParams = new HashMap<String, String>();
    templateParams.put("destination", destination);

    // Prepare directory
    File directory = new File(outputDirectory);
    log.debug("tapirmetadatahandler.start.getCapabilities.prepareDirectory");
    if (directory.isDirectory()) {
      try {
        // remove all capabilities requests and responses
        fileUtils.prepareDirectory(outputDirectory, Constants.CAPABILITIES_PREFIX);
        log.debug("tapirmetadatahandler.end.getCapabilities.prepareDirectory");
      } catch (Exception e) {
        log.error("tapirmetadatahandler.error.getCapabilities.prepareDirectory", e.getMessage(), e);
        throw new HarvesterException(e.getMessage(), e);
      }
    }

    // build the TAPIR capabilities request
    // NOTE: here we use the default protocol, as we don't
    // actually know the protocol yet. Ideally though, the capabilities
    // request would not change through different versions of TAPIR
    String query;
    String request;
    String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
        .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(CAPABILITIES_TEMPLATE_FILENAME)
        .concat(Constants.VELOCITY_FILENAME_EXTENSION);
    try {
      query = templateUtils.getAndMerge(templateLocation, templateParams);
      request = requestUtils.buildURL(destination, "request", query);
    } catch (Exception e) {
      log.error("tapirmetadatahandler.error.getCapabilities.buildUrl", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // save the request
    try {
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.CAPABILITIES_REQUEST_FILENAME,
          query.getBytes());
    } catch (IOException e) {
      log.warn("tapirmetadatahandler.error.getCapabilities.writeRequest", e.getMessage(), e);
    }

    // fire the request
    ByteArrayInputStream is;
    byte[] array;
    try {
      // get the response as a byte array
      log.debug("tapirmetadatahandler.getCapabilities.execute");
      array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory, destination);
      // save the response as a gzipped file
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.CAPABILITIES_RESPONSE_FILENAME, array);
      // convert the byte array into an inputStream
      is = new ByteArrayInputStream(array);
    }
    // was the operation stopped?
    catch (OperationStoppedException e) {
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getCapabilities.writeResponse", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    log.info("tapirmetadatahandler.end.getCapabilities");
    return is;
  }
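  // Editorial note (an assumption, not in the original source): buildURL
  // presumably URL-encodes the merged Velocity template and appends it as the
  // "request" parameter, so the capabilities call is a GET along the lines of
  //   http://example.org/tapir.php?request=<encoded capabilities query>
  // where the access point URL above is purely illustrative.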
  /**
   * Parse the search response for the count information.
   *
   * @param inputStream search response as ByteArrayInputStream
   *
   * @return count
   *
   * @throws HarvesterException thrown if method fails
   */
  public String getCount(ByteArrayInputStream inputStream) throws HarvesterException {
    log.info("tapirmetadatahandler.start.getCount");
    // retrieve the count
    String count;
    try {
      count = returnCount(inputStream, COUNT_RESPONSE_XPATH_ELEMENT);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getCount.parsing", e.getMessage());
      throw new HarvesterException(e.getMessage(), e);
    } catch (SAXException e) {
      log.error("tapirmetadatahandler.error.getCount.parsing", e.getMessage());
      throw new HarvesterException(e.getMessage(), e);
    }
    log.info("tapirmetadatahandler.end.getCount");
    return count;
  }

  public int getLineNumber() {
    return lineNumber;
  }

  /**
   * Determine the index mapping file.
   * If there is a problem loading the file, or no match exists for the
   * contentNamespace, the default is used.
   *
   * @param contentNamespace contentNamespace
   *
   * @return mapping file name
   *
   * @throws HarvesterException thrown if method fails
   */
  private String getMappingFile(String contentNamespace) throws HarvesterException {
    // Initially, set the mapping file to the default
    String mappingFile = DEFAULT_MAPPING_FILE;
    Properties mapping = new Properties();
    String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
        CONCEPTUAL_MAPPING_FILENAME);
    InputStream is = null;
    try {
      is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
      mapping.load(is);
      for (Object key : mapping.keySet()) {
        // a match on contentNamespace determines the mapping file
        if (StringUtils.equals(contentNamespace, (String) key)) {
          mappingFile = mapping.getProperty((String) key);
        }
      }
    } catch (NullPointerException e) {
      log.info("error.mappingFileExists", new String[] {mappingFilePath, e.getMessage()}, e);
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getMappingFile", e.getMessage(), e);
      log.debug("tapirmetadatahandler.default.getMappingFile", mappingFile);
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
        }
      }
    }
    return mappingFile;
  }
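  // Illustrative conceptualMapping.properties entry (an assumption based on the
  // lookup above, not copied from the actual file): keys are content namespaces,
  // values are index mapping file names, matching the class defaults, e.g.
  //   http\://rs.tdwg.org/dwc/dwcore/ = indexMapping_dwc_1_4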
  /**
   * Get the most prioritised content namespace.
   * In the event the capabilities response cannot be parsed,
   * the default content namespace is used.
   *
   * @param inputStream capabilities response as ByteArrayInputStream
   * @param directory   as String
   *
   * @return most prioritised content namespace
   *
   * @throws HarvesterException thrown if method fails
   */
  private String getNamespace(ByteArrayInputStream inputStream, String directory) throws HarvesterException {
    log.info("tapirmetadatahandler.start.getNamespace");
    // Initially, set the namespace to the default
    String newestNamespace = DEFAULT_CONTENT_NAMESPACE;
    // reset the stream as we're reading it a second time
    if (inputStream != null) {
      inputStream.reset();
    }
    // retrieve the list of supported namespaces
    try {
      // namespaces = returnNamespace(fis, NAMESPACE_RESPONSE_XPATH_ELEMENT);
      Set<String> namespaces = digesterUtils.xmlToListOfAttributeValuesForSingleElement(inputStream,
          TapirMetadataHandler.namespaceResponseXPathElement, TapirMetadataHandler.supportedNamespaceAttributeName);

      // Iterate through the ordered list of available namespaces and
      // determine which is the newest one amongst the set of supported
      // namespaces retrieved
      for (String supportedNamespace : supported_namespaces) {
        if (namespaces.contains(supportedNamespace)) {
          newestNamespace = supportedNamespace;
          log.debug("tapirmetadatahandler.getNamespace.chooseNamespace", newestNamespace);
          log.info("tapirmetadatahandler.end.getNamespace");
          return newestNamespace;
        }
      }
      // if not found, alert the operator
      log.error("tapirmetadatahandler.default.conceptualMappingNotFound", namespaces.toString());
      // and write a GBIF Log Message
      gbifLogger.openAndWriteToGbifLogMessageFile(directory,
          CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getName(),
          CommonGBIFLogEvent.COMMON_MESSAGES_UNKNOWN_SCHEMA_LOCATION.getValue(), Level.ERROR_INT,
          "None of the namespace(s) " + namespaces.toString()
              + " was found in the TAPIR conceptualMapping.properties file. Please update this file with valid"
              + " namespace(s) and try again. Defaulting to namespace http://rs.tdwg.org/dwc/dwcore/",
          1, false);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getNamespace.parsing", e.getMessage(), e);
      log.debug("tapirmetadatahandler.default.getNamespace.chooseNamespace", newestNamespace);
      // throw new HarvesterException(e.getMessage(), e);
    } catch (SAXException e) {
      log.error("tapirmetadatahandler.error.getNamespace.parsing", e.getMessage(), e);
      log.debug("tapirmetadatahandler.default.getNamespace.chooseNamespace", newestNamespace);
      // throw new HarvesterException(e.getMessage(), e);
    }

    // close the inputStream
    try {
      if (inputStream != null) {
        inputStream.close();
      }
    } catch (Exception e) {
      // do nothing
    }
    log.info("tapirmetadatahandler.end.getNamespace");
    return newestNamespace;
  }
  /**
   * Determine the outputModel from the appropriate mapping file.
   * If there is a problem loading the file, or no match exists for the
   * contentNamespace, the default is used.
   *
   * @param contentNamespace contentNamespace
   *
   * @return outputModel name
   *
   * @throws HarvesterException thrown if method fails
   */
  private String getOutputModel(String contentNamespace) throws HarvesterException {
    // Initially, set the outputModel to the default
    String outputModel = DEFAULT_OUTPUT_MODEL;
    Properties mapping = new Properties();
    String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
        OUTPUT_MODEL_MAPPING_FILENAME);
    boolean found = false;
    InputStream is = null;
    try {
      is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
      mapping.load(is);
      for (Object key : mapping.keySet()) {
        if (StringUtils.equals(contentNamespace, (String) key)) {
          outputModel = mapping.getProperty((String) key);
          found = true;
        }
      }
      // if not found, alert the operator
      if (!found) {
        log.error("digirmetadatahandler.default.outputModelMappingNotFound", contentNamespace);
      }
    } catch (NullPointerException e) {
      log.info("error.mappingFileExists", new String[] {mappingFilePath, e.getMessage()}, e);
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getOutputModel", e.getMessage(), e);
      log.debug("tapirmetadatahandler.default.getOutputModel", outputModel);
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
        }
      }
    }
    return outputModel;
  }
  /**
   * Executes a search request, saves the response as a file,
   * and returns that file.
   *
   * @param destination      access point URL
   * @param outputDirectory  directory to which the response will be saved
   * @param outputModel      outputModel
   * @param resourceName     resource name
   * @param datasetTitlePath dataset title path
   * @param protocol         protocol name
   *
   * @return search response as ByteArrayInputStream
   *
   * @throws HarvesterException thrown if method fails
   */
  public ByteArrayInputStream getSearch(String destination, String outputDirectory, String outputModel,
      String resourceName, String datasetTitlePath, String protocol) throws HarvesterException {
    log.info("tapirmetadatahandler.start.getSearch");

    // build the parameters required for the template into a map
    Map<String, String> templateParams = new HashMap<String, String>();
    templateParams.put("outputModel", outputModel);
    // if the dataset title name is not null, and the dataset title path is
    // not null, add a filter by title name (using the title path)
    if (StringUtils.isNotBlank(resourceName) && StringUtils.isNotBlank(datasetTitlePath)) {
      templateParams.put("datasetTitle", resourceName);
      templateParams.put("datasetTitlePath", datasetTitlePath);
    }

    // Prepare directory
    File directory = new File(outputDirectory);
    log.debug("tapirmetadatahandler.start.getSearch.prepareDirectory");
    if (directory.isDirectory()) {
      try {
        // remove all metadata requests and responses
        fileUtils.prepareDirectory(outputDirectory, Constants.OTHER_METADATA_PREFIX);
        log.debug("tapirmetadatahandler.end.getSearch.prepareDirectory");
      } catch (Exception e) {
        log.error("tapirmetadatahandler.error.getSearch.prepareDirectory", e.getMessage(), e);
        throw new HarvesterException(e.getMessage(), e);
      }
    }

    // build the TAPIR search request
    String query;
    String request;
    String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
        .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(SEARCH_TEMPLATE_FILENAME)
        .concat(Constants.VELOCITY_FILENAME_EXTENSION);
    try {
      query = templateUtils.getAndMerge(templateLocation, templateParams);
      request = requestUtils.buildURL(destination, "request", query);
    } catch (Exception e) {
      log.error("tapirmetadatahandler.error.getSearch.buildUrl", e.getMessage());
      throw new HarvesterException(e.getMessage(), e);
    }

    // save the request
    try {
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.OTHER_METADATA_REQUEST_FILENAME,
          query.getBytes());
    } catch (IOException e) {
      log.warn("tapirmetadatahandler.error.getSearch.writeRequest", e.getMessage());
    }

    // fire the request
    ByteArrayInputStream is;
    byte[] array;
    try {
      log.debug("tapirmetadatahandler.getSearch.execute");
      array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory, destination);
      // save the response and return the newly created file
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.OTHER_METADATA_RESPONSE_FILENAME, array);
      is = new ByteArrayInputStream(array);
    }
    // was the operation stopped?
    catch (OperationStoppedException e) {
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.getSearch.writeResponse", e.getMessage());
      throw new HarvesterException(e);
    }

    log.info("tapirmetadatahandler.end.getSearch");
    return is;
  }

  /**
   * Collect settings information from the capabilities response.
   *
   * @param inputStream capabilities response as ByteArrayInputStream
   *
   * @return settings
   *
   * @throws HarvesterException thrown if method fails
   */
  public Map<String, String> getSettings(ByteArrayInputStream inputStream) throws HarvesterException {
    log.info("tapirmetadatahandler.start.getSettings");
    Map<String, String> settings = new HashMap<String, String>();
    // retrieve settings information
    try {
      settings = digesterUtils.parseElementsOfInterest(inputStream, settingsElementsOfInterest, true);
    } catch (Exception e) {
      log.warn("tapirmetadatahandler.error.getSettings.parsing", e.getMessage(), e);
    }
    log.info("tapirmetadatahandler.end.getSettings");
    return settings;
  }

  /**
   * @return the synchroniserFactories
   */
  public List<AbstractSynchroniserFactory> getSynchroniserFactories() {
    return synchroniserFactories;
  }

  private void init() {
    // with default (tapir 1.0) values as placeholders
    metadataRepeatingElementsXpath = new HashMap<String, DefaultXPath>();
    metadataRepeatingElementsXpath.put(RELATEDENTITY_REPEATING_ELEMENT_NAME,
        new DefaultXPath("//vcard:relatedEntity"));
    metadataRepeatingElementsXpath.put(HASCONTACT_REPEATING_ELEMENT_NAME, new DefaultXPath("//vcard:hasContact"));

    // when more versions of TAPIR become available, these can no longer be hard-coded.
    namespaceMap = new HashMap<String, String>();
    namespaceMap.put("tapir_1_0", "http://rs.tdwg.org/tapir/1.0");
    namespaceMap.put("vcard", "http://www.w3.org/2001/vcard-rdf/3.0#");
  }

  /**
   * The entry point required for the user interface integration.
   *
   * @param params map of the datasource to which the operation belongs
   *
   * @throws HarvesterException thrown if method fails
   */
  public void issueMetadata(Map<String, String> params) throws HarvesterException {
    Map<String, Object> paramsCopy = new HashMap<String, Object>();
    paramsCopy.putAll(params);
    issueMetadata(params.get("name"), params.get("url"), params.get("uddiKey"),
        Constants.BASE_DIR.concat(File.separator).concat(params.get("directory")), paramsCopy);
  }
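  // Editorial note: the UI supplies a Map<String, String>, but it is copied into
  // a Map<String, Object> because the downstream params also carry non-String
  // values (e.g. the "synchroniserFactories" List and the integer "targetCount"),
  // and the relative "directory" value is resolved beneath Constants.BASE_DIR.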
  /**
   * Issues a capabilities request to a Tapir access point and
   * determines the highest priority content namespace.
   * In the event that the content namespace is ABCD, the possibility
   * exists that there may be several datasets behind it. Therefore,
   * a new BioDatasource is created for each dataset, with its
   * name and other attributes set accordingly. In the event that there is
   * only a single dataset behind it, a single new BioDatasource is created.
   * Typically, however, a Tapir access point only has a single resource.
   * In order to set the dataset's count, a search request must be sent
   * with no filter and the totalMatched attribute retrieved.
   * Where there are multiple datasets to gather counts for, additional
   * requests are sent, filtering by dataset title.
   * If an endpoint does not support searching by dataset title,
   * this information is logged.
   *
   * @param name      of the datasource
   * @param url       of the datasource
   * @param uddiKey   of the datasource
   * @param directory to save files to
   * @param params    map of the datasource
   *
   * @throws HarvesterException thrown if method fails
   */
  public void issueMetadata(String name, String url, String uddiKey, String directory, Map<String, Object> params)
      throws HarvesterException {
    log.info("start.issueMetadata");

    // Determine the protocol.
    // For now use the default protocol, as it is the only one
    String protocol = DEFAULT_CONCEPTUAL_SCHEMA;

    // populate the element-of-interest maps from the mapping file's properties
    populateElementOfInterestsMapsFromMappingFile(METADATA_MAPPING_FILE_NAME, protocol);

    // send the capabilities request and get the response as a ByteArrayInputStream
    ByteArrayInputStream capabilitiesResponse = getCapabilities(url, directory, protocol);

    // Determine the settings, i.e. maxResponseSize
    Map<String, String> settings = getSettings(capabilitiesResponse);

    // load the list of supported namespaces
    loadSupportedNamespaces();

    // Determine the content namespace
    String contentNamespace = getNamespace(capabilitiesResponse, directory);

    // Determine the mapping file
    String mappingFile = getMappingFile(contentNamespace);

    // create a single BioDatasource using the same datasource name
    Long id = createOrUpdateBioDatasource(name, url, uddiKey, params, contentNamespace, mappingFile, protocol,
        settings);

    // update the BioDatasource's target count, other metadata, and contact info
    if (id > 0) {
      updateMetadata(id, url);
    }
    log.info("end.issueMetadata");
  }

  /**
   * Load the supported namespaces into a list.
   * If there is a problem loading the file, or no match exists for the
   * contentNamespace, the list will remain empty.
   *
   * @throws HarvesterException thrown if method fails
   */
  private void loadSupportedNamespaces() throws HarvesterException {
    Properties mapping = new Properties();
    String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, MAPPING_DIRECTORY_NAME,
        CONCEPTUAL_MAPPING_FILENAME);
    InputStream is = null;
    try {
      is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
      mapping.load(is);
      for (Object key : mapping.keySet()) {
        // add the content namespace to the list of supported namespaces
        supported_namespaces.add((String) key);
      }
    } catch (NullPointerException e) {
      log.info("error.mappingFileExists", new String[] {mappingFilePath, e.getMessage()}, e);
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.loadSupportedNamespaces", e.getMessage(), e);
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
        }
      }
    }
  }
  /**
   * Executes a metadata request for the purpose of retrieving additional information
   * about the contacts, dataset, etc. The response is returned as a ByteArrayInputStream.
   *
   * @param destination     of the TAPIR access point to request against
   * @param outputDirectory to which the response will be saved
   * @param protocol        protocol name
   *
   * @return response as ByteArrayInputStream
   *
   * @throws HarvesterException thrown if method fails.
   */
  public ByteArrayInputStream metadataRequest(String destination, String outputDirectory, String protocol)
      throws HarvesterException {
    log.info("start.metadataRequest");

    // build the parameters required for the template into a map
    Map<String, String> templateParams = new HashMap<String, String>();
    templateParams.put("destination", destination);

    // Prepare directory
    File directory = new File(outputDirectory);
    log.debug("start.metadataRequest.prepareDirectory");
    if (directory.isDirectory()) {
      try {
        // remove all other metadata requests and responses
        fileUtils.prepareDirectory(outputDirectory, Constants.METADATA_PREFIX);
        log.debug("end.metadataRequest.prepareDirectory");
      } catch (Exception e) {
        log.error("error.metadataRequest.prepareDirectory", e.getMessage(), e);
        throw new HarvesterException(e.getMessage(), e);
      }
    }

    // build the TAPIR metadata request
    String query;
    String request;
    String templateLocation = BASE_LOCATION.concat("/").concat(protocol).concat("/")
        .concat(TEMPLATE_DIRECTORY_NAME).concat("/").concat(METADATA_TEMPLATE_FILENAME)
        .concat(Constants.VELOCITY_FILENAME_EXTENSION);
    try {
      query = templateUtils.getAndMerge(templateLocation, templateParams);
      request = requestUtils.buildURL(destination, "request", query);
    } catch (Exception e) {
      log.error("tapirmetadatahandler.error.getCapabilities.buildUrl", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // save the request
    try {
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_REQUEST_FILENAME,
          query.getBytes());
    } catch (IOException e) {
      log.warn("error.metadataRequest.writeRequest", e.getMessage());
    }

    // fire the request
    ByteArrayInputStream is;
    byte[] array;
    try {
      // execute the request and return the response as a byte array
      log.debug("tapirmetadatahandler.metadataRequest.execute");
      array = requestUtils.executePersistentGetRequestAndReturnByteArray(request, outputDirectory, destination);
      // save the response as a gzipped file
      fileUtils.writeSequentiallyNamedGzippedFile(outputDirectory, Constants.METADATA_RESPONSE_FILENAME, array);
      // convert the byte array into an inputStream
      is = new ByteArrayInputStream(array);
    }
    // was the operation stopped?
    catch (OperationStoppedException e) {
      throw new HarvesterException(e.getMessage(), e);
    } catch (IOException e) {
      log.error("error.metadataRequest.writeResponse", e.getMessage());
      throw new HarvesterException(e.getMessage(), e);
    }

    log.info("end.metadataRequest");
    return is;
  }

  /**
   * Parse the resource name (code) from the url.
   *
   * @param url access point URL
   *
   * @return resource name
   */
  private String parseNameFromUrl(String url) {
    Pattern namePattern = Pattern.compile("(.*)[$//]([\\S]*)");
    Matcher matcher = namePattern.matcher(url);
    String resourceName = null;
    if (matcher.matches()) {
      resourceName = matcher.group(2);
      log.info("Resource name (code) parsed from url = " + resourceName);
    }
    return resourceName;
  }
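  // Illustrative example (not from the original source): the greedy group (.*)
  // consumes up to the last '/' or '$', so group(2) is the trailing segment.
  // For the hypothetical url "http://example.org/tapir.php/specimens" the
  // parsed resource name would be "specimens".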
  /**
   * Parse the response file and write the parsed values to their
   * appropriate file.
   *
   * @param stream file representing harvested xml response as ByteArrayInputStream
   *
   * @throws DocumentException thrown if parsing errors occur
   * @throws IOException       thrown
   */
  private void parseResponseFile(ByteArrayInputStream stream) throws DocumentException, IOException {
    // create a DOM4J tree, reading a Document from the given File
    SAXReader reader = new SAXReader();
    reader.setEncoding("UTF-8");
    Document document = reader.read(stream);
    document.setXMLEncoding("UTF-8");

    // get all relatedEntity Elements
    List<Node> relatedEntities = (metadataRepeatingElementsXpath.get(RELATEDENTITY_REPEATING_ELEMENT_NAME))
        .selectNodes(document);

    // iterate over relatedEntity Elements
    for (Node relatedEntity : relatedEntities) {
      // Detach the relatedEntity Element and create a new Document with it
      DefaultDocument doc1 = new DefaultDocument();
      doc1.setRootElement((Element) relatedEntity.detach());

      // get all hasContact Elements
      List<Node> hasContacts = (metadataRepeatingElementsXpath.get(HASCONTACT_REPEATING_ELEMENT_NAME))
          .selectNodes(doc1);

      // iterate over hasContact Elements
      for (Node hasContact : hasContacts) {
        // Detach the hasContact Element and create a new Document with it
        DefaultDocument doc2 = new DefaultDocument();
        doc2.setRootElement((Element) hasContact.detach());

        // write the hasContact elements-of-interest to file
        fileUtils.writeValuesToFile(hasContactBW, harvestedHasContactElementsOfInterest.values(), doc2, namespaceMap,
            String.valueOf(getLineNumber()));
      }
      // write the relatedEntity elements-of-interest to file
      fileUtils.writeValuesToFile(relatedEntityBW, harvestedRelatedEntityElementsOfInterest.values(), doc1,
          namespaceMap, String.valueOf(getLineNumber()));
      setLineNumber(getLineNumber() + 1);
    }
  }
  /**
   * Iterates over the metadata mapping file (properties file), populating the
   * various elements-of-interest maps.
   * In most cases, regular expressions divide the mapping file's properties
   * into the appropriate element-of-interest map.
   * Where some properties actually represent repeating elements in a metadata
   * xml response, the standardised set of repeating element names are
   * located in a static list. Each repeating element name matches a key name
   * in the indexMapping properties file and is used to get at its XPath
   * expression.
   * Note: The mapping file's properties are in the following format:
   * [element-of-interest categoriser] + [property name] = [XPath expression]
   * The regular expression matches according to the [element-of-interest
   * categoriser].
   * The corresponding element-of-interest map is then populated with:
   * key = [property name] & value = [XPath expression]
   *
   * @param mappingFile name
   * @param protocol    protocol name
   *
   * @throws HarvesterException thrown if method fails
   */
  private void populateElementOfInterestsMapsFromMappingFile(String mappingFile, String protocol)
      throws HarvesterException {
    // Create regex patterns
    // contact-related patterns
    Pattern relatedEntityKeyPattern = Pattern.compile("relatedEntity([\\S]*)");
    Pattern hasContactKeyPattern = Pattern.compile("hasContact([\\S]*)");
    // non-contact, metadata related pattern
    Pattern metadataKeyPattern = Pattern.compile("metadata([\\S]*)");
    // non-contact, non-metadata, settings related pattern
    Pattern settingKeyPattern = Pattern.compile("setting([\\S]*)");

    // the properties we harvest are read from file
    Properties mapping = new Properties();
    String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, protocol, MAPPING_DIRECTORY_NAME,
        mappingFile);
    InputStream is = null;
    try {
      is = TapirMetadataHandler.class.getResourceAsStream(mappingFilePath);
      mapping.load(is);
      // Divide the mapping properties into the various element-of-interest maps
      for (Object key : mapping.keySet()) {
        Boolean matched = false;
        // Matchers matching keys belonging to repeating element groups
        Matcher relatedEntityKeyMatcher = relatedEntityKeyPattern.matcher((String) key);
        if (relatedEntityKeyMatcher.matches()) {
          String property = relatedEntityKeyMatcher.group(1);
          harvestedRelatedEntityElementsOfInterest.put(property, mapping.getProperty((String) key));
          matched = true;
        }
        if (!matched) {
          Matcher hasContactKeyMatcher = hasContactKeyPattern.matcher((String) key);
          if (hasContactKeyMatcher.matches()) {
            String property = hasContactKeyMatcher.group(1);
            harvestedHasContactElementsOfInterest.put(property, mapping.getProperty((String) key));
            matched = true;
          }
          if (!matched) {
            Matcher contactKeyMatcher = metadataKeyPattern.matcher((String) key);
            if (contactKeyMatcher.matches()) {
              String property = contactKeyMatcher.group(1);
              metadataElementsOfInterest.put(property, mapping.getProperty((String) key));
              matched = true;
            }
            if (!matched) {
              Matcher settingKeyMatcher = settingKeyPattern.matcher((String) key);
              if (settingKeyMatcher.matches()) {
                String property = settingKeyMatcher.group(1);
                settingsElementsOfInterest.put(property, mapping.getProperty((String) key));
                matched = true;
              }
              if (!matched) {
                // Determines the XPath expressions used to isolate repeating elements in a
                // metadata xml response.
                if (metadataRepeatingElementsXpath.keySet().contains(key)) {
                  // construct an XPath expression for the repeating Element
                  DefaultXPath xpath = new DefaultXPath(mapping.getProperty((String) key));
                  xpath.setNamespaceURIs(namespaceMap);
                  metadataRepeatingElementsXpath.put((String) key, xpath);
                }
              }
            }
          }
        }
      }
    } catch (NullPointerException e) {
      log.info("error.mappingFileExists", new String[] {mappingFilePath, e.getMessage()}, e);
      throw new HarvesterException(e.getMessage(), e);
    } catch (Exception e) {
      log.error("error.populateElementOfInterestsMapsFromMappingFile",
          new String[] {mappingFile, e.getMessage()}, e);
      throw new HarvesterException(e.getMessage(), e);
    } finally {
      if (is != null) {
        try {
          is.close();
        } catch (IOException e) {
          log.error("An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e);
        }
      }
    }
  }
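  // Illustrative metadataMapping entries (assumptions following the
  // [categoriser][property name] = [XPath] convention described above; the real
  // keys and XPaths live in the protocol's metadataMapping.properties file):
  //   metadatadataResourceName = //dc:title            -> metadataElementsOfInterest,
  //                                                       under key "dataResourceName"
  //   hasContactEmail          = //vcard:EMAIL         -> harvestedHasContactElementsOfInterest
  //   reXPath                  = //vcard:relatedEntity
  // "reXPath" matches a repeating-element key, so its XPath replaces the
  // tapir 1.0 placeholder registered in init().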
  /**
   * Processes the metadata response by a particular element-of-interest
   * XPath, and outputs all possible language alternatives listed for it.
   * A single Map.Entry will consist of: key=language, value=element value.
   * For example:
   * <dc:title xml:lang="en">National Taiwan University</dc:title>
   * <dc:title xml:lang="zh-TW">BlaBla</dc:title>
   * Here, the output would be two Map.Entries: <"en", "National Taiwan University">
   * and <"zh-TW", "BlaBla">
   *
   * @param stream            metadata response as ByteArrayInputStream
   * @param elementOfInterest XPath
   *
   * @return all possible language alternatives for the term
   *
   * @throws HarvesterException thrown if method fails
   */
  private Map<String, String> processAllLanguageAlternativesForAParticularElementOfInterest(
      ByteArrayInputStream stream, String elementOfInterest) throws HarvesterException {
    Map<String, String> processed = new HashMap<String, String>();
    try {
      processed = digesterUtils.xmlToMapForSingleElement(stream, elementOfInterest,
          TapirMetadataHandler.languageAttributeName);
    } catch (Exception e) {
      log.warn("error.processMetadata.parsing", new String[] {elementOfInterest, e.getMessage()}, e);
    }
    return processed;
  }

  /**
   * Processes the metadata response by the mapping file
   * that corresponds to a particular protocol.
   *
   * @param stream metadata response as ByteArrayInputStream
   *
   * @return map with concept-name/value key/value pairs
   *
   * @throws HarvesterException thrown if the method fails
   */
  public Map<String, String> processMetadata(ByteArrayInputStream stream) throws HarvesterException {
    log.info("start.processMetadata");
    Map<String, String> processed = new HashMap<String, String>();
    // retrieve the metadata elements of interest
    try {
      processed = digesterUtils.parseElementsOfInterest(stream, metadataElementsOfInterest, true);
    } catch (Exception e) {
      log.warn("error.processMetadata.parsing", e.getMessage(), e);
    }
    log.info("end.processMetadata");
    return processed;
  }
  /**
   * Process the metadata response for the contact information by the mapping
   * file that corresponds to the particular protocol.
   * Because there may be several contacts, and a single contact can contain
   * several different attributes, they are saved to file(s) rather than being
   * saved into JSON format like the other metadata extracted in the
   * processMetadata method.
   * The output is written to two tab-delimited files, each with column header
   * definition lines:
   * relatedEntity.txt
   * contact.txt
   *
   * @param stream          metadataResponse as ByteArrayInputStream
   * @param outputDirectory directory to write to
   *
   * @throws HarvesterException thrown if method fails
   */
  public void processMetadataForContacts(ByteArrayInputStream stream, String outputDirectory)
      throws HarvesterException {
    log.info("start.processMetadataForContacts");

    // create the output directory
    File directory = new File(outputDirectory);

    // Prepare directory
    log.debug("tapirmetadatahandler.start.processMetadataForContacts.prepareDirectory");
    if (directory.isDirectory()) {
      try {
        // remove all pre-existing contact tab files
        fileUtils.prepareDirectory(outputDirectory, Constants.RELATED_ENTITY_FILENAME);
        fileUtils.prepareDirectory(outputDirectory, Constants.CONTACT_FILENAME);
        log.debug("tapirmetadatahandler.end.processMetadataForContacts.prepareDirectory");
      } catch (Exception e) {
        log.error("tapirmetadatahandler.error.processMetadataForContacts.prepareDirectory", e.getMessage(), e);
        throw new HarvesterException(e.getMessage(), e);
      }
    }

    // get the various files
    File relatedEntityFile = new File(directory,
        Constants.RELATED_ENTITY_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));
    File hasContactFile = new File(directory, Constants.CONTACT_FILENAME.concat(Constants.TEXT_FILENAME_EXTENSION));

    // ensure that they exist anew
    try {
      relatedEntityFile.createNewFile();
      hasContactFile.createNewFile();
    } catch (IOException e) {
      log.error("tapirmetadatahandler.error.processMetadataForContacts.createFiles", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // create file writers for each file
    try {
      relatedEntityBW = new BufferedWriter(
          new OutputStreamWriter(new FileOutputStream(relatedEntityFile, true), "UTF8"));
      hasContactBW = new BufferedWriter(
          new OutputStreamWriter(new FileOutputStream(hasContactFile, true), "UTF8"));
    } catch (IOException e) {
      log.error("error.createBWs", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // write the header column line for each file
    try {
      // The header line is derived from the names of the properties
      fileUtils.writeHeaderLine(relatedEntityBW, harvestedRelatedEntityElementsOfInterest.keySet(), true);
      // an identification number column name is also written
      fileUtils.writeHeaderLine(hasContactBW, harvestedHasContactElementsOfInterest.keySet(), true);
    } catch (IOException e) {
      log.error("error.writeHeaders", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // parse the metadata for contacts
    setLineNumber(1);
    try {
      parseResponseFile(stream);
    } catch (Exception e) {
      log.error("error.metadataRequest.parsing", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }

    // close the buffered writers, and log having written the files so that
    // they appear in the console
    try {
      relatedEntityBW.close();
      hasContactBW.close();
      log.info("Writing to file: " + relatedEntityFile.getAbsolutePath());
      log.info("Writing to file: " + hasContactFile.getAbsolutePath());
    } catch (IOException e) {
      log.error("error.closeBWs", e.getMessage(), e);
      throw new HarvesterException(e.getMessage(), e);
    }
    log.info("end.processMetadataForContacts");
  }
  /**
   * Returns the value of the English entry from a map of language/value
   * pairs, if it exists.
   * It is assumed the key is the language, and the value is the
   * corresponding value.
   * If there is no English value, null is returned.
   *
   * @param map alternative language entries
   *
   * @return the English value, or null
   */
  private String retrieveValueForEnglishEntry(Map<String, String> map) {
    String englishEntry = null;
    Iterator<Map.Entry<String, String>> iter = map.entrySet().iterator();
    while (iter.hasNext()) {
      Map.Entry<String, String> entry = iter.next();
      String language = entry.getKey();
      if (StringUtils.equalsIgnoreCase(language, TapirMetadataHandler.englishLanguageCode)) {
        englishEntry = entry.getValue();
      }
    }
    return englishEntry;
  }

  /**
   * Parse the search response for the record count.
   *
   * @param inputStream    search response as ByteArrayInputStream
   * @param itemOfInterest XPath to the count element
   *
   * @return count
   *
   * @throws IOException  thrown
   * @throws SAXException thrown
   */
  private String returnCount(ByteArrayInputStream inputStream, String itemOfInterest)
      throws IOException, SAXException {
    List<String> records = new LinkedList<String>();
    Digester digester = new Digester();
    digester.setNamespaceAware(true);
    digester.push(records);
    if (itemOfInterest.contains("@")) {
      String[] parts = itemOfInterest.split("@");
      digester.addCallMethod(parts[0], "add", 1);
      // digester.addObjectParam(parts[0], 0, parts[0] + "@" + parts[1]);
      digester.addCallParam(parts[0], 0, parts[1]);
    } else {
      digester.addCallMethod(itemOfInterest, "add", 1);
      // digester.addObjectParam(itemOfInterest, 0, itemOfInterest);
      digester.addCallParam(itemOfInterest, 0);
    }
    digester.parse(inputStream);

    // close the inputStream
    try {
      if (inputStream != null) {
        inputStream.close();
      }
    } catch (Exception e) {
      // do nothing
    }

    if (records.size() != 1) {
      return "0";
    }
    return records.get(0);
  }
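  // Illustrative response fragment (an assumption, not taken from the original
  // source): with the default itemOfInterest "*/summary@totalMatched", the
  // Digester pattern "*/summary" plus attribute "totalMatched" would extract
  // "1234" from a search response shaped like
  //   <response xmlns="http://rs.tdwg.org/tapir/1.0">
  //     <search>
  //       <summary totalMatched="1234"/>
  //     </search>
  //   </response>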
  /**
   * @param bioDatasourceManager the bioDatasourceManager to set
   */
  public void setBioDatasourceManager(BioDatasourceManager bioDatasourceManager) {
    this.bioDatasourceManager = bioDatasourceManager;
  }

  public void setLineNumber(int lineNumber) {
    this.lineNumber = lineNumber;
  }

  /**
   * @param synchroniserFactories the synchroniserFactories to set
   */
  public void setSynchroniserFactories(List<AbstractSynchroniserFactory> synchroniserFactories) {
    this.synchroniserFactories = synchroniserFactories;
  }

  /**
   * Defaults the dataset title and dataset title path to null.
   *
   * @param id  BioDatasource id
   * @param url access point URL
   *
   * @throws HarvesterException thrown if method fails
   * @see org.gbif.harvest.tapir.TapirMetadataHandler#updateMetadata(Long, String, String, String)
   */
  public void updateMetadata(Long id, String url) throws HarvesterException {
    updateMetadata(id, url, null, null);
  }

  /**
   * Update a BioDatasource's target count and other metadata.
   *
   * @param id               BioDatasource id
   * @param url              access point URL
   * @param datasetTitle     dataset title
   * @param datasetTitlePath dataset title path
   *
   * @throws HarvesterException thrown if method fails
   */
  public void updateMetadata(Long id, String url, String datasetTitle, String datasetTitlePath)
      throws HarvesterException {
    log.debug("start.updateMetadata");

    // retrieve the BioDatasource
    BioDatasource bioDatasource = bioDatasourceManager.get(id);

    // retrieve the BioDatasource's directory
    Map<String, Object> params = JSONUtils.mapFromJSON(bioDatasource.getParametersAsJSON());
    String bioDatasourceDirectory = Constants.BASE_DIR.concat(File.separator)
        .concat((String) params.get("directory"));
    String protocol = (String) params.get("protocol");

    // determine the outputModel from the appropriate mapping file
    String contentNamespace = params.get("contentNamespace").toString();
    String outputModel = getOutputModel(contentNamespace);

    // get the count 'String'
    String resource_count = null;
    try {
      // send the search request and get the response as a ByteArrayInputStream
      ByteArrayInputStream searchResponse = getSearch(url, bioDatasourceDirectory, outputModel, datasetTitle,
          datasetTitlePath, protocol);
      // parse the response for the count information
      resource_count = getCount(searchResponse);
    } catch (HarvesterException e) {
      log.error("error.gettingCount", bioDatasource.getName(), e);
    }

    // check the count is a proper integer value, then set it as targetCount
    if (StringUtils.trimToNull(resource_count) != null) {
      int targetCount = 0;
      try {
        targetCount = Integer.valueOf(resource_count);
      } catch (NumberFormatException e) {
        log.warn("Problem occurred converting resource count: " + resource_count);
      } finally {
        params.put("targetCount", String.valueOf(targetCount));
        // update the BioDatasource's target count attribute
        bioDatasource.setTargetCount(targetCount);
        log.info("updateCount", String.valueOf(targetCount));
      }
    }
    // otherwise update the BioDatasource's params with the default count
    else {
      params.put("targetCount", "0");
      bioDatasource.setTargetCount(0);
    }

    ByteArrayInputStream metadataResponse = metadataRequest(url, bioDatasourceDirectory, protocol);
    Map<String, String> processed = null;
    Map<String, String> processedDataResourceNames;
    try {
      // for all parameters with no alternative languages
      processed = processMetadata(metadataResponse);

      // for all parameters with alternative languages
      // 1. data resource title
      String dataResourceNameXPath = metadataElementsOfInterest.get(resourceNameKeyName);
      // remember to reset the inputStream first
      metadataResponse.reset();
      processedDataResourceNames = processAllLanguageAlternativesForAParticularElementOfInterest(metadataResponse,
          dataResourceNameXPath);
      // get the English dataResourceName
      String englishDataResourceName = retrieveValueForEnglishEntry(processedDataResourceNames);
      if (StringUtils.isNotBlank(englishDataResourceName)) {
        processed.put(resourceNameKeyName, englishDataResourceName);
        processed.put(resourceDisplayNameKeyName, englishDataResourceName);
      }

      // for all contact related metadata
      // remember to reset the inputStream first
      metadataResponse.reset();
      processMetadataForContacts(metadataResponse, bioDatasourceDirectory);
    } catch (HarvesterException e) {
      // do nothing; the error is logged below
    }
    // update other metadata
    if (processed != null && processed.size() > 0) {
      if (processed.containsKey(resourceNameKeyName)) {
        String dataResourceName = StringUtils.trimToNull(processed.get(resourceNameKeyName));
        if (StringUtils.isBlank(dataResourceName) || StringUtils.equalsIgnoreCase(dataResourceName, "NULL")) {
          log.error("tapirmetadatahandler.error.updateMetadata.dataResourceName", bioDatasource.getName());
          params.put(resourceNameKeyName, bioDatasource.getName());
          params.put(resourceDisplayNameKeyName, bioDatasource.getName());
        }
      }
    } else {
      log.error("tapirmetadatahandler.error.updateMetadata.metadataRequest", bioDatasource.getName());
    }

    // add all metadata to params (guard against a null map if the metadata request failed)
    if (processed != null) {
      params.putAll(processed);
    }

    // re-save params
    bioDatasource.setParametersAsJSON(JSONUtils.jsonFromMap(params));

    // save the BioDatasource
    bioDatasourceManager.save(bioDatasource);
    log.debug("end.updateMetadata");
  }
}
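/*
 * Usage sketch (editorial addition, not part of the original class). It assumes
 * the collaborators (TemplateUtils, RequestUtils, FileUtils, DigesterUtils,
 * GbifLogger, BioDatasourceManager and the synchroniser factories) are already
 * configured, e.g. by the harvester's dependency-injection context; all literal
 * values below are hypothetical.
 *
 *   TapirMetadataHandler handler = new TapirMetadataHandler(templateUtils,
 *       requestUtils, fileUtils, digesterUtils, gbifLogger, bioDatasourceManager,
 *       synchroniserFactories);
 *
 *   Map<String, String> params = new HashMap<String, String>();
 *   params.put("name", "Example Natural History Museum");
 *   params.put("url", "http://example.org/tapir.php/specimens");
 *   params.put("uddiKey", "00000000-0000-0000-0000-000000000000");
 *   params.put("directory", "example_museum"); // resolved under Constants.BASE_DIR
 *
 *   // issues the capabilities, search and metadata requests, then creates or
 *   // updates the corresponding BioDatasource(s)
 *   handler.issueMetadata(params);
 */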