org.fao.geonet.kernel.harvest.harvester.thredds.Harvester.java Source code

Introduction

Here is the source code for org.fao.geonet.kernel.harvest.harvester.thredds.Harvester.java
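
The Harvester class is GeoNetwork's THREDDS harvester: it crawls a THREDDS catalogue, generates ISO19119 metadata for the services it finds and ISO19139 (or profile) metadata for the datasets they deliver, and removes previously harvested records whose uris no longer exist on the remote site.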

Source

//=============================================================================
//===   Copyright (C) 2001-2007 Food and Agriculture Organization of the
//===   United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===   and United Nations Environment Programme (UNEP)
//===
//===   Copyright (C) 2008-2011 CSIRO Marine and Atmospheric Research,
//=== Australia
//===
//===   This program is free software; you can redistribute it and/or modify
//===   it under the terms of the GNU General Public License as published by
//===   the Free Software Foundation; either version 2 of the License, or (at
//===   your option) any later version.
//===
//===   This program is distributed in the hope that it will be useful, but
//===   WITHOUT ANY WARRANTY; without even the implied warranty of
//===   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===   General Public License for more details.
//===
//===   You should have received a copy of the GNU General Public License
//===   along with this program; if not, write to the Free Software
//===   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//===   Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===   Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.thredds;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.net.ssl.SSLHandshakeException;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.fao.geonet.Constants;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.AbstractMetadata;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.exceptions.BadServerCertificateEx;
import org.fao.geonet.exceptions.BadXmlResponseEx;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.SchemaManager;
import org.fao.geonet.kernel.UpdateDatestamp;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.CategoryMapper;
import org.fao.geonet.kernel.harvest.harvester.GroupMapper;
import org.fao.geonet.kernel.harvest.harvester.HarvestError;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.kernel.harvest.harvester.IHarvester;
import org.fao.geonet.kernel.harvest.harvester.RecordInfo;
import org.fao.geonet.kernel.harvest.harvester.UriMapper;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester.FragmentParams;
import org.fao.geonet.kernel.harvest.harvester.fragment.FragmentHarvester.HarvestSummary;
import org.fao.geonet.kernel.setting.SettingInfo;
import org.fao.geonet.lib.Lib;
import org.fao.geonet.util.Sha1Encoder;
import org.fao.geonet.utils.GeonetHttpRequestFactory;
import org.fao.geonet.utils.Xml;
import org.fao.geonet.utils.XmlRequest;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;

import jeeves.server.context.ServiceContext;
import jeeves.xlink.Processor;
import thredds.catalog.InvAccess;
import thredds.catalog.InvCatalogFactory;
import thredds.catalog.InvCatalogImpl;
import thredds.catalog.InvCatalogRef;
import thredds.catalog.InvDataset;
import thredds.catalog.InvMetadata;
import thredds.catalog.InvService;
import thredds.catalog.ServiceType;
import thredds.catalog.ThreddsMetadata;
import thredds.catalog.dl.DIFWriter;
import ucar.nc2.Attribute;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.dataset.NetcdfDatasetInfo;
import ucar.nc2.ncml.NcMLWriter;
import ucar.nc2.units.DateType;
import ucar.unidata.util.StringUtil;

//=============================================================================

/**
 * A ThreddsHarvester is able to generate metadata for datasets and services from a Thredds
 * catalogue. Metadata for datasets are generated using dataset information contained in the thredds
 * catalogue document or from opening the dataset and retrieving variables, coordinate systems
 * and/or global attributes.
 *
 * Metadata produced are:
 * <ul>
 *   <li>ISO19119 for service metadata (all services in the catalog)</li>
 *   <li>ISO19139 (or profile) metadata for datasets in the catalog</li>
 * </ul>
 *
 * <pre>
 * <nodes>
 *  <node type="thredds" id="114">
 *    <site>
 *      <name>TEST</name>
 *      <uuid>c1da2928-c866-49fd-adde-466fe36d3508</uuid>
 *      <account>
 *        <use>true</use>
 *        <username />
 *        <password />
 *      </account>
 *      <url>http://localhost:5556/thredds/catalog.xml</url>
 *      <icon>default.gif</icon>
 *    </site>
 *    <options>
 *      <every>90</every>
 *      <oneRunOnly>false</oneRunOnly>
 *      <status>active</status>
 *      <lang>eng</lang>
 *      <createThumbnails>false</createThumbnails>
 *      <createServiceMd>false</createServiceMd>
 *      <createCollectionDatasetMd>true</createCollectionDatasetMd>
 *      <createAtomicDatasetMd>false</createAtomicDatasetMd>
 *      <ignoreHarvestOnCollections>true</ignoreHarvestOnCollections>
 * Choice of {
 *      <outputSchemaOnCollectionsDIF>iso19139</outputSchemaOnCollectionsDIF>
 * } OR {
 *      <outputSchemaOnCollectionsFragments>iso19139</outputSchemaOnCollectionsFragments>
 *      <collectionFragmentStylesheet>collection_fragments.xsl</collectionFragmentStylesheet>
 *      <collectionMetadataTemplate>My template</collectionMetadataTemplate>
 *      <createCollectionSubtemplates>false</createCollectionSubtemplates>
 * }
 *      <ignoreHarvestOnAtomics>true</ignoreHarvestOnAtomics>
 * Choice of {
 *      <outputSchemaOnAtomicsDIF>iso19139.mcp</outputSchemaOnAtomicsDIF>
 * } OR {
 *      <outputSchemaOnAtomicsFragments>iso19139</outputSchemaOnAtomicsFragments>
 *      <atomicFragmentStylesheet>atomic_fragments.xsl</atomicFragmentStylesheet>
 *      <atomicMetadataTemplate>My template</atomicMetadataTemplate>
 *      <createAtomicSubtemplates>false</createAtomicSubtemplates>
 * }
 *      <modifiedOnly>true</modifiedOnly>
 *      <datasetCategory></datasetCategory>
 *    </options>
 *    <privileges>
 *      <group id="1">
 *        <operation name="view" />
 *      </group>
 *    </privileges>
 *    <categories>
 *      <category id="3" />
 *    </categories>
 *    <info>
 *      <lastRun>2007-12-05T16:17:20</lastRun>
 *      <running>false</running>
 *    </info>
 *  </node>
 * </nodes>
 * </pre>
 *
 * @author Simon Pigot
 */
class Harvester extends BaseAligner<ThreddsParams> implements IHarvester<HarvestResult> {

    static private final Namespace difNS = Namespace.getNamespace("http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/");
    static private final Namespace invCatalogNS = Namespace
            .getNamespace("http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0");

    static private final Namespace gmd = Namespace.getNamespace("gmd", "http://www.isotc211.org/2005/gmd");
    static private final Namespace srv = Namespace.getNamespace("srv", "http://www.isotc211.org/2005/srv");
    static private final Namespace xlink = Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");

    private Logger log;
    private ServiceContext context;
    private DataManager dataMan;
    private SchemaManager schemaMan;
    private CategoryMapper localCateg;
    private GroupMapper localGroups;
    private UriMapper localUris;
    private HarvestResult result;
    private String hostUrl;
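    //--- uris harvested in this run; compared against previously harvested uris to detect deletions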
    private HashSet<String> harvestUris = new HashSet<String>();
    private Path cdmCoordsToIsoKeywordsStyleSheet;
    private Path cdmCoordsToIsoMcpDataParametersStyleSheet;
    private Path fragmentStylesheetDirectory;
    private String metadataGetService;
    private Map<String, ThreddsService> services = new HashMap<String, Harvester.ThreddsService>();
    private InvCatalogImpl catalog;
    private FragmentHarvester atomicFragmentHarvester;
    private FragmentHarvester collectionFragmentHarvester;
    private List<HarvestError> errors = new LinkedList<HarvestError>();

    public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, ThreddsParams params) {
        super(cancelMonitor);
        this.log = log;
        this.context = context;
        this.params = params;

        result = new HarvestResult();

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        dataMan = gc.getBean(DataManager.class);
        schemaMan = gc.getBean(SchemaManager.class);

        SettingInfo si = context.getBean(SettingInfo.class);
        String siteUrl = si.getSiteUrl() + context.getBaseUrl();
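        //--- local base uri used below when uncaching subtemplate xlinks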
        metadataGetService = "local://" + context.getNodeId() + "/api/records/";

        //--- Create fragment harvester for atomic datasets if required
        if (params.createAtomicDatasetMd && params.atomicMetadataGeneration.equals(ThreddsParams.FRAGMENTS)) {
            atomicFragmentHarvester = new FragmentHarvester(cancelMonitor, log, context, getAtomicFragmentParams());
        }

        //--- Create fragment harvester for collection datasets if required
        if (params.createCollectionDatasetMd
                && params.collectionMetadataGeneration.equals(ThreddsParams.FRAGMENTS)) {
            collectionFragmentHarvester = new FragmentHarvester(cancelMonitor, log, context,
                    getCollectionFragmentParams());
        }
    }

    //---------------------------------------------------------------------------

    /**
     * Start the harvesting of a thredds catalog
     **/

    public HarvestResult harvest(Logger log) throws Exception {
        this.log = log;

        Element xml = null;
        log.info("Retrieving remote metadata information for : " + params.getName());

        //--- Get uuid's and change dates of metadata records previously
        //--- harvested by this harvester grouping by harvest uri
        localUris = new UriMapper(context, params.getUuid());

        //--- Try to load thredds catalog document
        String url = params.url;
        try {
            XmlRequest req = context.getBean(GeonetHttpRequestFactory.class).createXmlRequest();
            req.setUrl(new URL(url));
            req.setMethod(XmlRequest.Method.GET);
            Lib.net.setupProxy(context, req);

            xml = req.execute();
        } catch (SSLHandshakeException e) {
            throw new BadServerCertificateEx("Most likely cause: The thredds catalog " + url + " does not have a "
                    + "valid certificate. If you feel this is because the server may be "
                    + "using a test certificate rather than a certificate from a well "
                    + "known certification authority, then you can add this certificate "
                    + "to the GeoNetwork keystore using bin/installCert");
        }

        //--- Traverse catalog to create services and dataset metadata as required
        harvestCatalog(xml);

        //--- Remove previously harvested metadata for uris that no longer exist on the remote site
        for (String localUri : localUris.getUris()) {
            if (cancelMonitor.get()) {
                return this.result;
            }

            if (!harvestUris.contains(localUri)) {
                for (RecordInfo record : localUris.getRecords(localUri)) {
                    if (cancelMonitor.get()) {
                        return this.result;
                    }

                    if (log.isDebugEnabled())
                        log.debug("  - Removing deleted metadata with id: " + record.id);
                    dataMan.deleteMetadata(context, record.id);

                    if (record.isTemplate.equals("s")) {
                        //--- Uncache xlinks if a subtemplate
                        Processor.uncacheXLinkUri(metadataGetService + record.uuid);
                        result.subtemplatesRemoved++;
                    } else {
                        result.locallyRemoved++;
                    }
                }
            }
        }

        dataMan.flush();

        result.totalMetadata = result.serviceRecords + result.collectionDatasetRecords
                + result.atomicDatasetRecords;
        return result;
    }

    //---------------------------------------------------------------------------

    /**
     * Add metadata to GN for the services and datasets in a thredds catalog
     *
     * <ol>
     *   <li>Open the catalog document</li>
     *   <li>Crawl the catalog, processing datasets as ISO19139 records and recording services
     *       (attaching dataset ids to the services that deliver them)</li>
     *   <li>Process the services found as ISO19119 records</li>
     *   <li>Create a service record for the thredds catalog service itself, listing the service
     *       records as something that the thredds catalog provides</li>
     *   <li>Save all</li>
     * </ol>
     *
     * @param cata Catalog document
     **/

    private void harvestCatalog(Element cata) throws Exception {

        if (cata == null)
            return;

        //--- loading categories and groups
        localCateg = new CategoryMapper(context);
        localGroups = new GroupMapper(context);

        //--- Setup proxy authentication
        Lib.net.setupProxy(context);

        //--- load catalog
        InvCatalogFactory factory = new InvCatalogFactory("default", true);
        catalog = (InvCatalogImpl) factory.readXML(params.url);
        StringBuilder buff = new StringBuilder();
        if (!catalog.check(buff, true)) {
            throw new BadXmlResponseEx("Invalid catalog " + params.url + "\n" + buff.toString());
        }

        //--- display catalog read in log file
        log.info("Catalog read from " + params.url + " is \n" + factory.writeXML(catalog));
        Path serviceStyleSheet = context.getAppPath().resolve(Geonet.Path.IMPORT_STYLESHEETS)
                .resolve("ThreddsCatalog-to-ISO19119_ISO19139.xsl");

        //--- Get base host url
        URL url = new URL(params.url);
        hostUrl = url.getProtocol() + "://" + url.getHost();
        if (url.getPort() != -1)
            hostUrl += ":" + url.getPort();

        //--- Crawl all datasets in the thredds catalogue
        log.info("Crawling the datasets in the catalog....");
        List<InvDataset> dsets = catalog.getDatasets();
        for (InvDataset ds : dsets) {
            if (cancelMonitor.get()) {
                return;
            }

            crawlDatasets(ds);
        }

        //--- show how many datasets have been processed
        int totalDs = result.collectionDatasetRecords + result.atomicDatasetRecords;
        log.info("Processed " + totalDs + " datasets.");

        if (params.createServiceMd) {
            //--- process services found by crawling the catalog
            log.info("Processing " + services.size() + " services...");
            processServices(cata, serviceStyleSheet);

            //--- finally create a service record for the thredds catalog itself and
            //--- add uuids of services that it provides to operatesOn element
            //--- (not sure that this is what we should do here really - the catalog
            //--- is a dataset and a service?)
            log.info("Creating service metadata for thredds catalog...");
            Map<String, Object> param = new HashMap<String, Object>();
            param.put("lang", params.lang);
            param.put("topic", params.topic);
            param.put("uuid", params.getUuid());
            param.put("url", params.url);
            param.put("name", catalog.getName());
            param.put("type", "Thredds Data Service Catalog " + catalog.getVersion());
            param.put("version", catalog.getVersion());
            param.put("desc", Xml.getString(cata));
            param.put("props", catalog.getProperties().toString());
            param.put("serverops", "");

            if (log.isDebugEnabled())
                log.debug("  - XSLT transformation using " + serviceStyleSheet);
            Element md = Xml.transform(cata, serviceStyleSheet, param);

            //--- TODO: Add links to services provided by the thredds catalog - but
            //--- where do we do this in ISO19119?
            saveMetadata(md, Sha1Encoder.encodeString(params.url), params.url);

            harvestUris.add(params.url);

            result.serviceRecords++;
        }
    }

    //---------------------------------------------------------------------------

    /**
     * Crawl all datasets in the catalog recursively
     *
     * @param catalogDs the dataset being processed
     * @throws Exception
     **/

    private void crawlDatasets(InvDataset catalogDs) throws Exception {
        log.info("Crawling through " + catalogDs.getName());

        // HACK!! Get real dataset hidden by netcdf library when catalog ref name
        // equals top dataset name in referenced catalog
        InvDataset realDs = catalogDs;
        if (catalogDs instanceof InvCatalogRef) {
            InvDataset proxyDataset = ((InvCatalogRef) catalogDs).getProxyDataset();
            realDs = proxyDataset.getName().equals(catalogDs.getName()) ? proxyDataset : catalogDs;
        }

        if (realDs.hasNestedDatasets()) {
            List<InvDataset> dsets = realDs.getDatasets();
            for (InvDataset ds : dsets) {
                crawlDatasets(ds);
            }
        }

        if (harvestMetadata(realDs)) {
            log.info("Harvesting dataset: " + realDs.getName());
            harvest(realDs);
        } else {
            log.info("Skipping dataset: " + realDs.getName());
        }

        // Release resources allocated when crawling catalog references
        if (catalogDs instanceof InvCatalogRef) {
            ((InvCatalogRef) catalogDs).release();
        }
    }

    //---------------------------------------------------------------------------

    /**
     * Save the metadata to GeoNetwork's database
     *
     * @param md   the metadata being saved
     * @param uuid the uuid of the metadata being saved
     * @param uri  the uri from which the metadata has been harvested
     **/

    private void saveMetadata(Element md, String uuid, String uri) throws Exception {

        //--- strip the catalog namespace as it is not required
        md.removeNamespaceDeclaration(invCatalogNS);

        String schema = dataMan.autodetectSchema(md, null); // should be iso19139
        if (schema == null) {
            log.warning("Skipping metadata with unknown schema.");
            result.unknownSchema++;
            return; // really skip - inserting a record without a schema id would fail below
        }

        log.info("  - Adding metadata with " + uuid + " schema is set to " + schema + "\n XML is "
                + Xml.getString(md));

        deleteExistingMetadata(uri);

        //
        // insert metadata
        //
        AbstractMetadata metadata = new Metadata();
        metadata.setUuid(uuid);
        metadata.getDataInfo().setSchemaId(schema).setRoot(md.getQualifiedName()).setType(MetadataType.METADATA);
        metadata.getSourceInfo().setSourceId(params.getUuid()).setOwner(getOwner())
                .setGroupOwner(Integer.valueOf(params.getOwnerIdGroup()));
        metadata.getHarvestInfo().setHarvested(true).setUuid(params.getUuid()).setUri(uri);

        addCategories(metadata, params.getCategories(), localCateg, context, log, null, false);
        metadata = dataMan.insertMetadata(context, metadata, md, true, false, false, UpdateDatestamp.NO, false,
                false);

        String id = String.valueOf(metadata.getId());

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context, log);

        dataMan.indexMetadata(id, true, null);

        dataMan.flush();
    }

    //---------------------------------------------------------------------------
    //---
    //--- Private methods
    //---
    //---------------------------------------------------------------------------

    /**
     * Process one dataset generating metadata as per harvesting node settings
     *
     * @param ds the dataset to be processed
     * @throws Exception
     **/

    private void harvest(InvDataset ds) throws Exception {
        //--- harvest metadata only if the dataset has changed
        if (!params.modifiedOnly || datasetChanged(ds)) {
            if (harvestMetadataUsingFragments(ds)) {
                createMetadataUsingFragments(ds);
            } else {
                createDIFMetadata(ds);
            }
        }

        //--- Add dataset uri to list of harvested uri's
        harvestUris.add(getUri(ds));

        //--- Record uuid of dataset against services that deliver it for
        //--- inclusion in operatesOn element in 19119 service record
        List<InvAccess> accesses = ds.getAccess();
        for (InvAccess access : accesses) {
            processService(access.getService(), getUuid(ds), ds);
        }
    }

    /**
     * Get dataset uri
     *
     * @param ds the dataset to be processed
     **/

    private String getUri(InvDataset ds) {
        if (ds.getID() == null) {
            return ds.getParentCatalog().getUriString() + "#" + ds.getName();
        } else {
            return getSubsetUrl(ds);
        }
    }

    /**
     * Has the dataset been modified since its metadata was last harvested?
     *
     * @param ds the dataset to be processed
     **/

    private boolean datasetChanged(InvDataset ds) {
        List<RecordInfo> localRecords = localUris.getRecords(getUri(ds));

        if (localRecords == null)
            return true;

        Date lastModifiedDate = null;

        List<DateType> dates = ds.getDates();

        for (DateType date : dates) {
            if (date.getType().equalsIgnoreCase("modified")) {
                lastModifiedDate = date.getDate();
            }
        }

        if (lastModifiedDate == null)
            return true;

        String datasetModifiedDate = new ISODate(lastModifiedDate.getTime(), false).toString();

        for (RecordInfo localRecord : localRecords) {
            if (localRecord.isOlderThan(datasetModifiedDate))
                return true;
        }

        return false;
    }

    /**
     * Delete all metadata previously harvested for a particular uri
     *
     * @param uri uri for which previously harvested metadata should be deleted
     **/

    private void deleteExistingMetadata(String uri) throws Exception {
        List<RecordInfo> localRecords = localUris.getRecords(uri);

        if (localRecords == null)
            return;

        for (RecordInfo record : localRecords) {
            dataMan.deleteMetadata(context, record.id);

            if (record.isTemplate.equals("s")) {
                //--- Uncache xlinks if a subtemplate
                Processor.uncacheXLinkUri(metadataGetService + record.uuid);
            }
        }
    }

    /**
     * Create metadata using fragments
     *
     * <ul>
     *   <li>collect useful metadata for the dataset</li>
     *   <li>use supplied stylesheet to convert collected metadata into fragments</li>
     *   <li>harvest metadata from fragments as requested</li>
     * </ul>
     *
     * Metadata collected is as follows:
     *
     * <pre>
     * {@code
     * <root>
     *    <catalogUri>http://someserver.com/thredds/catalog.xml</catalogUri>
     *    <uuid>uuid-generated-for-dataset</uuid>
     *    <catalog xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0"
     * xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0.1">
     *        ... subset of catalog containing dataset as the top dataset ...
     *    </catalog>
     *    <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2"
     * location="example1.nc">
     *       ... ncml generated for netcdf dataset ...
     *       ... atomic datasets only ...
     *    </netcdf>
     * </root>
     * }
     * </pre>
     **/

    private void createMetadataUsingFragments(InvDataset ds) {
        try {
            log.info("Retrieving thredds/netcdf metadata...");

            //--- Create root element to collect dataset metadata to be passed to xsl transformation
            Element dsMetadata = new Element("root");

            //--- Add catalog uri (url) to allow relative urls to be resolved
            dsMetadata.addContent(new Element("catalogUri").setText(ds.getParentCatalog().getUriString()));

            //--- Add suggested uuid for dataset
            dsMetadata.addContent(new Element("uuid").setText(getUuid(ds)));

            //--- Add fullName of dataset
            dsMetadata.addContent(new Element("fullName").setText(ds.getFullName()));

            //--- Add dataset subset catalog information to metadata
            dsMetadata.addContent(getDatasetSubset(ds));

            //--- For atomic dataset's add ncml for dataset to metadata
            if (!ds.hasNestedDatasets()) {
                NetcdfDataset ncD = NetcdfDataset.openDataset("thredds:" + ds.getCatalogUrl());
                NcMLWriter ncmlWriter = new NcMLWriter();
                Element ncml = Xml.loadString(ncmlWriter.writeXML(ncD), false);
                dsMetadata.addContent(ncml);
            }

            if (log.isDebugEnabled())
                log.debug("Thredds metadata and ncml is:" + Xml.getString(dsMetadata));

            //--- Create fragments using provided stylesheet

            String schema = ds.hasNestedDatasets() ? params.outputSchemaOnCollectionsFragments
                    : params.outputSchemaOnAtomicsFragments;
            fragmentStylesheetDirectory = schemaMan.getSchemaDir(schema).resolve(Geonet.Path.TDS_STYLESHEETS);
            String stylesheet = ds.hasNestedDatasets() ? params.collectionFragmentStylesheet
                    : params.atomicFragmentStylesheet;

            Element fragments = Xml.transform(dsMetadata, fragmentStylesheetDirectory.resolve(stylesheet));
            if (log.isDebugEnabled())
                log.debug("Fragments generated for dataset:" + Xml.getString(fragments));

            //--- remove any previously harvested metadata/sub-templates
            deleteExistingMetadata(getUri(ds));

            //--- Create metadata/subtemplates from fragments
            FragmentHarvester fragmentHarvester = ds.hasNestedDatasets() ? collectionFragmentHarvester
                    : atomicFragmentHarvester;
            HarvestSummary fragmentResult = fragmentHarvester.harvest(fragments, getUri(ds));

            //--- Include fragment results in thredds results
            result.fragmentsReturned += fragmentResult.fragmentsReturned;
            result.fragmentsUnknownSchema += fragmentResult.fragmentsUnknownSchema;
            result.subtemplatesAdded += fragmentResult.fragmentsAdded;
            result.fragmentsMatched += fragmentResult.fragmentsMatched;

            if (ds.hasNestedDatasets()) {
                result.collectionDatasetRecords += fragmentResult.recordsBuilt;
            } else {
                result.atomicDatasetRecords += fragmentResult.recordsBuilt;
            }
        } catch (Exception e) {
            log.error("Thrown Exception " + e + " during dataset processing");
            e.printStackTrace();
        }
    }

    /**
     * Return a catalog having the specified dataset as the top dataset resolving inherited metadata
     * and required services
     *
     * @param ds the dataset to be processed
     */

    private Element getDatasetSubset(InvDataset ds) throws Exception {
        String datasetSubsetUrl = getSubsetUrl(ds);

        return Xml.loadFile(new URL(datasetSubsetUrl));
    }

    /**
     * Return url to a catalog having the specified dataset as the top dataset
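     * e.g. http://server/thredds/catalog.xml?dataset=urlEncodedDatasetId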
     *
     * @param ds the dataset to be processed
     **/

    private String getSubsetUrl(InvDataset ds) {
        try {
            return ds.getParentCatalog().getUriString() + "?dataset="
                    + URLEncoder.encode(ds.getID(), Constants.ENCODING);
        } catch (UnsupportedEncodingException e) {
            log.error("Thrown Exception " + e + " during dataset processing");
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Get uuid for dataset
     *
     * @param ds the dataset to be processed
     **/

    private String getUuid(InvDataset ds) {
        String uuid = ds.getUniqueID();

        if (uuid == null) {
            uuid = Sha1Encoder.encodeString(ds.getCatalogUrl()); // sha1 of full dataset url
        } else {
            uuid = StringUtil.allow(uuid, "_-.", '-');
        }

        return uuid;
    }

    /**
     * Process one dataset by extracting its metadata, writing to DIF and using xslt to transform to
     * the required ISO format.
     *
     * @param ds the dataset to be processed
     */

    private void createDIFMetadata(InvDataset ds) {
        try {

            boolean addCoordSys = false; // add coordinate systems if not DIF relaxed

            //--- TODO: Thredds has a metadata converter interface and some other
            //--- methods of handling metadata (including XML of different
            //--- namespaces) in the catalog - this is a place holder for getting
            //--- this info in future
            List<InvMetadata> mds = ds.getMetadata();
            log.info("Dataset has " + mds.size() + " metadata elements");
            for (InvMetadata md : mds) {
                log.info("Found metadata " + md.toString());
            }

            //--- check and see whether this dataset is DIF writeable
            DIFWriter difWriter = new DIFWriter();
            StringBuffer sBuff = new StringBuffer();
            Element dif = null;

            if (difWriter.isDatasetUseable(ds, sBuff)) {
                log.info("Yay! Dataset has DIF compatible metadata " + sBuff.toString());

                dif = difWriter.writeOneEntry(ds, sBuff);

            } else {
                log.info("Dataset does not have DIF compatible metadata so we will write a relaxed DIF entry\n"
                        + sBuff.toString());

                dif = difWriter.writeOneRelaxedEntry(ds, sBuff);
                addCoordSys = true;
            }

            //--- get the UUID assigned to the DIF record
            String uuid = dif.getChild("Entry_ID", difNS).getText();

            boolean isCollection = ds.hasNestedDatasets();
            log.info("Dataset is a collection dataset? " + isCollection);

            //--- now convert DIF entry into an ISO entry using the appropriate
            //--- difToIso converter (only schemas with a DIF converter are
            //--- supplied to the user for choice)
            Element md = null;
            if (isCollection) {
                Path difToIsoStyleSheet = schemaMan.getSchemaDir(params.outputSchemaOnCollectionsDIF)
                        .resolve(Geonet.Path.DIF_STYLESHEETS).resolve("DIFToISO.xsl");
                log.info("Transforming collection dataset to " + params.outputSchemaOnCollectionsDIF);
                md = Xml.transform(dif, difToIsoStyleSheet);
            } else {
                Path difToIsoStyleSheet = schemaMan.getSchemaDir(params.outputSchemaOnAtomicsDIF)
                        .resolve(Geonet.Path.DIF_STYLESHEETS).resolve("DIFToISO.xsl");
                log.info("Transforming atomic dataset to " + params.outputSchemaOnAtomicsDIF);
                md = Xml.transform(dif, difToIsoStyleSheet);
            }

            //--- if we don't have full set of DIF metadata then
            //--- if atomic dataset then check dataset for global attributes
            //--- and/or dump coordinate systems else
            //--- if collection then check for ThreddsMetadata.Variables and
            //--- create a netcdfInfo for addition to the ISO record
            if (addCoordSys) {
                boolean globalAttributes = false;
                if (!isCollection) { // open up atomic dataset for info
                    log.info("Opening dataset to get global attributes");
                    //--- if not a dataset collection then
                    //--- open and check global attributes for metadata conventions
                    try {
                        NetcdfDataset ncD = NetcdfDataset.openDataset("thredds:" + ds.getCatalogUrl());
                        Attribute mdCon = ncD.findGlobalAttributeIgnoreCase("metadata_conventions");
                        if (mdCon != null) {
                            List<Attribute> ga = ncD.getGlobalAttributes();
                            for (Attribute att : ga) {
                                if (log.isDebugEnabled())
                                    log.debug("Attribute found " + att.toString());
                                //--- TODO: Attach the attributes to the metadata node
                                //--- for conversion into the ISO record by an xslt
                            }
                        } else {
                            if (log.isDebugEnabled())
                                log.debug("No global attribute with metadata conventions found");
                        }
                        ncD.close();
                    } catch (Exception e) {
                        log.info("Exception raised in netcdfDataset ops: " + e);
                        e.printStackTrace();
                    }
                }

                //--- if no metadata conventions then find the coordinate systems
                //--- and add these to the appropriate place in whatever ISO or ISO
                //--- profile we are using - MCP: mcp:dataParameters & gmd:keywords,
                //--- ISO: gmd:keywords
                boolean foundNetcdfInfo = false;
                if (!globalAttributes && !isCollection) {
                    log.info(
                            "No global attributes describing metadata so opening dataset to get coordinate systems");
                    try {
                        NetcdfDatasetInfo ncDI = new NetcdfDatasetInfo("thredds:" + ds.getCatalogUrl());
                        log.info("Coordinate systems builder is " + ncDI.getConventionUsed());
                        if (!ncDI.getConventionUsed().equals("None")) {
                            Document doc = ncDI.makeDocument();
                            Element coords = doc.detachRootElement();
                            log.info("Coordinate systems of dataset are: \n" + Xml.getString(coords));
                            setCoordsStyleSheet(isCollection);
                            addKeywordsAndDataParams(coords, md);
                            foundNetcdfInfo = true;
                        } else {
                            if (log.isDebugEnabled())
                                log.debug("Coordinate system convention is not recognized");
                        }
                        ncDI.close();
                    } catch (Exception e) {
                        log.info("Exception raised in netcdfDatasetInfo ops: " + e);
                        e.printStackTrace();
                    }
                }

                //--- finally - check and see whether we can extract variables from the
                //--- ThreddsMetadata - we no longer care whether this is a collection
                //--- or atomic
                if (!globalAttributes && !foundNetcdfInfo) {
                    //--- get ThreddsMetadata.Variables and create a netcdfDatasetInfo
                    //--- document if possible
                    List<ThreddsMetadata.Variables> vsL = ds.getVariables();
                    if (vsL != null && vsL.size() > 0) {
                        for (ThreddsMetadata.Variables vs : vsL) {
                            String vHref = vs.getVocabHref();
                            URI vUri = vs.getVocabUri();
                            String vocab = vs.getVocabulary();
                            Element coords = new Element("netcdfDatasetInfo");
                            for (ThreddsMetadata.Variable v : vs.getVariableList()) {
                                Element varX = new Element("variable");
                                varX.setAttribute("name", v.getName());
                                varX.setAttribute("decl", v.getDescription());
                                varX.setAttribute("units", v.getUnits());
                                // - these three attributes are new but then there is no
                                // - xsd for this so we can add as we want!
                                varX.setAttribute("vocab", vocab);
                                varX.setAttribute("vocaburi", vUri.toString());
                                varX.setAttribute("vocabhref", vHref);
                                coords.addContent(varX);
                            }
                            log.info("Coordinate systems from ThreddsMetadata are: \n" + Xml.getString(coords));
                            setCoordsStyleSheet(isCollection);
                            addKeywordsAndDataParams(coords, md);
                        }
                    }
                }
            }

            //--- write metadata
            saveMetadata(md, uuid, getUri(ds));

            //--- update totals
            if (isCollection) {
                result.collectionDatasetRecords++;
            } else {
                result.atomicDatasetRecords++;
            }
        } catch (Exception e) {
            log.error("Thrown Exception " + e + " during dataset processing");
            e.printStackTrace();
        }
    }

    /**
     * Create the coordinate stylesheet names that will be used to add gmd:keywords and
     * mcp:DataParameters if the output schema requires.
     *
     * @param isCollection true if we are working with a collection dataset
     */
    private void setCoordsStyleSheet(boolean isCollection) {

        Path schemaDir;
        if (!isCollection) {
            schemaDir = schemaMan.getSchemaDir(params.outputSchemaOnAtomicsDIF);
        } else {
            schemaDir = schemaMan.getSchemaDir(params.outputSchemaOnCollectionsDIF);
        }

        cdmCoordsToIsoKeywordsStyleSheet = schemaDir.resolve(Geonet.Path.DIF_STYLESHEETS)
                .resolve("CDMCoords-to-ISO19139Keywords.xsl");

        // -- FIXME: This is still schema dependent and needs to be improved
        // -- What we wait upon is finalization of the new coverage data parameters
        // -- metadata elements (inside MD_ContentInformation) in ISO19115/19139
        if (schemaDir.toString().contains("iso19139.mcp")) {
            cdmCoordsToIsoMcpDataParametersStyleSheet = schemaDir.resolve(Geonet.Path.DIF_STYLESHEETS)
                    .resolve("CDMCoords-to-ISO19139MCPDataParameters.xsl");
        } else {
            cdmCoordsToIsoMcpDataParametersStyleSheet = null;
        }
    }

    /**
     * Process a netcdfinfo document - adding variables as keywords and mcp:DataParameters if the
     * output schema requires.
     *
     * @param coords the netcdfinfo document with coord systems embedded
     * @param md     ISO metadata record to add keywords and data params to
     **/

    private void addKeywordsAndDataParams(Element coords, Element md) throws Exception {
        Element keywords = Xml.transform(coords, cdmCoordsToIsoKeywordsStyleSheet);
        addKeywords(md, keywords);
        if (cdmCoordsToIsoMcpDataParametersStyleSheet != null) {
            Element dataParameters = Xml.transform(coords, cdmCoordsToIsoMcpDataParametersStyleSheet);
            log.info("mcp:DataParameters are: \n" + Xml.getString(dataParameters));
            addDataParameters(md, dataParameters);
        }
    }

    /**
     * Process a service reference in a dataset - record details of the service and add the details
     * of a dataset to the list of datasets it serves - Note: compound services are expanded.
     *
     * @param serv the service to be processed
     * @param uuid uuid of the dataset that is delivered by this service
     * @param ds   dataset that is being delivered by this service
     **/

    private void processService(InvService serv, String uuid, InvDataset ds) {

        //--- get service, if compound service then get all nested services
        List<InvService> servs = new ArrayList<InvService>();
        if (serv.getServiceType() == ServiceType.COMPOUND) {
            servs.addAll(serv.getServices());
        } else {
            servs.add(serv);
        }

        //--- add dataset info to the appropriate ThreddsService
        for (InvService s : servs) {
            //Skip resolver services
            if (s.getServiceType().equals(ServiceType.RESOLVER))
                continue;

            String sUrl = "";

            if (!s.isRelativeBase()) {
                sUrl = s.getBase();
            } else {
                sUrl = hostUrl + s.getBase();
            }

            ThreddsService ts = services.get(sUrl);
            if (ts == null) {
                ts = new ThreddsService();
                ts.service = s;
                ts.version = getVersion(serv, ds);
                ts.ops = getServerOperations(serv, ds);

                services.put(sUrl, ts);
            }
            ts.datasets.put(uuid, ds.getName());
        }

    }

    /**
     * Find the version of the service that delivers a particular dataset.
     * Handles OPeNDAP and HTTP only at present.
     *
     * @param serv the service that delivers the dataset
     * @param ds   the dataset being delivered by the service
     **/

    private String getVersion(InvService serv, InvDataset ds) {
        String result = "unknown";
        if (serv.getServiceType() == ServiceType.OPENDAP) {
            InvAccess access = ds.getAccess(ServiceType.OPENDAP);
            if (access != null) {
                String href = access.getStandardUrlName() + ".ver";
                String readResult = getResultFromHttpUrl(href);
                if (readResult != null)
                    result = readResult;
            }
        } else if (serv.getServiceType() == ServiceType.HTTPServer) {
            result = "HTTP/1.1";
        }
        return result;
    }

    /**
     * Get the server operations. Applicable to OPeNDAP only at present.
     *
     * @param serv the service that delivers the dataset
     * @param ds   the dataset being delivered by the service
     **/

    private String getServerOperations(InvService serv, InvDataset ds) {
        String result = "none";
        if (serv.getServiceType() == ServiceType.OPENDAP) {
            InvAccess access = ds.getAccess(ServiceType.OPENDAP);
            if (access != null) {
                String href = access.getStandardUrlName() + ".help";
                String readResult = getResultFromHttpUrl(href);
                if (readResult != null)
                    result = readResult;
            }
        }
        return result;
    }

    /**
     * Get a String result from an HTTP URL
     *
     * @param href the URL to get the info from
     **/

    private String getResultFromHttpUrl(String href) {
        String result = null;
        try {
            //--- read the response body from the given URL (eg. OPeNDAP .ver or .help)
            URL url = new URL(href);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            Object o = conn.getContent();
            if (log.isDebugEnabled())
                log.debug("Opened " + href + " and got class " + o.getClass().getName());
            StringBuffer version = new StringBuffer();
            String inputLine;
            BufferedReader dis = null;
            InputStreamReader isr = null;
            InputStream is = null;
            try {
                is = conn.getInputStream();
                isr = new InputStreamReader(is, Constants.ENCODING);
                dis = new BufferedReader(isr);
                while ((inputLine = dis.readLine()) != null) {
                    version.append(inputLine + "\n");
                }
                result = version.toString();
                if (log.isDebugEnabled())
                    log.debug("Read from URL:\n" + result);
            } finally {
                IOUtils.closeQuietly(is);
                IOUtils.closeQuietly(isr);
                IOUtils.closeQuietly(dis);
            }
        } catch (Exception e) {
            if (log.isDebugEnabled())
                log.debug("Caught exception " + e + " whilst attempting to query URL " + href);
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Process all services that serve datasets in the thredds catalog
     *
     * @param cata              the XML of the catalog
     * @param serviceStyleSheet name of the stylesheet used to produce ISO19119
     **/

    private void processServices(Element cata, Path serviceStyleSheet) throws Exception {

        for (String sUrl : services.keySet()) {

            ThreddsService ts = services.get(sUrl);
            InvService serv = ts.service;

            if (log.isDebugEnabled())
                log.debug("Processing Thredds service: " + serv.toString());

            String sUuid = Sha1Encoder.encodeString(sUrl);

            //--- TODO: if service is WCS or WMS then pass the full service url to
            //--- OGCWxS service metadata creator

            //---   pass info to stylesheet which will create a 19119 record

            if (log.isDebugEnabled())
                log.debug("  - XSLT transformation using " + serviceStyleSheet);

            Map<String, Object> param = new HashMap<String, Object>();
            param.put("lang", params.lang);
            param.put("topic", params.topic);
            param.put("uuid", sUuid);
            param.put("url", sUrl);
            param.put("name", serv.getName());
            param.put("type", serv.getServiceType().toString().toUpperCase());
            param.put("version", ts.version);
            param.put("desc", serv.toString());
            param.put("props", serv.getProperties().toString());
            param.put("serverops", ts.ops);

            Element md = Xml.transform(cata, serviceStyleSheet, param);

            String schema = dataMan.autodetectSchema(md, null);
            if (schema == null) {
                log.warning("Skipping metadata with unknown schema.");
                result.unknownSchema++;
            } else {

                //--- Update ISO19119 for data/service links (ie. operatesOn element)
                md = addOperatesOnUuid(md, ts.datasets);

                //--- Now add to geonetwork
                saveMetadata(md, sUuid, sUrl);

                harvestUris.add(sUrl);

                result.serviceRecords++;
            }
        }
    }

    /**
     * Add an element to a parent's child list immediately after the first child with the specified name
     *
     * @param md         iso19139 metadata
     * @param theNewElem the new element to be added
     * @param name       the name of the element to search for
     * @param ns         the namespace of the element to search for
     **/

    boolean addAfter(Element md, Element theNewElem, String name, Namespace ns) throws Exception {
        Element chSet = md.getChild(name, ns);

        if (chSet != null) {
            int pos = md.indexOf(chSet);
            md.addContent(pos + 1, theNewElem);
            return true;
        }

        return false;
    }

    /**
     * Add keywords generated from CDM coordinate systems to identificationInfo
     *
     * <pre>
     * <gmd:descriptiveKeywords>
     *   <gmd:MD_Keywords>
     *     <gmd:keyword>
     *       <gco:CharacterString></gco:CharacterString>
     *     </gmd:keyword>
     *     ...
     *     <gmd:type>
     *       <gmd:MD_KeywordType codelist...>
     *     </gmd:type>
     *     <gmd:thesaurusName>
     *       <gmd:CI_Citation>....</gmd:CI_Citation>
     *     </gmd:thesaurusName>
     *   </gmd:MD_Keywords>
     * </gmd:descriptiveKeywords>
     * </pre>
     *
     * @param md       iso19139 metadata
     * @param keywords gmd:keywords block to be added to metadata
     **/

    private Element addKeywords(Element md, Element keywords) throws Exception {
        Element root = (Element) md.getChild("identificationInfo", gmd).getChildren().get(0);
        boolean ok = addAfter(root, keywords, "descriptiveKeywords", gmd);
        if (!ok) {
            throw new BadXmlResponseEx("The metadata did not have a descriptiveKeywords Element");
        }
        return md;
    }

    /**
     * Add mcp:dataParameters created from CDM coordinate systems to identificationInfo (mcp only)
     *
     * <pre>
     * <mcp:dataParameters>
     *   <mcp:DP_DataParameters>
     *     ...
     *   </mcp:DP_DataParameters>
     * </mcp:dataParameters>
     * </pre>
     *
     * @param md             iso19139 MCP metadata
     * @param dataParameters mcp:dataParameters block to be added to metadata
     **/

    private Element addDataParameters(Element md, Element dataParameters) throws Exception {
        Element root = (Element) md.getChild("identificationInfo", gmd).getChildren().get(0);
        root.addContent(dataParameters); // this is dependent on the mcp schema
        return md;
    }

    /**
     * Add OperatesOn elements on an ISO19119 metadata
     *
     * <pre>
     * <srv:operatesOn>
     *   <gmd:MD_DataIdentification uuidref=""/>
     * </srv:operatesOn>
     * </pre>
     *
     * @param md       iso19119 metadata
     * @param datasets HashMap of datasets with uuids to be added
     **/

    private Element addOperatesOnUuid(Element md, Map<String, String> datasets) {
        Element root = md.getChild("identificationInfo", gmd).getChild("SV_ServiceIdentification", srv);
        //      Element co       = root.getChild("containsOperations", srv);

        if (root != null) {
            if (log.isDebugEnabled())
                log.debug("  - add operatesOn with uuid and other attributes");

            for (Map.Entry<String, String> entry : datasets.entrySet()) {
                String dsUuid = entry.getKey();

                Element op = new Element("operatesOn", srv);
                op.setAttribute("uuidref", dsUuid);
                op.setAttribute("href", context.getBaseUrl() + "/srv/en/metadata.show?uuid=" + dsUuid, xlink);
                op.setAttribute("title", entry.getValue(), xlink);
                root.addContent(op);
            }
        }

        return md;
    }

    /**
     * Determine whether dataset metadata should be harvested
     *
     * @param ds the dataset to be checked
     **/

    private boolean harvestMetadata(InvDataset ds) {
        if (isCollection(ds)) {
            return params.createCollectionDatasetMd && (params.ignoreHarvestOnCollections || ds.isHarvest());
        } else {
            return params.createAtomicDatasetMd && (params.ignoreHarvestOnAtomics || ds.isHarvest());
        }
    }

    /**
     * Determine whether dataset metadata should be harvested using fragments
     *
     * @param ds the dataset to be checked
     **/

    private boolean harvestMetadataUsingFragments(InvDataset ds) {
        if (isCollection(ds)) {
            return params.collectionMetadataGeneration.equals(ThreddsParams.FRAGMENTS);
        } else {
            return params.atomicMetadataGeneration.equals(ThreddsParams.FRAGMENTS);
        }
    }

    /**
     * Determine whether dataset is a collection i.e. has nested datasets
     *
     * @param ds the dataset to be checked
     **/

    private boolean isCollection(InvDataset ds) {
        return ds.hasNestedDatasets();
    }

    /**
     * Get fragment harvesting parameters for collection datasets
     *
     * @return fragment harvesting parameters for collection datasets
     **/

    private FragmentParams getCollectionFragmentParams() {
        FragmentParams collectionParams = new FragmentHarvester.FragmentParams();
        collectionParams.categories = params.getCategories();
        collectionParams.createSubtemplates = params.createCollectionSubtemplates;
        collectionParams.isoCategory = params.datasetCategory;
        collectionParams.privileges = params.getPrivileges();
        collectionParams.templateId = params.collectionMetadataTemplate;
        collectionParams.uuid = params.getUuid();
        collectionParams.outputSchema = params.outputSchemaOnCollectionsFragments;
        return collectionParams;
    }

    /**
     * Get fragment harvesting parameters for atomic datasets
     *
     * @return fragment harvesting parameters for atomic datasets
     **/

    private FragmentParams getAtomicFragmentParams() {
        FragmentParams atomicParams = new FragmentHarvester.FragmentParams();
        atomicParams.categories = params.getCategories();
        atomicParams.createSubtemplates = params.createAtomicSubtemplates;
        atomicParams.isoCategory = params.datasetCategory;
        atomicParams.privileges = params.getPrivileges();
        atomicParams.templateId = params.atomicMetadataTemplate;
        atomicParams.uuid = params.getUuid();
        atomicParams.outputSchema = params.outputSchemaOnAtomicsFragments;
        atomicParams.owner = params.getOwnerId();
        return atomicParams;
    }

    @Override
    public List<HarvestError> getErrors() {
        return errors;
    }

    private static class ThreddsService {
        public Map<String, String> datasets = new HashMap<String, String>();
        public InvService service;
        public String version;
        public String ops;
    }

}
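
Example

A minimal sketch of how this harvester is driven. It assumes the GeoNetwork
harvesting framework supplies the ServiceContext, Logger and ThreddsParams
instances; the variable names below are illustrative and exception handling
is omitted.

    // Constructed with a cancel monitor so a running harvest can be aborted
    AtomicBoolean cancelMonitor = new AtomicBoolean(false);
    Harvester harvester = new Harvester(cancelMonitor, log, context, params);

    // Crawl the catalog, then report the combined totals
    HarvestResult result = harvester.harvest(log);
    log.info("Total metadata records harvested: " + result.totalMetadata);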