org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.java Source code

Introduction

Here is the source code for org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter.java
Source

/*
 * #%L
 * Alfresco Repository
 * %%
 * Copyright (C) 2005 - 2016 Alfresco Software Limited
 * %%
 * This file is part of the Alfresco software. 
 * If the software was purchased under a paid Alfresco license, the terms of 
 * the paid license agreement will prevail.  Otherwise, the software is 
 * provided under the following open source license terms:
 * 
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */
package org.alfresco.repo.content.metadata;

import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.MalformedNodeRefException;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.alfresco.service.namespace.InvalidQNameException;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xmlbeans.impl.xb.xsdschema.All;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.BeanNameAware;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.extensions.surf.util.ISO8601DateFormat;

/**
 * Support class for metadata extracters that support dynamic and config-driven
 * mapping between extracted values and model properties.  Extraction is broken
 * up into two phases:
 * <ul>
 *   <li>Extract ALL available metadata from the document.</li>
 *   <li>Translate the metadata into system properties.</li>
 * </ul>
 * <p>
 * Migrating an existing extracter to use this class is straightforward:
 * <ul>
 *   <li>
 *   Construct the extracter providing a default set of supported mimetypes to this
 *   implementation.  This can be overwritten with configurations.
 *   </li>
 *   <li>
 *   Implement the {@link #extract} method.  This now returns a raw map of extracted
 *   values keyed by document-specific property names.  The <b>trimPut</b> method has
 *   been replaced with an equivalent {@link #putRawValue(String, Serializable, Map)}.
 *   </li>
 *   <li>
 *   Provide the default mapping of the document-specific properties to system-specific
 *   properties as described by the {@link #getDefaultMapping()} method.  The simplest
 *   is to provide the default mapping in a correlated <i>.properties</i> file.
 *   </li>
 *   <li>
 *   Document, in the class-level javadoc, all the available properties that are extracted
 *   along with their approximate meanings.  Add to this, the default mappings.
 *   </li>
 * </ul>
 * 
 * @see #getDefaultMapping()
 * @see #extractRaw(ContentReader)
 * @see #setMapping(Map)
 * 
 * @since 2.1
 * 
 * @author Jesper Steen Møller
 * @author Derek Hulley
 */
@AlfrescoPublicApi
abstract public class AbstractMappingMetadataExtracter
        implements MetadataExtracter, MetadataEmbedder, BeanNameAware, ApplicationContextAware {
    /** Key prefix marking a mapping-property entry as a namespace prefix declaration rather than a mapping. */
    public static final String NAMESPACE_PROPERTY_PREFIX = "namespace.prefix.";
    // NOTE(review): presumably a message key for type conversion failures; not used in this part of the file.
    private static final String ERR_TYPE_CONVERSION = "metadata.extraction.err.type_conversion";
    // NOTE(review): presumably the global property holding the default extraction timeout (ms) - confirm at usage.
    private static final String PROP_DEFAULT_TIMEOUT = "content.metadataExtracter.default.timeoutMs";
    /** First component of the global property keys that carry per-bean mappings. */
    public static final String PROPERTY_PREFIX_METADATA = "metadata.";
    /** Key component, following the bean name, that selects the extract mappings in the global properties. */
    public static final String PROPERTY_COMPONENT_EXTRACT = ".extract.";
    /** Key component, following the bean name, that selects the embed mappings in the global properties. */
    public static final String PROPERTY_COMPONENT_EMBED = ".embed.";

    /** Shared logger; protected so that derived extracters can use it as well. */
    protected static Log logger = LogFactory.getLog(AbstractMappingMetadataExtracter.class);

    // Collaborators supplied through the corresponding setters
    private MetadataExtracterRegistry registry;
    private MimetypeService mimetypeService;
    private DictionaryService dictionaryService;
    // Starts as false (see constructor); getMapping() and getEmbedMapping() refuse to answer until it is true
    private boolean initialized;

    // Configuration state: defaults are assigned in the constructor, the rest via setters
    private Set<String> supportedMimetypes;
    // Stays null unless the two-argument constructor was used
    private Set<String> supportedEmbedMimetypes;
    private OverwritePolicy overwritePolicy;
    private boolean failOnTypeConversion;
    private Set<DateTimeFormatter> supportedDateFormatters;
    // Document property name -> target model properties; null until configured or defaulted
    private Map<String, Set<QName>> mapping;
    // Model property -> content file metadata keys; null until configured or defaulted
    private Map<QName, Set<String>> embedMapping;
    private boolean inheritDefaultMapping;
    private boolean inheritDefaultEmbedMapping;
    private boolean enableStringTagging;
    // Bean name; forms part of the ".enabled" and "metadata." property keys
    private String beanName;
    private ApplicationContext applicationContext;
    // The Alfresco global properties, consulted for the ".enabled" switches
    private Properties properties;
    private Map<String, MetadataExtracterLimits> mimetypeLimits;
    private ExecutorService executorService;
    protected MetadataExtracterConfig metadataExtracterConfig;

    /**
     * Default constructor, for use when the list of supported mimetypes is not known
     * at construction time.  The extracter starts out with an empty set of supported
     * mimetypes.
     * <p>
     * Derived classes using this constructor should either implement
     * {@link #isSupported(String)} themselves or, once the set becomes known, call
     * {@link #setSupportedMimetypes(Collection)}.
     *
     * @see #isSupported(String)
     * @see #setSupportedMimetypes(Collection)
     */
    protected AbstractMappingMetadataExtracter() {
        // Delegates with the shared, immutable empty set
        this(Collections.<String>emptySet());
    }

    /**
     * Creates an extracter for a set of mimetypes that is known up front and applies
     * the default configuration: pragmatic overwrite policy, failure on type conversion
     * errors, no explicit mappings and no inheritance of the default mappings.
     *
     * @param supportedMimetypes    the set of mimetypes supported by default; the
     *                              reference is stored as given, not copied
     */
    protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes) {
        this.supportedMimetypes = supportedMimetypes;
        this.initialized = false;

        // No explicit mappings yet: the defaults will be fetched
        this.mapping = null;
        this.embedMapping = null;

        // Any configured overrides are complete, i.e. they do not augment the defaults
        this.inheritDefaultMapping = false;
        this.inheritDefaultEmbedMapping = false;

        // Remaining defaults
        this.overwritePolicy = OverwritePolicy.PRAGMATIC;
        this.failOnTypeConversion = true;
    }

    /**
     * Constructor that can be used when both the extract and the embed mimetypes are
     * known up front.  All other defaults are those applied by
     * {@link #AbstractMappingMetadataExtracter(Set)}.
     *
     * @param supportedMimetypes         the set of mimetypes supported for extraction by default
     * @param supportedEmbedMimetypes    the set of mimetypes supported for embedding by default;
     *                                   the reference is stored as given, not copied
     */
    protected AbstractMappingMetadataExtracter(Set<String> supportedMimetypes,
            Set<String> supportedEmbedMimetypes) {
        this(supportedMimetypes);
        this.supportedEmbedMimetypes = supportedEmbedMimetypes;
    }

    /**
     * Set the registry to register with.  If this is not set, then the default
     * initialization will not auto-register the extracter for general use; the
     * extracter can still be used directly.
     *
     * @param registry a metadata extracter registry
     */
    public void setRegistry(MetadataExtracterRegistry registry) {
        this.registry = registry;
    }

    /**
     * Set the mimetype service.  When it is absent, the per-mimetype <code>.enabled</code>
     * switches are not evaluated and every supported mimetype counts as enabled.
     *
     * @param mimetypeService       the mimetype service.  Set this if required.
     */
    public void setMimetypeService(MimetypeService mimetypeService) {
        this.mimetypeService = mimetypeService;
    }

    /**
     * @return Returns the mimetype helper, or <tt>null</tt> if none has been set
     */
    protected MimetypeService getMimetypeService() {
        return mimetypeService;
    }

    /**
     * Set the dictionary service.
     *
     * @param dictionaryService     the dictionary service to determine which data conversions are necessary
     */
    public void setDictionaryService(DictionaryService dictionaryService) {
        this.dictionaryService = dictionaryService;
    }

    /**
     * Set the mimetypes that are supported by the extracter, replacing any that were
     * configured before.
     * <p>
     * The given values are copied into a new set.  This makes the call safe after the
     * default constructor, which installs the immutable empty set, and guarantees that
     * a set handed to a constructor is never modified behind the caller's back.
     *
     * @param supportedMimetypes    the mimetypes supported for extraction; must not be <tt>null</tt>
     */
    public void setSupportedMimetypes(Collection<String> supportedMimetypes) {
        // Do not clear()/addAll() in place: the current set may be immutable or shared
        this.supportedMimetypes = new HashSet<String>(supportedMimetypes);
    }

    /**
     * Set the mimetypes that are supported for embedding, replacing any that were
     * configured before.
     * <p>
     * The given values are copied into a new set.  The embed mimetypes are only
     * pre-populated by the two-argument constructor, so this method must also work
     * when no set exists yet.
     *
     * @param supportedEmbedMimetypes    the mimetypes supported for embedding; must not be <tt>null</tt>
     */
    public void setSupportedEmbedMimetypes(Collection<String> supportedEmbedMimetypes) {
        // Do not clear()/addAll() in place: the current set may be null, immutable or shared
        this.supportedEmbedMimetypes = new HashSet<String>(supportedEmbedMimetypes);
    }

    /**
     * {@inheritDoc}
     * <p>
     * A mimetype is supported when it is in the configured set of supported mimetypes
     * and has not been disabled through the properties.
     *
     * @see #setSupportedMimetypes(Collection)
     */
    public boolean isSupported(String sourceMimetype) {
        if (!supportedMimetypes.contains(sourceMimetype)) {
            return false;
        }
        // Only listed mimetypes are checked against the ".enabled" switches
        return isEnabled(sourceMimetype);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Embedding is never supported when no embed mimetypes have been configured.
     *
     * @see #setSupportedEmbedMimetypes(Collection)
     */
    public boolean isEmbeddingSupported(String sourceMimetype) {
        return supportedEmbedMimetypes != null && supportedEmbedMimetypes.contains(sourceMimetype);
    }

    /**
     * Checks the <code>.enabled</code> switches for this extracter.  Both the bean-wide
     * switch (<code>&lt;beanName&gt;.enabled</code>) and the switch for the mimetype's
     * extension (<code>&lt;beanName&gt;.&lt;extension&gt;.enabled</code>) default to
     * <tt>true</tt>.
     *
     * @param mimetype      the mimetype to check
     * @return              <tt>true</tt> if the properties or the mimetype service are
     *                      not available, or if neither switch disables the mimetype
     */
    private boolean isEnabled(String mimetype) {
        // Without the properties or the mimetype service nothing can be switched off
        if (properties == null || mimetypeService == null) {
            return true;
        }
        if (!getBooleanProperty(beanName + ".enabled", true)) {
            return false;
        }
        String extension = mimetypeService.getExtension(mimetype);
        return getBooleanProperty(beanName + '.' + extension + ".enabled", true);
    }

    /**
     * Reads a boolean from the global properties.
     *
     * @param name              the property key
     * @param defaultValue      the value to use when the properties or the key are missing
     * @return                  <tt>true</tt> only if the trimmed property value equals
     *                          "true" (ignoring case); the default if there is no value
     */
    private boolean getBooleanProperty(String name, boolean defaultValue) {
        if (properties == null) {
            return defaultValue;
        }
        String rawValue = properties.getProperty(name);
        if (rawValue == null) {
            return defaultValue;
        }
        return "true".equalsIgnoreCase(rawValue.trim());
    }

    /**
     * TODO - This doesn't appear to be used, so should be removed / deprecated / replaced
     *
     * @param mimetype      the mimetype to check
     * @return              Returns <code>1.0</code> if the mimetype is supported, otherwise <tt>0.0</tt>
     *
     * @see #isSupported(String)
     */
    public double getReliability(String mimetype) {
        if (isSupported(mimetype)) {
            return 1.0D;
        }
        return 0.0D;
    }

    /**
     * Set the policy to use when existing values are encountered.  Depending on how the extractor
     * is called, this may not be relevant, i.e. an empty map of existing properties may be passed
     * in by the client code, which may follow its own overwrite strategy.
     *
     * @param overwritePolicy       the policy to apply when there are existing system properties
     */
    public void setOverwritePolicy(OverwritePolicy overwritePolicy) {
        this.overwritePolicy = overwritePolicy;
    }

    /**
     * Set the policy to use when existing values are encountered, given by name.  Depending on
     * how the extractor is called, this may not be relevant, i.e. an empty map of existing
     * properties may be passed in by the client code, which may follow its own overwrite strategy.
     *
     * @param overwritePolicyStr    the name of the policy to apply when there are existing
     *                              system properties; resolved with <code>OverwritePolicy.valueOf</code>
     */
    public void setOverwritePolicy(String overwritePolicyStr) {
        // NOTE(review): assuming OverwritePolicy is an enum, valueOf needs an exact constant name - confirm
        this.overwritePolicy = OverwritePolicy.valueOf(overwritePolicyStr);
    }

    /**
     * Set whether the extraction should fail when extracted metadata cannot be converted to the
     * target type defined in the data dictionary model.  This is <tt>true</tt> by default, i.e.
     * if the data extracted is not compatible with the target model then the extraction will
     * fail.  If this is <tt>false</tt> then any extracted data that fails to convert will be
     * discarded.
     *
     * @param failOnTypeConversion      <tt>false</tt> to discard properties that can't get converted
     *                                  to the dictionary-defined type, or <tt>true</tt> (default)
     *                                  to fail the extraction if the type doesn't convert
     */
    public void setFailOnTypeConversion(boolean failOnTypeConversion) {
        this.failOnTypeConversion = failOnTypeConversion;
    }

    /**
     * Set the date formats, over and above the {@link ISO8601DateFormat ISO8601 format}, that will
     * be supported for string to date conversions.  The supported syntax is described by the
     * <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/text/SimpleDateFormat.html">SimpleDateFormat Javadocs</a>.
     * Each pattern is compiled with {@link DateTimeFormat#forPattern(String)}.
     * <p>
     * Any formats set previously are discarded.
     *
     * @param supportedDateFormats      a list of supported date formats.
     * @throws AlfrescoRuntimeException if one of the patterns cannot be turned into a formatter
     */
    public void setSupportedDateFormats(List<String> supportedDateFormats) {
        // One formatter per pattern: an earlier version merged all patterns into a single
        // DateTimeFormatter, but that loses the time zone part.
        Set<DateTimeFormatter> formatters = new HashSet<DateTimeFormatter>();
        supportedDateFormatters = formatters;

        for (String dateFormatStr : supportedDateFormats) {
            DateTimeFormatter formatter;
            try {
                formatter = DateTimeFormat.forPattern(dateFormatStr);
            } catch (Throwable e) {
                // The pattern is no good
                throw new AlfrescoRuntimeException("Unable to set supported date format: " + dateFormatStr, e);
            }
            formatters.add(formatter);
        }
    }

    /**
     * Set whether the configured property mappings augment or replace the mapping generically
     * provided by the extracter implementation.  The default is <tt>false</tt>, i.e. any mapping
     * set completely replaces the {@link #getDefaultMapping() default mappings}.
     * <p>
     * Note that even when set to <tt>true</tt>, an individual property mapping entry replaces the
     * corresponding entry provided by the extracter implementation.
     *
     * @param inheritDefaultMapping <tt>true</tt> to add the configured mapping
     *                              to the list of default mappings.
     *
     * @see #getDefaultMapping()
     * @see #setMapping(Map)
     * @see #setMappingProperties(Properties)
     */
    public void setInheritDefaultMapping(boolean inheritDefaultMapping) {
        this.inheritDefaultMapping = inheritDefaultMapping;
    }

    /**
     * Records the name of this bean.  The name forms part of the property keys used for the
     * <code>.enabled</code> switches and for the mappings read from the global properties.
     *
     * @param beanName      the name of the bean
     */
    @Override
    public void setBeanName(String beanName) {
        this.beanName = beanName;
    }

    /**
     * @return Returns the bean name given to {@link #setBeanName(String)}, or <tt>null</tt>
     *         if it has not been set
     */
    public String getBeanName() {
        return beanName;
    }

    /**
     * Records the application context, from which the <code>global-properties</code> bean is
     * looked up when mappings are read from the global properties.
     *
     * @param applicationContext    the application context
     */
    public void setApplicationContext(ApplicationContext applicationContext) {
        this.applicationContext = applicationContext;
    }

    /**
     * Set the Alfresco global properties.  They are consulted for the <code>.enabled</code>
     * switches of this extracter; if they are not set, no mimetype is treated as disabled.
     *
     * @param properties    the Alfresco global properties
     */
    public void setProperties(Properties properties) {
        this.properties = properties;
    }

    /**
     * Set the metadata extracter config.
     *
     * @param metadataExtracterConfig   the config, kept in a protected field for use by
     *                                  derived classes
     */
    public void setMetadataExtracterConfig(MetadataExtracterConfig metadataExtracterConfig) {
        this.metadataExtracterConfig = metadataExtracterConfig;
    }

    /**
     * Whether or not to enable the pass through of simple strings to cm:taggable tags.
     *
     * @param enableStringTagging       <tt>true</tt> to find or create tags for each string
     *                                  mapped to cm:taggable.  <tt>false</tt> (default)
     *                                  to ignore mapping strings to tags.
     */
    public void setEnableStringTagging(boolean enableStringTagging) {
        this.enableStringTagging = enableStringTagging;
    }

    /**
     * Set whether the configured embed property mappings augment or replace the mapping
     * generically provided by the extracter implementation.  The default is <tt>false</tt>,
     * i.e. any mapping set completely replaces the
     * {@link #getDefaultEmbedMapping() default mappings}.
     * <p>
     * Note that even when set to <tt>true</tt>, an individual property mapping entry replaces the
     * corresponding entry provided by the extracter implementation.
     *
     * @param inheritDefaultEmbedMapping <tt>true</tt> to add the configured embed mapping
     *                              to the list of default embed mappings.
     *
     * @see #getDefaultEmbedMapping()
     * @see #setEmbedMapping(Map)
     * @see #setEmbedMappingProperties(Properties)
     */
    public void setInheritDefaultEmbedMapping(boolean inheritDefaultEmbedMapping) {
        this.inheritDefaultEmbedMapping = inheritDefaultEmbedMapping;
    }

    /**
     * Sets the map of source mimetypes to metadata extracter limits.
     *
     * @param mimetypeLimits    the limits to apply, keyed by source mimetype
     */
    public void setMimetypeLimits(Map<String, MetadataExtracterLimits> mimetypeLimits) {
        this.mimetypeLimits = mimetypeLimits;
    }

    /**
     * Gets the <code>ExecutorService</code> to be used for timeout-aware
     * extraction.
     * <p>
     * If no <code>ExecutorService</code> has been defined, a default
     * of <code>Executors.newCachedThreadPool()</code> is used during
     * {@link AbstractMappingMetadataExtracter#init()}.
     *
     * @return the defined or default <code>ExecutorService</code>
     */
    protected ExecutorService getExecutorService() {
        return executorService;
    }

    /**
     * Sets the <code>ExecutorService</code> to be used for timeout-aware
     * extraction.
     *
     * @param executorService the <code>ExecutorService</code> for timeouts
     *
     * @see #getExecutorService()
     */
    public void setExecutorService(ExecutorService executorService) {
        this.executorService = executorService;
    }

    /**
     * Set the mapping from document metadata to system metadata.  It is possible to direct
     * an extracted document property to several system properties.  The conversion between
     * the document property types and the system property types will be done by the
     * {@link org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter default converter}.
     *
     * @param mapping       a mapping from document metadata to system metadata; the
     *                      reference is stored as given, not copied
     */
    public void setMapping(Map<String, Set<QName>> mapping) {
        this.mapping = mapping;
    }

    /**
     * Set the embed mapping from model properties to content file metadata.  It is possible
     * to direct a model property to several content file metadata keys.  The conversion between
     * the model property types and the content file metadata key types will be done by the
     * {@link org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter default converter}.
     *
     * @param embedMapping       an embed mapping from model properties to content file metadata
     *                           keys; the reference is stored as given, not copied
     */
    public void setEmbedMapping(Map<QName, Set<String>> embedMapping) {
        this.embedMapping = embedMapping;
    }

    /**
     * Set the properties that contain the mapping from document metadata to system metadata.
     * This is an alternative to the {@link #setMapping(Map)} method.  Any mappings already
     * present will be replaced.
     * <p>
     * The property mapping is of the form:
     * <pre>
     * # Namespaces prefixes
     * namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     * namespace.prefix.my=http://www....com/alfresco/1.0
     * 
     * # Mapping
     * editor=cm:author, my:editor
     * title=cm:title
     * user1=cm:summary
     * user2=cm:description
     * </pre>
     * The mapping can therefore be from a single document property onto several system properties.
     *
     * @param mappingProperties     the properties that map document properties to system properties
     *
     * @see #readMappingProperties(Properties)
     */
    public void setMappingProperties(Properties mappingProperties) {
        mapping = readMappingProperties(mappingProperties);
    }

    /**
     * Set the properties that contain the embed mapping from model properties to content file
     * metadata.  This is an alternative to the {@link #setEmbedMapping(Map)} method.  Any
     * mappings already present will be replaced.
     * <p>
     * The property mapping is of the form:
     * <pre>
     * # Namespaces prefixes
     * namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     * namespace.prefix.my=http://www....com/alfresco/1.0
     *
     * # Mapping
     * cm\:author=editor
     * cm\:title=title
     * cm\:summary=user1
     * cm\:description=description,user2
     * </pre>
     * The embed mapping can therefore be from a model property onto several content file
     * metadata properties.
     *
     * @param embedMappingProperties     the properties that map model properties to content file metadata properties
     */
    public void setEmbedMappingProperties(Properties embedMappingProperties) {
        embedMapping = readEmbedMappingProperties(embedMappingProperties);
    }

    /**
     * Helper method for derived classes to obtain the mappings that will be applied to raw
     * values.  This should be called after initialization in order to guarantee the complete
     * map is given.
     * <p>
     * Normally, the list of properties that can be extracted from a document is fixed and
     * well-known - in that case, just extract everything.  But some implementations may have
     * an extra, indeterminate set of values available for extraction.  If the extraction of
     * these runtime parameters is expensive, then the keys provided by the return value can
     * be used to extract values from the documents.  The metadata extraction becomes fully
     * configuration-driven, i.e. declaring further mappings will result in more values being
     * extracted from the documents.
     * <p>
     * Most extractors will not be using this method.  For an example of its use, see the
     * {@linkplain OpenDocumentMetadataExtracter OpenDocument extractor}, which uses the mapping
     * to select specific user properties from a document.
     *
     * @return an unmodifiable view of the mapping from document properties to system properties
     * @throws UnsupportedOperationException if the extracter has not been initialized yet
     */
    protected final Map<String, Set<QName>> getMapping() {
        if (initialized) {
            // Read-only view: callers must not alter the mapping
            return Collections.unmodifiableMap(mapping);
        }
        throw new UnsupportedOperationException("The complete mapping is only available after initialization.");
    }

    /**
     * Helper method for derived classes to obtain the embed mappings.
     * This should be called after initialization in order to guarantee the complete
     * map is given.
     * <p>
     * Normally, the list of properties that can be embedded in a document is fixed and
     * well-known.  But some implementations may have an extra, indeterminate set of values
     * available for embedding.  If the embedding of these runtime parameters is expensive,
     * then the keys provided by the return value can be used to embed values in the
     * documents.  The metadata embedding becomes fully configuration-driven, i.e. declaring
     * further mappings will result in more values being embedded in the documents.
     *
     * @return an unmodifiable view of the mapping from model properties to content file metadata keys
     * @throws UnsupportedOperationException if the extracter has not been initialized yet
     */
    protected final Map<QName, Set<String>> getEmbedMapping() {
        if (initialized) {
            // Read-only view: callers must not alter the mapping
            return Collections.unmodifiableMap(embedMapping);
        }
        throw new UnsupportedOperationException(
                "The complete embed mapping is only available after initialization.");
    }

    /**
     * A utility method to read mapping properties from a resource file and convert to the map form.
     * The resource is looked up through the class loader of the concrete extracter class.
     *
     * @param propertiesUrl     A standard Properties file URL location
     * @return                  the mapping from document properties to system properties
     * @throws AlfrescoRuntimeException if the resource does not exist, cannot be loaded
     *                          or contains an invalid mapping
     *
     * @see #setMappingProperties(Properties)
     */
    protected Map<String, Set<QName>> readMappingProperties(String propertiesUrl) {
        InputStream is = null;
        try {
            is = getClass().getClassLoader().getResourceAsStream(propertiesUrl);
            if (is != null) {
                Properties props = new Properties();
                props.load(is);
                // Process it
                Map<String, Set<QName>> map = readMappingProperties(props);
                // Done
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded mapping properties from resource: " + propertiesUrl);
                }
                return map;
            }
        } catch (Throwable e) {
            throw new AlfrescoRuntimeException(
                    "Unable to load properties file to read extracter mapping properties: \n" + "   Extracter:  "
                            + this + "\n" + "   Bundle:     " + propertiesUrl,
                    e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (Throwable ignored) {
                    // Best effort: a failure to close must not mask the result or the real error
                }
            }
        }
        // Only reached when the resource does not exist.  Thrown outside the try block so
        // that it is not caught above and re-wrapped as a misleading "unable to load" failure.
        throw new AlfrescoRuntimeException("Metadata Extracter mapping properties not found: \n"
                + "   Extracter:  " + this + "\n" + "   Bundle:     " + propertiesUrl);
    }

    /**
     * A utility method to select the global properties that belong to this extracter and the
     * given propertyComponent, with the selecting prefix removed from the keys.
     * <p>
     * The prefix is made up of {@link #PROPERTY_PREFIX_METADATA}, the extracter bean name and
     * the propertyComponent.  Mappings can therefore be specified using the same method defined
     * for normal mapping properties files but with that prefix.
     * For example:
     * <pre>
     *     metadata.extracter.TikaAuto.extract.namespace.prefix.my=http://DummyMappingMetadataExtracter
     *     metadata.extracter.TikaAuto.extract.namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     *     metadata.extracter.TikaAuto.extract.dc\:description=cm:description, my:customDescription
     * </pre>
     *
     * @param propertyComponent     the key component following the bean name, e.g.
     *                              {@link #PROPERTY_COMPONENT_EXTRACT}
     * @return                      the matching entries keyed without the prefix (possibly empty),
     *                              or <tt>null</tt> if the application context or the
     *                              <code>global-properties</code> bean is not available
     */
    private Map<Object, Object> getRelevantGlobalProperties(String propertyComponent) {
        if (applicationContext == null) {
            logger.info("ApplicationContext not set");
            return null;
        }
        Properties globalProperties = (Properties) applicationContext.getBean("global-properties");
        if (globalProperties == null) {
            logger.info("Could not get global-properties");
            return null;
        }
        Map<Object, Object> relevantGlobalPropertiesMap = new HashMap<Object, Object>();
        String propertyPrefix = PROPERTY_PREFIX_METADATA + beanName + propertyComponent;
        for (Entry<Object, Object> globalEntry : globalProperties.entrySet()) {
            String key = (String) globalEntry.getKey();
            if (key.startsWith(propertyPrefix)) {
                // Strip the leading prefix only; String.replace would also remove any later
                // occurrence of the same text from the remainder of the key
                relevantGlobalPropertiesMap.put(key.substring(propertyPrefix.length()), globalEntry.getValue());
            }
        }
        return relevantGlobalPropertiesMap;
    }

    /**
     * A utility method to convert the extract mappings found in the global properties to the
     * Map form.
     * <p>
     * Mappings can be specified using the same method defined for
     * normal mapping properties files but with a prefix of
     * <code>metadata.extracter</code>, the extracter bean name, and the extract component.
     * For example:
     * <pre>
     *     metadata.extracter.TikaAuto.extract.namespace.prefix.my=http://DummyMappingMetadataExtracter
     *     metadata.extracter.TikaAuto.extract.namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     *     metadata.extracter.TikaAuto.extract.dc\:description=cm:description, my:customDescription
     * </pre>
     *
     * @return the mapping from document properties to system properties, or <tt>null</tt>
     *         if the global properties could not be obtained
     */
    protected Map<String, Set<QName>> readGlobalExtractMappingProperties() {
        Map<Object, Object> extractProperties = getRelevantGlobalProperties(PROPERTY_COMPONENT_EXTRACT);
        return extractProperties == null ? null : readMappingProperties(extractProperties.entrySet());
    }

    /**
     * A utility method to convert mapping properties to the Map form.
     *
     * @param mappingProperties     the properties holding namespace prefix declarations and
     *                              mappings, in the form described by
     *                              {@link #setMappingProperties(Properties)}
     * @return                      the mapping from document properties to system properties
     *
     * @see #setMappingProperties(Properties)
     */
    protected Map<String, Set<QName>> readMappingProperties(Properties mappingProperties) {
        return readMappingProperties(mappingProperties.entrySet());
    }

    /**
     * A utility method to convert mapping properties entries to the Map form.
     * <p>
     * Entries whose key starts with {@link #NAMESPACE_PROPERTY_PREFIX} declare a namespace
     * prefix (the rest of the key) and its URI (the value).  Every other entry maps a document
     * property (the key) onto a comma-separated list of QNames (the value).  A QName may be
     * given in prefixed form, in which case the prefix must have been declared.
     *
     * @param mappingPropertiesEntries  the raw entries; keys and values must be strings
     * @return                          a new map from document property to target QNames
     * @throws AlfrescoRuntimeException if a QName uses an undeclared prefix or is invalid
     *
     * @see #setMappingProperties(Properties)
     */
    private Map<String, Set<QName>> readMappingProperties(Set<Entry<Object, Object>> mappingPropertiesEntries) {
        Map<String, String> namespacesByPrefix = new HashMap<String, String>(5);
        // First pass: collect the namespace declarations, wherever they appear
        for (Map.Entry<Object, Object> entry : mappingPropertiesEntries) {
            String propertyName = (String) entry.getKey();
            if (propertyName.startsWith(NAMESPACE_PROPERTY_PREFIX)) {
                String prefix = propertyName.substring(NAMESPACE_PROPERTY_PREFIX.length());
                String namespace = (String) entry.getValue();
                namespacesByPrefix.put(prefix, namespace);
            }
        }
        // Second pass: create the mapping
        Map<String, Set<QName>> convertedMapping = new HashMap<String, Set<QName>>(17);
        for (Map.Entry<Object, Object> entry : mappingPropertiesEntries) {
            String documentProperty = (String) entry.getKey();
            String qnamesStr = (String) entry.getValue();
            if (documentProperty.startsWith(NAMESPACE_PROPERTY_PREFIX)) {
                // Namespace declarations were handled in the first pass
                continue;
            }
            // Create the entry
            Set<QName> qnames = new HashSet<QName>(3);
            convertedMapping.put(documentProperty, qnames);
            // The 'to' value can be a list of QNames
            StringTokenizer tokenizer = new StringTokenizer(qnamesStr, ",");
            while (tokenizer.hasMoreTokens()) {
                String qnameStr = tokenizer.nextToken().trim();
                // Check if we need to resolve a namespace reference; a value that already
                // starts with the namespace-begin marker is in full form and is left alone
                int index = qnameStr.indexOf(QName.NAMESPACE_PREFIX);
                if (index > -1 && qnameStr.charAt(0) != QName.NAMESPACE_BEGIN) {
                    String prefix = qnameStr.substring(0, index);
                    String suffix = qnameStr.substring(index + 1);
                    // It is prefixed
                    String uri = namespacesByPrefix.get(prefix);
                    if (uri == null) {
                        throw new AlfrescoRuntimeException("No prefix mapping for extracter property mapping: \n"
                                + "   Extracter: " + this + "\n" + "   Mapping: " + entry);
                    }
                    qnameStr = QName.NAMESPACE_BEGIN + uri + QName.NAMESPACE_END + suffix;
                }
                try {
                    QName qname = QName.createQName(qnameStr);
                    // Add it to the mapping
                    qnames.add(qname);
                } catch (InvalidQNameException e) {
                    // Keep the cause so that the reason for the invalid QName is not lost
                    throw new AlfrescoRuntimeException("Can't create metadata extracter property mapping: \n"
                            + "   Extracter: " + this + "\n" + "   Mapping: " + entry, e);
                }
            }
            if (logger.isTraceEnabled()) {
                logger.trace("Added mapping from " + documentProperty + " to " + qnames);
            }
        }
        // Done
        return convertedMapping;
    }

    /**
     * A utility method to read embed mapping properties from a resource file and convert to
     * the map form.  The resource is looked up through the class loader of the concrete
     * extracter class; a missing resource is not treated as an error.
     *
     * @param propertiesUrl     A standard Properties file URL location
     * @return                  the mapping from model properties to content file metadata keys,
     *                          or <tt>null</tt> if the resource does not exist
     * @throws AlfrescoRuntimeException if the resource exists but cannot be loaded or converted
     *
     * @see #setEmbedMappingProperties(Properties)
     */
    protected Map<QName, Set<String>> readEmbedMappingProperties(String propertiesUrl) {
        InputStream stream = null;
        try {
            stream = getClass().getClassLoader().getResourceAsStream(propertiesUrl);
            if (stream != null) {
                Properties loaded = new Properties();
                loaded.load(stream);
                // Convert to the map form
                Map<QName, Set<String>> embedMap = readEmbedMappingProperties(loaded);
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded embed mapping properties from resource: " + propertiesUrl);
                }
                return embedMap;
            }
            // Nothing to read
            return null;
        } catch (Throwable e) {
            throw new AlfrescoRuntimeException(
                    "Unable to load properties file to read extracter embed mapping properties: \n"
                            + "   Extracter:  " + this + "\n" + "   Bundle:     " + propertiesUrl,
                    e);
        } finally {
            if (stream != null) {
                try {
                    stream.close();
                } catch (Throwable e) {
                    // A failure to close is ignored
                }
            }
        }
    }

    /**
     * Builds the embed mapping from the global properties that are relevant to the embed component.
     * <p>
     * Different from readGlobalExtractMappingProperties in that keys are the Alfresco QNames
     * and values are file metadata properties.
     * <p>
     * Entries use the same format as a normal embed mapping properties file, but carry a prefix of
     * <code>metadata.extracter</code>, the extracter bean name, and the embed component.
     * For example:
     * 
     *     metadata.extracter.TikaAuto.embed.namespace.prefix.cm=http://www.alfresco.org/model/content/1.0
     *     metadata.extracter.TikaAuto.embed.cm\:description=description
     *
     * @return the converted mapping, or <tt>null</tt> if no relevant global properties map is available
     * @see #setMappingProperties(Properties)
     */
    protected Map<QName, Set<String>> readGlobalEmbedMappingProperties() {
        Map<Object, Object> globalEmbedProperties = getRelevantGlobalProperties(PROPERTY_COMPONENT_EMBED);
        return (globalEmbedProperties == null) ? null
                : readEmbedMappingProperties(globalEmbedProperties.entrySet());
    }

    /**
     * Converts a set of embed mapping properties to the Map form.
     * <p>
     * Different from readMappingProperties in that keys are the Alfresco QNames
     * and values are file metadata properties.
     *
     * @param mappingProperties the properties holding namespace declarations and mappings
     * @return                  the converted embed mapping
     * @see #setMappingProperties(Properties)
     */
    protected Map<QName, Set<String>> readEmbedMappingProperties(Properties mappingProperties) {
        Set<Entry<Object, Object>> propertyEntries = mappingProperties.entrySet();
        return readEmbedMappingProperties(propertyEntries);
    }

    /**
     * A utility method to convert mapping properties entries to the Map form.
     * <p>
     * Different from readMappingProperties in that keys are the Alfresco QNames
     * and values are file metadata properties.
     * <p>
     * Entries whose key starts with {@link #NAMESPACE_PROPERTY_PREFIX} declare a namespace prefix;
     * every other entry maps a model property (either prefixed or already fully qualified)
     * to a comma-separated list of metadata keys.
     *
     * @param mappingPropertiesEntries  the raw property entries; keys and values are cast to String
     * @return                          the mapping keyed by model property QName
     * @throws AlfrescoRuntimeException if a prefix has no namespace declaration or a QName cannot be created
     *
     * @see #setMappingProperties(Properties)
     */
    private Map<QName, Set<String>> readEmbedMappingProperties(
            Set<Entry<Object, Object>> mappingPropertiesEntries) {
        Map<String, String> namespacesByPrefix = new HashMap<String, String>(5);
        // First pass: collect the namespace declarations
        for (Map.Entry<Object, Object> entry : mappingPropertiesEntries) {
            String propertyName = (String) entry.getKey();
            if (propertyName.startsWith(NAMESPACE_PROPERTY_PREFIX)) {
                // Strip the declaration prefix using its actual length rather than a hard-coded offset
                String prefix = propertyName.substring(NAMESPACE_PROPERTY_PREFIX.length());
                String namespace = (String) entry.getValue();
                namespacesByPrefix.put(prefix, namespace);
            }
        }
        // Second pass: create the mapping
        Map<QName, Set<String>> convertedMapping = new HashMap<QName, Set<String>>(17);
        for (Map.Entry<Object, Object> entry : mappingPropertiesEntries) {
            String modelProperty = (String) entry.getKey();
            String metadataKeysString = (String) entry.getValue();
            if (modelProperty.startsWith(NAMESPACE_PROPERTY_PREFIX)) {
                // Namespace declarations were handled in the first pass
                continue;
            }

            // Expand a prefixed name unless it already starts with the namespace opening character
            int index = modelProperty.indexOf(QName.NAMESPACE_PREFIX);
            if (index > -1 && modelProperty.charAt(0) != QName.NAMESPACE_BEGIN) {
                String prefix = modelProperty.substring(0, index);
                String suffix = modelProperty.substring(index + 1);
                // It is prefixed
                String uri = namespacesByPrefix.get(prefix);
                if (uri == null) {
                    throw new AlfrescoRuntimeException("No prefix mapping for embed property mapping: \n"
                            + "   Extracter: " + this + "\n" + "   Mapping: " + entry);
                }
                modelProperty = QName.NAMESPACE_BEGIN + uri + QName.NAMESPACE_END + suffix;
            }
            try {
                QName qname = QName.createQName(modelProperty);
                String[] metadataKeysArray = metadataKeysString.split(",");
                Set<String> metadataKeys = new HashSet<String>(metadataKeysArray.length);
                for (String metadataKey : metadataKeysArray) {
                    metadataKeys.add(metadataKey.trim());
                }
                // Create the entry
                convertedMapping.put(qname, metadataKeys);
            } catch (InvalidQNameException e) {
                // Keep the original failure as the cause so the invalid name can be diagnosed
                throw new AlfrescoRuntimeException("Can't create metadata embedding property mapping: \n"
                        + "   Extracter: " + this + "\n" + "   Mapping: " + entry, e);
            }
            if (logger.isTraceEnabled()) {
                logger.trace("Added mapping from " + modelProperty + " to " + metadataKeysString);
            }
        }
        // Done
        return convertedMapping;
    }

    /**
     * Initializes this extracter and then adds it to the registry.  The {@link #init()} method
     * is always called; registration only happens when a registry has been set.
     * 
     * @see #setRegistry(MetadataExtracterRegistry)
     * @see #init()
     */
    public final void register() {
        init();

        if (registry == null) {
            // No registry available, so there is nothing to register with
            return;
        }
        registry.register(this);
    }

    /**
     * Provides a hook point for implementations to perform initialization.  The base
     * implementation must be invoked or the extracter will fail during extraction.
     * The {@link #getDefaultMapping() default mappings} will be requested during
     * initialization.
     * <p>
     * The steps performed, in order, are:
     * <ul>
     *   <li>resolve the extract mapping from the configured mapping, the default mapping and
     *       any overrides found in the global properties</li>
     *   <li>create an executor service if none was set</li>
     *   <li>create a catch-all timeout limit from the default timeout property if no limits were set</li>
     *   <li>resolve the embed mapping in the same way as the extract mapping</li>
     *   <li>mark the extracter as initialized</li>
     * </ul>
     *
     * @throws AlfrescoRuntimeException if no default extract mapping is provided
     * @throws NumberFormatException    if the default timeout property is not a valid long value
     */
    protected void init() {
        Map<String, Set<QName>> defaultMapping = getDefaultMapping();
        if (defaultMapping == null) {
            throw new AlfrescoRuntimeException("The metadata extracter must provide a default mapping: " + this);
        }

        // Was a mapping explicitly provided
        if (mapping == null) {
            // No mapping, so use the default
            mapping = defaultMapping;
        } else if (inheritDefaultMapping) {
            // Merge the default mapping into the configured mapping; configured keys win
            for (Map.Entry<String, Set<QName>> defaultEntry : defaultMapping.entrySet()) {
                String documentKey = defaultEntry.getKey();
                if (mapping.get(documentKey) == null) {
                    Set<QName> systemQNames = new HashSet<QName>(3);
                    mapping.put(documentKey, systemQNames);
                    systemQNames.addAll(defaultEntry.getValue());
                }
            }
        }

        // Override with any extract mappings specified in global properties
        Map<String, Set<QName>> globalExtractMapping = readGlobalExtractMappingProperties();
        if (globalExtractMapping != null && !globalExtractMapping.isEmpty()) {
            mapping.putAll(globalExtractMapping);
        }

        // The configured mappings are empty, but there were default mappings
        if (mapping.size() == 0 && defaultMapping.size() > 0) {
            logger.warn("There are no property mappings for the metadata extracter.\n"
                    + "  Nothing will be extracted by: " + this);
        }

        if (executorService == null) {
            executorService = Executors.newCachedThreadPool();
        }

        if (mimetypeLimits == null && properties != null) {
            String property = properties.getProperty(PROP_DEFAULT_TIMEOUT);
            if (property != null) {
                // Values loaded from properties files keep trailing whitespace, which parseLong rejects
                long timeoutMs = Long.parseLong(property.trim());
                MetadataExtracterLimits limits = new MetadataExtracterLimits();
                limits.setTimeoutMs(timeoutMs);
                mimetypeLimits = new HashMap<String, MetadataExtracterLimits>(1);
                mimetypeLimits.put("*", limits);
            }
        }

        // Unlike the extract mapping above, nothing here rejects a null default embed mapping,
        // so every use of it below has to tolerate null
        Map<QName, Set<String>> defaultEmbedMapping = getDefaultEmbedMapping();

        // Was a mapping explicitly provided
        if (embedMapping == null) {
            // No mapping, so use the default
            embedMapping = defaultEmbedMapping;
        } else if (inheritDefaultEmbedMapping && defaultEmbedMapping != null) {
            // Merge the default mapping into the configured mapping; configured keys win
            for (Map.Entry<QName, Set<String>> defaultEntry : defaultEmbedMapping.entrySet()) {
                QName modelProperty = defaultEntry.getKey();
                if (embedMapping.get(modelProperty) == null) {
                    Set<String> metadataKeys = new HashSet<String>(3);
                    embedMapping.put(modelProperty, metadataKeys);
                    metadataKeys.addAll(defaultEntry.getValue());
                }
            }
        }

        // Override with any embed mappings specified in global properties
        Map<QName, Set<String>> globalEmbedMapping = readGlobalEmbedMappingProperties();
        if (globalEmbedMapping != null && !globalEmbedMapping.isEmpty()) {
            if (embedMapping == null) {
                // Neither a configured nor a default mapping exists; start from the overrides alone
                embedMapping = new HashMap<QName, Set<String>>(globalEmbedMapping.size() * 2 + 1);
            }
            embedMapping.putAll(globalEmbedMapping);
        }
        // Done
        initialized = true;
    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation always reports the same fixed value.
     */
    public long getExtractionTime() {
        final long fixedEstimate = 1000L;
        return fixedEstimate;
    }

    /**
     * Verifies that this extracter supports the mimetype reported by the reader.
     * 
     * @param reader the reader whose mimetype is checked
     * @throws AlfrescoRuntimeException if the mimetype is not supported
     */
    protected void checkIsSupported(ContentReader reader) {
        String mimetype = reader.getMimetype();
        if (isSupported(mimetype)) {
            return;
        }
        StringBuilder message = new StringBuilder();
        message.append("Metadata extracter does not support mimetype: ").append(mimetype)
                .append("\n   reader: ").append(reader)
                .append("\n   supported: ").append(supportedMimetypes)
                .append("\n   extracter: ").append(this);
        throw new AlfrescoRuntimeException(message.toString());
    }

    /**
     * Checks if embedding for the mimetype is supported.
     * <p>
     * The failure message names the rejected mimetype, in line with
     * {@link #checkIsSupported(ContentReader)}.
     *
     * @param writer the writer to check
     * @throws AlfrescoRuntimeException if embedding for the mimetype is not supported
     */
    protected void checkIsEmbedSupported(ContentWriter writer) {
        String mimetype = writer.getMimetype();
        if (!isEmbeddingSupported(mimetype)) {
            throw new AlfrescoRuntimeException(
                    "Metadata extracter does not support embedding mimetype: " + mimetype + "\n"
                            + "   writer: " + writer + "\n"
                            + "   supported: " + supportedEmbedMimetypes + "\n" + "   extracter: " + this);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Delegates to the four-argument form using this extracter's own overwrite policy and mapping.
     */
    public final Map<QName, Serializable> extract(ContentReader reader, Map<QName, Serializable> destination) {
        final OverwritePolicy configuredPolicy = this.overwritePolicy;
        final Map<String, Set<QName>> configuredMapping = this.mapping;
        return extract(reader, configuredPolicy, destination, configuredMapping);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Delegates to the four-argument form using the given overwrite policy and this extracter's own mapping.
     */
    public final Map<QName, Serializable> extract(ContentReader reader, OverwritePolicy overwritePolicy,
            Map<QName, Serializable> destination) {
        final Map<String, Set<QName>> configuredMapping = this.mapping;
        return extract(reader, overwritePolicy, destination, configuredMapping);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Raw values are extracted only when the reader reports content that exists and is not empty.
     * They are then mapped to system properties, converted to their dictionary types, filtered and
     * finally applied to the destination using the given overwrite policy.
     * <p>
     * Failures during any of these steps are logged and not rethrown; in that case an empty map
     * is returned.
     *
     * @return the properties that were changed; never <tt>null</tt>
     * @throws AlfrescoRuntimeException if the extracter has not been initialized or does not
     *                                  support the reader's mimetype
     */
    public Map<QName, Serializable> extract(ContentReader reader, OverwritePolicy overwritePolicy,
            Map<QName, Serializable> destination, Map<String, Set<QName>> mapping) {
        // NOTE(review): the 'mapping' argument is not consulted below - mapRawToSystem is called
        // without it and reads the extracter's own mapping. Confirm whether that is intended.
        if (logger.isDebugEnabled()) {
            logger.debug(
                    "Starting metadata extraction: \n" + "   reader: " + reader + "\n" + "   extracter: " + this);
        }

        if (!initialized) {
            throw new AlfrescoRuntimeException(
                    "Metadata extracter not initialized.\n" + "  Call the 'register' method on: " + this + "\n"
                            + "  Implementations of the 'init' method must call the base implementation.");
        }
        // Reject mimetypes that this extracter does not handle
        checkIsSupported(reader);

        Map<QName, Serializable> changedProperties = null;
        try {
            Map<String, Serializable> rawMetadata = null;
            // Check that the content has some meat
            if (reader.getSize() > 0 && reader.exists()) {
                rawMetadata = extractRaw(reader, getLimits(reader.getMimetype()));
            } else {
                rawMetadata = new HashMap<String, Serializable>(1);
            }
            // Convert to system properties (standalone)
            Map<QName, Serializable> systemProperties = mapRawToSystem(rawMetadata);
            // Convert the properties according to the dictionary types
            systemProperties = convertSystemPropertyValues(systemProperties);
            // Last chance to filter the system properties map before applying them
            filterSystemProperties(systemProperties, destination);
            // Now use the proper overwrite policy
            changedProperties = overwritePolicy.applyProperties(systemProperties, destination);

            if (logger.isDebugEnabled()) {
                logger.debug("Extracted Metadata from " + reader + "\n  Found: " + rawMetadata
                        + "\n  Mapped and Accepted: " + changedProperties);
            }
        } catch (Throwable e) {
            // Ask Tika to detect the document, and report back on if
            //  the current mime type is plausible
            // Start from an empty string: this value is concatenated into the log messages below,
            // and a null reference would be printed as the text "null"
            String typeErrorMessage = "";
            String differentType = null;
            if (mimetypeService != null) {
                differentType = mimetypeService.getMimetypeIfNotMatches(reader.getReader());
            } else {
                logger.info("Unable to verify mimetype of " + reader.getReader()
                        + " as no MimetypeService available to " + getClass().getName());
            }
            if (differentType != null) {
                typeErrorMessage = "\n" + "   claimed mime type: " + reader.getMimetype() + "\n"
                        + "   detected mime type: " + differentType;
            }

            if (logger.isDebugEnabled()) {
                logger.debug("Metadata extraction failed: \n" + "   Extracter: " + this + "\n" + "   Content:   "
                        + reader + typeErrorMessage, e);
            } else {
                logger.warn("Metadata extraction failed (turn on DEBUG for full error): \n" + "   Extracter: "
                        + this + "\n" + "   Content:   " + reader + "\n" + "   Failure:   " + e.getMessage()
                        + typeErrorMessage);
            }
        } finally {
            // check that the reader was closed (if used)
            if (reader.isChannelOpen()) {
                logger.error("Content reader not closed by metadata extracter: \n" + "   reader: " + reader + "\n"
                        + "   extracter: " + this);
            }
            // Make sure that we have something to return
            if (changedProperties == null) {
                changedProperties = new HashMap<QName, Serializable>(0);
            }
        }

        // Done
        if (logger.isDebugEnabled()) {
            logger.debug("Completed metadata extraction: \n" + "   reader:    " + reader + "\n" + "   extracter: "
                    + this + "\n" + "   changed:   " + changedProperties);
        }
        return changedProperties;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The given properties are mapped to metadata keys and handed to <tt>embedInternal</tt>.
     * Failures during embedding are logged and not rethrown.
     */
    public final void embed(Map<QName, Serializable> properties, ContentReader reader, ContentWriter writer) {
        if (logger.isDebugEnabled()) {
            logger.debug("Starting metadata embedding: \n   reader: " + reader + "\n   writer: " + writer
                    + "\n   extracter: " + this);
        }

        if (!initialized) {
            throw new AlfrescoRuntimeException(
                    "Metadata extracter not initialized.\n  Call the 'register' method on: " + this
                            + "\n  Implementations of the 'init' method must call the base implementation.");
        }
        // Reject mimetypes for which embedding is not handled
        checkIsEmbedSupported(writer);

        try {
            Map<String, Serializable> metadataToEmbed = mapSystemToRaw(properties);
            embedInternal(metadataToEmbed, reader, writer);
            if (logger.isDebugEnabled()) {
                logger.debug("Embedded Metadata into " + writer);
            }
        } catch (Throwable e) {
            // Try to find out whether the claimed mimetype is plausible, to enrich the failure report
            String detectedType = null;
            if (mimetypeService == null) {
                logger.info("Unable to verify mimetype of " + writer.getReader()
                        + " as no MimetypeService available to " + getClass().getName());
            } else {
                try {
                    detectedType = mimetypeService.getMimetypeIfNotMatches(writer.getReader());
                } catch (ContentIOException cioe) {
                    // Embedding failed and writer is empty
                }
            }
            String mimetypeNote = (detectedType == null) ? ""
                    : "\n   claimed mime type: " + writer.getMimetype() + "\n   detected mime type: "
                            + detectedType;

            if (logger.isDebugEnabled()) {
                logger.debug("Metadata embedding failed: \n   Extracter: " + this + "\n   Content:   " + writer
                        + mimetypeNote, e);
            } else {
                logger.error("Metadata embedding failed (turn on DEBUG for full error): \n   Extracter: " + this
                        + "\n   Content:   " + writer + "\n   Failure:   " + e.getMessage() + mimetypeNote);
            }
        } finally {
            // The writer's channel is expected to be closed by now
            if (writer.isChannelOpen()) {
                logger.error("Content writer not closed by metadata extracter: \n   writer: " + writer
                        + "\n   extracter: " + this);
            }
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Completed metadata embedding: \n   writer:    " + writer + "\n   extracter: " + this);
        }
    }

    /**
     * Re-keys extracted document values by the system properties they are mapped to.  Document
     * keys without a mapping are dropped; a value mapped to several system properties is
     * stored under each of them.
     * 
     * @param rawMetadata   Metadata keyed by document properties
     * @return              Returns the metadata keyed by the system properties
     */
    private Map<QName, Serializable> mapRawToSystem(Map<String, Serializable> rawMetadata) {
        final int initialCapacity = rawMetadata.size() * 2 + 1;
        Map<QName, Serializable> mappedProperties = new HashMap<QName, Serializable>(initialCapacity);
        for (Map.Entry<String, Serializable> rawEntry : rawMetadata.entrySet()) {
            String documentKey = rawEntry.getKey();
            if (mapping.containsKey(documentKey)) {
                Serializable documentValue = rawEntry.getValue();
                for (QName targetQName : mapping.get(documentKey)) {
                    mappedProperties.put(targetQName, documentValue);
                }
            }
            // Unmapped document keys are ignored
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Converted extracted raw values to system values: \n   Raw Properties:    " + rawMetadata
                    + "\n   System Properties: " + mappedProperties);
        }
        return mappedProperties;
    }

    /**
     * Re-keys system property values by the content file metadata keys they are mapped to.
     * System properties without an embed mapping are dropped; a value mapped to several
     * metadata keys is stored under each of them.
     *
     * @param systemMetadata   Metadata keyed by system properties
     * @return              Returns the metadata keyed by the content file metadata properties
     */
    private Map<String, Serializable> mapSystemToRaw(Map<QName, Serializable> systemMetadata) {
        final int initialCapacity = systemMetadata.size() * 2 + 1;
        Map<String, Serializable> embeddableValues = new HashMap<String, Serializable>(initialCapacity);
        for (Map.Entry<QName, Serializable> systemEntry : systemMetadata.entrySet()) {
            QName modelProperty = systemEntry.getKey();
            if (embedMapping.containsKey(modelProperty)) {
                Serializable modelValue = systemEntry.getValue();
                for (String targetKey : embedMapping.get(modelProperty)) {
                    embeddableValues.put(targetKey, modelValue);
                }
            }
            // Unmapped model properties are ignored
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Converted system model values to metadata values: \n   System Properties:    "
                    + systemMetadata + "\n   Metadata Properties: " + embeddableValues);
        }
        return embeddableValues;
    }

    /**
     * Filters the system properties that are going to be applied.  Gives the metadata extracter an 
     * opportunity to remove properties that may not be appropriate in a given context.
     * <p>
     * During extraction this is called after the values have been converted to their dictionary
     * types and before the overwrite policy is applied.
     * 
     * @param systemProperties  map of system properties to be applied
     * @param targetProperties  map of target properties, may be used to provide the context required
     */
    protected void filterSystemProperties(Map<QName, Serializable> systemProperties,
            Map<QName, Serializable> targetProperties) {
        // Default implementation does nothing
    }

    /**
     * Converts all values according to their dictionary-defined type.  This uses the
     * {@link #setFailOnTypeConversion(boolean) failOnTypeConversion flag} to determine how failures
     * are handled i.e. if values fail to convert, the process may discard the property.
     * <p>
     * Properties that have no dictionary definition (or all properties, when no dictionary service
     * is set) are copied across unchanged.  The input map is not modified.
     * 
     * @param systemProperties  the values keyed to system property names
     * @return                  Returns a new map holding the converted properties.
     * @throws AlfrescoRuntimeException  if a conversion fails and <tt>failOnTypeConversion</tt> is set
     * @throws MalformedNodeRefException if a property other than the tags property holds a malformed
     *                                   node reference and <tt>failOnTypeConversion</tt> is set
     */
    @SuppressWarnings("unchecked")
    private Map<QName, Serializable> convertSystemPropertyValues(Map<QName, Serializable> systemProperties) {
        Map<QName, Serializable> convertedProperties = new HashMap<QName, Serializable>(
                systemProperties.size() + 7);
        for (Map.Entry<QName, Serializable> entry : systemProperties.entrySet()) {
            QName propertyQName = entry.getKey();
            Serializable propertyValue = entry.getValue();
            // Get the property definition
            PropertyDefinition propertyDef = (dictionaryService == null) ? null
                    : dictionaryService.getProperty(propertyQName);
            if (propertyDef == null) {
                // There is nothing in the DD about this so just transfer it
                convertedProperties.put(propertyQName, propertyValue);
                continue;
            }
            // It is in the DD, so attempt the conversion
            DataTypeDefinition propertyTypeDef = propertyDef.getDataType();
            Serializable convertedPropertyValue = null;

            try {
                // Attempt to make any date conversions
                if (propertyTypeDef.getName().equals(DataTypeDefinition.DATE)
                        || propertyTypeDef.getName().equals(DataTypeDefinition.DATETIME)) {
                    if (propertyValue instanceof Date) {
                        // Already a date; keep as is
                        convertedPropertyValue = propertyValue;
                    } else if (propertyValue instanceof Collection) {
                        // Unchecked cast: the elements are treated as date strings
                        convertedPropertyValue = (Serializable) makeDates((Collection<String>) propertyValue);
                    } else if (propertyValue instanceof String) {
                        convertedPropertyValue = makeDate((String) propertyValue);
                    } else if (propertyValue == null) {
                        convertedPropertyValue = null;
                    } else {
                        // Unsupported value type for a date property.  Note that the converted value
                        // stays null here and the property is still stored below with that null value.
                        if (logger.isWarnEnabled()) {
                            StringBuilder mesg = new StringBuilder();
                            mesg.append("Unable to convert Date property: ").append(propertyQName)
                                    .append(", value: ").append(propertyValue).append(", type: ")
                                    .append(propertyTypeDef.getName());
                            logger.warn(mesg.toString());
                        }
                    }
                } else {
                    // Non-date types go through the default converter, choosing the overload by value shape
                    if (propertyValue instanceof Collection) {
                        convertedPropertyValue = (Serializable) DefaultTypeConverter.INSTANCE
                                .convert(propertyTypeDef, (Collection<?>) propertyValue);
                    } else if (propertyValue instanceof Object[]) {
                        convertedPropertyValue = (Serializable) DefaultTypeConverter.INSTANCE
                                .convert(propertyTypeDef, (Object[]) propertyValue);
                    } else {
                        convertedPropertyValue = (Serializable) DefaultTypeConverter.INSTANCE
                                .convert(propertyTypeDef, propertyValue);
                    }
                }
                // Only reached when no exception was thrown above
                convertedProperties.put(propertyQName, convertedPropertyValue);
            } catch (TypeConversionException e) {
                logger.warn("Type conversion failed during metadata extraction: \n" + "   Failure:   "
                        + e.getMessage() + "\n" + "   Type:      " + propertyTypeDef + "\n" + "   Value:     "
                        + propertyValue);
                // Do we just absorb this or is it a problem?
                // When absorbed, the property is simply left out of the result
                if (failOnTypeConversion) {
                    throw AlfrescoRuntimeException.create(e, ERR_TYPE_CONVERSION, this, propertyQName,
                            propertyTypeDef.getName(), propertyValue);
                }
            } catch (MalformedNodeRefException e) {
                if (propertyQName.equals(ContentModel.PROP_TAGS)) {
                    if (enableStringTagging) {
                        // We must want to map tag string values instead of nodeRefs
                        // ContentMetadataExtracter will take care of tagging by string
                        ArrayList<Object> list = new ArrayList<Object>(1);
                        if (propertyValue instanceof Object[]) {
                            // Arrays are flattened into the list
                            for (Object value : (Object[]) propertyValue) {
                                list.add(value);
                            }
                        } else {
                            // Any other value, collections included, is added as a single element
                            list.add(propertyValue);
                        }
                        convertedProperties.put(propertyQName, list);
                    } else {
                        // The tags property is left out of the result
                        if (logger.isInfoEnabled()) {
                            logger.info("enableStringTagging is false and could not convert "
                                    + propertyQName.toString() + ": " + e.getMessage());
                        }
                    }
                } else {
                    // Not the tags property: either fail or silently leave the property out
                    if (failOnTypeConversion) {
                        throw e;
                    }
                }
            }
        }
        // Done
        return convertedProperties;
    }

    /**
     * Converts each date <tt>String</tt> in the collection using {@link #makeDate(String)}.
     *
     * @param dateStrs  the date strings to convert
     * @return          the conversion results, in iteration order of the input
     */
    private Collection<Date> makeDates(Collection<String> dateStrs) {
        List<Date> convertedDates = new ArrayList<Date>(dateStrs.size());
        for (String candidate : dateStrs) {
            convertedDates.add(makeDate(candidate));
        }
        return convertedDates;
    }

    /**
     * Convert a date <tt>String</tt> to a <tt>Date</tt> object.
     * <p>
     * The default type converter is tried first.  If it fails, each of the supported date
     * formatters (when any are set) is tried, first as is and then with the US locale.
     *
     * @param dateStr   the string to convert
     * @return          the date, or <tt>null</tt> if the string is <tt>null</tt> or empty
     * @throws TypeConversionException if none of the attempts produced a date
     */
    protected Date makeDate(String dateStr) {
        if (dateStr == null || dateStr.length() == 0) {
            return null;
        }

        try {
            return DefaultTypeConverter.INSTANCE.convert(Date.class, dateStr);
        } catch (TypeConversionException ignored) {
            // Fall through and try one of the other formats
        }

        if (this.supportedDateFormatters != null) {
            // Remove text such as " (PDT)" which cannot be parsed.
            String parseable = (dateStr.indexOf('(') == -1) ? dateStr : dateStr.replaceAll(" \\(.*\\)", "");
            for (DateTimeFormatter formatter : supportedDateFormatters) {
                // Regional date format
                try {
                    DateTime regional = formatter.parseDateTime(parseable);
                    if (regional.getCenturyOfEra() > 0) {
                        return regional.toDate();
                    }
                } catch (IllegalArgumentException notRegional) {
                    // Didn't work
                }

                /*
                 * Date format can be locale specific - make sure English format always works.
                 * TODO MER 25 May 2010 - Added this as a quick fix for IMAP date parsing which is always
                 * English regardless of Locale.  Some more thought and/or code is required to configure
                 * the relationship between properties, format and locale.
                 */
                try {
                    DateTime english = formatter.withLocale(Locale.US).parseDateTime(parseable);
                    if (english.getCenturyOfEra() > 0) {
                        return english.toDate();
                    }
                } catch (IllegalArgumentException notEnglish) {
                    // Didn't work
                }
            }
        }

        // Still no luck
        throw new TypeConversionException("Unable to convert string to date: " + dateStr);
    }

    /**
     * Adds a value to the map, conserving null values.  Values are converted to null if:
     * <ul>
     *   <li>it is an empty string value after trimming</li>
     *   <li>it is an empty collection</li>
     *   <li>it is an empty array</li>
     * </ul>
     * String values are trimmed before being put into the map.
     * Otherwise, it is up to the extracter to ensure that the value is a <tt>Serializable</tt>.
     * It is not appropriate to implicitly convert values in order to make them <tt>Serializable</tt>
     * - the best conversion method will depend on the value's specific meaning.
     * 
     * @param key           the destination key
     * @param value         the serializable value
     * @param destination   the map to put values into
     * @return              Returns <tt>true</tt> if set, otherwise <tt>false</tt>.  This implementation
     *                      always stores the (possibly <tt>null</tt>) value and returns <tt>true</tt>.
     */
    protected boolean putRawValue(String key, Serializable value, Map<String, Serializable> destination) {
        // Normalise "empty" values (blank strings, empty collections, zero-length arrays) to null.
        // A null is still stored so that the key itself is conserved in the destination map.
        Serializable normalized = value;
        if (value instanceof String) {
            // Trim first; a non-empty result additionally has any embedded NUL characters removed
            String trimmed = ((String) value).trim();
            normalized = trimmed.isEmpty() ? null : trimmed.replace("\u0000", "");
        } else if (value instanceof Collection) {
            if (((Collection<?>) value).isEmpty()) {
                normalized = null;
            }
        } else if (value != null && value.getClass().isArray() && Array.getLength(value) == 0) {
            normalized = null;
        }
        // Every value - including null - is put into the map
        destination.put(key, normalized);
        return true;
    }

    /**
     * Helper method to fetch a clean map into which raw values can be dumped.
     * 
     * @return          Returns an empty map
     */
    protected final Map<String, Serializable> newRawMap() {
        // A fresh, mutable map is created on every call; 17 is only the initial capacity hint
        final int initialCapacity = 17;
        Map<String, Serializable> rawValues = new HashMap<String, Serializable>(initialCapacity);
        return rawValues;
    }

    /**
     * This method provides a <i>best guess</i> of where to store the values extracted
     * from the documents.  The list of properties mapped by default need <b>not</b>
     * include all properties extracted from the document; just the obvious set of mappings
     * need be supplied.
     * Implementations must either provide the default mapping properties in the expected
     * location or override the method to provide the default mapping.
     * <p>
     * The default implementation looks for the default mapping file in the location
     * given by the class name and <i>.properties</i>.  If the extracter's class is
     * <b>x.y.z.MyExtracter</b> then the default properties will be picked up at
     * <b>classpath:/alfresco/metadata/MyExtracter.properties</b>.
     * The previous location of <b>classpath:/x/y/z/MyExtracter.properties</b> is
     * still supported but may be removed in a future release.
     * Inner classes are supported, but the '$' in the class name is replaced with '-', so
     * default properties for <b>x.y.z.MyStuff$MyExtracter</b> will be located using
     * <b>classpath:/alfresco/metadata/MyStuff-MyExtracter.properties</b>.
     * <p>
     * The default mapping implementation should include thorough Javadocs so that the
     * system administrators can accurately determine how to best enhance or override the
     * default mapping.
     * <p>
     * If the default mapping is declared in a properties file other than the one named after
     * the class, then the {@link #readMappingProperties(String)} method can be used to quickly
     * generate the return value:
     * <pre><code>
     *      protected Map&lt;String, Set&lt;QName&gt;&gt; getDefaultMapping()
     *      {
     *          return readMappingProperties(DEFAULT_MAPPING);
     *      }
     * </code></pre>
     * The map can also be created in code either statically or during the call.
     * 
     * @return              Returns the default, static mapping.  It may not be null.
     * 
     * @see #setInheritDefaultMapping(boolean inherit)
     */
    protected Map<String, Set<QName>> getDefaultMapping() {
        // getName() is used rather than getSimpleName() so that the '$' of inner classes is
        // retained and can be mapped to '-'
        final String fullClassName = this.getClass().getName();

        // Preferred location: alfresco/metadata/<ShortClassName>.properties
        final String shortName = fullClassName.substring(fullClassName.lastIndexOf('.') + 1).replace('$', '-');
        AlfrescoRuntimeException preferredLocationFailure = null;
        try {
            return readMappingProperties("alfresco/metadata/" + shortName + ".properties");
        } catch (AlfrescoRuntimeException e) {
            // Remember the failure; it is the one reported if the legacy lookup fails as well
            preferredLocationFailure = e;
        }

        // Legacy location: the properties file sits alongside the class in its package
        final String packagePath = fullClassName.replace('$', '-').replace('.', '/');
        try {
            return readMappingProperties(packagePath + ".properties");
        } catch (AlfrescoRuntimeException e) {
            // Neither location worked: surface the error for the preferred (new) location
            throw preferredLocationFailure;
        }
    }

    /**
     * This method provides a <i>best guess</i> of what model properties should be embedded
     * in content.  The list of properties mapped by default need <b>not</b>
     * include all properties to be embedded in the document; just the obvious set of mappings
     * need be supplied.
     * Implementations must either provide the default mapping properties in the expected
     * location or override the method to provide the default mapping.
     * <p>
     * The default implementation looks for the default mapping file in the location
     * given by the class name and <i>.embed.properties</i>.  If the extracter's class is
     * <b>x.y.z.MyExtracter</b> then the default properties will be looked up at
     * <b>classpath:/alfresco/metadata/MyExtracter.embed.properties</b> and at the
     * legacy location <b>classpath:/x/y/z/MyExtracter.embed.properties</b>.
     * Inner classes are supported, but the '$' in the class name is replaced with '-', so
     * default properties for <b>x.y.z.MyStuff$MyExtracter</b> will be located using
     * <b>MyStuff-MyExtracter.embed.properties</b>.
     * <p>
     * The default mapping implementation should include thorough Javadocs so that the
     * system administrators can accurately determine how to best enhance or override the
     * default mapping.
     * <p>
     * If the default mapping is declared in a properties file other than the one named after
     * the class, then the {@link #readEmbedMappingProperties(String)} method can be used to quickly
     * generate the return value:
     * <pre><code>
     *      protected Map&lt;QName, Set&lt;String&gt;&gt; getDefaultEmbedMapping()
     *      {
     *          return readEmbedMappingProperties(DEFAULT_MAPPING);
     *      }
     * </code></pre>
     * The map can also be created in code either statically or during the call.
     * <p>
     * If no embed mapping properties file is found a reverse of the extract
     * mapping in {@link #getDefaultMapping()} will be assumed with the first QName in each
     * value used as the key for this mapping and a last win approach for duplicates.
     *
     * @return              Returns the default, static embed mapping.  It may not be null.
     *
     * @see #setInheritDefaultMapping(boolean inherit)
     */
    protected Map<QName, Set<String>> getDefaultEmbedMapping() {
        // Lookup order:
        //   1. alfresco/metadata/<ShortClassName>.embed.properties
        //   2. <package/path/ClassName>.embed.properties (legacy), only if (1) produced nothing
        //   3. the reverse of the extract mapping, only if neither file produced a mapping
        Map<QName, Set<String>> embedMapping = null;

        // Can't use getSimpleName here because we lose inner class $ processing
        String className = this.getClass().getName();
        String shortClassName = className.substring(className.lastIndexOf('.') + 1).replace('$', '-');
        String metadataPropertiesUrl = "alfresco/metadata/" + shortClassName + ".embed.properties";
        try {
            embedMapping = readEmbedMappingProperties(metadataPropertiesUrl);
        } catch (AlfrescoRuntimeException e) {
            // No embed mapping found at default location
        }

        // Only fall back to the package location when the default location gave no mapping.
        // Running this lookup unconditionally would let its result replace a mapping that was
        // already loaded from the default location.
        if (embedMapping == null) {
            String packagePropertiesUrl = className.replace('$', '-').replace('.', '/') + ".embed.properties";
            try {
                embedMapping = readEmbedMappingProperties(packagePropertiesUrl);
            } catch (AlfrescoRuntimeException e) {
                // No embed mapping found at legacy location
            }
        }

        if (embedMapping == null) {
            if (logger.isDebugEnabled()) {
                logger.debug("No explicit embed mapping properties found at: " + metadataPropertiesUrl
                        + ", assuming reverse of extract mapping");
            }
            // Prefer the configured extract mapping; use the default one if none is set
            Map<String, Set<QName>> extractMapping = this.mapping;
            if (extractMapping == null || extractMapping.size() == 0) {
                extractMapping = getDefaultMapping();
            }
            embedMapping = new HashMap<QName, Set<String>>(extractMapping.size());
            for (Map.Entry<String, Set<QName>> extractEntry : extractMapping.entrySet()) {
                Set<QName> modelProperties = extractEntry.getValue();
                if (modelProperties == null || modelProperties.isEmpty()) {
                    continue;
                }
                // Only the first model property of each extract entry is reversed
                QName modelProperty = modelProperties.iterator().next();
                Set<String> metadataKeys = embedMapping.get(modelProperty);
                if (metadataKeys == null) {
                    metadataKeys = new HashSet<String>(1);
                    embedMapping.put(modelProperty, metadataKeys);
                }
                metadataKeys.add(extractEntry.getKey());
                if (logger.isTraceEnabled()) {
                    logger.trace("Added mapping from " + modelProperty + " to " + metadataKeys.toString());
                }
            }
        }
        return embedMapping;
    }

    /**
     * Gets the metadata extracter limits for the given mimetype.
     * <p>
     * A specific match for the given mimetype is tried first and
     * if none is found a wildcard of "*" is tried.
     * 
     * @param mimetype String
     * @return the found limits or null
     */
    protected MetadataExtracterLimits getLimits(String mimetype) {
        // No limits configured at all
        if (mimetypeLimits == null) {
            return null;
        }
        // An exact mimetype match wins; otherwise use the "*" wildcard entry (which may be absent)
        MetadataExtracterLimits specific = mimetypeLimits.get(mimetype);
        return (specific != null) ? specific : mimetypeLimits.get("*");
    }

    /**
     * <code>Callable</code> wrapper for the 
     * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method
     * to handle timeouts.
     */
    private class ExtractRawCallable implements Callable<Map<String, Serializable>> {
        /** The reader handed to the enclosing extracter's <code>extractRaw</code> */
        private final ContentReader source;

        public ExtractRawCallable(ContentReader reader) {
            this.source = reader;
        }

        /**
         * Delegates to the enclosing extracter and wraps anything thrown, so that the
         * original {@link Throwable} can be recovered from the wrapper's cause.
         */
        @Override
        public Map<String, Serializable> call() throws Exception {
            final Map<String, Serializable> rawValues;
            try {
                rawValues = extractRaw(source);
            } catch (Throwable failure) {
                throw new ExtractRawCallableException(failure);
            }
            return rawValues;
        }
    }

    /**
     * Exception wrapper to handle any {@link Throwable} from 
     * {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)}
     */
    private class ExtractRawCallableException extends Exception {
        private static final long serialVersionUID = 1813857091767321624L;

        public ExtractRawCallableException(Throwable cause) {
            super(cause);
        }
    }

    /**
     * Calls the {@link AbstractMappingMetadataExtracter#extractRaw(ContentReader)} method
     * using the given limits.
     * <p>
     * Currently the only limit supported by {@link MetadataExtracterLimits} is a timeout
     * so this method uses {@link AbstractMappingMetadataExtracter#getExecutorService()}
     * to execute a {@link FutureTask} with any timeout defined.
     * <p>
     * If no timeout limit is defined or is unlimited (-1),
     * the <code>extractRaw</code> method is called directly.
     * 
     * @param reader        the document to extract the values from.  This stream provided by
     *                      the reader must be closed if accessed directly.
     * @param limits        the limits to impose on the extraction
     * @return              Returns a map of document property values keyed by property name.
     * @throws Throwable    All exception conditions can be handled.
     */
    private Map<String, Serializable> extractRaw(ContentReader reader, MetadataExtracterLimits limits)
            throws Throwable {
        // No limits, or an unlimited (-1) timeout: extract directly on the calling thread
        if (limits == null || limits.getTimeoutMs() == -1) {
            return extractRaw(reader);
        }
        // Run the extraction through the executor service so that the wait can be bounded
        StreamAwareContentReaderProxy proxiedReader = new StreamAwareContentReaderProxy(reader);
        FutureTask<Map<String, Serializable>> task =
                new FutureTask<Map<String, Serializable>>(new ExtractRawCallable(proxiedReader));
        getExecutorService().execute(task);
        try {
            return task.get(limits.getTimeoutMs(), TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            // Took too long: cancel the task, release the proxied reader and report the timeout
            task.cancel(true);
            proxiedReader.release();
            throw e;
        } catch (InterruptedException e) {
            // We were asked to stop
            task.cancel(true);
            // Restore the interrupt status so that callers further up can still observe it
            Thread.currentThread().interrupt();
            return null;
        } catch (ExecutionException e) {
            // Unwrap our cause and throw that
            Throwable cause = e.getCause();
            if (cause instanceof ExtractRawCallableException && cause.getCause() != null) {
                cause = cause.getCause();
            }
            // Never 'throw null': fall back to the ExecutionException itself if it has no cause
            throw (cause != null) ? cause : e;
        }
    }

    /**
     * Override to provide the raw extracted metadata values.  An extracter should extract
     * as many of the available properties as is realistically possible.  Even if the
     * {@link #getDefaultMapping() default mapping} doesn't handle all properties, it is
     * possible for each instance of the extracter to be configured differently and more or
     * less of the properties may be used in different installations.
     * <p>
     * Raw values must not be trimmed or removed for any reason.  Null values, empty
     * strings and non-serializable values are handled as follows:
     * <ul>
     *    <li><b>Null:</b>              Removed</li>
     *    <li><b>Empty String:</b>      Passed to the OverwritePolicy</li>
     *    <li><b>Non Serializable:</b>  Converted to String or fails if that is not possible</li>
     * </ul>
     * <p>
     * Properties extracted and their meanings and types should be thoroughly described in
     * the class-level javadocs of the extracter implementation, for example:
     * <pre>
     * <b>editor:</b> - the document editor        -->  cm:author
     * <b>title:</b>  - the document title         -->  cm:title
     * <b>user1:</b>  - the document summary
     * <b>user2:</b>  - the document description   -->  cm:description
     * <b>user3:</b>  -
     * <b>user4:</b>  -
     * </pre>
     * 
     * @param reader        the document to extract the values from.  This stream provided by
     *                      the reader must be closed if accessed directly.
     * @return              Returns a map of document property values keyed by property name.
     * @throws Throwable    All exception conditions can be handled.
     * 
     * @see #getDefaultMapping()
     */
    protected abstract Map<String, Serializable> extractRaw(ContentReader reader) throws Throwable;

    /**
     * Override to embed metadata values.  An extracter should embed
     * as many of the available properties as is realistically possible.  Even if the
     * {@link #getDefaultEmbedMapping() default mapping} doesn't handle all properties, it is
     * possible for each instance of the extracter to be configured differently and more or
     * less of the properties may be used in different installations.
     *
     * @param metadata      the metadata keys and values to embed in the content file
     * @param reader      the reader for the original document.  This stream provided by
     *                      the reader must be closed if accessed directly.
     * @param writer        the writer for the document to embed the values in.  This stream provided by
     *                      the writer must be closed if accessed directly.
     * @throws Throwable    All exception conditions can be handled.
     *
     * @see #getDefaultEmbedMapping()
     */
    protected void embedInternal(Map<String, Serializable> metadata, ContentReader reader, ContentWriter writer)
            throws Throwable {
        // Intentionally a no-op in this base class: neither the reader nor the writer is touched
        // here, so embedding only happens where a subclass overrides this method.
        // TODO make this an abstract method once more extracters support embedding
    }
}