ddf.mime.mapper.MimeTypeMapperImpl.java Source code

Java tutorial

Introduction

Here is the source code for ddf.mime.mapper.MimeTypeMapperImpl.java

Source

/**
 * Copyright (c) Codice Foundation
 * <p>
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or any later version.
 * <p>
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. A copy of the GNU Lesser General Public License
 * is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 */
package ddf.mime.mapper;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import ddf.mime.MimeTypeMapper;
import ddf.mime.MimeTypeResolutionException;
import ddf.mime.MimeTypeResolver;

/**
 * Implementation of the {@link MimeTypeMapper} interface that searches through all of the
 * registered {@link MimeTypeResolver}s to retrieve the file extension for a given mime type, and
 * vice versa. Once a file extension (or mime type) is resolved, this mapper stops searching
 * through any remaining {@link MimeTypeResolver}s and returns.
 *
 * @since 2.1.0
 *
 */
public class MimeTypeMapperImpl implements MimeTypeMapper {
    private static final Logger LOGGER = LoggerFactory.getLogger(MimeTypeMapperImpl.class);

    private static final String XML_FILE_EXTENSION = "xml";

    /**
     * Parser feature (value of {@code XMLConstants.FEATURE_SECURE_PROCESSING}) that is switched
     * on before parsing a caller-supplied XML stream.
     */
    private static final String SECURE_PROCESSING_FEATURE = "http://javax.xml.XMLConstants/feature/secure-processing";

    /**
     * Parser features that are switched off before parsing a caller-supplied XML stream so that
     * the parser does not resolve external entities or fetch external DTDs. Documents that merely
     * contain a DOCTYPE declaration can still be parsed.
     */
    private static final String[] DISABLED_XML_FEATURES = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities",
            "http://apache.org/xml/features/nonvalidating/load-external-dtd"};

    /**
     * Orders {@link MimeTypeResolver}s by ascending priority. {@link Integer#compare(int, int)}
     * is used instead of subtracting the priorities because a subtraction overflows (and yields
     * the wrong sign) when the two priorities are far apart.
     */
    private static final Comparator<MimeTypeResolver> PRIORITY_COMPARATOR = new Comparator<MimeTypeResolver>() {
        @Override
        public int compare(MimeTypeResolver o1, MimeTypeResolver o2) {
            return Integer.compare(o1.getPriority(), o2.getPriority());
        }
    };

    /**
     * The {@link List} of {@link MimeTypeResolver}s configured for this mapper and will be searched
     * on mime type/file extension mapping requests.
     */
    protected List<MimeTypeResolver> mimeTypeResolvers;

    /**
     * Not read or written anywhere in this class; retained because it is {@code protected} and
     * may be referenced by subclasses.
     */
    protected MimeTypeResolver mimeTypeResolver;

    /**
     * Constructs the MimeTypeMapper with a list of {@link MimeTypeResolver}s.
     *
     * @param mimeTypeResolvers
     *            the {@link List} of {@link MimeTypeResolver}s; the reference is stored as-is (not
     *            copied), and a {@code null} list is treated as "no resolvers" by the lookup
     *            methods
     */
    public MimeTypeMapperImpl(List<MimeTypeResolver> mimeTypeResolvers) {
        LOGGER.debug("INSIDE: MimeTypeMapperImpl constructor");
        this.mimeTypeResolvers = mimeTypeResolvers;
    }

    /**
     * Asks each configured {@link MimeTypeResolver}, highest priority first, for the file
     * extension mapped to the given mime type and returns the first non-empty answer.
     *
     * @param mimeType
     *            the mime type to look up; passed unchanged to each resolver
     * @return the first non-empty file extension returned by a resolver, or the last (null or
     *         empty) answer if no resolver produced one; {@code null} if there are no resolvers
     * @throws MimeTypeResolutionException
     *             if any resolver throws while being queried; the original exception is the cause
     */
    @Override
    public String getFileExtensionForMimeType(String mimeType) throws MimeTypeResolutionException {
        LOGGER.trace("ENTERING: getFileExtensionForMimeType()");

        String extension = null;

        // Sort the mime type resolvers in descending order of priority so that higher priority
        // resolvers get the chance to answer before lower priority ones.
        List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers);

        LOGGER.debug("Looping through {} MimeTypeResolvers", sortedResolvers.size());

        // Query the resolvers in priority order and stop at the first one that yields a
        // non-empty file extension for the given mime type.
        for (MimeTypeResolver resolver : sortedResolvers) {
            LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName());
            try {
                extension = resolver.getFileExtensionForMimeType(mimeType);
            } catch (Exception e) {
                LOGGER.warn("Error resolving file extension for mime type: {}", mimeType);
                throw new MimeTypeResolutionException(e);
            }

            if (StringUtils.isNotEmpty(extension)) {
                LOGGER.debug("extension [{}] retrieved from MimeTypeResolver:  {}", extension, resolver.getName());
                break;
            }
        }

        LOGGER.debug("mimeType = {},   file extension = [{}]", mimeType, extension);

        LOGGER.trace("EXITING: getFileExtensionForMimeType()");

        return extension;
    }

    /**
     * Asks each configured {@link MimeTypeResolver}, highest priority first, for the mime type
     * mapped to the given file extension and returns the first non-empty answer.
     *
     * @param fileExtension
     *            the file extension to look up; passed unchanged to each resolver
     * @return the first non-empty mime type returned by a resolver, or the last (null or empty)
     *         answer if no resolver produced one; {@code null} if there are no resolvers
     * @throws MimeTypeResolutionException
     *             if any resolver throws while being queried; the original exception is the cause
     */
    @Override
    public String getMimeTypeForFileExtension(String fileExtension) throws MimeTypeResolutionException {
        LOGGER.trace("ENTERING: getMimeTypeForFileExtension()");

        String mimeType = null;

        // Sort the mime type resolvers in descending order of priority so that higher priority
        // resolvers get the chance to answer before lower priority ones.
        List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers);

        LOGGER.debug("Looping through {} MimeTypeResolvers", sortedResolvers.size());

        // Query the resolvers in priority order and stop at the first one that yields a
        // non-empty mime type for the given file extension.
        for (MimeTypeResolver resolver : sortedResolvers) {
            LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName());
            try {
                mimeType = resolver.getMimeTypeForFileExtension(fileExtension);
            } catch (Exception e) {
                LOGGER.warn("Error resolving mime type for file extension: {}", fileExtension);
                throw new MimeTypeResolutionException(e);
            }

            if (StringUtils.isNotEmpty(mimeType)) {
                LOGGER.debug("mimeType [{}] retrieved from MimeTypeResolver:  {}", mimeType, resolver.getName());
                break;
            }
        }

        LOGGER.debug("mimeType = {},   file extension = [{}]", mimeType, fileExtension);

        LOGGER.trace("EXITING: getMimeTypeForFileExtension()");

        return mimeType;
    }

    /**
     * Determines the mime type for a file from its extension and, for {@code xml} files, from the
     * namespace of the root element read from the supplied stream.
     * <p>
     * For a non-XML extension this behaves like {@link #getMimeTypeForFileExtension(String)}. For
     * the {@code xml} extension the stream is parsed once to obtain the root element namespace,
     * and only a resolver that reports a schema equal to that namespace is asked for the mime
     * type; if no resolver matches (or the namespace cannot be read) the result is {@code null}.
     *
     * @param is
     *            the content of the file; only read when {@code fileExtension} is {@code xml}, in
     *            which case it is consumed by the XML parser. May be {@code null}.
     * @param fileExtension
     *            the file extension; a {@code null} value is treated as a non-XML extension and
     *            passed unchanged to each resolver
     * @return the resolved mime type, or a null/empty value if none could be resolved
     * @throws MimeTypeResolutionException
     *             if any resolver throws while being queried; the original exception is the cause
     */
    @Override
    public String guessMimeType(InputStream is, String fileExtension) throws MimeTypeResolutionException {
        LOGGER.trace("ENTERING: guessMimeType()");

        String mimeType = null;

        // Sort the mime type resolvers in descending order of priority so that higher priority
        // resolvers get the chance to answer before lower priority ones.
        List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers);

        LOGGER.debug("Looping through {} MimeTypeResolvers", sortedResolvers.size());

        // Constant-first equals so that a null file extension does not raise a
        // NullPointerException; evaluated once because it cannot change inside the loop.
        final boolean xmlFile = XML_FILE_EXTENSION.equals(fileExtension);

        // If file has XML extension, then read root element namespace once so
        // each MimeTypeResolver does not have to open the stream and read the namespace
        String namespace = null;
        if (xmlFile) {
            namespace = getRootElementNamespace(is);
            LOGGER.debug("namespace = {}", namespace);
        }

        // Query the resolvers in priority order and stop at the first one that yields a
        // non-empty mime type.
        for (MimeTypeResolver resolver : sortedResolvers) {
            LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName());
            try {
                if (!xmlFile) {
                    mimeType = resolver.getMimeTypeForFileExtension(fileExtension);
                } else if (namespace != null && resolver.hasSchema()
                        && namespace.equals(resolver.getSchema())) {
                    // If processing an XML file, then match the namespace extracted from the
                    // XML file to the MimeTypeResolver that supports that schema (namespace).
                    // If no MimeTypeResolvers support the namespace, then mime type will be
                    // null.
                    mimeType = resolver.getMimeTypeForFileExtension(fileExtension);
                }
            } catch (Exception e) {
                LOGGER.warn("Error resolving mime type for file extension: {}", fileExtension);
                throw new MimeTypeResolutionException(e);
            }

            if (StringUtils.isNotEmpty(mimeType)) {
                LOGGER.debug("mimeType [{}] retrieved from MimeTypeResolver:  {}", mimeType, resolver.getName());
                break;
            }
        }

        LOGGER.debug("mimeType = {},   file extension = [{}]", mimeType, fileExtension);

        LOGGER.trace("EXITING: guessMimeType()");

        return mimeType;
    }

    /**
     * Parses the given stream as XML and returns the namespace URI of its root element.
     * <p>
     * The parser is configured not to resolve external entities or load external DTDs because
     * the stream content is supplied by the caller.
     *
     * @param is
     *            the XML content; may be {@code null}
     * @return the namespace URI of the document element, or {@code null} if the stream is
     *         {@code null}, the content cannot be parsed, or the root element has no namespace
     */
    private String getRootElementNamespace(InputStream is) {
        LOGGER.trace("ENTERING: getRootElementNamespace()");

        if (is == null) {
            return null;
        }

        String namespace = null;
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        dbf.setExpandEntityReferences(false);
        setFeatureIfSupported(dbf, SECURE_PROCESSING_FEATURE, true);
        for (String feature : DISABLED_XML_FEATURES) {
            setFeatureIfSupported(dbf, feature, false);
        }

        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.parse(is);
            Node node = document.getDocumentElement();
            namespace = node.getNamespaceURI();
        } catch (ParserConfigurationException | SAXException | IOException e) {
            // Best effort: an unreadable document simply means no namespace is available.
            LOGGER.debug("Unable to get root element namespace", e);
        }

        LOGGER.trace("EXITING: getRootElementNamespace() - namespace = {}", namespace);

        return namespace;
    }

    /**
     * Sets a parser feature, tolerating parser implementations that do not recognize it so that
     * namespace detection keeps working with any JAXP implementation.
     *
     * @param dbf
     *            the factory to configure
     * @param feature
     *            the feature URI
     * @param value
     *            the value to set the feature to
     */
    private static void setFeatureIfSupported(DocumentBuilderFactory dbf, String feature, boolean value) {
        try {
            dbf.setFeature(feature, value);
        } catch (ParserConfigurationException e) {
            LOGGER.debug("XML parser does not support feature {}", feature, e);
        }
    }

    /**
     * Sort the list of {@link MimeTypeResolver}s by their descending priority, i.e., the lower the
     * priority the later the {@link MimeTypeResolver} is invoked.
     *
     * @param resolvers
     *            the {@link List} of {@link MimeTypeResolver}s; may be {@code null}
     * @return a new list containing the resolvers sorted by descending priority; an empty list
     *         (never {@code null}) when {@code resolvers} is {@code null}
     */
    private List<MimeTypeResolver> sortResolvers(List<MimeTypeResolver> resolvers) {
        LOGGER.trace("ENTERING: sortResolvers()");

        if (resolvers == null) {
            LOGGER.trace("EXITING: sortResolvers()");
            return Collections.<MimeTypeResolver>emptyList();
        }

        logResolvers("Unsorted services", "------------------", resolvers);

        // Make copy of input services list because OSGi/Blueprint marks this input list as
        // read-only
        List<MimeTypeResolver> sortedResolvers = new ArrayList<MimeTypeResolver>(resolvers);

        if (sortedResolvers.size() > 1) {
            LOGGER.debug("Sorting resolvers");
            // Ascending stable sort followed by a reverse: resolvers with equal priority end up
            // in the reverse of their original relative order. Kept as two steps on purpose so
            // that this tie ordering does not change.
            Collections.sort(sortedResolvers, PRIORITY_COMPARATOR);
            Collections.reverse(sortedResolvers);
        }

        logResolvers("Sorted/prioritized services", "---------------------------", sortedResolvers);

        LOGGER.trace("EXITING: sortResolvers()");

        return sortedResolvers;
    }

    /**
     * Writes the name and priority of each resolver to the debug log under the given heading.
     * Does nothing unless debug logging is enabled.
     *
     * @param title
     *            the heading line
     * @param underline
     *            the separator line written below the heading
     * @param resolvers
     *            the resolvers to list; must not be {@code null}
     */
    private static void logResolvers(String title, String underline, List<MimeTypeResolver> resolvers) {
        if (!LOGGER.isDebugEnabled()) {
            return;
        }

        LOGGER.debug(title);
        LOGGER.debug(underline);

        for (MimeTypeResolver resolver : resolvers) {
            LOGGER.debug("{}   (priority: {})", resolver.getName(), resolver.getPriority());
        }
    }
}