ddf.catalog.resource.impl.URLResourceReader.java Source code

Java tutorial

Introduction

Here is the source code for ddf.catalog.resource.impl.URLResourceReader.java

Source

/**
 * Copyright (c) Codice Foundation
 * <p>
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or any later version.
 * <p>
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. A copy of the GNU Lesser General Public License
 * is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 */
package ddf.catalog.resource.impl;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.net.URLConnection;
import java.nio.file.InvalidPathException;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
import org.apache.tika.Tika;
import org.apache.tika.metadata.HttpHeaders;
import org.codice.ddf.security.common.jaxrs.RestSecurity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableSet;

import ddf.catalog.data.Metacard;
import ddf.catalog.operation.ResourceResponse;
import ddf.catalog.operation.impl.ResourceResponseImpl;
import ddf.catalog.resource.ResourceNotFoundException;
import ddf.catalog.resource.ResourceReader;
import ddf.mime.MimeTypeMapper;
import ddf.mime.MimeTypeResolutionException;
import ddf.security.SecurityConstants;
import ddf.security.Subject;

/**
 * A URLResourceReader retrieves a {@link ddf.catalog.resource.Resource} from a local or remote file system using a
 * {@link URI}. The {@link URI} is used to specify the file location. A URLResourceReader supports
 * {@link URI}s with HTTP, HTTPS, and file schemes.
 */
public class URLResourceReader implements ResourceReader {

    private static final String URL_HTTP_SCHEME = "http";

    private static final String URL_HTTPS_SCHEME = "https";

    private static final String URL_FILE_SCHEME = "file";

    private static final Logger LOGGER = LoggerFactory.getLogger(URLResourceReader.class);

    private static final String VERSION = "1.0";

    private static final String SHORTNAME = "URLResourceReader";

    private static final String TITLE = "URL ddf.catalog.resource.Resource Reader";

    private static final String DESCRIPTION = "Retrieves a file from a remote file system.";

    private static final String ORGANIZATION = "DDF";

    private static final String DEFAULT_MIME_TYPE = "application/octet-stream";

    /** Name of the request property holding the number of leading bytes to discard. */
    private static final String BYTES_TO_SKIP = "BytesToSkip";

    private static final Set<String> QUALIFIER_SET = ImmutableSet.of(URL_HTTP_SCHEME, URL_HTTPS_SCHEME,
            URL_FILE_SCHEME);

    /**
     * Mapper for file extensions-to-mime types (and vice versa)
     */
    private MimeTypeMapper mimeTypeMapper;

    /** Normalized absolute directories from which file-scheme resources may be served. */
    private Set<String> rootResourceDirectories = new HashSet<>();

    private boolean followRedirects = false;

    /**
     * Default URLResourceReader constructor.
     */
    public URLResourceReader() {
    }

    /**
     * Creates a reader that resolves mime types through the given mapper.
     *
     * @param mimeTypeMapper
     *            mapper used to resolve mime types from file extensions; may be null, in which
     *            case mime type resolution falls back to the other strategies in this class
     */
    public URLResourceReader(MimeTypeMapper mimeTypeMapper) {
        if (mimeTypeMapper == null) {
            LOGGER.debug("mimeTypeMapper is NULL");
        }
        this.mimeTypeMapper = mimeTypeMapper;

        LOGGER.debug("Supported Schemes for {}: {}", URLResourceReader.class.getSimpleName(), QUALIFIER_SET);
    }

    /**
     * @return the immutable set of URL schemes this reader supports (http, https, file)
     */
    public Set<String> getURLSupportedSchemes() {
        return QUALIFIER_SET;
    }

    @Override
    public String getVersion() {
        return VERSION;
    }

    @Override
    public String getId() {
        return SHORTNAME;
    }

    @Override
    public String getTitle() {
        return TITLE;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    @Override
    public String getOrganization() {
        return ORGANIZATION;
    }

    /**
     * Supported schemes are HTTP, HTTPS, and file
     *
     * @return set of supported schemes
     */
    @Override
    public Set<String> getSupportedSchemes() {
        return QUALIFIER_SET;
    }

    public MimeTypeMapper getMimeTypeMapper() {
        return mimeTypeMapper;
    }

    public void setMimeTypeMapper(MimeTypeMapper mimeTypeMapper) {
        this.mimeTypeMapper = mimeTypeMapper;
    }

    /**
     * Sets the directories that the {@link URLResourceReader} has permission to access when attempting to
     * download a resource linked by a file URL.
     *
     * @param rootResourceDirectoryPaths
     *            a set of absolute paths specifying which directories the {@link URLResourceReader} has
     *            permission to access when attempting to download resources linked by a file URL. A
     *            null or empty input clears all root resource directory paths from the
     *            {@link URLResourceReader} (this effectively blocks all resource downloads linked by file
     *            URLs). Null or syntactically invalid entries are logged and skipped.
     */
    public void setRootResourceDirectories(Set<String> rootResourceDirectoryPaths) {
        this.rootResourceDirectories.clear();
        if (rootResourceDirectoryPaths != null) {
            LOGGER.debug("Attempting to set Root Resource Directories to {} for {}", rootResourceDirectoryPaths,
                    URLResourceReader.class.getSimpleName());

            for (String rootResourceDirectoryPath : rootResourceDirectoryPaths) {
                if (rootResourceDirectoryPath == null) {
                    // Paths.get(null) would throw a NullPointerException and abort the whole update.
                    LOGGER.warn("Ignoring null Root Resource Directory entry for {}",
                            URLResourceReader.class.getSimpleName());
                    continue;
                }
                try {
                    String path = Paths.get(rootResourceDirectoryPath).toAbsolutePath().normalize().toString();
                    this.rootResourceDirectories.add(path);
                    LOGGER.debug("Added [{}] to the list of Root Resource Directories for {}", path,
                            URLResourceReader.class.getSimpleName());
                } catch (InvalidPathException e) {
                    LOGGER.error("{} is an invalid path.", rootResourceDirectoryPath, e);
                }
            }
        }

        LOGGER.debug("Root Resource Directories for {} are {}", URLResourceReader.class.getSimpleName(),
                this.rootResourceDirectories);
    }

    /**
     * @return the set of normalized root resource directories currently configured
     */
    public Set<String> getRootResourceDirectories() {
        return this.rootResourceDirectories;
    }

    /**
     * Specifies whether the code should follow server issued redirection (HTTP
     * Response codes between 300 and 400)
     *
     * @param redirect
     *            true - follow redirections automatically false - do not follow
     *            server issued redirections; a null value leaves the current setting unchanged
     */
    public void setFollowRedirects(Boolean redirect) {
        LOGGER.debug("{}: Setting follow URL redirects (HTTP 300 codes) to {}", URLResourceReader.class.getName(),
                redirect);
        if (redirect != null) {
            this.followRedirects = redirect;
        }
    }

    /**
     * Gets the autoRedirect property
     *
     * @return true if the server issued redirections should be automatically
     *         followed
     */
    public Boolean getFollowRedirects() {
        return followRedirects;
    }

    /**
     * Retrieves a {@link ddf.catalog.resource.Resource} based on a {@link URI} and provided
     * arguments. A connection is made to the {@link URI} to obtain the
     * {@link ddf.catalog.resource.Resource}'s {@link InputStream} and build a
     * {@link ResourceResponse} from that. For HTTP and HTTPS the initial resource name is the file
     * portion of the URL; for the file scheme it is the actual file name. In both cases a
     * Content-Disposition filename, when present, takes precedence.
     *
     * @param resourceURI
     *            A {@link URI} that defines what {@link ddf.catalog.resource.Resource} to retrieve
     *            and how to do it.
     * @param properties
     *            Any additional arguments that should be passed to the {@link ResourceReader}. May
     *            be null, which is treated as no properties.
     * @return A {@link ResourceResponse} containing the retrieved
     *         {@link ddf.catalog.resource.Resource}.
     * @throws IOException
     *             if the URI cannot be converted to a URL or the file path cannot be canonicalized
     * @throws ResourceNotFoundException
     *             if the URI is null, has a missing or unsupported scheme, points outside the
     *             configured root resource directories, or the resource cannot be retrieved
     */
    @Override
    public ResourceResponse retrieveResource(URI resourceURI, Map<String, Serializable> properties)
            throws IOException, ResourceNotFoundException {
        if (resourceURI == null) {
            LOGGER.warn("Resource URI was null");
            throw new ResourceNotFoundException("Unable to find resource");
        }

        // Tolerate callers that pass no properties at all instead of failing with an NPE.
        Map<String, Serializable> props = properties != null ? properties
                : Collections.<String, Serializable>emptyMap();

        String bytesToSkip = null;
        Serializable bytesToSkipValue = props.get(BYTES_TO_SKIP);
        if (bytesToSkipValue != null) {
            bytesToSkip = bytesToSkipValue.toString();
            LOGGER.debug("bytesToSkip: {}", bytesToSkip);
        }

        // getScheme() is null for relative URIs; constant-first equals keeps that case NPE-free
        // and routes it to the "qualifier not valid" branch below.
        String scheme = resourceURI.getScheme();
        if (URL_HTTP_SCHEME.equals(scheme) || URL_HTTPS_SCHEME.equals(scheme)) {
            LOGGER.debug("Resource URI is HTTP or HTTPS");
            String fileAddress = resourceURI.toURL().getFile();
            LOGGER.debug("resource name: {}", fileAddress);
            return retrieveHttpProduct(resourceURI, fileAddress, bytesToSkip, props);
        } else if (URL_FILE_SCHEME.equals(scheme)) {
            LOGGER.debug("Resource URI is a File");
            File filePathName;
            try {
                filePathName = new File(resourceURI);
            } catch (IllegalArgumentException e) {
                // File(URI) rejects opaque URIs and URIs carrying an authority, query or fragment.
                throw new ResourceNotFoundException(
                        "Error retrieving resource [" + resourceURI.toString() + "]. Invalid file URI.", e);
            }
            if (validateFilePath(filePathName)) {
                String fileName = filePathName.getName();
                LOGGER.debug("resource name: {}", fileName);
                return retrieveFileProduct(resourceURI, fileName, bytesToSkip);
            } else {
                throw new ResourceNotFoundException("Error retrieving resource [" + resourceURI.toString()
                        + "]. Invalid Resource URI of [" + resourceURI.toString()
                        + "]. Resources  must be in one of the following directories: "
                        + this.rootResourceDirectories.toString());
            }
        } else {
            throw new ResourceNotFoundException(
                    "Resource qualifier ( " + scheme + " ) not valid. " + URLResourceReader.TITLE
                            + " requires a qualifier of " + URL_HTTP_SCHEME + " or " + URL_HTTPS_SCHEME + " or "
                            + URL_FILE_SCHEME);
        }
    }

    /**
     * Opens a file-scheme resource and wraps it in a {@link ResourceResponse}.
     *
     * @param resourceURI
     *            file URI of the resource; expected to have passed {@link #validateFilePath(File)}
     * @param productName
     *            fallback resource name used when no Content-Disposition filename is available
     * @param bytesToSkip
     *            decimal number of leading bytes to discard, or null to read from the start
     * @throws ResourceNotFoundException
     *             if the resource cannot be opened, skipped into, or its mime type resolved
     */
    private ResourceResponse retrieveFileProduct(URI resourceURI, String productName, String bytesToSkip)
            throws ResourceNotFoundException {
        InputStream is = null;
        try {
            LOGGER.debug("Opening connection to: {}", resourceURI);
            URLConnection connection = resourceURI.toURL().openConnection();

            productName = StringUtils.defaultIfBlank(
                    handleContentDispositionHeader(connection.getHeaderField(HttpHeaders.CONTENT_DISPOSITION)),
                    productName);

            String mimeType = getMimeType(resourceURI, productName);

            is = connection.getInputStream();

            skipBytes(is, bytesToSkip);

            return new ResourceResponseImpl(
                    new ResourceImpl(new BufferedInputStream(is), mimeType, FilenameUtils.getName(productName)));
        } catch (MimeTypeResolutionException | IOException e) {
            // The stream is never handed to the caller on this path, so release it here.
            IOUtils.closeQuietly(is);
            LOGGER.error("Error retrieving resource", e);
            throw new ResourceNotFoundException("Unable to retrieve resource at: " + resourceURI.toString(), e);
        }
    }

    /**
     * Retrieves an HTTP(S) resource with a single GET request and wraps the response entity in a
     * {@link ResourceResponse}.
     *
     * @param resourceURI
     *            HTTP or HTTPS URI of the resource
     * @param productName
     *            fallback resource name used when no Content-Disposition filename is returned
     * @param bytesToSkip
     *            decimal number of leading bytes to discard, or null to read from the start
     * @param properties
     *            non-null request properties; a value stored under
     *            {@link SecurityConstants#SECURITY_SUBJECT} is set on the web client
     * @throws ResourceNotFoundException
     *             if the server answers with a non-200 status, returns no stream entity, or the
     *             request otherwise fails
     */
    private ResourceResponse retrieveHttpProduct(URI resourceURI, String productName, String bytesToSkip,
            Map<String, Serializable> properties) throws ResourceNotFoundException {

        InputStream is = null;
        try {
            LOGGER.debug("Opening connection to: {}", resourceURI);

            WebClient client = getWebClient(resourceURI.toString());

            Object subjectObj = properties.get(SecurityConstants.SECURITY_SUBJECT);
            if (subjectObj != null) {
                Subject subject = (Subject) subjectObj;
                LOGGER.debug("Setting Subject on webclient: {}", subject);
                RestSecurity.setSubjectOnClient(subject, client);
            }

            // Issue exactly one GET; both the headers and the entity are read from this response.
            Response response = client.get();

            MultivaluedMap<String, Object> headers = response.getHeaders();
            List<Object> cdHeaders = headers != null ? headers.get(HttpHeaders.CONTENT_DISPOSITION) : null;
            if (cdHeaders != null && !cdHeaders.isEmpty()) {
                Object firstHeader = cdHeaders.get(0);
                // Header values are typed as Object; convert rather than cast blindly.
                String contentHeader = firstHeader != null ? firstHeader.toString() : null;
                productName = StringUtils.defaultIfBlank(handleContentDispositionHeader(contentHeader),
                        productName);
            }
            String mimeType = getMimeType(resourceURI, productName);

            Object entityObj = response.getEntity();
            if (!(entityObj instanceof InputStream)) {
                throw new ResourceNotFoundException("Received null response while retrieving resource.");
            }
            is = (InputStream) entityObj;

            if (Response.Status.OK.getStatusCode() != response.getStatus()) {
                String error = null;
                try {
                    error = IOUtils.toString(is, "UTF-8");
                } catch (IOException ioe) {
                    LOGGER.debug("Could not convert error message to a string for output.", ioe);
                } finally {
                    // The error body has been consumed (or is unreadable); nothing else will close it.
                    IOUtils.closeQuietly(is);
                }
                String errorMsg = "Received error code while retrieving resource (status " + response.getStatus()
                        + "): " + error;
                LOGGER.warn(errorMsg);
                throw new ResourceNotFoundException(errorMsg);
            }

            skipBytes(is, bytesToSkip);

            return new ResourceResponseImpl(
                    new ResourceImpl(new BufferedInputStream(is), mimeType, FilenameUtils.getName(productName)));
        } catch (MimeTypeResolutionException | IOException | WebApplicationException e) {
            // The stream is never handed to the caller on this path, so release it here.
            IOUtils.closeQuietly(is);
            LOGGER.error("Error retrieving resource", e);
            throw new ResourceNotFoundException("Unable to retrieve resource at: " + resourceURI.toString(), e);
        }
    }

    /**
     * Resolves the mime type for a resource.
     * <p>
     * The approaches, in order, are:
     * <ol>
     * <li>the configured {@link MimeTypeMapper}, keyed on the extension of {@code productName};</li>
     * <li>for file-scheme URIs only, Apache Tika detection on the URL when the mapper is missing
     * or yields nothing better than {@code application/octet-stream}.</li>
     * </ol>
     * A null or {@code content/unknown} result is reported as {@code application/unknown}.
     *
     * @param resourceURI
     *            URI of the resource
     * @param productName
     *            resource name whose extension is used for the mapper lookup
     * @return the resolved mime type, never null
     */
    private String getMimeType(URI resourceURI, String productName)
            throws MimeTypeResolutionException, IOException {
        String mimeType = null;
        if (mimeTypeMapper == null) {
            LOGGER.warn("mimeTypeMapper is NULL");
        } else {
            // Extract the file extension (if any) from the URL's filename
            String fileExtension = FilenameUtils.getExtension(productName);
            mimeType = mimeTypeMapper.getMimeTypeForFileExtension(fileExtension);
        }

        boolean unresolved = mimeType == null || mimeType.isEmpty() || mimeType.equals(DEFAULT_MIME_TYPE);
        if (unresolved && URL_FILE_SCHEME.equalsIgnoreCase(resourceURI.getScheme())) {
            // Use Apache Tika to detect mime type from URL
            Tika tika = new Tika();
            mimeType = tika.detect(resourceURI.toURL());
            LOGGER.debug("Tika determined mimeType for url = {}", mimeType);
        } else {
            LOGGER.debug("mimeType = {} set by MimeTypeMapper", mimeType);
        }

        // Legacy default is application/unknown but URLConnection returns content/unknown
        // as default when mime type does not map to a file extension. To maintain legacy
        // compatibility, change content/unknown to application/unknown
        if (mimeType == null || mimeType.equals("content/unknown")) {
            mimeType = "application/unknown";
        }

        LOGGER.debug("mimeType set to: {}", mimeType);
        return mimeType;
    }

    /**
     * Extracts the {@code filename} parameter from a Content-Disposition header value.
     *
     * @param contentDispositionHeader
     *            raw header value, may be null or blank
     * @return the filename, or an empty string when the header or its filename parameter is blank
     */
    private String handleContentDispositionHeader(String contentDispositionHeader) {
        if (StringUtils.isNotBlank(contentDispositionHeader)) {
            ContentDisposition contentDisposition = new ContentDisposition(contentDispositionHeader);
            String filename = contentDisposition.getParameter("filename");
            if (StringUtils.isNotBlank(filename)) {
                LOGGER.debug("Found content disposition header, changing resource name to {}", filename);
                return filename;
            }
        }
        return "";
    }

    /**
     * Discards the requested number of leading bytes from the stream.
     * <p>
     * {@link InputStream#skip(long)} is allowed to skip fewer bytes than asked, so the request is
     * repeated until the full count has been discarded or the end of the stream is reached.
     *
     * @param is
     *            stream to advance
     * @param bytesToSkip
     *            decimal byte count, or null to leave the stream untouched
     * @throws IOException
     *             if the value is not a valid long or the stream cannot be read
     */
    private void skipBytes(InputStream is, String bytesToSkip) throws IOException {
        if (bytesToSkip == null) {
            return;
        }

        long requested;
        try {
            requested = Long.parseLong(bytesToSkip);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid " + BYTES_TO_SKIP + " value [" + bytesToSkip + "]", e);
        }

        LOGGER.debug("Skipping {} bytes", bytesToSkip);
        long remaining = requested;
        while (remaining > 0) {
            long skipped = is.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            } else if (is.read() >= 0) {
                // skip() may return 0 before EOF; a single read distinguishes the two cases.
                remaining--;
            } else {
                break;
            }
        }

        long bytesSkipped = requested - remaining;
        if (requested != bytesSkipped) {
            LOGGER.debug("Did not skip specified bytes while retrieving resource."
                    + " Bytes to skip: {} -- Skipped Bytes: {}", bytesToSkip, bytesSkipped);
        }
    }

    /**
     * Checks that a file lies inside one of the configured root resource directories.
     * <p>
     * Both sides are canonicalized and compared path element by path element, so a root of
     * {@code /data/root} does not authorize {@code /data/root-other/file}.
     *
     * @param resourceFilePath
     *            file requested for download
     * @return true if the file is located under a configured root resource directory
     * @throws IOException
     *             if a path cannot be canonicalized
     */
    private boolean validateFilePath(File resourceFilePath) throws IOException {
        String resourceCanonicalPath = resourceFilePath.getCanonicalPath();
        LOGGER.debug("Converted resource path [{}] to its canonical path of [{}]", resourceFilePath,
                resourceCanonicalPath);
        if (this.rootResourceDirectories == null) {
            return false;
        }

        for (String rootResourceDirectory : this.rootResourceDirectories) {
            String rootResourceDirectoryCanonicalPath = new File(rootResourceDirectory).getCanonicalPath();
            LOGGER.debug("Converted root resource directory [{}] to its canonical path of [{}]",
                    rootResourceDirectory, rootResourceDirectoryCanonicalPath);
            LOGGER.debug("Determining if resource path [{}] starts with configured root resource directory [{}].",
                    resourceCanonicalPath, rootResourceDirectoryCanonicalPath);
            // Path.startsWith matches whole name elements, unlike a raw string prefix comparison.
            if (Paths.get(resourceCanonicalPath).startsWith(rootResourceDirectoryCanonicalPath)) {
                LOGGER.debug(
                        "Resource path [{}] starts with configured root resource directory [{}]. Resource is in a valid location for download by the {}",
                        resourceCanonicalPath, rootResourceDirectoryCanonicalPath,
                        URLResourceReader.class.getSimpleName());
                return true;
            }
            LOGGER.debug("Resource path [{}] does not start with configured root resource directory [{}].",
                    resourceCanonicalPath, rootResourceDirectoryCanonicalPath);
        }

        LOGGER.debug(
                "Unable to find a root resource directory in the {}'s configuration for resource path [{}]. Unable to download resource.",
                URLResourceReader.class.getSimpleName(), resourceCanonicalPath);
        return false;
    }

    /**
     * Creates the web client used for HTTP(S) retrieval, applying the configured redirect policy.
     *
     * @param uri
     *            address the client is created for
     * @return a client whose auto-redirect setting matches {@link #getFollowRedirects()}
     */
    protected WebClient getWebClient(String uri) {
        WebClient client = WebClient.create(uri);
        WebClient.getConfig(client).getHttpConduit().getClient().setAutoRedirect(getFollowRedirects());
        return client;
    }

    /**
     * This reader does not support options.
     *
     * @return always an empty set
     */
    @Override
    public Set<String> getOptions(Metacard metacard) {
        LOGGER.debug("URLResourceReader getOptions doesn't support options, returning empty set.");
        return Collections.emptySet();
    }
}