org.alfresco.web.app.servlet.BaseDownloadContentServlet.java Source code

Introduction

Here is the source code for org.alfresco.web.app.servlet.BaseDownloadContentServlet.java
Source

/*
 * #%L
 * Alfresco Repository WAR Community
 * %%
 * Copyright (C) 2005 - 2016 Alfresco Software Limited
 * %%
 * This file is part of the Alfresco software. 
 * If the software was purchased under a paid Alfresco license, the terms of 
 * the paid license agreement will prevail.  Otherwise, the software is 
 * provided under the following open source license terms:
 * 
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */
package org.alfresco.web.app.servlet;

import java.io.IOException;
import java.net.SocketException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.StringTokenizer;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.repo.web.util.HttpRangeProcessor;
import org.alfresco.service.ServiceRegistry;
import org.alfresco.service.cmr.model.FileInfo;
import org.alfresco.service.cmr.model.FileNotFoundException;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.MimetypeService;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.cmr.repository.StoreRef;
import org.alfresco.service.cmr.security.PermissionService;
import org.alfresco.service.namespace.QName;
import org.alfresco.web.app.Application;
import org.apache.commons.logging.Log;
import org.springframework.extensions.surf.util.URLDecoder;
import org.springframework.extensions.surf.util.URLEncoder;
import org.springframework.extensions.webscripts.ui.common.StringUtils;

/**
 * Base class for the download content servlets. Provides common
 * processing for the request.
 * 
 * @see org.alfresco.web.app.servlet.DownloadContentServlet
 * @see org.alfresco.web.app.servlet.GuestDownloadContentServlet
 * 
 * @author Kevin Roast
 * @author gavinc
 */
public abstract class BaseDownloadContentServlet extends BaseServlet {
    private static final String HEADER_IF_MODIFIED_SINCE = "If-Modified-Since";

    private static final long serialVersionUID = -4558907921887235967L;

    private static final String POWER_POINT_DOCUMENT_MIMETYPE = "application/vnd.ms-powerpoint";
    private static final String POWER_POINT_2007_DOCUMENT_MIMETYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation";

    // mimetypes a browser may execute script from; they get special handling for direct ("/d/d/") downloads
    private static final String MIMETYPE_TEXT_HTML = "text/html";
    private static final String MIMETYPE_XHTML = "application/xhtml+xml";
    private static final String MIMETYPE_TEXT_XML = "text/xml";
    private static final String MIMETYPE_X_COMPONENT = "text/x-component";

    private static final String HEADER_CONTENT_RANGE = "Content-Range";
    private static final String HEADER_CONTENT_LENGTH = "Content-Length";
    private static final String HEADER_ACCEPT_RANGES = "Accept-Ranges";
    private static final String HEADER_RANGE = "Range";
    private static final String HEADER_ETAG = "ETag";
    private static final String HEADER_CACHE_CONTROL = "Cache-Control";
    private static final String HEADER_LAST_MODIFIED = "Last-Modified";
    private static final String HEADER_USER_AGENT = "User-Agent";
    private static final String HEADER_CONTENT_DISPOSITION = "Content-Disposition";

    protected static final String MIMETYPE_OCTET_STREAM = "application/octet-stream";

    protected static final String MSG_ERROR_CONTENT_MISSING = "error_content_missing";
    protected static final String MSG_ERROR_NOT_FOUND = "error_not_found";

    protected static final String URL_DIRECT = "d";
    protected static final String URL_DIRECT_LONG = "direct";
    protected static final String URL_ATTACH = "a";
    protected static final String URL_ATTACH_LONG = "attach";
    protected static final String ARG_PROPERTY = "property";
    protected static final String ARG_PATH = "path";

    /**
     * Gets the logger to use for this request.
     * <p>
     * This will show all debug entries from this class as though they
     * came from the subclass.
     * 
     * @return The logger
     */
    protected abstract Log getLogger();

    /**
     * Processes the download request using the current context i.e. no authentication checks are made, it is presumed
     * they have already been done.
     * <p>
     * The URL (after the context path) is expected to look like
     * <code>/servlet/mode/protocol/store/id/filename</code>, optionally followed by further relative path
     * elements, or <code>/servlet/mode/...</code> together with a <code>path</code> request parameter.
     * 
     * @param req
     *           The HTTP request
     * @param res
     *           The HTTP response
     * @param allowLogIn
     *           Indicates whether guest users without access to the content should be redirected to the log in page. If
     *           <code>false</code>, a status 403 forbidden page is displayed instead.
     * @param transmitContent
     *           If <code>true</code> the content is written to the response; if <code>false</code> only the
     *           headers are sent (the output stream is closed without a body).
     * @throws IllegalArgumentException
     *           if the URL does not contain the elements required to locate the content
     * @throws AlfrescoRuntimeException
     *           wrapping any failure raised once the node has been located
     */
    protected void processDownloadRequest(HttpServletRequest req, HttpServletResponse res, boolean allowLogIn,
            boolean transmitContent) throws ServletException, IOException {
        Log logger = getLogger();
        String uri = req.getRequestURI();

        if (logger.isDebugEnabled()) {
            String queryString = req.getQueryString();
            logger.debug("Processing URL: " + uri
                    + ((queryString != null && queryString.length() > 0) ? ("?" + queryString) : ""));
        }

        uri = uri.substring(req.getContextPath().length());
        StringTokenizer t = new StringTokenizer(uri, "/");
        int tokenCount = t.countTokens();

        // the servlet name and the attachment mode are consumed unconditionally below, so make sure
        // they are present rather than failing with a bare NoSuchElementException
        if (tokenCount < 2) {
            throw new IllegalArgumentException("Download URL did not contain all required args: " + uri);
        }

        t.nextToken(); // skip servlet name

        // attachment mode (either 'attach' or 'direct')
        String attachToken = t.nextToken();
        boolean attachment = URL_ATTACH.equals(attachToken) || URL_ATTACH_LONG.equals(attachToken);

        ServiceRegistry serviceRegistry = getServiceRegistry(getServletContext());

        // get or calculate the noderef and filename to download as
        NodeRef nodeRef;
        String filename;

        // do we have a path parameter instead of a NodeRef?
        String path = req.getParameter(ARG_PATH);
        if (path != null && path.length() != 0) {
            // process the name based path to resolve the NodeRef and the Filename element
            try {
                PathRefInfo pathInfo = resolveNamePath(getServletContext(), path);
                nodeRef = pathInfo.NodeRef;
                filename = pathInfo.Filename;
            } catch (IllegalArgumentException e) {
                Application.handleSystemError(getServletContext(), req, res, MSG_ERROR_NOT_FOUND,
                        HttpServletResponse.SC_NOT_FOUND, logger);
                return;
            }
        } else {
            // a NodeRef must have been specified if no path has been found:
            // servlet / mode / protocol / store / id / filename
            if (tokenCount < 6) {
                throw new IllegalArgumentException("Download URL did not contain all required args: " + uri);
            }

            // assume 'workspace' or other NodeRef based protocol for remaining URL elements
            StoreRef storeRef = new StoreRef(URLDecoder.decode(t.nextToken()), URLDecoder.decode(t.nextToken()));
            String id = URLDecoder.decode(t.nextToken());

            // build noderef from the appropriate URL elements
            nodeRef = new NodeRef(storeRef, id);

            if (tokenCount > 6) {
                // found additional relative path elements i.e. noderefid/images/file.txt
                // this allows a url to reference siblings nodes via a cm:name based relative path
                // solves the issue with opening HTML content containing relative URLs in HREF or IMG tags etc.
                List<String> paths = new ArrayList<String>(tokenCount - 5);
                while (t.hasMoreTokens()) {
                    paths.add(URLDecoder.decode(t.nextToken()));
                }
                filename = paths.get(paths.size() - 1);

                try {
                    NodeRef parentRef = serviceRegistry.getNodeService().getPrimaryParent(nodeRef).getParentRef();
                    FileInfo fileInfo = serviceRegistry.getFileFolderService().resolveNamePath(parentRef, paths);
                    nodeRef = fileInfo.getNodeRef();
                } catch (FileNotFoundException e) {
                    Application.handleSystemError(getServletContext(), req, res, MSG_ERROR_NOT_FOUND,
                            HttpServletResponse.SC_NOT_FOUND, logger);
                    return;
                }
            } else {
                // filename is last remaining token (left URL-encoded, exactly as it appeared in the URL)
                filename = t.nextToken();
            }
        }

        // get qualified name of the property to get content from - default to ContentModel.PROP_CONTENT
        QName propertyQName = ContentModel.PROP_CONTENT;
        String property = req.getParameter(ARG_PROPERTY);
        if (property != null && property.length() != 0) {
            propertyQName = QName.createQName(property);
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Found NodeRef: " + nodeRef);
            logger.debug("Will use filename: " + filename);
            logger.debug("For property: " + propertyQName);
            logger.debug("With attachment mode: " + attachment);
        }

        // get the services we need to retrieve the content
        NodeService nodeService = serviceRegistry.getNodeService();
        ContentService contentService = serviceRegistry.getContentService();

        // Check that the node still exists
        if (!nodeService.exists(nodeRef)) {
            Application.handleSystemError(getServletContext(), req, res, MSG_ERROR_NOT_FOUND,
                    HttpServletResponse.SC_NOT_FOUND, logger);
            return;
        }

        try {
            // check that the user has at least READ_CONTENT access - else redirect to an error or login page
            if (!checkAccess(req, res, nodeRef, PermissionService.READ_CONTENT, allowLogIn)) {
                return;
            }

            // check If-Modified-Since header and set Last-Modified header as appropriate
            Date modified = (Date) nodeService.getProperty(nodeRef, ContentModel.PROP_MODIFIED);
            if (modified != null) {
                long modifiedSince = req.getDateHeader(HEADER_IF_MODIFIED_SINCE);
                if (modifiedSince > 0L) {
                    // round the date down to whole seconds: the header does not carry milliseconds
                    long modDate = (modified.getTime() / 1000L) * 1000L;
                    if (modDate <= modifiedSince) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Returning 304 Not Modified.");
                        }
                        res.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
                        return;
                    }
                }
                res.setDateHeader(HEADER_LAST_MODIFIED, modified.getTime());
                res.setHeader(HEADER_CACHE_CONTROL, "must-revalidate, max-age=0");
                res.setHeader(HEADER_ETAG, "\"" + modified.getTime() + "\"");
            }

            if (attachment) {
                setHeaderContentDisposition(req, res, filename);
            }

            // get the content reader
            ContentReader reader = contentService.getReader(nodeRef, propertyQName);
            // ensure that it is safe to use
            reader = FileContentReader.getSafeContentReader(reader,
                    Application.getMessage(req.getSession(), MSG_ERROR_CONTENT_MISSING), nodeRef, reader);

            String mimetype = reader.getMimetype();
            // fall back if unable to resolve mimetype property
            if (mimetype == null || mimetype.length() == 0) {
                MimetypeService mimetypeMap = serviceRegistry.getMimetypeService();
                mimetype = MIMETYPE_OCTET_STREAM;
                int extIndex = filename.lastIndexOf('.');
                if (extIndex != -1) {
                    String ext = filename.substring(extIndex + 1);
                    mimetype = mimetypeMap.getMimetype(ext);
                }
            }

            // explicitly set the content disposition header if the content is powerpoint
            if (!attachment && (mimetype.equals(POWER_POINT_2007_DOCUMENT_MIMETYPE)
                    || mimetype.equals(POWER_POINT_DOCUMENT_MIMETYPE))) {
                setHeaderContentDisposition(req, res, filename);
            }

            // get the content and stream directly to the response output stream
            // assuming the repo is capable of streaming in chunks, this should allow large files
            // to be streamed directly to the browser response stream.
            res.setHeader(HEADER_ACCEPT_RANGES, "bytes");

            // for a GET request, transmit the content else just the headers are sent
            if (transmitContent) {
                try {
                    boolean processedRange = false;
                    String range = req.getHeader(HEADER_CONTENT_RANGE);
                    if (range == null) {
                        range = req.getHeader(HEADER_RANGE);
                    }
                    if (range != null) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Found content range header: " + range);
                        }

                        // only the length is checked here; the first six characters (normally "bytes=")
                        // are dropped and the remainder is handed to the range processor
                        if (range.length() > 6) {
                            HttpRangeProcessor rangeProcessor = new HttpRangeProcessor(contentService);
                            processedRange = rangeProcessor.processRange(res, reader, range.substring(6), nodeRef,
                                    propertyQName, mimetype, req.getHeader(HEADER_USER_AGENT));
                        }
                    }
                    if (!processedRange) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("Sending complete file content...");
                        }

                        // set mimetype for the content and the character encoding for the stream
                        res.setContentType(mimetype);
                        res.setCharacterEncoding(reader.getEncoding());

                        // MNT-10642 Alfresco Explorer has javascript vulnerability opening HTML files
                        boolean htmlContent = MIMETYPE_TEXT_HTML.equals(mimetype)
                                || MIMETYPE_XHTML.equals(mimetype);
                        boolean xmlContent = MIMETYPE_TEXT_XML.equals(mimetype)
                                || MIMETYPE_X_COMPONENT.equals(mimetype);
                        if (req.getRequestURI().contains("/d/d/") && (htmlContent || xmlContent)) {
                            String content = reader.getContentString();

                            if (htmlContent) {
                                // process with HTML stripper
                                content = StringUtils.stripUnsafeHTMLTags(content, false);
                            } else {
                                // IE supports "behaviour" which means that css can load a .htc file that could
                                // contain XSS code in the form of jscript, vbscript etc, to stop it from being
                                // evaluated we set the content type to text/plain
                                res.setContentType("text/plain");
                            }

                            String encoding = reader.getEncoding();
                            byte[] bytes = encoding != null ? content.getBytes(encoding) : content.getBytes();
                            res.setContentLength(bytes.length);
                            res.getOutputStream().write(bytes);

                            return;
                        }

                        // return the complete entity range
                        long size = reader.getSize();
                        res.setHeader(HEADER_CONTENT_RANGE, "bytes 0-" + (size - 1L) + "/" + size);
                        res.setHeader(HEADER_CONTENT_LENGTH, Long.toString(size));
                        reader.getContent(res.getOutputStream());
                    }
                } catch (SocketException e1) {
                    // the client cut the connection - our mission was accomplished apart from a little error message
                    if (logger.isDebugEnabled()) {
                        logger.debug("Client aborted stream read:\n\tnode: " + nodeRef + "\n\tcontent: " + reader);
                    }
                } catch (ContentIOException e2) {
                    if (logger.isInfoEnabled()) {
                        logger.info("Failed stream read:\n\tnode: " + nodeRef + " due to: " + e2.getMessage());
                    }
                } catch (Throwable err) {
                    if (err.getCause() instanceof SocketException) {
                        // the client cut the connection - our mission was accomplished apart from a little error message
                        if (logger.isDebugEnabled()) {
                            logger.debug(
                                    "Client aborted stream read:\n\tnode: " + nodeRef + "\n\tcontent: " + reader);
                        }
                    } else {
                        throw err;
                    }
                }
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("HEAD request processed - no content sent.");
                }
                res.getOutputStream().close();
            }
        } catch (Throwable err) {
            throw new AlfrescoRuntimeException(
                    "Error during download content servlet processing: " + err.getMessage(), err);
        }
    }

    /**
     * Sets an "attachment" Content-Disposition header carrying the given filename.
     * <p>
     * This will force a Save As from the browser if it doesn't recognise the content, which is better than
     * the default response of the browser trying to display the contents.
     * 
     * @param req      The HTTP request (its User-Agent header selects the filename form)
     * @param res      The HTTP response to set the header on
     * @param filename The filename as taken from the download URL
     */
    private void setHeaderContentDisposition(HttpServletRequest req, HttpServletResponse res, String filename) {
        // IE requires that "Content-Disposition" header in case of "attachment" type should include
        // "filename" part. See MNT-9900
        String dispositionName = filename;
        String userAgent = req.getHeader(HEADER_USER_AGENT);
        if (userAgent != null) {
            String agent = userAgent.toLowerCase();
            if (agent.contains("firefox") || agent.contains("safari")) {
                // these browsers get the URL-decoded form of the name
                dispositionName = URLDecoder.decode(filename);
            }
        }

        // the name originates from the request URL, so it must not be able to terminate the quoted
        // string or the header line itself
        res.setHeader(HEADER_CONTENT_DISPOSITION,
                "attachment; filename=\"" + escapeForQuotedString(dispositionName) + "\"");
    }

    /**
     * Makes a value safe for use inside a quoted HTTP header parameter: control characters
     * (including CR and LF) are removed, and double quotes and backslashes are backslash-escaped.
     * 
     * @param value The raw value, may be null
     * 
     * @return the escaped value, or an empty string if the value was null
     */
    private static String escapeForQuotedString(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c < 0x20 || c == 0x7f) {
                // control characters are dropped
                continue;
            }
            if (c == '"' || c == '\\') {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /**
     * Helper to generate a URL to a content node for downloading content from the server.
     * <p>
     * The pattern is a {@link MessageFormat} pattern receiving, in order: store protocol, store identifier,
     * node id and the URL-encoded file name.
     * 
     * @param pattern The pattern to use for the URL
     * @param ref     NodeRef of the content node to generate URL for (cannot be null)
     * @param name    File name to return in the URL (cannot be null)
     * 
     * @return URL to download the content from the specified node
     */
    protected static final String generateUrl(String pattern, NodeRef ref, String name) {
        StoreRef storeRef = ref.getStoreRef();
        return MessageFormat.format(pattern, storeRef.getProtocol(), storeRef.getIdentifier(), ref.getId(),
                URLEncoder.encode(name));
    }
}