org.archive.wayback.resourcestore.locationdb.FileProxyServlet.java Source code

Java tutorial

Introduction

Here is the source code for org.archive.wayback.resourcestore.locationdb.FileProxyServlet.java

Source

/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.resourcestore.locationdb;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.URL;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.httpclient.ChunkedInputStream;
import org.archive.util.anvl.ANVLRecord;
import org.archive.wayback.util.http.HttpRequestMessage;
import org.archive.wayback.util.http.HttpResponse;
import org.archive.wayback.util.webapp.AbstractRequestHandler;

/**
 * ServletRequestContext interface which uses a ResourceFileLocationDB to 
 * reverse proxy an incoming HTTP request for a file by name to it's actual 
 * back-end location. This will also forward HTTP byte range requests to the
 * final location.
 *
 * @author brad
 * @version $Date$, $Revision$
 */
public class FileProxyServlet extends AbstractRequestHandler {
    private static final Logger LOGGER = Logger.getLogger(FileProxyServlet.class.getName());

    private static final int BUF_SIZE = 4096;
    private static final String RANGE_HTTP_HEADER = "Range";
    private static final String DEFAULT_CONTENT_TYPE = "application/x-gzip";
    private static final String HEADER_BYTES_PREFIX = "bytes=";
    private static final String HEADER_BYTES_SUFFIX = "-";

    private static final String FILE_REGEX = "/([^/]*)$";
    private static final String FILE_OFFSET_REGEX = "/([^/]*)/(\\d*)$";

    private static final Pattern FILE_PATTERN = Pattern.compile(FILE_REGEX);
    private static final Pattern FILE_OFFSET_PATTERN = Pattern.compile(FILE_OFFSET_REGEX);

    private static final long serialVersionUID = 1L;

    private ResourceFileLocationDB locationDB = null;

    private int socketTimeoutMs = 5000;

    private int connectTimeoutMs = 1000;

    public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse)
            throws IOException, ServletException {

        ResourceLocation location = parseRequest(httpRequest);
        if (location == null) {
            httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "no/invalid name");
        } else {

            String urls[] = locationDB.nameToUrls(location.getName());

            if (urls == null || urls.length == 0) {

                LOGGER.warning("No locations for " + location.getName());
                httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND,
                        "Unable to locate(" + location.getName() + ")");
            } else {

                DataSource ds = null;
                for (String urlString : urls) {
                    try {
                        ds = locationToDataSource(urlString, location.getOffset());
                        if (ds != null) {
                            break;
                        }
                    } catch (IOException e) {
                        LOGGER.warning("failed proxy of " + urlString + " " + e.getLocalizedMessage());
                    }
                }
                if (ds == null) {
                    LOGGER.warning("No successful locations for " + location.getName());
                    httpResponse.sendError(HttpServletResponse.SC_BAD_GATEWAY,
                            "failed proxy of (" + location.getName() + ")");

                } else {
                    httpResponse.setStatus(HttpServletResponse.SC_OK);
                    // BUGBUG: this will be broken for non compressed data...
                    httpResponse.setContentType(ds.getContentType());
                    httpResponse.setBufferSize(BUF_SIZE);
                    ds.copyTo(httpResponse.getOutputStream());
                }
            }
        }
        return true;
    }

    private DataSource locationToDataSource(String location, long offset) throws IOException {
        DataSource ds = null;
        if (location.startsWith("http://")) {
            URL url = new URL(location);
            String hostname = url.getHost();
            int port = url.getPort();
            if (port == -1) {
                port = 80;
            }
            byte GET[] = "GET".getBytes();
            byte HTTP11[] = "HTTP/1.1".getBytes();
            InetAddress addr = InetAddress.getByName(hostname);
            HttpRequestMessage requestMessage = new HttpRequestMessage(GET, url.getFile().getBytes(), HTTP11);
            ANVLRecord headers = new ANVLRecord();
            headers.addLabelValue("Host", hostname);

            if (offset != 0) {
                headers.addLabelValue(RANGE_HTTP_HEADER,
                        HEADER_BYTES_PREFIX + String.valueOf(offset) + HEADER_BYTES_SUFFIX);
            }
            InetSocketAddress sockAddr = new InetSocketAddress(addr, port);
            Socket socket = new Socket();
            socket.setSoTimeout(socketTimeoutMs);
            socket.setReceiveBufferSize(BUF_SIZE);

            socket.connect(sockAddr, connectTimeoutMs);
            OutputStream socketOut = socket.getOutputStream();
            InputStream socketIn = socket.getInputStream();
            socketOut.write(requestMessage.getBytes(true));
            socketOut.write(headers.getUTF8Bytes());
            socketOut.flush();
            HttpResponse response = HttpResponse.load(socketIn);
            String contentType = response.getHeaders().asMap().get("Content-Type");
            if (contentType == null) {
                contentType = "application/unknown";
            }
            String xferEncoding = response.getHeaders().asMap().get("Transfer-Encoding");

            if (xferEncoding != null) {
                if (xferEncoding.equals("chunked")) {
                    socketIn = new ChunkedInputStream(socketIn);
                }
            }

            ds = new URLDataSource(socketIn, contentType);

        } else {
            // assume a local file path:
            File f = new File(location);
            if (f.isFile() && f.canRead()) {
                long size = f.length();
                if (size < offset) {
                    throw new IOException("short file " + location + " cannot" + " seek to offset " + offset);
                }
                RandomAccessFile raf = new RandomAccessFile(f, "r");
                raf.seek(offset);
                // BUGBUG: is it compressed?
                ds = new FileDataSource(raf, DEFAULT_CONTENT_TYPE);

            } else {
                throw new IOException("No readable file at " + location);
            }

        }

        return ds;
    }

    private ResourceLocation parseRequest(HttpServletRequest request) {
        ResourceLocation location = null;

        String path = request.getRequestURI();
        Matcher fo = FILE_OFFSET_PATTERN.matcher(path);
        if (fo.find()) {
            location = new ResourceLocation(fo.group(1), Long.parseLong(fo.group(2)));
        } else {
            fo = FILE_PATTERN.matcher(path);
            if (fo.find()) {
                String rangeHeader = request.getHeader(RANGE_HTTP_HEADER);
                if (rangeHeader != null) {
                    if (rangeHeader.startsWith(HEADER_BYTES_PREFIX)) {
                        rangeHeader = rangeHeader.substring(HEADER_BYTES_PREFIX.length());
                        if (rangeHeader.endsWith(HEADER_BYTES_SUFFIX)) {
                            rangeHeader = rangeHeader.substring(0,
                                    rangeHeader.length() - HEADER_BYTES_SUFFIX.length());
                        }
                    }
                    location = new ResourceLocation(fo.group(1), Long.parseLong(rangeHeader));
                } else {
                    location = new ResourceLocation(fo.group(1));
                }
            }
        }
        return location;
    }

    /**
     * @return the locationDB
     */
    public ResourceFileLocationDB getLocationDB() {
        return locationDB;
    }

    /**
     * @param locationDB the locationDB to set
     */
    public void setLocationDB(ResourceFileLocationDB locationDB) {
        this.locationDB = locationDB;
    }

    /**
     * @return the socketTimeoutMs
     */
    public int getSocketTimeoutMs() {
        return socketTimeoutMs;
    }

    /**
     * @param socketTimeoutMs the socketTimeoutMs to set
     */
    public void setSocketTimeoutMs(int socketTimeoutMs) {
        this.socketTimeoutMs = socketTimeoutMs;
    }

    /**
     * @return the connectTimeoutMs
     */
    public int getConnectTimeoutMs() {
        return connectTimeoutMs;
    }

    /**
     * @param connectTimeoutMs the connectTimeoutMs to set
     */
    public void setConnectTimeoutMs(int connectTimeoutMs) {
        this.connectTimeoutMs = connectTimeoutMs;
    }

    private class ResourceLocation {
        private String name = null;
        private long offset = 0;

        public ResourceLocation(String name, long offset) {
            this.name = name;
            this.offset = offset;
        }

        public ResourceLocation(String name) {
            this(name, 0);
        }

        public String getName() {
            return name;
        }

        public long getOffset() {
            return offset;
        }
    }

    private interface DataSource {
        public void copyTo(OutputStream os) throws IOException;

        public String getContentType();
    }

    private class FileDataSource implements DataSource {
        private RandomAccessFile raf = null;
        private String contentType = null;

        public FileDataSource(RandomAccessFile raf, String contentType) {
            this.raf = raf;
            this.contentType = contentType;
        }

        public String getContentType() {
            return contentType;
        }

        public void copyTo(OutputStream os) throws IOException {
            byte[] buffer = new byte[BUF_SIZE];
            try {
                int r = -1;
                while ((r = raf.read(buffer, 0, BUF_SIZE)) != -1) {
                    os.write(buffer, 0, r);
                }
            } finally {
                raf.close();
            }
        }
    }

    private class URLDataSource implements DataSource {
        private InputStream is = null;
        private String contentType = null;

        public URLDataSource(InputStream is, String contentType) {
            this.is = is;
            this.contentType = contentType;
        }

        public String getContentType() {
            return contentType;
        }

        public void copyTo(OutputStream os) throws IOException {
            byte[] buffer = new byte[BUF_SIZE];
            try {
                int r = -1;
                while ((r = is.read(buffer, 0, BUF_SIZE)) != -1) {
                    os.write(buffer, 0, r);
                }
            } finally {
                is.close();
            }
        }
    }
}