net.yacy.http.ProxyHandler.java Source code

Java tutorial

Introduction

Here is the source code for net.yacy.http.ProxyHandler.java

Source

//
//  ProxyHandler
//  Copyright 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
//  Copyright 2011 by Florian Richter
//  First released 2011 at http://yacy.net
//  
//  $LastChangedDate$
//  $LastChangedRevision$
//  $LastChangedBy$
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//  
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//  
//  You should have received a copy of the GNU Lesser General Public License
//  along with this program in the file lgpl21.txt
//  If not, see <http://www.gnu.org/licenses/>.
//

package net.yacy.http;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.SocketException;
import java.util.Date;
import java.util.Enumeration;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.crawler.data.Cache;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.TextParser;
import net.yacy.server.http.HTTPDProxyHandler;
import net.yacy.server.http.MultiOutputStream;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.eclipse.jetty.server.Handler;
import org.eclipse.jetty.server.Request;

/**
 * jetty http handler
 * proxies request, caches responses and adds urls to crawler
 */
public class ProxyHandler extends AbstractRemoteHandler implements Handler {

    protected int timeout = 10000;

    /**
     * Starts the handler via the superclass and then reads the proxy client
     * timeout from the {@code proxy.clientTimeout} configuration entry,
     * falling back to 10000 when the entry is not set.
     *
     * @throws Exception if the superclass start-up fails
     */
    @Override
    protected void doStart() throws Exception {
        super.doStart();
        final int configuredTimeout = sb.getConfigInt("proxy.clientTimeout", 10000);
        this.timeout = configuredTimeout;
    }

    /**
     * Copies all headers of the upstream response onto the servlet response.
     * Each header is added (not set), one call per upstream header entry.
     *
     * @param in response received from the remote server
     * @param out servlet response that is sent back to the proxy client
     */
    private void convertHeaderToJetty(HttpResponse in, HttpServletResponse out) {
        final Header[] upstreamHeaders = in.getAllHeaders();
        for (int i = 0; i < upstreamHeaders.length; i++) {
            final Header current = upstreamHeaders[i];
            out.addHeader(current.getName(), current.getValue());
        }
    }

    /**
     * Removes the headers named by {@link HeaderFramework#CONTENT_ENCODING}
     * and {@link HeaderFramework#CONTENT_LENGTH} from the given response.
     *
     * @param headers upstream response whose headers are modified in place
     */
    private void cleanResponseHeader(HttpResponse headers) {
        final String[] headersToDrop = { HeaderFramework.CONTENT_ENCODING, HeaderFramework.CONTENT_LENGTH };
        for (final String headerName : headersToDrop) {
            headers.removeHeaders(headerName);
        }
    }

    /**
     * Deletes the cache entry for the given url hash, provided the cache
     * knows a response header for it. A failing delete is not propagated;
     * its message is written to the proxy log at fine level.
     *
     * @param hash url hash identifying the cache entry
     */
    private void deleteFromCache(final byte[] hash) {
        if (Cache.getResponseHeader(hash) == null) {
            // no response header known for this hash, so nothing is deleted
            return;
        }
        try {
            Cache.delete(hash);
        } catch (final IOException deleteFailure) {
            // log refresh miss
            HTTPDProxyHandler.proxyLog.fine(deleteFailure.getMessage());
        }
    }

    /**
     * Replaces the cache entry of a proxied response and passes the response
     * on to the indexer. The work runs on a separate thread started with
     * minimum priority, so the caller is not blocked by it.
     * <p>
     * The existing cache entry for the url is always deleted first. If
     * {@code cacheArray} is {@code null} or empty nothing is stored afterwards.
     * A failure while writing the cache is logged and not propagated.
     *
     * @param yacyResponse response describing the fetched url; if {@code null}
     *            the method returns without doing anything
     * @param cacheArray content bytes that were sent to the client
     */
    private void storeToCache(final Response yacyResponse, final byte[] cacheArray) {
        // without a response there is nothing to do; do not spawn a thread for it
        if (yacyResponse == null) {
            return;
        }

        final Thread t = new Thread() {
            @Override
            public void run() {
                this.setName("ProxyHandler.storeToCache(" + yacyResponse.url().toNormalform(true) + ")");

                // the cache does either not exist or is (supposed to be) stale
                deleteFromCache(yacyResponse.url().hash());

                if (cacheArray == null || cacheArray.length <= 0) {
                    return;
                }

                yacyResponse.setContent(cacheArray);
                try {
                    Cache.store(yacyResponse.url(), yacyResponse.getResponseHeader(), cacheArray);
                    sb.toIndexer(yacyResponse);
                } catch (final IOException e) {
                    // best effort: the client already got its content, so only report the failure
                    HTTPDProxyHandler.proxyLog.fine("cannot write " + yacyResponse.url().toNormalform(true)
                            + " to Cache: " + e.getMessage());
                }
            }
        };
        t.setPriority(Thread.MIN_PRIORITY);
        t.start();
    }

    /**
     * Forwards one client request to the remote server and relays the answer.
     * <p>
     * The request headers are converted and adjusted for the outgoing
     * connection, the request is sent with the internal http client (GET, POST
     * and HEAD only, redirects are not followed), and the remote status and
     * headers are copied to the servlet response. Depending on the cache
     * decision the body is either streamed to the client and into memory at
     * the same time (and then handed to {@code storeToCache}), or streamed to
     * the client only. In the latter case the body is written only for status
     * 200. On success the access is logged and the request is marked as handled.
     *
     * @param target not used by this method
     * @param baseRequest jetty request, flagged as handled on success
     * @param request servlet request of the proxy client
     * @param response servlet response to the proxy client
     * @throws IOException if talking to the remote server or the client fails
     * @throws ServletException for an unsupported request method or when a
     *             {@link SocketException} occurs (kept as the cause)
     */
    @Override
    public void handleRemote(String target, Request baseRequest, HttpServletRequest request,
            HttpServletResponse response) throws IOException, ServletException {

        sb.proxyLastAccess = System.currentTimeMillis();

        RequestHeader proxyHeaders = ProxyHandler.convertHeaderFromJetty(request);
        setProxyHeaderForClient(request, proxyHeaders);

        final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent);
        client.setTimout(timeout);
        client.setHeader(proxyHeaders.entrySet());
        client.setRedirecting(false);
        // send request
        try {
            final String queryString = request.getQueryString() != null ? "?" + request.getQueryString() : "";
            final DigestURL digestURI = new DigestURL(request.getScheme(), request.getServerName(),
                    request.getServerPort(), request.getRequestURI() + queryString);
            final String method = request.getMethod();
            if (method.equals(HeaderFramework.METHOD_GET)) {
                client.GET(digestURI, false);
            } else if (method.equals(HeaderFramework.METHOD_POST)) {
                client.POST(digestURI, request.getInputStream(), request.getContentLength(), false);
            } else if (method.equals(HeaderFramework.METHOD_HEAD)) {
                client.HEADResponse(digestURI, false);
            } else {
                throw new ServletException("Unsupported Request Method");
            }
            final HttpResponse clientresponse = client.getHttpResponse();
            final int statusCode = clientresponse.getStatusLine().getStatusCode();
            final ResponseHeader responseHeaderLegacy = new ResponseHeader(statusCode,
                    clientresponse.getAllHeaders());

            if (responseHeaderLegacy.isEmpty()) {
                throw new SocketException(clientresponse.getStatusLine().toString());
            }
            // the legacy header above keeps the original values; only the relayed headers are cleaned
            cleanResponseHeader(clientresponse);

            // reserve cache entry
            final net.yacy.crawler.retrieval.Request yacyRequest = new net.yacy.crawler.retrieval.Request(null,
                    digestURI, null,
                    "", responseHeaderLegacy.lastModified(), sb.crawler.defaultProxyProfile.handle(), 0,
                    sb.crawler.defaultProxyProfile.timezoneOffset());
            final Response yacyResponse = new Response(yacyRequest, null, responseHeaderLegacy,
                    sb.crawler.defaultProxyProfile, false, null);

            final String storeError = yacyResponse.shallStoreCacheForProxy();
            final boolean storeHTCache = yacyResponse.profile().storeHTCache();
            final String supportError = TextParser.supports(yacyResponse.url(), yacyResponse.getMimeType());

            // status and headers are relayed to the client in every case
            convertHeaderToJetty(clientresponse, response);
            response.setStatus(statusCode);

            /*
             * Now we store the response into the htcache directory if
             * a) the response is cacheable AND
             * b) the user has configured to use the htcache OR
             * c) the content should be indexed
             *
             * NOTE(review): c) is tested as supportError != null. If
             * TextParser.supports returns null for supported content (as the
             * variable name suggests), this test is inverted relative to the
             * description above - confirm before changing.
             */
            final boolean cacheResponse = (storeError == null) && (storeHTCache || (supportError != null));

            if (cacheResponse) {
                // we don't write actually into a file, only to RAM, and schedule writing the file.
                final int l = responseHeaderLegacy.size();
                final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l);
                final OutputStream toClientAndMemory = new MultiOutputStream(
                        new OutputStream[] { response.getOutputStream(), byteStream });
                client.writeTo(toClientAndMemory);

                // cached bytes
                storeToCache(yacyResponse, byteStream.toByteArray());
            } else if (statusCode == HttpServletResponse.SC_OK) {
                // no caching; for any other status only the header is served (content is skipped),
                // because writing may throw on httpStatus=302 with a gzip encoded inputstream
                client.writeTo(response.getOutputStream());
            }
        } catch (final SocketException se) {
            // keep the original exception as cause so the stack trace is not lost
            throw new ServletException("Socket Exception: " + se.getMessage(), se);
        } finally {
            client.finish();
        }

        // we handled this request, break out of handler chain
        logProxyAccess(request);
        baseRequest.setHandled(true);
    }

    /**
     * Builds a modifiable YaCy RequestHeader from the headers of a servlet
     * request. Every value of every header name is added, so headers that
     * occur several times keep all their values.
     *
     * @param request ServletRequest to read the headers from
     * @return RequestHeader filled with all header name/value pairs of the request
     */
    public static RequestHeader convertHeaderFromJetty(HttpServletRequest request) {
        final RequestHeader converted = new RequestHeader();
        for (Enumeration<String> names = request.getHeaderNames(); names.hasMoreElements();) {
            final String name = names.nextElement();
            for (Enumeration<String> values = request.getHeaders(name); values.hasMoreElements();) {
                converted.add(name, values.nextElement());
            }
        }
        return converted;
    }

    /**
     * Adjusts the header set used by the internal http client for the
     * connection to the remote server, according to the local configuration.
     * <p>
     * The keep-alive and content-length entries are removed. If
     * {@code proxy.sendXForwardedForHeader} is enabled (default: true) and the
     * client address is not an address of this host, the client address is
     * put into the X-Forwarded-For entry. Finally the header is passed to
     * {@code HTTPDProxyHandler.modifyProxyHeaders} together with the protocol
     * of the original request.
     *
     * @param origServletRequest original request/header
     * @param header header for http client (already preset with headers from
     * original ServletRequest), modified in place
     */
    private void setProxyHeaderForClient(final HttpServletRequest origServletRequest,
            final HeaderFramework header) {

        header.remove(RequestHeader.KEEP_ALIVE);
        header.remove(HeaderFramework.CONTENT_LENGTH);

        // setting the X-Forwarded-For header
        final boolean forwardClientIp = sb.getConfigBool("proxy.sendXForwardedForHeader", true);
        if (forwardClientIp) {
            final String clientIp = origServletRequest.getRemoteAddr();
            // if originator is local host there is no user ip to forward (= request from localhost)
            final boolean fromThisHost = Domains.isThisHostIP(clientIp);
            if (!fromThisHost) {
                header.put(HeaderFramework.X_FORWARDED_FOR, clientIp);
            }
        }

        HTTPDProxyHandler.modifyProxyHeaders(header, origServletRequest.getProtocol());
    }

    /**
     * Writes one access line for the given request to the proxy log at fine
     * level. The line consists of a timestamp, the remote address, the request
     * method and the request URL (with the query string, if any), separated by
     * single blanks.
     *
     * @param request servlet request that was proxied
     */
    public final static synchronized void logProxyAccess(HttpServletRequest request) {

        final StringBuilder line = new StringBuilder(80);

        // timestamp, remote host and method, each followed by a blank
        line.append(GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date())).append(' ');
        line.append(request.getRemoteAddr()).append(' ');
        line.append(request.getMethod()).append(' ');

        // URL, with the query string re-attached when the request has one
        line.append(request.getRequestURL());
        final String query = request.getQueryString();
        if (query != null) {
            line.append("?").append(query);
        }

        HTTPDProxyHandler.proxyLog.fine(line.toString());
    }
}