org.apache.manifoldcf.crawler.connectors.rss.ThrottledFetcher.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.manifoldcf.crawler.connectors.rss.ThrottledFetcher.java

Source

/* $Id: ThrottledFetcher.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.connectors.rss;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.connectorcommon.interfaces.*;
import org.apache.manifoldcf.connectorcommon.common.XThreadInputStream;
import org.apache.manifoldcf.connectorcommon.common.InterruptibleSocketFactory;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import java.util.*;
import java.io.*;
import java.net.*;

import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.protocol.HttpRequestExecutor;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.NTCredentials;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.util.EntityUtils;
import org.apache.http.HttpStatus;
import org.apache.http.HttpHost;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.protocol.HttpContext;

import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.client.CircularRedirectException;
import org.apache.http.NoHttpResponseException;
import org.apache.http.HttpException;

/** This class uses httpclient to fetch stuff from webservers.  However, it additionally controls the fetch
* rate in two ways: first, controlling the overall bandwidth used per server, and second, limiting the number
* of simultaneous open connections per server.  It's also capable of limiting the maximum number of fetches
* per time period per server as well; however, this functionality is not strictly necessary at this time because
* the CF scheduler does that at a higher layer.
* An instance of this class would very probably need to have a lifetime consistent with the long-term nature
* of these values, and be static.
* This class sets up a different Http connection pool for each server, so that we can foist off onto the httpclient
* library the task of limiting the number of connections.  This means that we need periodic polling to determine
* when idle pooled connections can be freed.
*/
public class ThrottledFetcher {
    public static final String _rcsid = "@(#)$Id: ThrottledFetcher.java 988245 2010-08-23 18:39:35Z kwright $";

    /** This flag determines whether we record everything to the disk, as a means of doing a web snapshot */
    protected static final boolean recordEverything = false;

    /** The read chunk length */
    protected static final int READ_CHUNK_LENGTH = 4096;

    /** This counter keeps track of the total outstanding handles across everything, because we do try to control that.
    * It is incremented by registerGlobalHandle() and decremented by releaseGlobalHandle(), in both cases while
    * holding globalHandleCounterLock. */
    protected static int globalHandleCount = 0;
    /** This is the lock object for that global handle counter.  It is also the monitor that
    * registerGlobalHandle() waits on and releaseGlobalHandle() notifies.
    * Note: "new Integer(0)" yields a distinct object.  Do not replace it with Integer.valueOf(0) or an
    * autoboxed literal; those return a JVM-wide cached instance, which would make this monitor shared
    * with any unrelated code that synchronizes on the same cached value. */
    protected static Integer globalHandleCounterLock = new Integer(0);

    /** This hash maps the server string (without port) to a pool throttling object, where
    * we can track the statistics and make sure we throttle appropriately */
    protected final Map<String, IConnectionThrottler> serverMap = new HashMap<String, IConnectionThrottler>();

    /** Reference count for how many connections to this pool there are */
    protected int refCount = 0;

    // Current host name, as reported by the local InetAddress; remains null if it cannot be resolved.
    // It is passed as the workstation argument of the NTCredentials built for proxy authentication.
    private static String currentHost = null;
    static {
        // Find the current host name
        try {
            java.net.InetAddress addr = java.net.InetAddress.getLocalHost();

            // Get hostname
            currentHost = addr.getHostName();
        } catch (java.net.UnknownHostException e) {
            // Nothing is done here: the lookup is best-effort, and currentHost simply stays null
            // when the local host name cannot be determined.
        }
    }

    /** Reserve one global handle, blocking for as long as the number of outstanding handles is at or above
    * the supplied maximum.
    * @param maxHandles is the number of outstanding handles at which callers must wait.
    * @throws ManifoldCFException of type INTERRUPTED if the calling thread is interrupted while waiting.
    */
    protected static void registerGlobalHandle(int maxHandles) throws ManifoldCFException {
        synchronized (globalHandleCounterLock) {
            try {
                // Re-check the condition after every wakeup; releaseGlobalHandle() wakes all waiters.
                while (maxHandles <= globalHandleCount) {
                    globalHandleCounterLock.wait();
                }
            } catch (InterruptedException interrupted) {
                throw new ManifoldCFException("Interrupted: " + interrupted.getMessage(), interrupted,
                        ManifoldCFException.INTERRUPTED);
            }
            globalHandleCount += 1;
        }
    }

    /** Give back one global handle and wake every thread currently blocked in registerGlobalHandle(). */
    protected static void releaseGlobalHandle() {
        synchronized (globalHandleCounterLock) {
            globalHandleCount -= 1;
            // All waiters are woken; each re-checks the counter against its own limit.
            globalHandleCounterLock.notifyAll();
        }
    }

    /** Constructor.  Creates a fetcher with an empty server map and a zero reference count.
    */
    public ThrottledFetcher() {
        super();
    }

    /** Establish a connection to a specified server.  The connection throttler for the server is looked up
    * in the server map, and obtained from the throttle groups service and remembered on first use.
    * @param threadContext is the thread context used to obtain the throttle groups service.
    * @param throttleGroupName is the name of the throttle group the server's throttler is obtained from.
    * @param serverName is the FQDN of the server, e.g. foo.metacarta.com
    * @param connectionLimit is the maximum desired outstanding connections at any one time.
    * @param connectionTimeoutMilliseconds is the number of milliseconds to wait for the connection before timing out.
    * @param proxyHost is the proxy host, or null or empty if there is no proxy.
    * @param proxyPort is the proxy port.
    * @param proxyAuthDomain is the proxy authentication domain, or null.
    * @param proxyAuthUsername is the proxy authentication user name, or null or empty for no proxy authentication.
    * @param proxyAuthPassword is the proxy authentication password, or null.
    * @param activities is used to check whether the job has been aborted while waiting.
    * @return the new throttled connection.
    */
    public synchronized IThrottledConnection createConnection(IThreadContext threadContext,
            String throttleGroupName, String serverName, int connectionLimit, int connectionTimeoutMilliseconds,
            String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername,
            String proxyAuthPassword, IAbortActivity activities) throws ManifoldCFException, ServiceInterruption {
        IConnectionThrottler throttler = serverMap.get(serverName);
        if (throttler == null) {
            // First request for this server: obtain its connection throttler and remember it
            final String[] serverKeys = new String[] { serverName };
            throttler = ThrottleGroupsFactory.make(threadContext)
                    .obtainConnectionThrottler(RSSConnector.rssThrottleGroupType, throttleGroupName, serverKeys);
            serverMap.put(serverName, throttler);
        }

        return new ThrottledConnection(serverName, throttler, connectionTimeoutMilliseconds, connectionLimit,
                proxyHost, proxyPort, proxyAuthDomain, proxyAuthUsername, proxyAuthPassword, activities);
    }

    /** Poll.  Intended as the hook for closing and freeing idle connections; currently it does nothing.
    */
    public synchronized void poll() throws ManifoldCFException {
        // Intentionally empty: each connection is released as soon as its user is finished with it,
        // so there is nothing left over to clean up here.
    }

    /** Record that one more repository connection is using this object. */
    public synchronized void noteConnectionEstablished() {
        refCount += 1;
    }

    /** Record that a repository connection has stopped using this object.  When the last user goes away,
    * the cached per-server throttlers are forgotten.
    */
    public synchronized void noteConnectionReleased() {
        refCount -= 1;
        if (refCount != 0) {
            return;
        }
        // There are no actual connection pools to tear down here; dropping the cached throttlers
        // is all that is done.
        // MHL
        serverMap.clear();
    }

    /** This class represents an established connection to a URL.
    */
    protected static class ThrottledConnection implements IThrottledConnection {
        /** The server fqdn */
        protected final String serverName;
        /** The throttling object we use to track connections */
        protected final IConnectionThrottler connectionThrottler;
        /** The throttling object we use to track fetches */
        protected final IFetchThrottler fetchThrottler;
        /** Connection timeout in milliseconds */
        protected final int connectionTimeoutMilliseconds;
        /** The client connection manager */
        protected final HttpClientConnectionManager connectionManager;
        /** The httpclient */
        protected final HttpClient httpClient;

        /** The method object */
        protected HttpRequestBase executeMethod = null;
        /** The start-fetch time */
        protected long startFetchTime = -1L;
        /** The error trace, if any */
        protected Throwable throwable = null;
        /** The current URL being fetched */
        protected String myUrl = null;
        /** The status code fetched, if any */
        protected int statusCode = FETCH_NOT_TRIED;
        /** The kind of fetch we are doing */
        protected String fetchType = null;
        /** The current bytes in the current fetch */
        protected long fetchCounter = 0L;

        /** The thread that is actually doing the work */
        protected ExecuteMethodThread methodThread = null;
        /** Set if thread has been started */
        protected boolean threadStarted = false;

        /** Abort checker */
        protected final AbortChecker abortChecker;

        /** Constructor.  Builds a dedicated HttpClient for the server, reserves a global handle, and then
        * waits for the connection throttler to grant a connection.  If anything fails after the global
        * handle has been reserved, the handle is released and the connection manager is shut down before the
        * exception propagates, because the caller never receives an object on which it could call close().
        * @param serverName is the FQDN of the server.
        * @param connectionThrottler is the throttler used to track connections to the server.
        * @param connectionTimeoutMilliseconds is used for the socket, connect, and connection-request timeouts.
        * @param connectionLimit is passed to registerGlobalHandle() as the maximum number of global handles.
        * @param proxyHost is the proxy host, or null or empty if there is no proxy.
        * @param proxyPort is the proxy port.
        * @param proxyAuthDomain is the proxy authentication domain; null is treated as empty.
        * @param proxyAuthUsername is the proxy user name; null or empty means no proxy credentials are set.
        * @param proxyAuthPassword is the proxy password; null is treated as empty.
        * @param activities is consulted, through an AbortChecker, while waiting for a connection.
        */
        public ThrottledConnection(String serverName, IConnectionThrottler connectionThrottler,
                int connectionTimeoutMilliseconds, int connectionLimit, String proxyHost, int proxyPort,
                String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword,
                IAbortActivity activities) throws ManifoldCFException, ServiceInterruption {
            this.serverName = serverName;
            this.connectionThrottler = connectionThrottler;
            this.connectionTimeoutMilliseconds = connectionTimeoutMilliseconds;
            this.abortChecker = new AbortChecker(activities);

            // Create the https scheme for this connection.
            // NOTE(review): hostname verification is disabled (ALLOW_ALL_HOSTNAME_VERIFIER) and the socket
            // factory comes from getTrustingSecureSocketFactory(), so presumably server certificates are
            // not validated either -- confirm that this is intended for crawling.
            javax.net.ssl.SSLSocketFactory httpsSocketFactory = KeystoreManagerFactory
                    .getTrustingSecureSocketFactory();
            SSLConnectionSocketFactory myFactory = new SSLConnectionSocketFactory(
                    new InterruptibleSocketFactory(httpsSocketFactory, connectionTimeoutMilliseconds),
                    SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);

            connectionManager = new PoolingHttpClientConnectionManager();

            CredentialsProvider credentialsProvider = new BasicCredentialsProvider();

            RequestConfig.Builder requestBuilder = RequestConfig.custom().setCircularRedirectsAllowed(true)
                    .setSocketTimeout(connectionTimeoutMilliseconds).setStaleConnectionCheckEnabled(true)
                    .setExpectContinueEnabled(true).setConnectTimeout(connectionTimeoutMilliseconds)
                    .setConnectionRequestTimeout(connectionTimeoutMilliseconds);

            // If there's a proxy, set that too.
            if (proxyHost != null && proxyHost.length() > 0) {

                // Configure proxy authentication
                if (proxyAuthUsername != null && proxyAuthUsername.length() > 0) {
                    if (proxyAuthPassword == null)
                        proxyAuthPassword = "";
                    if (proxyAuthDomain == null)
                        proxyAuthDomain = "";

                    credentialsProvider.setCredentials(new AuthScope(proxyHost, proxyPort),
                            new NTCredentials(proxyAuthUsername, proxyAuthPassword, currentHost, proxyAuthDomain));
                }

                HttpHost proxy = new HttpHost(proxyHost, proxyPort);

                requestBuilder.setProxy(proxy);
            }

            httpClient = HttpClients.custom().setConnectionManager(connectionManager).setMaxConnTotal(1)
                    .disableAutomaticRetries().setDefaultRequestConfig(requestBuilder.build())
                    .setDefaultSocketConfig(SocketConfig.custom().setTcpNoDelay(true)
                            .setSoTimeout(connectionTimeoutMilliseconds).build())
                    .setDefaultCredentialsProvider(credentialsProvider).setSSLSocketFactory(myFactory)
                    .setRequestExecutor(new HttpRequestExecutor(connectionTimeoutMilliseconds))
                    .setRedirectStrategy(new DefaultRedirectStrategy()).build();

            registerGlobalHandle(connectionLimit);
            // From here on we own a global handle.  If construction does not complete, nobody can call
            // close() on this object, so the handle and the connection manager must be released here.
            boolean constructed = false;
            try {
                try {
                    int result = connectionThrottler.waitConnectionAvailable(abortChecker);
                    if (result != IConnectionThrottler.CONNECTION_FROM_CREATION)
                        throw new IllegalStateException(
                                "Got back unexpected value from waitForAConnection() of " + result);
                } catch (InterruptedException e) {
                    // Keep the original exception as the cause so the interruption point is not lost
                    throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED);
                } catch (BreakException e) {
                    abortChecker.rethrowExceptions();
                }
                fetchThrottler = connectionThrottler.getNewConnectionFetchThrottler();
                constructed = true;
            } finally {
                if (!constructed) {
                    try {
                        connectionManager.shutdown();
                    } finally {
                        releaseGlobalHandle();
                    }
                }
            }
        }

        /** Begin the fetch process.  Resets the byte counter and blocks until the fetch throttler grants
        * permission to fetch a document.
        * @param fetchType is a short descriptive string describing the kind of fetch being requested.  This
        *        is used solely for logging purposes.
        * @throws ManifoldCFException of type INTERRUPTED if the thread is interrupted while waiting.
        */
        @Override
        public void beginFetch(String fetchType) throws ManifoldCFException, ServiceInterruption {
            this.fetchType = fetchType;
            fetchCounter = 0L;
            try {
                if (fetchThrottler.obtainFetchDocumentPermission(abortChecker) == false)
                    throw new IllegalStateException("obtainFetchDocumentPermission() had unexpected return value");
            } catch (InterruptedException e) {
                // Preserve the cause so that the interruption point remains visible in stack traces
                throw new ManifoldCFException("Interrupted", e, ManifoldCFException.INTERRUPTED);
            } catch (BreakException e) {
                // The abort checker interrupted the wait; let it raise whatever exception it recorded
                abortChecker.rethrowExceptions();
            }
            threadStarted = false;
        }

        /** Add a number of bytes to the running byte count of the current fetch.
        * @param count is the number of bytes just fetched.
        */
        public void logFetchCount(int count) {
            final long additionalBytes = count;
            fetchCounter = fetchCounter + additionalBytes;
        }

        /** Execute the fetch and get the return code.  This method uses the
        * standard logging mechanism to keep track of the fetch attempt.  It also
        * signals the following three conditions: ServiceInterruption (if a dynamic
        * error occurs), OK, or a static error code (for a condition where retry is
        * not likely to be helpful).  The actual HTTP error code is NOT returned by
        * this method; use getResponseCode() for that.
        * @param protocol is the protocol to use to perform the access, e.g. "http"
        * @param port is the port to use to perform the access, where -1 means "use the default"
        * @param urlPath is the path part of the url, e.g. "/robots.txt"
        * @param userAgent is the value of the userAgent header to use.
        * @param from is the value of the from header to use.
        * @param lastETag is the ETag value from a previous fetch, or null.  When present it is sent as
        *        the If-None-Match validator so that the server can answer 304 (Not Modified).
        * @param lastModified is the Last-Modified value from a previous fetch, or null.  When present it is
        *        sent as the If-Modified-Since validator.
        * @return the status code: success, no change, static error, or dynamic error.
        * @throws ServiceInterruption if the failure is considered temporary and the fetch should be retried.
        * @throws ManifoldCFException of type INTERRUPTED if the thread is interrupted.
        */
        @Override
        public int executeFetch(String protocol, int port, String urlPath, String userAgent, String from,
                String lastETag, String lastModified) throws ManifoldCFException, ServiceInterruption {

            StringBuilder sb = new StringBuilder(protocol);
            sb.append("://").append(serverName);
            if (port != -1)
                sb.append(":").append(Integer.toString(port));
            sb.append(urlPath);
            myUrl = sb.toString();

            // Create the get method
            executeMethod = new HttpGet(myUrl);

            startFetchTime = System.currentTimeMillis();

            // Set all appropriate headers
            executeMethod.setHeader(new BasicHeader("User-Agent", userAgent));
            executeMethod.setHeader(new BasicHeader("From", from));
            executeMethod.setHeader(new BasicHeader("Accept", "*/*"));

            // Conditional-request validators.  "ETag" and "Last-Modified" are response headers; on a
            // request the previously seen values must be sent as If-None-Match / If-Modified-Since,
            // otherwise the server never has a reason to answer 304 (Not Modified).
            if (lastETag != null)
                executeMethod.setHeader(new BasicHeader("If-None-Match", lastETag));
            if (lastModified != null)
                executeMethod.setHeader(new BasicHeader("If-Modified-Since", lastModified));
            // Create the execution thread.
            methodThread = new ExecuteMethodThread(this, fetchThrottler, httpClient, executeMethod);
            // Start the method thread, which will start the transaction
            try {
                methodThread.start();
                threadStarted = true;
                // We want to wait until at least the execution has fired, and then figure out where we
                // stand
                try {
                    statusCode = methodThread.getResponseCode();
                } catch (InterruptedException e) {
                    methodThread.interrupt();
                    methodThread = null;
                    threadStarted = false;
                    throw e;
                }

            } catch (InterruptedException e) {
                // Drop the current connection on the floor, so it cannot be reused.
                executeMethod = null;
                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
            } catch (java.net.MalformedURLException e) {
                throwable = new ManifoldCFException("Illegal URI: '" + myUrl + "'", e);
                statusCode = FETCH_BAD_URI;
                return STATUS_PAGEERROR;
            } catch (java.net.SocketTimeoutException e) {
                throwable = e;
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for IO for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 300000L, currentTime + 120L * 60000L, -1, false);
            } catch (ConnectTimeoutException e) {
                throwable = e;
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption(
                        "Timed out waiting for connect for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 60L * 60000L, currentTime + 720L * 60000L, -1, false);
            } catch (InterruptedIOException e) {
                // Must come after the two timeout exceptions above, which are subclasses of this one.
                throw new ManifoldCFException("Interrupted", e, ManifoldCFException.INTERRUPTED);
            } catch (CircularRedirectException e) {
                throwable = e;
                statusCode = FETCH_CIRCULAR_REDIRECT;
                return STATUS_PAGEERROR;
            } catch (NoHttpResponseException e) {
                throwable = e;
                // Give up after 2 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for response for '" + myUrl + "'", e,
                        currentTime + 15L * 60000L, currentTime + 120L * 60000L, -1, false);
            } catch (java.net.ConnectException e) {
                throwable = e;
                // Give up after 12 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for a connection for '" + myUrl + "'", e,
                        currentTime + 1000000L, currentTime + 720L * 60000L, -1, false);
            } catch (java.net.NoRouteToHostException e) {
                // This exception means we know the IP address but can't get there.  That's either a firewall issue, or it's something transient
                // with the network.  Some degree of retry is probably wise.
                throwable = e;
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("No route to host for '" + myUrl + "'", e, currentTime + 1000000L,
                        currentTime + 720L * 60000L, -1, false);
            } catch (HttpException e) {
                throwable = e;
                statusCode = FETCH_IO_ERROR;
                return STATUS_PAGEERROR;
            } catch (IOException e) {
                // Treat this as a bad url.  We don't know what happened, but it isn't something we are going to naively
                // retry on.
                throwable = e;
                statusCode = FETCH_IO_ERROR;
                return STATUS_PAGEERROR;
            } catch (Throwable e) {
                Logging.connectors.debug("RSS: Caught an unexpected exception: " + e.getMessage(), e);
                throwable = e;
                statusCode = FETCH_UNKNOWN_ERROR;
                return STATUS_PAGEERROR;
            }

            // The status interpretation is done outside of the try block on purpose: it signals temporary
            // failures by throwing ServiceInterruption, and inside the try that exception would be caught by
            // the catch-all Throwable handler and silently turned into STATUS_PAGEERROR.
            return interpretStatusCode(statusCode);
        }

        /** Map an HTTP response code onto a fetch status, or throw ServiceInterruption for codes that
        * indicate a temporary condition.
        * @param responseCode is the HTTP response code received.
        * @return STATUS_OK, STATUS_NOCHANGE, STATUS_SITEERROR, or STATUS_PAGEERROR.
        * @throws ServiceInterruption for request timeout, gateway timeout, service unavailable, and
        *         internal server error responses.
        */
        private int interpretStatusCode(int responseCode) throws ServiceInterruption {
            long currentTime;
            switch (responseCode) {
            case HttpStatus.SC_OK:
                return STATUS_OK;
            case HttpStatus.SC_UNAUTHORIZED:
            case HttpStatus.SC_USE_PROXY:
                // Permanent errors that mean, "fetch not allowed"
                return STATUS_SITEERROR;
            case HttpStatus.SC_REQUEST_TIMEOUT:
            case HttpStatus.SC_GATEWAY_TIMEOUT:
            case HttpStatus.SC_SERVICE_UNAVAILABLE:
                // Temporary service interruption
                // May want to make the retry time a parameter someday
                currentTime = System.currentTimeMillis();
                throw new ServiceInterruption(
                        "Http response temporary error on '" + myUrl + "': " + Integer.toString(responseCode),
                        null, currentTime + 60L * 60000L, currentTime + 1440L * 60000L, -1, false);
            case HttpStatus.SC_NOT_MODIFIED:
                return STATUS_NOCHANGE;
            case HttpStatus.SC_INTERNAL_SERVER_ERROR:
                // Fail for a while, but give up after 24 hours
                currentTime = System.currentTimeMillis();
                throw new ServiceInterruption(
                        "Http response internal server error on '" + myUrl + "': "
                                + Integer.toString(responseCode),
                        null, currentTime + 60L * 60000L, currentTime + 1440L * 60000L, -1, false);
            case HttpStatus.SC_GONE:
            case HttpStatus.SC_NOT_FOUND:
            case HttpStatus.SC_BAD_GATEWAY:
            case HttpStatus.SC_BAD_REQUEST:
            default:
                return STATUS_PAGEERROR;
            }
        }

        /** Get the response code recorded by the most recent fetch attempt.
        *@return the response code.  This is either an HTTP response code, or one of the FETCH_xxx codes
        * this class stores when no HTTP response code is available.
        */
        @Override
        public int getResponseCode() throws ManifoldCFException, ServiceInterruption {
            final int recordedCode = this.statusCode;
            return recordedCode;
        }

        /** Get the response input stream.  It is the responsibility of the caller
        * to close this stream when done.
        *@return the response body stream, as supplied by the method thread.
        *@throws ManifoldCFException if no fetch is in progress, if the thread is interrupted (type
        * INTERRUPTED), or on a non-retryable HTTP or I/O error.
        *@throws ServiceInterruption on timeouts and connection failures that are worth retrying.
        */
        @Override
        public InputStream getResponseBodyStream() throws ManifoldCFException, ServiceInterruption {
            if (executeMethod == null)
                throw new ManifoldCFException("Attempt to get an input stream when there is no method");
            if (methodThread == null || threadStarted == false)
                throw new ManifoldCFException("Attempt to get an input stream when no method thread");
            try {
                return methodThread.getSafeInputStream();
            } catch (InterruptedException e) {
                methodThread.interrupt();
                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
            } catch (java.net.SocketTimeoutException e) {
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for IO for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 300000L, currentTime + 120L * 60000L, -1, false);
            } catch (ConnectTimeoutException e) {
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption(
                        "Timed out waiting for connect for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 60L * 60000L, currentTime + 720L * 60000L, -1, false);
            } catch (InterruptedIOException e) {
                // Must come after the two timeout exceptions above, which are subclasses of this one.
                methodThread.interrupt();
                // Keep the cause so the point of interruption is not lost
                throw new ManifoldCFException("Interrupted", e, ManifoldCFException.INTERRUPTED);
            } catch (NoHttpResponseException e) {
                // Give up after 2 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for response for '" + myUrl + "'", e,
                        currentTime + 15L * 60000L, currentTime + 120L * 60000L, -1, false);
            } catch (java.net.ConnectException e) {
                // Give up after 12 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for a stream connection for '" + myUrl + "'", e,
                        currentTime + 1000000L, currentTime + 720L * 60000L, -1, false);
            } catch (java.net.NoRouteToHostException e) {
                // This exception means we know the IP address but can't get there.  That's either a firewall issue, or it's something transient
                // with the network.  Some degree of retry is probably wise.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("No route to host for '" + myUrl + "'", e, currentTime + 1000000L,
                        currentTime + 720L * 60000L, -1, false);
            } catch (HttpException e) {
                throw new ManifoldCFException("Http exception reading stream: " + e.getMessage(), e);
            } catch (IOException e) {
                throw new ManifoldCFException("I/O exception reading stream: " + e.getMessage(), e);
            }
        }

        /** Get a specified response header, if it exists.
        *@param headerName is the name of the header.
        *@return the header value, or null if it doesn't exist.
        *@throws ManifoldCFException if no fetch is in progress, if the thread is interrupted (type
        * INTERRUPTED), or on a non-retryable HTTP or I/O error.
        *@throws ServiceInterruption on timeouts and connection failures that are worth retrying.
        */
        @Override
        public String getResponseHeader(String headerName) throws ManifoldCFException, ServiceInterruption {
            if (executeMethod == null)
                throw new ManifoldCFException("Attempt to get a header when there is no method");
            if (methodThread == null || threadStarted == false)
                throw new ManifoldCFException("Attempt to get a header when no method thread");
            try {
                return methodThread.getFirstHeader(headerName);
            } catch (InterruptedException e) {
                methodThread.interrupt();
                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
            } catch (java.net.SocketTimeoutException e) {
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for IO for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 300000L, currentTime + 120L * 60000L, -1, false);
            } catch (ConnectTimeoutException e) {
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption(
                        "Timed out waiting for connect for '" + myUrl + "': " + e.getMessage(), e,
                        currentTime + 60L * 60000L, currentTime + 720L * 60000L, -1, false);
            } catch (InterruptedIOException e) {
                // Must come after the two timeout exceptions above, which are subclasses of this one.
                methodThread.interrupt();
                // Keep the cause so the point of interruption is not lost
                throw new ManifoldCFException("Interrupted", e, ManifoldCFException.INTERRUPTED);
            } catch (NoHttpResponseException e) {
                // Give up after 2 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for response for '" + myUrl + "'", e,
                        currentTime + 15L * 60000L, currentTime + 120L * 60000L, -1, false);
            } catch (java.net.ConnectException e) {
                // Give up after 12 hours.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("Timed out waiting for a connection for '" + myUrl + "'", e,
                        currentTime + 1000000L, currentTime + 720L * 60000L, -1, false);
            } catch (java.net.NoRouteToHostException e) {
                // This exception means we know the IP address but can't get there.  That's either a firewall issue, or it's something transient
                // with the network.  Some degree of retry is probably wise.
                long currentTime = System.currentTimeMillis();
                throw new ServiceInterruption("No route to host for '" + myUrl + "'", e, currentTime + 1000000L,
                        currentTime + 720L * 60000L, -1, false);
            } catch (HttpException e) {
                throw new ManifoldCFException("Http exception reading response: " + e.getMessage(), e);
            } catch (IOException e) {
                throw new ManifoldCFException("I/O exception reading response: " + e.getMessage(), e);
            }
        }

        /** Done with the fetch.  Call this when the fetch has been completed.  A log entry will be generated
        * describing what was done.  Does nothing if no fetch was begun (fetchType is null).
        * @param activities is used to record the fetch activity.
        * @throws ManifoldCFException of type INTERRUPTED if interrupted while shutting down the method thread.
        */
        @Override
        public void doneFetch(IProcessActivity activities) throws ManifoldCFException {

            if (fetchType != null) {
                // Abort any in-flight request first so the later finishUp() is not left waiting on it
                if (methodThread != null && threadStarted)
                    methodThread.abort();
                long endTime = System.currentTimeMillis();

                // Long.valueOf/Long.toString replace the deprecated "new Long(...)" boxing constructor
                activities.recordActivity(Long.valueOf(startFetchTime), RSSConnector.ACTIVITY_FETCH,
                        Long.valueOf(fetchCounter), myUrl, Integer.toString(statusCode),
                        (throwable == null) ? null : throwable.getMessage(), null);

                Logging.connectors.info(
                        "RSS: FETCH " + fetchType + "|" + myUrl + "|" + Long.toString(startFetchTime) + "+"
                                + Long.toString(endTime - startFetchTime) + "|" + Integer.toString(statusCode)
                                + "|" + Long.toString(fetchCounter) + "|" + ((throwable == null) ? ""
                                        : (throwable.getClass().getName() + "| " + throwable.getMessage())));
                if (throwable != null) {
                    if (Logging.connectors.isDebugEnabled())
                        Logging.connectors.debug("RSS: Fetch exception for '" + myUrl + "'", throwable);
                }

                // Shut down (join) the connection thread, if any, and if it started
                if (methodThread != null) {
                    if (threadStarted) {
                        try {
                            methodThread.finishUp();
                        } catch (InterruptedException e) {
                            throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED);
                        }
                        threadStarted = false;
                    }
                    methodThread = null;
                }

                // Reset the per-fetch state so the connection can be used for another fetch
                executeMethod = null;
                throwable = null;
                startFetchTime = -1L;
                myUrl = null;
                statusCode = -1;
                fetchType = null;
            }
        }

        /** Close the connection.  Call this to end this server connection.
        *
        * Three cleanup steps are performed in order: the connection manager is shut down, the connection
        * throttler is told that the connection has been destroyed, and the global handle is released.
        * The steps are chained with try/finally so that a failure in an earlier step cannot prevent the
        * later bookkeeping from happening; the first exception thrown is still propagated to the caller
        * unless a later step also throws.
        *
        *@throws ManifoldCFException as declared by the interface.
        */
        @Override
        public void close() throws ManifoldCFException {
            try {
                // Clean up the connection pool.  This should do the necessary bookkeeping to release the one connection that's sitting there.
                connectionManager.shutdown();
            } finally {
                try {
                    connectionThrottler.noteConnectionDestroyed();
                } finally {
                    releaseGlobalHandle();
                }
            }
        }

    }

    /** This class throttles an input stream based on the specified byte rate parameters.  The
    * throttling takes place across all streams that are open to the server in question.
    *
    * Every read is routed through basicRead(), which asks the stream throttler for permission
    * before touching the wrapped stream and reports the number of bytes actually read afterwards.
    * All other InputStream operations are delegated directly to the wrapped stream.
    */
    protected static class ThrottledInputstream extends InputStream {
        /** Throttled connection */
        protected final ThrottledConnection throttledConnection;
        /** Stream throttler */
        protected final IStreamThrottler streamThrottler;
        /** The stream we are wrapping. */
        protected final InputStream inputStream;

        /** Constructor.
        *@param throttledConnection is the connection that is told how many bytes each read fetched.
        *@param streamThrottler is the throttler consulted before, and notified after, every read.
        *@param is is the stream being wrapped.
        */
        public ThrottledInputstream(ThrottledConnection throttledConnection, IStreamThrottler streamThrottler,
                InputStream is) {
            this.throttledConnection = throttledConnection;
            this.streamThrottler = streamThrottler;
            this.inputStream = is;
        }

        /** Read a byte.
        *@return the byte read, as a value in the range 0 to 255, or -1 at end of stream.
        */
        @Override
        public int read() throws IOException {
            byte[] byteArray = new byte[1];
            int count = read(byteArray, 0, 1);
            if (count == -1)
                return count;
            // Mask off the sign extension: a plain (int) cast would turn bytes >= 0x80 into negative
            // values, and 0xFF into -1, which callers would mistake for end of stream.
            return byteArray[0] & 0xff;
        }

        /** Read lots of bytes.
        *@param b is the buffer to fill.
        *@return the number of bytes read, or -1 at end of stream.
        */
        @Override
        public int read(byte[] b) throws IOException {
            return read(b, 0, b.length);
        }

        /** Read lots of specific bytes.
        * The request is broken into pieces of at most ThrottledFetcher.READ_CHUNK_LENGTH bytes, each of
        * which is throttled separately.
        *@param b is the buffer to fill.
        *@param off is the offset in the buffer at which to start storing bytes.
        *@param len is the maximum number of bytes to read.
        *@return the total number of bytes read, or -1 if end of stream was hit before any byte was read.
        */
        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int totalCount = 0;
            // Full-size chunks first...
            while (len > ThrottledFetcher.READ_CHUNK_LENGTH) {
                int amt = basicRead(b, off, ThrottledFetcher.READ_CHUNK_LENGTH, totalCount);
                if (amt == -1)
                    return (totalCount == 0) ? -1 : totalCount;
                totalCount += amt;
                off += amt;
                len -= amt;
            }
            // ...then whatever remains.
            if (len > 0) {
                int amt = basicRead(b, off, len, totalCount);
                if (amt == -1)
                    return (totalCount == 0) ? -1 : totalCount;
                return totalCount + amt;
            }
            return totalCount;
        }

        /** Basic read, which uses the server object to throttle activity.
        *@param b is the buffer to fill.
        *@param off is the offset in the buffer at which to start storing bytes.
        *@param len is the maximum number of bytes to read; this is also the amount of read permission requested.
        *@param totalSoFar is the number of bytes already read by the enclosing read() call; it is reported
        * in the InterruptedIOException if the wait for permission is interrupted.
        *@return the number of bytes read, or -1 at end of stream.
        *@throws InterruptedIOException if the thread is interrupted.
        *@throws IllegalStateException if the throttler refuses read permission.
        */
        protected int basicRead(byte[] b, int off, int len, int totalSoFar) throws IOException {
            try {
                if (!streamThrottler.obtainReadPermission(len))
                    throw new IllegalStateException("Throttler shut down while still active");
                // Stays 0 if the underlying read throws, so that the permission is released as "nothing read".
                int amt = 0;
                try {
                    amt = inputStream.read(b, off, len);
                    return amt;
                } finally {
                    if (amt == -1)
                        streamThrottler.releaseReadPermission(len, 0);
                    else {
                        streamThrottler.releaseReadPermission(len, amt);
                        throttledConnection.logFetchCount(amt);
                    }
                }
            } catch (InterruptedException e) {
                InterruptedIOException e2 = new InterruptedIOException("Interrupted");
                e2.bytesTransferred = totalSoFar;
                throw e2;
            }
        }

        /** Skip.  Delegates to the wrapped stream without any throttling.
        */
        @Override
        public long skip(long n) throws IOException {
            // Not sure whether we should bother doing anything with this; it's not used.
            return inputStream.skip(n);
        }

        /** Get available.
        */
        @Override
        public int available() throws IOException {
            return inputStream.available();
        }

        /** Mark.
        */
        @Override
        public void mark(int readLimit) {
            inputStream.mark(readLimit);
        }

        /** Reset.
        */
        @Override
        public void reset() throws IOException {
            inputStream.reset();
        }

        /** Check if mark is supported.
        */
        @Override
        public boolean markSupported() {
            return inputStream.markSupported();
        }

        /** Close.  The stream throttler is notified even if closing the wrapped stream fails.
        */
        @Override
        public void close() throws IOException {
            try {
                inputStream.close();
            } finally {
                streamThrottler.closeStream();
            }
        }

    }

    /** This thread does the actual socket communication with the server.
    * It's set up so that it can be abandoned at shutdown time.
    *
    * The way it works is as follows:
    * - it starts the transaction
    * - it receives the response, and saves that for the calling class to inspect
    * - it transfers the data part to an input stream provided to the calling class
    * - it shuts the connection down
    *
    * If there is an error, the sequence is aborted, and an exception is recorded
    * for the calling class to examine.
    *
    * The calling class basically accepts the sequence above.  It starts the
    * thread, and tries to get a response code.  If instead an exception is seen,
    * the exception is thrown up the stack.
    *
    * Results are handed to the caller through fields of this object; every hand-off happens
    * inside a synchronized (this) block and is followed by notifyAll(), and the getters wait()
    * on this object until the field they need has been populated.
    */
    protected static class ExecuteMethodThread extends Thread {
        /** The connection */
        protected final ThrottledConnection theConnection;
        /** The fetch throttler */
        protected final IFetchThrottler fetchThrottler;
        /** Client and method, all preconfigured */
        protected final HttpClient httpClient;
        protected final HttpRequestBase executeMethod;

        /** The response, set once httpClient.execute() returns successfully. */
        protected HttpResponse response = null;
        /** Exception recorded while executing the request, if any. */
        protected Throwable responseException = null;
        /** The cross-thread stream returned to the caller; stays null if the response has no content stream. */
        protected XThreadInputStream threadStream = null;
        /** The (throttled) response body stream; closed and nulled in run()'s finally block. */
        protected InputStream bodyStream = null;
        /** Set to true once the stream setup step has completed without an exception. */
        protected boolean streamCreated = false;
        /** Exception recorded while setting up the body stream, if any. */
        protected Throwable streamException = null;

        /** Set by abort(); checked by run() before each step that it performs under the monitor. */
        protected boolean abortThread = false;

        /** Exception thrown by executeMethod.abort() during shutdown, if any.  Only written in this class. */
        protected Throwable shutdownException = null;

        /** Any throwable that escaped the main sequence in run().  Only written in this class. */
        protected Throwable generalException = null;

        /** Constructor.  The thread is marked as a daemon thread.
        *@param theConnection is the connection that the throttled body stream reports to.
        *@param fetchThrottler is used to create the stream throttler for the response body.
        *@param httpClient is the preconfigured client used to execute the request.
        *@param executeMethod is the preconfigured request to execute.
        */
        public ExecuteMethodThread(ThrottledConnection theConnection, IFetchThrottler fetchThrottler,
                HttpClient httpClient, HttpRequestBase executeMethod) {
            super();
            setDaemon(true);
            this.theConnection = theConnection;
            this.fetchThrottler = fetchThrottler;
            this.httpClient = httpClient;
            this.executeMethod = executeMethod;
        }

        /** Thread body: execute the request, set up the body stream, pump the content, then shut down.
        * No exception leaves this method; anything not recorded in responseException or
        * streamException ends up in generalException.
        */
        public void run() {
            try {
                try {
                    // Call the execute method appropriately
                    // Note that the monitor is held for the whole duration of the execute() call.
                    synchronized (this) {
                        if (!abortThread) {
                            try {
                                response = httpClient.execute(executeMethod);
                            } catch (java.net.SocketTimeoutException e) {
                                // The timeout types are listed ahead of InterruptedIOException so that
                                // they are recorded for the caller rather than rethrown.
                                responseException = e;
                            } catch (ConnectTimeoutException e) {
                                responseException = e;
                            } catch (InterruptedIOException e) {
                                // Propagates to the outermost catch, where it lands in generalException.
                                throw e;
                            } catch (Throwable e) {
                                responseException = e;
                            }
                            // Wake anyone waiting in getResponseCode()/getFirstHeader().
                            this.notifyAll();
                        }
                    }

                    // Start the transfer of the content
                    if (responseException == null) {
                        synchronized (this) {
                            if (!abortThread) {
                                try {
                                    bodyStream = response.getEntity().getContent();
                                    if (bodyStream != null) {
                                        // Wrap the raw stream so reads are throttled, then wrap that
                                        // in the stream object that is handed to the calling thread.
                                        bodyStream = new ThrottledInputstream(theConnection,
                                                fetchThrottler.createFetchStream(), bodyStream);
                                        threadStream = new XThreadInputStream(bodyStream);
                                    }
                                    // Set even when there is no content stream; threadStream is then null.
                                    streamCreated = true;
                                } catch (java.net.SocketTimeoutException e) {
                                    streamException = e;
                                } catch (ConnectTimeoutException e) {
                                    streamException = e;
                                } catch (InterruptedIOException e) {
                                    throw e;
                                } catch (Throwable e) {
                                    streamException = e;
                                }
                                // Wake anyone waiting in getSafeInputStream().
                                this.notifyAll();
                            }
                        }
                    }

                    if (responseException == null && streamException == null) {
                        if (threadStream != null) {
                            // Stuff the content until we are done
                            threadStream.stuffQueue();
                        }
                    }

                } finally {
                    // Best-effort close of the body stream; a failure here is deliberately ignored.
                    if (bodyStream != null) {
                        try {
                            bodyStream.close();
                        } catch (IOException e) {
                        }
                        bodyStream = null;
                    }
                    // Always abort the request, remember any failure, and wake any waiters.
                    synchronized (this) {
                        try {
                            executeMethod.abort();
                        } catch (Throwable e) {
                            shutdownException = e;
                        }
                        this.notifyAll();
                    }
                }
            } catch (Throwable e) {
                // We catch exceptions here that should ONLY be InterruptedExceptions, as a result of the thread being aborted.
                this.generalException = e;
            }
        }

        /** Get the HTTP status code of the response, waiting until the response is available.
        * NOTE(review): if run() finishes without setting either response or responseException
        * (abortThread already set, or an InterruptedIOException rethrown from execute()), this loop
        * goes back into wait() after the final notifyAll() and looks like it could block
        * indefinitely -- confirm that callers never wait in that situation.
        *@return the status code from the response's status line.
        *@throws InterruptedException if the calling thread is interrupted while waiting.
        *@throws IOException if the request failed with an IOException.
        *@throws HttpException if the request failed with an HttpException.
        */
        public int getResponseCode() throws InterruptedException, IOException, HttpException {
            // Must wait until the response object is there
            while (true) {
                synchronized (this) {
                    checkException(responseException);
                    if (response != null)
                        return response.getStatusLine().getStatusCode();
                    wait();
                }
            }
        }

        /** Get the value of the first response header with the given name, waiting until the
        * response is available.
        *@param headerName is the name of the header to look up.
        *@return the header value, or null if the response has no such header.
        *@throws InterruptedException if the calling thread is interrupted while waiting.
        *@throws IOException if the request failed with an IOException.
        *@throws HttpException if the request failed with an HttpException.
        */
        public String getFirstHeader(String headerName) throws InterruptedException, IOException, HttpException {
            // Must wait for the response object to appear
            while (true) {
                synchronized (this) {
                    checkException(responseException);
                    if (response != null) {
                        Header h = response.getFirstHeader(headerName);
                        if (h == null)
                            return null;
                        return h.getValue();
                    }
                    wait();
                }
            }
        }

        /** Get the stream from which the calling thread can read the response body, waiting until
        * the stream has been set up.
        *@return the cross-thread stream, or null if the response had no content stream.
        *@throws IllegalStateException if the request itself failed; callers are expected to have
        * checked the response (e.g. via getResponseCode()) first.
        *@throws InterruptedException if the calling thread is interrupted while waiting.
        *@throws IOException if stream setup failed with an IOException.
        *@throws HttpException if stream setup failed with an HttpException.
        */
        public InputStream getSafeInputStream() throws InterruptedException, IOException, HttpException {
            // Must wait until stream is created, or until we note an exception was thrown.
            while (true) {
                synchronized (this) {
                    if (responseException != null)
                        throw new IllegalStateException("Check for response before getting stream");
                    checkException(streamException);
                    if (streamCreated)
                        return threadStream;
                    wait();
                }
            }
        }

        /** Request that the thread stop what it is doing.  Aborts the cross-thread stream if one
        * has been created, and sets the flag that makes run() skip any step it has not yet begun.
        */
        public void abort() {
            // This will be called during the finally
            // block in the case where all is well (and
            // the stream completed) and in the case where
            // there were exceptions.
            synchronized (this) {
                if (streamCreated) {
                    if (threadStream != null)
                        threadStream.abort();
                }
                abortThread = true;
            }
        }

        /** Wait for the thread to terminate.
        *@throws InterruptedException if the calling thread is interrupted while joining.
        */
        public void finishUp() throws InterruptedException {
            join();
        }

        /** Rethrow a recorded exception, if there is one, as the most specific type the callers declare.
        * IOException, HttpException, RuntimeException and Error are rethrown as-is; anything else is
        * wrapped in a RuntimeException.
        *@param exception is the recorded exception, or null if nothing went wrong.
        */
        protected synchronized void checkException(Throwable exception) throws IOException, HttpException {
            if (exception != null) {
                // Throw the recorded exception.  Note that the field it came from is NOT cleared here,
                // so a later call passing the same field will throw the same exception again.
                Throwable e = exception;
                if (e instanceof IOException)
                    throw (IOException) e;
                else if (e instanceof HttpException)
                    throw (HttpException) e;
                else if (e instanceof RuntimeException)
                    throw (RuntimeException) e;
                else if (e instanceof Error)
                    throw (Error) e;
                else
                    throw new RuntimeException("Unhandled exception of type: " + e.getClass().getName(), e);
            }
        }

    }

    /** This class furnishes an abort signal whenever the job activity says it should.
    * It should never be invoked from a background thread, only from a ManifoldCF thread.
    *
    * When a check fails, the original exception is remembered so that, after the resulting
    * BreakException has unwound the stack, the caller can rethrow it via rethrowExceptions().
    */
    protected static class AbortChecker implements IBreakCheck {
        /** The activity object that is asked whether the job is still active. */
        protected final IAbortActivity activities;
        /** The service interruption seen by the last failing check, if any. */
        protected ServiceInterruption serviceInterruption = null;
        /** The (non-interrupt) ManifoldCF exception seen by the last failing check, if any. */
        protected ManifoldCFException mcfException = null;

        /** Constructor.
        *@param activities is the activity object to consult on each check.
        */
        public AbortChecker(IAbortActivity activities) {
            this.activities = activities;
        }

        /** Check whether the job is still active.
        *@return 1000L when the job is still active (presumably the number of milliseconds until the
        * next check -- verify against the IBreakCheck contract).
        *@throws BreakException if the check raised a ServiceInterruption or a non-interrupt
        * ManifoldCFException; the original exception is saved for rethrowExceptions().
        *@throws InterruptedException if the check raised a ManifoldCFException whose error code is
        * INTERRUPTED; the ManifoldCFException is attached as the cause.
        */
        @Override
        public long abortCheck() throws BreakException, InterruptedException {
            try {
                activities.checkJobStillActive();
                return 1000L;
            } catch (ServiceInterruption e) {
                serviceInterruption = e;
                throw new BreakException("Break requested: " + e.getMessage(), e);
            } catch (ManifoldCFException e) {
                if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) {
                    // InterruptedException has no (message, cause) constructor, so attach the
                    // original exception explicitly rather than losing its stack trace.
                    InterruptedException interrupted = new InterruptedException("Interrupted: " + e.getMessage());
                    interrupted.initCause(e);
                    throw interrupted;
                }
                mcfException = e;
                throw new BreakException("Error during break check: " + e.getMessage(), e);
            }
        }

        /** Rethrow whatever exception a previous abortCheck() call saved.  Does nothing if no
        * exception was saved.  A saved ServiceInterruption takes precedence over a saved
        * ManifoldCFException.
        */
        public void rethrowExceptions() throws ManifoldCFException, ServiceInterruption {
            if (serviceInterruption != null)
                throw serviceInterruption;
            if (mcfException != null)
                throw mcfException;
        }
    }

}