com.rometools.fetcher.impl.HttpClientFeedFetcher.java Source code

Java tutorial

Introduction

Here is the source code for com.rometools.fetcher.impl.HttpClientFeedFetcher.java

Source

/*
 * Copyright 2004 Sun Microsystems, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package com.rometools.fetcher.impl;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;

import com.rometools.fetcher.FetcherEvent;
import com.rometools.fetcher.FetcherException;
import com.rometools.rome.feed.synd.SyndFeed;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
import com.rometools.utils.IO;

/**
 * @author Nick Lothian
 */
public class HttpClientFeedFetcher extends AbstractFeedFetcher {

    private CredentialSupplier credentialSupplier;
    private FeedFetcherCache feedInfoCache;
    private volatile HttpClientMethodCallbackIntf httpClientMethodCallback;
    private volatile HttpClientParams httpClientParams;
    private Map<String, String> customRequestHeaders;

    public HttpClientFeedFetcher() {
        this(null, null);
    }

    public HttpClientFeedFetcher(final FeedFetcherCache cache) {
        this(cache, null);
    }

    public HttpClientFeedFetcher(final FeedFetcherCache cache, final CredentialSupplier credentialSupplier) {
        setHttpClientParams(new HttpClientParams());
        setFeedInfoCache(cache);
        setCredentialSupplier(credentialSupplier);
    }

    @Override
    public SyndFeed retrieveFeed(final URL url)
            throws IllegalArgumentException, IOException, FeedException, FetcherException {
        return this.retrieveFeed(getUserAgent(), url);
    }

    @Override
    public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl)
            throws IllegalArgumentException, IOException, FeedException, FetcherException {

        if (feedUrl == null) {
            throw new IllegalArgumentException("null is not a valid URL");
        }

        final HttpClient client = new HttpClient(httpClientParams);

        if (credentialSupplier != null) {

            final HttpClientParams params = client.getParams();
            params.setAuthenticationPreemptive(true);

            final String host = feedUrl.getHost();
            final Credentials credentials = credentialSupplier.getCredentials(null, host);
            if (credentials != null) {
                final AuthScope authScope = new AuthScope(host, -1);
                final HttpState state = client.getState();
                state.setCredentials(authScope, credentials);
            }

        }

        System.setProperty("httpclient.useragent", userAgent);

        final String urlStr = feedUrl.toString();
        final HttpMethod method = new GetMethod(urlStr);

        if (customRequestHeaders == null) {
            method.addRequestHeader("Accept-Encoding", "gzip");
            method.addRequestHeader("User-Agent", userAgent);

        } else {
            for (final Map.Entry<String, String> entry : customRequestHeaders.entrySet()) {
                method.addRequestHeader(entry.getKey(), entry.getValue());
            }
            if (!customRequestHeaders.containsKey("Accept-Encoding")) {
                method.addRequestHeader("Accept-Encoding", "gzip");
            }
            if (!customRequestHeaders.containsKey("User-Agent")) {
                method.addRequestHeader("User-Agent", userAgent);
            }
        }

        method.setFollowRedirects(true);

        if (httpClientMethodCallback != null) {
            synchronized (httpClientMethodCallback) {
                httpClientMethodCallback.afterHttpClientMethodCreate(method);
            }
        }

        final FeedFetcherCache cache = getFeedInfoCache();

        if (cache != null) {
            // retrieve feed
            try {

                if (isUsingDeltaEncoding()) {
                    method.setRequestHeader("A-IM", "feed");
                }

                // try to get the feed info from the cache
                SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);

                if (syndFeedInfo != null) {

                    method.setRequestHeader("If-None-Match", syndFeedInfo.getETag());

                    final Object lastModifiedHeader = syndFeedInfo.getLastModified();
                    if (lastModifiedHeader instanceof String) {
                        method.setRequestHeader("If-Modified-Since", (String) lastModifiedHeader);
                    }

                }

                final int statusCode = client.executeMethod(method);
                fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
                handleErrorCodes(statusCode);

                SyndFeed feed = getFeed(syndFeedInfo, urlStr, method, statusCode);

                syndFeedInfo = buildSyndFeedInfo(feedUrl, urlStr, method, feed, statusCode);

                cache.setFeedInfo(feedUrl, syndFeedInfo);

                // the feed may have been modified to pick up cached values
                // (eg - for delta encoding)
                feed = syndFeedInfo.getSyndFeed();

                return feed;

            } finally {

                method.releaseConnection();

            }

        } else {

            // cache is not in use
            try {

                final int statusCode = client.executeMethod(method);
                fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, urlStr);
                handleErrorCodes(statusCode);

                return getFeed(null, urlStr, method, statusCode);

            } finally {

                method.releaseConnection();

            }

        }

    }

    private SyndFeed getFeed(final SyndFeedInfo syndFeedInfo, final String urlStr, final HttpMethod method,
            final int statusCode) throws IOException, HttpException, FetcherException, FeedException {

        if (statusCode == HttpURLConnection.HTTP_NOT_MODIFIED && syndFeedInfo != null) {
            fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, urlStr);
            return syndFeedInfo.getSyndFeed();
        }

        final SyndFeed feed = retrieveFeed(urlStr, method);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, urlStr, feed);
        return feed;
    }

    private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String urlStr, final HttpMethod method,
            SyndFeed feed, final int statusCode) throws MalformedURLException {

        SyndFeedInfo syndFeedInfo;
        syndFeedInfo = new SyndFeedInfo();

        // this may be different to feedURL because of 3XX redirects
        syndFeedInfo.setUrl(new URL(urlStr));
        syndFeedInfo.setId(feedUrl.toString());

        final Header imHeader = method.getResponseHeader("IM");
        if (imHeader != null && imHeader.getValue().contains("feed") && isUsingDeltaEncoding()) {

            final FeedFetcherCache cache = getFeedInfoCache();

            if (cache != null && statusCode == 226) {
                // client is setup to use http delta encoding and the server supports it and has
                // returned a delta encoded response. This response only includes new items
                final SyndFeedInfo cachedInfo = cache.getFeedInfo(feedUrl);

                if (cachedInfo != null) {
                    final SyndFeed cachedFeed = cachedInfo.getSyndFeed();

                    // set the new feed to be the orginal feed plus the new items
                    feed = combineFeeds(cachedFeed, feed);
                }
            }
        }

        final Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
        if (lastModifiedHeader != null) {
            syndFeedInfo.setLastModified(lastModifiedHeader.getValue());
        }

        final Header eTagHeader = method.getResponseHeader("ETag");
        if (eTagHeader != null) {
            syndFeedInfo.setETag(eTagHeader.getValue());
        }

        syndFeedInfo.setSyndFeed(feed);

        return syndFeedInfo;
    }

    private SyndFeed retrieveFeed(final String urlStr, final HttpMethod method)
            throws IOException, HttpException, FetcherException, FeedException {

        final Header contentEncodingHeader = method.getResponseHeader("Content-Encoding");

        final InputStream stream;
        if (contentEncodingHeader != null && "gzip".equalsIgnoreCase(contentEncodingHeader.getValue())) {
            stream = new GZIPInputStream(method.getResponseBodyAsStream());
        } else {
            stream = method.getResponseBodyAsStream();
        }

        try {

            final Header contentTypeHeader = method.getResponseHeader("Content-Type");

            final XmlReader reader;
            if (contentTypeHeader != null) {
                reader = new XmlReader(stream, contentTypeHeader.getValue(), true);
            } else {
                reader = new XmlReader(stream, true);
            }

            final SyndFeedInput syndFeedInput = new SyndFeedInput();
            syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());

            return syndFeedInput.build(reader);

        } finally {

            IO.close(stream);

        }

    }

    public synchronized void setRetryHandler(final HttpMethodRetryHandler handler) {
        httpClientParams.setParameter(HttpMethodParams.RETRY_HANDLER, handler);
    }

    /**
     * @param timeout Sets the connect timeout for the HttpClient but using the URLConnection method
     *            name. Uses the HttpClientParams method setConnectionManagerTimeout instead of
     *            setConnectTimeout
     *
     */
    public synchronized void setConnectTimeout(final int timeout) {
        httpClientParams.setConnectionManagerTimeout(timeout);
    }

    /**
     * @return The currently used connect timeout for the HttpClient but using the URLConnection
     *         method name. Uses the HttpClientParams method getConnectionManagerTimeout instead of
     *         getConnectTimeout
     *
     */
    public int getConnectTimeout() {
        return (int) getHttpClientParams().getConnectionManagerTimeout();
    }

    public synchronized void setCredentialSupplier(final CredentialSupplier credentialSupplier) {
        this.credentialSupplier = credentialSupplier;
    }

    public synchronized CredentialSupplier getCredentialSupplier() {
        return credentialSupplier;
    }

    public synchronized void setFeedInfoCache(final FeedFetcherCache feedInfoCache) {
        this.feedInfoCache = feedInfoCache;
    }

    public synchronized FeedFetcherCache getFeedInfoCache() {
        return feedInfoCache;
    }

    public synchronized void setHttpClientMethodCallback(
            final HttpClientMethodCallbackIntf httpClientMethodCallback) {
        this.httpClientMethodCallback = httpClientMethodCallback;
    }

    public HttpClientMethodCallbackIntf getHttpClientMethodCallback() {
        return httpClientMethodCallback;
    }

    public synchronized void setHttpClientParams(final HttpClientParams httpClientParams) {
        this.httpClientParams = httpClientParams;
    }

    public synchronized HttpClientParams getHttpClientParams() {
        return httpClientParams;
    }

    /**
     * @return The currently used read timeout for the URLConnection, 0 is unlimited, i.e. no
     *         timeout
     */
    public synchronized void setReadTimeout(final int timeout) {
        httpClientParams.setSoTimeout(timeout);
    }

    /**
     * @return timeout the read timeout for the URLConnection to a specified timeout, in
     *         milliseconds.
     */
    public int getReadTimeout() {
        return getHttpClientParams().getSoTimeout();
    }

    /**
     * Apply any request headers to the HTTP method call.
     *
     * @param customRequestHeaders
     */
    public synchronized void setCustomRequestHeaders(final Map<String, String> customRequestHeaders) {
        this.customRequestHeaders = customRequestHeaders;
    }

    public interface CredentialSupplier {
        public Credentials getCredentials(String realm, String host);
    }

    public interface HttpClientMethodCallbackIntf {

        /**
         * Allows access to the underlying HttpClient HttpMethod object. Note that in most cases,
         * method.setRequestHeader(String, String) is what you want to do (rather than
         * method.addRequestHeader(String, String))
         *
         * @param method
         */
        public void afterHttpClientMethodCreate(HttpMethod method);
    }

}