com.seajas.search.contender.http.HttpClientFeedFetcher.java Source code

Java tutorial

Introduction

Here is the source code for com.seajas.search.contender.http.HttpClientFeedFetcher.java

Source

/**
 * Copyright (C) 2013 Seajas, the Netherlands.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.seajas.search.contender.http;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import java.util.Map.Entry;
import java.util.zip.GZIPInputStream;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.rometools.fetcher.FetcherEvent;
import org.rometools.fetcher.FetcherException;
import org.rometools.fetcher.impl.AbstractFeedFetcher;
import org.rometools.fetcher.impl.FeedFetcherCache;
import org.rometools.fetcher.impl.SyndFeedInfo;

import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

/**
 * A feed fetcher implementation, backed by an Apache {@link HttpClient}, which takes care to remember the ETag and
 * Last-Modified header information so that subsequent polls can be performed as conditional requests.
 * 
 * @author Nick Lothian
 * @author Jasper van Veghel <jasper@seajas.com>
 */
public class HttpClientFeedFetcher extends AbstractFeedFetcher {
    /**
     * The logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(HttpClientFeedFetcher.class);

    /**
     * The HTTP status code (226 IM Used) which indicates a delta-encoded response.
     */
    private static final int HTTP_IM_USED = 226;

    /**
     * The HTTP client.
     */
    private final HttpClient httpClient;

    /**
     * The feed cache.
     */
    private final FeedFetcherCache feedCache;

    /**
     * Default constructor.
     * 
     * @param httpClient the client used to execute all feed requests
     * @param feedCache the cache used to remember previously retrieved feeds and their validators
     */
    public HttpClientFeedFetcher(final HttpClient httpClient, final FeedFetcherCache feedCache) {
        super();

        this.httpClient = httpClient;
        this.feedCache = feedCache;
    }

    /**
     * NOTE: The User-Agent string is discarded in favor of the one given to the HttpClient.
     * 
     * {@inheritDoc}
     */
    @Override
    public SyndFeed retrieveFeed(final URL url)
            throws IllegalArgumentException, IOException, FeedException, FetcherException {
        return retrieveFeed(null, url);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl)
            throws IllegalArgumentException, IOException, FeedException, FetcherException {
        return retrieveFeed(userAgent, feedUrl, null);
    }

    /**
     * Retrieve the given feed, optionally sending additional request headers.
     * 
     * NOTE: If a user agent is given here, it overrides the one given to the HttpClient.
     * 
     * @param userAgent the User-Agent header value to send, or <code>null</code> to leave it up to the HttpClient
     * @param feedUrl the URL of the feed to retrieve; must not be <code>null</code>
     * @param headers additional request headers to send, or <code>null</code> for none
     * @return SyndFeed
     * @throws IllegalArgumentException if no feed URL was given
     * @throws IOException if the request could not be executed or the response could not be read
     * @throws FeedException if the response content could not be parsed as a feed
     * @throws FetcherException if the server responded with an error status or without any content
     */
    public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl, final Map<String, String> headers)
            throws IllegalArgumentException, IOException, FeedException, FetcherException {
        if (feedUrl == null)
            throw new IllegalArgumentException("The given URL is invalid");

        HttpGet method = new HttpGet(feedUrl.toString());

        method.setHeader(new BasicHeader("Accept-Encoding", "gzip"));

        if (headers != null)
            for (Entry<String, String> header : headers.entrySet())
                method.setHeader(new BasicHeader(header.getKey(), header.getValue()));

        // The actual header name must be used here; CoreProtocolPNames.USER_AGENT is a parameter key
        // ("http.useragent"), not a header name. HttpClient only adds its own User-Agent when none is present.

        if (userAgent != null)
            method.setHeader(new BasicHeader("User-Agent", userAgent));

        if (isUsingDeltaEncoding())
            method.setHeader(new BasicHeader("A-IM", "feed"));

        // Turn this into a conditional request using whatever validators were remembered from the previous poll

        final SyndFeedInfo cachedInfo = feedCache.getFeedInfo(feedUrl);

        if (cachedInfo != null) {
            if (cachedInfo.getETag() != null)
                method.setHeader(new BasicHeader("If-None-Match", cachedInfo.getETag()));

            if (cachedInfo.getLastModified() instanceof String)
                method.setHeader(new BasicHeader("If-Modified-Since", (String) cachedInfo.getLastModified()));
        }

        // Retrieve the feed

        HttpResponse response = httpClient.execute(method);

        try {
            fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, feedUrl.toString());

            handleErrorCodes(response.getStatusLine().getStatusCode());

            SyndFeed feed = handleResponse(cachedInfo, feedUrl.toString(), response);

            SyndFeedInfo updatedInfo = buildSyndFeedInfo(feedUrl, feedUrl.toString(), response, cachedInfo, feed);

            feedCache.setFeedInfo(feedUrl, updatedInfo);

            // The feed may have been modified to pick up cached values (e.g. for delta encoding)

            return updatedInfo.getSyndFeed();
        } catch (RuntimeException e) {
            method.abort();

            throw e;
        } catch (FetcherException e) {
            logger.info("Consuming all entity content so that the connection is properly released.");

            try {
                EntityUtils.consume(response.getEntity());
            } catch (IOException consumeFailure) {
                // Draining failed, so drop the connection instead; the fetcher exception remains the one reported

                logger.debug("Could not consume the response content, aborting the request instead", consumeFailure);

                method.abort();
            }

            throw e;
        }
    }

    /**
     * Handle the response, making sure to take the Not-Modified header information into account.
     * 
     * @param syndFeedInfo the previously cached feed information, or <code>null</code> if there is none
     * @param url the feed URL, used for event reporting
     * @param response the HTTP response to handle
     * @return SyndFeed the cached feed on a 304 response (when available), the freshly parsed feed otherwise
     * @throws IOException
     * @throws FetcherException
     * @throws FeedException
     */
    private SyndFeed handleResponse(final SyndFeedInfo syndFeedInfo, final String url, final HttpResponse response)
            throws IOException, FetcherException, FeedException {
        if (response.getStatusLine().getStatusCode() == HttpURLConnection.HTTP_NOT_MODIFIED
                && syndFeedInfo != null) {
            fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, url);

            return syndFeedInfo.getSyndFeed();
        }

        SyndFeed feed = buildFeedFromResponse(url, response);
        fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, url, feed);

        return feed;
    }

    /**
     * Parse the actual response content, transparently decompressing gzip-encoded content.
     * 
     * @param url the feed URL, used for error reporting
     * @param response the HTTP response whose content should be parsed
     * @return SyndFeed
     * @throws IOException if the content could not be read or decompressed
     * @throws FetcherException if the response does not carry any content
     * @throws FeedException if the content could not be parsed as a feed
     */
    private SyndFeed buildFeedFromResponse(final String url, final HttpResponse response)
            throws IOException, FetcherException, FeedException {
        if (response.getEntity() == null)
            throw new FetcherException(response.getStatusLine().getStatusCode(),
                    "The response for " + url + " did not contain any content");

        InputStream stream = response.getEntity().getContent();

        try {
            Header contentEncoding = response.getFirstHeader("Content-Encoding");

            // Wrapped inside the try-block, so that the underlying stream still gets closed if the GZIP header is invalid

            if (contentEncoding != null && "gzip".equalsIgnoreCase(contentEncoding.getValue()))
                stream = new GZIPInputStream(stream);

            Header contentType = response.getFirstHeader("Content-Type");

            XmlReader reader = contentType != null ? new XmlReader(stream, contentType.getValue(), true)
                    : new XmlReader(stream, true);

            SyndFeedInput syndFeedInput = new SyndFeedInput();

            syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());

            return syndFeedInput.build(reader);
        } finally {
            // Closes the outermost stream which was successfully created, and with it the underlying content stream

            if (stream != null)
                stream.close();
        }
    }

    /**
     * Build the syndication feed information which is to be cached for the next poll.
     * 
     * @param feedUrl the feed URL, which doubles as the cache identifier
     * @param url the URL the feed was retrieved from
     * @param response the HTTP response the feed was taken from
     * @param previousInfo the feed information which was cached prior to this request, or <code>null</code>
     * @param feed the feed which resulted from handling the response
     * @return SyndFeedInfo
     * @throws MalformedURLException
     */
    private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String url, final HttpResponse response,
            final SyndFeedInfo previousInfo, SyndFeed feed) throws MalformedURLException {
        SyndFeedInfo syndFeedInfo = new SyndFeedInfo();

        // This may be different to the feed URL because of 3XX redirects

        syndFeedInfo.setUrl(new URL(url));
        syndFeedInfo.setId(feedUrl.toString());

        final int statusCode = response.getStatusLine().getStatusCode();

        Header imHeader = response.getFirstHeader("IM");

        if (imHeader != null && imHeader.getValue().contains("feed") && isUsingDeltaEncoding()
                && statusCode == HTTP_IM_USED && previousInfo != null) {
            // Client is set up to use HTTP delta encoding and the server supports it and has returned a delta encoded response; this response only includes new items
            //
            // Set the new feed to be the original feed (the one the conditional request was based on) plus the new items

            feed = combineFeeds(previousInfo.getSyndFeed(), feed);
        }

        // A 304 response is not required to repeat the validators, so hold on to the cached ones in that case -
        // otherwise the next poll would needlessly turn into an unconditional request

        final boolean notModified = statusCode == HttpURLConnection.HTTP_NOT_MODIFIED && previousInfo != null;

        Header lastModifiedHeader = response.getFirstHeader("Last-Modified");

        if (lastModifiedHeader != null)
            syndFeedInfo.setLastModified(lastModifiedHeader.getValue());
        else if (notModified)
            syndFeedInfo.setLastModified(previousInfo.getLastModified());

        Header eTagHeader = response.getFirstHeader("ETag");

        if (eTagHeader != null)
            syndFeedInfo.setETag(eTagHeader.getValue());
        else if (notModified)
            syndFeedInfo.setETag(previousInfo.getETag());

        syndFeedInfo.setSyndFeed(feed);

        return syndFeedInfo;
    }
}