Java tutorial
/** * Copyright (C) 2013 Seajas, the Netherlands. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3, as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.seajas.search.contender.http; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.Map; import java.util.Map.Entry; import java.util.zip.GZIPInputStream; import org.apache.http.Header; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.message.BasicHeader; import org.apache.http.params.CoreProtocolPNames; import org.apache.http.util.EntityUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.rometools.fetcher.FetcherEvent; import org.rometools.fetcher.FetcherException; import org.rometools.fetcher.impl.AbstractFeedFetcher; import org.rometools.fetcher.impl.FeedFetcherCache; import org.rometools.fetcher.impl.SyndFeedInfo; import com.sun.syndication.feed.synd.SyndFeed; import com.sun.syndication.io.FeedException; import com.sun.syndication.io.SyndFeedInput; import com.sun.syndication.io.XmlReader; /** * A feed fetcher implementation which takes care to remember the ETag header information. * * @author Nick Lothian * @author Jasper van Veghel <jasper@seajas.com> */ public class HttpClientFeedFetcher extends AbstractFeedFetcher { /** * The logger. */ private final static Logger logger = LoggerFactory.getLogger(HttpClientFeedFetcher.class); /** * The HTTP client. */ private final HttpClient httpClient; /** * The feed cache. */ private final FeedFetcherCache feedCache; /** * Default constructor. * * @param httpClient * @param feedCache */ public HttpClientFeedFetcher(final HttpClient httpClient, final FeedFetcherCache feedCache) { super(); this.httpClient = httpClient; this.feedCache = feedCache; } /** * NOTE: The User-Agent string is discarded in favor of the one given to the HttpClient. * * {@inheritDoc} */ @Override public SyndFeed retrieveFeed(final URL url) throws IllegalArgumentException, IOException, FeedException, FetcherException { return retrieveFeed(null, url); } /** * {@inheritDoc} */ @Override public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException { return retrieveFeed(userAgent, feedUrl, null); } /** * NOTE: If a user agent is given here, it overrides the one given to the HttpClient. * * @param userAgent * @param feedUrl * @param headers * @return SyndFeed * @throws IllegalArgumentException * @throws IOException * @throws FeedException * @throws FetcherException */ public SyndFeed retrieveFeed(final String userAgent, final URL feedUrl, final Map<String, String> headers) throws IllegalArgumentException, IOException, FeedException, FetcherException { if (feedUrl == null) throw new IllegalArgumentException("The given URL is invalid"); HttpGet method = new HttpGet(feedUrl.toString()); method.setHeader(new BasicHeader("Accept-Encoding", "gzip")); if (headers != null) for (Entry<String, String> header : headers.entrySet()) method.setHeader(new BasicHeader(header.getKey(), header.getValue())); if (userAgent != null) method.setHeader(new BasicHeader(CoreProtocolPNames.USER_AGENT, userAgent)); // Retrieve the feed if (isUsingDeltaEncoding()) method.setHeader(new BasicHeader("A-IM", "feed")); SyndFeedInfo syndFeedInfo = feedCache.getFeedInfo(feedUrl); if (syndFeedInfo != null) { method.setHeader(new BasicHeader("If-None-Match", syndFeedInfo.getETag())); if (syndFeedInfo.getLastModified() instanceof String) method.setHeader(new BasicHeader("If-Modified-Since", (String) syndFeedInfo.getLastModified())); } HttpResponse response = httpClient.execute(method); fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, feedUrl.toString()); try { handleErrorCodes(response.getStatusLine().getStatusCode()); SyndFeed feed = handleResponse(syndFeedInfo, feedUrl.toString(), response); syndFeedInfo = buildSyndFeedInfo(feedUrl, feedUrl.toString(), response, feed); feedCache.setFeedInfo(feedUrl, syndFeedInfo); // The feed may have been modified to pick up cached values (e.g. for delta encoding) return syndFeedInfo.getSyndFeed(); } catch (RuntimeException e) { method.abort(); throw e; } catch (FetcherException e) { if (logger.isInfoEnabled()) logger.info("Consuming all entity content so that the connection is properly released."); EntityUtils.consume(response.getEntity()); throw e; } } /** * Handle the response, making sure to take the Not-Modified header information into account. * * @param syndFeedInfo * @param url * @param response * @return SyndFeed * @throws IOException * @throws FetcherException * @throws FeedException */ private SyndFeed handleResponse(final SyndFeedInfo syndFeedInfo, final String url, final HttpResponse response) throws IOException, FetcherException, FeedException { if (response.getStatusLine().getStatusCode() == HttpURLConnection.HTTP_NOT_MODIFIED && syndFeedInfo != null) { fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, url); return syndFeedInfo.getSyndFeed(); } SyndFeed feed = buildFeedFromResponse(url, response); fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, url, feed); return feed; } /** * Parse the actual response content. * * @param url * @param method * @return * @throws IOException * @throws HttpException * @throws FetcherException * @throws FeedException */ private SyndFeed buildFeedFromResponse(final String url, final HttpResponse response) throws IOException, FetcherException, FeedException { InputStream stream = null; if (response.getFirstHeader("Content-Encoding") != null && "gzip".equalsIgnoreCase(response.getFirstHeader("Content-Encoding").getValue())) stream = new GZIPInputStream(response.getEntity().getContent()); else stream = response.getEntity().getContent(); try { XmlReader reader = null; if (response.getFirstHeader("Content-Type") != null) reader = new XmlReader(stream, response.getFirstHeader("Content-Type").getValue(), true); else reader = new XmlReader(stream, true); SyndFeedInput syndFeedInput = new SyndFeedInput(); syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); return syndFeedInput.build(reader); } finally { if (stream != null) stream.close(); } } /** * Build the syndication feed information. * * @param feedUrl * @param url * @param response * @param feed * @return SyndFeedInfo * @throws MalformedURLException */ private SyndFeedInfo buildSyndFeedInfo(final URL feedUrl, final String url, final HttpResponse response, SyndFeed feed) throws MalformedURLException { SyndFeedInfo syndFeedInfo = new SyndFeedInfo(); // This may be different to the feed URL because of 3XX redirects syndFeedInfo.setUrl(new URL(url)); syndFeedInfo.setId(feedUrl.toString()); Header imHeader = response.getFirstHeader("IM"); if (imHeader != null && imHeader.getValue().indexOf("feed") >= 0 && isUsingDeltaEncoding()) { if (response.getStatusLine().getStatusCode() == 226) { // Client is set up to use HTTP delta encoding and the server supports it and has returned a delta encoded response; this response only includes new items SyndFeedInfo cachedInfo = feedCache.getFeedInfo(feedUrl); if (cachedInfo != null) { SyndFeed cachedFeed = cachedInfo.getSyndFeed(); // Set the new feed to be the original feed plus the new items feed = combineFeeds(cachedFeed, feed); } } } Header lastModifiedHeader = response.getFirstHeader("Last-Modified"); if (lastModifiedHeader != null) syndFeedInfo.setLastModified(lastModifiedHeader.getValue()); Header eTagHeader = response.getFirstHeader("ETag"); if (eTagHeader != null) syndFeedInfo.setETag(eTagHeader.getValue()); syndFeedInfo.setSyndFeed(feed); return syndFeedInfo; } }