// Java tutorial
/** * This file is part of the BuffaloKiwi API package, and is subject to the * terms and conditions defined in file 'LICENSE', which is part * of this source code package. * * Copyright (c) 2016 All Rights Reserved, John T. Quinn III, * <johnquinn3@gmail.com> * * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A * PARTICULAR PURPOSE. */ package com.buffalokiwi.api; import java.io.IOException; import java.net.URISyntaxException; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import org.apache.http.Header; import org.apache.http.HeaderElement; import org.apache.http.HeaderElementIterator; import org.apache.http.HttpEntity; import org.apache.http.HttpException; import org.apache.http.HttpRequest; import org.apache.http.HttpRequestInterceptor; import org.apache.http.HttpResponse; import org.apache.http.HttpResponseInterceptor; import org.apache.http.client.RedirectStrategy; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.entity.GzipDecompressingEntity; import org.apache.http.client.utils.URIBuilder; import org.apache.http.conn.ConnectionKeepAliveStrategy; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.message.BasicHeaderElementIterator; import org.apache.http.protocol.HTTP; import org.apache.http.protocol.HttpContext; /** * A wrapper for HTTPClient settings, and a builder class for building 
instances * of that wrapper. * * @author John Quinn */ public class APIHttpClient implements IAPIHttpClient { /** * Use this to build instances of APIHttpClient */ public static class Builder { /** * Default user agent */ public static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (compatible; BuffaloKiwiAPI/1.0; +http://www.buffalokiwi.com)"; /** * Default socket read timeout */ public static final long DEFAULT_READ_TIMEOUT = 10000L; /** * Default accept header value */ public static final String DEFAULT_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"; /** * Default accept language header value */ public static final String DEFAULT_ACCEPT_LANGUAGE = "en-US,en;q=0.5"; /** * Default allow gzip flag */ public static final boolean DEFAULT_ALLOW_GZIP = true; /** * Default allow untrusted ssl flag */ public static final boolean DEFAULT_ALLOW_UNTRUSTED_SSL = true; /** * Default delay between requests */ public static final long DEFAULT_CRAWL_DELAY = 1000L; /** * The user agent string to use */ protected String userAgent = DEFAULT_USER_AGENT; /** * The socket read timeout */ protected long readTimeout = DEFAULT_READ_TIMEOUT; /** * Types of encodings that are acceptable */ protected String accept = DEFAULT_ACCEPT; /** * Accepted languages */ protected String acceptLanguage = DEFAULT_ACCEPT_LANGUAGE; /** * Toggle allowing gzip */ private boolean allowgzip = DEFAULT_ALLOW_GZIP; /** * If untrusted SSL connections should be allowed */ private boolean allowUntrustedSSL = DEFAULT_ALLOW_UNTRUSTED_SSL; /** * Crawl delay */ private long crawlDelay = DEFAULT_CRAWL_DELAY; private long maxDownloadSize = -1L; /** * Some hostname */ private String host = ""; /** * Set the user agent string to use * @param ua User agent string * @return The builder instance */ public APIHttpClient.Builder setUserAgent(final String ua) { if (ua == null) throw new IllegalArgumentException("ua cannot be null"); userAgent = ua; return this; } public APIHttpClient.Builder 
setMaxDownloadSize(final long size) { if (size < -1) throw new IllegalArgumentException("size must be greater than -1"); maxDownloadSize = size; return this; } public long getMaxDownloadSize() { return maxDownloadSize; } /** * Sets the socket read timeout in milliseconds * @param timeout milliseconds * @return The builder instance */ public APIHttpClient.Builder setReadTimeout(final long timeout) { if (timeout < 1) throw new IllegalArgumentException("timeout must be greater than zero"); readTimeout = timeout; return this; } /** * Set the accept request header value * @param accept Accept header value * @return The builder instance */ public APIHttpClient.Builder setAccept(final String accept) { if (accept.trim().isEmpty()) throw new IllegalArgumentException("Accept cannot be empty"); this.accept = accept; return this; } /** * Set the value for the Accept-Language request header * @param accept value * @return The builder instance */ public APIHttpClient.Builder setAcceptLanguages(final String accept) { if (accept.trim().isEmpty()) throw new IllegalArgumentException("Accept cannot be empty"); acceptLanguage = accept; return this; } /** * Set accept gzip header. Defaults to false. * @param on toggle * @return The builder instance */ public APIHttpClient.Builder setAllowgzip(final boolean on) { allowgzip = on; return this; } /** * Sets the crawl delay in milliseconds. 
* This is the minimum time between requests * @param delay Delay between requests * @return The builder instance * @throws IllegalArgumentException if delay is less than zero */ public APIHttpClient.Builder setCrawlDelay(final long delay) throws IllegalArgumentException { if (delay < 0) { throw new IllegalArgumentException("delay must be greater than or equal to zero"); } crawlDelay = delay; return this; } /** * Set the allow untrusted ssl flag * @param allow enabled * @return The builder instance */ public APIHttpClient.Builder setAllowUntrustedSSL(final boolean allow) { allowUntrustedSSL = allow; return this; } /** * Set the host * @param host host * @return builder */ public APIHttpClient.Builder setHost(final String host) { this.host = host; return this; } /** * Build the HttpClient shared instance. * Available via getClient(). * @return the client * @throws URISyntaxException if URIBuilder throws an exception * with the host * @throws APIException If there is a problem building the HttpClient */ public APIHttpClient build() throws APIException, URISyntaxException { final APIHttpClient client = new APIHttpClient(userAgent, readTimeout, accept, acceptLanguage, allowgzip, allowUntrustedSSL, host, crawlDelay, maxDownloadSize); //..Create the new shared client to use client.client = client.createNewClient(); return client; } } //..End Builder /** * Maximum connections */ public static final int DEFAULT_MAX_TOTAL = 200; /** * Maximum connections per route */ public static final int DEFAULT_MAX_PER_ROUTE = 50; /** * The HTTP Client connection manager */ private final PoolingHttpClientConnectionManager POOL = new PoolingHttpClientConnectionManager(); /** * The idle connection monitor thread */ private final IdleConnectionMonitorThread MONITOR = new IdleConnectionMonitorThread(POOL); /** * The user agent string to use */ protected final String userAgent; /** * The socket read timeout */ protected final long readTimeout; /** * Types of encodings that are acceptable */ 
protected final String accept; /** * Accepted languages */ protected final String acceptLanguage; /** * Toggle allowing gzip */ private final boolean allowgzip; /** * If untrusted SSL connections should be allowed */ private final boolean allowUntrustedSSL; /** * The API host */ private final URIBuilder host; /** * The minimum time between requests. * Defaults to 1 second. */ private final long crawlDelay; /** * The HttpClient */ private CloseableHttpClient client = null; private final long maxDownloadSize; /** * Create a new APIHttpClient instance * @param userAgent The user agent * @param readTimeout The read timeout * @param accept The accept header value * @param acceptLanguage The accept language header value * @param allowgzip If gzip is allowed * @param allowUntrustedSSL If untrusted ssl is allowed * @param host The host (http://donkey.co) * @param crawlDelay some delay to use between requests * @throws URISyntaxException If the host is invalid */ protected APIHttpClient(final String userAgent, final long readTimeout, final String accept, final String acceptLanguage, final boolean allowgzip, final boolean allowUntrustedSSL, final String host, final long crawlDelay, final long maxDownloadSize) throws URISyntaxException { //..Set the pool defaults POOL.setMaxTotal(DEFAULT_MAX_TOTAL); POOL.setDefaultMaxPerRoute(DEFAULT_MAX_PER_ROUTE); this.maxDownloadSize = maxDownloadSize; this.host = new URIBuilder(host); this.readTimeout = readTimeout; this.accept = accept; this.acceptLanguage = acceptLanguage; this.allowgzip = allowgzip; this.allowUntrustedSSL = allowUntrustedSSL; this.userAgent = userAgent; this.crawlDelay = crawlDelay; } /** * Shutdown the underlying connection manager */ public synchronized void shutdownConnectionManager() { //..Shutdown the monitor thread MONITOR.shutdown(); //..Shutdown the connection pool POOL.shutdown(); } @Override public long getMaxDownloadSize() { return maxDownloadSize; } /** * Retrieve the user agent * @return User agent */ 
@Override public String getUserAgent() { return userAgent; } /** * Retrieve the current socket read timeout value * @return milliseconds */ @Override public long getReadTimeout() { return readTimeout; } /** * Retrieve the accept header value * @return accept header value */ @Override public String getAccept() { return accept; } /** * Retrieve the Accept-Language header value * @return value */ @Override public String getAcceptLanguages() { return acceptLanguage; } /** * Find out if gzip is allowed or not * @return can use gzip */ @Override public boolean isGzipAllowed() { return allowgzip; } /** * Retrieve the crawl delay * @return millis */ @Override public long getCrawlDelay() { return crawlDelay; } /** * Retrieve the host * @return host */ @Override public URIBuilder getHost() { return host; } /** * Retrieve a shared client instance to use * @return client */ @Override public CloseableHttpClient getClient() { return client; } /** * Create a new HttpClient instance to use * @return * @throws IllegalArgumentException * @throws APIException If there is a problem creating the client or strategy */ @Override public CloseableHttpClient createNewClient() throws IllegalArgumentException, APIException { //..Build the client final HttpClientBuilder builder = HttpClients.custom() //..Set the request configuration .setDefaultRequestConfig(createRequestConfig()) //..Set the client connection manager .setConnectionManager(POOL) //..Set the user agent .setUserAgent(getUserAgent()) //..Enable support for things like 301/302 redirects .setRedirectStrategy(createRedirectAndRobotsStrategy()) //..Keep the connection alive for some time .setKeepAliveStrategy(createConnectionKeepAliveStrategy()) //..Add the user agent intercept for setting the user agent //..Don't know if this is still necessary .addInterceptorFirst(createUserAgentInterceptor()) .setMaxConnTotal(DEFAULT_MAX_TOTAL).setMaxConnPerRoute(DEFAULT_MAX_PER_ROUTE) //..Add a few headers for what types of encoding to accept, 
etc. .addInterceptorFirst(createAcceptInterceptor()); //..End builder chain //..Check for gzip if (allowgzip) { //..Add gzip interceptor builder.addInterceptorFirst(createGzipResponseInterceptor()); } return getBuiltClient(builder); } /** * Retrieve a keep alive strategy. * This will use the value of readTimeout. * @return strategy. */ protected ConnectionKeepAliveStrategy createConnectionKeepAliveStrategy() { return new ConnectionKeepAliveStrategy() { @Override public long getKeepAliveDuration(HttpResponse hr, HttpContext hc) { final HeaderElementIterator it = new BasicHeaderElementIterator( hr.headerIterator(HTTP.CONN_KEEP_ALIVE)); while (it.hasNext()) { final HeaderElement he = it.nextElement(); final String param = he.getName(); final String value = he.getValue(); if (value != null && param.equalsIgnoreCase("timeout")) return Long.parseLong(value) * 1000; } return readTimeout; } }; } /** * This sets the client cookie spec to CookieSpecs.BROWSER_COMPATIBILITY * @return Request Config */ protected RequestConfig createRequestConfig() { return RequestConfig.custom().setCookieSpec(CookieSpecs.DEFAULT).setConnectTimeout(3000) .setConnectionRequestTimeout(3000).setSocketTimeout((int) readTimeout).build(); } /** * Create a redirect and robots.txt strategy * @return strategy */ protected RedirectStrategy createRedirectAndRobotsStrategy() { return new DefaultRedirectHandler(getUserAgent(), new RobotDirectives("*", getCrawlDelay())); } /** * Adds the user agent to the request * @return user agent interceptor */ protected HttpRequestInterceptor createUserAgentInterceptor() { return new HttpRequestInterceptor() { @Override public void process(HttpRequest hr, HttpContext hc) throws HttpException, IOException { //..Set the ua header hr.setHeader(HTTP.USER_AGENT, getUserAgent()); } }; } /** * Adds Accept, Accept-Language and Accept-Encoding headers to the request. * If allowgzip is set, this will set Accept-Encoding to gzip. 
* * @return interceptor */ protected HttpRequestInterceptor createAcceptInterceptor() { return new HttpRequestInterceptor() { @Override public void process(HttpRequest hr, HttpContext hc) throws HttpException, IOException { hr.setHeader("Accept", accept); hr.setHeader("Accept-Language", acceptLanguage); if (allowgzip) hr.setHeader("Accept-Encoding", "gzip"); } }; } /** * Build the http client. * * @param builder The client builder * @return The built client */ protected CloseableHttpClient buildClient(final HttpClientBuilder builder) { return builder.build(); } /** * Retrieve the built http client. * This will call buildClient() to allow overrides, and will throw an * APIException if the buildClient() method returns null * @param builder * @return * @throws APIException */ private CloseableHttpClient getBuiltClient(final HttpClientBuilder builder) throws APIException { final CloseableHttpClient built; if (allowUntrustedSSL) built = buildClient(setClientToSelfSigned(builder)); else built = buildClient(builder); if (built == null) { throw new APIException("Failed to build the http client. buildClient() returned null"); } return built; } /** * If gzip is enabled, this will decode things. * @return */ private HttpResponseInterceptor createGzipResponseInterceptor() { return new HttpResponseInterceptor() { @Override public void process(HttpResponse response, HttpContext context) throws HttpException, IOException { //..get the entity final HttpEntity entity = response.getEntity(); if (entity == null) return; //..Get any content encoding headers final Header ceHeader = entity.getContentEncoding(); if (ceHeader == null) return; //..Get any entries HeaderElement[] codecs = ceHeader.getElements(); //..See if one is marked as gzip for (final HeaderElement codec : codecs) { if (codec.getName().equalsIgnoreCase("gzip")) { //..Hey, it's gzip! decompress the entity response.setEntity(new GzipDecompressingEntity(response.getEntity())); //..Done with this ish. 
return; } } } }; } /** * Create a HTTP client that uses a self-signed and always trusted * SSL strategy. * * @param custom The client builder * @return builder with unsafe SSL strategy * @throws APIException If there is a problem creating the client or strategy */ private HttpClientBuilder setClientToSelfSigned(final HttpClientBuilder custom) throws APIException { final SSLContextBuilder builder = new SSLContextBuilder(); try { builder.loadTrustMaterial(null, new TrustSelfSignedStrategy()); SSLConnectionSocketFactory sf = new SSLConnectionSocketFactory(builder.build()); return custom.setSSLSocketFactory(sf); } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException e) { throw new APIException("Failed to create self-signed trust strategy and/or SSL-enabled HTTP Client", e); } } }