List of usage examples for org.apache.http.impl.client HttpClientBuilder setUserAgent
public final HttpClientBuilder setUserAgent(final String userAgent)
From source file:sachin.spider.WebSpider.java
/** * * @param config//from w ww. j a v a 2s .co m * @param latch */ @SuppressWarnings("deprecation") public void setValues(SpiderConfig config, CountDownLatch latch) { try { this.config = config; this.latch = latch; HttpClientBuilder builder = HttpClientBuilder.create(); builder.setUserAgent(config.getUserAgentString()); SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(java.security.cert.X509Certificate[] xcs, String string) throws java.security.cert.CertificateException { return true; } }).build(); builder.setSslcontext(sslContext); HostnameVerifier hostnameVerifier = SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER; SSLConnectionSocketFactory sslSocketFactory = new SSLConnectionSocketFactory(sslContext, hostnameVerifier); Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder .<ConnectionSocketFactory>create() .register("http", PlainConnectionSocketFactory.getSocketFactory()) .register("https", sslSocketFactory).build(); cm = new PoolingHttpClientConnectionManager(socketFactoryRegistry); cm.setDefaultMaxPerRoute(config.getTotalSpiders() * 2); cm.setMaxTotal(config.getTotalSpiders() * 2); if (config.isAuthenticate()) { CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(config.getUsername(), config.getPassword())); httpclient = HttpClients.custom().setUserAgent(config.getUserAgentString()) .setDefaultCredentialsProvider(credentialsProvider).setConnectionManager(cm).build(); } else { httpclient = HttpClients.custom().setConnectionManager(cm).setUserAgent(config.getUserAgentString()) .build(); } } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { Logger.getLogger(WebSpider.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:io.github.cidisk.indexcrawler.fetcher.PageFetcher.java
public PageFetcher(CrawlConfig config) { super(config); RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false) .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY).setRedirectsEnabled(false) .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout()) .build();//from w w w . ja v a2 s . com RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create(); connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE); if (config.isIncludeHttpsPages()) { try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174 // By always trusting the ssl certificate SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(final X509Certificate[] chain, String authType) { return true; } }).build(); SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); connRegistryBuilder.register("https", sslsf); } catch (Exception e) { logger.warn("Exception thrown while trying to register https"); logger.debug("Stacktrace", e); } } Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build(); connectionManager = new PoolingHttpClientConnectionManager(connRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); HttpClientBuilder clientBuilder = HttpClientBuilder.create(); clientBuilder.setDefaultRequestConfig(requestConfig); clientBuilder.setConnectionManager(connectionManager); clientBuilder.setUserAgent(config.getUserAgentString()); if (config.getProxyHost() != null) { if (config.getProxyUsername() != null) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword())); clientBuilder.setDefaultCredentialsProvider(credentialsProvider); } HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort()); clientBuilder.setProxy(proxy); logger.debug("Working through Proxy: {}", proxy.getHostName()); } httpClient = clientBuilder.build(); if (config.getAuthInfos() != null && !config.getAuthInfos().isEmpty()) { doAuthetication(config.getAuthInfos()); } if (connectionMonitorThread == null) { connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager); } connectionMonitorThread.start(); }
From source file:com.crawler.app.fetcher.PageFetcher.java
public PageFetcher(CrawlConfig config) { super(config); RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false) .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY).setRedirectsEnabled(false) //.setRelativeRedirectsAllowed(true) .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout()) .build();/*from w w w.j av a 2 s .c om*/ RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create(); connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE); if (config.isIncludeHttpsPages()) { try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174 // By always trusting the ssl certificate SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { //@Override public boolean isTrusted(final X509Certificate[] chain, String authType) { return true; } }).build(); SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); connRegistryBuilder.register("https", sslsf); } catch (Exception e) { logger.warn("Exception thrown while trying to register https"); logger.debug("Stacktrace", e); } } Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build(); connectionManager = new PoolingHttpClientConnectionManager(connRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); HttpClientBuilder clientBuilder = HttpClientBuilder.create(); clientBuilder.setDefaultRequestConfig(requestConfig); clientBuilder.setConnectionManager(connectionManager); clientBuilder.setUserAgent(config.getUserAgentString()); if (config.getProxyHost() != null) { if (config.getProxyUsername() != null) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword())); clientBuilder.setDefaultCredentialsProvider(credentialsProvider); } HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort()); clientBuilder.setProxy(proxy); logger.debug("Working through Proxy: {}", proxy.getHostName()); } httpClient = clientBuilder.build(); if (config.getAuthInfos() != null && !config.getAuthInfos().isEmpty()) { doAuthetication(config.getAuthInfos()); } if (connectionMonitorThread == null) { connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager); } connectionMonitorThread.start(); }
From source file:com.brsanthu.googleanalytics.GoogleAnalytics.java
protected CloseableHttpClient createHttpClient(GoogleAnalyticsConfig config) { PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(); connManager.setDefaultMaxPerRoute(getDefaultMaxPerRoute(config)); HttpClientBuilder builder = HttpClients.custom().setConnectionManager(connManager); if (isNotEmpty(config.getUserAgent())) { builder.setUserAgent(config.getUserAgent()); }/* w w w. ja v a 2s.c o m*/ if (isNotEmpty(config.getProxyHost())) { builder.setProxy(new HttpHost(config.getProxyHost(), config.getProxyPort())); if (isNotEmpty(config.getProxyUserName())) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUserName(), config.getProxyPassword())); builder.setDefaultCredentialsProvider(credentialsProvider); } } return builder.build(); }
From source file:crawler.PageFetcher.java
public PageFetcher(CrawlConfig config) { super(config); RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false) .setCookieSpec(config.getCookiePolicy()).setRedirectsEnabled(false) .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout()) .build();//from w w w . j a v a2 s . c om RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create(); connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE); if (config.isIncludeHttpsPages()) { try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174 // By always trusting the ssl certificate SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(final X509Certificate[] chain, String authType) { return true; } }).build(); SSLConnectionSocketFactory sslsf = new SniSSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); connRegistryBuilder.register("https", sslsf); } catch (Exception e) { logger.warn("Exception thrown while trying to register https"); logger.debug("Stacktrace", e); } } Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build(); connectionManager = new SniPoolingHttpClientConnectionManager(connRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); HttpClientBuilder clientBuilder = HttpClientBuilder.create(); clientBuilder.setDefaultRequestConfig(requestConfig); clientBuilder.setConnectionManager(connectionManager); clientBuilder.setUserAgent(config.getUserAgentString()); clientBuilder.setDefaultHeaders(config.getDefaultHeaders()); if (config.getProxyHost() != null) { if (config.getProxyUsername() != null) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword())); clientBuilder.setDefaultCredentialsProvider(credentialsProvider); } HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort()); clientBuilder.setProxy(proxy); logger.debug("Working through Proxy: {}", proxy.getHostName()); } httpClient = clientBuilder.build(); if ((config.getAuthInfos() != null) && !config.getAuthInfos().isEmpty()) { doAuthetication(config.getAuthInfos()); } if (connectionMonitorThread == null) { connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager); } connectionMonitorThread.start(); }
From source file:de.comlineag.snc.webcrawler.fetcher.PageFetcher.java
public PageFetcher(CrawlConfig config) { super(config); RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false) .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY).setRedirectsEnabled(false) .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout()) .build();/*from w ww . ja v a 2s . c o m*/ RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create(); connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE); if (config.isIncludeHttpsPages()) { try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174 // By always trusting the ssl certificate SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(final X509Certificate[] chain, String authType) { return true; } }).build(); SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); connRegistryBuilder.register("https", sslsf); } catch (Exception e) { logger.debug("Exception thrown while trying to register https:", e); } } Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build(); connectionManager = new PoolingHttpClientConnectionManager(connRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); HttpClientBuilder clientBuilder = HttpClientBuilder.create(); clientBuilder.setDefaultRequestConfig(requestConfig); clientBuilder.setConnectionManager(connectionManager); clientBuilder.setUserAgent(config.getUserAgentString()); if (config.getProxyHost() != null) { if (config.getProxyUsername() != null) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword())); clientBuilder.setDefaultCredentialsProvider(credentialsProvider); } HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort()); clientBuilder.setProxy(proxy); } clientBuilder.addInterceptorLast(new HttpResponseInterceptor() { @Override public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException { HttpEntity entity = response.getEntity(); Header contentEncoding = entity.getContentEncoding(); if (contentEncoding != null) { HeaderElement[] codecs = contentEncoding.getElements(); for (HeaderElement codec : codecs) { if (codec.getName().equalsIgnoreCase("gzip")) { response.setEntity(new GzipDecompressingEntity(response.getEntity())); return; } } } } }); httpClient = clientBuilder.build(); if (connectionMonitorThread == null) { connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager); } connectionMonitorThread.start(); }
From source file:crawler.java.edu.uci.ics.crawler4j.fetcher.PageFetcher.java
public PageFetcher(CrawlConfig config) { super(config); RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false) .setCookieSpec(CookieSpecs.DEFAULT).setRedirectsEnabled(false) .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout()) .build();/*w w w. ja v a 2s . c om*/ RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create(); connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE); if (config.isIncludeHttpsPages()) { try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174 // By always trusting the ssl certificate SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(final X509Certificate[] chain, String authType) { return true; } }).build(); SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext, SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); connRegistryBuilder.register("https", sslsf); } catch (Exception e) { logger.warn("Exception thrown while trying to register https"); logger.debug("Stacktrace", e); } } Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build(); connectionManager = new PoolingHttpClientConnectionManager(connRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); HttpClientBuilder clientBuilder = HttpClientBuilder.create(); clientBuilder.setDefaultRequestConfig(requestConfig); clientBuilder.setConnectionManager(connectionManager); clientBuilder.setUserAgent(config.getUserAgentString()); clientBuilder.setDefaultHeaders(config.getDefaultHeaders()); if (config.getProxyHost() != null) { if (config.getProxyUsername() != null) { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword())); clientBuilder.setDefaultCredentialsProvider(credentialsProvider); } HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort()); clientBuilder.setProxy(proxy); logger.debug("Working through Proxy: {}", proxy.getHostName()); } httpClient = clientBuilder.build(); if ((config.getAuthInfos() != null) && !config.getAuthInfos().isEmpty()) { doAuthetication(config.getAuthInfos()); } if (connectionMonitorThread == null) { connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager); } connectionMonitorThread.start(); }
From source file:com.jaeksoft.searchlib.crawler.web.spider.HttpAbstract.java
public HttpAbstract(String userAgent, boolean bFollowRedirect, ProxyHandler proxyHandler) { HttpClientBuilder builder = HttpClients.custom(); redirectStrategy = new DefaultRedirectStrategy(); if (userAgent != null) { userAgent = userAgent.trim();/*www .j a v a2s . c o m*/ if (userAgent.length() > 0) builder.setUserAgent(userAgent); else userAgent = null; } if (!bFollowRedirect) builder.disableRedirectHandling(); this.proxyHandler = proxyHandler; Registry<AuthSchemeProvider> authSchemeRegistry = RegistryBuilder.<AuthSchemeProvider>create() .register(AuthSchemes.NTLM, new NTLMSchemeFactory()) .register(AuthSchemes.BASIC, new BasicSchemeFactory()) .register(AuthSchemes.DIGEST, new DigestSchemeFactory()) .register(AuthSchemes.SPNEGO, new SPNegoSchemeFactory()) .register(AuthSchemes.KERBEROS, new KerberosSchemeFactory()).build(); credentialsProvider = new BasicCredentialsProvider(); builder.setDefaultCredentialsProvider(credentialsProvider); cookieStore = new BasicCookieStore(); builder.setDefaultCookieStore(cookieStore); builder.setDefaultCredentialsProvider(credentialsProvider); builder.setDefaultAuthSchemeRegistry(authSchemeRegistry); httpClient = builder.build(); }
From source file:org.asqatasun.util.http.HttpRequestHandler.java
private CloseableHttpClient getHttpClient(String url) { RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(socketTimeout) .setConnectTimeout(connectionTimeout).build(); HttpClientBuilder httpClientBuilder = HttpClientBuilder.create(); httpClientBuilder.setDefaultRequestConfig(requestConfig); httpClientBuilder.setConnectionManager(new PoolingHttpClientConnectionManager()); httpClientBuilder.setUserAgent(ASQATASUN_USER_AGENT); if (isProxySet(url)) { LOGGER.debug(("Set proxy with " + proxyHost + " and " + proxyPort)); httpClientBuilder.setProxy(new HttpHost(proxyHost, Integer.valueOf(proxyPort))); if (isProxyCredentialSet()) { CredentialsProvider credsProvider = new BasicCredentialsProvider(); credsProvider.setCredentials(new AuthScope(proxyHost, Integer.valueOf(proxyPort)), new UsernamePasswordCredentials(proxyUser, proxyPassword)); httpClientBuilder.setDefaultCredentialsProvider(credsProvider); LOGGER.debug(("Set proxy credentials " + proxyHost + " and " + proxyPort + " and " + proxyUser + " and " + proxyPassword)); }// w w w . j a v a2s . co m } return httpClientBuilder.build(); }