Example usage for org.apache.http.impl.client HttpClientBuilder setUserAgent

List of usage examples for org.apache.http.impl.client HttpClientBuilder setUserAgent

Introduction

This page collects usage examples for org.apache.http.impl.client.HttpClientBuilder.setUserAgent.

Prototype

public final HttpClientBuilder setUserAgent(final String userAgent) 

Source Link

Document

Assigns User-Agent value.

Usage

From source file:sachin.spider.WebSpider.java

/**
 * Stores the spider configuration and latch on this instance, then builds the
 * pooled connection manager and the HTTP client used by the spider.
 *
 * <p>The client sends the configured User-Agent. When
 * {@code config.isAuthenticate()} is true, the configured username/password are
 * registered for {@code AuthScope.ANY}.
 *
 * <p>NOTE(review): the HTTPS socket factory trusts every certificate and allows
 * every hostname, so TLS connections made through this client are not
 * authenticated. Confirm this is acceptable for the deployment.
 *
 * <p>If the SSL context cannot be created the failure is logged at SEVERE and
 * the connection manager and client are left unassigned by this call.
 *
 * @param config spider settings (user agent, spider count, credentials)
 * @param latch  latch stored on this instance
 */
@SuppressWarnings("deprecation")
public void setValues(SpiderConfig config, CountDownLatch latch) {
    try {
        this.config = config;
        this.latch = latch;

        // Accept any certificate chain presented by the server.
        SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, new TrustStrategy() {

            @Override
            public boolean isTrusted(java.security.cert.X509Certificate[] xcs, String string)
                    throws java.security.cert.CertificateException {
                return true;
            }
        }).build();
        HostnameVerifier hostnameVerifier = SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER;

        SSLConnectionSocketFactory sslSocketFactory = new SSLConnectionSocketFactory(sslContext,
                hostnameVerifier);
        Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder
                .<ConnectionSocketFactory>create()
                .register("http", PlainConnectionSocketFactory.getSocketFactory())
                .register("https", sslSocketFactory).build();
        cm = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
        // Per-route and total limits are both twice the number of spiders.
        int maxConnections = config.getTotalSpiders() * 2;
        cm.setDefaultMaxPerRoute(maxConnections);
        cm.setMaxTotal(maxConnections);

        // Single builder for both the authenticated and anonymous case. The SSL
        // settings live in the connection manager's socket factory registry, so
        // nothing SSL-related needs to be set on the builder itself.
        HttpClientBuilder builder = HttpClientBuilder.create();
        builder.setUserAgent(config.getUserAgentString());
        builder.setConnectionManager(cm);
        if (config.isAuthenticate()) {
            CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            credentialsProvider.setCredentials(AuthScope.ANY,
                    new UsernamePasswordCredentials(config.getUsername(), config.getPassword()));
            builder.setDefaultCredentialsProvider(credentialsProvider);
        }
        httpclient = builder.build();
    } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) {
        Logger.getLogger(WebSpider.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:io.github.cidisk.indexcrawler.fetcher.PageFetcher.java

/**
 * Creates a fetcher whose HTTP client is assembled from {@code config}.
 *
 * <p>Steps, in order: request defaults (Expect-Continue off, browser-compatible
 * cookies, redirects off, configured timeouts); a pooled connection manager with
 * plain HTTP and, if enabled, an HTTPS scheme that trusts every certificate; the
 * User-Agent; an optional proxy with optional credentials; authentication when
 * auth infos are configured; and finally the idle-connection monitor thread.
 *
 * @param config crawl settings read while building the client
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    RequestConfig.Builder requestDefaults = RequestConfig.custom();
    requestDefaults.setExpectContinueEnabled(false);
    requestDefaults.setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY);
    requestDefaults.setRedirectsEnabled(false);
    requestDefaults.setSocketTimeout(config.getSocketTimeout());
    requestDefaults.setConnectTimeout(config.getConnectionTimeout());
    RequestConfig defaultRequestConfig = requestDefaults.build();

    RegistryBuilder<ConnectionSocketFactory> schemes = RegistryBuilder.<ConnectionSocketFactory>create()
            .register("http", PlainConnectionSocketFactory.INSTANCE);
    if (config.isIncludeHttpsPages()) {
        // Works around https://code.google.com/p/crawler4j/issues/detail?id=174
        // by accepting every SSL certificate.
        try {
            TrustStrategy trustEverything = new TrustStrategy() {
                @Override
                public boolean isTrusted(final X509Certificate[] certChain, String authType) {
                    return true;
                }
            };
            SSLContext permissiveContext = SSLContexts.custom().loadTrustMaterial(null, trustEverything)
                    .build();
            schemes.register("https", new SSLConnectionSocketFactory(permissiveContext,
                    SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER));
        } catch (Exception e) {
            // Best effort: the fetcher continues without an https scheme.
            logger.warn("Exception thrown while trying to register https");
            logger.debug("Stacktrace", e);
        }
    }

    connectionManager = new PoolingHttpClientConnectionManager(schemes.build());
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    HttpClientBuilder clientBuilder = HttpClientBuilder.create()
            .setDefaultRequestConfig(defaultRequestConfig)
            .setConnectionManager(connectionManager)
            .setUserAgent(config.getUserAgentString());

    final boolean useProxy = config.getProxyHost() != null;
    if (useProxy) {
        final boolean proxyNeedsLogin = config.getProxyUsername() != null;
        if (proxyNeedsLogin) {
            BasicCredentialsProvider proxyCredentials = new BasicCredentialsProvider();
            AuthScope proxyScope = new AuthScope(config.getProxyHost(), config.getProxyPort());
            UsernamePasswordCredentials proxyLogin = new UsernamePasswordCredentials(
                    config.getProxyUsername(), config.getProxyPassword());
            proxyCredentials.setCredentials(proxyScope, proxyLogin);
            clientBuilder.setDefaultCredentialsProvider(proxyCredentials);
        }

        HttpHost proxyEndpoint = new HttpHost(config.getProxyHost(), config.getProxyPort());
        clientBuilder.setProxy(proxyEndpoint);
        logger.debug("Working through Proxy: {}", proxyEndpoint.getHostName());
    }

    httpClient = clientBuilder.build();
    if (config.getAuthInfos() != null) {
        if (!config.getAuthInfos().isEmpty()) {
            doAuthetication(config.getAuthInfos());
        }
    }

    // Create the monitor lazily, then start it.
    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}

From source file:com.crawler.app.fetcher.PageFetcher.java

/**
 * Builds the page fetcher and its underlying pooled HTTP client.
 *
 * <p>The client is configured from {@code config}: timeouts and cookie handling,
 * connection pool limits, User-Agent, and an optional (optionally authenticated)
 * proxy. When HTTPS pages are included, an https scheme that accepts every
 * certificate is registered. After the client is built, authentication runs if
 * auth infos are present and the idle-connection monitor thread is started.
 *
 * @param config crawl settings used to configure the client
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    RequestConfig.Builder requestSettings = RequestConfig.custom()
            .setExpectContinueEnabled(false)
            .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY)
            .setRedirectsEnabled(false);
    requestSettings = requestSettings
            .setSocketTimeout(config.getSocketTimeout())
            .setConnectTimeout(config.getConnectionTimeout());
    RequestConfig requestConfig = requestSettings.build();

    RegistryBuilder<ConnectionSocketFactory> socketFactories = RegistryBuilder.create();
    socketFactories.register("http", PlainConnectionSocketFactory.INSTANCE);
    if (config.isIncludeHttpsPages()) {
        try {
            // Fix for https://code.google.com/p/crawler4j/issues/detail?id=174:
            // every SSL certificate is treated as trusted.
            TrustStrategy acceptAll = new TrustStrategy() {
                public boolean isTrusted(final X509Certificate[] certificates, String authType) {
                    return true;
                }
            };
            SSLContext trustAllContext = SSLContexts.custom().loadTrustMaterial(null, acceptAll).build();
            SSLConnectionSocketFactory httpsFactory = new SSLConnectionSocketFactory(trustAllContext,
                    SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
            socketFactories.register("https", httpsFactory);
        } catch (Exception e) {
            logger.warn("Exception thrown while trying to register https");
            logger.debug("Stacktrace", e);
        }
    }

    Registry<ConnectionSocketFactory> socketFactoryRegistry = socketFactories.build();
    connectionManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
    httpClientBuilder.setDefaultRequestConfig(requestConfig)
            .setConnectionManager(connectionManager)
            .setUserAgent(config.getUserAgentString());

    if (config.getProxyHost() != null) {
        if (config.getProxyUsername() != null) {
            // Credentials are scoped to the proxy host and port only.
            BasicCredentialsProvider provider = new BasicCredentialsProvider();
            AuthScope scope = new AuthScope(config.getProxyHost(), config.getProxyPort());
            provider.setCredentials(scope,
                    new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword()));
            httpClientBuilder.setDefaultCredentialsProvider(provider);
        }

        HttpHost proxyHost = new HttpHost(config.getProxyHost(), config.getProxyPort());
        httpClientBuilder.setProxy(proxyHost);
        logger.debug("Working through Proxy: {}", proxyHost.getHostName());
    }

    httpClient = httpClientBuilder.build();

    final boolean hasAuthInfos = config.getAuthInfos() != null && !config.getAuthInfos().isEmpty();
    if (hasAuthInfos) {
        doAuthetication(config.getAuthInfos());
    }

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}

From source file:com.brsanthu.googleanalytics.GoogleAnalytics.java

/**
 * Creates the HTTP client used for requests, backed by a pooling connection
 * manager.
 *
 * <p>The User-Agent is set only when the configured value is non-empty. A proxy
 * is set only when a proxy host is configured; proxy credentials are registered
 * (scoped to the proxy host and port) only when a proxy user name is configured.
 *
 * @param config source of user agent, proxy and pool settings
 * @return the newly built client
 */
protected CloseableHttpClient createHttpClient(GoogleAnalyticsConfig config) {
    PoolingHttpClientConnectionManager pool = new PoolingHttpClientConnectionManager();
    pool.setDefaultMaxPerRoute(getDefaultMaxPerRoute(config));

    HttpClientBuilder clientBuilder = HttpClients.custom();
    clientBuilder.setConnectionManager(pool);

    if (isNotEmpty(config.getUserAgent())) {
        clientBuilder.setUserAgent(config.getUserAgent());
    }

    // No proxy configured: nothing else to set up.
    if (!isNotEmpty(config.getProxyHost())) {
        return clientBuilder.build();
    }

    HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort());
    clientBuilder.setProxy(proxy);

    if (isNotEmpty(config.getProxyUserName())) {
        AuthScope proxyScope = new AuthScope(config.getProxyHost(), config.getProxyPort());
        UsernamePasswordCredentials proxyLogin = new UsernamePasswordCredentials(
                config.getProxyUserName(), config.getProxyPassword());
        BasicCredentialsProvider proxyCredentials = new BasicCredentialsProvider();
        proxyCredentials.setCredentials(proxyScope, proxyLogin);
        clientBuilder.setDefaultCredentialsProvider(proxyCredentials);
    }

    return clientBuilder.build();
}

From source file:crawler.PageFetcher.java

/**
 * Constructs the fetcher and the HTTP client it uses.
 *
 * <p>Request defaults use the cookie policy from {@code config}, with redirects
 * and Expect-Continue disabled. Connections come from an SNI-aware pooling
 * manager; when HTTPS pages are included, the https scheme accepts every
 * certificate and uses the no-op hostname verifier. User-Agent, default
 * headers and an optional proxy (with optional credentials) are applied before
 * the client is built. Authentication then runs if auth infos are configured,
 * and the idle-connection monitor thread is started.
 *
 * @param config crawl settings read while building the client
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    RequestConfig.Builder requestBuilder = RequestConfig.custom();
    requestBuilder.setExpectContinueEnabled(false);
    requestBuilder.setCookieSpec(config.getCookiePolicy());
    requestBuilder.setRedirectsEnabled(false);
    requestBuilder.setSocketTimeout(config.getSocketTimeout());
    requestBuilder.setConnectTimeout(config.getConnectionTimeout());
    RequestConfig defaults = requestBuilder.build();

    RegistryBuilder<ConnectionSocketFactory> registryBuilder = RegistryBuilder.create();
    registryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE);
    if (config.isIncludeHttpsPages()) {
        try {
            // See https://code.google.com/p/crawler4j/issues/detail?id=174:
            // the SSL certificate is always trusted.
            TrustStrategy alwaysTrust = new TrustStrategy() {
                @Override
                public boolean isTrusted(final X509Certificate[] certs, String authType) {
                    return true;
                }
            };
            SSLContext lenientContext = SSLContexts.custom().loadTrustMaterial(null, alwaysTrust).build();
            SSLConnectionSocketFactory sniFactory = new SniSSLConnectionSocketFactory(lenientContext,
                    NoopHostnameVerifier.INSTANCE);
            registryBuilder.register("https", sniFactory);
        } catch (Exception e) {
            logger.warn("Exception thrown while trying to register https");
            logger.debug("Stacktrace", e);
        }
    }

    connectionManager = new SniPoolingHttpClientConnectionManager(registryBuilder.build());
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    HttpClientBuilder builder = HttpClientBuilder.create()
            .setDefaultRequestConfig(defaults)
            .setConnectionManager(connectionManager)
            .setUserAgent(config.getUserAgentString())
            .setDefaultHeaders(config.getDefaultHeaders());

    if (config.getProxyHost() != null) {
        if (config.getProxyUsername() != null) {
            BasicCredentialsProvider proxyAuth = new BasicCredentialsProvider();
            AuthScope proxyScope = new AuthScope(config.getProxyHost(), config.getProxyPort());
            proxyAuth.setCredentials(proxyScope,
                    new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword()));
            builder.setDefaultCredentialsProvider(proxyAuth);
        }

        HttpHost proxyAddress = new HttpHost(config.getProxyHost(), config.getProxyPort());
        builder.setProxy(proxyAddress);
        logger.debug("Working through Proxy: {}", proxyAddress.getHostName());
    }

    httpClient = builder.build();
    if (config.getAuthInfos() != null) {
        if (!config.getAuthInfos().isEmpty()) {
            doAuthetication(config.getAuthInfos());
        }
    }

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}

From source file:de.comlineag.snc.webcrawler.fetcher.PageFetcher.java

/**
 * Builds the fetcher's pooled HTTP client from the crawl configuration.
 *
 * <p>Request defaults disable Expect-Continue and redirects, use
 * browser-compatible cookies and take their timeouts from {@code config}. When
 * HTTPS pages are included, an https scheme that trusts every certificate is
 * registered; if that fails the error is logged at debug level and the fetcher
 * continues without it. An optional proxy, with optional credentials scoped to
 * the proxy host and port, is applied. A response interceptor wraps
 * gzip-encoded response bodies in a decompressing entity. Finally the
 * idle-connection monitor thread is started.
 *
 * @param config crawl settings read while building the client
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    RequestConfig requestConfig = RequestConfig.custom().setExpectContinueEnabled(false)
            .setCookieSpec(CookieSpecs.BROWSER_COMPATIBILITY).setRedirectsEnabled(false)
            .setSocketTimeout(config.getSocketTimeout()).setConnectTimeout(config.getConnectionTimeout())
            .build();

    RegistryBuilder<ConnectionSocketFactory> connRegistryBuilder = RegistryBuilder.create();
    connRegistryBuilder.register("http", PlainConnectionSocketFactory.INSTANCE);
    if (config.isIncludeHttpsPages()) {
        try { // Fixing: https://code.google.com/p/crawler4j/issues/detail?id=174
            // By always trusting the ssl certificate
            SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() {
                @Override
                public boolean isTrusted(final X509Certificate[] chain, String authType) {
                    return true;
                }
            }).build();
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext,
                    SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
            connRegistryBuilder.register("https", sslsf);
        } catch (Exception e) {
            logger.debug("Exception thrown while trying to register https:", e);
        }
    }

    Registry<ConnectionSocketFactory> connRegistry = connRegistryBuilder.build();
    connectionManager = new PoolingHttpClientConnectionManager(connRegistry);
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    HttpClientBuilder clientBuilder = HttpClientBuilder.create();
    clientBuilder.setDefaultRequestConfig(requestConfig);
    clientBuilder.setConnectionManager(connectionManager);
    clientBuilder.setUserAgent(config.getUserAgentString());
    if (config.getProxyHost() != null) {

        if (config.getProxyUsername() != null) {
            BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            credentialsProvider.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()),
                    new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword()));
            clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
        }

        HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort());
        clientBuilder.setProxy(proxy);
    }
    clientBuilder.addInterceptorLast(new HttpResponseInterceptor() {
        @Override
        public void process(final HttpResponse response, final HttpContext context)
                throws HttpException, IOException {
            HttpEntity entity = response.getEntity();
            // Responses without a body carry no entity; there is nothing to
            // decompress, and dereferencing it would throw.
            if (entity == null) {
                return;
            }
            Header contentEncoding = entity.getContentEncoding();
            if (contentEncoding != null) {
                HeaderElement[] codecs = contentEncoding.getElements();
                for (HeaderElement codec : codecs) {
                    if (codec.getName().equalsIgnoreCase("gzip")) {
                        response.setEntity(new GzipDecompressingEntity(entity));
                        return;
                    }
                }
            }
        }
    });

    httpClient = clientBuilder.build();

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}

From source file:crawler.java.edu.uci.ics.crawler4j.fetcher.PageFetcher.java

/**
 * Sets up the page fetcher's HTTP client according to {@code config}.
 *
 * <p>Request defaults use the default cookie spec with redirects and
 * Expect-Continue turned off. A pooling connection manager is created with a
 * plain http scheme and, when HTTPS pages are included, an https scheme that
 * accepts every certificate. User-Agent, default headers and an optional proxy
 * (with optional credentials) are applied before the client is built.
 * Authentication then runs if auth infos are configured, and the
 * idle-connection monitor thread is started.
 *
 * @param config crawl settings read while building the client
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    RequestConfig.Builder requestOptions = RequestConfig.custom()
            .setExpectContinueEnabled(false)
            .setCookieSpec(CookieSpecs.DEFAULT)
            .setRedirectsEnabled(false);
    requestOptions.setSocketTimeout(config.getSocketTimeout());
    requestOptions.setConnectTimeout(config.getConnectionTimeout());
    RequestConfig requestConfig = requestOptions.build();

    RegistryBuilder<ConnectionSocketFactory> schemeRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
            .register("http", PlainConnectionSocketFactory.INSTANCE);
    if (config.isIncludeHttpsPages()) {
        try {
            // Addresses https://code.google.com/p/crawler4j/issues/detail?id=174
            // by trusting whatever certificate the server presents.
            TrustStrategy trustAll = new TrustStrategy() {
                @Override
                public boolean isTrusted(final X509Certificate[] presentedChain, String authType) {
                    return true;
                }
            };
            SSLContext trustAllContext = SSLContexts.custom().loadTrustMaterial(null, trustAll).build();
            schemeRegistry.register("https", new SSLConnectionSocketFactory(trustAllContext,
                    SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER));
        } catch (Exception e) {
            logger.warn("Exception thrown while trying to register https");
            logger.debug("Stacktrace", e);
        }
    }

    connectionManager = new PoolingHttpClientConnectionManager(schemeRegistry.build());
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    HttpClientBuilder httpBuilder = HttpClientBuilder.create();
    httpBuilder.setDefaultRequestConfig(requestConfig)
            .setConnectionManager(connectionManager)
            .setUserAgent(config.getUserAgentString())
            .setDefaultHeaders(config.getDefaultHeaders());

    final boolean proxyConfigured = config.getProxyHost() != null;
    if (proxyConfigured) {
        if (config.getProxyUsername() != null) {
            BasicCredentialsProvider proxyCredentials = new BasicCredentialsProvider();
            UsernamePasswordCredentials login = new UsernamePasswordCredentials(config.getProxyUsername(),
                    config.getProxyPassword());
            proxyCredentials.setCredentials(new AuthScope(config.getProxyHost(), config.getProxyPort()), login);
            httpBuilder.setDefaultCredentialsProvider(proxyCredentials);
        }

        HttpHost proxyTarget = new HttpHost(config.getProxyHost(), config.getProxyPort());
        httpBuilder.setProxy(proxyTarget);
        logger.debug("Working through Proxy: {}", proxyTarget.getHostName());
    }

    httpClient = httpBuilder.build();

    final boolean authRequested = (config.getAuthInfos() != null) && !config.getAuthInfos().isEmpty();
    if (authRequested) {
        doAuthetication(config.getAuthInfos());
    }

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}

From source file:com.jaeksoft.searchlib.crawler.web.spider.HttpAbstract.java

/**
 * Creates the HTTP client used by this instance.
 *
 * <p>A User-Agent is set only when {@code userAgent} is non-null and non-empty
 * after trimming. Redirect handling is disabled on the client when
 * {@code bFollowRedirect} is false. The client gets a fresh credentials
 * provider and cookie store (both kept in fields) and an auth scheme registry
 * covering NTLM, Basic, Digest, SPNEGO and Kerberos.
 *
 * <p>NOTE(review): {@code redirectStrategy} is created and stored here but is
 * not registered on the builder in this constructor; confirm it is applied
 * elsewhere.
 *
 * @param userAgent       User-Agent value, or null/blank to keep the client default
 * @param bFollowRedirect false to disable the client's redirect handling
 * @param proxyHandler    proxy handler stored on this instance
 */
public HttpAbstract(String userAgent, boolean bFollowRedirect, ProxyHandler proxyHandler) {
    HttpClientBuilder builder = HttpClients.custom();

    redirectStrategy = new DefaultRedirectStrategy();

    if (userAgent != null) {
        String trimmedUserAgent = userAgent.trim();
        if (trimmedUserAgent.length() > 0) {
            builder.setUserAgent(trimmedUserAgent);
        }
    }
    if (!bFollowRedirect) {
        builder.disableRedirectHandling();
    }

    this.proxyHandler = proxyHandler;

    Registry<AuthSchemeProvider> authSchemeRegistry = RegistryBuilder.<AuthSchemeProvider>create()
            .register(AuthSchemes.NTLM, new NTLMSchemeFactory())
            .register(AuthSchemes.BASIC, new BasicSchemeFactory())
            .register(AuthSchemes.DIGEST, new DigestSchemeFactory())
            .register(AuthSchemes.SPNEGO, new SPNegoSchemeFactory())
            .register(AuthSchemes.KERBEROS, new KerberosSchemeFactory()).build();

    // The credentials provider is registered exactly once.
    credentialsProvider = new BasicCredentialsProvider();
    builder.setDefaultCredentialsProvider(credentialsProvider);

    cookieStore = new BasicCookieStore();
    builder.setDefaultCookieStore(cookieStore);

    builder.setDefaultAuthSchemeRegistry(authSchemeRegistry);

    httpClient = builder.build();
}

From source file:org.asqatasun.util.http.HttpRequestHandler.java

/**
 * Builds a new HTTP client with the configured socket and connect timeouts, a
 * new pooling connection manager and the Asqatasun User-Agent.
 *
 * <p>When {@code isProxySet(url)} is true the proxy host and port are applied,
 * and when proxy credentials are set they are registered for that host and
 * port.
 *
 * @param url URL passed to {@code isProxySet} to decide whether the proxy applies
 * @return the newly built client
 */
private CloseableHttpClient getHttpClient(String url) {
    RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(socketTimeout)
            .setConnectTimeout(connectionTimeout).build();
    HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
    httpClientBuilder.setDefaultRequestConfig(requestConfig);
    httpClientBuilder.setConnectionManager(new PoolingHttpClientConnectionManager());
    httpClientBuilder.setUserAgent(ASQATASUN_USER_AGENT);
    if (isProxySet(url)) {
        LOGGER.debug(("Set proxy with " + proxyHost + " and " + proxyPort));
        httpClientBuilder.setProxy(new HttpHost(proxyHost, Integer.valueOf(proxyPort)));
        if (isProxyCredentialSet()) {
            CredentialsProvider credsProvider = new BasicCredentialsProvider();
            credsProvider.setCredentials(new AuthScope(proxyHost, Integer.valueOf(proxyPort)),
                    new UsernamePasswordCredentials(proxyUser, proxyPassword));
            httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
            // The proxy password is deliberately left out: secrets must not be written to logs.
            LOGGER.debug(("Set proxy credentials " + proxyHost + " and " + proxyPort + " and " + proxyUser));
        }
    }
    return httpClientBuilder.build();
}