org.opens.tanaguru.util.http.HttpRequestHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.opens.tanaguru.util.http.HttpRequestHandler.java

Source

/*
 *  Tanaguru - Automated webpage assessment
 *  Copyright (C) 2008-2014  Open-S Company
 * 
 *  This file is part of Tanaguru.
 * 
 *  Tanaguru is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as
 *  published by the Free Software Foundation, either version 3 of the
 *  License, or (at your option) any later version.
 * 
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 * 
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 *  Contact us by mail: open-s AT open-s DOT com
 */

package org.opens.tanaguru.util.http;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.nio.charset.IllegalCharsetNameException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.util.URIUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.log4j.Logger;

/**
 * Singleton helper that issues HEAD/GET requests to determine whether a URL
 * is reachable, and that can fetch the body of a page as a String.
 * <p>
 * Proxy host/port, the proxy exclusion list and the timeouts are plain
 * mutable fields configured through setters on the unique instance; no
 * synchronization is performed on them.
 * <p>
 * NOTE(review): {@code HttpClient} here is the commons-httpclient 3.x class,
 * whereas {@code ConnRoutePNames}, {@code CoreConnectionPNames},
 * {@code CoreProtocolPNames} and {@code HttpHost} come from HttpComponents
 * 4.x (org.apache.http). Confirm that the 3.x client actually honours these
 * parameter keys (in particular the default proxy and connection timeout).
 *
 * @author jkowalczyk
 */
public class HttpRequestHandler {

    private static final String TANAGURU_USER_AGENT = "tanaguru";
    private static final Logger LOGGER = Logger.getLogger(HttpRequestHandler.class);

    /** Value returned by {@link #computeStatus(int)} for a reachable resource. */
    private static final int ACCESSIBLE = 1;
    /** Value returned by {@link #computeStatus(int)} for an unreachable resource. */
    private static final int NOT_ACCESSIBLE = 0;

    private String proxyPort;

    public void setProxyPort(String proxyPort) {
        this.proxyPort = proxyPort;
    }

    private String proxyHost;

    public void setProxyHost(String proxyHost) {
        this.proxyHost = proxyHost;
    }

    /**
     * Url fragments for which the proxy must not be used. Multiple values
     * can be set through a unique String separated by ;
     */
    private List<String> proxyExclusionUrlList = new ArrayList<String>();

    /**
     * @return the live (mutable) list of url fragments excluded from the proxy
     */
    public List<String> getProxyExclusionUrlList() {
        return proxyExclusionUrlList;
    }

    /**
     * Adds one or several url fragments to the proxy exclusion list.
     * <p>
     * A null or blank argument is ignored, and so is any blank entry between
     * two separators: an empty fragment would match every url (every String
     * contains the empty String) and silently disable the proxy.
     *
     * @param proxyExclusionUrl the fragments to exclude, separated by ;
     */
    public void setProxyExclusionUrl(String proxyExclusionUrl) {
        if (StringUtils.isBlank(proxyExclusionUrl)) {
            return;
        }
        for (String excludedUrl : proxyExclusionUrl.split(";")) {
            String trimmedUrl = excludedUrl.trim();
            if (StringUtils.isNotEmpty(trimmedUrl)) {
                proxyExclusionUrlList.add(trimmedUrl);
            }
        }
    }

    private int connectionTimeout = 3000;

    public void setConnectionTimeout(int connectionTimeout) {
        this.connectionTimeout = connectionTimeout;
    }

    private int socketTimeout = 3000;

    public void setSocketTimeout(int socketTimeout) {
        this.socketTimeout = socketTimeout;
    }

    /**
     * The holder that handles the unique instance of HttpRequestHandler
     */
    private static class HttpRequestHandlerHolder {
        private static final HttpRequestHandler INSTANCE = new HttpRequestHandler();
    }

    /**
     * Private constructor
     */
    private HttpRequestHandler() {
    }

    /**
     * Singleton pattern based on the "Initialization-on-demand holder idiom".
     * See http://en.wikipedia.org/wiki/Initialization_on_demand_holder_idiom
     *
     * @return the unique instance of HttpRequestHandler
     */
    public static HttpRequestHandler getInstance() {
        return HttpRequestHandlerHolder.INSTANCE;
    }

    /**
     * Checks the url with a HEAD request first; when the HEAD status is an
     * error status, the url is checked again with a GET request before
     * being declared not accessible.
     *
     * @param url the url to test
     * @return whether the given Url is accessible or not
     */
    public boolean isUrlAccessible(String url) {
        if (computeStatus(getHttpStatus(url)) == ACCESSIBLE) {
            return true;
        }
        return computeStatus(getHttpStatusFromGet(url)) == ACCESSIBLE;
    }

    /**
     * Retrieves the HTTP status of the given url with a HEAD request.
     *
     * @param url the url to request
     * @return the status code returned by the request, or
     *         {@link HttpStatus#SC_NOT_FOUND} when the request raised an
     *         UnknownHostException, an IllegalArgumentException or an
     *         IOException
     */
    public int getHttpStatus(String url) {
        String encodedUrl = getEncodedUrl(url);
        HttpClient httpClient = getHttpClient(encodedUrl);
        HeadMethod head = new HeadMethod(encodedUrl);
        try {
            LOGGER.debug("executing head request to retrieve page status on " + head.getURI());
            int status = httpClient.executeMethod(head);
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("received " + status + " from head request");
                for (Header h : head.getResponseHeaders()) {
                    LOGGER.debug("header : " + h.toExternalForm());
                }
            }
            return status;
        } catch (UnknownHostException uhe) {
            LOGGER.warn("UnknownHostException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } catch (IllegalArgumentException iae) {
            LOGGER.warn("IllegalArgumentException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } catch (IOException ioe) {
            LOGGER.warn("IOException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } finally {
            // Always hand the connection back once the request is over.
            head.releaseConnection();
        }
    }

    /**
     * Retrieves the body of the given url with a GET request.
     * <p>
     * NOTE(review): the body is decoded with the platform default charset
     * ({@code new String(byte[])}), not with the charset announced by the
     * response; confirm this is intended.
     *
     * @param url the url to request
     * @return the response body when the status is {@link HttpStatus#SC_OK};
     *         an empty String when the url is empty, when the status is not
     *         200 or when a NullPointerException is raised while handling
     *         the request
     * @throws URISyntaxException declared for callers; not raised by the
     *         code of this method itself
     * @throws UnknownHostException when the host cannot be resolved
     * @throws IOException when the request fails
     * @throws IllegalCharsetNameException declared for callers
     */
    public String getHttpContent(String url)
            throws URISyntaxException, UnknownHostException, IOException, IllegalCharsetNameException {
        if (StringUtils.isEmpty(url)) {
            return "";
        }

        String encodedUrl = getEncodedUrl(url);
        HttpClient httpClient = getHttpClient(encodedUrl);
        GetMethod get = new GetMethod(encodedUrl);
        try {
            LOGGER.debug("executing request to retrieve content on " + get.getURI());
            int status = httpClient.executeMethod(get);
            LOGGER.debug("received " + status + " from get request");
            if (status != HttpStatus.SC_OK) {
                LOGGER.debug("status != HttpStatus.SC_OK ");
                return "";
            }
            LOGGER.debug("status == HttpStatus.SC_OK ");
            byte[] responseBody = get.getResponseBody();
            return new String(responseBody);
        } catch (NullPointerException npe) {
            // Kept as a best-effort net (e.g. no response body available):
            // the caller gets an empty content instead of a failure.
            LOGGER.debug("NullPointerException");
            return "";
        } finally {
            // Always hand the connection back once the request is over.
            get.releaseConnection();
            LOGGER.debug("finally");
        }
    }

    /**
     * Retrieves the HTTP status of the given url with a GET request.
     * <p>
     * Failures are mapped exactly like in {@link #getHttpStatus(String)} so
     * that the GET fallback of {@link #isUrlAccessible(String)} cannot raise
     * an exception the HEAD attempt would have absorbed.
     *
     * @param url the url to request
     * @return the status code returned by the request, or
     *         {@link HttpStatus#SC_NOT_FOUND} when the request raised an
     *         UnknownHostException, an IllegalArgumentException or an
     *         IOException
     */
    public int getHttpStatusFromGet(String url) {
        String encodedUrl = getEncodedUrl(url);
        HttpClient httpClient = getHttpClient(encodedUrl);
        GetMethod get = new GetMethod(encodedUrl);
        try {
            LOGGER.debug("executing get request to retrieve status on " + get.getURI());
            int status = httpClient.executeMethod(get);
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("received " + status + " from get request");
                for (Header h : get.getResponseHeaders()) {
                    LOGGER.debug("header : " + h.toExternalForm());
                }
            }
            return status;
        } catch (UnknownHostException uhe) {
            LOGGER.warn("UnknownHostException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } catch (IllegalArgumentException iae) {
            LOGGER.warn("IllegalArgumentException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } catch (IOException ioe) {
            LOGGER.warn("IOException on " + encodedUrl);
            return HttpStatus.SC_NOT_FOUND;
        } finally {
            // Always hand the connection back once the request is over.
            get.releaseConnection();
        }
    }

    /**
     * Builds a new client carrying the proxy (when configured and when the
     * url is not excluded), the timeouts and the user-agent.
     *
     * @param url the (encoded) url about to be requested
     * @return a freshly configured HttpClient
     */
    private HttpClient getHttpClient(String url) {
        HttpClient httpclient = new HttpClient();
        boolean isExcludedUrl = isExcludedFromProxy(url);
        // Both the host AND the port are required to declare a proxy.
        if (StringUtils.isNotEmpty(proxyHost) && StringUtils.isNotEmpty(proxyPort) && !isExcludedUrl) {
            HttpHost proxy = new HttpHost(proxyHost, Integer.parseInt(proxyPort));
            httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        }
        setTimeouts(httpclient.getParams());
        httpclient.getParams().setParameter(CoreProtocolPNames.USER_AGENT, TANAGURU_USER_AGENT);
        return httpclient;
    }

    /**
     * @param url the url about to be requested
     * @return true when the url contains one of the non-empty fragments of
     *         the proxy exclusion list
     */
    private boolean isExcludedFromProxy(String url) {
        for (String excludedUrl : proxyExclusionUrlList) {
            // An empty fragment is contained in every url: never let it match.
            if (StringUtils.isNotEmpty(excludedUrl) && url.contains(excludedUrl)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Copies the configured connection and socket timeouts to the given
     * client parameters.
     *
     * @param params the parameters of the client to configure
     */
    private void setTimeouts(HttpClientParams params) {
        params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, connectionTimeout);
        params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, socketTimeout);
    }

    /**
     * Maps an HTTP status code to an accessibility flag: the listed 4xx and
     * 5xx codes mean "not accessible", every other code (1xx, 2xx, 3xx and
     * any code not listed) means "accessible".
     *
     * @param status the HTTP status code
     * @return 0 when the resource is not accessible, 1 otherwise
     */
    private int computeStatus(int status) {
        switch (status) {
        case HttpStatus.SC_FORBIDDEN:
        case HttpStatus.SC_METHOD_NOT_ALLOWED:
        case HttpStatus.SC_BAD_REQUEST:
        case HttpStatus.SC_UNAUTHORIZED:
        case HttpStatus.SC_PAYMENT_REQUIRED:
        case HttpStatus.SC_NOT_FOUND:
        case HttpStatus.SC_NOT_ACCEPTABLE:
        case HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED:
        case HttpStatus.SC_REQUEST_TIMEOUT:
        case HttpStatus.SC_CONFLICT:
        case HttpStatus.SC_GONE:
        case HttpStatus.SC_LENGTH_REQUIRED:
        case HttpStatus.SC_PRECONDITION_FAILED:
        case HttpStatus.SC_REQUEST_TOO_LONG:
        case HttpStatus.SC_REQUEST_URI_TOO_LONG:
        case HttpStatus.SC_UNSUPPORTED_MEDIA_TYPE:
        case HttpStatus.SC_REQUESTED_RANGE_NOT_SATISFIABLE:
        case HttpStatus.SC_EXPECTATION_FAILED:
        case HttpStatus.SC_INSUFFICIENT_SPACE_ON_RESOURCE:
        case HttpStatus.SC_METHOD_FAILURE:
        case HttpStatus.SC_UNPROCESSABLE_ENTITY:
        case HttpStatus.SC_LOCKED:
        case HttpStatus.SC_FAILED_DEPENDENCY:
        case HttpStatus.SC_INTERNAL_SERVER_ERROR:
        case HttpStatus.SC_NOT_IMPLEMENTED:
        case HttpStatus.SC_BAD_GATEWAY:
        case HttpStatus.SC_SERVICE_UNAVAILABLE:
        case HttpStatus.SC_GATEWAY_TIMEOUT:
        case HttpStatus.SC_HTTP_VERSION_NOT_SUPPORTED:
        case HttpStatus.SC_INSUFFICIENT_STORAGE:
            return NOT_ACCESSIBLE;
        default:
            // Informational, success and redirection codes, as well as any
            // code that is not listed above, are considered accessible.
            return ACCESSIBLE;
        }
    }

    /**
     * Decodes then re-encodes the query of the given url so that it is
     * encoded exactly once.
     *
     * @param url the raw url
     * @return the encoded url, or the url as received when a URIException
     *         is raised
     */
    private String getEncodedUrl(String url) {
        try {
            return URIUtil.encodeQuery(URIUtil.decode(url));
        } catch (URIException ue) {
            LOGGER.warn("URIException on " + url);
            return url;
        }
    }

}