Example usage for org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE

List of usage examples for org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE

Introduction

On this page you can find example usages of org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE.

Prototype

int SC_METHOD_FAILURE

To view the source code for org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE, click the Source Link.

Document

Static constant for a 420 error.

Usage

From source file: org.opens.tanaguru.util.http.HttpRequestHandler.java

/**
 * Collapses an HTTP status code into a binary flag.
 *
 * @param status the HTTP status code to classify
 * @return {@code 0} when {@code status} equals one of the error constants
 *         enumerated in the switch below, {@code 1} for every other value
 */
private int computeStatus(int status) {
    switch (status) {
    // client-side error statuses
    case HttpStatus.SC_BAD_REQUEST:
    case HttpStatus.SC_UNAUTHORIZED:
    case HttpStatus.SC_PAYMENT_REQUIRED:
    case HttpStatus.SC_FORBIDDEN:
    case HttpStatus.SC_NOT_FOUND:
    case HttpStatus.SC_METHOD_NOT_ALLOWED:
    case HttpStatus.SC_NOT_ACCEPTABLE:
    case HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED:
    case HttpStatus.SC_REQUEST_TIMEOUT:
    case HttpStatus.SC_CONFLICT:
    case HttpStatus.SC_GONE:
    case HttpStatus.SC_LENGTH_REQUIRED:
    case HttpStatus.SC_PRECONDITION_FAILED:
    case HttpStatus.SC_REQUEST_TOO_LONG:
    case HttpStatus.SC_REQUEST_URI_TOO_LONG:
    case HttpStatus.SC_UNSUPPORTED_MEDIA_TYPE:
    case HttpStatus.SC_REQUESTED_RANGE_NOT_SATISFIABLE:
    case HttpStatus.SC_EXPECTATION_FAILED:
    case HttpStatus.SC_INSUFFICIENT_SPACE_ON_RESOURCE:
    case HttpStatus.SC_METHOD_FAILURE:
    case HttpStatus.SC_UNPROCESSABLE_ENTITY:
    case HttpStatus.SC_LOCKED:
    case HttpStatus.SC_FAILED_DEPENDENCY:
    // server-side error statuses
    case HttpStatus.SC_INTERNAL_SERVER_ERROR:
    case HttpStatus.SC_NOT_IMPLEMENTED:
    case HttpStatus.SC_BAD_GATEWAY:
    case HttpStatus.SC_SERVICE_UNAVAILABLE:
    case HttpStatus.SC_GATEWAY_TIMEOUT:
    case HttpStatus.SC_HTTP_VERSION_NOT_SUPPORTED:
    case HttpStatus.SC_INSUFFICIENT_STORAGE:
        return 0;
    default:
        // informational, success and redirect statuses, plus any code not
        // enumerated above, all map to 1
        return 1;
    }
}

From source file: org.paxle.crawler.http.impl.HttpCrawler.java

/**
 * Downloads the resource at the given URI into a crawler document.
 * <p>
 * A HEAD request is issued first so that the content type and content length can be
 * checked before any body is transferred. Unless one of those checks rejects the
 * resource, or the HEAD request fails with a status other than "method failure" /
 * "method not allowed", a GET request follows and its (possibly decoded) body is
 * stored via the crawler tools of the current context.
 * <p>
 * Failures are reported through the status of the returned document rather than by
 * throwing; the connection of whichever method was executed last is always released.
 *
 * @param requestUri the location to crawl, must not be {@code null}
 * @return the crawler document, carrying either {@code Status.OK} or a failure status
 * @throws NullPointerException if {@code requestUri} is {@code null}
 * @throws IllegalStateException if a failure occurs before the crawler document could
 *         be created, so that there is no document to carry the failure status
 */
public ICrawlerDocument request(URI requestUri) {
    if (requestUri == null) {
        throw new NullPointerException("URL was null");
    }
    this.logger.debug(String.format("Crawling URL '%s' ...", requestUri));

    ICrawlerDocument doc = null;
    HttpMethod method = null;
    try {
        final ICrawlerContext ctx = this.contextLocal.getCurrentContext();

        // creating an empty crawler-document
        doc = ctx.createDocument();
        doc.setLocation(requestUri);

        final String uriAsciiString = requestUri.toASCIIString();

        /* ==============================================================================
         * HTTP HEAD request
         * 
         * first use the HEAD method to determine whether the MIME-type is supported
         * and to compare the content-length with the maximum allowed download size
         * (both only if the server provides this information, if not, the file is
         * fetched)
         * ============================================================================== */
        method = new HeadMethod(uriAsciiString); // automatically follows redirects
        this.initRequestMethod(method);
        int statusCode = this.getHttpClient().executeMethod(method);

        // servers answering HEAD with one of these codes are retried with GET below
        final boolean headUnsupported = (statusCode == HttpStatus.SC_METHOD_FAILURE
                || statusCode == HttpStatus.SC_METHOD_NOT_ALLOWED);
        if (!headUnsupported) {
            if (statusCode != HttpStatus.SC_OK) {
                // RFC 2616 states that the GET and HEAD methods _must_ be supported by any
                // general purpose servers (which are in fact the ones we are connecting to here)

                if (statusCode == HttpStatus.SC_NOT_FOUND) {
                    doc.setStatus(ICrawlerDocument.Status.NOT_FOUND);
                } else {
                    doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE,
                            String.format("Server returned: %s", method.getStatusLine()));
                }

                this.logger.warn(String.format("Crawling of URL '%s' failed. Server returned: %s", requestUri,
                        method.getStatusLine()));
                return doc;
            }

            // getting the mimetype and charset
            Header contentTypeHeader = method.getResponseHeader(HTTPHEADER_CONTENT_TYPE);
            if (!handleContentTypeHeader(contentTypeHeader, doc))
                return doc;

            // reject the document if content-length is above our limit
            Header contentLengthHeader = method.getResponseHeader(HTTPHEADER_CONTENT_LENGTH);
            if (!handleContentLengthHeader(contentLengthHeader, doc))
                return doc;

            // FIXME: we've been redirected, re-enqueue the new URL and abort processing
            //if (!requestUri.equals(method.getURI())) ;            
        }

        /* ==============================================================================
         * HTTP GET request
         * 
         * secondly - if everything is alright up to now - proceed with getting the 
         * actual document
         * ============================================================================== */
        HttpMethod getMethod = new GetMethod(uriAsciiString); // automatically follows redirects
        // the HEAD exchange is finished; give its connection back before issuing the GET
        method.releaseConnection();

        method = getMethod;
        this.initRequestMethod(method);

        // send the request to the server
        statusCode = this.getHttpClient().executeMethod(method);

        // check the response status code
        if (statusCode != HttpStatus.SC_OK) {
            if (statusCode == HttpStatus.SC_NOT_FOUND) {
                doc.setStatus(ICrawlerDocument.Status.NOT_FOUND);
            } else {
                doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE,
                        String.format("Server returned: %s", method.getStatusLine()));
            }

            this.logger.warn(String.format("Crawling of URL '%s' failed. Server returned: %s", requestUri,
                    method.getStatusLine()));
            return doc;
        }

        // FIXME: we've been redirected, re-enqueue the new URL and abort processing
        // if (!requestUri.equals(method.getURI())) ; 

        /*
         * HTTP Content-Type
         * - getting the mimetype and charset
         */
        Header contentTypeHeader = method.getResponseHeader(HTTPHEADER_CONTENT_TYPE);
        if (!handleContentTypeHeader(contentTypeHeader, doc))
            return doc;

        /* 
         * HTTP Content-Length
         * - Reject the document if content-length is above our limit
         * 
         *   We do this a second time here because some servers may have set the content-length
         *   of the head response to <code>0</code>
         */
        Header contentLengthHeader = method.getResponseHeader(HTTPHEADER_CONTENT_LENGTH);
        if (!handleContentLengthHeader(contentLengthHeader, doc))
            return doc;

        extractHttpHeaders(method, doc); // externalised into this method to cleanup here a bit

        // getting the response body
        InputStream respBody = method.getResponseBodyAsStream();

        // handle the content-encoding, i.e. decompress the server's response
        Header contentEncodingHeader = method.getResponseHeader(HTTPHEADER_CONTENT_ENCODING);
        try {
            respBody = handleContentEncoding(contentEncodingHeader, respBody);

            /* Limit the max allowed length of the content to copy. -1 is used for no limit.
             * 
             * We need to set a limit if:
             * a) the user has configured a max-download-size AND
             * b) the server returned no content-length header
             */
            int copyLimit = (this.maxDownloadSize <= 0 || contentLengthHeader != null) ? -1
                    : this.maxDownloadSize;

            // copy the content to file
            final ICrawlerTools crawlerTools = ctx.getCrawlerTools();
            crawlerTools.saveInto(doc, respBody, lrc, copyLimit);

            doc.setStatus(ICrawlerDocument.Status.OK);
            this.logger.debug(String.format("Crawling of URL '%s' finished.", requestUri));
        } catch (IOException e) {
            // only the corrupt-gzip case is handled here, everything else goes to the outer handlers
            String msg = e.getMessage();
            if (!"Corrupt GZIP trailer".equals(msg))
                throw e;

            setHostSetting(method.getURI().getHost(), PREF_NO_ENCODING);
            msg = String.format("server sent a corrupt gzip trailer at URL '%s'", requestUri);
            logger.warn(msg);

            // FIXME re-enqueue command
            doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, msg);
        } finally {
            // the body stream may be null when the response carries no body; closing it
            // unconditionally would replace the original failure with a NullPointerException
            if (respBody != null) {
                respBody.close();
            }
        }
    } catch (NoRouteToHostException e) {
        this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage()));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (UnknownHostException e) {
        this.logger.warn(String.format("Error crawling %s: Unknown host.", requestUri));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (ConnectException e) {
        this.logger.warn(String.format("Error crawling %s: Unable to connect to host.", requestUri));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (ConnectTimeoutException e) {
        this.logger.warn(String.format("Error crawling %s: %s.", requestUri, e.getMessage()));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (SocketTimeoutException e) {
        this.logger.warn(String.format("Error crawling %s: Connection timeout.", requestUri));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (CircularRedirectException e) {
        this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage()));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (NoHttpResponseException e) {
        this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage()));
        doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage());
    } catch (ContentLengthLimitExceededException e) {
        this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage()));
        doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, e.getMessage());
    } catch (Throwable e) {
        String errorMsg;
        if (e instanceof HttpException) {
            errorMsg = "Unrecovered protocol exception: [%s] %s";
        } else if (e instanceof IOException) {
            errorMsg = "Transport exceptions: [%s] %s";
        } else {
            errorMsg = "Unexpected exception: [%s] %s";
        }
        errorMsg = String.format(errorMsg, e.getClass().getName(), e.getMessage());

        // hand the throwable to the logger so the stack trace ends up in the log
        // instead of on stderr
        this.logger.error(String.format("Error crawling %s: %s", requestUri, errorMsg), e);

        if (doc == null) {
            // the failure happened before a document existed, so there is nothing to
            // flag; surface the real cause instead of a NullPointerException
            throw new IllegalStateException(errorMsg, e);
        }
        doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, errorMsg);
    } finally {
        if (method != null)
            method.releaseConnection();
    }

    return doc;
}