List of usage examples for org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE
int SC_METHOD_FAILURE
To view the source code for org.apache.commons.httpclient HttpStatus SC_METHOD_FAILURE, click the Source Link.
From source file:org.opens.tanaguru.util.http.HttpRequestHandler.java
private int computeStatus(int status) { switch (status) { case HttpStatus.SC_FORBIDDEN: case HttpStatus.SC_METHOD_NOT_ALLOWED: case HttpStatus.SC_BAD_REQUEST: case HttpStatus.SC_UNAUTHORIZED: case HttpStatus.SC_PAYMENT_REQUIRED: case HttpStatus.SC_NOT_FOUND: case HttpStatus.SC_NOT_ACCEPTABLE: case HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED: case HttpStatus.SC_REQUEST_TIMEOUT: case HttpStatus.SC_CONFLICT: case HttpStatus.SC_GONE: case HttpStatus.SC_LENGTH_REQUIRED: case HttpStatus.SC_PRECONDITION_FAILED: case HttpStatus.SC_REQUEST_TOO_LONG: case HttpStatus.SC_REQUEST_URI_TOO_LONG: case HttpStatus.SC_UNSUPPORTED_MEDIA_TYPE: case HttpStatus.SC_REQUESTED_RANGE_NOT_SATISFIABLE: case HttpStatus.SC_EXPECTATION_FAILED: case HttpStatus.SC_INSUFFICIENT_SPACE_ON_RESOURCE: case HttpStatus.SC_METHOD_FAILURE: case HttpStatus.SC_UNPROCESSABLE_ENTITY: case HttpStatus.SC_LOCKED: case HttpStatus.SC_FAILED_DEPENDENCY: case HttpStatus.SC_INTERNAL_SERVER_ERROR: case HttpStatus.SC_NOT_IMPLEMENTED: case HttpStatus.SC_BAD_GATEWAY: case HttpStatus.SC_SERVICE_UNAVAILABLE: case HttpStatus.SC_GATEWAY_TIMEOUT: case HttpStatus.SC_HTTP_VERSION_NOT_SUPPORTED: case HttpStatus.SC_INSUFFICIENT_STORAGE: return 0; case HttpStatus.SC_CONTINUE: case HttpStatus.SC_SWITCHING_PROTOCOLS: case HttpStatus.SC_PROCESSING: case HttpStatus.SC_OK: case HttpStatus.SC_CREATED: case HttpStatus.SC_ACCEPTED: case HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION: case HttpStatus.SC_NO_CONTENT: case HttpStatus.SC_RESET_CONTENT: case HttpStatus.SC_PARTIAL_CONTENT: case HttpStatus.SC_MULTI_STATUS: case HttpStatus.SC_MULTIPLE_CHOICES: case HttpStatus.SC_MOVED_PERMANENTLY: case HttpStatus.SC_MOVED_TEMPORARILY: case HttpStatus.SC_SEE_OTHER: case HttpStatus.SC_NOT_MODIFIED: case HttpStatus.SC_USE_PROXY: case HttpStatus.SC_TEMPORARY_REDIRECT: return 1; default://from w ww .j a v a 2 s . c om return 1; } }
From source file:org.paxle.crawler.http.impl.HttpCrawler.java
public ICrawlerDocument request(URI requestUri) { if (requestUri == null) throw new NullPointerException("URL was null"); this.logger.debug(String.format("Crawling URL '%s' ...", requestUri)); ICrawlerDocument doc = null;/*from ww w . ja v a2s . com*/ HttpMethod method = null; try { final ICrawlerContext ctx = this.contextLocal.getCurrentContext(); // creating an empty crawler-document doc = ctx.createDocument(); doc.setLocation(requestUri); final String uriAsciiString = requestUri.toASCIIString(); /* ============================================================================== * HTTP HEAD request * * first use the HEAD method to determine whether the MIME-type is supported * and to compare the content-length with the maximum allowed download size * (both only if the server provides this information, if not, the file is * fetched) * ============================================================================== */ method = new HeadMethod(uriAsciiString); // automatically follows redirects this.initRequestMethod(method); int statusCode = this.getHttpClient().executeMethod(method); final boolean headUnsupported = (statusCode == HttpStatus.SC_METHOD_FAILURE || statusCode == HttpStatus.SC_METHOD_NOT_ALLOWED); if (!headUnsupported) { if (statusCode != HttpStatus.SC_OK) { // RFC 2616 states that the GET and HEAD methods _must_ be supported by any // general purpose servers (which are in fact the ones we are connecting to here) if (statusCode == HttpStatus.SC_NOT_FOUND) { doc.setStatus(ICrawlerDocument.Status.NOT_FOUND); } else { doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, String.format("Server returned: %s", method.getStatusLine())); } this.logger.warn(String.format("Crawling of URL '%s' failed. 
Server returned: %s", requestUri, method.getStatusLine())); return doc; } // getting the mimetype and charset Header contentTypeHeader = method.getResponseHeader(HTTPHEADER_CONTENT_TYPE); if (!handleContentTypeHeader(contentTypeHeader, doc)) return doc; // reject the document if content-length is above our limit Header contentLengthHeader = method.getResponseHeader(HTTPHEADER_CONTENT_LENGTH); if (!handleContentLengthHeader(contentLengthHeader, doc)) return doc; // FIXME: we've been redirected, re-enqueue the new URL and abort processing //if (!requestUri.equals(method.getURI())) ; } /* ============================================================================== * HTTP GET request * * secondly - if everything is alright up to now - proceed with getting the * actual document * ============================================================================== */ HttpMethod getMethod = new GetMethod(uriAsciiString); // automatically follows redirects method.releaseConnection(); method = getMethod; this.initRequestMethod(method); // send the request to the server statusCode = this.getHttpClient().executeMethod(method); // check the response status code if (statusCode != HttpStatus.SC_OK) { if (statusCode == HttpStatus.SC_NOT_FOUND) { doc.setStatus(ICrawlerDocument.Status.NOT_FOUND); } else { doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, String.format("Server returned: %s", method.getStatusLine())); } this.logger.warn(String.format("Crawling of URL '%s' failed. 
Server returned: %s", requestUri, method.getStatusLine())); return doc; } // FIXME: we've been redirected, re-enqueue the new URL and abort processing // if (!requestUri.equals(method.getURI())) ; /* * HTTP Content-Type * - getting the mimetype and charset */ Header contentTypeHeader = method.getResponseHeader(HTTPHEADER_CONTENT_TYPE); if (!handleContentTypeHeader(contentTypeHeader, doc)) return doc; /* * HTTP Content-Length * - Reject the document if content-length is above our limit * * We do this a second time here because some servers may have set the content-length * of the head response to <code>0</code> */ Header contentLengthHeader = method.getResponseHeader(HTTPHEADER_CONTENT_LENGTH); if (!handleContentLengthHeader(contentLengthHeader, doc)) return doc; extractHttpHeaders(method, doc); // externalised into this method to cleanup here a bit // getting the response body InputStream respBody = method.getResponseBodyAsStream(); // handle the content-encoding, i.e. decompress the server's response Header contentEncodingHeader = method.getResponseHeader(HTTPHEADER_CONTENT_ENCODING); try { respBody = handleContentEncoding(contentEncodingHeader, respBody); /* Limit the max allowed length of the content to copy. -1 is used for no limit. * * We need to set a limit if: * a) the user has configured a max-download-size AND * b) the server returned no content-length header */ int copyLimit = (this.maxDownloadSize <= 0 || contentLengthHeader != null) ? 
-1 : this.maxDownloadSize; // copy the content to file final ICrawlerTools crawlerTools = ctx.getCrawlerTools(); crawlerTools.saveInto(doc, respBody, lrc, copyLimit); doc.setStatus(ICrawlerDocument.Status.OK); this.logger.debug(String.format("Crawling of URL '%s' finished.", requestUri)); } catch (IOException e) { String msg = e.getMessage(); if (msg == null || !msg.equals("Corrupt GZIP trailer")) throw e; setHostSetting(method.getURI().getHost(), PREF_NO_ENCODING); msg = String.format("server sent a corrupt gzip trailer at URL '%s'", requestUri); logger.warn(msg); // FIXME re-enqueue command doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, msg); } finally { respBody.close(); } } catch (NoRouteToHostException e) { this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage())); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (UnknownHostException e) { this.logger.warn(String.format("Error crawling %s: Unknown host.", requestUri)); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (ConnectException e) { this.logger.warn(String.format("Error crawling %s: Unable to connect to host.", requestUri)); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (ConnectTimeoutException e) { this.logger.warn(String.format("Error crawling %s: %s.", requestUri, e.getMessage())); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (SocketTimeoutException e) { this.logger.warn(String.format("Error crawling %s: Connection timeout.", requestUri)); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (CircularRedirectException e) { this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage())); doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (NoHttpResponseException e) { this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage())); 
doc.setStatus(ICrawlerDocument.Status.NOT_FOUND, e.getMessage()); } catch (ContentLengthLimitExceededException e) { this.logger.warn(String.format("Error crawling %s: %s", requestUri, e.getMessage())); doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, e.getMessage()); } catch (Throwable e) { String errorMsg; if (e instanceof HttpException) { errorMsg = "Unrecovered protocol exception: [%s] %s"; } else if (e instanceof IOException) { errorMsg = "Transport exceptions: [%s] %s"; } else { errorMsg = "Unexpected exception: [%s] %s"; } errorMsg = String.format(errorMsg, e.getClass().getName(), e.getMessage()); this.logger.error(String.format("Error crawling %s: %s", requestUri, errorMsg)); doc.setStatus(ICrawlerDocument.Status.UNKNOWN_FAILURE, errorMsg); e.printStackTrace(); } finally { if (method != null) method.releaseConnection(); } return doc; }