List of usage examples for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED
int SC_NOT_MODIFIED
To view the source code for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED.
Click Source Link
From source file:org.archive.crawler.fetcher.OptimizeFetchHTTP.java
protected void innerProcess(final CrawlURI curi) throws InterruptedException { if (!canFetch(curi)) { // Cannot fetch this, due to protocol, retries, or other problems return;/* w w w. j av a 2s . co m*/ } HttpClient http = this.getClient(); setLocalIP(http); this.curisHandled++; // Note begin time curi.putLong(A_FETCH_BEGAN_TIME, System.currentTimeMillis()); // Get a reference to the HttpRecorder that is set into this ToeThread. HttpRecorder rec = HttpRecorder.getHttpRecorder(); // Shall we get a digest on the content downloaded? boolean digestContent = ((Boolean) getUncheckedAttribute(curi, ATTR_DIGEST_CONTENT)).booleanValue(); String algorithm = null; if (digestContent) { algorithm = ((String) getUncheckedAttribute(curi, ATTR_DIGEST_ALGORITHM)); rec.getRecordedInput().setDigest(algorithm); } else { // clear rec.getRecordedInput().setDigest((MessageDigest) null); } // Below we do two inner classes that add check of midfetch // filters just as we're about to receive the response body. String curiString = curi.getUURI().toString(); HttpMethodBase method = null; if (curi.isPost()) { method = new HttpRecorderPostMethod(curiString, rec) { protected void readResponseBody(HttpState state, HttpConnection conn) throws IOException, HttpException { addResponseContent(this, curi); if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) { doAbort(curi, this, MIDFETCH_ABORT_LOG); } else { super.readResponseBody(state, conn); } } }; } else { method = new HttpRecorderGetMethod(curiString, rec) { protected void readResponseBody(HttpState state, HttpConnection conn) throws IOException, HttpException { addResponseContent(this, curi); if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) { doAbort(curi, this, MIDFETCH_ABORT_LOG); } else { super.readResponseBody(state, conn); } } }; } HostConfiguration customConfigOrNull = configureMethod(curi, method); // Set httpRecorder into curi. Subsequent code both here and later // in extractors expects to find the HttpRecorder in the CrawlURI. curi.setHttpRecorder(rec); // Populate credentials. Set config so auth. is not automatic. boolean addedCredentials = populateCredentials(curi, method); method.setDoAuthentication(addedCredentials); // set hardMax on bytes (if set by operator) long hardMax = getMaxLength(curi); // set overall timeout (if set by operator) long timeoutMs = 1000 * getTimeout(curi); // Get max fetch rate (bytes/ms). It comes in in KB/sec long maxRateKBps = getMaxFetchRate(curi); rec.getRecordedInput().setLimits(hardMax, timeoutMs, maxRateKBps); try { http.executeMethod(customConfigOrNull, method); } catch (RecorderTooMuchHeaderException ex) { // when too much header material, abort like other truncations doAbort(curi, method, HEADER_TRUNC); } catch (IOException e) { failedExecuteCleanup(method, curi, e); return; } catch (ArrayIndexOutOfBoundsException e) { // For weird windows-only ArrayIndex exceptions in native // code... see // http://forum.java.sun.com/thread.jsp?forum=11&thread=378356 // treating as if it were an IOException failedExecuteCleanup(method, curi, e); return; } // set softMax on bytes to get (if implied by content-length) long softMax = method.getResponseContentLength(); try { if (!curi.isSeed() && curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) { logger.debug(curi.getUURI().toString() + " is not modify"); curi.skipToProcessorChain(getController().getPostprocessorChain()); } else if (!method.isAborted()) { // Force read-to-end, so that any socket hangs occur here, // not in later modules. rec.getRecordedInput().readFullyOrUntil(softMax); } } catch (RecorderTimeoutException ex) { doAbort(curi, method, TIMER_TRUNC); } catch (RecorderLengthExceededException ex) { doAbort(curi, method, LENGTH_TRUNC); } catch (IOException e) { cleanup(curi, e, "readFully", S_CONNECT_LOST); return; } catch (ArrayIndexOutOfBoundsException e) { // For weird windows-only ArrayIndex exceptions from native code // see http://forum.java.sun.com/thread.jsp?forum=11&thread=378356 // treating as if it were an IOException cleanup(curi, e, "readFully", S_CONNECT_LOST); return; } finally { // ensure recording has stopped rec.closeRecorders(); logger.debug("cloase backup file.&uri= " + curi.getCrawlURIString()); if (!method.isAborted()) { method.releaseConnection(); } // Note completion time curi.putLong(A_FETCH_COMPLETED_TIME, System.currentTimeMillis()); // Set the response charset into the HttpRecord if available. setCharacterEncoding(rec, method); setSizes(curi, rec); } if (digestContent) { curi.setContentDigest(algorithm, rec.getRecordedInput().getDigestValue()); } logger.info((curi.isPost() ? "POST" : "GET") + " " + curi.getUURI().toString() + " " + method.getStatusCode() + " " + rec.getRecordedInput().getSize() + " " + curi.getContentType()); if (curi.isSuccess() && addedCredentials) { // Promote the credentials from the CrawlURI to the CrawlServer // so they are available for all subsequent CrawlURIs on this // server. promoteCredentials(curi); if (logger.isDebugEnabled()) { // Print out the cookie. Might help with the debugging. Header setCookie = method.getResponseHeader("set-cookie"); if (setCookie != null) { logger.debug(setCookie.toString().trim()); } } } else if (method.getStatusCode() == HttpStatus.SC_UNAUTHORIZED) { // 401 is not 'success'. handle401(method, curi); } if (rec.getRecordedInput().isOpen()) { logger.error(curi.toString() + " RIS still open. Should have" + " been closed by method release: " + Thread.currentThread().getName()); try { rec.getRecordedInput().close(); } catch (IOException e) { logger.error("second-chance RIS close failed", e); } } }
From source file:org.archive.crawler.util.CrawledBytesHistotable.java
public void accumulate(CrawlURI curi) { if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) { tally(NOTMODIFIED, curi.getContentSize()); tally(NOTMODIFIEDCOUNT, 1);/*from w ww . j a va 2s . c o m*/ } else if (curi.getAnnotations().contains("duplicate:digest")) { tally(DUPLICATE, curi.getContentSize()); tally(DUPLICATECOUNT, 1); } else { tally(NOVEL, curi.getContentSize()); tally(NOVELCOUNT, 1); } }
From source file:org.archive.modules.fetcher.FetchStats.java
public synchronized void tally(CrawlURI curi, Stage stage) { switch (stage) { case SCHEDULED: totalScheduled++;//from w w w . j a v a 2s . c om break; case RETRIED: if (curi.getFetchStatus() <= 0) { fetchNonResponses++; } break; case SUCCEEDED: fetchSuccesses++; fetchResponses++; totalBytes += curi.getContentSize(); successBytes += curi.getContentSize(); if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) { notModifiedBytes += curi.getContentSize(); notModifiedUrls++; } else if (curi.getAnnotations().contains("duplicate:digest")) { dupByHashBytes += curi.getContentSize(); dupByHashUrls++; } else { novelBytes += curi.getContentSize(); novelUrls++; } lastSuccessTime = curi.getFetchCompletedTime(); break; case DISREGARDED: fetchDisregards++; if (curi.getFetchStatus() == S_ROBOTS_PRECLUDED) { robotsDenials++; } break; case FAILED: if (curi.getFetchStatus() <= 0) { fetchNonResponses++; } else { fetchResponses++; totalBytes += curi.getContentSize(); if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) { notModifiedBytes += curi.getContentSize(); notModifiedUrls++; } else if (curi.getAnnotations().contains("duplicate:digest")) { dupByHashBytes += curi.getContentSize(); dupByHashUrls++; } else { novelBytes += curi.getContentSize(); novelUrls++; } } fetchFailures++; break; } }
From source file:org.archive.modules.writer.WARCWriterProcessor.java
protected void writeHttpRecords(final CrawlURI curi, WARCWriter w, final URI baseid, final String timestamp) throws IOException { // Add named fields for ip, checksum, and relate the metadata // and request to the resource field. // TODO: Use other than ANVL (or rename ANVL as NameValue or // use RFC822 (commons-httpclient?). ANVLRecord headers = new ANVLRecord(); if (curi.getContentDigest() != null) { headers.addLabelValue(HEADER_KEY_PAYLOAD_DIGEST, curi.getContentDigestSchemeString()); }/*from ww w. ja v a 2s. c o m*/ headers.addLabelValue(HEADER_KEY_IP, getHostAddress(curi)); URI rid; if (getWriteRevisitForIdenticalDigests() && curi.hasContentDigestHistory() && curi.getContentDigestHistory().get(A_ORIGINAL_URL) != null) { rid = writeRevisitUriAgnosticDigest(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi, headers); } else if (IdenticalDigestDecideRule.hasIdenticalDigest(curi) && getWriteRevisitForIdenticalDigests()) { rid = writeRevisitDigest(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi, headers); } else if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED && getWriteRevisitForNotModified()) { rid = writeRevisitNotModified(w, timestamp, baseid, curi, headers); } else { // Check for truncated annotation String value = null; Collection<String> anno = curi.getAnnotations(); if (anno.contains(TIMER_TRUNC)) { value = NAMED_FIELD_TRUNCATED_VALUE_TIME; } else if (anno.contains(LENGTH_TRUNC)) { value = NAMED_FIELD_TRUNCATED_VALUE_LENGTH; } else if (anno.contains(HEADER_TRUNC)) { value = NAMED_FIELD_TRUNCATED_VALUE_HEAD; } // TODO: Add annotation for TRUNCATED_VALUE_UNSPECIFIED if (value != null) { headers.addLabelValue(HEADER_KEY_TRUNCATED, value); } rid = writeResponse(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi, headers); } headers = new ANVLRecord(); headers.addLabelValue(HEADER_KEY_CONCURRENT_TO, '<' + rid.toString() + '>'); if (getWriteRequests()) { writeRequest(w, timestamp, HTTP_REQUEST_MIMETYPE, baseid, curi, headers); } if (getWriteMetadata()) { writeMetadata(w, timestamp, baseid, curi, headers); } }
From source file:org.infoscoop.request.filter.HTMLFragmentFilter.java
protected int preProcess(HttpClient client, HttpMethod method, ProxyRequest request) { request.addIgnoreHeader("if-none-match"); String cacheURL = request.getRequestHeader("fragment-chacheID"); String cacheLifeTimeStr = request.getRequestHeader("fragment-cacheLifeTime"); int cacheLifeTime = 60;//360 is set by default in script if (cacheLifeTimeStr != null) { try {// w w w. java2 s.c om cacheLifeTime = Integer.parseInt(cacheLifeTimeStr); } catch (NumberFormatException e) { } } if (cacheURL != null && cacheURL.length() > 0) { Cache cache = CacheService.getHandle().getCacheByUrl(cacheURL); if (cache != null && cache.getId() != null) { Calendar cal = Calendar.getInstance(); cal.add(Calendar.MINUTE, -cacheLifeTime);// Update cache every 6 hours Date now = cal.getTime(); if (!now.after(cache.getTimestamp())) { List<Header> headerList; try { headerList = cache.getHeaderList(); } catch (SAXException ex) { log.error("parsing error", ex); throw new RuntimeException(); } //TODO:Fix this later20090612 by endoh String ifModifiedSince = request.getRequestHeader("If-Modified-Since"); if (ifModifiedSince != null) for (Header header : headerList) if (header.getName().equalsIgnoreCase("If-Modified-Since")) if (ifModifiedSince.equals(header.getValue())) return HttpStatus.SC_NOT_MODIFIED; String ifNoneMatch = request.getRequestHeader("If-None-Match"); if (ifNoneMatch != null) for (Header header : headerList) if (header.getName().equalsIgnoreCase("etag")) if (ifNoneMatch.equals(header.getValue())) return HttpStatus.SC_NOT_MODIFIED; //TODO:end for (Header header : headerList) { String name = header.getName(); if (!name.equalsIgnoreCase("transfer-encoding")) request.putResponseHeader(name, header.getValue()); } request.setResponseBody(new ByteArrayInputStream(cache.getBodyBytes())); request.putResponseHeader("MSDPortal-Cache-ID", cache.getId()); //if(log.isInfoEnabled()) log.error("use cache " + cacheURL); return 200; } } } return 0; }
From source file:org.infoscoop.request.filter.ProxyFilterContainer.java
public final int getCache(HttpClient client, HttpMethod method, ProxyRequest request) throws Exception { // check public cache if (method instanceof GetMethod) { request.addIgnoreHeader("content-type"); if (request.allowUserPublicCache()) { // TODO: Is the improvement of the performance necessary? Cache cache = CacheService.getHandle().getCacheByUrl(request.getOriginalURL()); if (cache != null) { int cacheLifeTime = request.getProxy().getCacheLifeTime(); if (cache.getTimestamp().getTime() + cacheLifeTime * 60 * 1000 > System.currentTimeMillis()) { for (Header header : cache.getHeaderList()) { if ("X-IS-REDIRECTED-FROM".equalsIgnoreCase(header.getName())) { request.setRedirectURL(header.getValue()); } else { request.putResponseHeader(header.getName(), header.getValue()); }/*ww w .j a v a 2 s .co m*/ } if (log.isInfoEnabled()) log.info(request.getOriginalURL() + " get from public cache"); String ifModifiedSince = request.getRequestHeader("if-modified-since"); if (ifModifiedSince == null || "Thu, 01 Jun 1970 00:00:00 GMT" .equals(request.getRequestHeader("if-modified-since"))) { doFilterChain(request, new ByteArrayInputStream(cache.getBodyBytes())); return HttpStatus.SC_OK; } else { return HttpStatus.SC_NOT_MODIFIED; } } else { if (log.isInfoEnabled()) log.info(request.getOriginalURL() + " delete from public cache by timeout"); CacheService.getHandle().deleteCacheByUrl(request.getOriginalURL()); } } } } return 0; }
From source file:org.infoscoop.request.filter.ProxyFilterContainer.java
public final int invoke(HttpClient client, HttpMethod method, ProxyRequest request) throws Exception { int preStatus = prepareInvoke(client, method, request); switch (preStatus) { case 0://from ww w . jav a2s . c om break; case EXECUTE_POST_STATUS: doFilterChain(request, request.getResponseBody()); default: return preStatus; } // copy headers sent target server List ignoreHeaderNames = request.getIgnoreHeaders(); List allowedHeaderNames = request.getAllowedHeaders(); boolean allowAllHeader = false; Proxy proxy = request.getProxy(); if (proxy != null) { allowAllHeader = proxy.isAllowAllHeader(); if (!allowAllHeader) allowedHeaderNames.addAll(proxy.getAllowedHeaders()); } AuthenticatorUtil.doAuthentication(client, method, request); StringBuffer headersSb = new StringBuffer(); for (String name : request.getRequestHeaders().keySet()) { String value = request.getRequestHeader(name); String lowname = name.toLowerCase(); if (!allowAllHeader && !allowedHeaderNames.contains(lowname)) continue; if (ignoreHeaderNames.contains(lowname)) continue; if ("cookie".equalsIgnoreCase(name)) { if (proxy.getSendingCookies() != null) { value = RequestUtil.removeCookieParam(value, proxy.getSendingCookies()); } } if ("if-modified-since".equalsIgnoreCase(name) && "Thu, 01 Jun 1970 00:00:00 GMT".equals(value)) continue; method.addRequestHeader(new Header(name, value)); headersSb.append(name + "=" + value + ", "); } int cacheStatus = getCache(client, method, request); if (cacheStatus != 0) return cacheStatus; if (log.isInfoEnabled()) log.info("RequestHeader: " + headersSb); // execute http method and process redirect method.setFollowRedirects(false); client.executeMethod(method); int statusCode = method.getStatusCode(); for (int i = 0; statusCode == HttpStatus.SC_MOVED_TEMPORARILY || statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_SEE_OTHER || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT; i++) { // connection release method.releaseConnection(); if (i == 5) { log.error("The circular redirect is limited by five times."); return 500; } Header location = method.getResponseHeader("Location"); String redirectUrl = location.getValue(); // According to 2,068 1.1 rfc http spec, we cannot appoint the relative URL, // but microsoft.com gives back the relative URL. if (redirectUrl.startsWith("/")) { URI baseURI = method.getURI(); baseURI.setPath(redirectUrl); redirectUrl = baseURI.toString(); } //method.setURI(new URI(redirectUrl, false)); Header[] headers = method.getRequestHeaders(); method = new GetMethod(redirectUrl); for (int j = 0; j < headers.length; j++) { String headerName = headers[j].getName(); if (!headerName.equalsIgnoreCase("content-length") && !headerName.equalsIgnoreCase("authorization")) method.setRequestHeader(headers[j]); } AuthenticatorUtil.doAuthentication(client, method, request); method.setRequestHeader("authorization", request.getRequestHeader("Authorization")); method.setFollowRedirects(false); client.executeMethod(method); statusCode = method.getStatusCode(); request.setRedirectURL(redirectUrl); if (log.isInfoEnabled()) log.info("Redirect " + request.getTargetURL() + " to " + location + "."); } // copy response headers to proxyReqeust Header[] headers = method.getResponseHeaders(); for (int i = 0; i < headers.length; i++) { request.putResponseHeader(headers[i].getName(), headers[i].getValue()); } if (log.isInfoEnabled()) log.info("Original Status:" + statusCode); // check response code if (statusCode == HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED) { log.error("Proxy Authentication Required. Confirm ajax proxy setting."); throw new Exception( "Http Status 407, Proxy Authentication Required. Please contuct System Administrator."); } if (statusCode == HttpStatus.SC_NOT_MODIFIED || statusCode == HttpStatus.SC_RESET_CONTENT) { return statusCode; } else if (statusCode < 200 || statusCode >= 300) { request.setResponseBody(method.getResponseBodyAsStream()); return statusCode; } // process response body InputStream responseStream = null; if (statusCode != HttpStatus.SC_NO_CONTENT) { if (request.allowUserPublicCache()) { byte[] responseBody = method.getResponseBody(); Map<String, List<String>> responseHeaders = request.getResponseHeaders(); if (request.getRedirectURL() != null) responseHeaders.put("X-IS-REDIRECTED-FROM", Arrays.asList(new String[] { request.getRedirectURL() })); if (method instanceof GetMethod) { putCache(request.getOriginalURL(), new ByteArrayInputStream(responseBody), responseHeaders); } responseStream = new ByteArrayInputStream(responseBody); } else { responseStream = method.getResponseBodyAsStream(); } } doFilterChain(request, responseStream); return statusCode != HttpStatus.SC_NO_CONTENT ? method.getStatusCode() : 200; }
From source file:org.methodize.nntprss.feed.Channel.java
/** * Retrieves the latest RSS doc from the remote site *//*from w ww .j a va2 s .c o m*/ public synchronized void poll() { // Use method-level variable // Guard against change in history mid-poll polling = true; // boolean keepHistory = historical; long keepExpiration = expiration; lastPolled = new Date(); int statusCode = -1; HttpMethod method = null; String urlString = url.toString(); try { HttpClient httpClient = getHttpClient(); channelManager.configureHttpClient(httpClient); HttpResult result = null; try { connected = true; boolean redirected = false; int count = 0; do { URL currentUrl = new URL(urlString); method = new GetMethod(urlString); method.setRequestHeader("User-agent", AppConstants.getUserAgent()); method.setRequestHeader("Accept-Encoding", "gzip"); method.setFollowRedirects(false); method.setDoAuthentication(true); // ETag if (lastETag != null) { method.setRequestHeader("If-None-Match", lastETag); } // Last Modified if (lastModified != 0) { final String NAME = "If-Modified-Since"; //defend against such fun like net.freeroller.rickard got If-Modified-Since "Thu, 24 Aug 2028 12:29:54 GMT" if (lastModified < System.currentTimeMillis()) { final String DATE = httpDate.format(new Date(lastModified)); method.setRequestHeader(NAME, DATE); log.debug("channel " + this.name + " using " + NAME + " " + DATE); //ALEK } } method.setFollowRedirects(false); method.setDoAuthentication(true); HostConfiguration hostConfig = new HostConfiguration(); hostConfig.setHost(currentUrl.getHost(), currentUrl.getPort(), currentUrl.getProtocol()); result = executeHttpRequest(httpClient, hostConfig, method); statusCode = result.getStatusCode(); if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_MOVED_TEMPORARILY || statusCode == HttpStatus.SC_SEE_OTHER || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT) { redirected = true; // Resolve against current URI - may be a relative URI try { urlString = new java.net.URI(urlString).resolve(result.getLocation()).toString(); } catch (URISyntaxException use) { // Fall back to just using location from result urlString = result.getLocation(); } if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY && channelManager.isObserveHttp301()) { try { url = new URL(urlString); if (log.isInfoEnabled()) { log.info("Channel = " + this.name + ", updated URL from HTTP Permanent Redirect"); } } catch (MalformedURLException mue) { // Ignore URL permanent redirect for now... } } } else { redirected = false; } // method.getResponseBody(); // method.releaseConnection(); count++; } while (count < 5 && redirected); } catch (HttpRecoverableException hre) { if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - Temporary Http Problem - " + hre.getMessage()); } status = STATUS_CONNECTION_TIMEOUT; statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR; } catch (ConnectException ce) { // @TODO Might also be a connection refused - not only a timeout... if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - Connection Timeout, skipping - " + ce.getMessage()); } status = STATUS_CONNECTION_TIMEOUT; statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR; } catch (UnknownHostException ue) { if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - Unknown Host Exception, skipping"); } status = STATUS_UNKNOWN_HOST; statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR; } catch (NoRouteToHostException re) { if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - No Route To Host Exception, skipping"); } status = STATUS_NO_ROUTE_TO_HOST; statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR; } catch (SocketException se) { // e.g. Network is unreachable if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - Socket Exception, skipping"); } status = STATUS_SOCKET_EXCEPTION; statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR; } // Only process if ok - if not ok (e.g. not modified), don't do anything if (connected && statusCode == HttpStatus.SC_OK) { PushbackInputStream pbis = new PushbackInputStream(new ByteArrayInputStream(result.getResponse()), PUSHBACK_BUFFER_SIZE); skipBOM(pbis); BufferedInputStream bis = new BufferedInputStream(pbis); DocumentBuilder db = AppConstants.newDocumentBuilder(); try { Document rssDoc = null; if (!parseAtAllCost) { try { rssDoc = db.parse(bis); } catch (InternalError ie) { // Crimson library throws InternalErrors if (log.isDebugEnabled()) { log.debug("InternalError thrown by Crimson", ie); } throw new SAXException("InternalError thrown by Crimson: " + ie.getMessage()); } } else { // Parse-at-all-costs selected // Read in document to local array - may need to parse twice ByteArrayOutputStream bos = new ByteArrayOutputStream(); byte[] buf = new byte[1024]; int bytesRead = bis.read(buf); while (bytesRead > -1) { if (bytesRead > 0) { bos.write(buf, 0, bytesRead); } bytesRead = bis.read(buf); } bos.flush(); bos.close(); byte[] rssDocBytes = bos.toByteArray(); try { // Try the XML document parser first - just in case // the doc is well-formed rssDoc = db.parse(new ByteArrayInputStream(rssDocBytes)); } catch (SAXParseException spe) { if (log.isDebugEnabled()) { log.debug("XML parse failed, trying tidy"); } // Fallback to parse-at-all-costs parser rssDoc = LooseParser.parse(new ByteArrayInputStream(rssDocBytes)); } } processChannelDocument(expiration, rssDoc); // Update last modified / etag from headers // lastETag = httpCon.getHeaderField("ETag"); // lastModified = httpCon.getHeaderFieldDate("Last-Modified", 0); Header hdrETag = method.getResponseHeader("ETag"); lastETag = hdrETag != null ? hdrETag.getValue() : null; Header hdrLastModified = method.getResponseHeader("Last-Modified"); lastModified = hdrLastModified != null ? parseHttpDate(hdrLastModified.getValue()) : 0; log.debug("channel " + this.name + " parsed Last-Modifed " + hdrLastModified + " to " + (lastModified != 0 ? "" + (new Date(lastModified)) : "" + lastModified)); //ALEK status = STATUS_OK; } catch (SAXParseException spe) { if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - Error parsing RSS document - check feed"); } status = STATUS_INVALID_CONTENT; } bis.close(); // end if response code == HTTP_OK } else if (connected && statusCode == HttpStatus.SC_NOT_MODIFIED) { if (log.isDebugEnabled()) { log.debug("Channel=" + name + " - HTTP_NOT_MODIFIED, skipping"); } status = STATUS_OK; } else if (statusCode == HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED) { if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - Proxy authentication required"); } status = STATUS_PROXY_AUTHENTICATION_REQUIRED; } else if (statusCode == HttpStatus.SC_UNAUTHORIZED) { if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - Authentication required"); } status = STATUS_USER_AUTHENTICATION_REQUIRED; } // Update channel in database... channelDAO.updateChannel(this); } catch (FileNotFoundException fnfe) { if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - File not found returned by web server - check feed"); } status = STATUS_NOT_FOUND; } catch (Exception e) { if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - Exception while polling channel", e); } } catch (NoClassDefFoundError ncdf) { // Throw if SSL / redirection to HTTPS if (log.isEnabledFor(Priority.WARN)) { log.warn("Channel=" + name + " - NoClassDefFound", ncdf); } } finally { connected = false; polling = false; } }
From source file:org.mule.ibeans.module.http.HttpClientMessageRequester2.java
/** * Make a specific request to the underlying transport * * @param timeout the maximum time the operation should block before returning. * The call should return immediately if there is data available. If * no data becomes available before the timeout elapses, null will be * returned//from w w w.jav a 2s . c om * @return the result of the request wrapped in a MuleMessage object. Null will be * returned if no data was avaialable * @throws Exception if the call to the underlying protocal cuases an exception */ protected MuleMessage doRequest(long timeout) throws Exception { HttpMethod httpMethod = new GetMethod(endpoint.getEndpointURI().getAddress()); if (endpoint.getProperties().containsKey(HttpConstants.HEADER_AUTHORIZATION)) { httpMethod.setDoAuthentication(true); client.getParams().setAuthenticationPreemptive(true); httpMethod.setRequestHeader(HttpConstants.HEADER_AUTHORIZATION, (String) endpoint.getProperty(HttpConstants.HEADER_AUTHORIZATION)); } boolean releaseConn = false; try { HttpClient client = new HttpClient(); if (etag != null && checkEtag) { httpMethod.setRequestHeader(HttpConstants.HEADER_IF_NONE_MATCH, etag); } client.executeMethod(httpMethod); if (httpMethod.getStatusCode() < 400) { MuleMessage message = new HttpMuleMessageFactory(connector.getMuleContext()).create(httpMethod, null /* encoding */); etag = message.getInboundProperty(HttpConstants.HEADER_ETAG, null); if (httpMethod.getStatusCode() == HttpStatus.SC_OK || (httpMethod.getStatusCode() != HttpStatus.SC_NOT_MODIFIED || !checkEtag)) { if (StringUtils.EMPTY.equals(message.getPayload())) { releaseConn = true; } return message; } else { //Not modified, we should really cache the whole message and return it return new DefaultMuleMessage(NullPayload.getInstance(), getConnector().getMuleContext()); } } else { releaseConn = true; throw new ReceiveException( HttpMessages.requestFailedWithStatus(httpMethod.getStatusLine().toString()), endpoint, timeout); } } catch (ReceiveException e) { releaseConn = true; throw e; } catch (Exception e) { releaseConn = true; throw new ReceiveException(endpoint, timeout, e); } finally { if (releaseConn) { httpMethod.releaseConnection(); } } }
From source file:org.mulgara.resolver.http.HttpContent.java
/** * Obtain a valid connection and follow redirects if necessary. * //from w ww . jav a 2 s . com * @param methodType request the headders (HEAD) or body (GET) * @return valid connection method. Can be null. * @throws NotModifiedException if the content validates against the cache * @throws IOException if there's difficulty communicating with the web site */ private HttpMethod establishConnection(int methodType) throws IOException, NotModifiedException { if (logger.isDebugEnabled()) logger.debug("Establishing connection"); HttpMethod method = getConnectionMethod(methodType); assert method != null; Header header = null; /* // Add cache validation headers to the request if (lastModifiedMap.containsKey(httpUri)) { String lastModified = (String) lastModifiedMap.get(httpUri); assert lastModified != null; method.addRequestHeader("If-Modified-Since", lastModified); } if (eTagMap.containsKey(httpUri)) { String eTag = (String) eTagMap.get(httpUri); assert eTag != null; method.addRequestHeader("If-None-Match", eTag); } */ // Make the request if (logger.isDebugEnabled()) logger.debug("Executing HTTP request"); connection.open(); method.execute(state, connection); if (logger.isDebugEnabled()) { logger.debug("Executed HTTP request, response code " + method.getStatusCode()); } // Interpret the response header if (method.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) { // cache has been validated throw new NotModifiedException(httpUri); } else if (!isValidStatusCode(method.getStatusCode())) { throw new UnknownHostException("Unable to obtain connection to " + httpUri + ". Returned status code " + method.getStatusCode()); } else { // has a redirection been issued int numberOfRedirection = 0; while (isRedirected(method.getStatusCode()) && numberOfRedirection <= MAX_NO_REDIRECTS) { // release the existing connection method.releaseConnection(); //attempt to follow the redirects numberOfRedirection++; // obtain the new location header = method.getResponseHeader("location"); if (header != null) { try { initialiseSettings(new URL(header.getValue())); if (logger.isInfoEnabled()) { logger.info("Redirecting to " + header.getValue()); } // attempt a new connection to this location method = getConnectionMethod(methodType); connection.open(); method.execute(state, connection); if (!isValidStatusCode(method.getStatusCode())) { throw new UnknownHostException( "Unable to obtain connection to " + " the redirected site " + httpUri + ". Returned status code " + method.getStatusCode()); } } catch (URISyntaxException ex) { throw new IOException( "Unable to follow redirection to " + header.getValue() + " Not a valid URI"); } } else { throw new IOException("Unable to obtain redirecting detaild from " + httpUri); } } } // Update metadata about the cached document Header lastModifiedHeader = method.getResponseHeader("Last-Modified"); if (lastModifiedHeader != null) { logger.debug(lastModifiedHeader.toString()); assert lastModifiedHeader.getElements().length >= 1; assert lastModifiedHeader.getElements()[0].getName() != null; assert lastModifiedHeader.getElements()[0].getName() instanceof String; // previous code: added to cache } Header eTagHeader = method.getResponseHeader("Etag"); if (eTagHeader != null) { logger.debug(eTagHeader.toString()); assert eTagHeader.getElements().length >= 1; assert eTagHeader.getElements()[0].getName() != null; assert eTagHeader.getElements()[0].getName() instanceof String; // previous code: added to cache } return method; }