Example usage for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED

List of usage examples for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED

Introduction

In this page you can find the example usage for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED.

Prototype

int SC_NOT_MODIFIED

To view the source code for org.apache.commons.httpclient HttpStatus SC_NOT_MODIFIED, click the Source Link.

Document

<tt>304 Not Modified</tt> (HTTP/1.0 - RFC 1945)

Usage

From source file:org.archive.crawler.fetcher.OptimizeFetchHTTP.java

/**
 * Fetches the given URI over HTTP and records the exchange in the current
 * thread's {@code HttpRecorder}.
 *
 * <p>Steps, in order: bail out if {@code canFetch} rejects the URI; note the
 * begin time; configure content digesting; build a POST or GET method whose
 * {@code readResponseBody} runs the mid-fetch abort check; apply credentials
 * and size/time/rate limits; execute the request; read the body to the end
 * (unless a non-seed URI already carries a 304 fetch status, in which case
 * processing skips to the postprocessor chain); close the recorders and
 * release the connection; store the digest; log a summary line; promote
 * credentials or handle a 401; and finally make a second-chance attempt to
 * close the recorded input stream.
 *
 * @param curi the URI to fetch; fetch times, recorder, digest and sizes are
 *             written back onto it
 * @throws InterruptedException declared on the signature; nothing in this
 *             body throws it directly (presumably propagated from helpers -
 *             TODO confirm)
 */
protected void innerProcess(final CrawlURI curi) throws InterruptedException {
    if (!canFetch(curi)) {
        // Cannot fetch this, due to protocol, retries, or other problems
        return;
    }

    HttpClient http = this.getClient();
    setLocalIP(http);

    this.curisHandled++;

    // Note begin time
    curi.putLong(A_FETCH_BEGAN_TIME, System.currentTimeMillis());

    // Get a reference to the HttpRecorder that is set into this ToeThread.
    HttpRecorder rec = HttpRecorder.getHttpRecorder();

    // Shall we get a digest on the content downloaded?
    boolean digestContent = ((Boolean) getUncheckedAttribute(curi, ATTR_DIGEST_CONTENT)).booleanValue();
    String algorithm = null;
    if (digestContent) {
        algorithm = ((String) getUncheckedAttribute(curi, ATTR_DIGEST_ALGORITHM));
        rec.getRecordedInput().setDigest(algorithm);
    } else {
        // Clear any digest left on the recorder so nothing is computed
        // for this fetch.
        rec.getRecordedInput().setDigest((MessageDigest) null);
    }

    // Below we do two inner classes that add check of midfetch
    // filters just as we're about to receive the response body.
    // Both overrides are identical apart from the base class (POST vs GET).
    String curiString = curi.getUURI().toString();
    HttpMethodBase method = null;
    if (curi.isPost()) {
        method = new HttpRecorderPostMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                // Only read the body when the mid-fetch check does not
                // ask for an abort.
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    } else {
        method = new HttpRecorderGetMethod(curiString, rec) {
            protected void readResponseBody(HttpState state, HttpConnection conn)
                    throws IOException, HttpException {
                addResponseContent(this, curi);
                // Only read the body when the mid-fetch check does not
                // ask for an abort.
                if (checkMidfetchAbort(curi, this.httpRecorderMethod, conn)) {
                    doAbort(curi, this, MIDFETCH_ABORT_LOG);
                } else {
                    super.readResponseBody(state, conn);
                }
            }
        };
    }

    // The returned configuration may be null (see variable name); it is
    // passed straight to executeMethod below.
    HostConfiguration customConfigOrNull = configureMethod(curi, method);

    // Set httpRecorder into curi. Subsequent code both here and later
    // in extractors expects to find the HttpRecorder in the CrawlURI.
    curi.setHttpRecorder(rec);

    // Populate credentials. Set config so auth. is not automatic.
    boolean addedCredentials = populateCredentials(curi, method);
    method.setDoAuthentication(addedCredentials);

    // set hardMax on bytes (if set by operator)
    long hardMax = getMaxLength(curi);
    // set overall timeout (if set by operator); scaled by 1000, so
    // getTimeout presumably returns seconds - TODO confirm
    long timeoutMs = 1000 * getTimeout(curi);
    // Get max fetch rate. It comes in in KB/sec
    long maxRateKBps = getMaxFetchRate(curi);
    rec.getRecordedInput().setLimits(hardMax, timeoutMs, maxRateKBps);

    try {
        http.executeMethod(customConfigOrNull, method);
    } catch (RecorderTooMuchHeaderException ex) {
        // when too much header material, abort like other truncations
        // NOTE(review): unlike the two catches below there is no return
        // here, so processing continues with the aborted method.
        doAbort(curi, method, HEADER_TRUNC);
    } catch (IOException e) {
        failedExecuteCleanup(method, curi, e);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions in native
        // code... see
        // http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        failedExecuteCleanup(method, curi, e);
        return;
    }

    // set softMax on bytes to get (if implied by content-length)
    long softMax = method.getResponseContentLength();

    try {
        if (!curi.isSeed() && curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) {
            // Non-seed URI reported as 304 Not Modified: skip the body read
            // and jump straight to the postprocessor chain.
            // NOTE(review): the log text below reads "is not modify";
            // left untouched here because it is runtime output.
            logger.debug(curi.getUURI().toString() + " is not modify");
            curi.skipToProcessorChain(getController().getPostprocessorChain());
        } else if (!method.isAborted()) {
            // Force read-to-end, so that any socket hangs occur here,
            // not in later modules.
            rec.getRecordedInput().readFullyOrUntil(softMax);
        }
    } catch (RecorderTimeoutException ex) {
        doAbort(curi, method, TIMER_TRUNC);
    } catch (RecorderLengthExceededException ex) {
        doAbort(curi, method, LENGTH_TRUNC);
    } catch (IOException e) {
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } catch (ArrayIndexOutOfBoundsException e) {
        // For weird windows-only ArrayIndex exceptions from native code
        // see http://forum.java.sun.com/thread.jsp?forum=11&thread=378356
        // treating as if it were an IOException
        cleanup(curi, e, "readFully", S_CONNECT_LOST);
        return;
    } finally {
        // Runs on every path out of the try above, including the early
        // returns in the catch blocks.
        // ensure recording has stopped
        rec.closeRecorders();
        // NOTE(review): "cloase" in the message below looks like a typo for
        // "close"; left untouched here because it is runtime output.
        logger.debug("cloase backup file.&uri= " + curi.getCrawlURIString());
        // An aborted method is not released here.
        if (!method.isAborted()) {
            method.releaseConnection();
        }
        // Note completion time
        curi.putLong(A_FETCH_COMPLETED_TIME, System.currentTimeMillis());
        // Set the response charset into the HttpRecord if available.
        setCharacterEncoding(rec, method);
        setSizes(curi, rec);
    }

    if (digestContent) {
        curi.setContentDigest(algorithm, rec.getRecordedInput().getDigestValue());
    }

    // One summary line per fetch: verb, URI, status code, recorded size,
    // content type.
    logger.info((curi.isPost() ? "POST" : "GET") + " " + curi.getUURI().toString() + " "
            + method.getStatusCode() + " " + rec.getRecordedInput().getSize() + " " + curi.getContentType());

    if (curi.isSuccess() && addedCredentials) {
        // Promote the credentials from the CrawlURI to the CrawlServer
        // so they are available for all subsequent CrawlURIs on this
        // server.
        promoteCredentials(curi);
        if (logger.isDebugEnabled()) {
            // Print out the cookie.  Might help with the debugging.
            Header setCookie = method.getResponseHeader("set-cookie");
            if (setCookie != null) {
                logger.debug(setCookie.toString().trim());
            }
        }
    } else if (method.getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
        // 401 is not 'success'.
        handle401(method, curi);
    }

    // Second-chance close: the recorded input should already have been
    // closed by the release above; log loudly if it was not.
    if (rec.getRecordedInput().isOpen()) {
        logger.error(curi.toString() + " RIS still open. Should have" + " been closed by method release: "
                + Thread.currentThread().getName());
        try {
            rec.getRecordedInput().close();
        } catch (IOException e) {
            logger.error("second-chance RIS close failed", e);
        }
    }
}

From source file:org.archive.crawler.util.CrawledBytesHistotable.java

/**
 * Adds the given URI's content size, and a count of one, to exactly one of
 * three buckets: not-modified (fetch status 304), duplicate (annotations
 * contain {@code "duplicate:digest"}), or novel (everything else).
 *
 * @param curi the crawled URI whose content size is tallied
 */
public void accumulate(CrawlURI curi) {
    // 304 takes precedence over the duplicate-digest annotation.
    final boolean notModified = curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED;
    if (notModified) {
        tally(NOTMODIFIED, curi.getContentSize());
        tally(NOTMODIFIEDCOUNT, 1);
        return;
    }

    final boolean duplicateByDigest = curi.getAnnotations().contains("duplicate:digest");
    if (duplicateByDigest) {
        tally(DUPLICATE, curi.getContentSize());
        tally(DUPLICATECOUNT, 1);
        return;
    }

    tally(NOVEL, curi.getContentSize());
    tally(NOVELCOUNT, 1);
}

From source file:org.archive.modules.fetcher.FetchStats.java

/**
 * Updates the fetch counters for one URI at the given processing stage.
 *
 * <ul>
 * <li>SCHEDULED: bumps the scheduled count.</li>
 * <li>RETRIED: counts a non-response when the fetch status is not positive.</li>
 * <li>SUCCEEDED: counts a success and a response, adds the content size to
 *     the total and success byte counters, buckets the bytes, and records
 *     the completion time.</li>
 * <li>DISREGARDED: counts a disregard, plus a robots denial when the status
 *     is {@code S_ROBOTS_PRECLUDED}.</li>
 * <li>FAILED: counts either a non-response or a bucketed response, then a
 *     failure.</li>
 * </ul>
 * Any other stage leaves the counters untouched.
 *
 * @param curi  the URI whose status and content size are read
 * @param stage the stage the URI has just reached
 */
public synchronized void tally(CrawlURI curi, Stage stage) {
    switch (stage) {
    case SCHEDULED: {
        totalScheduled++;
        break;
    }
    case RETRIED: {
        if (curi.getFetchStatus() <= 0) {
            fetchNonResponses++;
        }
        break;
    }
    case SUCCEEDED: {
        fetchSuccesses++;
        fetchResponses++;
        totalBytes += curi.getContentSize();
        successBytes += curi.getContentSize();
        tallyResponseCategory(curi);
        lastSuccessTime = curi.getFetchCompletedTime();
        break;
    }
    case DISREGARDED: {
        fetchDisregards++;
        if (curi.getFetchStatus() == S_ROBOTS_PRECLUDED) {
            robotsDenials++;
        }
        break;
    }
    case FAILED: {
        final boolean gotResponse = curi.getFetchStatus() > 0;
        if (gotResponse) {
            fetchResponses++;
            totalBytes += curi.getContentSize();
            tallyResponseCategory(curi);
        } else {
            fetchNonResponses++;
        }
        fetchFailures++;
        break;
    }
    }
}

/**
 * Adds the URI's content size and a count of one to the not-modified,
 * duplicate-by-hash, or novel counters, checked in that order.
 * Called only from {@link #tally(CrawlURI, Stage)}, which holds the lock.
 */
private void tallyResponseCategory(CrawlURI curi) {
    if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED) {
        notModifiedBytes += curi.getContentSize();
        notModifiedUrls++;
        return;
    }
    if (curi.getAnnotations().contains("duplicate:digest")) {
        dupByHashBytes += curi.getContentSize();
        dupByHashUrls++;
        return;
    }
    novelBytes += curi.getContentSize();
    novelUrls++;
}

From source file:org.archive.modules.writer.WARCWriterProcessor.java

/**
 * Writes the WARC records for one HTTP fetch: a primary record (one of three
 * revisit kinds, or a full response), followed optionally by request and
 * metadata records that point back at the primary record.
 *
 * <p>The primary record kind is chosen by the first matching rule:
 * URI-agnostic digest revisit, identical-digest revisit, not-modified (304)
 * revisit, otherwise a full response (tagged with a truncation reason when
 * the URI carries a truncation annotation).
 *
 * @param curi      the fetched URI
 * @param w         the writer the records are written to
 * @param baseid    base record id handed to each write call
 * @param timestamp timestamp handed to each write call
 * @throws IOException declared on the signature; presumably raised by the
 *             write helpers
 */
protected void writeHttpRecords(final CrawlURI curi, WARCWriter w, final URI baseid, final String timestamp)
        throws IOException {
    // Add named fields for ip, checksum, and relate the metadata
    // and request to the resource field.
    // TODO: Use other than ANVL (or rename ANVL as NameValue or
    // use RFC822 (commons-httpclient?).
    final ANVLRecord primaryHeaders = new ANVLRecord();
    if (curi.getContentDigest() != null) {
        primaryHeaders.addLabelValue(HEADER_KEY_PAYLOAD_DIGEST, curi.getContentDigestSchemeString());
    }
    primaryHeaders.addLabelValue(HEADER_KEY_IP, getHostAddress(curi));

    final URI primaryRecordId;
    if (getWriteRevisitForIdenticalDigests() && curi.hasContentDigestHistory()
            && curi.getContentDigestHistory().get(A_ORIGINAL_URL) != null) {
        primaryRecordId = writeRevisitUriAgnosticDigest(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi,
                primaryHeaders);
    } else if (IdenticalDigestDecideRule.hasIdenticalDigest(curi) && getWriteRevisitForIdenticalDigests()) {
        primaryRecordId = writeRevisitDigest(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi, primaryHeaders);
    } else if (curi.getFetchStatus() == HttpStatus.SC_NOT_MODIFIED && getWriteRevisitForNotModified()) {
        primaryRecordId = writeRevisitNotModified(w, timestamp, baseid, curi, primaryHeaders);
    } else {
        // Full response: look for a truncation annotation, in the order
        // time, length, header.
        final Collection<String> annotations = curi.getAnnotations();
        final String truncatedBy = annotations.contains(TIMER_TRUNC) ? NAMED_FIELD_TRUNCATED_VALUE_TIME
                : annotations.contains(LENGTH_TRUNC) ? NAMED_FIELD_TRUNCATED_VALUE_LENGTH
                : annotations.contains(HEADER_TRUNC) ? NAMED_FIELD_TRUNCATED_VALUE_HEAD
                : null;
        // TODO: Add annotation for TRUNCATED_VALUE_UNSPECIFIED
        if (truncatedBy != null) {
            primaryHeaders.addLabelValue(HEADER_KEY_TRUNCATED, truncatedBy);
        }
        primaryRecordId = writeResponse(w, timestamp, HTTP_RESPONSE_MIMETYPE, baseid, curi, primaryHeaders);
    }

    // Request and metadata records share one header set that names the
    // primary record as concurrent.
    final ANVLRecord relatedHeaders = new ANVLRecord();
    relatedHeaders.addLabelValue(HEADER_KEY_CONCURRENT_TO, '<' + primaryRecordId.toString() + '>');

    if (getWriteRequests()) {
        writeRequest(w, timestamp, HTTP_REQUEST_MIMETYPE, baseid, curi, relatedHeaders);
    }
    if (getWriteMetadata()) {
        writeMetadata(w, timestamp, baseid, curi, relatedHeaders);
    }
}

From source file:org.infoscoop.request.filter.HTMLFragmentFilter.java

/**
 * Tries to answer a fragment request from the cache before the target server
 * is contacted.
 *
 * <p>The cache entry is looked up by the {@code fragment-chacheID} request
 * header and is considered fresh for {@code fragment-cacheLifeTime} minutes
 * (60 when the header is missing or not a number).
 *
 * @param client  unused here
 * @param method  unused here
 * @param request the proxy request; on a cache hit its response headers and
 *                body are filled in from the cache entry
 * @return {@code 304} when the request's validators match the cached
 *         headers, {@code 200} when the cached response was copied into
 *         {@code request}, or {@code 0} when there is no usable cache entry
 * @throws RuntimeException if the cached header list cannot be parsed; the
 *         underlying {@code SAXException} is attached as the cause
 */
protected int preProcess(HttpClient client, HttpMethod method, ProxyRequest request) {
    request.addIgnoreHeader("if-none-match");

    String cacheURL = request.getRequestHeader("fragment-chacheID");
    String cacheLifeTimeStr = request.getRequestHeader("fragment-cacheLifeTime");
    int cacheLifeTime = 60; // default lifetime, in minutes

    if (cacheLifeTimeStr != null) {
        try {
            cacheLifeTime = Integer.parseInt(cacheLifeTimeStr);
        } catch (NumberFormatException ignored) {
            // Deliberately best-effort: a malformed header value falls back
            // to the default lifetime instead of failing the request.
        }
    }

    if (cacheURL != null && cacheURL.length() > 0) {
        Cache cache = CacheService.getHandle().getCacheByUrl(cacheURL);
        if (cache != null && cache.getId() != null) {
            // Entries stamped before this instant are stale.
            Calendar cal = Calendar.getInstance();
            cal.add(Calendar.MINUTE, -cacheLifeTime);
            Date oldestFresh = cal.getTime();

            if (!oldestFresh.after(cache.getTimestamp())) {
                List<Header> headerList;
                try {
                    headerList = cache.getHeaderList();
                } catch (SAXException ex) {
                    log.error("parsing error", ex);
                    // Keep the cause so the failure can be diagnosed upstream.
                    throw new RuntimeException("Failed to parse cached headers for " + cacheURL, ex);
                }

                //TODO:Fix this later20090612 by endoh
                // NOTE(review): this compares against a cached header named
                // "If-Modified-Since"; a stored response would normally carry
                // "Last-Modified" instead - confirm what the cache holds.
                String ifModifiedSince = request.getRequestHeader("If-Modified-Since");
                if (ifModifiedSince != null) {
                    for (Header header : headerList) {
                        if (header.getName().equalsIgnoreCase("If-Modified-Since")
                                && ifModifiedSince.equals(header.getValue())) {
                            return HttpStatus.SC_NOT_MODIFIED;
                        }
                    }
                }

                String ifNoneMatch = request.getRequestHeader("If-None-Match");
                if (ifNoneMatch != null) {
                    for (Header header : headerList) {
                        if (header.getName().equalsIgnoreCase("etag")
                                && ifNoneMatch.equals(header.getValue())) {
                            return HttpStatus.SC_NOT_MODIFIED;
                        }
                    }
                }
                //TODO:end

                // Replay the cached response; transfer-encoding is dropped
                // because the body is served from a byte array.
                for (Header header : headerList) {
                    String name = header.getName();
                    if (!name.equalsIgnoreCase("transfer-encoding")) {
                        request.putResponseHeader(name, header.getValue());
                    }
                }
                request.setResponseBody(new ByteArrayInputStream(cache.getBodyBytes()));
                request.putResponseHeader("MSDPortal-Cache-ID", cache.getId());
                // A cache hit is routine, so report it at INFO, not ERROR.
                if (log.isInfoEnabled()) {
                    log.info("use cache " + cacheURL);
                }
                return HttpStatus.SC_OK;
            }
        }
    }

    return 0;
}

From source file:org.infoscoop.request.filter.ProxyFilterContainer.java

/**
 * Tries to serve a GET request from the public cache.
 *
 * <p>Only {@code GetMethod} requests that allow the public cache are
 * considered. A fresh entry has its headers copied into {@code request}
 * (with {@code X-IS-REDIRECTED-FROM} turned into the redirect URL); an
 * expired entry is deleted.
 *
 * @param client  unused here
 * @param method  the outgoing method; anything other than a
 *                {@code GetMethod} skips the cache
 * @param request the proxy request to populate
 * @return {@code 200} when the cached body was pushed through the filter
 *         chain, {@code 304} when the client sent a real
 *         {@code if-modified-since} value, or {@code 0} when the cache was
 *         not used
 * @throws Exception propagated from the cache service or the filter chain
 */
public final int getCache(HttpClient client, HttpMethod method, ProxyRequest request) throws Exception {
    // check public cache
    if (method instanceof GetMethod) {

        request.addIgnoreHeader("content-type");

        if (request.allowUserPublicCache()) {
            // TODO: Is the improvement of the performance necessary?
            Cache cache = CacheService.getHandle().getCacheByUrl(request.getOriginalURL());
            if (cache != null) {
                int cacheLifeTime = request.getProxy().getCacheLifeTime();
                // Lifetime is in minutes. Convert with long arithmetic: as
                // an int product, anything above 35791 minutes (~24.8 days)
                // overflows and makes a fresh entry look expired.
                long expiresAt = cache.getTimestamp().getTime() + cacheLifeTime * 60L * 1000L;
                if (expiresAt > System.currentTimeMillis()) {
                    for (Header header : cache.getHeaderList()) {
                        if ("X-IS-REDIRECTED-FROM".equalsIgnoreCase(header.getName())) {
                            request.setRedirectURL(header.getValue());
                        } else {
                            request.putResponseHeader(header.getName(), header.getValue());
                        }
                    }
                    if (log.isInfoEnabled())
                        log.info(request.getOriginalURL() + " get from public cache");

                    // This fixed date string is treated as "no validator
                    // sent"; any other value gets a 304.
                    String ifModifiedSince = request.getRequestHeader("if-modified-since");
                    if (ifModifiedSince == null || "Thu, 01 Jun 1970 00:00:00 GMT".equals(ifModifiedSince)) {
                        doFilterChain(request, new ByteArrayInputStream(cache.getBodyBytes()));
                        return HttpStatus.SC_OK;
                    } else {
                        return HttpStatus.SC_NOT_MODIFIED;
                    }
                } else {
                    if (log.isInfoEnabled())
                        log.info(request.getOriginalURL() + " delete from public cache by timeout");
                    CacheService.getHandle().deleteCacheByUrl(request.getOriginalURL());
                }
            }
        }
    }

    return 0;
}

From source file:org.infoscoop.request.filter.ProxyFilterContainer.java

/**
 * Runs one proxied request: pre-processing, header copying, public-cache
 * lookup, execution with manual redirect handling (at most five hops), and
 * hand-off of the response body to the filter chain.
 *
 * @param client  the HTTP client used to execute the request and redirects
 * @param method  the prepared outgoing method; replaced by a fresh
 *                {@code GetMethod} for each redirect hop
 * @param request the proxy request carrying the inbound headers and
 *                receiving the response headers and body
 * @return the pre-processing or cache status when either short-circuits the
 *         call; {@code 500} when the redirect limit is hit; otherwise the
 *         upstream status code, with {@code 204} reported as {@code 200}
 * @throws Exception on a 407 from upstream, or propagated from the client,
 *         the authenticator or the filter chain
 */
public final int invoke(HttpClient client, HttpMethod method, ProxyRequest request) throws Exception {
    int preStatus = prepareInvoke(client, method, request);
    if (preStatus != 0) {
        // EXECUTE_POST_STATUS still runs the filter chain over the body that
        // pre-processing supplied; every non-zero status ends the call here.
        if (preStatus == EXECUTE_POST_STATUS) {
            doFilterChain(request, request.getResponseBody());
        }
        return preStatus;
    }

    // copy headers sent target server
    List ignoreHeaderNames = request.getIgnoreHeaders();
    List allowedHeaderNames = request.getAllowedHeaders();
    boolean allowAllHeader = false;

    Proxy proxy = request.getProxy();
    if (proxy != null) {
        allowAllHeader = proxy.isAllowAllHeader();
        if (!allowAllHeader)
            allowedHeaderNames.addAll(proxy.getAllowedHeaders());
    }

    AuthenticatorUtil.doAuthentication(client, method, request);

    StringBuilder headersSb = new StringBuilder();
    for (String name : request.getRequestHeaders().keySet()) {

        String value = request.getRequestHeader(name);
        String lowname = name.toLowerCase();

        if (!allowAllHeader && !allowedHeaderNames.contains(lowname))
            continue;

        if (ignoreHeaderNames.contains(lowname))
            continue;

        // proxy may be null (see the check above), so guard before asking
        // it which cookies to strip.
        if ("cookie".equalsIgnoreCase(name) && proxy != null && proxy.getSendingCookies() != null) {
            value = RequestUtil.removeCookieParam(value, proxy.getSendingCookies());
        }

        // This fixed date string is treated as "no validator"; do not
        // forward it upstream.
        if ("if-modified-since".equalsIgnoreCase(name) && "Thu, 01 Jun 1970 00:00:00 GMT".equals(value))
            continue;

        method.addRequestHeader(new Header(name, value));
        headersSb.append(name + "=" + value + ",  ");
    }

    int cacheStatus = getCache(client, method, request);
    if (cacheStatus != 0)
        return cacheStatus;

    if (log.isInfoEnabled())
        log.info("RequestHeader: " + headersSb);

    // execute http method and process redirect
    method.setFollowRedirects(false);

    client.executeMethod(method);

    int statusCode = method.getStatusCode();

    for (int i = 0; statusCode == HttpStatus.SC_MOVED_TEMPORARILY
            || statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_SEE_OTHER
            || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT; i++) {

        // A redirect without a Location header cannot be followed. Stop
        // here, with the connection still open, and let the non-2xx branch
        // below pass the response through as it is.
        Header location = method.getResponseHeader("Location");
        if (location == null || location.getValue() == null) {
            log.error("Redirect status " + statusCode + " without Location header.");
            break;
        }

        // connection release
        method.releaseConnection();

        if (i == 5) {
            log.error("The circular redirect is limited by five times.");
            return 500;
        }

        String redirectUrl = location.getValue();

        // According to 2,068 1.1 rfc http spec, we cannot appoint the relative URL,
        // but microsoft.com gives back the relative URL.
        if (redirectUrl.startsWith("/")) {
            URI baseURI = method.getURI();
            baseURI.setPath(redirectUrl);

            redirectUrl = baseURI.toString();
        }

        // Each hop is a fresh GET carrying the previous request headers,
        // except content-length and authorization.
        Header[] headers = method.getRequestHeaders();
        method = new GetMethod(redirectUrl);
        for (int j = 0; j < headers.length; j++) {
            String headerName = headers[j].getName();
            if (!headerName.equalsIgnoreCase("content-length") && !headerName.equalsIgnoreCase("authorization"))
                method.setRequestHeader(headers[j]);
        }
        AuthenticatorUtil.doAuthentication(client, method, request);
        method.setRequestHeader("authorization", request.getRequestHeader("Authorization"));
        method.setFollowRedirects(false);
        client.executeMethod(method);
        statusCode = method.getStatusCode();
        request.setRedirectURL(redirectUrl);

        if (log.isInfoEnabled())
            log.info("Redirect " + request.getTargetURL() + " to " + location + ".");
    }

    // copy response headers to proxyReqeust
    Header[] headers = method.getResponseHeaders();
    for (int i = 0; i < headers.length; i++) {
        request.putResponseHeader(headers[i].getName(), headers[i].getValue());
    }

    if (log.isInfoEnabled())
        log.info("Original Status:" + statusCode);

    // check response code
    if (statusCode == HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED) {
        log.error("Proxy Authentication Required. Confirm ajax proxy setting.");
        throw new Exception(
                "Http Status 407, Proxy Authentication Required. Please contuct System Administrator.");
    }
    if (statusCode == HttpStatus.SC_NOT_MODIFIED || statusCode == HttpStatus.SC_RESET_CONTENT) {
        return statusCode;
    } else if (statusCode < 200 || statusCode >= 300) {
        // Non-2xx: hand the raw body back without running the filter chain.
        request.setResponseBody(method.getResponseBodyAsStream());
        return statusCode;
    }

    // process response body
    InputStream responseStream = null;
    if (statusCode != HttpStatus.SC_NO_CONTENT) {
        if (request.allowUserPublicCache()) {
            // Buffer the body so it can be both cached and filtered.
            byte[] responseBody = method.getResponseBody();

            Map<String, List<String>> responseHeaders = request.getResponseHeaders();
            if (request.getRedirectURL() != null)
                responseHeaders.put("X-IS-REDIRECTED-FROM",
                        Arrays.asList(new String[] { request.getRedirectURL() }));
            if (method instanceof GetMethod) {
                putCache(request.getOriginalURL(), new ByteArrayInputStream(responseBody), responseHeaders);
            }

            responseStream = new ByteArrayInputStream(responseBody);
        } else {
            responseStream = method.getResponseBodyAsStream();
        }
    }
    doFilterChain(request, responseStream);

    // 204 No Content is reported to the caller as 200.
    return statusCode != HttpStatus.SC_NO_CONTENT ? method.getStatusCode() : 200;
}

From source file:org.methodize.nntprss.feed.Channel.java

/**
 * Retrieves the latest RSS doc from the remote site.
 *
 * <p>Issues a conditional GET (sending {@code If-None-Match} and
 * {@code If-Modified-Since} when earlier values are known), follows up to
 * five redirects by hand, and on a 200 response parses the body and passes
 * the document to {@code processChannelDocument}. The channel's
 * {@code status}, {@code lastETag} and {@code lastModified} fields are
 * updated according to the outcome, and the channel is saved through
 * {@code channelDAO} unless an exception escapes to the outer handlers.
 *
 * <p>{@code polling} and {@code connected} are set while the poll runs and
 * are always cleared in the {@code finally} block.
 */
public synchronized void poll() {
    // Use method-level variable
    // Guard against change in history mid-poll
    polling = true;

    //      boolean keepHistory = historical;
    // NOTE(review): keepExpiration is captured here but never read;
    // processChannelDocument below is passed the field 'expiration'
    // instead - confirm which one was intended.
    long keepExpiration = expiration;

    lastPolled = new Date();

    int statusCode = -1;
    HttpMethod method = null;
    String urlString = url.toString();
    try {
        HttpClient httpClient = getHttpClient();
        channelManager.configureHttpClient(httpClient);
        HttpResult result = null;

        try {

            connected = true;
            boolean redirected = false;
            int count = 0;
            // Manual redirect loop: at most five requests in total.
            do {
                URL currentUrl = new URL(urlString);
                method = new GetMethod(urlString);
                method.setRequestHeader("User-agent", AppConstants.getUserAgent());
                method.setRequestHeader("Accept-Encoding", "gzip");
                method.setFollowRedirects(false);
                method.setDoAuthentication(true);

                // ETag
                if (lastETag != null) {
                    method.setRequestHeader("If-None-Match", lastETag);
                }

                // Last Modified
                if (lastModified != 0) {
                    final String NAME = "If-Modified-Since";
                    // Defend against a stored Last-Modified that lies in the
                    // future (e.g. net.freeroller.rickard once produced
                    // "Thu, 24 Aug 2028 12:29:54 GMT"): only send the
                    // header when the date is in the past.
                    if (lastModified < System.currentTimeMillis()) {
                        final String DATE = httpDate.format(new Date(lastModified));
                        method.setRequestHeader(NAME, DATE);
                        log.debug("channel " + this.name + " using " + NAME + " " + DATE); //ALEK
                    }
                }

                // NOTE(review): these two calls repeat the ones made right
                // after the method was created above.
                method.setFollowRedirects(false);
                method.setDoAuthentication(true);

                HostConfiguration hostConfig = new HostConfiguration();
                hostConfig.setHost(currentUrl.getHost(), currentUrl.getPort(), currentUrl.getProtocol());

                result = executeHttpRequest(httpClient, hostConfig, method);
                statusCode = result.getStatusCode();
                if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                        || statusCode == HttpStatus.SC_MOVED_TEMPORARILY
                        || statusCode == HttpStatus.SC_SEE_OTHER
                        || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT) {

                    redirected = true;
                    // Resolve against current URI - may be a relative URI
                    try {
                        urlString = new java.net.URI(urlString).resolve(result.getLocation()).toString();
                    } catch (URISyntaxException use) {
                        // Fall back to just using location from result
                        urlString = result.getLocation();
                    }
                    // A permanent redirect updates the channel's stored URL,
                    // but only when the manager is configured to observe 301s.
                    if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY && channelManager.isObserveHttp301()) {
                        try {
                            url = new URL(urlString);
                            if (log.isInfoEnabled()) {
                                log.info("Channel = " + this.name
                                        + ", updated URL from HTTP Permanent Redirect");
                            }
                        } catch (MalformedURLException mue) {
                            // Ignore URL permanent redirect for now...
                        }
                    }
                } else {
                    redirected = false;
                }

                //               method.getResponseBody();
                //               method.releaseConnection();
                count++;
            } while (count < 5 && redirected);

        // Each network failure below records a channel status and sets
        // statusCode to 500, so none of the status branches further down
        // (200, 304, 407, 401) match.
        } catch (HttpRecoverableException hre) {
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - Temporary Http Problem - " + hre.getMessage());
            }
            status = STATUS_CONNECTION_TIMEOUT;
            statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
        } catch (ConnectException ce) {
            // @TODO Might also be a connection refused - not only a timeout...
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - Connection Timeout, skipping - " + ce.getMessage());
            }
            status = STATUS_CONNECTION_TIMEOUT;
            statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
        } catch (UnknownHostException ue) {
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - Unknown Host Exception, skipping");
            }
            status = STATUS_UNKNOWN_HOST;
            statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
        } catch (NoRouteToHostException re) {
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - No Route To Host Exception, skipping");
            }
            status = STATUS_NO_ROUTE_TO_HOST;
            statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
        } catch (SocketException se) {
            // e.g. Network is unreachable
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - Socket Exception, skipping");
            }
            status = STATUS_SOCKET_EXCEPTION;
            statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
        }

        // Only process if ok - if not ok (e.g. not modified), don't do anything
        if (connected && statusCode == HttpStatus.SC_OK) {

            // Wrap the response bytes so skipBOM can inspect the start of
            // the stream before the parser reads it.
            PushbackInputStream pbis = new PushbackInputStream(new ByteArrayInputStream(result.getResponse()),
                    PUSHBACK_BUFFER_SIZE);
            skipBOM(pbis);
            BufferedInputStream bis = new BufferedInputStream(pbis);
            DocumentBuilder db = AppConstants.newDocumentBuilder();

            try {
                Document rssDoc = null;
                if (!parseAtAllCost) {
                    try {
                        rssDoc = db.parse(bis);
                    } catch (InternalError ie) {
                        // Crimson library throws InternalErrors
                        if (log.isDebugEnabled()) {
                            log.debug("InternalError thrown by Crimson", ie);
                        }
                        // Rethrown as SAXException, which is not a
                        // SAXParseException, so it reaches the outer
                        // catch (Exception) handler below.
                        throw new SAXException("InternalError thrown by Crimson: " + ie.getMessage());
                    }
                } else {
                    // Parse-at-all-costs selected
                    // Read in document to local array - may need to parse twice
                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    byte[] buf = new byte[1024];
                    int bytesRead = bis.read(buf);
                    while (bytesRead > -1) {
                        if (bytesRead > 0) {
                            bos.write(buf, 0, bytesRead);
                        }
                        bytesRead = bis.read(buf);
                    }
                    bos.flush();
                    bos.close();

                    byte[] rssDocBytes = bos.toByteArray();

                    try {
                        // Try the XML document parser first - just in case
                        // the doc is well-formed
                        rssDoc = db.parse(new ByteArrayInputStream(rssDocBytes));
                    } catch (SAXParseException spe) {
                        if (log.isDebugEnabled()) {
                            log.debug("XML parse failed, trying tidy");
                        }
                        // Fallback to parse-at-all-costs parser
                        rssDoc = LooseParser.parse(new ByteArrayInputStream(rssDocBytes));
                    }
                }

                processChannelDocument(expiration, rssDoc);

                // Update last modified / etag from headers
                //               lastETag = httpCon.getHeaderField("ETag");
                //               lastModified = httpCon.getHeaderFieldDate("Last-Modified", 0);

                // 'method' is the last request issued by the redirect loop.
                Header hdrETag = method.getResponseHeader("ETag");
                lastETag = hdrETag != null ? hdrETag.getValue() : null;

                Header hdrLastModified = method.getResponseHeader("Last-Modified");
                lastModified = hdrLastModified != null ? parseHttpDate(hdrLastModified.getValue()) : 0;
                log.debug("channel " + this.name + " parsed Last-Modifed " + hdrLastModified + " to "
                        + (lastModified != 0 ? "" + (new Date(lastModified)) : "" + lastModified)); //ALEK

                status = STATUS_OK;
            } catch (SAXParseException spe) {
                if (log.isEnabledFor(Priority.WARN)) {
                    log.warn("Channel=" + name + " - Error parsing RSS document - check feed");
                }
                status = STATUS_INVALID_CONTENT;
            }

            // NOTE(review): not in a finally block, so it is skipped when
            // any other exception escapes the try above.
            bis.close();

            // end if response code == HTTP_OK
        } else if (connected && statusCode == HttpStatus.SC_NOT_MODIFIED) {
            // 304: the feed is unchanged, which still counts as a good poll.
            if (log.isDebugEnabled()) {
                log.debug("Channel=" + name + " - HTTP_NOT_MODIFIED, skipping");
            }
            status = STATUS_OK;
        } else if (statusCode == HttpStatus.SC_PROXY_AUTHENTICATION_REQUIRED) {
            if (log.isEnabledFor(Priority.WARN)) {
                log.warn("Channel=" + name + " - Proxy authentication required");
            }
            status = STATUS_PROXY_AUTHENTICATION_REQUIRED;
        } else if (statusCode == HttpStatus.SC_UNAUTHORIZED) {
            if (log.isEnabledFor(Priority.WARN)) {
                log.warn("Channel=" + name + " - Authentication required");
            }
            status = STATUS_USER_AUTHENTICATION_REQUIRED;
        }

        // Update channel in database...
        channelDAO.updateChannel(this);

    } catch (FileNotFoundException fnfe) {
        if (log.isEnabledFor(Priority.WARN)) {
            log.warn("Channel=" + name + " - File not found returned by web server - check feed");
        }
        status = STATUS_NOT_FOUND;
    } catch (Exception e) {
        // Logged only; 'status' is left as it was.
        if (log.isEnabledFor(Priority.WARN)) {
            log.warn("Channel=" + name + " - Exception while polling channel", e);
        }
    } catch (NoClassDefFoundError ncdf) {
        // Throw if SSL / redirection to HTTPS
        if (log.isEnabledFor(Priority.WARN)) {
            log.warn("Channel=" + name + " - NoClassDefFound", ncdf);
        }
    } finally {
        // Always clear the in-progress flags, whatever the outcome.
        connected = false;
        polling = false;
    }

}

From source file:org.mule.ibeans.module.http.HttpClientMessageRequester2.java

/**
 * Make a specific request to the underlying transport.
 * <p>
 * Issues an HTTP GET against the endpoint address. When an ETag from a previous
 * response is known and {@code checkEtag} is set, it is sent as a conditional
 * request header so the server may answer with <tt>304 Not Modified</tt>.
 *
 * @param timeout the maximum time the operation should block before returning.
 *                In this implementation the value is only forwarded to the
 *                {@link ReceiveException} raised on failure.
 * @return the result of the request wrapped in a MuleMessage object. When the
 *         server answers <tt>304 Not Modified</tt> and ETag checking is enabled,
 *         a message carrying a {@link NullPayload} is returned instead.
 * @throws Exception a {@link ReceiveException} if the response status is 400 or
 *                   above, or if the call to the underlying protocol causes an
 *                   exception (which is then attached as the cause)
 */
protected MuleMessage doRequest(long timeout) throws Exception {
    HttpMethod httpMethod = new GetMethod(endpoint.getEndpointURI().getAddress());

    if (endpoint.getProperties().containsKey(HttpConstants.HEADER_AUTHORIZATION)) {
        httpMethod.setDoAuthentication(true);
        // NOTE(review): this configures the 'client' visible in the enclosing scope,
        // not the local HttpClient created below that actually executes the request.
        // Left untouched because other code may rely on it - confirm the intent.
        client.getParams().setAuthenticationPreemptive(true);
        httpMethod.setRequestHeader(HttpConstants.HEADER_AUTHORIZATION,
                (String) endpoint.getProperty(HttpConstants.HEADER_AUTHORIZATION));
    }

    // Set whenever nobody downstream will consume the response; checked in finally.
    boolean releaseConn = false;
    try {
        HttpClient client = new HttpClient();

        if (etag != null && checkEtag) {
            httpMethod.setRequestHeader(HttpConstants.HEADER_IF_NONE_MATCH, etag);
        }
        client.executeMethod(httpMethod);

        int statusCode = httpMethod.getStatusCode();
        if (statusCode >= 400) {
            releaseConn = true;
            throw new ReceiveException(
                    HttpMessages.requestFailedWithStatus(httpMethod.getStatusLine().toString()), endpoint,
                    timeout);
        }

        MuleMessage message = new HttpMuleMessageFactory(connector.getMuleContext()).create(httpMethod,
                null /* encoding */);
        // Remember the ETag of this response for the next conditional request
        etag = message.getInboundProperty(HttpConstants.HEADER_ETAG, null);

        boolean notModified = statusCode == HttpStatus.SC_NOT_MODIFIED && checkEtag;
        if (notModified) {
            // Not modified, we should really cache the whole message and return it.
            // The message built from httpMethod is discarded here, so nothing else
            // will ever release the connection: do it ourselves.
            releaseConn = true;
            return new DefaultMuleMessage(NullPayload.getInstance(), getConnector().getMuleContext());
        }

        if (StringUtils.EMPTY.equals(message.getPayload())) {
            // Empty payload: there is no body left for the caller to read
            releaseConn = true;
        }
        return message;

    } catch (ReceiveException e) {
        releaseConn = true;
        throw e;
    } catch (Exception e) {
        releaseConn = true;
        throw new ReceiveException(endpoint, timeout, e);
    } finally {
        if (releaseConn) {
            httpMethod.releaseConnection();
        }
    }
}

From source file:org.mulgara.resolver.http.HttpContent.java

/**
 * Obtain a valid connection and follow redirects if necessary.
 * <p>
 * Executes the request once and, while the response is a redirect, re-targets
 * this object at the advertised location and executes the request again.
 *
 * @param methodType request the headers (HEAD) or body (GET)
 * @return the method that was executed against the final location
 * @throws NotModifiedException  if the first response is <tt>304 Not Modified</tt>,
 *         i.e. the content validates against the cache
 * @throws IOException  if there's difficulty communicating with the web site,
 *         a response carries an invalid status code, or a redirect cannot be followed
 */
private HttpMethod establishConnection(int methodType) throws IOException, NotModifiedException {
    if (logger.isDebugEnabled()) {
        logger.debug("Establishing connection");
    }

    HttpMethod method = getConnectionMethod(methodType);
    assert method != null;

    /*
      // Add cache validation headers to the request
      if (lastModifiedMap.containsKey(httpUri)) {
        String lastModified = (String) lastModifiedMap.get(httpUri);
        assert lastModified != null;
        method.addRequestHeader("If-Modified-Since", lastModified);
      }
            
      if (eTagMap.containsKey(httpUri)) {
        String eTag = (String) eTagMap.get(httpUri);
        assert eTag != null;
        method.addRequestHeader("If-None-Match", eTag);
      }
     */

    // Make the request
    if (logger.isDebugEnabled()) {
        logger.debug("Executing HTTP request");
    }
    connection.open();
    method.execute(state, connection);
    if (logger.isDebugEnabled()) {
        logger.debug("Executed HTTP request, response code " + method.getStatusCode());
    }

    // Interpret the response header
    if (method.getStatusCode() == HttpStatus.SC_NOT_MODIFIED) {
        // cache has been validated
        throw new NotModifiedException(httpUri);
    } else if (!isValidStatusCode(method.getStatusCode())) {
        throw new UnknownHostException("Unable to obtain connection to " + httpUri + ". Returned status code "
                + method.getStatusCode());
    } else {
        // has a redirection been issued
        // NOTE(review): the '<=' bound permits MAX_NO_REDIRECTS + 1 hops, and once the
        // bound is exceeded the loop simply ends, so the method returned below may
        // still carry a redirect status - confirm whether callers expect that.
        int numberOfRedirection = 0;
        while (isRedirected(method.getStatusCode()) && numberOfRedirection <= MAX_NO_REDIRECTS) {

            // release the existing connection
            method.releaseConnection();

            //attempt to follow the redirects
            numberOfRedirection++;

            // obtain the new location
            Header locationHeader = method.getResponseHeader("location");
            if (locationHeader == null) {
                throw new IOException("Unable to obtain redirecting detaild from " + httpUri);
            }

            try {
                initialiseSettings(new URL(locationHeader.getValue()));
                if (logger.isInfoEnabled()) {
                    logger.info("Redirecting to " + locationHeader.getValue());
                }

                // attempt a new connection to this location
                method = getConnectionMethod(methodType);
                connection.open();
                method.execute(state, connection);
                if (!isValidStatusCode(method.getStatusCode())) {
                    throw new UnknownHostException(
                            "Unable to obtain connection to " + " the redirected site " + httpUri
                                    + ". Returned status code " + method.getStatusCode());
                }
            } catch (URISyntaxException ex) {
                // Keep the original failure attached so the offending part of the
                // location remains visible in the stack trace.
                IOException failure = new IOException(
                        "Unable to follow redirection to " + locationHeader.getValue() + " Not a valid URI");
                failure.initCause(ex);
                throw failure;
            }
        }
    }

    // Update metadata about the cached document
    Header lastModifiedHeader = method.getResponseHeader("Last-Modified");
    if (lastModifiedHeader != null) {
        logger.debug(lastModifiedHeader.toString());
        assert lastModifiedHeader.getElements().length >= 1;
        assert lastModifiedHeader.getElements()[0].getName() != null;
        assert lastModifiedHeader.getElements()[0].getName() instanceof String;
        // previous code: added to cache
    }

    Header eTagHeader = method.getResponseHeader("Etag");
    if (eTagHeader != null) {
        logger.debug(eTagHeader.toString());
        assert eTagHeader.getElements().length >= 1;
        assert eTagHeader.getElements()[0].getName() != null;
        assert eTagHeader.getElements()[0].getName() instanceof String;
        // previous code: added to cache
    }

    return method;
}