org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator.java

Source

package org.apache.maven.doxia.linkcheck.validation;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.IOException;

import java.net.URL;
import java.util.Map;

import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NTCredentials;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.HeadMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.doxia.linkcheck.HttpBean;
import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
import org.codehaus.plexus.util.StringUtils;

/**
 * Checks links which are normal URLs by issuing a real HTTP request
 * ("head" or "get", as configured in the {@link HttpBean}) and mapping the
 * response status onto a {@link LinkValidationResult}.
 *
 * <p>Redirects (301, 302 and 307) are followed manually up to a configurable
 * maximum so that a link which redirects to a reachable page can be reported
 * as a warning rather than as an error.</p>
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @version $Id$
 */
public final class OnlineHTTPLinkValidator extends HTTPLinkValidator {
    /** Log for debug output. */
    private static final Log LOG = LogFactory.getLog(OnlineHTTPLinkValidator.class);

    /** The maximum number of redirections for a link. */
    private static final int MAX_NB_REDIRECT = 10;

    /** Use the get method to test pages. */
    private static final String GET_METHOD = "get";

    /** Use the head method to test pages. */
    private static final String HEAD_METHOD = "head";

    /** User agent sent with every request; some web servers reject the HttpClient default. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";

    /** The http bean encapsuling all http parameters supported. */
    private final HttpBean http;

    /** The base URL for links that start with '/'. */
    private String baseURL;

    /** The HttpClient; re-created by {@link #validateLink(LinkValidationItem)} when it is null. */
    private transient HttpClient cl;

    /**
     * Constructor: initialize settings with a default {@link HttpBean}.
     */
    public OnlineHTTPLinkValidator() {
        this(new HttpBean());
    }

    /**
     * Constructor: initialize settings.
     *
     * @param bean The http bean encapsuling all HTTP parameters supported.
     *             When null, a default {@link HttpBean} is used instead.
     */
    public OnlineHTTPLinkValidator(HttpBean bean) {
        this.http = (bean == null) ? new HttpBean() : bean;

        if (LOG.isDebugEnabled()) {
            LOG.debug("Will use method : [" + this.http.getMethod() + "]");
        }

        initHttpClient();
    }

    /**
     * The base URL.
     *
     * @return the base URL.
     */
    public String getBaseURL() {
        return this.baseURL;
    }

    /**
     * Sets the base URL. This is pre-pended to links that start with '/'.
     *
     * @param url the base URL.
     */
    public void setBaseURL(String url) {
        this.baseURL = url;
    }

    /**
     * Validates one link by requesting it over HTTP.
     *
     * <p>Outcome mapping, as implemented below:</p>
     * <ul>
     * <li>link starts with '/' and no base URL is set: warning, "No base URL specified";</li>
     * <li>status 200: valid (or a "Missing anchor" result when the link carries a
     * fragment that is not found in the response body);</li>
     * <li>status 301, 302 or 307: warning;</li>
     * <li>any other status, no status at all, or any exception: error.</li>
     * </ul>
     *
     * <p>While the request runs, the configured HttpClient parameters and the user
     * agent are published as system properties; they are removed again (not restored
     * to an earlier value) before this method returns.</p>
     *
     * @param lvi the item holding the link to check and the page it was found in.
     * @return the validation result, never null.
     */
    public LinkValidationResult validateLink(LinkValidationItem lvi) {
        if (this.cl == null) {
            initHttpClient();
        }

        if (this.http.getHttpClientParameters() != null) {
            for (Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet()) {
                if (entry.getValue() != null) {
                    System.setProperty(entry.getKey().toString(), entry.getValue().toString());
                }
            }
        }

        // Some web servers don't allow the default user-agent sent by httpClient
        System.setProperty(HttpMethodParams.USER_AGENT, USER_AGENT);
        this.cl.getParams().setParameter(HttpMethodParams.USER_AGENT, USER_AGENT);

        // Split "page#anchor" into the page to request and the anchor to look for in it.
        String link = lvi.getLink();
        String anchor = "";
        int idx = link.indexOf('#');
        if (idx != -1) {
            anchor = link.substring(idx + 1);
            link = link.substring(0, idx);
        }

        try {
            if (link.startsWith("/")) {
                if (getBaseURL() == null) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Cannot check link [" + link + "] in page [" + lvi.getSource()
                                + "], as no base URL has been set!");
                    }

                    return new LinkValidationResult(LinkcheckFileResult.WARNING_LEVEL, false,
                            "No base URL specified");
                }

                link = getBaseURL() + link;
            }

            HttpMethod hm = null;
            try {
                hm = checkLink(link, 0);
            } catch (Throwable t) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t);
                }

                return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false,
                        t.getClass().getName() + " : " + t.getMessage());
            }

            if (hm == null) {
                return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false,
                        "Cannot retreive HTTP Status");
            }

            int statusCode = hm.getStatusCode();

            if (statusCode == HttpStatus.SC_OK) {
                // lets check if the anchor is present
                if (anchor.length() > 0) {
                    // NOTE(review): checkLink() has already released the connection of this
                    // method before returning it - confirm the response body is still
                    // available at this point (and for "head" requests at all).
                    String content = hm.getResponseBodyAsString();

                    if (!Anchors.matchesAnchor(content, anchor)) {
                        return new HTTPLinkValidationResult(LinkcheckFileResult.VALID_LEVEL, false,
                                "Missing anchor '" + anchor + "'");
                    }
                }
                return new HTTPLinkValidationResult(LinkcheckFileResult.VALID_LEVEL, true, statusCode,
                        hm.getStatusText());
            }

            String msg = "Received: [" + statusCode + "] for [" + link + "] in page [" + lvi.getSource()
                    + "]";
            // If there's a redirection ... add a warning
            if (isRedirect(statusCode)) {
                LOG.warn(msg);

                return new HTTPLinkValidationResult(LinkcheckFileResult.WARNING_LEVEL, true, statusCode,
                        hm.getStatusText());
            }

            LOG.debug(msg);

            return new HTTPLinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, statusCode,
                    hm.getStatusText());
        } catch (Throwable t) {
            String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]";
            if (LOG.isDebugEnabled()) {
                LOG.debug(msg, t);
            } else {
                LOG.error(msg);
            }

            return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage());
        } finally {
            // Undo the system properties published above.
            System.getProperties().remove(HttpMethodParams.USER_AGENT);

            if (this.http.getHttpClientParameters() != null) {
                for (Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet()) {
                    if (entry.getValue() != null) {
                        System.getProperties().remove(entry.getKey().toString());
                    }
                }
            }
        }
    }

    /**
     * Initialize the HttpClient: timeouts, circular-redirect tolerance and,
     * when a proxy host is configured, the proxy and its credentials.
     */
    private void initHttpClient() {
        LOG.debug("A new HttpClient instance is needed ...");

        this.cl = new HttpClient(new MultiThreadedHttpConnectionManager());

        // Default params; a timeout of 0 leaves the HttpClient defaults untouched.
        if (this.http.getTimeout() != 0) {
            this.cl.getHttpConnectionManager().getParams().setConnectionTimeout(this.http.getTimeout());
            this.cl.getHttpConnectionManager().getParams().setSoTimeout(this.http.getTimeout());
        }
        this.cl.getParams().setBooleanParameter(HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true);

        HostConfiguration hc = new HostConfiguration();

        HttpState state = new HttpState();
        if (StringUtils.isNotEmpty(this.http.getProxyHost())) {
            hc.setProxy(this.http.getProxyHost(), this.http.getProxyPort());

            if (LOG.isDebugEnabled()) {
                LOG.debug("Proxy Host:" + this.http.getProxyHost());
                LOG.debug("Proxy Port:" + this.http.getProxyPort());
            }

            if (StringUtils.isNotEmpty(this.http.getProxyUser()) && this.http.getProxyPassword() != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Proxy User:" + this.http.getProxyUser());
                }

                // NT credentials are used as soon as an NTLM host is configured.
                Credentials credentials;
                if (StringUtils.isNotEmpty(this.http.getProxyNtlmHost())) {
                    credentials = new NTCredentials(this.http.getProxyUser(), this.http.getProxyPassword(),
                            this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain());
                } else {
                    credentials = new UsernamePasswordCredentials(this.http.getProxyUser(),
                            this.http.getProxyPassword());
                }

                state.setProxyCredentials(AuthScope.ANY, credentials);
            }
        } else {
            LOG.debug("Not using a proxy");
        }

        this.cl.setHostConfiguration(hc);
        this.cl.setState(state);

        LOG.debug("New HttpClient instance created.");
    }

    /**
     * Checks the given link, following 301/302/307 redirects recursively.
     *
     * <p>When the very first request (nbRedirect == 0) is redirected and the final
     * target answers 200, the <em>original</em> redirect response is returned so
     * that the caller can report the redirection. In every other case the response
     * of the last request in the chain is returned.</p>
     *
     * @param link the link to check.
     * @param nbRedirect the number of current redirects.
     * @return the executed HttpMethod (its connection already released), or null
     *         when no status line could be obtained for the link or for one of
     *         its redirect targets.
     * @throws IOException if something goes wrong, including exceeding the
     *         maximum number of redirections.
     */
    private HttpMethod checkLink(String link, int nbRedirect) throws IOException {
        int max = getMaxRedirects();
        if (nbRedirect > max) {
            throw new HttpException("Maximum number of redirections (" + max + ") exceeded");
        }

        HttpMethod hm = createMethod(link);

        // Default
        hm.setFollowRedirects(this.http.isFollowRedirects());

        try {
            URL url = new URL(link);

            cl.getHostConfiguration().setHost(url.getHost(), url.getPort(), url.getProtocol());

            cl.executeMethod(hm);

            StatusLine sl = hm.getStatusLine();
            if (sl == null) {
                if (LOG.isErrorEnabled()) {
                    LOG.error("Unknown error validating link : " + link);
                }

                return null;
            }

            if (!isRedirect(hm.getStatusCode())) {
                return hm;
            }

            Header locationHeader = hm.getResponseHeader("location");
            String location = (locationHeader == null) ? null : locationHeader.getValue();

            if (location == null) {
                LOG.error("Site sent redirect, but did not set Location header");

                return hm;
            }

            String newLink = resolveRedirect(url, location);

            if (LOG.isDebugEnabled()) {
                LOG.debug("[" + link + "] is redirected to [" + newLink + "]");
            }

            // Give the connection back before issuing the next request of the chain.
            hm.releaseConnection();

            HttpMethod target = checkLink(newLink, nbRedirect + 1);
            if (target == null) {
                // No status for the redirect target: propagate "no status" instead of
                // dereferencing null.
                return null;
            }

            // Restore the hm to "Moved permanently" | "Moved temporarily" | "Temporary redirect"
            // if the new location is found to allow us to report it
            if (target.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0) {
                return hm;
            }

            return target;
        } finally {
            hm.releaseConnection();
        }
    }

    /**
     * Reads the maximum number of redirections from the configured HttpClient
     * parameters, falling back to {@link #MAX_NB_REDIRECT} when the parameter is
     * absent or not a number.
     *
     * @return the maximum number of redirections to follow.
     */
    private int getMaxRedirects() {
        if (this.http.getHttpClientParameters() == null) {
            return MAX_NB_REDIRECT;
        }

        Object configured = this.http.getHttpClientParameters().get(HttpClientParams.MAX_REDIRECTS);
        if (configured == null) {
            return MAX_NB_REDIRECT;
        }

        try {
            return Integer.parseInt(configured.toString());
        } catch (NumberFormatException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS
                        + "' is not a number. Ignoring!");
            }
            return MAX_NB_REDIRECT;
        }
    }

    /**
     * Creates the request for the configured HTTP method ("head" or "get",
     * case-insensitive); any other value is logged and treated as "get".
     *
     * @param link the link to request.
     * @return a new, not yet executed HttpMethod.
     */
    private HttpMethod createMethod(String link) {
        if (HEAD_METHOD.equalsIgnoreCase(this.http.getMethod())) {
            return new HeadMethod(link);
        }

        if (!GET_METHOD.equalsIgnoreCase(this.http.getMethod()) && LOG.isErrorEnabled()) {
            LOG.error("Unsupported method: " + this.http.getMethod() + ", using 'get'.");
        }

        return new GetMethod(link);
    }

    /**
     * Tells whether the status code is one of the redirections handled by this
     * validator (301, 302 or 307).
     *
     * @param statusCode the HTTP status code.
     * @return true for a handled redirection status.
     */
    private static boolean isRedirect(int statusCode) {
        return statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                || statusCode == HttpStatus.SC_MOVED_TEMPORARILY
                || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT;
    }

    /**
     * Turns the value of a Location header into an absolute link.
     *
     * <p>Values that already start with "http://" or "https://" are returned
     * unchanged. Everything else (host-rooted "/path", relative "page.html",
     * "../x", ...) is resolved against the URL that was requested, instead of
     * being appended to it as plain text.</p>
     *
     * @param base the URL whose response carried the redirect.
     * @param location the raw Location header value, not null.
     * @return the absolute link to follow.
     * @throws IOException if the location cannot be resolved to a valid URL.
     */
    private static String resolveRedirect(URL base, String location) throws IOException {
        // Be careful to absolute/relative links
        if (location.startsWith("http://") || location.startsWith("https://")) {
            return location;
        }

        return new URL(base, location).toExternalForm();
    }
}