// Java tutorial
package org.apache.maven.doxia.linkcheck.validation; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import java.io.IOException; import java.net.URL; import java.util.Map; import org.apache.commons.httpclient.Credentials; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HostConfiguration; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.HttpState; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.NTCredentials; import org.apache.commons.httpclient.StatusLine; import org.apache.commons.httpclient.UsernamePasswordCredentials; import org.apache.commons.httpclient.auth.AuthScope; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.HeadMethod; import org.apache.commons.httpclient.params.HttpClientParams; import org.apache.commons.httpclient.params.HttpMethodParams; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.maven.doxia.linkcheck.HttpBean; import 
org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult; import org.codehaus.plexus.util.StringUtils; /** * Checks links which are normal URLs * * @author <a href="mailto:bwalding@apache.org">Ben Walding</a> * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a> * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a> * @version $Id$ */ public final class OnlineHTTPLinkValidator extends HTTPLinkValidator { /** Log for debug output. */ private static final Log LOG = LogFactory.getLog(OnlineHTTPLinkValidator.class); /** The maximum number of redirections for a link. */ private static final int MAX_NB_REDIRECT = 10; /** Use the get method to test pages. */ private static final String GET_METHOD = "get"; /** Use the head method to test pages. */ private static final String HEAD_METHOD = "head"; /** The http bean encapsuling all http parameters supported. */ private HttpBean http; /** The base URL for links that start with '/'. */ private String baseURL; /** The HttpClient. */ private transient HttpClient cl; /** * Constructor: initialize settings, use "head" method. */ public OnlineHTTPLinkValidator() { this(new HttpBean()); } /** * Constructor: initialize settings. * * @param bean The http bean encapsuling all HTTP parameters supported. */ public OnlineHTTPLinkValidator(HttpBean bean) { if (bean == null) { bean = new HttpBean(); } if (LOG.isDebugEnabled()) { LOG.debug("Will use method : [" + bean.getMethod() + "]"); } this.http = bean; initHttpClient(); } /** * The base URL. * * @return the base URL. */ public String getBaseURL() { return this.baseURL; } /** * Sets the base URL. This is pre-pended to links that start with '/'. * * @param url the base URL. 
*/ public void setBaseURL(String url) { this.baseURL = url; } /** {@inheritDoc} */ public LinkValidationResult validateLink(LinkValidationItem lvi) { if (this.cl == null) { initHttpClient(); } if (this.http.getHttpClientParameters() != null) { for (Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet()) { if (entry.getValue() != null) { System.setProperty(entry.getKey().toString(), entry.getValue().toString()); } } } // Some web servers don't allow the default user-agent sent by httpClient System.setProperty(HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"); this.cl.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"); String link = lvi.getLink(); String anchor = ""; int idx = link.indexOf('#'); if (idx != -1) { anchor = link.substring(idx + 1); link = link.substring(0, idx); } try { if (link.startsWith("/")) { if (getBaseURL() == null) { if (LOG.isWarnEnabled()) { LOG.warn("Cannot check link [" + link + "] in page [" + lvi.getSource() + "], as no base URL has been set!"); } return new LinkValidationResult(LinkcheckFileResult.WARNING_LEVEL, false, "No base URL specified"); } link = getBaseURL() + link; } HttpMethod hm = null; try { hm = checkLink(link, 0); } catch (Throwable t) { if (LOG.isDebugEnabled()) { LOG.debug("Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]", t); } return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, t.getClass().getName() + " : " + t.getMessage()); } if (hm == null) { return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, "Cannot retreive HTTP Status"); } if (hm.getStatusCode() == HttpStatus.SC_OK) { // lets check if the anchor is present if (anchor.length() > 0) { String content = hm.getResponseBodyAsString(); if (!Anchors.matchesAnchor(content, anchor)) { return new HTTPLinkValidationResult(LinkcheckFileResult.VALID_LEVEL, false, "Missing anchor '" + anchor + 
"'"); } } return new HTTPLinkValidationResult(LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(), hm.getStatusText()); } String msg = "Received: [" + hm.getStatusCode() + "] for [" + link + "] in page [" + lvi.getSource() + "]"; // If there's a redirection ... add a warning if (hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT) { LOG.warn(msg); return new HTTPLinkValidationResult(LinkcheckFileResult.WARNING_LEVEL, true, hm.getStatusCode(), hm.getStatusText()); } LOG.debug(msg); return new HTTPLinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, hm.getStatusCode(), hm.getStatusText()); } catch (Throwable t) { String msg = "Received: [" + t + "] for [" + link + "] in page [" + lvi.getSource() + "]"; if (LOG.isDebugEnabled()) { LOG.debug(msg, t); } else { LOG.error(msg); } return new LinkValidationResult(LinkcheckFileResult.ERROR_LEVEL, false, t.getMessage()); } finally { System.getProperties().remove(HttpMethodParams.USER_AGENT); if (this.http.getHttpClientParameters() != null) { for (Map.Entry<Object, Object> entry : this.http.getHttpClientParameters().entrySet()) { if (entry.getValue() != null) { System.getProperties().remove(entry.getKey().toString()); } } } } } /** Initialize the HttpClient. 
*/ private void initHttpClient() { LOG.debug("A new HttpClient instance is needed ..."); this.cl = new HttpClient(new MultiThreadedHttpConnectionManager()); // Default params if (this.http.getTimeout() != 0) { this.cl.getHttpConnectionManager().getParams().setConnectionTimeout(this.http.getTimeout()); this.cl.getHttpConnectionManager().getParams().setSoTimeout(this.http.getTimeout()); } this.cl.getParams().setBooleanParameter(HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true); HostConfiguration hc = new HostConfiguration(); HttpState state = new HttpState(); if (StringUtils.isNotEmpty(this.http.getProxyHost())) { hc.setProxy(this.http.getProxyHost(), this.http.getProxyPort()); if (LOG.isDebugEnabled()) { LOG.debug("Proxy Host:" + this.http.getProxyHost()); LOG.debug("Proxy Port:" + this.http.getProxyPort()); } if (StringUtils.isNotEmpty(this.http.getProxyUser()) && this.http.getProxyPassword() != null) { if (LOG.isDebugEnabled()) { LOG.debug("Proxy User:" + this.http.getProxyUser()); } Credentials credentials; if (StringUtils.isNotEmpty(this.http.getProxyNtlmHost())) { credentials = new NTCredentials(this.http.getProxyUser(), this.http.getProxyPassword(), this.http.getProxyNtlmHost(), this.http.getProxyNtlmDomain()); } else { credentials = new UsernamePasswordCredentials(this.http.getProxyUser(), this.http.getProxyPassword()); } state.setProxyCredentials(AuthScope.ANY, credentials); } } else { LOG.debug("Not using a proxy"); } this.cl.setHostConfiguration(hc); this.cl.setState(state); LOG.debug("New HttpClient instance created."); } /** * Checks the given link. * * @param link the link to check. * @param nbRedirect the number of current redirects. * @return HttpMethod * @throws IOException if something goes wrong. 
*/ private HttpMethod checkLink(String link, int nbRedirect) throws IOException { int max = MAX_NB_REDIRECT; if (this.http.getHttpClientParameters() != null && this.http.getHttpClientParameters().get(HttpClientParams.MAX_REDIRECTS) != null) { try { max = Integer .valueOf(this.http.getHttpClientParameters().get(HttpClientParams.MAX_REDIRECTS).toString()) .intValue(); } catch (NumberFormatException e) { if (LOG.isWarnEnabled()) { LOG.warn("HttpClient parameter '" + HttpClientParams.MAX_REDIRECTS + "' is not a number. Ignoring!"); } } } if (nbRedirect > max) { throw new HttpException("Maximum number of redirections (" + max + ") exceeded"); } HttpMethod hm; if (HEAD_METHOD.equalsIgnoreCase(this.http.getMethod())) { hm = new HeadMethod(link); } else if (GET_METHOD.equalsIgnoreCase(this.http.getMethod())) { hm = new GetMethod(link); } else { if (LOG.isErrorEnabled()) { LOG.error("Unsupported method: " + this.http.getMethod() + ", using 'get'."); } hm = new GetMethod(link); } // Default hm.setFollowRedirects(this.http.isFollowRedirects()); try { URL url = new URL(link); cl.getHostConfiguration().setHost(url.getHost(), url.getPort(), url.getProtocol()); cl.executeMethod(hm); StatusLine sl = hm.getStatusLine(); if (sl == null) { if (LOG.isErrorEnabled()) { LOG.error("Unknown error validating link : " + link); } return null; } if (hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT) { Header locationHeader = hm.getResponseHeader("location"); if (locationHeader == null) { LOG.error("Site sent redirect, but did not set Location header"); return hm; } String newLink = locationHeader.getValue(); // Be careful to absolute/relative links if (!newLink.startsWith("http://") && !newLink.startsWith("https://")) { if (newLink.startsWith("/")) { URL oldUrl = new URL(link); newLink = oldUrl.getProtocol() + "://" + oldUrl.getHost() + (oldUrl.getPort() > 0 ? 
":" + oldUrl.getPort() : "") + newLink; } else { newLink = link + newLink; } } HttpMethod oldHm = hm; if (LOG.isDebugEnabled()) { LOG.debug("[" + link + "] is redirected to [" + newLink + "]"); } oldHm.releaseConnection(); hm = checkLink(newLink, nbRedirect + 1); // Restore the hm to "Moved permanently" | "Moved temporarily" | "Temporary redirect" // if the new location is found to allow us to report it if (hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0) { return oldHm; } } } finally { hm.releaseConnection(); } return hm; } }