org.wikipedia.vlsergey.secretary.jwpf.HttpBot.java Source code

Java tutorial

Introduction

Here is the source code for org.wikipedia.vlsergey.secretary.jwpf.HttpBot.java

Source

/*
 * Copyright 2007 Thomas Stock.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 * 
 * Contributors:
 * Philipp Kohl 
 */
package org.wikipedia.vlsergey.secretary.jwpf;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketException;
import java.net.URI;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;

import javax.annotation.PostConstruct;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.NoHttpResponseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.springframework.beans.factory.annotation.Autowired;
import org.wikipedia.vlsergey.secretary.http.HttpManager;
import org.wikipedia.vlsergey.secretary.jwpf.actions.ContentProcessable;
import org.wikipedia.vlsergey.secretary.jwpf.utils.ActionException;
import org.wikipedia.vlsergey.secretary.jwpf.utils.CookieException;
import org.wikipedia.vlsergey.secretary.jwpf.utils.ProcessException;
import org.wikipedia.vlsergey.secretary.utils.IoUtils;

/**
 * 
 * @author Thomas Stock
 */
public abstract class HttpBot {

    private static final String GZIP_CONTENT_ENCODING = "gzip";

    private static final Log log = LogFactory.getLog(HttpBot.class);

    /**
     * Returns the character set from the <tt>Content-Type</tt> header.
     * 
     * @param contentheader
     *            The content header.
     * @return String The character set.
     */
    protected static String getContentCharSet(Header contentheader) {
        log.trace("enter getContentCharSet( Header contentheader )");

        String encoding = null;
        if (contentheader != null) {
            HeaderElement values[] = contentheader.getElements();
            // I expect only one header element to be there
            // No more. no less
            if (values.length == 1) {
                NameValuePair param = values[0].getParameterByName("charset");
                if (param != null) {
                    // If I get anything "funny"
                    // UnsupportedEncondingException will result
                    encoding = param.getValue();
                }
            }
        }
        if (encoding == null) {
            encoding = MediaWikiBot.ENCODING;
            if (log.isDebugEnabled()) {
                log.debug("Default charset used: " + encoding);
            }
        }
        return encoding;
    }

    private AbstractHttpClient httpClient;

    @Autowired
    private HttpManager httpManager;

    private URI site;

    protected void get(final HttpGet getMethod, final ContentProcessable action)
            throws IOException, CookieException, ProcessException {
        getMethod.getParams().setParameter("http.protocol.content-charset", MediaWikiBot.ENCODING);

        getMethod.setHeader("Accept-Encoding", GZIP_CONTENT_ENCODING);

        httpClient.execute(getMethod, new ResponseHandler<Object>() {
            @Override
            public Object handleResponse(HttpResponse httpResponse) throws ClientProtocolException, IOException {

                try {
                    action.validateReturningCookies(httpClient.getCookieStore().getCookies(), getMethod);

                    InputStream inputStream = httpResponse.getEntity().getContent();
                    String contentEncoding = httpResponse.getEntity().getContentEncoding() != null
                            ? httpResponse.getEntity().getContentEncoding().getValue()
                            : "";
                    if ("gzip".equalsIgnoreCase(contentEncoding))
                        inputStream = new GZIPInputStream(inputStream);

                    int statuscode = httpResponse.getStatusLine().getStatusCode();

                    if (statuscode == HttpStatus.SC_NOT_FOUND) {
                        log.warn("Not Found: " + getMethod.getRequestLine().getUri());
                        throw new FileNotFoundException(getMethod.getRequestLine().getUri());
                    }
                    if (statuscode == HttpStatus.SC_INTERNAL_SERVER_ERROR) {
                        throw new ServerErrorException(httpResponse.getStatusLine());
                    }
                    if (statuscode != HttpStatus.SC_OK) {
                        throw new ClientProtocolException(httpResponse.getStatusLine().toString());
                    }

                    String encoding = StringUtils
                            .substringAfter(httpResponse.getEntity().getContentType().getValue(), "charset=");
                    String out = IoUtils.readToString(inputStream, encoding);
                    action.processReturningText(getMethod, out);
                } catch (CookieException exc) {
                    throw new ClientProtocolException(exc);
                } catch (ProcessException exc) {
                    throw new ClientProtocolException(exc);
                }
                return null;
            }
        });

    }

    public URI getSite() {
        return site;
    }

    @PostConstruct
    public synchronized void init() {
        httpClient = httpManager.newLocalhostHttpClient();
        HttpConnectionParams.setConnectionTimeout(httpClient.getParams(), 60000);
        HttpConnectionParams.setSoTimeout(httpClient.getParams(), 60000);
    }

    private void onPostResponse(final ContentProcessable action, final HttpPost postMethod, HttpResponse response)
            throws IOException {
        try {
            int statuscode = response.getStatusLine().getStatusCode();
            if (action.followRedirects() && (statuscode == HttpStatus.SC_MOVED_TEMPORARILY
                    || statuscode == HttpStatus.SC_MOVED_PERMANENTLY || statuscode == HttpStatus.SC_SEE_OTHER
                    || statuscode == HttpStatus.SC_TEMPORARY_REDIRECT)) {
                /*
                 * Usually a successful form-based login results in a redicrect
                 * to another url
                 */
                Header header = response.getFirstHeader("location");
                if (header != null) {
                    String newuri = header.getValue();
                    if ((newuri == null) || (newuri.equals(""))) {
                        newuri = "/";
                    }
                    log.debug("Redirect target: " + newuri);

                    HttpPost redirect = new HttpPost(newuri);
                    redirect.setEntity(postMethod.getEntity());
                    redirect.setHeader("Accept-Encoding", GZIP_CONTENT_ENCODING);
                    log.trace("GET: " + redirect.getURI());
                    httpClient.execute(redirect, new ResponseHandler<Object>() {
                        @Override
                        public Object handleResponse(HttpResponse response)
                                throws ClientProtocolException, IOException {
                            // no more redirects?
                            onPostResponse(action, postMethod, response);
                            return null;
                        }
                    });
                    return;
                }
            }

            if (statuscode == HttpStatus.SC_INTERNAL_SERVER_ERROR) {
                throw new ServerErrorException(response.getStatusLine());
            }
            if (statuscode != HttpStatus.SC_OK) {
                throw new ClientProtocolException(response.getStatusLine().toString());
            }

            final Header databaseLag = response.getFirstHeader("X-Database-Lag");
            final Header retryAfter = response.getFirstHeader("Retry-After");
            if (databaseLag != null) {
                throw new DatabaseLagException(databaseLag, retryAfter);
            }

            InputStream inputStream = response.getEntity().getContent();
            String out;
            try {
                String encoding = response.getFirstHeader("Content-Encoding") != null
                        ? response.getFirstHeader("Content-Encoding").getValue()
                        : "";
                if (GZIP_CONTENT_ENCODING.equalsIgnoreCase(encoding)) {
                    inputStream = new GZIPInputStream(inputStream);
                }

                Header charsetHeader = response.getFirstHeader("Content-Type");
                String charset;
                if (charsetHeader == null)
                    charset = MediaWikiBot.ENCODING;
                else
                    charset = getContentCharSet(charsetHeader);

                out = IoUtils.readToString(inputStream, charset);
            } finally {
                inputStream.close();
            }

            action.processReturningText(postMethod, out);

            action.validateReturningCookies(httpClient.getCookieStore().getCookies(), postMethod);

            log.trace(postMethod.getURI() + " || " + "POST: " + response.getStatusLine().toString());
        } catch (CookieException exc) {
            throw new ClientProtocolException(exc.getMessage(), exc);
        } catch (ProcessException exc) {
            throw new ClientProtocolException(exc.getMessage(), exc);
        }
    }

    protected final void performAction(final ContentProcessable contentProcessable)
            throws ActionException, ProcessException {
        List<HttpRequestBase> msgs = contentProcessable.getMessages();
        Iterator<HttpRequestBase> it = msgs.iterator();
        while (it.hasNext()) {
            HttpRequestBase httpMethod = it.next();
            if (getSite() != null) {

                URI uri = httpMethod.getURI();
                if (!uri.getPath().startsWith("/wiki/")) {
                    try {
                        String str = getSite().getScheme() + "://" + getSite().getHost()
                                + (getSite().getPort() == -1 ? "" : ":" + getSite().getPort()) + getSite().getPath()
                                + uri.getPath() + (uri.getRawQuery() != null ? ("?" + uri.getRawQuery()) : "");
                        uri = new URI(str);
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                    httpMethod.setURI(uri);
                } else {
                    try {
                        String str = getSite().getScheme() + "://" + getSite().getHost()
                                + (getSite().getPort() == -1 ? "" : ":" + getSite().getPort()) + uri.getPath()
                                + (uri.getRawQuery() != null ? ("?" + uri.getRawQuery()) : "");
                        uri = new URI(str);
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                    httpMethod.setURI(uri);
                }

                // logger.debug("path is: " + httpMethod.getURI());
            }

            try {
                while (true) {
                    try {
                        if (httpMethod instanceof HttpGet) {
                            get((HttpGet) httpMethod, contentProcessable);
                        } else {
                            post((HttpPost) httpMethod, contentProcessable);
                        }
                        break;
                    } catch (NoHttpResponseException exc) {
                        log.info("NoHttpResponseException, wait 6 seconds");
                        try {
                            Thread.sleep(5 * 1000);
                        } catch (InterruptedException e) {
                        }
                    } catch (SocketException exc) {
                        log.info("SocketException, wait 5 seconds");
                        try {
                            Thread.sleep(5 * 1000);
                        } catch (InterruptedException e) {
                        }
                    } catch (ServerErrorException exc) {
                        log.info("ServerErrorException (" + exc.getStatusLine() + "), wait 5 seconds");
                        try {
                            Thread.sleep(5 * 1000);
                        } catch (InterruptedException e) {
                        }
                    } catch (DatabaseLagException exc) {
                        log.info("Database lag occured: " + exc.databaseLag);
                        int retryAfter = 6;
                        try {
                            retryAfter = Integer.parseInt(exc.retryAfter.getValue());
                        } catch (Exception exc2) {
                            // ignore
                        }
                        if (retryAfter != 0) {
                            log.info("Waiting for " + retryAfter + " seconds");
                            try {
                                Thread.sleep(retryAfter * 1000);
                            } catch (InterruptedException e) {
                            }
                        }
                    }
                }
            } catch (IOException e1) {
                throw new ActionException(e1);
            }
        }
    }

    protected void post(final HttpPost postMethod, final ContentProcessable action)
            throws IOException, ProcessException, CookieException {
        postMethod.getParams().setParameter("http.protocol.content-charset", MediaWikiBot.ENCODING);

        postMethod.setHeader("Accept-Encoding", GZIP_CONTENT_ENCODING);

        httpClient.execute(postMethod, new ResponseHandler<Object>() {
            @Override
            public Object handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                onPostResponse(action, postMethod, response);
                return null;
            }
        });
    }

    public void setHttpManager(HttpManager httpManager) {
        this.httpManager = httpManager;
    }

    public void setSite(URI site) {
        this.site = site;
    }
}