org.peterbaldwin.client.android.delicious.WebPageTitleRequest.java Source code

Java tutorial

Introduction

Here is the source code for org.peterbaldwin.client.android.delicious.WebPageTitleRequest.java

Source

/*-
 *  Copyright (C) 2009 Peter Baldwin   
 *  
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.peterbaldwin.client.android.delicious;

import java.io.IOException;
import java.net.URI;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.ProtocolException;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultRedirectHandler;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.xml.sax.XMLReader;

import android.os.Handler;
import android.os.Message;
import android.text.Editable;
import android.text.Html;
import android.util.Log;

/**
 * Extracts the title from a web page.
 * <p>
 * {@link #run()} downloads the page at the URL passed to the constructor,
 * feeds the markup to
 * {@link Html#fromHtml(String, Html.ImageGetter, Html.TagHandler)} with this
 * object as the tag handler, and reports to the supplied {@link Handler}:
 * <ul>
 * <li>{@link #HANDLE_REDIRECT} when a redirect location different from the
 * requested URL was recorded and the final response was 200 OK;</li>
 * <li>{@link #HANDLE_TITLE} at the end of every {@code run()} call, whether
 * or not a title could be extracted.</li>
 * </ul>
 * The class also acts as the client's redirect handler so that it can record
 * the redirect location.
 */
public class WebPageTitleRequest extends DefaultRedirectHandler implements Runnable, Html.TagHandler {

    /**
     * {@code what} code of the message carrying the extracted title as its
     * {@code obj}. The {@code obj} is {@code null} when no title was
     * extracted.
     */
    public static final int HANDLE_TITLE = 1;

    /**
     * {@code what} code of the message carrying the redirect location (a
     * {@link String}) as its {@code obj}.
     */
    public static final int HANDLE_REDIRECT = 2;

    private static final String LOG_TAG = "WebPageTitleRequest";

    /**
     * Removes charset and any other parameters from Content-Type and trims
     * surrounding whitespace.
     * <p>
     * For example, {@code normalizeContentType("text/html; charset=utf-8")}
     * returns {@code "text/html"}.
     *
     * @param contentType the raw Content-Type value, may be {@code null}
     * @return the value up to (not including) the first {@code ';'},
     *         trimmed, or {@code null} if {@code contentType} is {@code null}
     */
    private static String normalizeContentType(String contentType) {
        if (contentType == null) {
            // Nothing to normalize; callers treat null as "not a known type".
            return null;
        }
        int index = contentType.indexOf(';');
        if (index != -1) {
            contentType = contentType.substring(0, index);
        }
        return contentType.trim();
    }

    /**
     * Tells whether a Content-Type value denotes a document type this class
     * attempts to parse for a title.
     *
     * @param contentType the raw Content-Type value, may be {@code null}
     * @return {@code true} for {@code text/html},
     *         {@code application/xhtml+xml} and {@code application/xml}
     *         (compared case-insensitively, parameters ignored);
     *         {@code false} otherwise, including for {@code null}
     */
    private static boolean isHtml(String contentType) {
        String mediaType = normalizeContentType(contentType);
        // equalsIgnoreCase returns false for a null argument, so no extra
        // null check is needed here.
        return "text/html".equalsIgnoreCase(mediaType)
                || "application/xhtml+xml".equalsIgnoreCase(mediaType)
                || "application/xml".equalsIgnoreCase(mediaType);
    }

    /** URL of the page to fetch. */
    private final String mUrl;

    /** Receives the {@link #HANDLE_TITLE} and {@link #HANDLE_REDIRECT} messages. */
    private final Handler mHandler;

    /** Length of the parser output at the moment the opening title tag was seen. */
    private int mStart;

    /** Extracted title, or {@code null} if none has been extracted. */
    private String mTitle;

    /** Last redirect location seen by {@link #getLocationURI}, or {@code null}. */
    private String mRedirectLocation;

    /**
     * Creates a request for the title of the page at {@code url}.
     *
     * @param url the URL of the page to fetch
     * @param handler the handler that receives the result messages
     */
    public WebPageTitleRequest(String url, Handler handler) {
        super();
        mUrl = url;
        mHandler = handler;
    }

    /**
     * Fetches the page, extracts its title and reports the result.
     * <p>
     * Failures are logged rather than propagated. A {@link #HANDLE_TITLE}
     * message is sent in every case; its {@code obj} is {@code null} when no
     * title was extracted.
     */
    public void run() {
        // Clear results of any earlier run so that a failure in this run
        // cannot report stale data.
        mStart = 0;
        mTitle = null;
        mRedirectLocation = null;
        try {
            fetchAndParse();
        } catch (TerminateParser e) {
            // Thrown by handleTag to terminate parser early.
        } catch (IOException e) {
            Log.e(LOG_TAG, "i/o error", e);
        } catch (RuntimeException e) {
            Log.e(LOG_TAG, "runtime error", e);
        } catch (Error e) {
            Log.e(LOG_TAG, "severe error", e);
        } finally {
            Message msg = mHandler.obtainMessage(HANDLE_TITLE, mTitle);
            msg.sendToTarget();
        }
    }

    /**
     * Performs the HTTP request and runs the HTML parser over the response
     * body. The title, if found, is stored in {@link #mTitle} by
     * {@link #handleTag}.
     *
     * @throws IOException if the request fails, the status is not 200 OK, the
     *         response has no entity, or the content type is missing or not
     *         supported
     * @throws TerminateParser when {@link #handleTag} stops the parser after
     *         the closing title tag
     */
    private void fetchAndParse() throws IOException {
        DefaultHttpClient client = new DefaultHttpClient();
        client.setRedirectHandler(this);
        try {
            HttpGet request = new HttpGet(mUrl);

            // Set a generic User-Agent to avoid being
            // redirected to a mobile UI.
            request.addHeader("User-Agent", "Mozilla/5.0");

            HttpResponse response = client.execute(request);
            HttpEntity entity = response.getEntity();
            StatusLine statusLine = response.getStatusLine();
            try {
                int statusCode = statusLine.getStatusCode();
                if (statusCode != HttpStatus.SC_OK) {
                    throw new IOException("Unexpected response code: " + statusCode);
                }

                // Send redirect before checking content type
                // because the redirect is important even if the
                // title cannot be extracted.
                if (mRedirectLocation != null && !mUrl.equals(mRedirectLocation)) {
                    Message msg = mHandler.obtainMessage(HANDLE_REDIRECT, mRedirectLocation);
                    msg.sendToTarget();
                }

                if (entity == null) {
                    // Fail with a descriptive error instead of a
                    // NullPointerException further down.
                    throw new IOException("Response has no entity");
                }
                checkContentType(entity);

                // NOTE(review): when the Content-Type carries no charset,
                // EntityUtils falls back to its own default (ISO-8859-1
                // according to the HttpCore documentation), so titles of
                // UTF-8 pages served without a charset may be garbled.
                // Confirm whether a UTF-8 default would be preferable.
                String source = EntityUtils.toString(entity);
                Html.ImageGetter imageGetter = null;
                Html.TagHandler tagHandler = this;
                Html.fromHtml(source, imageGetter, tagHandler);
            } finally {
                if (entity != null) {
                    entity.consumeContent();
                }
            }
        } finally {
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Verifies that the entity declares a content type accepted by
     * {@link #isHtml(String)}.
     *
     * @param entity the response entity, must not be {@code null}
     * @throws IOException if the content type header is missing or names an
     *         unsupported type
     */
    private static void checkContentType(HttpEntity entity) throws IOException {
        Header contentType = entity.getContentType();
        if (contentType == null) {
            throw new IOException("Content type is missing");
        }
        String value = contentType.getValue();
        if (!isHtml(value)) {
            // This is important because the user might try
            // bookmarking a video or another large file.
            throw new IOException("Unsupported content type: " + value);
        }
    }

    /**
     * Captures the text between the opening and closing {@code title} tags.
     * <p>
     * On the opening tag the current output length is remembered; on the
     * closing tag the text appended since then is whitespace-normalized and
     * stored as the title, and parsing is aborted by throwing
     * {@link TerminateParser}. Other tags are ignored.
     *
     * @throws TerminateParser after the closing title tag has been handled
     */
    public void handleTag(boolean opening, String tag, Editable output, XMLReader xmlReader) {
        if ("title".equalsIgnoreCase(tag)) {
            if (opening) {
                mStart = output.length();
            } else {
                int end = output.length();
                String title = output.subSequence(mStart, end).toString();

                // Collapse internal whitespace
                title = title.replaceAll("\\s+", " ");

                // Remove leading/trailing space
                title = title.trim();

                mTitle = title;

                // Nothing after the title is of interest; stop parsing.
                throw new TerminateParser();
            }
        }
    }

    /**
     * Delegates to the default redirect handling and remembers the returned
     * location so that {@link #run()} can report it.
     */
    @Override
    public URI getLocationURI(HttpResponse response, HttpContext context) throws ProtocolException {
        URI location = super.getLocationURI(response, context);
        if (location != null) {
            mRedirectLocation = location.toString();
        }
        return location;
    }

    /**
     * Thrown by {@link #handleTag} to abort HTML parsing once the title has
     * been captured; caught and ignored in {@link #run()}.
     */
    @SuppressWarnings("serial")
    private static class TerminateParser extends RuntimeException {
    }
}