com.revetkn.achewood.AchewoodExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.revetkn.achewood.AchewoodExtractor.java

Source

/**
 * Copyright (c) 2006 Mark Allen [mark.a.allen@gmail.com]
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package com.revetkn.achewood;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import java.util.Random;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

/**
 * Extracts <a href="http://www.achewood.com">Achewood</a> comics from the web.
 * <p>
 * Starting at {@code INITIAL_STRIP_DATE}, the extractor requests one strip per calendar
 * day up to the current instant, saving every strip it finds beneath
 * {@code BASE_SAVED_DIRECTORY}. It sleeps for a short random interval between requests
 * to be 'nice' to the server.
 * <p>
 * It's trivial to extend this to take command-line arguments [initial strip date, for
 * example], but since standard usage is not frequent enough to warrant that, all
 * configuration is done via constants.
 * <p>
 * Note: since this is not an interactive application, the implementation strives for
 * clarity - not speed.
 * @author <a href="mailto:mark.a.allen@gmail.com">Mark Allen</a>
 * @version $Id$
 * @since 0.1
 */
public class AchewoodExtractor {
    /**
     * Runs the extractor application.
     * <p>
     * An {@link IOException} raised during extraction is reported on standard error.
     * @param args Command-line arguments [ignored].
     */
    public static void main(String[] args) {
        try {
            new AchewoodExtractor().execute();
        } catch (IOException e) {
            System.err.println("An I/O error occurred during execution: " + e);
        }
    }

    /**
     * Pulls strips from the web and saves them locally, starting with
     * {@code INITIAL_STRIP_DATE} and advancing one day at a time until the strip date
     * is no longer before the current instant.
     * <p>
     * Progress is written to standard output, followed by a summary of how many strips
     * were saved and how many dates were tried.
     * @throws IOException If an error occurs during processing.
     */
    public void execute() throws IOException {
        int stripsRead = 0;
        int totalAttempts = 0;
        DateTime stripDate = ACHEWOOD_DATE_FORMATTER.parseDateTime(INITIAL_STRIP_DATE);

        while (stripDate.isBeforeNow()) {
            System.out.print("Checking " + LONG_DATE_FORMATTER.print(stripDate) + "...");

            ++totalAttempts;

            byte[] strip = retrieveStrip(stripDate);

            // Only process if we find a strip
            if (strip != null) {
                System.out.println("found a strip.");

                saveStrip(strip, stripDate);

                ++stripsRead;
            } else {
                System.out.println("no strip exists for that day.");
            }

            stripDate = stripDate.plusDays(1);

            waitAFewSeconds();
        }

        System.out.println(stripsRead + " strips read in " + totalAttempts + " attempts.");
    }

    /**
     * Retrieves a strip from the web.
     * <p>
     * The request URL is {@code BASE_STRIP_URL} followed by the strip date formatted as
     * {@code MMddyyyy}. The response body is only read when the server answers with
     * status 200, so error pages for missing strips are not buffered.
     * @param stripDate The date of the strip to retrieve.
     * @return Raw bytes of the strip image, or {@code null} if no image was found.
     * @throws IOException If an error occurs while talking to achewood.com.
     */
    protected byte[] retrieveStrip(DateTime stripDate) throws IOException {
        GetMethod getMethod = new GetMethod(BASE_STRIP_URL + ACHEWOOD_DATE_FORMATTER.print(stripDate));

        try {
            // Capture the status while the connection is still held, and bail out before
            // reading a body we would only throw away.
            int statusCode = httpClient.executeMethod(getMethod);

            if (statusCode != HTTP_STATUS_OK) {
                return null;
            }

            return getMethod.getResponseBody();
        } finally {
            // Always hand the connection back, whether or not the request succeeded.
            getMethod.releaseConnection();
        }
    }

    /**
     * Saves a strip to disk, using its date to figure out where to put it.
     * <p>
     * Any missing parent directories are created first.
     * @param image The raw strip image to save.
     * @param stripDate The date of the strip to save.
     * @throws IOException If an error occurs while saving the strip.
     * @see #createStripFilename(DateTime)
     * @see #createStorageDirectoriesIfNeeded(DateTime)
     */
    protected void saveStrip(byte[] image, DateTime stripDate) throws IOException {
        FileOutputStream os = null;

        createStorageDirectoriesIfNeeded(stripDate);

        try {
            os = new FileOutputStream(createStripFilename(stripDate));
            os.write(image);
            os.flush();
        } finally {
            if (os != null) {
                os.close();
            }
        }
    }

    /**
     * Helper method to build an 'organized' storage path for saved strips by examining
     * the strip's date.
     * <p>
     * General format is {@code BASE_SAVED_DIRECTORY}/Year/Month, where Month is the
     * month as printed by the {@code MMMM} pattern.
     * @param stripDate Date from which the directory structure is built.
     * @return A storage path for the given strip date.
     */
    protected String createStoragePath(DateTime stripDate) {
        return BASE_SAVED_DIRECTORY + File.separator + stripDate.getYear() + File.separator
                + MONTH_ONLY_DATE_FORMATTER.print(stripDate);
    }

    /**
     * Helper method to create a strip filename given the strip's date.
     * <p>
     * The filename is the storage path for the date, followed by the two-digit day of
     * the month and a {@code .gif} extension.
     * @param stripDate Date from which the strip filename is built.
     * @return A filename for the given strip date.
     * @see #createStoragePath(DateTime)
     */
    protected String createStripFilename(DateTime stripDate) {
        return createStoragePath(stripDate) + File.separator + DAY_ONLY_DATE_FORMATTER.print(stripDate) + ".gif";
    }

    /**
     * Creates directories as necessary to organize locally-stored strips.
     * @param stripDate Date from which the directory structure is built.
     * @throws IOException If the storage path could not be created, or if it already
     *         exists but is not a directory.
     * @see #createStoragePath(DateTime)
     */
    protected void createStorageDirectoriesIfNeeded(DateTime stripDate) throws IOException {
        File directories = new File(createStoragePath(stripDate));

        // mkdirs() returns false both on failure and when the path already exists, so
        // the outcome is confirmed with isDirectory(). Attempting the creation first
        // avoids a check-then-act race and rejects a regular file sitting at the path.
        if (!directories.mkdirs() && !directories.isDirectory()) {
            throw new IOException("Could not create directories: " + directories);
        }
    }

    /**
     * Makes the current thread sleep for a random whole number of seconds between 1 and
     * {@code MAX_SECONDS_TO_WAIT}, inclusive.
     * <p>
     * If the thread is interrupted while sleeping, its interrupt status is restored
     * before a {@link RuntimeException} wrapping the interruption is thrown.
     */
    protected void waitAFewSeconds() {
        try {
            Thread.sleep((1 + RANDOM.nextInt(MAX_SECONDS_TO_WAIT)) * 1000L);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the flag; set it again so code further
            // up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * HTTP client shared by all requests made through this extractor instance.
     */
    private final HttpClient httpClient = new HttpClient();

    /**
     * Achewood date formatter, used both to parse {@code INITIAL_STRIP_DATE} and to build
     * the date portion of strip URLs.
     */
    private static final DateTimeFormatter ACHEWOOD_DATE_FORMATTER = DateTimeFormat.forPattern("MMddyyyy");

    /**
     * Human-friendly date formatter [for display].
     */
    private static final DateTimeFormatter LONG_DATE_FORMATTER = DateTimeFormat.forPattern("MMMM d, yyyy");

    /**
     * Month-only date formatter [names the month directory in the storage path].
     */
    private static final DateTimeFormatter MONTH_ONLY_DATE_FORMATTER = DateTimeFormat.forPattern("MMMM");

    /**
     * Day-only date formatter [names the saved strip file].
     */
    private static final DateTimeFormatter DAY_ONLY_DATE_FORMATTER = DateTimeFormat.forPattern("dd");

    /**
     * The date on which to start retrieving strips, in {@code MMddyyyy} form.
     */
    private static final String INITIAL_STRIP_DATE = "10012001";

    /**
     * The base URL used to retrieve a strip.
     */
    private static final String BASE_STRIP_URL = "http://www.achewood.com/comic.php?date=";

    /**
     * The base directory into which saved strips are written.
     */
    private static final String BASE_SAVED_DIRECTORY = "achewood";

    /**
     * Inclusive upper bound, in seconds, for the random wait time between strip
     * retrievals [the lower bound is one second].
     */
    private static final int MAX_SECONDS_TO_WAIT = 4;

    /**
     * HTTP status code that indicates a strip was returned.
     */
    private static final int HTTP_STATUS_OK = 200;

    /**
     * Shared source of randomness for the wait time between strip retrievals.
     */
    private static final Random RANDOM = new Random();
}