jshm.sh.scraper.wiki.ActionsScraper.java Source code

Java tutorial

Introduction

Here is the source code for jshm.sh.scraper.wiki.ActionsScraper.java

Source

/*
 * -----LICENSE START-----
 * JSHManager - A Java-based tool for managing one's ScoreHero account.
 * Copyright (C) 2008, 2009 Tim Mullin
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 * -----LICENSE END-----
*/
package jshm.sh.scraper.wiki;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import jshm.exceptions.ScraperException;
import jshm.sh.Client;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;

/**
 * Scrapes "actions" out of raw wiki markup into a map keyed by action name.
 *
 * <p>An action is a block of the form
 * <code>{{Name key="value" other="value"}}</code>. The name and the keys
 * consist of letters only; values must be enclosed in double quotes. All
 * text outside of such blocks is ignored. Every action found is appended
 * to the list stored under its name, so repeated actions with the same
 * name are all retained in the order they were read.
 *
 * @author Tim Mullin
 *
 */
public class ActionsScraper {
    static final Logger LOG = Logger.getLogger(ActionsScraper.class.getName());

    static {
        LOG.setLevel(Level.FINE);
    }

    /**
     * Fetches the raw wiki page at the given URL and scrapes its actions
     * into a new map.
     *
     * @param wikiUrl the wiki page URL; "/raw" is appended if not already present
     * @return a new map of action name to the actions found with that name
     * @throws IOException if the HTTP request or reading the response fails
     * @throws ScraperException if the page contains a malformed action block
     */
    public static Map<String, List<Action>> scrape(final String wikiUrl) throws IOException, ScraperException {
        return scrape(wikiUrl, new HashMap<String, List<Action>>());
    }

    /**
     * Fetches the raw wiki page at the given URL and scrapes its actions
     * into the supplied map.
     *
     * <p>If the server answers with anything other than status 200, a
     * warning is logged and <code>ret</code> is returned unmodified. When the
     * response does not declare a charset, ISO-8859-1 is used.
     *
     * @param wikiUrl the wiki page URL; "/raw" is appended if not already present
     * @param ret the map that scraped actions are added to
     * @return <code>ret</code>
     * @throws IOException if the HTTP request or reading the response fails
     * @throws ScraperException if the page contains a malformed action block
     */
    public static Map<String, List<Action>> scrape(String wikiUrl, final Map<String, List<Action>> ret)
            throws IOException, ScraperException {
        if (!wikiUrl.endsWith("/raw"))
            wikiUrl = wikiUrl + "/raw";

        HttpClient client = Client.getHttpClient();
        GetMethod method = new GetMethod(wikiUrl);

        try {
            client.executeMethod(method);

            if (method.getStatusCode() != 200) {
                LOG.warning("Non-200 response for " + wikiUrl + " - " + method.getStatusLine());
                return ret;
            }

            String charset = method.getResponseCharSet();
            LOG.fine("Charset for HTTP response is: " + charset);
            if (null == charset || charset.isEmpty())
                charset = "ISO-8859-1";

            return scrape(new InputStreamReader(method.getResponseBodyAsStream(), charset), ret);
        } finally {
            // Always hand the connection back to the connection manager,
            // whether we returned early, finished parsing or threw.
            method.releaseConnection();
        }
    }

    /**
     * Scrapes the actions from the given reader into a new map.
     *
     * @param reader the source of raw wiki markup
     * @return a new map of action name to the actions found with that name
     * @throws IOException if reading fails
     * @throws ScraperException if the markup contains a malformed action block
     */
    public static Map<String, List<Action>> scrape(final Reader reader) throws IOException, ScraperException {
        return scrape(reader, new HashMap<String, List<Action>>());
    }

    /**
     * Scrapes the actions from the given reader into the supplied map.
     *
     * <p>The input is consumed one character at a time by a small state
     * machine (see {@link Expect}). Parsing notes:
     * <ul>
     * <li>An action is registered in <code>ret</code> as soon as its name is
     *     complete; its arguments are filled in as they are read.</li>
     * <li>A right brace inside a quoted value is treated as a missing end
     *     quote: the value ends there and the brace starts the closing
     *     <code>}}</code>.</li>
     * <li>A key followed by <code>=</code> and then whitespace is dropped
     *     without storing a value.</li>
     * <li>Values are trimmed and passed through
     *     <code>org.htmlparser.util.Translate.decode</code> before being stored.</li>
     * <li>Reaching the end of input in the middle of an action is not
     *     reported; whatever has been parsed so far is kept.</li>
     * </ul>
     * The reader is not closed by this method.
     *
     * @param reader the source of raw wiki markup
     * @param ret the map that scraped actions are added to; must not be null
     * @return <code>ret</code>
     * @throws NullPointerException if <code>ret</code> is null
     * @throws IOException if reading fails
     * @throws ScraperException if an unexpected character is found inside an action block
     */
    public static Map<String, List<Action>> scrape(final Reader reader, final Map<String, List<Action>> ret)
            throws IOException, ScraperException {
        if (null == ret)
            throw new NullPointerException("ret");

        BufferedReader in = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);

        StringBuilder sb = null;
        String lastKey = null;
        Action action = null;

        int c = -1, leftBraceCount = 0, rightBraceCount = 0;
        boolean isQuotedString = false;
        Expect expect = Expect.LEFT_BRACE;

        while (-1 != (c = in.read())) {
            switch (expect) {
            case LEFT_BRACE:
                if ('{' == c) {
                    leftBraceCount++;

                    if (2 == leftBraceCount) {
                        LOG.finer("got 2 left braces, expecting name");
                        expect = Expect.NAME;
                        action = new Action();
                        sb = new StringBuilder();
                        isQuotedString = false;
                    }
                } else {
                    // Only two adjacent left braces open an action; a lone
                    // brace elsewhere in the page must not be remembered.
                    leftBraceCount = 0;
                }

                // skipping non action block
                continue;

            case NAME:
                if (Character.isLetter(c)) {
                    sb.append((char) c);
                    continue;
                } else if ('}' == c) {
                    // no key/value pairs
                    rightBraceCount++;
                    action.name = sb.toString();
                    register(ret, action);
                    expect = Expect.RIGHT_BRACE;

                    LOG.finer("got right brace after name (" + action.name + "), action has no args");
                    continue;
                } else if (Character.isWhitespace(c)) {
                    // we've read some characters for the name
                    if (0 != sb.length()) {
                        action.name = sb.toString();
                        register(ret, action);
                        expect = Expect.KEY;
                        sb = new StringBuilder();
                        isQuotedString = false;

                        LOG.finer("got whitespace after name (" + action.name + ")");
                    }

                    // else there's whitespace before the name

                    continue;
                }

                throw new ScraperException("expecting next letter in name or whitespace after, got: " + (char) c);

            case KEY:
                if (Character.isLetter(c)) {
                    sb.append((char) c);
                    continue;
                } else if (Character.isWhitespace(c) && 0 == sb.length()) {
                    // extra whitespace between last thing and this key
                    continue;
                } else if ('}' == c) {
                    if (0 != sb.length())
                        throw new ScraperException("expecting next letter in key but got right brace");

                    LOG.finer("got right brace after last value");

                    rightBraceCount++;
                    expect = Expect.RIGHT_BRACE;
                    continue;
                } else if ('=' == c) {
                    lastKey = sb.toString();

                    LOG.finer("got equals sign after key (" + lastKey + ")");

                    expect = Expect.VALUE;
                    sb = new StringBuilder();
                    isQuotedString = false;
                    continue;
                }

                throw new ScraperException("expecting next letter in key or equals sign, got: " + (char) c);

            case VALUE:
                if (isQuotedString && '"' != c && '}' != c) {
                    // ordinary character inside the quoted value
                    sb.append((char) c);
                    continue;
                } else if ('"' == c || (isQuotedString && '}' == c)) {
                    if ('}' == c) {
                        // malformed, no end quote
                        isQuotedString = false;
                    } else {
                        isQuotedString = !isQuotedString;
                    }

                    if (isQuotedString) {
                        LOG.finest("got opening quote for value of key (" + lastKey + ")");
                    } else {
                        String value = org.htmlparser.util.Translate.decode(sb.toString().trim());

                        LOG.finer("got closing quote for value (" + value + ")");

                        action.args.put(lastKey, value);

                        if ('}' == c) {
                            // the brace that ended the value is also the
                            // first of the two closing braces
                            expect = Expect.RIGHT_BRACE;
                            rightBraceCount++;
                        } else {
                            expect = Expect.KEY;
                        }

                        lastKey = null;
                        sb = new StringBuilder();
                        isQuotedString = false;
                    }

                    continue;
                } else if (Character.isWhitespace(c)) {
                    // key with '=' but no value: forget the key and move on
                    LOG.finer("got whitespace before value for key (" + lastKey + ")");
                    expect = Expect.KEY;
                    lastKey = null;
                    sb = new StringBuilder();
                    isQuotedString = false;

                    continue;
                }

                throw new ScraperException("expecting quote or next letter in value, got: " + (char) c);

            case RIGHT_BRACE:
                if ('}' == c) {
                    rightBraceCount++;

                    if (2 == rightBraceCount) {
                        LOG.finer("got 2nd right brace");
                        leftBraceCount = rightBraceCount = 0;
                        expect = Expect.LEFT_BRACE;
                        continue;
                    }
                }

                throw new ScraperException("expecting 2nd right brace, got " + (char) c);
            }
        }

        return ret;
    }

    /**
     * Appends the action to the list stored under its name, creating the
     * list first if this is the first action with that name.
     */
    private static void register(final Map<String, List<Action>> ret, final Action action) {
        List<Action> sameName = ret.get(action.name);

        if (null == sameName) {
            sameName = new ArrayList<Action>();
            ret.put(action.name, sameName);
        }

        sameName.add(action);
    }

    /** What the parser is waiting for next while walking the input. */
    private enum Expect {
        LEFT_BRACE, NAME, KEY, VALUE, RIGHT_BRACE
    }
}