import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Implementation of {@link Connection}.
 * @see org.jsoup.Jsoup#connect(String)
 */
public class HttpConnection implements Connection {
  public static Connection connect(String url) {
    Connection con = new HttpConnection();
    return con;

  public static Connection connect(URL url) {
    Connection con = new HttpConnection();
    return con;

  private Connection.Request req;
  private Connection.Response res;

  private HttpConnection() {
    req = new Request();
    res = new Response();

  public Connection url(URL url) {
    return this;

  public Connection url(String url) {
    Validate.notEmpty(url, "Must supply a valid URL");
    try {
      req.url(new URL(url));
    } catch (MalformedURLException e) {
      throw new IllegalArgumentException("Malformed URL: " + url, e);
    return this;

  public Connection userAgent(String userAgent) {
    Validate.notNull(userAgent, "User agent must not be null");
    req.header("User-Agent", userAgent);
    return this;

  public Connection timeout(int millis) {
    return this;

  public Connection followRedirects(boolean followRedirects) {
    return this;

  public Connection referrer(String referrer) {
    Validate.notNull(referrer, "Referrer must not be null");
    req.header("Referer", referrer);
    return this;

  public Connection method(Method method) {
    return this;

  public Connection data(String key, String value) {, value));
    return this;

  public Connection data(Map<String, String> data) {
    Validate.notNull(data, "Data map must not be null");
    for (Map.Entry<String, String> entry : data.entrySet()) {, entry.getValue()));
    return this;

  public Connection data(String... keyvals) {
    Validate.notNull(keyvals, "Data key value pairs must not be null");
    Validate.isTrue(keyvals.length % 2 == 0,
        "Must supply an even number of key value pairs");
    for (int i = 0; i < keyvals.length; i += 2) {
      String key = keyvals[i];
      String value = keyvals[i + 1];
      Validate.notEmpty(key, "Data key must not be empty");
      Validate.notNull(value, "Data value must not be null");, value));
    return this;

  public Connection header(String name, String value) {
    req.header(name, value);
    return this;

  public Connection cookie(String name, String value) {
    req.cookie(name, value);
    return this;

  public Connection.Response execute() throws IOException {
    res = Response.execute(req);
    return res;

  public Connection.Request request() {
    return req;

  public Connection request(Connection.Request request) {
    req = request;
    return this;

  public Connection.Response response() {
    return res;

  public Connection response(Connection.Response response) {
    res = response;
    return this;

  @SuppressWarnings({ "unchecked" })
  private static abstract class Base<T extends Connection.Base> implements
      Connection.Base<T> {
    URL url;
    Method method;
    Map<String, String> headers;
    Map<String, String> cookies;

    private Base() {
      headers = new LinkedHashMap<String, String>();
      cookies = new LinkedHashMap<String, String>();

    public URL url() {
      return url;

    public T url(URL url) {
      Validate.notNull(url, "URL must not be null");
      this.url = url;
      return (T) this;

    public Method method() {
      return method;

    public T method(Method method) {
      Validate.notNull(method, "Method must not be null");
      this.method = method;
      return (T) this;

    public String header(String name) {
      Validate.notNull(name, "Header name must not be null");
      return getHeaderCaseInsensitive(name);

    public T header(String name, String value) {
      Validate.notEmpty(name, "Header name must not be empty");
      Validate.notNull(value, "Header value must not be null");
      removeHeader(name); // ensures we don't get an "accept-encoding" and
                // a "Accept-Encoding"
      headers.put(name, value);
      return (T) this;

    public boolean hasHeader(String name) {
      Validate.notEmpty(name, "Header name must not be empty");
      return getHeaderCaseInsensitive(name) != null;

    public T removeHeader(String name) {
      Validate.notEmpty(name, "Header name must not be empty");
      Map.Entry<String, String> entry = scanHeaders(name); // remove is
                                  // case
                                  // insensitive
                                  // too
      if (entry != null)
        headers.remove(entry.getKey()); // ensures correct case
      return (T) this;

    public Map<String, String> headers() {
      return headers;

    private String getHeaderCaseInsensitive(String name) {
      Validate.notNull(name, "Header name must not be null");
      // quick evals for common case of title case, lower case, then scan
      // for mixed
      String value = headers.get(name);
      if (value == null)
        value = headers.get(name.toLowerCase());
      if (value == null) {
        Map.Entry<String, String> entry = scanHeaders(name);
        if (entry != null)
          value = entry.getValue();
      return value;

    private Map.Entry<String, String> scanHeaders(String name) {
      String lc = name.toLowerCase();
      for (Map.Entry<String, String> entry : headers.entrySet()) {
        if (entry.getKey().toLowerCase().equals(lc))
          return entry;
      return null;

    public String cookie(String name) {
      Validate.notNull(name, "Cookie name must not be null");
      return cookies.get(name);

    public T cookie(String name, String value) {
      Validate.notEmpty(name, "Cookie name must not be empty");
      Validate.notNull(value, "Cookie value must not be null");
      cookies.put(name, value);
      return (T) this;

    public boolean hasCookie(String name) {
      Validate.notEmpty("Cookie name must not be empty");
      return cookies.containsKey(name);

    public T removeCookie(String name) {
      Validate.notEmpty("Cookie name must not be empty");
      return (T) this;

    public Map<String, String> cookies() {
      return cookies;

  public static class Request extends Base<Connection.Request> implements
      Connection.Request {
    private int timeoutMilliseconds;
    private boolean followRedirects;
    private Collection<Connection.KeyVal> data;

    private Request() {
      timeoutMilliseconds = 3000;
      followRedirects = true;
      data = new ArrayList<Connection.KeyVal>();
      method = Connection.Method.GET;
      headers.put("Accept-Encoding", "gzip");

    public int timeout() {
      return timeoutMilliseconds;

    public Request timeout(int millis) {
      Validate.isTrue(millis >= 0,
          "Timeout milliseconds must be 0 (infinite) or greater");
      timeoutMilliseconds = millis;
      return this;

    public boolean followRedirects() {
      return followRedirects;

    public Connection.Request followRedirects(boolean followRedirects) {
      this.followRedirects = followRedirects;
      return this;

    public Request data(Connection.KeyVal keyval) {
      Validate.notNull(keyval, "Key val must not be null");
      return this;

    public Collection<Connection.KeyVal> data() {
      return data;

  public static class Response extends Base<Connection.Response> implements
      Connection.Response {
    private static final int MAX_REDIRECTS = 20;
    private int statusCode;
    private String statusMessage;
    private ByteBuffer byteData;
    private String charset;
    private String contentType;
    private boolean executed = false;
    private int numRedirects = 0;

    Response() {

    private Response(Response previousResponse) throws IOException {
      if (previousResponse != null) {
        numRedirects = previousResponse.numRedirects + 1;
        if (numRedirects >= MAX_REDIRECTS)
          throw new IOException(
                  "Too many redirects occurred trying to load URL %s",

    static Response execute(Connection.Request req) throws IOException {
      return execute(req, null);

    static Response execute(Connection.Request req,
        Response previousResponse) throws IOException {
      Validate.notNull(req, "Request must not be null");
      String protocol = req.url().getProtocol();
          protocol.equals("http") || protocol.equals("https"),
          "Only http & https protocols supported");

      // set up the request for execution
      if (req.method() == Connection.Method.GET && > 0)
        serialiseRequestUrl(req); // appends query string
      HttpURLConnection conn = createConnection(req);
      if (req.method() == Connection.Method.POST)
        writePost(, conn.getOutputStream());

      int status = conn.getResponseCode();
      boolean needsRedirect = false;
      if (status != HttpURLConnection.HTTP_OK) {
        if (status == HttpURLConnection.HTTP_MOVED_TEMP
            || status == HttpURLConnection.HTTP_MOVED_PERM
            || status == HttpURLConnection.HTTP_SEE_OTHER)
          needsRedirect = true;
          throw new IOException(status + " error loading URL "
              + req.url().toString());
      Response res = new Response(previousResponse);
      res.setupFromConnection(conn, previousResponse);
      if (needsRedirect && req.followRedirects()) {
        req.url(new URL(req.url(), res.header("Location")));
        for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add
                                          // response
                                          // cookies
                                          // to
                                          // request
                                          // (for
                                          // e.g.
                                          // login
                                          // posts)
          req.cookie(cookie.getKey(), cookie.getValue());
        return execute(req, res);

      InputStream inStream = null;
      try {
        inStream = res.hasHeader("Content-Encoding")
            && res.header("Content-Encoding").equalsIgnoreCase(
                "gzip") ? new BufferedInputStream(
            new GZIPInputStream(conn.getInputStream()))
            : new BufferedInputStream(conn.getInputStream());
        res.byteData = DataUtil.readToByteBuffer(inStream);
        res.charset = DataUtil
            .getCharsetFromContentType(res.contentType); // may be
                                    // null,
                                    // readInputStream
                                    // deals
                                    // with
                                    // it
      } finally {
        if (inStream != null)

      res.executed = true;
      return res;

    public int statusCode() {
      return statusCode;

    public String statusMessage() {
      return statusMessage;

    public String charset() {
      return charset;

    public String contentType() {
      return contentType;

    public String body() {
          "Request must be executed (with .execute(), .get(), or .post() before getting response body");
      // charset gets set from header on execute, and from meta-equiv on
      // parse. parse may not have happened yet
      String body;
      if (charset == null)
        body = Charset.forName(DataUtil.defaultCharset)
        body = Charset.forName(charset).decode(byteData).toString();
      return body;

    public byte[] bodyAsBytes() {
          "Request must be executed (with .execute(), .get(), or .post() before getting response body");
      return byteData.array();

    // set up connection defaults, and details from request
    private static HttpURLConnection createConnection(Connection.Request req)
        throws IOException {
      HttpURLConnection conn = (HttpURLConnection) req.url()
      conn.setInstanceFollowRedirects(false); // don't rely on native
                          // redirection support
      if (req.method() == Method.POST)
      if (req.cookies().size() > 0)
        conn.addRequestProperty("Cookie", getRequestCookieString(req));
      for (Map.Entry<String, String> header : req.headers().entrySet()) {
        conn.addRequestProperty(header.getKey(), header.getValue());
      return conn;

    // set up url, method, header, cookies
    private void setupFromConnection(HttpURLConnection conn,
        Connection.Response previousResponse) throws IOException {
      method = Connection.Method.valueOf(conn.getRequestMethod());
      url = conn.getURL();
      statusCode = conn.getResponseCode();
      statusMessage = conn.getResponseMessage();
      contentType = conn.getContentType();

      // headers into map
      Map<String, List<String>> resHeaders = conn.getHeaderFields();
      for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
        String name = entry.getKey();
        if (name == null)
          continue; // http/1.1 line

        List<String> values = entry.getValue();

        if (name.equalsIgnoreCase("Set-Cookie")) {
          for (String value : values) {
            TokenQueue cd = new TokenQueue(value);
            String cookieName = cd.chompTo("=").trim();
            String cookieVal = cd.consumeTo(";").trim();
            // ignores path, date, domain, secure et al. req'd?
            cookie(cookieName, cookieVal);
        } else { // only take the first instance of each header
          if (!values.isEmpty())
            header(name, values.get(0));

      // if from a redirect, map previous response cookies into this
      // response
      if (previousResponse != null) {
        for (Map.Entry<String, String> prevCookie : previousResponse
            .cookies().entrySet()) {
          if (!hasCookie(prevCookie.getKey()))
            cookie(prevCookie.getKey(), prevCookie.getValue());

    private static void writePost(Collection<Connection.KeyVal> data,
        OutputStream outputStream) throws IOException {
      OutputStreamWriter w = new OutputStreamWriter(outputStream,
      boolean first = true;
      for (Connection.KeyVal keyVal : data) {
        if (!first)
          first = false;

        w.write(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset));

    private static String getRequestCookieString(Connection.Request req) {
      StringBuilder sb = new StringBuilder();
      boolean first = true;
      for (Map.Entry<String, String> cookie : req.cookies().entrySet()) {
        if (!first)
          sb.append("; ");
          first = false;
        // todo: spec says only ascii, no escaping / encoding defined.
        // validate on set? or escape somehow here?
      return sb.toString();

    // for get url reqs, serialise the data map into the url
    private static void serialiseRequestUrl(Connection.Request req)
        throws IOException {
      URL in = req.url();
      StringBuilder url = new StringBuilder();
      boolean first = true;
      // reconstitute the query, ready for appends
          .append(in.getAuthority()) // includes host, port
      if (in.getQuery() != null) {
        first = false;
      for (Connection.KeyVal keyVal : {
        if (!first)
          first = false;
            URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
      req.url(new URL(url.toString()));; // moved into url as get params

  public static class KeyVal implements Connection.KeyVal {
    private String key;
    private String value;

    public static KeyVal create(String key, String value) {
      Validate.notEmpty(key, "Data key must not be empty");
      Validate.notNull(value, "Data value must not be null");
      return new KeyVal(key, value);

    private KeyVal(String key, String value) {
      this.key = key;
      this.value = value;

    public KeyVal key(String key) {
      Validate.notEmpty(key, "Data key must not be empty");
      this.key = key;
      return this;

    public String key() {
      return key;

    public KeyVal value(String value) {
      Validate.notNull(value, "Data value must not be null");
      this.value = value;
      return this;

    public String value() {
      return value;

    public String toString() {
      return key + "=" + value;

/**
 * A Connection provides a convenient interface to fetch content from the web,
 * and parse them into Documents.
 * <p>
 * To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}.
 * Connections contain {@link Connection.Request} and
 * {@link Connection.Response} objects. The request objects are reusable as
 * prototype requests.
 * <p>
 * Request configuration can be made using either the shortcut methods in
 * Connection (e.g. {@link #userAgent(String)}), or by methods in the
 * Connection.Request object directly. All request configuration must be made
 * before the request is executed.
 * <p>
 * The Connection interface is <b>currently in beta</b> and subject to change.
 * Comments, suggestions, and bug reports are welcome.
 */
interface Connection {

  /**
   * GET and POST http methods.
   */
  public enum Method {
    GET, POST
  }

  /**
   * Set the request URL to fetch. The protocol must be HTTP or HTTPS.
   * @param url
   *            URL to connect to
   * @return this Connection, for chaining
   */
  public Connection url(URL url);

  /**
   * Set the request URL to fetch. The protocol must be HTTP or HTTPS.
   * @param url
   *            URL to connect to
   * @return this Connection, for chaining
   */
  public Connection url(String url);

  /**
   * Set the request user-agent header.
   * @param userAgent
   *            user-agent to use
   * @return this Connection, for chaining
   */
  public Connection userAgent(String userAgent);

  /**
   * Set the request timeouts (connect and read). If a timeout occurs, an
   * IOException will be thrown. The default timeout is 3 seconds (3000
   * millis). A timeout of zero is treated as an infinite timeout.
   * @param millis
   *            number of milliseconds (thousandths of a second) before timing
   *            out connects or reads.
   * @return this Connection, for chaining
   */
  public Connection timeout(int millis);

  /**
   * Set the request referrer (aka "referer") header.
   * @param referrer
   *            referrer to use
   * @return this Connection, for chaining
   */
  public Connection referrer(String referrer);

  /**
   * Configures the connection to (not) follow server redirects. By default
   * this is <b>true</b>.
   * @param followRedirects
   *            true if server redirects should be followed.
   * @return this Connection, for chaining
   */
  public Connection followRedirects(boolean followRedirects);

  /**
   * Set the request method to use, GET or POST. Default is GET.
   * @param method
   *            HTTP request method
   * @return this Connection, for chaining
   */
  public Connection method(Method method);

  /**
   * Add a request data parameter. Request parameters are sent in the request
   * query string for GETs, and in the request body for POSTs. A request may
   * have multiple values of the same name.
   * @param key
   *            data key
   * @param value
   *            data value
   * @return this Connection, for chaining
   */
  public Connection data(String key, String value);

  /**
   * Adds all of the supplied data to the request data parameters.
   * @param data
   *            map of data parameters
   * @return this Connection, for chaining
   */
  public Connection data(Map<String, String> data);

  /**
   * Add a number of request data parameters. Multiple parameters may be set
   * at once, e.g.:
   * <code>.data("name", "jsoup", "language", "Java", "language", "English");</code>
   * creates a query string like:
   * <code>?name=jsoup&amp;language=Java&amp;language=English</code>
   * @param keyvals
   *            a set of key value pairs.
   * @return this Connection, for chaining
   */
  public Connection data(String... keyvals);

  /**
   * Set a request header.
   * @param name
   *            header name
   * @param value
   *            header value
   * @return this Connection, for chaining
   * @see org.jsoup.Connection.Request#headers()
   */
  public Connection header(String name, String value);

  /**
   * Set a cookie to be sent in the request.
   * @param name
   *            name of cookie
   * @param value
   *            value of cookie
   * @return this Connection, for chaining
   */
  public Connection cookie(String name, String value);

  /**
   * Execute the request.
   * @return a response object
   * @throws IOException
   *             on error
   */
  public Response execute() throws IOException;

  /**
   * Get the request object associated with this connection.
   * @return request
   */
  public Request request();

  /**
   * Set the connection's request.
   * @param request
   *            new request object
   * @return this Connection, for chaining
   */
  public Connection request(Request request);

  /**
   * Get the response, once the request has been executed.
   * @return response
   */
  public Response response();

  /**
   * Set the connection's response.
   * @param response
   *            new response
   * @return this Connection, for chaining
   */
  public Connection response(Response response);

  /**
   * Common methods for Requests and Responses.
   * @param <T>
   *            Type of Base, either Request or Response
   */
  interface Base<T extends Base> {

    /**
     * Get the URL.
     * @return URL
     */
    public URL url();

    /**
     * Set the URL.
     * @param url
     *            new URL
     * @return this, for chaining
     */
    public T url(URL url);

    /**
     * Get the request method.
     * @return method
     */
    public Method method();

    /**
     * Set the request method.
     * @param method
     *            new method
     * @return this, for chaining
     */
    public T method(Method method);

    /**
     * Get the value of a header. This is a simplified header model, where a
     * header may only have one value.
     * <p>
     * Header names are case insensitive.
     * @param name
     *            name of header (case insensitive)
     * @return value of header, or null if not set.
     * @see #hasHeader(String)
     * @see #cookie(String)
     */
    public String header(String name);

    /**
     * Set a header. This method will overwrite any existing header with the
     * same case insensitive name.
     * @param name
     *            Name of header
     * @param value
     *            Value of header
     * @return this, for chaining
     */
    public T header(String name, String value);

    /**
     * Check if a header is present.
     * @param name
     *            name of header (case insensitive)
     * @return if the header is present in this request/response
     */
    public boolean hasHeader(String name);

    /**
     * Remove a header by name.
     * @param name
     *            name of header to remove (case insensitive)
     * @return this, for chaining
     */
    public T removeHeader(String name);

    /**
     * Retrieve all of the request/response headers as a map.
     * @return headers
     */
    public Map<String, String> headers();

    /**
     * Get a cookie value by name from this request/response.
     * <p>
     * Response objects have a simplified cookie model. Each cookie set in
     * the response is added to the response object's cookie key=value map.
     * The cookie's path, domain, and expiry date are ignored.
     * @param name
     *            name of cookie to retrieve.
     * @return value of cookie, or null if not set
     */
    public String cookie(String name);

    /**
     * Set a cookie in this request/response.
     * @param name
     *            name of cookie
     * @param value
     *            value of cookie
     * @return this, for chaining
     */
    public T cookie(String name, String value);

    /**
     * Check if a cookie is present.
     * @param name
     *            name of cookie
     * @return if the cookie is present in this request/response
     */
    public boolean hasCookie(String name);

    /**
     * Remove a cookie by name.
     * @param name
     *            name of cookie to remove
     * @return this, for chaining
     */
    public T removeCookie(String name);

    /**
     * Retrieve all of the request/response cookies as a map.
     * @return cookies
     */
    public Map<String, String> cookies();

  }

  /**
   * Represents a HTTP request.
   */
  public interface Request extends Base<Request> {

    /**
     * Get the request timeout, in milliseconds.
     * @return the timeout in milliseconds.
     */
    public int timeout();

    /**
     * Update the request timeout.
     * @param millis
     *            timeout, in milliseconds
     * @return this Request, for chaining
     */
    public Request timeout(int millis);

    /**
     * Get the current followRedirects configuration.
     * @return true if followRedirects is enabled.
     */
    public boolean followRedirects();

    /**
     * Configures the request to (not) follow server redirects. By default
     * this is <b>true</b>.
     * @param followRedirects
     *            true if server redirects should be followed.
     * @return this Request, for chaining
     */
    public Request followRedirects(boolean followRedirects);

    /**
     * Add a data parameter to the request.
     * @param keyval
     *            data to add.
     * @return this Request, for chaining
     */
    public Request data(KeyVal keyval);

    /**
     * Get all of the request's data parameters.
     * @return collection of keyvals
     */
    public Collection<KeyVal> data();

  }

  /**
   * Represents a HTTP response.
   */
  public interface Response extends Base<Response> {

    /**
     * Get the status code of the response.
     * @return status code
     */
    public int statusCode();

    /**
     * Get the status message of the response.
     * @return status message
     */
    public String statusMessage();

    /**
     * Get the character set name of the response.
     * @return character set name
     */
    public String charset();

    /**
     * Get the response content type (e.g. "text/html").
     * @return the response content type
     */
    public String contentType();

    /**
     * Get the body of the response as a plain string.
     * @return body
     */
    public String body();

    /**
     * Get the body of the response as an array of bytes.
     * @return body bytes
     */
    public byte[] bodyAsBytes();
  }

  /**
   * A Key Value tuple.
   */
  public interface KeyVal {

    /**
     * Update the key of a keyval.
     * @param key
     *            new key
     * @return this KeyVal, for chaining
     */
    public KeyVal key(String key);

    /**
     * Get the key of a keyval.
     * @return the key
     */
    public String key();

    /**
     * Update the value of a keyval.
     * @param value
     *            the new value
     * @return this KeyVal, for chaining
     */
    public KeyVal value(String value);

    /**
     * Get the value of a keyval.
     * @return the value
     */
    public String value();
  }
}

final class Validate {

  /** Static utility holder; not instantiable. */
  private Validate() {
  }

  /**
   * Validates that the object is not null.
   * @param obj
   *            object to test
   * @throws IllegalArgumentException if {@code obj} is null
   */
  public static void notNull(Object obj) {
    if (obj == null) {
      throw new IllegalArgumentException("Object must not be null");
    }
  }

  /**
   * Validates that the object is not null.
   * @param obj
   *            object to test
   * @param msg
   *            message to output if validation fails
   * @throws IllegalArgumentException if {@code obj} is null
   */
  public static void notNull(Object obj, String msg) {
    if (obj == null) {
      throw new IllegalArgumentException(msg);
    }
  }

  /**
   * Validates that the value is true.
   * @param val
   *            value to test
   * @throws IllegalArgumentException if {@code val} is false
   */
  public static void isTrue(boolean val) {
    if (!val) {
      throw new IllegalArgumentException("Must be true");
    }
  }

  /**
   * Validates that the value is true.
   * @param val
   *            value to test
   * @param msg
   *            message to output if validation fails
   * @throws IllegalArgumentException if {@code val} is false
   */
  public static void isTrue(boolean val, String msg) {
    if (!val) {
      throw new IllegalArgumentException(msg);
    }
  }

  /**
   * Validates that the array contains no null elements.
   * @param objects
   *            the array to test
   * @throws IllegalArgumentException if any element is null
   */
  public static void noNullElements(Object[] objects) {
    noNullElements(objects, "Array must not contain any null objects");
  }

  /**
   * Validates that the array contains no null elements. The array itself is
   * not null-checked; passing a null array raises a NullPointerException.
   * @param objects
   *            the array to test
   * @param msg
   *            message to output if validation fails
   * @throws IllegalArgumentException if any element is null
   */
  public static void noNullElements(Object[] objects, String msg) {
    for (Object obj : objects) {
      if (obj == null) {
        throw new IllegalArgumentException(msg);
      }
    }
  }

  /**
   * Validates that the string is not empty.
   * @param string
   *            the string to test
   * @throws IllegalArgumentException if the string is null or zero-length
   */
  public static void notEmpty(String string) {
    if (string == null || string.length() == 0) {
      throw new IllegalArgumentException("String must not be empty");
    }
  }

  /**
   * Validates that the string is not empty.
   * @param string
   *            the string to test
   * @param msg
   *            message to output if validation fails
   * @throws IllegalArgumentException if the string is null or zero-length
   */
  public static void notEmpty(String string, String msg) {
    if (string == null || string.length() == 0) {
      throw new IllegalArgumentException(msg);
    }
  }
}

/**
 * Internal static utilities for handling data.
 */
class DataUtil {
  // Captures the value of a "charset=" parameter, optionally quoted, up to
  // the next whitespace, semicolon or quote. Matching is case insensitive.
  private static final Pattern charsetPattern = Pattern
      .compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
  static final String defaultCharset = "UTF-8"; // used if not found in header
                          // or meta charset
  private static final int bufferSize = 0x20000; // ~130K.

  /** Static utility holder; not instantiable. */
  private DataUtil() {
  }

  /**
   * Reads the stream to its end and returns the bytes wrapped in a buffer.
   * The stream is not closed; that stays with the caller.
   * @param inStream
   *            stream to drain
   * @return a buffer positioned at zero over all bytes that were read
   * @throws IOException
   *             on IO error
   */
  static ByteBuffer readToByteBuffer(InputStream inStream) throws IOException {
    byte[] buffer = new byte[bufferSize];
    ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
    int read;
    while (true) {
      read = inStream.read(buffer);
      if (read == -1) {
        break; // end of stream
      }
      outStream.write(buffer, 0, read);
    }
    ByteBuffer byteData = ByteBuffer.wrap(outStream.toByteArray());
    return byteData;
  }

  /**
   * Parse out a charset from a content type header.
   * @param contentType
   *            e.g. "text/html; charset=EUC-JP"
   * @return "EUC-JP", or null if not found. Charset is trimmed and
   *         uppercased.
   */
  static String getCharsetFromContentType(String contentType) {
    if (contentType == null) {
      return null;
    }

    Matcher m = charsetPattern.matcher(contentType);
    if (m.find()) {
      // Fixed locale so the result does not vary with the default locale.
      String found = m.group(1).trim().toUpperCase(Locale.ENGLISH);
      // "charset=" with no value is treated as not found.
      return found.length() == 0 ? null : found;
    }
    return null;
  }
}


/**
 * A character queue with parsing helpers.
 * @author Jonathan Hedley
 */
class TokenQueue {
    // Implementation note: the queue is the backing string plus a read cursor (pos). Consuming
    // moves the cursor forward; only addFirst(String) rebuilds the backing string.
    private String queue;
    private int pos = 0;
    private static final char ESC = '\\'; // escape char for chomp balanced.

    /**
     * Create a new TokenQueue.
     * @param data string of data to back queue.
     * @throws IllegalArgumentException if data is null
     */
    public TokenQueue(String data) {
        if (data == null) {
            throw new IllegalArgumentException("Data must not be null");
        }
        queue = data;
    }

    /**
     * Is the queue empty?
     * @return true if no data left in queue.
     */
    public boolean isEmpty() {
        return remainingLength() == 0;
    }

    /** Number of characters between the cursor and the end of the backing string. */
    private int remainingLength() {
        return queue.length() - pos;
    }

    /**
     * Retrieves but does not remove the first character from the queue.
     * @return First character, or 0 if empty.
     */
    public char peek() {
        return isEmpty() ? 0 : queue.charAt(pos);
    }

    /**
     * Add a character to the start of the queue (will be the next character retrieved).
     * @param c character to add
     */
    public void addFirst(Character c) {
        addFirst(c.toString());
    }

    /**
     * Add a string to the start of the queue.
     * @param seq string to add.
     */
    public void addFirst(String seq) {
        // not very performant, but an edge case
        queue = seq + queue.substring(pos);
        pos = 0;
    }

    /**
     * Tests if the next characters on the queue match the sequence. Case insensitive.
     * @param seq String to check queue for.
     * @return true if the next characters match.
     */
    public boolean matches(String seq) {
        return queue.regionMatches(true, pos, seq, 0, seq.length());
    }

    /**
     * Case sensitive match test.
     * @param seq String to check queue for.
     * @return true if the queue, from the cursor, starts with seq exactly.
     */
    public boolean matchesCS(String seq) {
        return queue.startsWith(seq, pos);
    }

    /**
     * Tests if the next characters match any of the sequences. Case insensitive.
     * @param seq candidate sequences
     * @return true if at least one candidate matches at the cursor
     */
    public boolean matchesAny(String... seq) {
        for (String s : seq) {
            if (matches(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if the next character is one of the supplied characters. Case sensitive.
     * @param seq candidate characters
     * @return true if the queue is not empty and its next character equals one of the candidates
     */
    public boolean matchesAny(char... seq) {
        if (isEmpty()) {
            return false;
        }

        char next = queue.charAt(pos);
        for (char c : seq) {
            if (next == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if the queue is positioned at "&lt;" immediately followed by a letter.
     * @return true if the next two characters look like the start of a tag
     */
    public boolean matchesStartTag() {
        // micro opt for matching "<x"
        return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character.isLetter(queue.charAt(pos + 1)));
    }

    /**
     * Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
     * queue.
     * @param seq String to search for, and if found, remove from queue.
     * @return true if found and removed, false if not found.
     */
    public boolean matchChomp(String seq) {
        if (matches(seq)) {
            pos += seq.length();
            return true;
        } else {
            return false;
        }
    }

    /**
     * Tests if queue starts with a whitespace character.
     * @return if starts with whitespace
     */
    public boolean matchesWhitespace() {
        return !isEmpty() && Character.isWhitespace(queue.charAt(pos));
    }

    /**
     * Test if the queue matches a word character (letter or digit).
     * @return if matches a word character
     */
    public boolean matchesWord() {
        return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
    }

    /**
     * Drops the next character off the queue. Does nothing if the queue is empty.
     */
    public void advance() {
        if (!isEmpty()) {
            pos++;
        }
    }

    /**
     * Consume one character off queue.
     * @return first character on queue.
     * @throws StringIndexOutOfBoundsException if the queue is empty; the cursor is left unchanged
     */
    public char consume() {
        // Read first, then move: charAt(pos++) would advance the cursor even when charAt throws,
        // leaving pos beyond the end of the string and breaking isEmpty() afterwards.
        char c = queue.charAt(pos);
        pos++;
        return c;
    }

    /**
     * Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
     * throw an illegal state exception -- but you should be running match() against that condition.
     * <p>
     * Case insensitive.
     * @param seq sequence to remove from head of queue.
     * @throws IllegalStateException if the queue does not start with seq
     */
    public void consume(String seq) {
        if (!matches(seq)) {
            throw new IllegalStateException("Queue did not match expected sequence");
        }
        // No separate length check is needed: regionMatches (used by matches) already returns false
        // when seq extends past the end of the queue.
        pos += seq.length();
    }

    /**
     * Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
     * @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
     * @return The matched data consumed from queue.
     */
    public String consumeTo(String seq) {
        int offset = queue.indexOf(seq, pos);
        if (offset != -1) {
            String consumed = queue.substring(pos, offset);
            pos += consumed.length();
            return consumed;
        } else {
            return remainder();
        }
    }

    /**
     * Pulls a string off the queue, up to but exclusive of the match sequence (compared case insensitively), or to
     * the queue running out.
     * @param seq String to end on (and not include in return, but leave on queue).
     * @return The data consumed from queue; empty if seq is empty, since an empty sequence matches immediately.
     */
    public String consumeToIgnoreCase(String seq) {
        if (seq.length() == 0) {
            return ""; // substring(0, 1) below would throw on an empty sequence
        }
        int start = pos;
        String first = seq.substring(0, 1);
        boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of
        while (!isEmpty()) {
            if (matches(seq)) {
                break;
            }

            if (canScan) {
                int skip = queue.indexOf(first, pos) - pos;
                if (skip == 0) { // this char is the skip char, but not match, so force advance of pos
                    pos++;
                } else if (skip < 0) { // no chance of finding, grab to end
                    pos = queue.length();
                } else {
                    pos += skip;
                }
            } else {
                pos++;
            }
        }

        return queue.substring(start, pos);
    }

    /**
     * Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
     * @param seq any number of terminators to consume to. <b>Case insensitive.</b>
     * @return consumed string
     */
    // todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
    // is is a case sensitive time...
    public String consumeToAny(String... seq) {
        int start = pos;
        while (!isEmpty() && !matchesAny(seq)) {
            pos++;
        }

        return queue.substring(start, pos);
    }

    /**
     * Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
     * <p>
     * If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
     * isEmpty() == true).
     * @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
     * @return Data matched from queue.
     */
    public String chompTo(String seq) {
        String data = consumeTo(seq);
        matchChomp(seq);
        return data;
    }

    /**
     * As {@link #chompTo(String)}, but the terminator is located case insensitively.
     * @param seq String to match up to, and not include in return, and to pull off queue.
     * @return Data matched from queue.
     */
    public String chompToIgnoreCase(String seq) {
        String data = consumeToIgnoreCase(seq); // case insensitive scan
        matchChomp(seq);
        return data;
    }

    /**
     * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
     * and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
     * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
     * contains text strings; use unescape for that.
     * <p>
     * If the first character on the queue is not the opener, that one character is consumed and an empty string is
     * returned.
     * @param open opener
     * @param close closer
     * @return data matched from the queue
     */
    public String chompBalanced(char open, char close) {
        StringBuilder accum = new StringBuilder();
        int depth = 0;
        char last = 0; // 0 doubles as the "nothing consumed yet" marker

        do {
            if (isEmpty()) {
                break;
            }
            char c = consume();
            if (last != ESC) { // a delimiter directly after the escape char does not change depth
                if (c == open) {
                    depth++;
                } else if (c == close) {
                    depth--;
                }
            }

            if (depth > 0 && last != 0) {
                accum.append(c); // don't include the outer match pair in the return
            }
            last = c;
        } while (depth > 0);
        return accum.toString();
    }

    /**
     * Unescaped a \ escaped string.
     * @param in backslash escaped string
     * @return unescaped string
     */
    public static String unescape(String in) {
        StringBuilder out = new StringBuilder();
        char last = 0;
        for (char c : in.toCharArray()) {
            if (c == ESC) {
                // a backslash is kept only when the previous character was also a backslash
                if (last == ESC) {
                    out.append(c);
                }
            } else {
                out.append(c);
            }
            last = c;
        }
        return out.toString();
    }

    /**
     * Pulls the next run of whitespace characters of the queue.
     * @return true if at least one whitespace character was consumed
     */
    public boolean consumeWhitespace() {
        boolean seen = false;
        while (matchesWhitespace()) {
            pos++;
            seen = true;
        }
        return seen;
    }

    /**
     * Retrieves the next run of word type (letter or digit) off the queue.
     * @return String of word characters from queue, or empty string if none.
     */
    public String consumeWord() {
        int start = pos;
        while (matchesWord()) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Consume an tag name off the queue (word or :, _, -)
     * @return tag name
     */
    public String consumeTagName() {
        int start = pos;
        while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-'))) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
     * @return tag name
     */
    public String consumeElementSelector() {
        int start = pos;
        while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-'))) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
     * @return identifier
     */
    public String consumeCssIdentifier() {
        int start = pos;
        while (!isEmpty() && (matchesWord() || matchesAny('-', '_'))) {
            pos++;
        }

        return queue.substring(start, pos);
    }

    /**
     * Consume an attribute key off the queue (letter, digit, -, _, :)
     * @return attribute key
     */
    public String consumeAttributeKey() {
        int start = pos;
        while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':'))) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Consume and return whatever is left on the queue.
     * @return remainder of queue.
     */
    public String remainder() {
        String rest = queue.substring(pos);
        pos = queue.length();
        return rest;
    }

    /**
     * @return the unconsumed part of the queue; the queue itself is not modified.
     */
    @Override
    public String toString() {
        return queue.substring(pos);
    }
}


