gribbit.http.utils.UTF8.java Source code

Java tutorial

Introduction

Here is the source code for gribbit.http.utils.UTF8.java

Source

/**
 * This file is part of the Gribbit Web Framework.
 * 
 *     https://github.com/lukehutch/gribbit
 *     
 * Originally from:
 * 
 *     https://github.com/webbit/webbit/blob/master/src/main/java/org/webbitserver/helpers/UTF8Output.java
 * 
 * Which is an adaptation of:
 * 
 *     http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
 * 
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all copies or substantial
 * portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package gribbit.http.utils;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.ChannelHandlerContext;

import java.io.UnsupportedEncodingException;

/**
 * Incremental UTF-8 decoder driven by a table-based state machine, plus static helpers for converting
 * between {@link String} and UTF-8 bytes.
 *
 * <p>
 * An instance accumulates decoded text across calls to {@link #append(int)} / {@link #append(byte[])} until
 * {@link #getStringAndRecycle()} is called. Instances hold mutable decoding state and perform no
 * synchronization.
 */
public class UTF8 {
    /** State in which the bytes seen so far form only complete code points. */
    private static final int UTF8_ACCEPT = 0;

    /** State entered on an invalid byte; every transition out of this state leads back to it. */
    private static final int UTF8_REJECT = 12;

    /** Maps each unsigned byte value (0-255) to the character class used to index {@link #STATES}. */
    private static final byte[] TYPES = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3,
            3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };

    /** Transition table: the next state is {@code STATES[currentState + characterClass]}. */
    private static final byte[] STATES = { 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12,
            12, 12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12,
            24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12,
            12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12,
            36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 };

    /** Current state-machine state. */
    private int state = UTF8_ACCEPT;

    /** Code point being assembled from the bytes of the current sequence. */
    private int codep = 0;

    /** Text decoded since the last call to {@link #getStringAndRecycle()}. */
    private final StringBuilder stringBuilder = new StringBuilder();

    /**
     * Thrown when the appended bytes are not valid (or not complete) UTF-8.
     *
     * <p>
     * Declared {@code static} so that a thrown exception does not hold a reference to the decoder that
     * raised it.
     */
    public static class UTF8Exception extends UnsupportedEncodingException {
        private static final long serialVersionUID = 1L;

        /**
         * @param reason
         *            human-readable description of the failure
         */
        public UTF8Exception(String reason) {
            super(reason);
        }

        /**
         * Wraps another exception, reusing its message and recording it as the cause.
         *
         * @param e
         *            the exception to wrap
         */
        public UTF8Exception(Exception e) {
            super(e.getMessage());
            // UnsupportedEncodingException has no (message, cause) constructor, so attach the cause here.
            initCause(e);
        }
    }

    /**
     * Feeds every byte of {@code bytes} to the decoder, in order.
     *
     * @param bytes
     *            the bytes to decode
     * @throws UTF8Exception
     *             as soon as a byte makes the input invalid UTF-8
     */
    public void append(byte[] bytes) throws UTF8Exception {
        for (byte b : bytes) {
            append(b);
        }
    }

    /**
     * Feeds a single byte to the decoder. Only the low eight bits of {@code b} are used.
     *
     * @param b
     *            the next byte of the UTF-8 stream
     * @throws UTF8Exception
     *             if this byte moves the state machine into the reject state
     */
    public void append(int b) throws UTF8Exception {
        final byte type = TYPES[b & 0xFF];

        // Mid-sequence: shift in six payload bits. Start of sequence: mask off the lead byte's prefix bits.
        codep = (state != UTF8_ACCEPT) ? (b & 0x3f) | (codep << 6) : (0xff >> type) & b;

        state = STATES[state + type];

        if (state == UTF8_ACCEPT) {
            // Emits one char for BMP code points and a surrogate pair for supplementary ones.
            stringBuilder.appendCodePoint(codep);
        } else if (state == UTF8_REJECT) {
            throw new UTF8Exception("bytes are not UTF-8");
        }
    }

    /**
     * Returns the text decoded so far and resets the decoder for reuse.
     *
     * <p>
     * The decoder is reset whether or not the call succeeds, so a failed message neither leaves the
     * instance stuck in the reject state nor leaks partial text into the next message.
     *
     * @return the decoded string
     * @throws UTF8Exception
     *             if the appended bytes were rejected or end in the middle of a multi-byte sequence
     */
    public String getStringAndRecycle() throws UTF8Exception {
        final boolean complete = (state == UTF8_ACCEPT);
        final String string = complete ? stringBuilder.toString() : null;
        reset();
        if (!complete) {
            throw new UTF8Exception("bytes are not UTF-8");
        }
        return string;
    }

    /** Discards any accumulated text and returns the state machine to its initial state. */
    private void reset() {
        stringBuilder.setLength(0);
        state = UTF8_ACCEPT;
        codep = 0;
    }

    /**
     * Decodes a complete UTF-8 byte array using a fresh decoder.
     *
     * @param bytes
     *            the UTF-8 encoded bytes
     * @return the decoded string
     * @throws UTF8Exception
     *             if {@code bytes} is not valid, complete UTF-8
     */
    public static String utf8ToString(byte[] bytes) throws UTF8Exception {
        UTF8 decoder = new UTF8();
        decoder.append(bytes);
        return decoder.getStringAndRecycle();
    }

    /**
     * Encodes {@code str} as UTF-8.
     *
     * @param str
     *            the string to encode
     * @return the UTF-8 bytes of {@code str}
     * @throws RuntimeException
     *             wrapping the {@link UnsupportedEncodingException} if the "UTF-8" charset name is not
     *             supported
     */
    public static byte[] stringToUTF8(String str) {
        try {
            return str.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Encodes {@code str} as UTF-8 into a new buffer obtained from {@link Unpooled#buffer(int)}.
     *
     * @param str
     *            the string to encode
     * @return a buffer containing the UTF-8 bytes of {@code str}
     */
    public static ByteBuf stringToUTF8ByteBuf(String str) {
        // Encode first so the buffer is sized to the real byte count rather than a char-count estimate,
        // which can overflow int and can be too small for characters that need three bytes.
        final byte[] utf8 = stringToUTF8(str);
        ByteBuf byteBuf = Unpooled.buffer(utf8.length);
        byteBuf.writeBytes(utf8);
        return byteBuf;
    }

    /**
     * Encodes {@code str} as UTF-8 into a new buffer obtained from the allocator of {@code ctx}.
     *
     * @param str
     *            the string to encode
     * @param ctx
     *            the channel handler context whose allocator supplies the buffer
     * @return a buffer containing the UTF-8 bytes of {@code str}
     */
    public static ByteBuf stringToUTF8ByteBuf(String str, ChannelHandlerContext ctx) {
        // Encode before allocating: the buffer is sized exactly, and nothing has been allocated yet if
        // encoding fails.
        final byte[] utf8 = stringToUTF8(str);
        ByteBuf byteBuf = ctx.alloc().buffer(utf8.length);
        byteBuf.writeBytes(utf8);
        return byteBuf;
    }
}