package org.apache.tajo.util;

import io.netty.buffer.ByteBuf;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static io.netty.util.internal.StringUtil.isSurrogate;

/**
 * Extra utilities for bytes.
 */
public class BytesUtils {

     * Parse the first byte of a vint/vlong to determine the number of bytes
     * @param value the first byte of the vint/vlong
     * @return the total number of bytes (1 to 9)
    public static int decodeVIntSize(byte value) {
        if (value >= -112) {
            return 1;
        } else if (value < -120) {
            return -119 - value;
        return -111 - value;

     * @param n Long to make a VLong of.
     * @return VLong as bytes array.
    public static byte[] vlongToBytes(long n) {
        byte[] result;
        int offset = 0;
        if (n >= -112 && n <= 127) {
            result = new byte[1];
            result[offset] = (byte) n;
            return result;

        int len = -112;
        if (n < 0) {
            n ^= -1L; // take one's complement'
            len = -120;

        long tmp = n;
        while (tmp != 0) {
            tmp = tmp >> 8;

        int size = decodeVIntSize((byte) len);

        result = new byte[size];
        result[offset++] = (byte) len;
        len = (len < -120) ? -(len + 120) : -(len + 112);

        for (int idx = len; idx != 0; idx--) {
            int shiftbits = (idx - 1) * 8;
            long mask = 0xFFL << shiftbits;
            result[offset++] = (byte) ((n & mask) >> shiftbits);
        return result;

    public static void writeVLong(ByteArrayOutputStream byteStream, long l) {
        byte[] vLongBytes = vlongToBytes(l);
        byteStream.write(vLongBytes, 0, vLongBytes.length);

     * Converts a char array to a ascii byte array.
     * @param chars string
     * @return the byte array
    static byte[] toASCIIBytes(char[] chars) {
        byte[] buffer = new byte[chars.length];
        for (int i = 0; i < chars.length; i++) {
            buffer[i] = (byte) chars[i];
        return buffer;

    public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar, int[] target, int numColumns) {
        return splitWorker(str, 0, -1, separatorChar, target, numColumns);

    public static byte[][] splitPreserveAllTokens(byte[] str, int offset, int length, byte[] separator,
            int[] target, int numColumns) {
        return splitWorker(str, offset, length, separator, target, numColumns);

    public static byte[][] splitPreserveAllTokens(byte[] str, char separatorChar, int numColumns) {
        return splitWorker(str, 0, -1, separatorChar, null, numColumns);

    private static byte[][] splitWorker(byte[] str, int offset, int length, char separatorChar, int[] target,
            int numColumns) {
        return splitWorker(str, offset, length, new byte[] { (byte) separatorChar }, target, numColumns);

     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that do not return a
     * maximum array length.
     * @param str  the String to parse, may be <code>null</code>
     * @param length amount of bytes to str
     * @param separator the ascii separate characters
     * @param target the projection target
     * @param numColumns number of columns to be retrieved              
     * @return an array of parsed Strings, <code>null</code> if null String input
    private static byte[][] splitWorker(byte[] str, int offset, int length, byte[] separator, int[] target,
            int numColumns) {
        if (str == null) {
            return null;
        if (length == 0) {
            return new byte[numColumns][0];
        if (length < 0) {
            length = str.length - offset;
        int indexMax = 0;
        if (target != null) {
            for (int index : target) {
                indexMax = Math.max(indexMax, index + 1);
        } else {
            indexMax = numColumns;

        int[][] indices = split(str, offset, length, separator, new int[indexMax][]);
        byte[][] result = new byte[numColumns][];

        // not-picked -> null, picked but not-exists -> byte[0]
        if (target != null) {
            for (int i : target) {
                int[] index = indices[i];
                result[i] = index == null ? new byte[0] : Arrays.copyOfRange(str, index[0], index[1]);
        } else {
            for (int i = 0; i < result.length; i++) {
                int[] index = indices[i];
                result[i] = index == null ? new byte[0] : Arrays.copyOfRange(str, index[0], index[1]);
        return result;

    public static int[][] split(byte[] str, int offset, int length, byte[] separator, int[][] indices) {
        if (indices.length == 0) {
            return indices; // trivial
        final int limit = offset + length;

        int start = offset;
        int colIndex = 0;
        for (int index = offset; index < limit;) {
            if (onDelimiter(str, index, limit, separator)) {
                indices[colIndex++] = new int[] { start, index };
                if (colIndex >= indices.length) {
                    return indices;
                index += separator.length;
                start = index;
            } else {
        if (colIndex < indices.length) {
            indices[colIndex] = new int[] { start, limit };
        return indices;

    private static boolean onDelimiter(byte[] input, int offset, int limit, byte[] delimiter) {
        for (int i = 0; i < delimiter.length; i++) {
            if (offset + i >= limit || input[offset + i] != delimiter[i]) {
                return false;
        return true;

    public static byte[][] splitTrivial(byte[] value, byte delimiter) {
        List<byte[]> split = new ArrayList<>();
        int prev = 0;
        for (int i = 0; i < value.length; i++) {
            if (value[i] == delimiter) {
                split.add(Arrays.copyOfRange(value, prev, i));
                prev = i + 1;
        if (prev <= value.length) {
            split.add(Arrays.copyOfRange(value, prev, value.length));
        return split.toArray(new byte[split.size()][]);

     * It gets the maximum length among all given the array of bytes.
     * Then, it adds padding (i.e., \0) to byte arrays which are shorter
     * than the maximum length.
     * @param bytes Byte arrays to be padded
     * @return The array of padded bytes
    public static byte[][] padBytes(byte[]... bytes) {
        byte[][] padded = new byte[bytes.length][];

        int maxLen = Integer.MIN_VALUE;

        for (byte[] aByte : bytes) {
            maxLen = Math.max(maxLen, aByte.length);

        for (int i = 0; i < bytes.length; i++) {
            int padLen = maxLen - bytes[i].length;
            if (padLen == 0) {
                padded[i] = bytes[i];
            } else if (padLen > 0) {
                padded[i] = Bytes.padTail(bytes[i], padLen);
            } else {
                throw new RuntimeException(
                        "maximum length: " + maxLen + ", bytes[" + i + "].length:" + bytes[i].length);

        return padded;

    public static byte[] trimBytes(byte[] bytes) {
        return new String(bytes).trim().getBytes();

     * this is an implementation copied from ByteBufUtil in netty4
    public static int writeUtf8(ByteBuf buffer, char[] chars, boolean ignoreSurrogate) {
        int oldWriterIndex = buffer.writerIndex();
        int writerIndex = oldWriterIndex;

        // We can use the _set methods as these not need to do any index checks and reference checks.
        // This is possible as we called ensureWritable(...) before.
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (c < 0x80) {
                buffer.setByte(writerIndex++, (byte) c);
            } else if (c < 0x800) {
                buffer.setByte(writerIndex++, (byte) (0xc0 | (c >> 6)));
                buffer.setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
            } else if (!ignoreSurrogate && isSurrogate(c)) {
                if (!Character.isHighSurrogate(c)) {
                    throw new IllegalArgumentException("Invalid encoding. "
                            + "Expected high (leading) surrogate at index " + i + " but got " + c);
                final char c2;
                try {
                    // Surrogate Pair consumes 2 characters. Optimistically try to get the next character to avoid
                    // duplicate bounds checking with charAt. If an IndexOutOfBoundsException is thrown we will
                    // re-throw a more informative exception describing the problem.
                    c2 = chars[++i];
                } catch (IndexOutOfBoundsException e) {
                    throw new IllegalArgumentException("Underflow. " + "Expected low (trailing) surrogate at index "
                            + i + " but no more characters found.", e);
                if (!Character.isLowSurrogate(c2)) {
                    throw new IllegalArgumentException("Invalid encoding. "
                            + "Expected low (trailing) surrogate at index " + i + " but got " + c2);
                int codePoint = Character.toCodePoint(c, c2);
                // See
                buffer.setByte(writerIndex++, (byte) (0xf0 | (codePoint >> 18)));
                buffer.setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 12) & 0x3f)));
                buffer.setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 6) & 0x3f)));
                buffer.setByte(writerIndex++, (byte) (0x80 | (codePoint & 0x3f)));
            } else {
                buffer.setByte(writerIndex++, (byte) (0xe0 | (c >> 12)));
                buffer.setByte(writerIndex++, (byte) (0x80 | ((c >> 6) & 0x3f)));
                buffer.setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
        // update the writerIndex without any extra checks for performance reasons
        return writerIndex - oldWriterIndex;