Java UTF from toUTF8ByteArray(String s)

Here you can find the source of toUTF8ByteArray(String s)

Description

Converts the characters of a Java string (which are always stored as UTF-16, independent of the platform) to a UTF-8 byte array. The same conversion can be done by calling the String method getBytes("UTF-8"), but this implementation does it from the ground up.

License

Apache License

Declaration


private static byte[] toUTF8ByteArray(String s) 

Method Source Code

//package com.java2s;
//License from project: Apache License 

public class Main {
    /**
     * Encodes a string as UTF-8 by hand, without delegating to
     * {@code String.getBytes("UTF-8")}.
     *
     * <p>The string is read as UTF-16. A valid surrogate pair is combined
     * into a single supplementary code point and written as one 4-byte
     * sequence. An unpaired surrogate cannot be represented in UTF-8 and is
     * replaced with {@code '?'}, which is also what {@code String.getBytes}
     * does for malformed input.
     *
     * @param s the string to encode; must not be {@code null}
     * @return a newly allocated array holding exactly the UTF-8 bytes of {@code s}
     * @throws NullPointerException if {@code s} is {@code null}
     */
    private static byte[] toUTF8ByteArray(String s) {
        final int length = s.length();
        // Worst case is 3 bytes per UTF-16 code unit: a BMP character needs at
        // most 3 bytes, and a surrogate pair (2 units) needs only 4.
        final byte[] buffer = new byte[3 * length];
        int index = 0; // number of bytes written so far

        for (int i = 0; i < length; i++) {
            final char unit = s.charAt(i);
            int codePoint = unit;

            if (Character.isHighSurrogate(unit)
                    && i + 1 < length
                    && Character.isLowSurrogate(s.charAt(i + 1))) {
                // Valid pair: merge both units and skip the low surrogate.
                codePoint = Character.toCodePoint(unit, s.charAt(i + 1));
                i++;
            } else if (Character.isSurrogate(unit)) {
                // Lone surrogate: not encodable, substitute the replacement byte.
                codePoint = '?';
            }

            if (codePoint <= 0x7F) {
                // 1 byte: 0xxxxxxx
                buffer[index++] = (byte) codePoint;
            } else if (codePoint <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                buffer[index++] = (byte) (0xC0 | (codePoint >> 6));
                buffer[index++] = (byte) (0x80 | (codePoint & 0x3F));
            } else if (codePoint <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                buffer[index++] = (byte) (0xE0 | (codePoint >> 12));
                buffer[index++] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
                buffer[index++] = (byte) (0x80 | (codePoint & 0x3F));
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                buffer[index++] = (byte) (0xF0 | (codePoint >> 18));
                buffer[index++] = (byte) (0x80 | ((codePoint >> 12) & 0x3F));
                buffer[index++] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
                buffer[index++] = (byte) (0x80 | (codePoint & 0x3F));
            }
        }

        // Trim the over-allocated buffer to the bytes actually produced.
        final byte[] result = new byte[index];
        System.arraycopy(buffer, 0, result, 0, index);
        return result;
    }
}

Related

  1. toUTF8(String oldStr)
  2. toUtf8(String s)
  3. toUTF8(String s, String encoding)
  4. toUTF8(String str)
  5. toUtf8(String texto)
  6. toUtf8ByteArray(String source)
  7. toUTF8FromLatin1(byte[] outputBuffer, String string)
  8. toUTF8k(String in)
  9. toUtf8String(String source)