Java UTF from toUTFBody(String str)

Here you can find the source of toUTFBody(String str)

Description

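Encodes a string into bytes using one to three bytes per char, rejects encodings longer than 65535 bytes, and returns those bytes decoded as a UTF-8 string.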

License

Open Source License

Declaration

public static String toUTFBody(String str) throws IOException 

Method Source Code

//package com.java2s;
//License from project: Open Source License 

import java.io.IOException;

import java.io.UTFDataFormatException;

public class Main {
    /**
     * Encodes {@code str} into a byte sequence and returns those bytes decoded
     * as UTF-8.
     *
     * <p>Each char is encoded independently:
     * <ul>
     *   <li>U+0001 to U+007F: one byte;</li>
     *   <li>above U+07FF: three bytes;</li>
     *   <li>everything else (U+0080 to U+07FF, and U+0000): two bytes.</li>
     * </ul>
     *
     * <p>NOTE(review): U+0000 takes the two-byte form and every surrogate code
     * unit is encoded on its own as three bytes. A standard UTF-8 decoder
     * presumably does not accept those sequences, so such input may not come
     * back unchanged from the final decode -- confirm whether that is intended.
     *
     * @param str the text to encode; must not be {@code null}
     * @return the encoded bytes decoded with the {@code "UTF-8"} charset
     * @throws UTFDataFormatException if the encoded form exceeds 65535 bytes
     * @throws IOException declared for the charset-name based decode
     */
    public static String toUTFBody(String str) throws IOException {
        final int length = str.length();

        // Pass 1: total size of the encoded form, needed for the limit check
        // and to allocate the buffer exactly once.
        int encodedSize = 0;
        for (int pos = 0; pos < length; pos++) {
            encodedSize += encodedWidth(str.charAt(pos));
        }

        if (encodedSize > 65535) {
            throw new UTFDataFormatException("encoded string too long: " + encodedSize + " bytes");
        }

        // Pass 2: emit the bytes for each char.
        final byte[] encoded = new byte[encodedSize];
        int out = 0;
        for (int pos = 0; pos < length; pos++) {
            final char ch = str.charAt(pos);
            switch (encodedWidth(ch)) {
                case 1:
                    encoded[out++] = (byte) ch;
                    break;
                case 2:
                    // 110xxxxx 10xxxxxx
                    encoded[out++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
                    encoded[out++] = (byte) (0x80 | (ch & 0x3F));
                    break;
                default:
                    // 1110xxxx 10xxxxxx 10xxxxxx
                    encoded[out++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
                    encoded[out++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
                    encoded[out++] = (byte) (0x80 | (ch & 0x3F));
                    break;
            }
        }

        return new String(encoded, "UTF-8");
    }

    /** Returns how many bytes (1, 2 or 3) the encoder emits for {@code ch}. */
    private static int encodedWidth(char ch) {
        if (ch >= 0x0001 && ch <= 0x007F) {
            return 1;
        }
        return (ch > 0x07FF) ? 3 : 2;
    }
}

Related

  1. toUtf8Path(String path)
  2. toUTF8String(byte[] b, int offset, int length)
  3. toUtf8String(String source)
  4. toUTF8String(String str)
  5. toUTF_8(String s)
  6. toUtfBytes(final String s)
  7. toUtfString(char[] encoded)