Calculates the number of bytes needed to encode a string in UTF-8. - Java java.lang

Java examples for java.lang:String UTF

Description

Calculates the number of bytes needed to encode a string in UTF-8.

Demo Code

/*
 Written in 2013 by Peter O.
 Any copyright is dedicated to the Public Domain.
 http://creativecommons.org/publicdomain/zero/1.0/
 If you like this, you should donate to Peter O.
 at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */
//package com.java2s;

public class Main {
    /**
     * Computes how many bytes the UTF-8 encoding of a string would occupy.
     *
     * <p>The string is scanned one UTF-16 code unit at a time. Code units up to
     * U+007F count as 1 byte, up to U+07FF as 2 bytes, and any other
     * non-surrogate code unit as 3 bytes. A leading surrogate immediately
     * followed by a trailing surrogate forms one supplementary code point and
     * counts as 4 bytes.
     *
     * @param str The string to measure.
     * @param replace If true, every unpaired surrogate is counted as 3 bytes
     * (the UTF-8 length of the replacement character U+FFFD); if false, the
     * first unpaired surrogate makes the method return -1.
     * @return The UTF-8 encoded length of {@code str} in bytes, or -1 if
     * {@code str} contains an unpaired surrogate and {@code replace} is false.
     * @throws NullPointerException The parameter {@code str} is null.
     */
    public static long GetUtf8Length(String str, boolean replace) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        final int length = str.length();
        long total = 0;
        int pos = 0;
        while (pos < length) {
            // Read the current code unit and step past it.
            final char unit = str.charAt(pos++);
            if (unit < 0x80) {
                total += 1;
                continue;
            }
            if (unit < 0x800) {
                total += 2;
                continue;
            }
            if (unit < Character.MIN_SURROGATE || unit > Character.MAX_SURROGATE) {
                total += 3;
                continue;
            }
            // From here on the unit is a surrogate. Only a leading surrogate
            // directly followed by a trailing surrogate is a valid pair.
            if (Character.isHighSurrogate(unit) && pos < length
                    && Character.isLowSurrogate(str.charAt(pos))) {
                total += 4;
                ++pos; // the trailing half has been accounted for
                continue;
            }
            // Unpaired surrogate (a lone leading or lone trailing unit). The
            // unit after a lone leading surrogate is left unconsumed so that
            // the next iteration examines it on its own.
            if (!replace) {
                return -1;
            }
            total += 3;
        }
        return total;
    }
}

Related Tutorials