Java examples for java.lang:String UTF
Calculates the number of bytes needed to encode a string in UTF-8.
/*
Written in 2013 by Peter O.
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/
If you like this, you should donate to Peter O.
at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */
//package com.java2s;

/**
 * Holder for the UTF-8 length helper {@link #GetUtf8Length(String, boolean)}.
 */
public class Main {
    /**
     * Calculates the number of bytes needed to encode a string in UTF-8.
     *
     * <p>The string is scanned one UTF-16 code unit at a time. A leading
     * surrogate immediately followed by a trailing surrogate counts as one
     * supplementary code point (4 bytes); any other surrogate is unpaired.
     *
     * @param str A string object.
     * @param replace If true, treats unpaired surrogate code points as having 3
     * UTF-8 bytes (the UTF-8 length of the replacement character U+FFFD).
     * @return The number of bytes needed to encode the given string in UTF-8, or
     * -1 if the string contains an unpaired surrogate code point and {@code
     * replace} is false.
     * @throws NullPointerException The parameter {@code str} is null.
     */
    public static long GetUtf8Length(String str, boolean replace) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        final int length = str.length();
        long size = 0;
        for (int i = 0; i < length; ++i) {
            final char c = str.charAt(i);
            if (c <= 0x7f) {
                // U+0000..U+007F: one byte.
                ++size;
            } else if (c <= 0x7ff) {
                // U+0080..U+07FF: two bytes.
                size += 2;
            } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
                // Remaining non-surrogate BMP code units: three bytes.
                size += 3;
            } else if (Character.isHighSurrogate(c)
                    && i + 1 < length
                    && Character.isLowSurrogate(str.charAt(i + 1))) {
                // Well-formed surrogate pair: one supplementary code point,
                // four bytes. Skip the trailing surrogate just consumed.
                size += 4;
                ++i;
            } else if (replace) {
                // Unpaired surrogate counted as U+FFFD (three bytes). The
                // following code unit, if any, is examined on its own in the
                // next iteration.
                size += 3;
            } else {
                return -1;
            }
        }
        return size;
    }
}