Java tutorial
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Demonstrates counting the bytes a string occupies in a given charset without
 * encoding the whole string into one large buffer.
 */
public class Main {

    /** Maximum number of UTF-16 code units encoded per step (one more if a surrogate pair straddles the cut). */
    private static final int ENCODE_CHUNK = 100;

    /**
     * Returns the number of bytes needed to encode {@code s} with {@code encoding}.
     *
     * <p>The string is encoded in chunks of about {@link #ENCODE_CHUNK} chars so that only a
     * small temporary buffer is alive at any time. A chunk boundary is never placed between
     * the two halves of a surrogate pair, so supplementary code points are encoded intact.
     * Malformed or unmappable input is handled as by {@link Charset#encode(String)}, i.e.
     * replaced with the charset's replacement bytes.
     *
     * <p>NOTE: each chunk is encoded independently. For charsets whose encoder is stateful or
     * writes a byte-order mark at the start of every encoding operation (e.g. UTF-16), the
     * total may differ from {@code s.getBytes(encoding).length}.
     *
     * @param s        the string to measure; must not be {@code null}
     * @param encoding the charset to encode with; must not be {@code null}
     * @return the total encoded size in bytes; {@code 0} for an empty string
     * @throws NullPointerException if {@code s} is {@code null}, or if {@code encoding} is
     *                              {@code null} and {@code s} is non-empty
     */
    public static long bytesRequiredToEncode(final String s, final Charset encoding) {
        final int length = s.length();
        long count = 0;
        int i = 0;
        while (i < length) {
            // Computed via the remaining length so that i + ENCODE_CHUNK can never overflow int.
            int end = (length - i <= ENCODE_CHUNK) ? length : i + ENCODE_CHUNK;
            // If the cut would separate a high surrogate from its low surrogate,
            // pull the low surrogate into this chunk so the pair is encoded as one code point.
            if (end < length
                    && Character.isHighSurrogate(s.charAt(end - 1))
                    && Character.isLowSurrogate(s.charAt(end))) {
                end++;
            }
            count += encoding.encode(s.substring(i, end)).remaining();
            i = end;
        }
        return count;
    }

    /**
     * Builds a string of 100 supplementary code points (U+103CBC, two chars each) and prints
     * the chunked byte count followed by the length of a direct {@code getBytes} encoding;
     * the two numbers are expected to match.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 100; i++) {
            sb.appendCodePoint(1064124);
        }
        String text = sb.toString();
        Charset cs = StandardCharsets.UTF_8;
        System.out.println(bytesRequiredToEncode(text, cs));
        System.out.println(text.getBytes(cs).length);
    }
}