Java String Truncate truncateStringToUtf8(final String original, final int maxBytes)

Description

Truncates the specified string to fit in the specified maximum number of UTF-8 bytes.

License

Open Source License

Parameter

Parameter	Description
original	The original string.
maxBytes	The maximum number of UTF-8 bytes available to store the string.

Return

If the string fits within the specified number of bytes, the original string is returned; otherwise, it is truncated to the longest prefix whose UTF-8 encoding fits in the available bytes.

Declaration

public static String truncateStringToUtf8(final String original, final int maxBytes)

Method Source Code

//package com.java2s;
/*****************************************************************************
 * from www.java2s.com
 * Copyright (C) Zenoss, Inc. 2010, all rights reserved.
 * 
 * This content is made available according to terms specified in
 * License.zenoss under the directory where your Zenoss product is installed.
 * 
 ****************************************************************************/

public class Main {
    /**
     * Truncates the specified string to fit in the specified maximum number of
     * UTF-8 bytes. This method will not split strings in the middle of
     * surrogate pairs: a high surrogate immediately followed by a low
     * surrogate is either kept whole or dropped whole.
     *
     * <p>Byte cost per UTF-16 code unit (or pair):
     * <ul>
     * <li>U+0000..U+007F: 1 byte</li>
     * <li>U+0080..U+07FF: 2 bytes</li>
     * <li>valid surrogate pair (supplementary code point): 4 bytes for the
     * two chars together</li>
     * <li>any other char, including an unpaired surrogate: 3 bytes</li>
     * </ul>
     *
     * @param original
     *            The original string. Must not be {@code null}.
     * @param maxBytes
     *            The maximum number of UTF-8 bytes available to store the
     *            string. A value of zero or less yields an empty result for
     *            any non-empty input.
     * @return If the string doesn't overflow the number of specified bytes,
     *         then the original string is returned, otherwise the longest
     *         prefix of the string that fits in the number of bytes
     *         available.
     * @throws NullPointerException
     *             if {@code original} is {@code null}.
     */
    public static String truncateStringToUtf8(final String original, final int maxBytes) {
        final int length = original.length();
        int newLength = 0;
        int currentBytes = 0;
        while (newLength < length) {
            final char c = original.charAt(newLength);
            final int byteCount;
            final int charCount;
            if (c <= 0x7F) {
                byteCount = 1;
                charCount = 1;
            } else if (c <= 0x7FF) {
                byteCount = 2;
                charCount = 1;
            } else if (Character.isHighSurrogate(c) && newLength + 1 < length
                    && Character.isLowSurrogate(original.charAt(newLength + 1))) {
                // A well-formed pair encodes one supplementary code point:
                // four bytes, and both chars must be consumed together.
                byteCount = 4;
                charCount = 2;
            } else {
                // Remaining BMP chars take three bytes. Unpaired surrogates
                // are budgeted the same way and never consume a neighbour, so
                // the index cannot run past the end of the string.
                byteCount = 3;
                charCount = 1;
            }
            // Compare against the remaining budget rather than summing first,
            // so the check cannot overflow when maxBytes is close to
            // Integer.MAX_VALUE.
            if (byteCount > maxBytes - currentBytes) {
                break;
            }
            currentBytes += byteCount;
            newLength += charCount;
        }
        return (newLength == length) ? original : original.substring(0, newLength);
    }
}