Android String Sub String Get unicodePreservingSubstring(String str, int begin)

Description

Equivalent to:

 unicodePreservingSubstring(String, int, int) called as unicodePreservingSubstring(str, begin, str.length())

License

Apache License

Declaration

public static String unicodePreservingSubstring(String str, int begin)

Method Source Code

//package com.java2s;
/**
 * Copyright (c) 2000, Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class Main {
    /**
     * Extracts a substring of {@code str} without cutting a UTF-16 surrogate
     * pair in half.
     *
     * <p>Both {@code begin} and {@code end} are first passed through
     * {@link #unicodePreservingIndex(String, int)}; an index that points at
     * the low surrogate of a valid [high, low] pair (see
     * {@link Character#isHighSurrogate} and {@link Character#isLowSurrogate})
     * is therefore moved back by one so that it points at the high surrogate
     * instead.
     *
     * <p>Because the same adjustment is applied to either end, for every
     * {@code n} from {@code 0} to {@code str.length()} inclusive the
     * concatenation of
     * {@code unicodePreservingSubstring(str, 0, n)} and
     * {@code unicodePreservingSubstring(str, n, str.length())} equals
     * {@code str}.
     *
     * <p>As a consequence, and unlike {@link String#substring(int, int)},
     * the result is not guaranteed to have length {@code end - begin}.
     *
     * @param str the string to take the substring from
     * @param begin index of the first char to include
     * @param end index one past the last char to include
     * @return the requested substring, with its bounds shifted where needed
     *   to keep surrogate pairs intact
     * @throws IndexOutOfBoundsException if {@code begin} is negative,
     *   {@code end} exceeds the length of {@code str}, or {@code begin}
     *   exceeds {@code end}
     */
    public static String unicodePreservingSubstring(String str, int begin,
            int end) {
        final int from = unicodePreservingIndex(str, begin);
        final int to = unicodePreservingIndex(str, end);
        return str.substring(from, to);
    }

    /**
     * Extracts the tail of {@code str} starting at {@code begin}, without
     * cutting a surrogate pair in half.
     *
     * <p>Shorthand for
     * {@link #unicodePreservingSubstring(String, int, int)
     * unicodePreservingSubstring(str, begin, str.length())}.
     *
     * @param str the string to take the substring from
     * @param begin index of the first char to include
     * @return the substring running from the (possibly adjusted)
     *   {@code begin} to the end of {@code str}
     */
    public static String unicodePreservingSubstring(String str, int begin) {
        final int end = str.length();
        return unicodePreservingSubstring(str, begin, end);
    }

    /**
     * Snaps {@code index} to a Unicode character boundary of {@code str}.
     *
     * <p>When the char at {@code index} is a low surrogate and the char
     * just before it is a high surrogate, {@code index - 1} is returned so
     * that the pair is not split. In every other situation the index comes
     * back unchanged: this includes malformed sequences (for example two
     * low surrogates in a row) and any index that is not strictly between
     * {@code 0} and {@code str.length()}.
     *
     * @param str the string the index refers to
     * @param index the index to adjust
     * @return {@code index - 1} if {@code index} lies inside a valid
     *   surrogate pair, otherwise {@code index}
     */
    public static int unicodePreservingIndex(String str, int index) {
        // Only an interior position can have both a predecessor and a char
        // of its own; anything else is handed back untouched.
        if (index <= 0 || index >= str.length()) {
            return index;
        }

        final char previous = str.charAt(index - 1);
        final char current = str.charAt(index);
        final boolean insidePair = Character.isHighSurrogate(previous)
                && Character.isLowSurrogate(current);

        return insidePair ? index - 1 : index;
    }
}

Android String Sub String Get unicodePreservingSubstring(String str, int begin)

Description

License

Declaration

Method Source Code

Related