Here you can find the source of unicodePreservingSubstring(String str, int begin)
Equivalent to unicodePreservingSubstring(String, int, int) invoked as (str, begin, str.length()).
public static String unicodePreservingSubstring(String str, int begin)
//package com.java2s;

/*
 * Copyright (c) 2000, Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Static helpers for taking substrings of a {@link String} without cutting a
 * UTF-16 surrogate pair in half.
 */
public class Main {

    /**
     * Returns a substring of {@code str} that respects Unicode character
     * boundaries.
     *
     * <p>The string will never be split between a [high, low] surrogate pair,
     * as defined by {@link Character#isHighSurrogate} and
     * {@link Character#isLowSurrogate}.
     *
     * <p>If {@code begin} or {@code end} points at the low surrogate of a
     * well-formed pair, that index is moved back by one so the whole pair
     * lands on the same side of the cut. Both indices are normalized with
     * {@link #unicodePreservingIndex(String, int)}.
     *
     * <p>This behavior guarantees that
     * {@code str.equals(unicodePreservingSubstring(str, 0, n)
     * + unicodePreservingSubstring(str, n, str.length()))}
     * is true for every {@code n} from {@code 0} to {@code str.length()}.
     *
     * <p>This means that unlike {@link String#substring(int, int)}, the length
     * of the returned substring may not necessarily be equivalent to
     * {@code end - begin}.
     *
     * @param str the original String
     * @param begin the beginning index, inclusive
     * @param end the ending index, exclusive
     * @return the specified substring, possibly adjusted in order to not
     *     split Unicode surrogate pairs
     * @throws IndexOutOfBoundsException if {@code begin} is negative,
     *     or {@code end} is larger than the length of {@code str}, or
     *     {@code begin} is larger than {@code end}
     */
    public static String unicodePreservingSubstring(String str, int begin, int end) {
        // Out-of-range indices pass through normalization untouched, so
        // String#substring remains the single place that rejects them.
        int safeBegin = unicodePreservingIndex(str, begin);
        int safeEnd = unicodePreservingIndex(str, end);
        return str.substring(safeBegin, safeEnd);
    }

    /**
     * Returns the surrogate-safe substring from {@code begin} to the end of
     * {@code str}.
     *
     * <p>Equivalent to:
     *
     * <pre>
     * {@link #unicodePreservingSubstring(String, int, int)}(
     *     str, begin, str.length())
     * </pre>
     *
     * @param str the original String
     * @param begin the beginning index, inclusive
     * @return the substring starting at the (possibly adjusted) {@code begin}
     */
    public static String unicodePreservingSubstring(String str, int begin) {
        return unicodePreservingSubstring(str, begin, str.length());
    }

    /**
     * Normalizes {@code index} such that it respects Unicode character
     * boundaries in {@code str}.
     *
     * <p>If {@code index} is the low surrogate of a Unicode character,
     * the method returns {@code index - 1}. Otherwise, {@code index} is
     * returned.
     *
     * <p>In the case in which {@code index} falls in an invalid surrogate pair
     * (e.g. consecutive low surrogates, consecutive high surrogates), or if
     * it is not strictly between {@code 0} and {@code str.length()}, the
     * original value of {@code index} is returned.
     *
     * @param str the String
     * @param index the index to be normalized
     * @return a normalized index that does not split a Unicode character
     */
    public static int unicodePreservingIndex(String str, int index) {
        // Only interior positions can sit between two chars; 0 and
        // str.length() are always safe cut points, and anything outside that
        // range is returned as-is.
        boolean interior = index > 0 && index < str.length();
        if (interior
                && Character.isHighSurrogate(str.charAt(index - 1))
                && Character.isLowSurrogate(str.charAt(index))) {
            // index points at the second half of a well-formed pair.
            return index - 1;
        }
        return index;
    }
}