Java tutorial
/*
 * LingPipe v. 3.9
 * Copyright (C) 2003-2010 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

//package com.aliasi.util;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;

/**
 * Static utility methods for processing strings, characters and
 * string buffers.
 *
 * @author  Bob Carpenter
 * @version 4.0.1
 * @since   LingPipe1.0
 * @see java.lang.Character
 * @see java.lang.String
 * @see java.lang.StringBuilder
 */
public class Strings {

    /**
     * Returns {@code true} if the specified character sequence is a
     * valid sequence of UTF-16 {@code char} values.  A sequence is
     * legal if every high surrogate {@code char} value is immediately
     * followed by a low surrogate value, and no low surrogate appears
     * without a preceding high surrogate (surrogates as defined by
     * {@link Character#isHighSurrogate(char)} and
     * {@link Character#isLowSurrogate(char)}).  The empty sequence is
     * legal.
     *
     * <p>This method does <b>not</b> check to see if the sequence of
     * code points defined by the UTF-16 consists only of code points
     * defined in the latest Unicode standard.  The method only tests
     * the validity of the UTF-16 encoding sequence.
     *
     * @param cs Character sequence to test.
     * @return {@code true} if the sequence of characters is
     * legal in UTF-16.
     * @throws NullPointerException If the specified sequence is
     * {@code null}.
     */
    public static boolean isLegalUtf16(CharSequence cs) {
        int pos = 0;
        while (pos < cs.length()) {
            char current = cs.charAt(pos);
            if (Character.isHighSurrogate(current)) {
                // A high surrogate must be directly followed by a low
                // surrogate; running off the end of the sequence or
                // meeting any other char makes the encoding illegal.
                int next = pos + 1;
                if (next >= cs.length()
                    || !Character.isLowSurrogate(cs.charAt(next))) {
                    return false;
                }
                // No further code point validation is needed: every
                // high/low surrogate pair combines to a code point in
                // the supplementary range, which is always valid.
                pos += 2;
            } else if (Character.isLowSurrogate(current)) {
                // Low surrogate that does not complete a pair.
                return false;
            } else {
                // Ordinary (non-surrogate) char.
                ++pos;
            }
        }
        return true;
    }

}