// Returns true if the specified character sequence is a valid sequence of UTF-16 char values.
/*
* LingPipe v. 3.9
* Copyright (C) 2003-2010 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
//package com.aliasi.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;
/**
* Static utility methods for processing strings, characters and
* string buffers.
*
* @author Bob Carpenter
* @version 4.0.1
* @since LingPipe1.0
* @see java.lang.Character
* @see java.lang.String
* @see java.lang.StringBuilder
*/
public class Strings {

    /**
     * Returns {@code true} if the specified character sequence is a
     * well-formed sequence of UTF-16 {@code char} values.  A sequence
     * is well formed if every high surrogate {@code char} is
     * immediately followed by a low surrogate, and every low surrogate
     * is immediately preceded by a high surrogate (as defined by
     * {@link Character#isHighSurrogate(char)} and
     * {@link Character#isLowSurrogate(char)}).  The empty sequence is
     * well formed.
     *
     * <p>This method does <b>not</b> check whether the code points
     * encoded by the sequence are assigned in any version of the
     * Unicode standard; it only tests the validity of the UTF-16
     * encoding itself.
     *
     * @param cs Character sequence to test.
     * @return {@code true} if the sequence of characters is
     * legal in UTF-16.
     * @throws NullPointerException If the specified character sequence
     * is {@code null}.
     */
    public static boolean isLegalUtf16(CharSequence cs) {
        final int length = cs.length();
        for (int i = 0; i < length; ++i) {
            char current = cs.charAt(i);
            // A low surrogate reached here was not consumed as the second
            // half of a pair, so it is unpaired.
            if (Character.isLowSurrogate(current)) {
                return false;
            }
            if (!Character.isHighSurrogate(current)) {
                continue;
            }
            // A high surrogate must be followed immediately by a low
            // surrogate; step onto that position and consume it.
            ++i;
            if (i >= length || !Character.isLowSurrogate(cs.charAt(i))) {
                return false;
            }
            // No range check on the decoded code point is needed: every
            // high/low surrogate pair decodes into U+10000..U+10FFFF,
            // all of which are valid code points.
        }
        return true;
    }
}
Related examples in the same category