Java examples for java.lang: String and UTF-8 handling
Returns the number of code points in this UTF8 sequence.
/*/*from www.j a v a 2s . c o m*/ * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ public class Main{ /** * Returns the number of code points in this UTF8 sequence. * * <p>This method assumes valid UTF8 input. This method * <strong>does not perform</strong> full UTF8 validation, it will check only the * first byte of each codepoint (for multi-byte sequences any bytes after * the head are skipped). * * @throws IllegalArgumentException If invalid codepoint header byte occurs or the * content is prematurely truncated. */ public static int codePointCount(BytesRef utf8) { int pos = utf8.offset; final int limit = pos + utf8.length; final byte[] bytes = utf8.bytes; int codePointCount = 0; for (; pos < limit; codePointCount++) { int v = bytes[pos] & 0xFF; if (v < /* 0xxx xxxx */0x80) { pos += 1; continue; } if (v >= /* 110x xxxx */0xc0) { if (v < /* 111x xxxx */0xe0) { pos += 2; continue; } if (v < /* 1111 xxxx */0xf0) { pos += 3; continue; } if (v < /* 1111 1xxx */0xf8) { pos += 4; continue; } // fallthrough, consider 5 and 6 byte sequences invalid. } // Anything not covered above is invalid UTF8. throw new IllegalArgumentException(); } // Check if we didn't go over the limit on the last character. 
if (pos > limit) throw new IllegalArgumentException(); return codePointCount; } }