Check if the given character c is a combining character. - Java java.lang

Java examples for java.lang:char

Description

Check if the given character c is a combining character.

Demo Code

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//package com.java2s;

public class Main {
    /**
     * Checks whether the given character is a combining character.
     *
     * <p>The decision is made from a hand-maintained table of code point
     * ranges, grouped by the high byte of the character so that only the
     * ranges of the relevant 256-character page are tested. Because the
     * argument is a single {@code char}, only the Basic Multilingual Plane
     * is covered; any character whose page is not listed yields
     * {@code false}.
     *
     * @param c the character to check
     * @return {@code true} if {@code c} falls into one of the listed
     *         combining-character ranges, {@code false} otherwise
     */
    public static boolean isCombiningCharacter(final char c) {
        // Dispatch on the high byte: each case only lists ranges of that page.
        final int hiByte = c >>> 8;
        switch (hiByte) {

        case 0x03: // COMBINING DIACRITICAL MARKS (used with LATIN etc.)
            return isInRange(c, 0x0300, 0x034E)
                    || isInRange(c, 0x0350, 0x0362);

        case 0x04: // CYRILLIC
            return isInRange(c, 0x0483, 0x0487);

        case 0x05: // HEBREW
            return isInRange(c, 0x0591, 0x05BD) || c == 0x05BF
                    || isInRange(c, 0x05C1, 0x05C2)
                    || isInRange(c, 0x05C4, 0x05C5) || c == 0x05C7;

        case 0x06: // ARABIC
            return isInRange(c, 0x064B, 0x0652)
                    || isInRange(c, 0x0657, 0x0658)
                    || isInRange(c, 0x06DF, 0x06E0)
                    || isInRange(c, 0x06EA, 0x06EC);

        case 0x07: // SYRIAC, THAANA, NKO
            return isInRange(c, 0x0730, 0x074A)
                    || isInRange(c, 0x07A6, 0x07B0)
                    || isInRange(c, 0x07EB, 0x07F3);

        case 0x09: // DEVANAGARI
            return isInRange(c, 0x0901, 0x0902) || c == 0x093C
                    || isInRange(c, 0x0941, 0x0948)
                    || c == 0x094D
                    || isInRange(c, 0x0951, 0x0954)
                    || isInRange(c, 0x0962, 0x0963)
                    // BENGALI
                    || c == 0x0981 || c == 0x09BC
                    || isInRange(c, 0x09C1, 0x09C4) || c == 0x09CD
                    || isInRange(c, 0x09E1, 0x09E3);

        case 0x0A: // GURMUKHI
            return isInRange(c, 0x0A01, 0x0A02) || c == 0x0A3C
                    || isInRange(c, 0x0A41, 0x0A42)
                    || isInRange(c, 0x0A47, 0x0A48)
                    || isInRange(c, 0x0A4B, 0x0A4D)
                    || c == 0x0A51
                    || isInRange(c, 0x0A70, 0x0A71)
                    || c == 0x0A75
                    // GUJARATI
                    || isInRange(c, 0x0A81, 0x0A82) || c == 0x0ABC
                    || isInRange(c, 0x0AC1, 0x0AC5)
                    || isInRange(c, 0x0AC7, 0x0AC8) || c == 0x0ACD
                    || isInRange(c, 0x0AE2, 0x0AE3);

        case 0x0B: // ORIYA
            return c == 0x0B01 || c == 0x0B3C || c == 0x0B3F
                    || isInRange(c, 0x0B41, 0x0B44) || c == 0x0B4D
                    || c == 0x0B56 || isInRange(c, 0x0B62, 0x0B63)
                    // TAMIL
                    || c == 0x0B82 || c == 0x0BC0 || c == 0x0BCD;

        case 0x0C: // TELUGU
            return isInRange(c, 0x0C3E, 0x0C40)
                    || isInRange(c, 0x0C46, 0x0C48)
                    || isInRange(c, 0x0C4A, 0x0C4D)
                    || isInRange(c, 0x0C55, 0x0C56)
                    || isInRange(c, 0x0C62, 0x0C63)
                    // KANNADA
                    || c == 0x0CBC || c == 0x0CBF || c == 0x0CC6
                    || isInRange(c, 0x0CCC, 0x0CCD)
                    || isInRange(c, 0x0CE2, 0x0CE3);

        case 0x0D: // MALAYALAM
            return isInRange(c, 0x0D41, 0x0D44) || c == 0x0D4D
                    || isInRange(c, 0x0D62, 0x0D63)
                    // SINHALA
                    || c == 0x0DCA || isInRange(c, 0x0DD2, 0x0DD4)
                    || c == 0x0DD6;

        case 0x0E: // THAI
            return c == 0x0E31
                    || isInRange(c, 0x0E34, 0x0E3A)
                    || isInRange(c, 0x0E47, 0x0E4E)
                    // LAO
                    // The upper bound was previously 0x0E39, which is below
                    // the lower bound and made this range match nothing.
                    || c == 0x0EB1 || isInRange(c, 0x0EB4, 0x0EB9)
                    || isInRange(c, 0x0EBB, 0x0EBC)
                    || isInRange(c, 0x0EC8, 0x0ECD);

        case 0x0F: // TIBETAN
            return isInRange(c, 0x0F18, 0x0F19) || c == 0x0F35
                    || c == 0x0F37 || c == 0x0F39
                    || isInRange(c, 0x0F71, 0x0F7E) || c == 0x0F3E
                    || c == 0x0F3F || isInRange(c, 0x0F80, 0x0F84)
                    || isInRange(c, 0x0F86, 0x0F87) || c == 0x0FC6;

        case 0x10: // MYANMAR
            return isInRange(c, 0x102D, 0x1030)
                    || isInRange(c, 0x1032, 0x1037)
                    || isInRange(c, 0x1039, 0x103A)
                    || isInRange(c, 0x103D, 0x103E)
                    || isInRange(c, 0x1058, 0x1059)
                    || isInRange(c, 0x105E, 0x1060)
                    || isInRange(c, 0x1071, 0x1074) || c == 0x1082
                    || isInRange(c, 0x1085, 0x1086) || c == 0x108D;

        case 0x13: // ETHIOPIC
            return c == 0x135F;

        case 0x17: // TAGALOG
            return isInRange(c, 0x1712, 0x1714)
                    // HANUNOO
                    || isInRange(c, 0x1732, 0x1734)
                    // BUHID
                    || isInRange(c, 0x1752, 0x1753)
                    // TAGBANWA
                    || isInRange(c, 0x1772, 0x1773)
                    // KHMER
                    || isInRange(c, 0x17B7, 0x17BD) || c == 0x17C6
                    || isInRange(c, 0x17C9, 0x17D3) || c == 0x17DD;

        case 0x18: // MONGOLIAN
            return isInRange(c, 0x180B, 0x180D);

        case 0x19: // LIMBU
            return isInRange(c, 0x1920, 0x1922)
                    || isInRange(c, 0x1927, 0x1928) || c == 0x1932
                    || isInRange(c, 0x1939, 0x193B);

        case 0x1A: // BUGINESE
            // The upper bound was previously written as 0x1A8 (a dropped
            // digit), which made this range match nothing.
            return isInRange(c, 0x1A17, 0x1A18);

        case 0x1B: // BALINESE
            return isInRange(c, 0x1B00, 0x1B03) || c == 0x1B34
                    || isInRange(c, 0x1B36, 0x1B3A)
                    || c == 0x1B3C
                    || c == 0x1B42
                    // SUNDANESE
                    || isInRange(c, 0x1B80, 0x1B81)
                    || isInRange(c, 0x1BA2, 0x1BA5)
                    || isInRange(c, 0x1BA8, 0x1BA9);

        case 0x1C: // LEPCHA
            return isInRange(c, 0x1C2C, 0x1C33)
                    || isInRange(c, 0x1C36, 0x1C37);

        case 0x1D: // Additional Diacritics (supplement)
            // Only ranges on page 0x1D can match here; the 0x20xx checks
            // that used to be duplicated in this branch were unreachable.
            return isInRange(c, 0x1DC0, 0x1DFF);

        case 0x20: // Combining marks for symbols
            // 0x20E5..0x20E7 and 0x20E8..0x20F0 are adjacent, so they are
            // tested as one range.
            return isInRange(c, 0x20D0, 0x20DC) || c == 0x20E1
                    || isInRange(c, 0x20E5, 0x20F0);

        case 0x30: // CJK tone marks and kana voiced sound marks
            return isInRange(c, 0x302A, 0x302F) || c == 0x3099
                    || c == 0x309A;

        default:
            return false;
        }
    }

    /**
     * Checks if a given character c is within the range l .. h (inclusive).
     *
     * <p>If {@code l} is greater than {@code h} the range is empty and the
     * result is always {@code false}.
     *
     * @param c the character to check for
     * @param l the low value of the range
     * @param h the high value of the range
     * @return true if the character is in the range
     */
    private static boolean isInRange(final char c, final int l, final int h) {
        return (c >= l && c <= h);
    }
}

Related Tutorials