Java examples for java.lang:char
For a given character c, return the version of the character that doesn't have an accent, stroke, etc.
/*/*from w ww . j a v a 2 s . com*/ * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //package com.java2s; public class Main { /** * For a given character c, return the version of the character that doesn't have * an accent, stroke, etc. If the character doesn't have accents, strokes, etc. * return 0x00. * @param c the character to check * @return the normalized character or 0x00 if the character doesn't need * normalization. */ public static char foldNonDiacriticChar(final char c) { switch (c) { // LATIN CAPITAL LETTER B WITH HOOK -> LATIN CAPITAL LETTER B case 0x0181: return (0x0042); // LATIN CAPITAL LETTER B WITH TOPBAR -> LATIN CAPITAL LETTER B case 0x0182: return (0x0042); // LATIN CAPITAL LETTER C WITH HOOK -> LATIN CAPITAL LETTER C case 0x0187: return (0x0043); // LATIN CAPITAL LETTER D WITH STROKE -> LATIN CAPITAL LETTER D case 0x0110: return (0x0044); // LATIN CAPITAL LETTER D WITH HOOK -> LATIN CAPITAL LETTER D case 0x018A: return (0x0044); // LATIN CAPITAL LETTER D WITH TOPBAR -> LATIN CAPITAL LETTER D case 0x018B: return (0x0044); // LATIN CAPITAL LETTER F WITH HOOK -> LATIN CAPITAL LETTER F case 0x0191: return (0x0046); // LATIN CAPITAL LETTER G WITH HOOK -> LATIN CAPITAL LETTER G case 0x0193: return (0x0047); // LATIN CAPITAL LETTER G WITH STROKE -> LATIN CAPITAL LETTER G case 0x01E4: return (0x0047); // LATIN CAPITAL LETTER H WITH STROKE -> LATIN CAPITAL LETTER H case 0x0126: return (0x0048); // LATIN CAPITAL LETTER I WITH STROKE -> LATIN CAPITAL LETTER I case 0x0197: return (0x0049); // LATIN CAPITAL LETTER K WITH HOOK -> LATIN CAPITAL LETTER K case 0x0198: return (0x004B); // LATIN CAPITAL LETTER L WITH STROKE -> LATIN CAPITAL LETTER L case 0x0141: return (0x004C); // LATIN CAPITAL LETTER N WITH LEFT HOOK -> LATIN CAPITAL LETTER N case 0x019D: return (0x004E); // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG -> LATIN CAPITAL LETTER N case 0x0220: return (0x004E); // LATIN CAPITAL LETTER O WITH STROKE -> LATIN CAPITAL LETTER O case 0x00D8: return (0x004F); // LATIN CAPITAL LETTER O WITH MIDDLE TILDE -> LATIN CAPITAL LETTER O case 0x019F: return (0x004F); // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE -> LATIN CAPITAL LETTER O case 0x01FE: return (0x004F); // LATIN CAPITAL LETTER P WITH HOOK -> LATIN CAPITAL LETTER P case 0x01A4: return (0x0050); // LATIN CAPITAL LETTER T WITH STROKE -> LATIN CAPITAL LETTER T case 0x0166: return (0x0054); // LATIN CAPITAL LETTER T WITH HOOK -> LATIN CAPITAL LETTER T case 0x01AC: return (0x0054); // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK -> LATIN CAPITAL LETTER T case 0x01AE: return (0x0054); // LATIN CAPITAL LETTER V WITH HOOK -> LATIN CAPITAL LETTER V case 0x01B2: return (0x0056); // LATIN CAPITAL LETTER Y WITH HOOK -> LATIN CAPITAL LETTER Y case 0x01B3: return (0x0059); // LATIN CAPITAL LETTER Z WITH STROKE -> LATIN CAPITAL LETTER Z case 0x01B5: return (0x005A); // LATIN CAPITAL LETTER Z WITH HOOK -> LATIN CAPITAL LETTER Z case 0x0224: return (0x005A); // LATIN SMALL LETTER B WITH STROKE -> LATIN SMALL LETTER B case 0x0180: return (0x0062); // LATIN SMALL LETTER B WITH TOPBAR -> LATIN SMALL LETTER B case 0x0183: return (0x0062); // LATIN SMALL LETTER B WITH HOOK -> LATIN SMALL LETTER B case 0x0253: return (0x0062); // LATIN SMALL LETTER C WITH HOOK -> LATIN SMALL LETTER C case 0x0188: return (0x0063); // LATIN SMALL LETTER C WITH CURL -> LATIN SMALL LETTER C case 0x0255: return (0x0063); // LATIN SMALL LETTER D WITH STROKE -> LATIN SMALL LETTER D case 0x0111: return (0x0064); // LATIN SMALL LETTER D WITH TOPBAR -> LATIN SMALL LETTER D case 0x018C: return (0x0064); // LATIN SMALL LETTER D WITH CURL -> LATIN SMALL LETTER D case 0x0221: return (0x0064); // LATIN SMALL LETTER D WITH TAIL -> LATIN SMALL LETTER D case 0x0256: return (0x0064); // LATIN SMALL LETTER D WITH HOOK -> LATIN SMALL LETTER D case 0x0257: return (0x0064); // LATIN SMALL LETTER F WITH HOOK -> LATIN SMALL LETTER F case 0x0192: return (0x0066); // LATIN SMALL LETTER G WITH STROKE -> LATIN SMALL LETTER G case 0x01E5: return (0x0067); // LATIN SMALL LETTER G WITH HOOK -> LATIN SMALL LETTER G case 0x0260: return (0x0067); // LATIN SMALL LETTER H WITH STROKE -> LATIN SMALL LETTER H case 0x0127: return (0x0068); // LATIN SMALL LETTER H WITH HOOK -> LATIN SMALL LETTER H case 0x0266: return (0x0068); // LATIN SMALL LETTER I WITH STROKE -> LATIN SMALL LETTER I case 0x0268: return (0x0069); // LATIN SMALL LETTER J WITH CROSSED-TAIL -> LATIN SMALL LETTER J case 0x029D: return (0x006A); // LATIN SMALL LETTER K WITH HOOK -> LATIN SMALL LETTER K case 0x0199: return (0x006B); // LATIN SMALL LETTER L WITH STROKE -> LATIN SMALL LETTER L case 0x0142: return (0x006C); // LATIN SMALL LETTER L WITH BAR -> LATIN SMALL LETTER L case 0x019A: return (0x006C); // LATIN SMALL LETTER L WITH CURL -> LATIN SMALL LETTER L case 0x0234: return (0x006C); // LATIN SMALL LETTER L WITH MIDDLE TILDE -> LATIN SMALL LETTER L case 0x026B: return (0x006C); // LATIN SMALL LETTER L WITH BELT -> LATIN SMALL LETTER L case 0x026C: return (0x006C); // LATIN SMALL LETTER L WITH RETROFLEX HOOK -> LATIN SMALL LETTER L case 0x026D: return (0x006C); // LATIN SMALL LETTER M WITH HOOK -> LATIN SMALL LETTER M case 0x0271: return (0x006D); // LATIN SMALL LETTER N WITH LONG RIGHT LEG -> LATIN SMALL LETTER N case 0x019E: return (0x006E); // LATIN SMALL LETTER N WITH CURL -> LATIN SMALL LETTER N case 0x0235: return (0x006E); // LATIN SMALL LETTER N WITH LEFT HOOK -> LATIN SMALL LETTER N case 0x0272: return (0x006E); // LATIN SMALL LETTER N WITH RETROFLEX HOOK -> LATIN SMALL LETTER N case 0x0273: return (0x006E); // LATIN SMALL LETTER O WITH STROKE -> LATIN SMALL LETTER O case 0x00F8: return (0x006F); // LATIN SMALL LETTER O WITH STROKE AND ACUTE -> LATIN SMALL LETTER O case 0x01FF: return (0x006F); // LATIN SMALL LETTER P WITH HOOK -> LATIN SMALL LETTER P case 0x01A5: return (0x0070); // LATIN SMALL LETTER Q WITH HOOK -> LATIN SMALL LETTER Q case 0x02A0: return (0x0071); // LATIN SMALL LETTER R WITH LONG LEG -> LATIN SMALL LETTER R case 0x027C: return (0x0072); // LATIN SMALL LETTER R WITH TAIL -> LATIN SMALL LETTER R case 0x027D: return (0x0072); // LATIN SMALL LETTER S WITH HOOK -> LATIN SMALL LETTER S case 0x0282: return (0x0073); // LATIN SMALL LETTER T WITH STROKE -> LATIN SMALL LETTER T case 0x0167: return (0x0074); // LATIN SMALL LETTER T WITH PALATAL HOOK -> LATIN SMALL LETTER T case 0x01AB: return (0x0074); // LATIN SMALL LETTER T WITH HOOK -> LATIN SMALL LETTER T case 0x01AD: return (0x0074); // LATIN SMALL LETTER T WITH CURL -> LATIN SMALL LETTER T case 0x0236: return (0x0074); // LATIN SMALL LETTER T WITH RETROFLEX HOOK -> LATIN SMALL LETTER T case 0x0288: return (0x0074); // LATIN SMALL LETTER V WITH HOOK -> LATIN SMALL LETTER V case 0x028B: return (0x0076); // LATIN SMALL LETTER Y WITH HOOK -> LATIN SMALL LETTER Y case 0x01B4: return (0x0079); // LATIN SMALL LETTER Z WITH STROKE -> LATIN SMALL LETTER Z case 0x01B6: return (0x007A); // LATIN SMALL LETTER Z WITH HOOK -> LATIN SMALL LETTER Z case 0x0225: return (0x007A); // LATIN SMALL LETTER Z WITH RETROFLEX HOOK -> LATIN SMALL LETTER Z case 0x0290: return (0x007A); // LATIN SMALL LETTER Z WITH CURL -> LATIN SMALL LETTER Z case 0x0291: return (0x007A); // LATIN SMALL LETTER SCHWA WITH HOOK -> LATIN SMALL LETTER SCHWA case 0x025A: return (0x0259); // LATIN SMALL LETTER ESH WITH CURL -> LATIN SMALL LETTER ESH case 0x0286: return (0x0283); // LATIN SMALL LETTER EZH WITH TAIL -> LATIN SMALL LETTER EZH case 0x01BA: return (0x0292); // LATIN SMALL LETTER EZH WITH CURL -> LATIN SMALL LETTER EZH case 0x0293: return (0x0292); // CYRILLIC CAPITAL LETTER GHE WITH UPTURN -> CYRILLIC CAPITAL LETTER GHE case 0x0490: return (0x0413); // CYRILLIC CAPITAL LETTER GHE WITH STROKE -> CYRILLIC CAPITAL LETTER GHE case 0x0492: return (0x0413); // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK -> CYRILLIC CAPITAL LETTER GHE case 0x0494: return (0x0413); // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER -> CYRILLIC CAPITAL LETTER ZHE case 0x0496: return (0x0416); // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER -> CYRILLIC CAPITAL LETTER ZE case 0x0498: return (0x0417); // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL -> CYRILLIC CAPITAL LETTER SHORT I case 0x048A: return (0x0419); // CYRILLIC CAPITAL LETTER KA WITH DESCENDER -> CYRILLIC CAPITAL LETTER KA case 0x049A: return (0x041A); // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE -> CYRILLIC CAPITAL LETTER KA case 0x049C: return (0x041A); // CYRILLIC CAPITAL LETTER KA WITH STROKE -> CYRILLIC CAPITAL LETTER KA case 0x049E: return (0x041A); // CYRILLIC CAPITAL LETTER KA WITH HOOK -> CYRILLIC CAPITAL LETTER KA case 0x04C3: return (0x041A); // CYRILLIC CAPITAL LETTER EL WITH TAIL -> CYRILLIC CAPITAL LETTER EL case 0x04C5: return (0x041B); // CYRILLIC CAPITAL LETTER EM WITH TAIL -> CYRILLIC CAPITAL LETTER EM case 0x04CD: return (0x041C); // CYRILLIC CAPITAL LETTER EN WITH DESCENDER -> CYRILLIC CAPITAL LETTER EN case 0x04A2: return (0x041D); // CYRILLIC CAPITAL LETTER EN WITH HOOK -> CYRILLIC CAPITAL LETTER EN case 0x04C7: return (0x041D); // CYRILLIC CAPITAL LETTER EN WITH TAIL -> CYRILLIC CAPITAL LETTER EN case 0x04C9: return (0x041D); // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK -> CYRILLIC CAPITAL LETTER PE case 0x04A6: return (0x041F); // CYRILLIC CAPITAL LETTER ER WITH TICK -> CYRILLIC CAPITAL LETTER ER case 0x048E: return (0x0420); // CYRILLIC CAPITAL LETTER ES WITH DESCENDER -> CYRILLIC CAPITAL LETTER ES case 0x04AA: return (0x0421); // CYRILLIC CAPITAL LETTER TE WITH DESCENDER -> CYRILLIC CAPITAL LETTER TE case 0x04AC: return (0x0422); // CYRILLIC CAPITAL LETTER HA WITH DESCENDER -> CYRILLIC CAPITAL LETTER HA case 0x04B2: return (0x0425); // CYRILLIC SMALL LETTER HA WITH DESCENDER -> CYRILLIC CAPITAL LETTER HA case 0x04B3: return (0x0425); // CYRILLIC SMALL LETTER GHE WITH UPTURN -> CYRILLIC SMALL LETTER GHE case 0x0491: return (0x0433); // CYRILLIC SMALL LETTER GHE WITH STROKE -> CYRILLIC SMALL LETTER GHE case 0x0493: return (0x0433); // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK -> CYRILLIC SMALL LETTER GHE case 0x0495: return (0x0433); // CYRILLIC SMALL LETTER ZHE WITH DESCENDER -> CYRILLIC SMALL LETTER ZHE case 0x0497: return (0x0436); // CYRILLIC SMALL LETTER ZE WITH DESCENDER -> CYRILLIC SMALL LETTER ZE case 0x0499: return (0x0437); // CYRILLIC SMALL LETTER SHORT I WITH TAIL -> CYRILLIC SMALL LETTER SHORT I case 0x048B: return (0x0439); // CYRILLIC SMALL LETTER KA WITH DESCENDER -> CYRILLIC SMALL LETTER KA case 0x049B: return (0x043A); // CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE -> CYRILLIC SMALL LETTER KA case 0x049D: return (0x043A); // CYRILLIC SMALL LETTER KA WITH STROKE -> CYRILLIC SMALL LETTER KA case 0x049F: return (0x043A); // CYRILLIC SMALL LETTER KA WITH HOOK -> CYRILLIC SMALL LETTER KA case 0x04C4: return (0x043A); // CYRILLIC SMALL LETTER EL WITH TAIL -> CYRILLIC SMALL LETTER EL case 0x04C6: return (0x043B); // CYRILLIC SMALL LETTER EM WITH TAIL -> CYRILLIC SMALL LETTER EM case 0x04CE: return (0x043C); // CYRILLIC SMALL LETTER EN WITH DESCENDER -> CYRILLIC SMALL LETTER EN case 0x04A3: return (0x043D); // CYRILLIC SMALL LETTER EN WITH HOOK -> CYRILLIC SMALL LETTER EN case 0x04C8: return (0x043D); // CYRILLIC SMALL LETTER EN WITH TAIL -> CYRILLIC SMALL LETTER EN case 0x04CA: return (0x043D); // CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK -> CYRILLIC SMALL LETTER PE case 0x04A7: return (0x043F); // CYRILLIC SMALL LETTER ER WITH TICK -> CYRILLIC SMALL LETTER ER case 0x048F: return (0x0440); // CYRILLIC SMALL LETTER ES WITH DESCENDER -> CYRILLIC SMALL LETTER ES case 0x04AB: return (0x0441); // CYRILLIC SMALL LETTER TE WITH DESCENDER -> CYRILLIC SMALL LETTER TE case 0x04AD: return (0x0442); // CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE -> CYRILLIC SMALL LETTER CHE case 0x04B9: return (0x0447); // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -> CYRILLIC CAPITAL LETTER OMEGA case 0x047C: return (0x0460); // CYRILLIC SMALL LETTER OMEGA WITH TITLO -> CYRILLIC SMALL LETTER OMEGA case 0x047D: return (0x0461); // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE -> // CYRILLIC CAPITAL LETTER STRAIGHT U case 0x04B0: return (0x04AE); // CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE -> // CYRILLIC SMALL LETTER STRAIGHT U case 0x04B1: return (0x04AF); // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER -> // CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04B6: return (0x04BC); // CYRILLIC SMALL LETTER CHE WITH DESCENDER -> CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04B7: return (0x04BC); // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE -> // CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04B8: return (0x04BC); // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER -> // CYRILLIC CAPITAL LETTER ABKHASIANCHE case 0x04BE: return (0x04BC); // CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER -> // CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04BF: return (0x04BC); // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE -> CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04CB: return (0x04BC); // CYRILLIC SMALL LETTER KHAKASSIAN CHE -> CYRILLIC CAPITAL LETTER ABKHASIAN CHE case 0x04CC: return (0x04BC); default: return (0x00); } } }