// Determining the validity of characters outside the basic 7-bit range of Unicode, for XML 1.0
// Revised from ctc wstx
/**
 * Simple utility class that encapsulates the logic of determining validity
 * of name characters outside the basic 7-bit range of Unicode, for XML 1.0
 * and XML 1.1.
 * <p>
 * The XML 1.0 checks are driven by two bit sets (one bit per code unit,
 * 32 bits per {@code int}) covering code units {@code 0x0000 - 0x313F};
 * larger values are handled with explicit range checks. The XML 1.1 checks
 * use range comparisons only.
 * <p>
 * None of the methods accept 7-bit ASCII characters: no bits are set for
 * them in the tables and the XML 1.1 checks reject everything below
 * {@code 0xB7}. Callers are expected to handle ASCII separately.
 */
public final class XmlChars
{
    /* We don't need full 64k bits... (0x80 - 0x312C) / 32. But to
     * simplify things, let's just include first 0x80 entries in there etc
     */
    /** Number of {@code int} words in each table (32 bits per int). */
    final static int SIZE = (0x3140 >> 5);

    /**
     * Bit set of valid XML 1.0 name start characters (BaseChar and the
     * low Ideographic entries) for code units up to {@code 0x312C}.
     */
    final static int[] sXml10StartChars = new int[SIZE];
    static {
        // [85] BaseChar (non-ASCII part):
        SETBITS(sXml10StartChars, 0xC0, 0xD6);
        SETBITS(sXml10StartChars, 0xD8, 0xF6);
        SETBITS(sXml10StartChars, 0xF8, 0xFF);
        SETBITS(sXml10StartChars, 0x100, 0x131);
        SETBITS(sXml10StartChars, 0x134, 0x13e);
        SETBITS(sXml10StartChars, 0x141, 0x148);
        SETBITS(sXml10StartChars, 0x14a, 0x17e);
        SETBITS(sXml10StartChars, 0x180, 0x1c3);
        SETBITS(sXml10StartChars, 0x1cd, 0x1f0);
        SETBITS(sXml10StartChars, 0x1f4, 0x1f5);
        SETBITS(sXml10StartChars, 0x1fa, 0x217);
        SETBITS(sXml10StartChars, 0x250, 0x2a8);
        SETBITS(sXml10StartChars, 0x2bb, 0x2c1);
        SETBITS(sXml10StartChars, 0x386);
        SETBITS(sXml10StartChars, 0x388, 0x38a);
        SETBITS(sXml10StartChars, 0x38c);
        SETBITS(sXml10StartChars, 0x38e, 0x3a1);
        SETBITS(sXml10StartChars, 0x3a3, 0x3ce);
        SETBITS(sXml10StartChars, 0x3d0, 0x3d6);
        SETBITS(sXml10StartChars, 0x3da);
        SETBITS(sXml10StartChars, 0x3dc);
        SETBITS(sXml10StartChars, 0x3de);
        SETBITS(sXml10StartChars, 0x3e0);
        SETBITS(sXml10StartChars, 0x3e2, 0x3f3);
        SETBITS(sXml10StartChars, 0x401, 0x40c);
        SETBITS(sXml10StartChars, 0x40e, 0x44f);
        SETBITS(sXml10StartChars, 0x451, 0x45c);
        SETBITS(sXml10StartChars, 0x45e, 0x481);
        SETBITS(sXml10StartChars, 0x490, 0x4c4);
        SETBITS(sXml10StartChars, 0x4c7, 0x4c8);
        SETBITS(sXml10StartChars, 0x4cb, 0x4cc);
        SETBITS(sXml10StartChars, 0x4d0, 0x4eb);
        SETBITS(sXml10StartChars, 0x4ee, 0x4f5);
        SETBITS(sXml10StartChars, 0x4f8, 0x4f9);
        SETBITS(sXml10StartChars, 0x531, 0x556);
        SETBITS(sXml10StartChars, 0x559);
        SETBITS(sXml10StartChars, 0x561, 0x586);
        SETBITS(sXml10StartChars, 0x5d0, 0x5ea);
        SETBITS(sXml10StartChars, 0x5f0, 0x5f2);
        SETBITS(sXml10StartChars, 0x621, 0x63a);
        SETBITS(sXml10StartChars, 0x641, 0x64a);
        SETBITS(sXml10StartChars, 0x671, 0x6b7);
        SETBITS(sXml10StartChars, 0x6ba, 0x6be);
        SETBITS(sXml10StartChars, 0x6c0, 0x6ce);
        SETBITS(sXml10StartChars, 0x6d0, 0x6d3);
        SETBITS(sXml10StartChars, 0x6d5);
        SETBITS(sXml10StartChars, 0x6e5, 0x6e6);
        SETBITS(sXml10StartChars, 0x905, 0x939);
        SETBITS(sXml10StartChars, 0x93d);
        SETBITS(sXml10StartChars, 0x958, 0x961);
        SETBITS(sXml10StartChars, 0x985, 0x98c);
        SETBITS(sXml10StartChars, 0x98f, 0x990);
        SETBITS(sXml10StartChars, 0x993, 0x9a8);
        SETBITS(sXml10StartChars, 0x9aa, 0x9b0);
        SETBITS(sXml10StartChars, 0x9b2);
        SETBITS(sXml10StartChars, 0x9b6, 0x9b9);
        SETBITS(sXml10StartChars, 0x9dc);
        SETBITS(sXml10StartChars, 0x9dd);
        SETBITS(sXml10StartChars, 0x9df, 0x9e1);
        SETBITS(sXml10StartChars, 0x9f0); SETBITS(sXml10StartChars, 0x9f1);
        SETBITS(sXml10StartChars, 0xA05, 0xA0A);
        SETBITS(sXml10StartChars, 0xA0F); SETBITS(sXml10StartChars, 0xA10);
        SETBITS(sXml10StartChars, 0xA13, 0xA28);
        SETBITS(sXml10StartChars, 0xA2A, 0xA30);
        SETBITS(sXml10StartChars, 0xA32); SETBITS(sXml10StartChars, 0xA33);
        SETBITS(sXml10StartChars, 0xA35); SETBITS(sXml10StartChars, 0xA36);
        SETBITS(sXml10StartChars, 0xA38); SETBITS(sXml10StartChars, 0xA39);
        SETBITS(sXml10StartChars, 0xA59, 0xA5C);
        SETBITS(sXml10StartChars, 0xA5E);
        SETBITS(sXml10StartChars, 0xA72, 0xA74);
        SETBITS(sXml10StartChars, 0xA85, 0xA8B);
        SETBITS(sXml10StartChars, 0xA8D);
        SETBITS(sXml10StartChars, 0xA8F, 0xA91);
        SETBITS(sXml10StartChars, 0xA93, 0xAA8);
        SETBITS(sXml10StartChars, 0xAAA, 0xAB0);
        SETBITS(sXml10StartChars, 0xAB2, 0xAB3);
        SETBITS(sXml10StartChars, 0xAB5, 0xAB9);
        SETBITS(sXml10StartChars, 0xABD);
        SETBITS(sXml10StartChars, 0xAE0);
        SETBITS(sXml10StartChars, 0xB05, 0xB0C);
        SETBITS(sXml10StartChars, 0xB0F); SETBITS(sXml10StartChars, 0xB10);
        SETBITS(sXml10StartChars, 0xB13, 0xB28);
        SETBITS(sXml10StartChars, 0xB2A, 0xB30);
        SETBITS(sXml10StartChars, 0xB32); SETBITS(sXml10StartChars, 0xB33);
        SETBITS(sXml10StartChars, 0xB36, 0xB39);
        SETBITS(sXml10StartChars, 0xB3D);
        SETBITS(sXml10StartChars, 0xB5C); SETBITS(sXml10StartChars, 0xB5D);
        SETBITS(sXml10StartChars, 0xB5F, 0xB61);
        SETBITS(sXml10StartChars, 0xB85, 0xB8A);
        SETBITS(sXml10StartChars, 0xB8E, 0xB90);
        SETBITS(sXml10StartChars, 0xB92, 0xB95);
        SETBITS(sXml10StartChars, 0xB99, 0xB9A);
        SETBITS(sXml10StartChars, 0xB9C);
        SETBITS(sXml10StartChars, 0xB9E); SETBITS(sXml10StartChars, 0xB9F);
        SETBITS(sXml10StartChars, 0xBA3); SETBITS(sXml10StartChars, 0xBA4);
        SETBITS(sXml10StartChars, 0xBA8, 0xBAA);
        SETBITS(sXml10StartChars, 0xBAE, 0xBB5);
        SETBITS(sXml10StartChars, 0xBB7, 0xBB9);
        SETBITS(sXml10StartChars, 0xC05, 0xC0C);
        SETBITS(sXml10StartChars, 0xC0E, 0xC10);
        SETBITS(sXml10StartChars, 0xC12, 0xC28);
        SETBITS(sXml10StartChars, 0xC2A, 0xC33);
        SETBITS(sXml10StartChars, 0xC35, 0xC39);
        SETBITS(sXml10StartChars, 0xC60); SETBITS(sXml10StartChars, 0xC61);
        SETBITS(sXml10StartChars, 0xC85, 0xC8C);
        SETBITS(sXml10StartChars, 0xC8E, 0xC90);
        SETBITS(sXml10StartChars, 0xC92, 0xCA8);
        SETBITS(sXml10StartChars, 0xCAA, 0xCB3);
        SETBITS(sXml10StartChars, 0xCB5, 0xCB9);
        SETBITS(sXml10StartChars, 0xCDE);
        SETBITS(sXml10StartChars, 0xCE0); SETBITS(sXml10StartChars, 0xCE1);
        SETBITS(sXml10StartChars, 0xD05, 0xD0C);
        SETBITS(sXml10StartChars, 0xD0E, 0xD10);
        SETBITS(sXml10StartChars, 0xD12, 0xD28);
        SETBITS(sXml10StartChars, 0xD2A, 0xD39);
        SETBITS(sXml10StartChars, 0xD60); SETBITS(sXml10StartChars, 0xD61);
        SETBITS(sXml10StartChars, 0xE01, 0xE2E);
        SETBITS(sXml10StartChars, 0xE30);
        SETBITS(sXml10StartChars, 0xE32); SETBITS(sXml10StartChars, 0xE33);
        SETBITS(sXml10StartChars, 0xE40, 0xE45);
        SETBITS(sXml10StartChars, 0xE81); SETBITS(sXml10StartChars, 0xE82);
        SETBITS(sXml10StartChars, 0xE84);
        SETBITS(sXml10StartChars, 0xE87); SETBITS(sXml10StartChars, 0xE88);
        SETBITS(sXml10StartChars, 0xE8A); SETBITS(sXml10StartChars, 0xE8D);
        SETBITS(sXml10StartChars, 0xE94, 0xE97);
        SETBITS(sXml10StartChars, 0xE99, 0xE9F);
        SETBITS(sXml10StartChars, 0xEA1, 0xEA3);
        SETBITS(sXml10StartChars, 0xEA5); SETBITS(sXml10StartChars, 0xEA7);
        SETBITS(sXml10StartChars, 0xEAA); SETBITS(sXml10StartChars, 0xEAB);
        SETBITS(sXml10StartChars, 0xEAD); SETBITS(sXml10StartChars, 0xEAE);
        SETBITS(sXml10StartChars, 0xEB0);
        SETBITS(sXml10StartChars, 0xEB2); SETBITS(sXml10StartChars, 0xEB3);
        SETBITS(sXml10StartChars, 0xEBD);
        SETBITS(sXml10StartChars, 0xEC0, 0xEC4);
        SETBITS(sXml10StartChars, 0xF40, 0xF47);
        SETBITS(sXml10StartChars, 0xF49, 0xF69);
        SETBITS(sXml10StartChars, 0x10a0, 0x10c5);
        SETBITS(sXml10StartChars, 0x10d0, 0x10f6);
        SETBITS(sXml10StartChars, 0x1100);
        SETBITS(sXml10StartChars, 0x1102, 0x1103);
        SETBITS(sXml10StartChars, 0x1105, 0x1107);
        SETBITS(sXml10StartChars, 0x1109);
        SETBITS(sXml10StartChars, 0x110b, 0x110c);
        SETBITS(sXml10StartChars, 0x110e, 0x1112);
        SETBITS(sXml10StartChars, 0x113c);
        SETBITS(sXml10StartChars, 0x113e);
        SETBITS(sXml10StartChars, 0x1140);
        SETBITS(sXml10StartChars, 0x114c);
        SETBITS(sXml10StartChars, 0x114e);
        SETBITS(sXml10StartChars, 0x1150);
        SETBITS(sXml10StartChars, 0x1154, 0x1155);
        SETBITS(sXml10StartChars, 0x1159);
        SETBITS(sXml10StartChars, 0x115f, 0x1161);
        SETBITS(sXml10StartChars, 0x1163);
        SETBITS(sXml10StartChars, 0x1165);
        SETBITS(sXml10StartChars, 0x1167);
        SETBITS(sXml10StartChars, 0x1169);
        SETBITS(sXml10StartChars, 0x116d, 0x116e);
        SETBITS(sXml10StartChars, 0x1172, 0x1173);
        SETBITS(sXml10StartChars, 0x1175);
        SETBITS(sXml10StartChars, 0x119e);
        SETBITS(sXml10StartChars, 0x11a8);
        SETBITS(sXml10StartChars, 0x11ab);
        SETBITS(sXml10StartChars, 0x11ae, 0x11af);
        SETBITS(sXml10StartChars, 0x11b7, 0x11b8);
        SETBITS(sXml10StartChars, 0x11ba);
        SETBITS(sXml10StartChars, 0x11bc, 0x11c2);
        SETBITS(sXml10StartChars, 0x11eb);
        SETBITS(sXml10StartChars, 0x11f0);
        SETBITS(sXml10StartChars, 0x11f9);
        SETBITS(sXml10StartChars, 0x1e00, 0x1e9b);
        SETBITS(sXml10StartChars, 0x1ea0, 0x1ef9);
        SETBITS(sXml10StartChars, 0x1f00, 0x1f15);
        SETBITS(sXml10StartChars, 0x1f18, 0x1f1d);
        SETBITS(sXml10StartChars, 0x1f20, 0x1f45);
        SETBITS(sXml10StartChars, 0x1f48, 0x1f4d);
        SETBITS(sXml10StartChars, 0x1f50, 0x1f57);
        SETBITS(sXml10StartChars, 0x1f59);
        SETBITS(sXml10StartChars, 0x1f5b);
        SETBITS(sXml10StartChars, 0x1f5d);
        SETBITS(sXml10StartChars, 0x1f5f, 0x1f7d);
        SETBITS(sXml10StartChars, 0x1f80, 0x1fb4);
        SETBITS(sXml10StartChars, 0x1fb6, 0x1fbc);
        SETBITS(sXml10StartChars, 0x1fbe);
        SETBITS(sXml10StartChars, 0x1fc2, 0x1fc4);
        SETBITS(sXml10StartChars, 0x1fc6, 0x1fcc);
        SETBITS(sXml10StartChars, 0x1fd0, 0x1fd3);
        SETBITS(sXml10StartChars, 0x1fd6, 0x1fdb);
        SETBITS(sXml10StartChars, 0x1fe0, 0x1fec);
        SETBITS(sXml10StartChars, 0x1ff2, 0x1ff4);
        SETBITS(sXml10StartChars, 0x1ff6, 0x1ffc);
        SETBITS(sXml10StartChars, 0x2126);
        SETBITS(sXml10StartChars, 0x212a, 0x212b);
        SETBITS(sXml10StartChars, 0x212e);
        SETBITS(sXml10StartChars, 0x2180, 0x2182);
        SETBITS(sXml10StartChars, 0x3041, 0x3094);
        SETBITS(sXml10StartChars, 0x30a1, 0x30fa);
        SETBITS(sXml10StartChars, 0x3105, 0x312c);
        // note: AC00 - D7A3 handled separately

        // [86] Ideographic (but note: > 0x312c handled separately)
        SETBITS(sXml10StartChars, 0x3007);
        SETBITS(sXml10StartChars, 0x3021, 0x3029);
    }

    /**
     * Bit set of valid XML 1.0 name characters for code units up to
     * {@code 0x312C}: all name start characters plus CombiningChar,
     * Digit and Extender entries.
     */
    final static int[] sXml10Chars = new int[SIZE];
    static {
        // Let's start with all valid start chars:
        System.arraycopy(sXml10StartChars, 0, sXml10Chars, 0, SIZE);

        // [87] CombiningChar ::=
        SETBITS(sXml10Chars, 0x300, 0x345);
        SETBITS(sXml10Chars, 0x360, 0x361);
        SETBITS(sXml10Chars, 0x483, 0x486);
        SETBITS(sXml10Chars, 0x591, 0x5a1);
        SETBITS(sXml10Chars, 0x5a3, 0x5b9);
        SETBITS(sXml10Chars, 0x5bb, 0x5bd);
        SETBITS(sXml10Chars, 0x5bf);
        SETBITS(sXml10Chars, 0x5c1, 0x5c2);
        SETBITS(sXml10Chars, 0x5c4);
        SETBITS(sXml10Chars, 0x64b, 0x652);
        SETBITS(sXml10Chars, 0x670);
        SETBITS(sXml10Chars, 0x6d6, 0x6dc);
        SETBITS(sXml10Chars, 0x6dd, 0x6df);
        SETBITS(sXml10Chars, 0x6e0, 0x6e4);
        SETBITS(sXml10Chars, 0x6e7, 0x6e8);
        SETBITS(sXml10Chars, 0x6ea, 0x6ed);
        SETBITS(sXml10Chars, 0x901, 0x903);
        SETBITS(sXml10Chars, 0x93c);
        SETBITS(sXml10Chars, 0x93e, 0x94c);
        SETBITS(sXml10Chars, 0x94d);
        SETBITS(sXml10Chars, 0x951, 0x954);
        SETBITS(sXml10Chars, 0x962); SETBITS(sXml10Chars, 0x963);
        SETBITS(sXml10Chars, 0x981, 0x983);
        SETBITS(sXml10Chars, 0x9bc);
        SETBITS(sXml10Chars, 0x9be); SETBITS(sXml10Chars, 0x9bf);
        SETBITS(sXml10Chars, 0x9c0, 0x9c4);
        SETBITS(sXml10Chars, 0x9c7); SETBITS(sXml10Chars, 0x9c8);
        SETBITS(sXml10Chars, 0x9cb, 0x9cd);
        SETBITS(sXml10Chars, 0x9d7);
        SETBITS(sXml10Chars, 0x9e2); SETBITS(sXml10Chars, 0x9e3);
        SETBITS(sXml10Chars, 0xA02);
        SETBITS(sXml10Chars, 0xA3C);
        SETBITS(sXml10Chars, 0xA3E); SETBITS(sXml10Chars, 0xA3F);
        SETBITS(sXml10Chars, 0xA40, 0xA42);
        SETBITS(sXml10Chars, 0xA47); SETBITS(sXml10Chars, 0xA48);
        SETBITS(sXml10Chars, 0xA4B, 0xA4D);
        SETBITS(sXml10Chars, 0xA70); SETBITS(sXml10Chars, 0xA71);
        SETBITS(sXml10Chars, 0xA81, 0xA83);
        SETBITS(sXml10Chars, 0xABC);
        SETBITS(sXml10Chars, 0xABE, 0xAC5);
        SETBITS(sXml10Chars, 0xAC7, 0xAC9);
        SETBITS(sXml10Chars, 0xACB, 0xACD);
        SETBITS(sXml10Chars, 0xB01, 0xB03);
        SETBITS(sXml10Chars, 0xB3C);
        SETBITS(sXml10Chars, 0xB3E, 0xB43);
        SETBITS(sXml10Chars, 0xB47); SETBITS(sXml10Chars, 0xB48);
        SETBITS(sXml10Chars, 0xB4B, 0xB4D);
        SETBITS(sXml10Chars, 0xB56); SETBITS(sXml10Chars, 0xB57);
        SETBITS(sXml10Chars, 0xB82); SETBITS(sXml10Chars, 0xB83);
        SETBITS(sXml10Chars, 0xBBE, 0xBC2);
        SETBITS(sXml10Chars, 0xBC6, 0xBC8);
        SETBITS(sXml10Chars, 0xBCA, 0xBCD);
        SETBITS(sXml10Chars, 0xBD7);
        SETBITS(sXml10Chars, 0xC01, 0xC03);
        SETBITS(sXml10Chars, 0xC3E, 0xC44);
        SETBITS(sXml10Chars, 0xC46, 0xC48);
        SETBITS(sXml10Chars, 0xC4A, 0xC4D);
        SETBITS(sXml10Chars, 0xC55, 0xC56);
        SETBITS(sXml10Chars, 0xC82, 0xC83);
        SETBITS(sXml10Chars, 0xCBE, 0xCC4);
        SETBITS(sXml10Chars, 0xCC6, 0xCC8);
        SETBITS(sXml10Chars, 0xCCA, 0xCCD);
        SETBITS(sXml10Chars, 0xCD5, 0xCD6);
        SETBITS(sXml10Chars, 0xD02, 0xD03);
        SETBITS(sXml10Chars, 0xD3E, 0xD43);
        SETBITS(sXml10Chars, 0xD46, 0xD48);
        SETBITS(sXml10Chars, 0xD4A, 0xD4D);
        SETBITS(sXml10Chars, 0xD57);
        SETBITS(sXml10Chars, 0xE31);
        SETBITS(sXml10Chars, 0xE34, 0xE3A);
        SETBITS(sXml10Chars, 0xE47, 0xE4E);
        SETBITS(sXml10Chars, 0xEB1);
        SETBITS(sXml10Chars, 0xEB4, 0xEB9);
        SETBITS(sXml10Chars, 0xEBB, 0xEBC);
        SETBITS(sXml10Chars, 0xEC8, 0xECD);
        SETBITS(sXml10Chars, 0xF18, 0xF19);
        SETBITS(sXml10Chars, 0xF35); SETBITS(sXml10Chars, 0xF37);
        SETBITS(sXml10Chars, 0xF39);
        SETBITS(sXml10Chars, 0xF3E); SETBITS(sXml10Chars, 0xF3F);
        SETBITS(sXml10Chars, 0xF71, 0xF84);
        SETBITS(sXml10Chars, 0xF86, 0xF8B);
        SETBITS(sXml10Chars, 0xF90, 0xF95);
        SETBITS(sXml10Chars, 0xF97);
        SETBITS(sXml10Chars, 0xF99, 0xFAD);
        SETBITS(sXml10Chars, 0xFB1, 0xFB7);
        SETBITS(sXml10Chars, 0xFB9);
        SETBITS(sXml10Chars, 0x20D0, 0x20DC);
        SETBITS(sXml10Chars, 0x20E1);
        SETBITS(sXml10Chars, 0x302A, 0x302F);
        SETBITS(sXml10Chars, 0x3099); SETBITS(sXml10Chars, 0x309A);

        // [88] Digit:
        SETBITS(sXml10Chars, 0x660, 0x669);
        SETBITS(sXml10Chars, 0x6f0, 0x6f9);
        SETBITS(sXml10Chars, 0x966, 0x96f);
        SETBITS(sXml10Chars, 0x9e6, 0x9ef);
        SETBITS(sXml10Chars, 0xa66, 0xa6f);
        SETBITS(sXml10Chars, 0xae6, 0xaef);
        SETBITS(sXml10Chars, 0xb66, 0xb6f);
        SETBITS(sXml10Chars, 0xbe7, 0xbef);
        SETBITS(sXml10Chars, 0xc66, 0xc6f);
        SETBITS(sXml10Chars, 0xce6, 0xcef);
        SETBITS(sXml10Chars, 0xd66, 0xd6f);
        SETBITS(sXml10Chars, 0xe50, 0xe59);
        SETBITS(sXml10Chars, 0xed0, 0xed9);
        SETBITS(sXml10Chars, 0xf20, 0xf29);

        // [89] Extender:
        SETBITS(sXml10Chars, 0xb7);
        SETBITS(sXml10Chars, 0x2d0);
        SETBITS(sXml10Chars, 0x2d1);
        SETBITS(sXml10Chars, 0x387);
        SETBITS(sXml10Chars, 0x640);
        SETBITS(sXml10Chars, 0xE46);
        SETBITS(sXml10Chars, 0xEC6);
        SETBITS(sXml10Chars, 0x3005);
        SETBITS(sXml10Chars, 0x3031, 0x3035);
        SETBITS(sXml10Chars, 0x309d, 0x309e);
        SETBITS(sXml10Chars, 0x30fc, 0x30fe);
    }

    /** Static utility class; not meant to be instantiated. */
    private XmlChars() { }

    /**
     * Checks whether a non-ASCII character may start an XML 1.0 name.
     *
     * @param c code unit to check
     * @return true for characters flagged in the start character table,
     *   for {@code 0x4E00 - 0x9FA5}, for {@code 0xAC00 - 0xD7A3}, and for
     *   high surrogates ({@code 0xD800 - 0xDBFF}); false otherwise,
     *   including for all 7-bit ASCII characters
     */
    public final static boolean is10NameStartChar(char c)
    {
        // First, let's deal with outliers
        if (c > 0x312C) { // Most valid chars are below this..
            if (c < 0xAC00) {
                return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms
            }
            if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars
                return true;
            }
            /* As to surrogate pairs... let's do the bare minimum;
             * 0xD800 - 0xDBFF (high surrogate) are ok; low surrogates
             * can only follow high one
             */
            return (c <= 0xDBFF && c >= 0xD800);
        }
        // but then we'll just need to use the table...
        return isBitSet(sXml10StartChars, c);
    }

    /**
     * Checks whether a non-ASCII character may appear in an XML 1.0 name
     * (at any position).
     *
     * @param c code unit to check
     * @return true for characters flagged in the name character table,
     *   for {@code 0x4E00 - 0x9FA5}, for {@code 0xAC00 - 0xD7A3}, and for
     *   any surrogate ({@code 0xD800 - 0xDFFF}; pairing is not verified);
     *   false otherwise, including for all 7-bit ASCII characters
     */
    public final static boolean is10NameChar(char c)
    {
        // First, let's deal with outliers
        if (c > 0x312C) { // Most valid chars are below this..
            if (c < 0xAC00) {
                return (c >= 0x4E00 && c <= 0x9FA5); // valid ideograms
            }
            if (c <= 0xD7A3) { // 0xAC00 - 0xD7A3, valid base chars
                return true;
            }
            /* As to surrogate pairs... let's do the bare minimum;
             * 0xD800 - 0xDFFF (high, low surrogate) are ok (need to
             * check pairing in future)
             */
            return (c >= 0xD800 && c <= 0xDFFF);
        }
        // but then we'll just need to use the table...
        return isBitSet(sXml10Chars, c);
    }

    /**
     * Checks whether a non-ASCII character may start an XML 1.1 name.
     * High surrogates are accepted without checking what follows them;
     * low surrogates are rejected.
     *
     * @param c code unit to check
     * @return true if the character is an acceptable XML 1.1 name start
     *   character; false otherwise, including for all characters below
     *   {@code 0xC0}
     */
    public final static boolean is11NameStartChar(char c)
    {
        // Others are checked block-by-block:
        if (c <= 0x2FEF) {
            if (c < 0x300) {
                if (c < 0x00C0) { // 8-bit ctrl chars
                    return false;
                }
                // most of the rest are fine...
                return (c != 0xD7 && c != 0xF7);
            }
            if (c >= 0x2C00) {
                // 0x2C00 - 0x2FEF are ok
                return true;
            }
            if (c < 0x370 || c > 0x218F) {
                // 0x300 - 0x36F, 0x2190 - 0x2BFF invalid
                return false;
            }
            if (c < 0x2000) {
                // 0x370 - 0x37D, 0x37F - 0x1FFF are ok
                return (c != 0x37E);
            }
            if (c >= 0x2070) {
                // 0x2070 - 0x218F are ok
                return (c <= 0x218F);
            }
            // And finally, 0x200C - 0x200D
            return (c == 0x200C || c == 0x200D);
        }

        // 0x3000 and above:
        if (c >= 0x3001) {
            /* Hmmh, let's allow high surrogates here, without checking
             * that they are properly followed... crude basic support,
             * I know, but allows valid combinations, just doesn't catch
             * invalid ones
             */
            if (c <= 0xDBFF) { // 0x3001 - 0xD7FF (chars),
                // 0xD800 - 0xDBFF (high surrogate) are ok (unlike DC00-DFFF)
                return true;
            }
            if (c >= 0xF900 && c <= 0xFFFD) {
                /* Check above removes low surrogate (since one can not
                 * START an identifier), and byte-order markers..
                 */
                return (c <= 0xFDCF || c >= 0xFDF0);
            }
        }

        return false;
    }

    /**
     * Checks whether a non-ASCII character may appear in an XML 1.1 name
     * (at any position). Both high and low surrogates are accepted,
     * without checking their ordering.
     *
     * @param c code unit to check
     * @return true if the character is an acceptable XML 1.1 name
     *   character; false otherwise, including for all 7-bit ASCII
     *   characters
     */
    public final static boolean is11NameChar(char c)
    {
        // Others are checked block-by-block:
        if (c <= 0x2FEF) {
            if (c < 0x2000) {
                if (c < 0x00C0) {
                    // Below 0xC0 only the middle dot (0xB7) is a name char
                    return (c == 0xB7);
                }
                /* 0xC0 - 0x1FFF are ok, except for the multiplication and
                 * division signs (0xD7, 0xF7), which are excluded from
                 * name start chars and not added back for name chars,
                 * and the Greek question mark (0x37E).
                 */
                return (c != 0xD7 && c != 0xF7 && c != 0x37E);
            }
            if (c >= 0x2C00) {
                // 0x2C00 - 0x2FEF are ok
                return true;
            }
            if (c < 0x200C || c > 0x218F) {
                // 0x2000 - 0x200B, 0x2190 - 0x2BFF invalid
                return false;
            }
            if (c >= 0x2070) {
                // 0x2070 - 0x218F are ok
                return true;
            }
            // And finally, 0x200C - 0x200D, 0x203F - 0x2040 are ok
            return (c == 0x200C || c == 0x200D
                    || c == 0x203F || c == 0x2040);
        }

        // 0x3000 and above:
        if (c >= 0x3001) {
            /* Hmmh, let's allow surrogates here, without checking that
             * they have proper ordering. For non-first name chars, both are
             * ok, for valid names. Crude basic support,
             * I know, but allows valid combinations, just doesn't catch
             * invalid ones
             */
            if (c <= 0xDFFF) { // 0x3001 - 0xD7FF (chars),
                // 0xD800 - 0xDFFF (high, low surrogate) are ok:
                return true;
            }
            if (c >= 0xF900 && c <= 0xFFFD) {
                /* Check above removes other invalid chars (below valid
                 * range), and byte-order markers (0xFFFE, 0xFFFF).
                 */
                return (c <= 0xFDCF || c >= 0xFDF0);
            }
        }

        return false;
    }

    /**
     * Tests a single bit of a table.
     *
     * @param table bit set to read (32 bits per int)
     * @param ix index of the bit; must be within the table
     * @return true if the bit at the given index is set
     */
    private static boolean isBitSet(int[] table, int ix)
    {
        return (table[ix >> 5] & (1 << (ix & 31))) != 0;
    }

    /**
     * Sets all bits in the inclusive range {@code start - end}.
     * Only used during static initialization, so a simple per-bit loop
     * is sufficient. Does nothing if {@code start > end}.
     *
     * @param array bit set to modify (32 bits per int)
     * @param start index of the first bit to set
     * @param end index of the last bit to set (inclusive)
     */
    private static void SETBITS(int[] array, int start, int end)
    {
        for (int point = start; point <= end; ++point) {
            SETBITS(array, point);
        }
    }

    /**
     * Sets a single bit.
     *
     * @param array bit set to modify (32 bits per int)
     * @param point index of the bit to set
     */
    private static void SETBITS(int[] array, int point) {
        int ix = (point >> 5);
        int bit = (point & 31);

        array[ix] |= (1 << bit);
    }
}
Related examples in the same category