// UTF Util
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//package freenet.utils;
//import junit.framework.TestCase;
/**
 * Constant character tables used as fixtures when exercising text-encoding code:
 * a set of ASCII punctuation, a block of accented Latin letters, and the
 * HTML 4 named character entities paired with the characters they denote.
 *
 * <p>The arrays are exposed directly and are therefore mutable; callers should
 * treat them as read-only.
 *
 * @author Alberto Bacchelli &lt;sback@freenetproject.org&gt;
 */
public final class UTFUtil {

    /**
     * Intentionally empty.
     *
     * <p>NOTE(review): presumably a placeholder left over from when this class
     * was a JUnit test case (see the commented-out import at the top of the
     * file), so that a test runner finds at least one test method. Confirm
     * before removing.
     */
    public void testFake() {
    }

    /**
     * A selection of printable ASCII punctuation and symbol characters,
     * starting with the space character. This is not the complete printable
     * ASCII range: letters, digits and a few symbols are not listed.
     */
    public static final char[] PRINTABLE_ASCII = {
        ' ', '!', '@', '#', '$', '%', '^', '&', '(', ')', '+', '=', '{', '}',
        '[', ']', ':', ';', '\\', '\"', '\'', ',', '<', '>', '.', '?', '~', '`'
    };

    /**
     * Accented ("stressed") Latin characters: every code point from U+00C9
     * through U+0177 exactly once (175 values, spanning the end of Latin-1
     * Supplement and most of Latin Extended-A).
     *
     * <p>The order is not ascending. The values are laid out as a grid of
     * 7 columns by 25 rows read row by row; each column is an ascending run of
     * 25 consecutive code points, so the element at index {@code i} is
     * {@code 0xC9 + (i % 7) * 25 + i / 7}.
     */
    public static final char[] STRESSED_UTF = {
        '\u00c9', '\u00e2', '\u00fb', '\u0114', '\u012d', '\u0146', '\u015f',
        '\u00ca', '\u00e3', '\u00fc', '\u0115', '\u012e', '\u0147', '\u0160',
        '\u00cb', '\u00e4', '\u00fd', '\u0116', '\u012f', '\u0148', '\u0161',
        '\u00cc', '\u00e5', '\u00fe', '\u0117', '\u0130', '\u0149', '\u0162',
        '\u00cd', '\u00e6', '\u00ff', '\u0118', '\u0131', '\u014a', '\u0163',
        '\u00ce', '\u00e7', '\u0100', '\u0119', '\u0132', '\u014b', '\u0164',
        '\u00cf', '\u00e8', '\u0101', '\u011a', '\u0133', '\u014c', '\u0165',
        '\u00d0', '\u00e9', '\u0102', '\u011b', '\u0134', '\u014d', '\u0166',
        '\u00d1', '\u00ea', '\u0103', '\u011c', '\u0135', '\u014e', '\u0167',
        '\u00d2', '\u00eb', '\u0104', '\u011d', '\u0136', '\u014f', '\u0168',
        '\u00d3', '\u00ec', '\u0105', '\u011e', '\u0137', '\u0150', '\u0169',
        '\u00d4', '\u00ed', '\u0106', '\u011f', '\u0138', '\u0151', '\u016a',
        '\u00d5', '\u00ee', '\u0107', '\u0120', '\u0139', '\u0152', '\u016b',
        '\u00d6', '\u00ef', '\u0108', '\u0121', '\u013a', '\u0153', '\u016c',
        '\u00d7', '\u00f0', '\u0109', '\u0122', '\u013b', '\u0154', '\u016d',
        '\u00d8', '\u00f1', '\u010a', '\u0123', '\u013c', '\u0155', '\u016e',
        '\u00d9', '\u00f2', '\u010b', '\u0124', '\u013d', '\u0156', '\u016f',
        '\u00da', '\u00f3', '\u010c', '\u0125', '\u013e', '\u0157', '\u0170',
        '\u00db', '\u00f4', '\u010d', '\u0126', '\u013f', '\u0158', '\u0171',
        '\u00dc', '\u00f5', '\u010e', '\u0127', '\u0140', '\u0159', '\u0172',
        '\u00dd', '\u00f6', '\u010f', '\u0128', '\u0141', '\u015a', '\u0173',
        '\u00de', '\u00f7', '\u0110', '\u0129', '\u0142', '\u015b', '\u0174',
        '\u00df', '\u00f8', '\u0111', '\u012a', '\u0143', '\u015c', '\u0175',
        '\u00e0', '\u00f9', '\u0112', '\u012b', '\u0144', '\u015d', '\u0176',
        '\u00e1', '\u00fa', '\u0113', '\u012c', '\u0145', '\u015e', '\u0177'
    };

    /**
     * HTML 4 named character entities. Each row is a two-element array:
     * index 0 is the character itself (as a one-character string) and
     * index 1 is the corresponding entity reference, e.g. {@code &iexcl;}.
     *
     * <p>Characters are written as Unicode escapes and entity references as
     * plain ASCII so that the table does not depend on the encoding used to
     * read this source file (several of the characters are invisible, such as
     * the soft hyphen and the zero-width joiners).
     *
     * <p>See http://www.w3.org/TR/html4/sgml/entities.html for the reference
     * list. Not every entity from that list is present here (for example the
     * no-break space is not).
     */
    public static final String[][] HTML_ENTITIES_UTF = {
        // ISO 8859-1 symbol entities
        {"\u00a1", "&iexcl;"}, {"\u00a2", "&cent;"}, {"\u00a3", "&pound;"}, {"\u00a4", "&curren;"},
        {"\u00a5", "&yen;"}, {"\u00a6", "&brvbar;"}, {"\u00a7", "&sect;"}, {"\u00a8", "&uml;"},
        {"\u00a9", "&copy;"}, {"\u00aa", "&ordf;"}, {"\u00ab", "&laquo;"}, {"\u00ac", "&not;"},
        {"\u00ad", "&shy;"}, {"\u00ae", "&reg;"}, {"\u00af", "&macr;"},
        {"\u00b0", "&deg;"}, {"\u00b1", "&plusmn;"}, {"\u00b2", "&sup2;"}, {"\u00b3", "&sup3;"},
        {"\u00b4", "&acute;"}, {"\u00b5", "&micro;"}, {"\u00b6", "&para;"}, {"\u00b7", "&middot;"},
        {"\u00b8", "&cedil;"}, {"\u00b9", "&sup1;"}, {"\u00ba", "&ordm;"}, {"\u00bb", "&raquo;"},
        {"\u00bc", "&frac14;"}, {"\u00bd", "&frac12;"}, {"\u00be", "&frac34;"}, {"\u00bf", "&iquest;"},
        // ISO 8859-1 character entities
        {"\u00c0", "&Agrave;"}, {"\u00c1", "&Aacute;"}, {"\u00c2", "&Acirc;"}, {"\u00c3", "&Atilde;"},
        {"\u00c4", "&Auml;"}, {"\u00c5", "&Aring;"}, {"\u00c6", "&AElig;"}, {"\u00c7", "&Ccedil;"},
        {"\u00c8", "&Egrave;"}, {"\u00c9", "&Eacute;"}, {"\u00ca", "&Ecirc;"}, {"\u00cb", "&Euml;"},
        {"\u00cc", "&Igrave;"}, {"\u00cd", "&Iacute;"}, {"\u00ce", "&Icirc;"}, {"\u00cf", "&Iuml;"},
        {"\u00d0", "&ETH;"}, {"\u00d1", "&Ntilde;"}, {"\u00d2", "&Ograve;"}, {"\u00d3", "&Oacute;"},
        {"\u00d4", "&Ocirc;"}, {"\u00d5", "&Otilde;"}, {"\u00d6", "&Ouml;"}, {"\u00d7", "&times;"},
        {"\u00d8", "&Oslash;"}, {"\u00d9", "&Ugrave;"}, {"\u00da", "&Uacute;"}, {"\u00db", "&Ucirc;"},
        {"\u00dc", "&Uuml;"}, {"\u00dd", "&Yacute;"}, {"\u00de", "&THORN;"}, {"\u00df", "&szlig;"},
        {"\u00e0", "&agrave;"}, {"\u00e1", "&aacute;"}, {"\u00e2", "&acirc;"}, {"\u00e3", "&atilde;"},
        {"\u00e4", "&auml;"}, {"\u00e5", "&aring;"}, {"\u00e6", "&aelig;"}, {"\u00e7", "&ccedil;"},
        {"\u00e8", "&egrave;"}, {"\u00e9", "&eacute;"}, {"\u00ea", "&ecirc;"}, {"\u00eb", "&euml;"},
        {"\u00ec", "&igrave;"}, {"\u00ed", "&iacute;"}, {"\u00ee", "&icirc;"}, {"\u00ef", "&iuml;"},
        {"\u00f0", "&eth;"}, {"\u00f1", "&ntilde;"},
        {"\u00f2", "&ograve;"}, {"\u00f3", "&oacute;"}, {"\u00f4", "&ocirc;"}, {"\u00f5", "&otilde;"},
        {"\u00f6", "&ouml;"}, {"\u00f7", "&divide;"}, {"\u00f8", "&oslash;"},
        {"\u00f9", "&ugrave;"}, {"\u00fa", "&uacute;"}, {"\u00fb", "&ucirc;"}, {"\u00fc", "&uuml;"},
        {"\u00fd", "&yacute;"}, {"\u00fe", "&thorn;"}, {"\u00ff", "&yuml;"},
        // Greek
        {"\u0391", "&Alpha;"}, {"\u0392", "&Beta;"}, {"\u0393", "&Gamma;"}, {"\u0394", "&Delta;"},
        {"\u0395", "&Epsilon;"}, {"\u0396", "&Zeta;"}, {"\u0397", "&Eta;"}, {"\u0398", "&Theta;"},
        {"\u0399", "&Iota;"}, {"\u039a", "&Kappa;"}, {"\u039b", "&Lambda;"}, {"\u039c", "&Mu;"},
        {"\u039d", "&Nu;"}, {"\u039e", "&Xi;"}, {"\u039f", "&Omicron;"}, {"\u03a0", "&Pi;"},
        {"\u03a1", "&Rho;"}, {"\u03a3", "&Sigma;"}, {"\u03a4", "&Tau;"}, {"\u03a5", "&Upsilon;"},
        {"\u03a6", "&Phi;"}, {"\u03a7", "&Chi;"}, {"\u03a8", "&Psi;"}, {"\u03a9", "&Omega;"},
        {"\u03b1", "&alpha;"}, {"\u03b2", "&beta;"}, {"\u03b3", "&gamma;"}, {"\u03b4", "&delta;"},
        {"\u03b5", "&epsilon;"}, {"\u03b6", "&zeta;"}, {"\u03b7", "&eta;"}, {"\u03b8", "&theta;"},
        {"\u03b9", "&iota;"}, {"\u03ba", "&kappa;"}, {"\u03bb", "&lambda;"}, {"\u03bc", "&mu;"},
        {"\u03bd", "&nu;"}, {"\u03be", "&xi;"}, {"\u03bf", "&omicron;"}, {"\u03c0", "&pi;"},
        {"\u03c1", "&rho;"}, {"\u03c2", "&sigmaf;"}, {"\u03c3", "&sigma;"}, {"\u03c4", "&tau;"},
        {"\u03c5", "&upsilon;"}, {"\u03c6", "&phi;"}, {"\u03c7", "&chi;"}, {"\u03c8", "&psi;"},
        {"\u03c9", "&omega;"}, {"\u03d1", "&thetasym;"}, {"\u03d2", "&upsih;"}, {"\u03d6", "&piv;"},
        // General Punctuation
        {"\u2022", "&bull;"}, {"\u2026", "&hellip;"}, {"\u2032", "&prime;"}, {"\u2033", "&Prime;"},
        {"\u203e", "&oline;"}, {"\u2044", "&frasl;"},
        // Letterlike Symbols
        {"\u2118", "&weierp;"}, {"\u2111", "&image;"}, {"\u211c", "&real;"}, {"\u2122", "&trade;"},
        {"\u2135", "&alefsym;"},
        // Arrows
        {"\u2190", "&larr;"}, {"\u2191", "&uarr;"}, {"\u2192", "&rarr;"}, {"\u2193", "&darr;"},
        {"\u2194", "&harr;"}, {"\u21b5", "&crarr;"}, {"\u21d0", "&lArr;"}, {"\u21d1", "&uArr;"},
        {"\u21d2", "&rArr;"}, {"\u21d3", "&dArr;"}, {"\u21d4", "&hArr;"},
        // Mathematical Operators
        {"\u2200", "&forall;"}, {"\u2202", "&part;"}, {"\u2203", "&exist;"}, {"\u2205", "&empty;"},
        {"\u2207", "&nabla;"}, {"\u2208", "&isin;"}, {"\u2209", "&notin;"}, {"\u220b", "&ni;"},
        {"\u220f", "&prod;"}, {"\u2211", "&sum;"}, {"\u2212", "&minus;"}, {"\u2217", "&lowast;"},
        {"\u221a", "&radic;"}, {"\u221d", "&prop;"}, {"\u221e", "&infin;"}, {"\u2220", "&ang;"},
        {"\u2227", "&and;"}, {"\u2228", "&or;"}, {"\u2229", "&cap;"}, {"\u222a", "&cup;"},
        {"\u222b", "&int;"}, {"\u2234", "&there4;"}, {"\u223c", "&sim;"}, {"\u2245", "&cong;"},
        {"\u2248", "&asymp;"}, {"\u2260", "&ne;"}, {"\u2261", "&equiv;"}, {"\u2264", "&le;"},
        {"\u2265", "&ge;"}, {"\u2282", "&sub;"}, {"\u2283", "&sup;"}, {"\u2284", "&nsub;"},
        {"\u2286", "&sube;"}, {"\u2287", "&supe;"}, {"\u2295", "&oplus;"}, {"\u2297", "&otimes;"},
        {"\u22a5", "&perp;"}, {"\u22c5", "&sdot;"},
        // Miscellaneous Technical
        {"\u2308", "&lceil;"}, {"\u2309", "&rceil;"}, {"\u230a", "&lfloor;"}, {"\u230b", "&rfloor;"},
        {"\u2329", "&lang;"}, {"\u232a", "&rang;"},
        // Geometric Shapes and Miscellaneous Symbols
        {"\u25ca", "&loz;"}, {"\u2660", "&spades;"}, {"\u2663", "&clubs;"}, {"\u2665", "&hearts;"},
        {"\u2666", "&diams;"},
        // Latin Extended-A
        {"\u0152", "&OElig;"}, {"\u0153", "&oelig;"}, {"\u0160", "&Scaron;"}, {"\u0161", "&scaron;"},
        {"\u0178", "&Yuml;"},
        // Spacing Modifier Letters
        {"\u02c6", "&circ;"}, {"\u02dc", "&tilde;"},
        // General Punctuation (spaces, directional marks, dashes, quotes)
        {"\u2002", "&ensp;"}, {"\u2003", "&emsp;"}, {"\u2009", "&thinsp;"}, {"\u200c", "&zwnj;"},
        {"\u200d", "&zwj;"}, {"\u200e", "&lrm;"}, {"\u200f", "&rlm;"}, {"\u2013", "&ndash;"},
        {"\u2014", "&mdash;"}, {"\u2018", "&lsquo;"}, {"\u2019", "&rsquo;"}, {"\u201a", "&sbquo;"},
        {"\u201c", "&ldquo;"}, {"\u201d", "&rdquo;"}, {"\u201e", "&bdquo;"}, {"\u2020", "&dagger;"},
        {"\u2021", "&Dagger;"}, {"\u2030", "&permil;"}, {"\u2039", "&lsaquo;"}, {"\u203a", "&rsaquo;"},
        {"\u20ac", "&euro;"}
    };
}
// Related examples in the same category