// Java tutorial
/** DR Radio 2 is developed by Jacob Nordfalk, Hanafi Mughrabi and Frederik Aagaard. Some parts of the code are loosely based on Sveriges Radio Play for Android. DR Radio 2 for Android is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation. DR Radio 2 for Android is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DR Radio 2 for Android. If not, see <http://www.gnu.org/licenses/>. */ package dk.dr.radio.data; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.Reader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashMap; import dk.dr.radio.diverse.Log; public class EoDiverse { /** * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports only HTML 3.0 entities. 
* Kilde: http://stackoverflow.com/questions/994331/java-how-to-decode-html-character-entities-in-java-like-httputility-htmldecode */ public static final String unescapeHtml3(final String input) { StringWriter writer = null; int len = input.length(); int i = 1; int st = 0; while (true) { // look for '&' while (i < len && input.charAt(i - 1) != '&') i++; if (i >= len) break; // found '&', look for ';' int j = i; while (j < len && j < i + MAX_ESCAPE + 1 && input.charAt(j) != ';') j++; if (j == len || j < i + MIN_ESCAPE || j == i + MAX_ESCAPE + 1) { i++; continue; } // found escape if (input.charAt(i) == '#') { // numeric escape int k = i + 1; int radix = 10; final char firstChar = input.charAt(k); if (firstChar == 'x' || firstChar == 'X') { k++; radix = 16; } try { int entityValue = Integer.parseInt(input.substring(k, j), radix); if (writer == null) writer = new StringWriter(input.length()); writer.append(input.substring(st, i - 1)); if (entityValue > 0xFFFF) { final char[] chrs = Character.toChars(entityValue); writer.write(chrs[0]); writer.write(chrs[1]); } else { writer.write(entityValue); } } catch (NumberFormatException ex) { i++; continue; } } else { // named escape CharSequence value = lookupMap.get(input.substring(i, j)); if (value == null) { i++; continue; } if (writer == null) writer = new StringWriter(input.length()); writer.append(input.substring(st, i - 1)); writer.append(value); } // skip escape st = j + 1; i = st; } if (writer != null) { writer.append(input.substring(st, len)); return writer.toString(); } return input; } private static final String[][] ESCAPES = { { "\"", "quot" }, // " - double-quote { "&", "amp" }, // & - ampersand { "<", "lt" }, // < - less-than { ">", "gt" }, // > - greater-than // Mapping to escape ISO-8859-1 characters to their named HTML 3.x equivalents. 
{ "\u00A0", "nbsp" }, // non-breaking space { "\u00A1", "iexcl" }, // inverted exclamation mark { "\u00A2", "cent" }, // cent sign { "\u00A3", "pound" }, // pound sign { "\u00A4", "curren" }, // currency sign { "\u00A5", "yen" }, // yen sign = yuan sign { "\u00A6", "brvbar" }, // broken bar = broken vertical bar { "\u00A7", "sect" }, // section sign { "\u00A8", "uml" }, // diaeresis = spacing diaeresis { "\u00A9", "copy" }, // - copyright sign { "\u00AA", "ordf" }, // feminine ordinal indicator { "\u00AB", "laquo" }, // left-pointing double angle quotation mark = left pointing guillemet { "\u00AC", "not" }, // not sign { "\u00AD", "shy" }, // soft hyphen = discretionary hyphen { "\u00AE", "reg" }, // - registered trademark sign { "\u00AF", "macr" }, // macron = spacing macron = overline = APL overbar { "\u00B0", "deg" }, // degree sign { "\u00B1", "plusmn" }, // plus-minus sign = plus-or-minus sign { "\u00B2", "sup2" }, // superscript two = superscript digit two = squared { "\u00B3", "sup3" }, // superscript three = superscript digit three = cubed { "\u00B4", "acute" }, // acute accent = spacing acute { "\u00B5", "micro" }, // micro sign { "\u00B6", "para" }, // pilcrow sign = paragraph sign { "\u00B7", "middot" }, // middle dot = Georgian comma = Greek middle dot { "\u00B8", "cedil" }, // cedilla = spacing cedilla { "\u00B9", "sup1" }, // superscript one = superscript digit one { "\u00BA", "ordm" }, // masculine ordinal indicator { "\u00BB", "raquo" }, // right-pointing double angle quotation mark = right pointing guillemet { "\u00BC", "frac14" }, // vulgar fraction one quarter = fraction one quarter { "\u00BD", "frac12" }, // vulgar fraction one half = fraction one half { "\u00BE", "frac34" }, // vulgar fraction three quarters = fraction three quarters { "\u00BF", "iquest" }, // inverted question mark = turned question mark { "\u00C0", "Agrave" }, // ? 
- uppercase A, grave accent { "\u00C1", "Aacute" }, // - uppercase A, acute accent { "\u00C2", "Acirc" }, // - uppercase A, circumflex accent { "\u00C3", "Atilde" }, // - uppercase A, tilde { "\u00C4", "Auml" }, // - uppercase A, umlaut { "\u00C5", "Aring" }, // - uppercase A, ring { "\u00C6", "AElig" }, // - uppercase AE { "\u00C7", "Ccedil" }, // - uppercase C, cedilla { "\u00C8", "Egrave" }, // - uppercase E, grave accent { "\u00C9", "Eacute" }, // - uppercase E, acute accent { "\u00CA", "Ecirc" }, // - uppercase E, circumflex accent { "\u00CB", "Euml" }, // - uppercase E, umlaut { "\u00CC", "Igrave" }, // - uppercase I, grave accent { "\u00CD", "Iacute" }, // ? - uppercase I, acute accent { "\u00CE", "Icirc" }, // - uppercase I, circumflex accent { "\u00CF", "Iuml" }, // - uppercase I, umlaut { "\u00D0", "ETH" }, // - uppercase Eth, Icelandic { "\u00D1", "Ntilde" }, // - uppercase N, tilde { "\u00D2", "Ograve" }, // - uppercase O, grave accent { "\u00D3", "Oacute" }, // - uppercase O, acute accent { "\u00D4", "Ocirc" }, // - uppercase O, circumflex accent { "\u00D5", "Otilde" }, // - uppercase O, tilde { "\u00D6", "Ouml" }, // - uppercase O, umlaut { "\u00D7", "times" }, // multiplication sign { "\u00D8", "Oslash" }, // - uppercase O, slash { "\u00D9", "Ugrave" }, // - uppercase U, grave accent { "\u00DA", "Uacute" }, // - uppercase U, acute accent { "\u00DB", "Ucirc" }, // - uppercase U, circumflex accent { "\u00DC", "Uuml" }, // - uppercase U, umlaut { "\u00DD", "Yacute" }, // - uppercase Y, acute accent { "\u00DE", "THORN" }, // - uppercase THORN, Icelandic { "\u00DF", "szlig" }, // - lowercase sharps, German { "\u00E0", "agrave" }, // - lowercase a, grave accent { "\u00E1", "aacute" }, // - lowercase a, acute accent { "\u00E2", "acirc" }, // - lowercase a, circumflex accent { "\u00E3", "atilde" }, // - lowercase a, tilde { "\u00E4", "auml" }, // - lowercase a, umlaut { "\u00E5", "aring" }, // - lowercase a, ring { "\u00E6", "aelig" }, // - lowercase ae { 
"\u00E7", "ccedil" }, // - lowercase c, cedilla { "\u00E8", "egrave" }, // - lowercase e, grave accent { "\u00E9", "eacute" }, // - lowercase e, acute accent { "\u00EA", "ecirc" }, // - lowercase e, circumflex accent { "\u00EB", "euml" }, // - lowercase e, umlaut { "\u00EC", "igrave" }, // - lowercase i, grave accent { "\u00ED", "iacute" }, // - lowercase i, acute accent { "\u00EE", "icirc" }, // - lowercase i, circumflex accent { "\u00EF", "iuml" }, // - lowercase i, umlaut { "\u00F0", "eth" }, // - lowercase eth, Icelandic { "\u00F1", "ntilde" }, // ? - lowercase n, tilde { "\u00F2", "ograve" }, // - lowercase o, grave accent { "\u00F3", "oacute" }, // - lowercase o, acute accent { "\u00F4", "ocirc" }, // - lowercase o, circumflex accent { "\u00F5", "otilde" }, // - lowercase o, tilde { "\u00F6", "ouml" }, // - lowercase o, umlaut { "\u00F7", "divide" }, // division sign { "\u00F8", "oslash" }, // - lowercase o, slash { "\u00F9", "ugrave" }, // - lowercase u, grave accent { "\u00FA", "uacute" }, // - lowercase u, acute accent { "\u00FB", "ucirc" }, // - lowercase u, circumflex accent { "\u00FC", "uuml" }, // - lowercase u, umlaut { "\u00FD", "yacute" }, // ? - lowercase y, acute accent { "\u00FE", "thorn" }, // - lowercase thorn, Icelandic { "\u00FF", "yuml" }, // ? - lowercase y, umlaut }; private static final int MIN_ESCAPE = 2; private static final int MAX_ESCAPE = 6; private static final HashMap<String, CharSequence> lookupMap; static { lookupMap = new HashMap<String, CharSequence>(); for (final CharSequence[] seq : ESCAPES) lookupMap.put(seq[1].toString(), seq[0]); } public static String begrnsLgd(String navn) { if (navn == null) return ""; if (navn.length()<80) return navn; return navn.substring(0,80); } }