dk.dr.radio.data.EoDiverse.java Source code

Introduction

Here is the source code for dk.dr.radio.data.EoDiverse.java
Source

  /**
   DR Radio 2 is developed by Jacob Nordfalk, Hanafi Mughrabi and Frederik Aagaard.
   Some parts of the code are loosely based on Sveriges Radio Play for Android.
    
   DR Radio 2 for Android is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as published by
   the Free Software Foundation.
    
   DR Radio 2 for Android is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.
    
   You should have received a copy of the GNU General Public License along with
   DR Radio 2 for Android.  If not, see <http://www.gnu.org/licenses/>.
    
   */

  package dk.dr.radio.data;

  import org.json.JSONArray;
  import org.json.JSONException;
  import org.json.JSONObject;

  import java.io.BufferedInputStream;
  import java.io.File;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.OutputStream;
  import java.io.Reader;
  import java.io.StringWriter;
  import java.io.UnsupportedEncodingException;
  import java.net.HttpURLConnection;
  import java.net.URL;
  import java.net.UnknownHostException;
  import java.util.ArrayList;
  import java.util.HashMap;

  import dk.dr.radio.diverse.Log;

  /**
   * Assorted static string helpers: decoding of HTML 3.x character references and
   * limiting the length of a string.
   */
  public class EoDiverse {

      /**
       * Replaces HTML character references in a string with the characters they denote.
       * <p>
       * Supported are the named HTML 3.x entities listed in {@link #ESCAPES}
       * (for example {@code &amp;amp;} or {@code &amp;oslash;}) and unsigned numeric references in
       * decimal ({@code &amp;#65;}) or hexadecimal ({@code &amp;#x41;}) notation. Only references
       * whose text between the ampersand and the semicolon is {@link #MIN_ESCAPE} to
       * {@link #MAX_ESCAPE} characters long are considered. Anything that is not a
       * recognised reference, including signed numbers such as {@code &amp;#-12;}, is copied
       * to the result unchanged.
       * <p>
       * Source: http://stackoverflow.com/questions/994331/java-how-to-decode-html-character-entities-in-java-like-httputility-htmldecode
       *
       * @param input the text to decode; may be {@code null}
       * @return the decoded text; the very same {@code input} instance when nothing was
       *         decoded; {@code null} when {@code input} is {@code null}
       */
      public static final String unescapeHtml3(final String input) {
          if (input == null) {
              return null;
          }

          final int len = input.length();
          StringBuilder out = null; // created lazily, so entity-free input is returned as-is
          int copiedUpTo = 0;       // everything before this index has already been written to 'out'
          int i = 1;                // candidate index of the first character after an ampersand

          while (true) {
              // Advance until i points just past an ampersand.
              while (i < len && input.charAt(i - 1) != '&') {
                  i++;
              }
              if (i >= len) {
                  break;
              }

              // Look for the terminating semicolon within the allowed distance.
              int j = i;
              while (j < len && j < i + MAX_ESCAPE + 1 && input.charAt(j) != ';') {
                  j++;
              }
              if (j == len || j < i + MIN_ESCAPE || j == i + MAX_ESCAPE + 1) {
                  // No semicolon, or the candidate is too short or too long: not a reference.
                  i++;
                  continue;
              }

              if (input.charAt(i) == '#') {
                  // Numeric reference.
                  final int codePoint = parseNumericEntity(input, i + 1, j);
                  if (codePoint < 0) {
                      i++;
                      continue;
                  }
                  if (out == null) {
                      out = new StringBuilder(len);
                  }
                  out.append(input, copiedUpTo, i - 1);
                  // Emits a surrogate pair automatically for code points above 0xFFFF.
                  out.appendCodePoint(codePoint);
              } else {
                  // Named reference.
                  final String value = lookupMap.get(input.substring(i, j));
                  if (value == null) {
                      i++;
                      continue;
                  }
                  if (out == null) {
                      out = new StringBuilder(len);
                  }
                  out.append(input, copiedUpTo, i - 1);
                  out.append(value);
              }

              // Continue right after the semicolon.
              copiedUpTo = j + 1;
              i = copiedUpTo;
          }

          if (out == null) {
              return input;
          }
          out.append(input, copiedUpTo, len);
          return out.toString();
      }

      /**
       * Parses the body of a numeric character reference, i.e. the text between
       * the hash sign and the semicolon.
       *
       * @param input the string containing the reference
       * @param start index of the first character after the hash sign
       * @param end   index of the terminating semicolon (exclusive end of the body)
       * @return the code point, or -1 when the body is empty, carries a sign, is not a
       *         number in the selected radix, or is not a valid Unicode code point
       */
      private static int parseNumericEntity(final String input, final int start, final int end) {
          if (start >= end) {
              return -1;
          }
          int digitsFrom = start;
          int radix = 10;
          final char marker = input.charAt(digitsFrom);
          if (marker == 'x' || marker == 'X') {
              digitsFrom++;
              radix = 16;
          }
          if (digitsFrom >= end) {
              return -1;
          }
          // Integer.parseInt accepts a leading sign, but a character reference must not have one.
          final char first = input.charAt(digitsFrom);
          if (first == '-' || first == '+') {
              return -1;
          }
          final int value;
          try {
              value = Integer.parseInt(input.substring(digitsFrom, end), radix);
          } catch (NumberFormatException notANumber) {
              return -1;
          }
          return Character.isValidCodePoint(value) ? value : -1;
      }

      /** Pairs of { character, entity name } for the supported named entities. */
      private static final String[][] ESCAPES = { { "\"", "quot" }, // " - double-quote
              { "&", "amp" }, // & - ampersand
              { "<", "lt" }, // < - less-than
              { ">", "gt" }, // > - greater-than

              // Mapping to escape ISO-8859-1 characters to their named HTML 3.x equivalents.
              { "\u00A0", "nbsp" }, // non-breaking space
              { "\u00A1", "iexcl" }, // inverted exclamation mark
              { "\u00A2", "cent" }, // cent sign
              { "\u00A3", "pound" }, // pound sign
              { "\u00A4", "curren" }, // currency sign
              { "\u00A5", "yen" }, // yen sign = yuan sign
              { "\u00A6", "brvbar" }, // broken bar = broken vertical bar
              { "\u00A7", "sect" }, // section sign
              { "\u00A8", "uml" }, // diaeresis = spacing diaeresis
              { "\u00A9", "copy" }, // copyright sign
              { "\u00AA", "ordf" }, // feminine ordinal indicator
              { "\u00AB", "laquo" }, // left-pointing double angle quotation mark = left pointing guillemet
              { "\u00AC", "not" }, // not sign
              { "\u00AD", "shy" }, // soft hyphen = discretionary hyphen
              { "\u00AE", "reg" }, // registered trademark sign
              { "\u00AF", "macr" }, // macron = spacing macron = overline = APL overbar
              { "\u00B0", "deg" }, // degree sign
              { "\u00B1", "plusmn" }, // plus-minus sign = plus-or-minus sign
              { "\u00B2", "sup2" }, // superscript two = superscript digit two = squared
              { "\u00B3", "sup3" }, // superscript three = superscript digit three = cubed
              { "\u00B4", "acute" }, // acute accent = spacing acute
              { "\u00B5", "micro" }, // micro sign
              { "\u00B6", "para" }, // pilcrow sign = paragraph sign
              { "\u00B7", "middot" }, // middle dot = Georgian comma = Greek middle dot
              { "\u00B8", "cedil" }, // cedilla = spacing cedilla
              { "\u00B9", "sup1" }, // superscript one = superscript digit one
              { "\u00BA", "ordm" }, // masculine ordinal indicator
              { "\u00BB", "raquo" }, // right-pointing double angle quotation mark = right pointing guillemet
              { "\u00BC", "frac14" }, // vulgar fraction one quarter = fraction one quarter
              { "\u00BD", "frac12" }, // vulgar fraction one half = fraction one half
              { "\u00BE", "frac34" }, // vulgar fraction three quarters = fraction three quarters
              { "\u00BF", "iquest" }, // inverted question mark = turned question mark
              { "\u00C0", "Agrave" }, // uppercase A, grave accent
              { "\u00C1", "Aacute" }, // uppercase A, acute accent
              { "\u00C2", "Acirc" }, // uppercase A, circumflex accent
              { "\u00C3", "Atilde" }, // uppercase A, tilde
              { "\u00C4", "Auml" }, // uppercase A, umlaut
              { "\u00C5", "Aring" }, // uppercase A, ring
              { "\u00C6", "AElig" }, // uppercase AE
              { "\u00C7", "Ccedil" }, // uppercase C, cedilla
              { "\u00C8", "Egrave" }, // uppercase E, grave accent
              { "\u00C9", "Eacute" }, // uppercase E, acute accent
              { "\u00CA", "Ecirc" }, // uppercase E, circumflex accent
              { "\u00CB", "Euml" }, // uppercase E, umlaut
              { "\u00CC", "Igrave" }, // uppercase I, grave accent
              { "\u00CD", "Iacute" }, // uppercase I, acute accent
              { "\u00CE", "Icirc" }, // uppercase I, circumflex accent
              { "\u00CF", "Iuml" }, // uppercase I, umlaut
              { "\u00D0", "ETH" }, // uppercase Eth, Icelandic
              { "\u00D1", "Ntilde" }, // uppercase N, tilde
              { "\u00D2", "Ograve" }, // uppercase O, grave accent
              { "\u00D3", "Oacute" }, // uppercase O, acute accent
              { "\u00D4", "Ocirc" }, // uppercase O, circumflex accent
              { "\u00D5", "Otilde" }, // uppercase O, tilde
              { "\u00D6", "Ouml" }, // uppercase O, umlaut
              { "\u00D7", "times" }, // multiplication sign
              { "\u00D8", "Oslash" }, // uppercase O, slash
              { "\u00D9", "Ugrave" }, // uppercase U, grave accent
              { "\u00DA", "Uacute" }, // uppercase U, acute accent
              { "\u00DB", "Ucirc" }, // uppercase U, circumflex accent
              { "\u00DC", "Uuml" }, // uppercase U, umlaut
              { "\u00DD", "Yacute" }, // uppercase Y, acute accent
              { "\u00DE", "THORN" }, // uppercase THORN, Icelandic
              { "\u00DF", "szlig" }, // lowercase sharps, German
              { "\u00E0", "agrave" }, // lowercase a, grave accent
              { "\u00E1", "aacute" }, // lowercase a, acute accent
              { "\u00E2", "acirc" }, // lowercase a, circumflex accent
              { "\u00E3", "atilde" }, // lowercase a, tilde
              { "\u00E4", "auml" }, // lowercase a, umlaut
              { "\u00E5", "aring" }, // lowercase a, ring
              { "\u00E6", "aelig" }, // lowercase ae
              { "\u00E7", "ccedil" }, // lowercase c, cedilla
              { "\u00E8", "egrave" }, // lowercase e, grave accent
              { "\u00E9", "eacute" }, // lowercase e, acute accent
              { "\u00EA", "ecirc" }, // lowercase e, circumflex accent
              { "\u00EB", "euml" }, // lowercase e, umlaut
              { "\u00EC", "igrave" }, // lowercase i, grave accent
              { "\u00ED", "iacute" }, // lowercase i, acute accent
              { "\u00EE", "icirc" }, // lowercase i, circumflex accent
              { "\u00EF", "iuml" }, // lowercase i, umlaut
              { "\u00F0", "eth" }, // lowercase eth, Icelandic
              { "\u00F1", "ntilde" }, // lowercase n, tilde
              { "\u00F2", "ograve" }, // lowercase o, grave accent
              { "\u00F3", "oacute" }, // lowercase o, acute accent
              { "\u00F4", "ocirc" }, // lowercase o, circumflex accent
              { "\u00F5", "otilde" }, // lowercase o, tilde
              { "\u00F6", "ouml" }, // lowercase o, umlaut
              { "\u00F7", "divide" }, // division sign
              { "\u00F8", "oslash" }, // lowercase o, slash
              { "\u00F9", "ugrave" }, // lowercase u, grave accent
              { "\u00FA", "uacute" }, // lowercase u, acute accent
              { "\u00FB", "ucirc" }, // lowercase u, circumflex accent
              { "\u00FC", "uuml" }, // lowercase u, umlaut
              { "\u00FD", "yacute" }, // lowercase y, acute accent
              { "\u00FE", "thorn" }, // lowercase thorn, Icelandic
              { "\u00FF", "yuml" }, // lowercase y, umlaut
      };

      /** Shortest accepted text between the ampersand and the semicolon (e.g. "lt", "#9"). */
      private static final int MIN_ESCAPE = 2;
      /** Longest accepted text between the ampersand and the semicolon (e.g. "Agrave", "#x2019"). */
      private static final int MAX_ESCAPE = 6;

      /** Entity name (without ampersand and semicolon) mapped to the character it stands for. */
      private static final HashMap<String, String> lookupMap;
      static {
          lookupMap = new HashMap<String, String>();
          for (final String[] pair : ESCAPES) {
              lookupMap.put(pair[1], pair[0]);
          }
      }

      /** Upper bound, in chars, on the strings returned by {@link #begrnsLgd(String)}. */
      private static final int MAX_NAVN_LENGTH = 80;

      /**
       * Limits a string to at most 80 chars.
       * <p>
       * If the cut would fall between the two halves of a surrogate pair, the whole pair
       * is dropped so the result never ends in a lone high surrogate introduced by the cut.
       *
       * @param navn the string to limit; may be {@code null}
       * @return the empty string for {@code null}, {@code navn} itself when it is
       *         short enough, otherwise its leading 80 (or 79, see above) chars
       */
      public static String begrnsLgd(String navn) {
          if (navn == null) {
              return "";
          }
          if (navn.length() <= MAX_NAVN_LENGTH) {
              return navn;
          }
          int end = MAX_NAVN_LENGTH;
          if (Character.isHighSurrogate(navn.charAt(end - 1)) && Character.isLowSurrogate(navn.charAt(end))) {
              end--;
          }
          return navn.substring(0, end);
      }
  }