Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:mServer.crawler.sender.MediathekSwr.java

/**
 * Downloads the SWR Mediathek TV list page and registers every show link found on it
 * in {@code listeThemen} as a {@code {url, title}} pair.
 *
 * <p>Scanning ends when no further start marker is found, when the "OFT GESUCHT"
 * headline has been passed, or when {@code Config.getStop()} reports a stop request.
 * An entry with an empty title is logged but still added; an entry with an empty URL
 * is logged and skipped.
 */
private void addToList__() {
    // markers used to locate the show entries inside the downloaded page
    final String MUSTER_START = "<div class=\"mediaCon\">";
    final String MUSTER_STOPP = "<h2 class=\"rasterHeadline\">OFT GESUCHT</h2>";
    final String MUSTER_URL = "<a href=\"tvshow.htm?show=";
    final String MUSTER_THEMA = "title=\"";

    MSStringBuilder strSeite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
    GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
    strSeite = getUrlIo.getUriWithDelay(SENDERNAME, "http://swrmediathek.de/tvlist.htm", StandardCharsets.UTF_8,
            2, strSeite, "", 4, TimeUnit.SECONDS);

    final int stopIndex = strSeite.indexOf(MUSTER_STOPP);
    int cursor = 0;
    while (!Config.getStop()) {
        cursor = strSeite.indexOf(MUSTER_START, cursor);
        if (cursor == -1) {
            break;
        }
        // everything behind the stop headline is no longer part of the show list
        if (stopIndex > 0 && cursor > stopIndex) {
            break;
        }
        cursor += MUSTER_START.length();

        final String showId = strSeite.extract(MUSTER_URL, "\"", cursor);
        final String rawTitle = strSeite.extract(MUSTER_THEMA, "\"", cursor);
        // the title is used right away, so it has to be unescaped at this point already
        final String title = StringEscapeUtils.unescapeHtml4(rawTitle.trim());

        if (title.isEmpty()) {
            Log.errorLog(915263078, "kein Thema");
        }
        if (showId.isEmpty()) {
            Log.errorLog(163255009, "keine URL");
            continue;
        }
        listeThemen.addUrl(new String[] { "http://swrmediathek.de/tvshow.htm?show=" + showId, title });
    }
}

From source file:com.geemvc.taglib.html.MessageTagSupport.java

/**
 * Resolves the message for {@code key} and either stores it in the page attribute named
 * by {@code var} or writes it to the JSP output.
 *
 * <p>Supported key types are {@code String}, enums (looked up first as
 * {@code <enum-fqn>.<value>}, then as {@code <enum-simple-name>.<value>}) and
 * {@code Boolean} (looked up as {@code Boolean.true} / {@code Boolean.false}).
 * A resolved label is passed through whichever escape/unescape steps are enabled.
 * If nothing could be resolved, the tag body is used, and failing that the
 * placeholder {@code ???key???}. Message parameters, if any, are applied last via
 * {@link MessageFormat}.
 *
 * <p>NOTE(review): a {@code null} key is not handled and fails in
 * {@code key.getClass()} - confirm whether callers can pass one.
 *
 * @throws JspException if both {@code locale} and {@code lang}/{@code country} are set,
 *         if the key type is unsupported, or if writing to the output fails
 */
@Override
public void doTag() throws JspException {
    if (locale != null && (lang != null || country != null)) {
        throw new JspException(
                "You can only set one of either 'locale' or a 'language/country' combination.");
    }

    if (lang != null && country != null) {
        locale = new Locale(lang, country);
    } else if (lang != null) {
        locale = new Locale(lang);
    }

    String label = null;

    // The locale is handed to the resolver for every key type; previously only String
    // keys honoured it, so enum and Boolean keys ignored the tag's locale attributes.
    if (key instanceof String) {
        label = messageResolver.resolve((String) key, locale, requestContext(), true);
    } else if (key.getClass().isEnum()) {
        // Attempt to resolve <enum-fqn>.<enum-value>.
        label = messageResolver.resolve(
                new StringBuilder(key.getClass().getName()).append(Char.DOT).append(key).toString(),
                locale, requestContext(), true);

        // Attempt to resolve <enum-simple-name>.<enum-value>.
        if (label == null) {
            label = messageResolver.resolve(
                    new StringBuilder(key.getClass().getSimpleName()).append(Char.DOT).append(key).toString(),
                    locale, requestContext(), true);
        }
    } else if (key instanceof Boolean) {
        // Attempt to resolve Boolean.true or Boolean.false.
        label = messageResolver.resolve(new StringBuilder(Boolean.class.getSimpleName()).append(Char.DOT)
                .append(String.valueOf(key).toLowerCase()).toString(), locale, requestContext(), true);
    } else {
        throw new JspException("The type '" + key.getClass().getName()
                + "' cannot be used as a message key in MessageTagSupport. Only the types String, Boolean or enums are supported.");
    }

    if (label != null) {
        // Escaping is applied to resolved messages only, not to the fallbacks below.
        if (escapeHTML) {
            label = StringEscapeUtils.escapeHtml4(label);
        }
        if (escapeJavascript) {
            label = StringEscapeUtils.escapeEcmaScript(label);
        }
        if (escapeJson) {
            label = StringEscapeUtils.escapeJson(label);
        }
        if (unescapeHTML) {
            label = StringEscapeUtils.unescapeHtml4(label);
        }
        if (unescapeJavascript) {
            label = StringEscapeUtils.unescapeEcmaScript(label);
        }
        if (unescapeJson) {
            label = StringEscapeUtils.unescapeJson(label);
        }
    } else {
        // Nothing resolved: fall back to the tag body, then to a visible placeholder.
        label = getBodyContent();

        if (label == null) {
            label = String.format("???%s???", key);
        }
    }

    // Deal with parameters. The label is guaranteed to be non-null at this point.
    List<Object> params = messageParameters();
    if (params != null && !params.isEmpty()) {
        label = MessageFormat.format(label, params.toArray());
    }

    if (!Str.isEmpty(var)) {
        jspContext.setAttribute(var, label, scope());
    } else {
        try {
            jspContext.getOut().write(label);
        } catch (IOException e) {
            throw new JspException(e);
        }
    }
}

From source file:com.jomp16.google.Google.java

/**
 * Handles the {@code <prefix>google <query> [count]} chat command.
 *
 * <p>The message is split into whitespace-separated words, where single- or
 * double-quoted runs count as one argument. The query is sent to the URL built from
 * {@code GOOGLE} and the first result (or the first {@code count} results, capped at
 * the number of results actually returned) is sent back through
 * {@code event.respond}.
 *
 * @param event the incoming message event
 * @throws Exception if the request fails or {@code count} is not a valid integer
 */
@Override
public void onGenericMessage(GenericMessageEvent event) throws Exception {
    ArrayList<String> args = new ArrayList<>();
    Matcher matcher = Pattern.compile("[^\\s\"']+|\"([^\"]*)\"|'([^']*)'").matcher(event.getMessage());
    while (matcher.find()) {
        if (matcher.group(1) != null) {
            // Add double-quoted string without the quotes
            args.add(matcher.group(1));
        } else if (matcher.group(2) != null) {
            // Add single-quoted string without the quotes
            args.add(matcher.group(2));
        } else {
            // Add unquoted word
            args.add(matcher.group());
        }
    }

    // An empty or whitespace-only message produces no tokens at all.
    if (args.isEmpty() || !args.get(0).toLowerCase().equals(prefix + "google")) {
        return;
    }
    if (args.size() < 2) {
        event.respond(languageManager.getString("CommandSyntax", prefix));
        return;
    }

    String url = String.format(GOOGLE, URLEncoder.encode(args.get(1), "UTF-8"));
    GoogleSearch search;
    // try-with-resources closes the reader even when parsing fails; the response is
    // decoded as UTF-8 explicitly instead of relying on the platform default charset.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(Request.Get(url).execute().returnContent().asStream(), "UTF-8"))) {
        search = new Gson().fromJson(reader, GoogleSearch.class);
    }

    // Gson returns null for an empty document; treat that like any other error response.
    if (search == null || !"200".equals(search.responseStatus)) {
        event.respond(languageManager.getString("Error"));
        return;
    }
    if (search.responseData.results.size() <= 0) {
        event.respond(languageManager.getString("NoResultsFound"));
        return;
    }

    // Without an explicit count only the first result is shown; an explicit count is
    // capped so it can never index past the results that were actually returned.
    int count = 1;
    if (args.size() >= 3) {
        count = Math.min(Integer.parseInt(args.get(2)), search.responseData.results.size());
    }
    for (int i = 0; i < count; i++) {
        String title = StringEscapeUtils
                .unescapeHtml4(search.responseData.results.get(i).titleNoFormatting);
        String url2 = URLDecoder.decode(search.responseData.results.get(i).unescapedUrl, "UTF-8");
        event.respond(languageManager.getString("Result", (i + 1), title, url2));
    }
}

From source file:de.blizzy.documentr.markdown.macro.impl.FlattrMacroTest.java

/**
 * Checks that the macro renders a Flattr badge link and that the link's URL carries
 * the expected scheme, host, path and query parameters.
 */
@Test
public void getHtml() {
    String markup = macro.getHtml(macroContext);
    @SuppressWarnings("nls")
    String linkPattern = "^<a href=\"([^\"]+)\">"
            + "<img src=\"https://api\\.flattr\\.com/button/flattr-badge-large\\.png\"/>" + "</a>$"; //$NON-NLS-2$
    assertRE(linkPattern, markup);

    // Pull the href out of the rendered link and undo its HTML entity escaping.
    Matcher hrefMatcher = Pattern.compile(linkPattern, Pattern.DOTALL).matcher(markup);
    hrefMatcher.find();
    String href = StringEscapeUtils.unescapeHtml4(hrefMatcher.group(1));

    UriComponents uri = UriComponentsBuilder.fromHttpUrl(href).build();
    assertEquals("https", uri.getScheme()); //$NON-NLS-1$
    assertEquals("flattr.com", uri.getHost()); //$NON-NLS-1$
    assertEquals(-1, uri.getPort());
    assertEquals("/submit/auto", uri.getPath()); //$NON-NLS-1$

    MultiValueMap<String, String> query = uri.getQueryParams();
    assertEquals(FLATTR_USER_ID, query.getFirst("user_id")); //$NON-NLS-1$
    assertEquals(PAGE_URL, query.getFirst("url")); //$NON-NLS-1$
    assertEquals(PAGE_TITLE, query.getFirst("title")); //$NON-NLS-1$
    assertEquals("text", query.getFirst("category")); //$NON-NLS-1$ //$NON-NLS-2$

    // Either ordering of the two tags is accepted.
    String tags = query.getFirst("tags"); //$NON-NLS-1$
    assertTrue(tags.equals(TAG_1 + "," + TAG_2) || //$NON-NLS-1$
            tags.equals(TAG_2 + "," + TAG_1)); //$NON-NLS-1$
}

From source file:com.romeikat.datamessie.core.base.service.download.AbstractDownloader.java

/**
 * Reads the complete response of {@code urlConnection} as text, optionally cleans it,
 * and returns it as an in-memory stream encoded with the connection's charset.
 *
 * <p>The content is read line by line; every line is terminated with a single
 * {@code '\n'} in the result, whatever line separator the source used.
 *
 * @param urlConnection the connection to read from
 * @param stripNonValidXMLCharacters whether to remove characters that are not valid
 *        according to the XML 1.0 standard
 * @param unescapeHtml4 whether to replace HTML 4 entity escapes with the characters
 *        they stand for
 * @return a stream over the processed content
 * @throws Exception if the connection cannot be read
 */
protected InputStream asInputStream(final URLConnection urlConnection, final boolean stripNonValidXMLCharacters,
        final boolean unescapeHtml4) throws Exception {
    // try-with-resources guarantees the connection stream is released even when
    // determining the charset or reading the content fails half-way.
    try (InputStream urlInputStream = urlConnection.getInputStream()) {
        final Charset urlCharset = getCharset(urlConnection);
        final BufferedReader urlBufferedReader = new BufferedReader(
                new InputStreamReader(urlInputStream, urlCharset));
        // Read lines
        final StringBuilder sb = new StringBuilder();
        String line;
        while ((line = urlBufferedReader.readLine()) != null) {
            sb.append(line).append('\n');
        }
        // Strip non-valid characters as specified by the XML 1.0 standard
        String content = sb.toString();
        if (stripNonValidXMLCharacters) {
            content = xmlUtil.stripNonValidXMLCharacters(content);
        }
        // Unescape HTML characters
        if (unescapeHtml4) {
            content = StringEscapeUtils.unescapeHtml4(content);
        }
        // Return as stream, encoded with the same charset the content was decoded with
        return new ByteArrayInputStream(content.getBytes(urlCharset));
    }
}

From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java

/**
 * Extracts the text from an HTML page./*from   www  .  j av a  2  s .  c o  m*/
 * 
 * @param html
 * @return 
 */
public static String extractText(String html) {
    //return Jsoup.parse(text).text();
    html = replaceImgWithAlt(html);
    html = safeRemoveAllTags(html);

    html = StringEscapeUtils.unescapeHtml4(html);

    return html;
}

From source file:emily.command.fun.RedditCommand.java

/**
 * Picks a random post from the daily top list of a subreddit and returns or sends it.
 *
 * <p>The subreddit is {@code args[0]}, or {@code "funny"} when no argument is given.
 * Posts are drawn at random until a self post or a post from a whitelisted domain is
 * found; if none qualifies, the last post drawn is used. Self posts are returned as
 * text, posts with a URL longer than 20 characters as title plus URL. Otherwise the
 * first preview image that can be downloaded and re-encoded as JPEG is sent as a file.
 *
 * @return the text to reply with, {@code ""} when an image was queued for sending, or
 *         a template message when nothing could be shown
 */
@Override
public String execute(DiscordBot bot, String[] args, MessageChannel channel, User author,
        Message inputMessage) {
    String subReddit = "funny";
    if (args.length > 0) {
        subReddit = args[0];
    }
    List<Post> dailyTop = RedditScraper.getDailyTop(subReddit);
    if (dailyTop.size() == 0) {
        return Templates.command.reddit_sub_not_found.formatGuild(channel);
    }
    Random rng = new Random();
    Post post;
    do {
        int index = rng.nextInt(dailyTop.size());
        post = dailyTop.remove(index);
        if (post.data.is_self) {
            break;
        }
        if (whitelistedDomains.contains(post.data.domain)) {
            break;
        }
    } while (dailyTop.size() > 0);
    if (post.data.is_self) {
        return ":newspaper:\n" + post.data.getTitle() + "\n" + post.data.getSelftext();
    }
    if (post.data.url != null && post.data.url.length() > 20) {
        return post.data.title + "\n" + post.data.url;
    }
    ImagePreview preview = post.data.getPreview();
    if (preview != null && preview.images.size() > 0) {
        if (channel.getType().equals(ChannelType.TEXT) && !PermissionUtil.checkPermission((TextChannel) channel,
                ((TextChannel) channel).getGuild().getSelfMember(), Permission.MESSAGE_ATTACH_FILES)) {
            return Templates.permission_missing.formatGuild(channel, "MESSAGE_ATTACH_FILES");
        }
        for (Image image : preview.images) {
            try (InputStream in = new URL(StringEscapeUtils.unescapeHtml4(image.source.url)).openStream()) {
                // ImageIO.read returns null when no registered reader can decode the
                // data; passing that to ImageIO.write would throw an unchecked
                // IllegalArgumentException, so try the next preview instead.
                java.awt.image.BufferedImage decoded = ImageIO.read(in);
                if (decoded == null) {
                    continue;
                }
                File outputfile = new File("tmp_" + channel.getId() + ".jpg");
                // ImageIO.write returns false when no writer accepts the image as
                // "jpg"; nothing usable was written then, so do not send the file.
                if (!ImageIO.write(decoded, "jpg", outputfile)) {
                    continue;
                }
                bot.queue.add(
                        channel.sendFile(outputfile, new MessageBuilder().append(post.data.title).build()),
                        message -> outputfile.delete());
                return "";
            } catch (IOException e) {
                // A failed download only skips this preview; the next one is tried.
                e.printStackTrace();
            }
        }
    }
    return Templates.command.reddit_nothing.formatGuild(channel);
}

From source file:com.jaeksoft.searchlib.parser.htmlParser.HtmlDocumentProvider.java

/**
 * Returns the document title with HTML 4 entity escapes unescaped.
 *
 * <p>The title is taken from the first text node under {@code html/head/title}, or
 * under {@code html/title} if the former yields nothing. A title that was found is
 * cached in {@code titleCache} and reused on later calls.
 *
 * @return the unescaped title, or {@code null} if there is no root node or no title
 */
final public String getTitle() {
    if (titleCache == null && rootNode != null) {
        String rawTitle = rootNode.getFirstTextNode(new String[] { "html", "head", "title" });
        if (rawTitle == null) {
            rawTitle = rootNode.getFirstTextNode(new String[] { "html", "title" });
        }
        if (rawTitle != null) {
            titleCache = StringEscapeUtils.unescapeHtml4(rawTitle);
        }
    }
    return titleCache;
}

From source file:com.nttec.everychan.chans.chan420.Chan420JsonMapper.java

/**
 * Builds a {@code PostModel} from one post entry of the board's JSON.
 *
 * <p>Name and subject are HTML-unescaped (the name additionally loses any
 * {@code <span>} tags), the comment is converted with {@code toHtml}, and a post
 * without a {@code resto} value, or with {@code "0"}, is treated as its own parent
 * thread. If the entry has an {@code ext} value, a single attachment is added.
 *
 * @param object the JSON object describing the post
 * @param boardName the board the post belongs to; used to build attachment paths
 * @return the populated model
 */
public static PostModel mapPostModel(JSONObject object, String boardName) {
    PostModel post = new PostModel();
    post.number = Long.toString(object.getLong("no"));

    String rawName = object.optString("name", "Anonymous").replaceAll("</?span[^>]*?>", "");
    post.name = StringEscapeUtils.unescapeHtml4(rawName);
    post.subject = StringEscapeUtils.unescapeHtml4(object.optString("sub", ""));
    post.email = null;
    post.trip = object.optString("trip", "");
    post.op = false;

    // The poster id doubles as the sage marker and is appended to the displayed name.
    String posterId = object.optString("id", "");
    post.sage = posterId.equalsIgnoreCase("Heaven");
    if (!"".equals(posterId)) {
        post.name += (" ID:" + posterId);
    }

    // The JSON value is multiplied by 1000 before being stored.
    post.timestamp = object.getLong("time") * 1000;

    String parent = object.optString("resto", "0");
    post.parentThread = parent.equals("0") ? post.number : parent;
    post.comment = toHtml(object.optString("com", ""), boardName, post.parentThread);

    String ext = object.optString("ext", "");
    if ("".equals(ext)) {
        // No extension means no attached file.
        return post;
    }

    AttachmentModel file = new AttachmentModel();
    switch (ext) {
    case ".jpg":
    case ".png":
        file.type = AttachmentModel.TYPE_IMAGE_STATIC;
        break;
    case ".gif":
        file.type = AttachmentModel.TYPE_IMAGE_GIF;
        break;
    case ".svg":
    case ".svgz":
        file.type = AttachmentModel.TYPE_IMAGE_SVG;
        break;
    case ".webm":
        file.type = AttachmentModel.TYPE_VIDEO;
        break;
    default:
        file.type = AttachmentModel.TYPE_OTHER_FILE;
    }

    // A positive size is divided by 1024 and rounded; -1 (missing) and 0 are kept as is.
    int rawSize = object.optInt("fsize", -1);
    file.size = rawSize > 0 ? Math.round(rawSize / 1024f) : rawSize;
    file.width = object.optInt("w", -1);
    file.height = object.optInt("h", -1);
    file.originalName = object.optString("filename", "") + ext;
    file.isSpoiler = object.optInt("spoiler") == 1;

    // A numeric filename is used directly for both paths; any other filename is
    // URL-encoded and gets no thumbnail.
    long numericName = object.optLong("filename");
    if (numericName != 0) {
        file.thumbnail = "/" + boardName + "/thumb/" + numericName + "s.jpg";
        file.path = "/" + boardName + "/src/" + numericName + ext;
    } else {
        String encodedName = file.originalName;
        try {
            encodedName = URLEncoder.encode(encodedName, "UTF-8").replace("+", "%20");
        } catch (Exception ignored) {
            // best effort: fall back to the unencoded name
        }
        file.path = "/" + boardName + "/src/" + encodedName;
    }

    post.attachments = new AttachmentModel[] { file };
    return post;
}

From source file:com.wellsandwhistles.android.redditsp.reddit.prepared.RedditPreparedMessage.java

/**
 * Builds the view for this message: a vertical layout holding the bold, HTML-unescaped
 * subject line followed by the view built from {@code body}.
 *
 * @param activity the activity used as context for the created views
 * @param textColor the text colour applied to the subject and passed on to the body
 * @param textSize the text size applied to the subject and passed on to the body
 * @param showLinkButtons passed through to the body view builder
 * @return the layout containing subject and body
 */
@Override
public View getBody(final AppCompatActivity activity, final Integer textColor, final Float textSize,
        final boolean showLinkButtons) {

    // Messages without a subject get a fixed placeholder.
    final String rawSubject = src.subject != null ? src.subject : "(no subject)";

    final TextView subjectView = new TextView(activity);
    subjectView.setText(StringEscapeUtils.unescapeHtml4(rawSubject));
    subjectView.setTextColor(textColor);
    subjectView.setTextSize(textSize);
    subjectView.setTypeface(null, Typeface.BOLD);

    final LinearLayout container = new LinearLayout(activity);
    container.setOrientation(LinearLayout.VERTICAL);
    container.addView(subjectView);
    container.addView(body.buildView(activity, textColor, textSize, showLinkButtons));

    return container;
}