List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:mServer.crawler.sender.MediathekSwr.java
private void addToList__() { //Theman suchen final String MUSTER_START = "<div class=\"mediaCon\">"; final String MUSTER_STOPP = "<h2 class=\"rasterHeadline\">OFT GESUCHT</h2>"; final String MUSTER_URL = "<a href=\"tvshow.htm?show="; final String MUSTER_THEMA = "title=\""; MSStringBuilder strSeite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden()); strSeite = getUrlIo.getUriWithDelay(SENDERNAME, "http://swrmediathek.de/tvlist.htm", StandardCharsets.UTF_8, 2, strSeite, "", 4, TimeUnit.SECONDS); int pos = 0;// w ww.j av a 2 s . c o m String url; String thema; int stop = strSeite.indexOf(MUSTER_STOPP); while (!Config.getStop() && (pos = strSeite.indexOf(MUSTER_START, pos)) != -1) { if (stop > 0 && pos > stop) { break; } pos += MUSTER_START.length(); url = strSeite.extract(MUSTER_URL, "\"", pos); thema = strSeite.extract(MUSTER_THEMA, "\"", pos); thema = StringEscapeUtils.unescapeHtml4(thema.trim()); //wird gleich benutzt und muss dann schon stimmen if (thema.isEmpty()) { Log.errorLog(915263078, "kein Thema"); } if (url.isEmpty()) { Log.errorLog(163255009, "keine URL"); } else { //url = url.replace("&", "&"); String[] add = new String[] { "http://swrmediathek.de/tvshow.htm?show=" + url, thema }; listeThemen.addUrl(add); } } }
From source file:com.geemvc.taglib.html.MessageTagSupport.java
/**
 * Resolves the message for {@code key}, applies the configured escaping and unescaping
 * steps, substitutes message parameters, and either stores the result in the page
 * attribute named by {@code var} or writes it to the JSP output.
 *
 * <p>Supported key types are {@code String}, enums and {@code Boolean}. When no message is
 * resolved, the tag body is used; when that is missing too, {@code ???<key>???} is used.
 *
 * @throws JspException if both {@code locale} and a language/country are set, if the key
 *         type is unsupported, or if writing to the JSP output fails
 */
@Override
public void doTag() throws JspException {
    final boolean languageOrCountryGiven = lang != null || country != null;
    if (locale != null && languageOrCountryGiven) {
        throw new JspException(
                "You can only set one of of either 'locale' or a 'language/country' combination.");
    }

    if (lang != null) {
        locale = (country != null) ? new Locale(lang, country) : new Locale(lang);
    }

    // NOTE(review): only the String branch passes 'locale' to the resolver; the enum and
    // Boolean branches use the overload without it. Confirm that this is intended.
    String label;
    if (key instanceof String) {
        // String keys are resolved as-is.
        label = messageResolver.resolve((String) key, locale, requestContext(), true);
    } else if (key.getClass().isEnum()) {
        final Class<?> enumType = key.getClass();
        // First attempt: <enum-fqn>.<enum-value>.
        label = messageResolver.resolve(enumType.getName() + Char.DOT + key, requestContext(), true);
        if (label == null) {
            // Second attempt: <enum-simple-name>.<enum-value>.
            label = messageResolver.resolve(enumType.getSimpleName() + Char.DOT + key, requestContext(),
                    true);
        }
    } else if (key instanceof Boolean) {
        // Resolves Boolean.true or Boolean.false.
        final String booleanKey = Boolean.class.getSimpleName() + Char.DOT
                + String.valueOf(key).toLowerCase();
        label = messageResolver.resolve(booleanKey, requestContext(), true);
    } else {
        throw new JspException("The type '" + key.getClass().getName()
                + "' cannot be used as a message key in MessageTagSupport. Only the types String, Boolean or enums are supported.");
    }

    if (label == null) {
        // Nothing resolved: fall back to the tag body, then to a visible placeholder.
        final String body = getBodyContent();
        label = (body != null) ? body : String.format("???%s???", key);
    } else {
        // The transformations are applied to resolved messages only, in this fixed order.
        if (escapeHTML) {
            label = StringEscapeUtils.escapeHtml4(label);
        }
        if (escapeJavascript) {
            label = StringEscapeUtils.escapeEcmaScript(label);
        }
        if (escapeJson) {
            label = StringEscapeUtils.escapeJson(label);
        }
        if (unescapeHTML) {
            label = StringEscapeUtils.unescapeHtml4(label);
        }
        if (unescapeJavascript) {
            label = StringEscapeUtils.unescapeEcmaScript(label);
        }
        if (unescapeJson) {
            label = StringEscapeUtils.unescapeJson(label);
        }
    }

    // Deal with parameters.
    if (label != null) {
        final List<Object> parameters = messageParameters();
        final boolean hasParameters = parameters != null && !parameters.isEmpty();
        if (hasParameters) {
            label = MessageFormat.format(label, parameters.toArray());
        }
    }

    if (Str.isEmpty(var)) {
        try {
            jspContext.getOut().write(label);
        } catch (IOException e) {
            throw new JspException(e);
        }
    } else {
        jspContext.setAttribute(var, label, scope());
    }
}
From source file:com.jomp16.google.Google.java
@Override public void onGenericMessage(GenericMessageEvent event) throws Exception { ArrayList<String> args = new ArrayList<>(); Matcher matcher = Pattern.compile("[^\\s\"']+|\"([^\"]*)\"|'([^']*)'").matcher(event.getMessage()); while (matcher.find()) { if (matcher.group(1) != null) { // Add double-quoted string without the quotes args.add(matcher.group(1));/*from w w w. j a va 2 s. co m*/ } else if (matcher.group(2) != null) { // Add single-quoted string without the quotes args.add(matcher.group(2)); } else { // Add unquoted word args.add(matcher.group()); } } if (args.get(0).toLowerCase().equals(prefix + "google")) { if (args.size() >= 2) { String url = String.format(GOOGLE, URLEncoder.encode(args.get(1), "UTF-8")); BufferedReader reader = new BufferedReader( new InputStreamReader(Request.Get(url).execute().returnContent().asStream())); GoogleSearch search = new Gson().fromJson(reader, GoogleSearch.class); reader.close(); if (!search.responseStatus.equals("200")) { event.respond(languageManager.getString("Error")); return; } if (search.responseData.results.size() <= 0) { event.respond(languageManager.getString("NoResultsFound")); return; } if (args.size() >= 3) { for (int i = 0; i < Integer.parseInt(args.get(2)); i++) { String title = StringEscapeUtils .unescapeHtml4(search.responseData.results.get(i).titleNoFormatting); String url2 = URLDecoder.decode(search.responseData.results.get(i).unescapedUrl, "UTF-8"); event.respond(languageManager.getString("Result", (i + 1), title, url2)); } } else { String title = StringEscapeUtils .unescapeHtml4(search.responseData.results.get(0).titleNoFormatting); String url2 = URLDecoder.decode(search.responseData.results.get(0).unescapedUrl, "UTF-8"); event.respond(languageManager.getString("Result", 1, title, url2)); } } else { event.respond(languageManager.getString("CommandSyntax", prefix)); } args.clear(); } }
From source file:de.blizzy.documentr.markdown.macro.impl.FlattrMacroTest.java
@Test public void getHtml() { String html = macro.getHtml(macroContext); @SuppressWarnings("nls") String re = "^<a href=\"([^\"]+)\">" + "<img src=\"https://api\\.flattr\\.com/button/flattr-badge-large\\.png\"/>" + "</a>$"; //$NON-NLS-2$ assertRE(re, html);/*from w w w . j a v a2 s. com*/ Matcher matcher = Pattern.compile(re, Pattern.DOTALL).matcher(html); matcher.find(); String url = StringEscapeUtils.unescapeHtml4(matcher.group(1)); UriComponents components = UriComponentsBuilder.fromHttpUrl(url).build(); assertEquals("https", components.getScheme()); //$NON-NLS-1$ assertEquals("flattr.com", components.getHost()); //$NON-NLS-1$ assertEquals(-1, components.getPort()); assertEquals("/submit/auto", components.getPath()); //$NON-NLS-1$ MultiValueMap<String, String> params = components.getQueryParams(); assertEquals(FLATTR_USER_ID, params.getFirst("user_id")); //$NON-NLS-1$ assertEquals(PAGE_URL, params.getFirst("url")); //$NON-NLS-1$ assertEquals(PAGE_TITLE, params.getFirst("title")); //$NON-NLS-1$ assertEquals("text", params.getFirst("category")); //$NON-NLS-1$ //$NON-NLS-2$ assertTrue(params.getFirst("tags").equals(TAG_1 + "," + TAG_2) || //$NON-NLS-1$ //$NON-NLS-2$ params.getFirst("tags").equals(TAG_2 + "," + TAG_1)); //$NON-NLS-1$ //$NON-NLS-2$ }
From source file:com.romeikat.datamessie.core.base.service.download.AbstractDownloader.java
protected InputStream asInputStream(final URLConnection urlConnection, final boolean stripNonValidXMLCharacters, final boolean unescapeHtml4) throws Exception { final InputStream urlInputStream = urlConnection.getInputStream(); final Charset urlCharset = getCharset(urlConnection); final InputStreamReader urlInputStreamReader = new InputStreamReader(urlInputStream, urlCharset); final BufferedReader urlBufferedReader = new BufferedReader(urlInputStreamReader); // Read lines final StringBuilder sb = new StringBuilder(); String line;/*from w w w. j a v a 2 s. c o m*/ while ((line = urlBufferedReader.readLine()) != null) { sb.append(line + "\n"); } urlBufferedReader.close(); // Strip non-valid characters as specified by the XML 1.0 standard String content = sb.toString(); if (stripNonValidXMLCharacters) { content = xmlUtil.stripNonValidXMLCharacters(content); } // Unescape HTML characters if (unescapeHtml4) { content = StringEscapeUtils.unescapeHtml4(content); } // Return as stream return new ByteArrayInputStream(content.getBytes(urlCharset.name())); }
From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java
/** * Extracts the text from an HTML page./*from www . j av a 2 s . c o m*/ * * @param html * @return */ public static String extractText(String html) { //return Jsoup.parse(text).text(); html = replaceImgWithAlt(html); html = safeRemoveAllTags(html); html = StringEscapeUtils.unescapeHtml4(html); return html; }
From source file:emily.command.fun.RedditCommand.java
/**
 * Posts a random entry from the daily top list of a subreddit.
 *
 * <p>The subreddit is {@code args[0]}, or "funny" when no argument is given. Posts are
 * drawn at random until a self post or a post from a whitelisted domain is found; if none
 * qualifies, the last post drawn is used. Self posts are returned as text, posts with a URL
 * longer than 20 characters as title plus URL, and otherwise the first preview image that
 * can be downloaded and converted is sent as a file.
 *
 * @param bot the bot whose queue is used to send the file
 * @param args command arguments; the first one, if present, names the subreddit
 * @param channel the channel the command was issued in
 * @param author the user who issued the command
 * @param inputMessage the message that triggered the command
 * @return the text to reply with, or an empty string when a file was queued for sending
 */
@Override
public String execute(DiscordBot bot, String[] args, MessageChannel channel, User author,
        Message inputMessage) {
    String subReddit = "funny";
    if (args.length > 0) {
        subReddit = args[0];
    }
    List<Post> dailyTop = RedditScraper.getDailyTop(subReddit);
    if (dailyTop.size() == 0) {
        return Templates.command.reddit_sub_not_found.formatGuild(channel);
    }

    Random rng = new Random();
    Post post;
    do {
        int index = rng.nextInt(dailyTop.size());
        post = dailyTop.remove(index);
        if (post.data.is_self) {
            break;
        }
        if (whitelistedDomains.contains(post.data.domain)) {
            break;
        }
    } while (dailyTop.size() > 0);

    if (post.data.is_self) {
        return ":newspaper:\n" + post.data.getTitle() + "\n" + post.data.getSelftext();
    }
    if (post.data.url != null && post.data.url.length() > 20) {
        return post.data.title + "\n" + post.data.url;
    }

    ImagePreview preview = post.data.getPreview();
    if (preview != null && preview.images.size() > 0) {
        if (channel.getType().equals(ChannelType.TEXT)
                && !PermissionUtil.checkPermission((TextChannel) channel,
                        ((TextChannel) channel).getGuild().getSelfMember(), Permission.MESSAGE_ATTACH_FILES)) {
            return Templates.permission_missing.formatGuild(channel, "MESSAGE_ATTACH_FILES");
        }
        for (Image image : preview.images) {
            File outputfile = new File("tmp_" + channel.getId() + ".jpg");
            try (InputStream in = new URL(StringEscapeUtils.unescapeHtml4(image.source.url)).openStream()) {
                // ImageIO.read returns null when no reader understands the data; passing
                // that null on to ImageIO.write would throw IllegalArgumentException.
                java.awt.image.BufferedImage decoded = ImageIO.read(in);
                if (decoded == null) {
                    continue;
                }
                // ImageIO.write returns false when no writer can encode the image as
                // JPEG; in that case there is no usable file to send.
                if (!ImageIO.write(decoded, "jpg", outputfile)) {
                    outputfile.delete();
                    continue;
                }
                bot.queue.add(
                        channel.sendFile(outputfile, new MessageBuilder().append(post.data.title).build()),
                        message -> outputfile.delete());
                return "";
            } catch (IOException e) {
                e.printStackTrace();
                // Do not leave a partially written temp file behind.
                outputfile.delete();
            }
        }
    }
    return Templates.command.reddit_nothing.formatGuild(channel);
}
From source file:com.jaeksoft.searchlib.parser.htmlParser.HtmlDocumentProvider.java
final public String getTitle() { if (titleCache != null) return titleCache; if (rootNode == null) return null; String[] p1 = { "html", "head", "title" }; String title = rootNode.getFirstTextNode(p1); if (title == null) { String[] p2 = { "html", "title" }; title = rootNode.getFirstTextNode(p2); }//from w w w.j a v a 2 s . co m if (title == null) return null; titleCache = StringEscapeUtils.unescapeHtml4(title); return titleCache; }
From source file:com.nttec.everychan.chans.chan420.Chan420JsonMapper.java
/**
 * Builds a {@code PostModel} from one post object of the board's JSON API.
 *
 * <p>Name and subject are HTML-unescaped ({@code span} tags are removed from the name
 * first), the comment is converted with {@code toHtml}, and a post without a parent thread
 * ({@code resto} equal to "0") becomes its own parent. When the post has a file extension,
 * a single attachment is mapped as well.
 *
 * @param object the JSON object describing the post
 * @param boardName the board short name, used to build thumbnail and file paths
 * @return the populated model
 */
public static PostModel mapPostModel(JSONObject object, String boardName) {
    final PostModel post = new PostModel();
    post.number = String.valueOf(object.getLong("no"));

    final String rawName = object.optString("name", "Anonymous").replaceAll("</?span[^>]*?>", "");
    post.name = StringEscapeUtils.unescapeHtml4(rawName);
    post.subject = StringEscapeUtils.unescapeHtml4(object.optString("sub", ""));
    post.comment = object.optString("com", "");
    post.email = null;
    post.trip = object.optString("trip", "");
    post.op = false;

    final String posterId = object.optString("id", "");
    post.sage = posterId.equalsIgnoreCase("Heaven");
    if (!posterId.isEmpty()) {
        post.name = post.name + " ID:" + posterId;
    }

    post.timestamp = object.getLong("time") * 1000;

    final String parent = object.optString("resto", "0");
    post.parentThread = parent.equals("0") ? post.number : parent;
    post.comment = toHtml(post.comment, boardName, post.parentThread);

    final String ext = object.optString("ext", "");
    if (ext.isEmpty()) {
        // No extension means the post carries no file.
        return post;
    }

    final AttachmentModel file = new AttachmentModel();
    if (".jpg".equals(ext) || ".png".equals(ext)) {
        file.type = AttachmentModel.TYPE_IMAGE_STATIC;
    } else if (".gif".equals(ext)) {
        file.type = AttachmentModel.TYPE_IMAGE_GIF;
    } else if (".svg".equals(ext) || ".svgz".equals(ext)) {
        file.type = AttachmentModel.TYPE_IMAGE_SVG;
    } else if (".webm".equals(ext)) {
        file.type = AttachmentModel.TYPE_VIDEO;
    } else {
        file.type = AttachmentModel.TYPE_OTHER_FILE;
    }

    file.size = object.optInt("fsize", -1);
    if (file.size > 0) {
        // "fsize" is divided by 1024 and rounded; presumably bytes to kilobytes.
        file.size = Math.round(file.size / 1024f);
    }
    file.width = object.optInt("w", -1);
    file.height = object.optInt("h", -1);
    file.originalName = object.optString("filename", "") + ext;
    file.isSpoiler = object.optInt("spoiler") == 1;

    // NOTE(review): the numeric value of "filename" is used as the server-side file id;
    // presumably intentional for this API - confirm.
    final long fileId = object.optLong("filename");
    if (fileId != 0) {
        file.thumbnail = "/" + boardName + "/thumb/" + fileId + "s.jpg";
        file.path = "/" + boardName + "/src/" + fileId + ext;
    } else {
        String encodedName = file.originalName;
        try {
            encodedName = URLEncoder.encode(encodedName, "UTF-8").replace("+", "%20");
        } catch (Exception ignored) {
            // Best effort: on failure the unencoded name is used as-is.
        }
        file.path = "/" + boardName + "/src/" + encodedName;
    }

    post.attachments = new AttachmentModel[] { file };
    return post;
}
From source file:com.wellsandwhistles.android.redditsp.reddit.prepared.RedditPreparedMessage.java
/**
 * Builds the message view: a vertical layout with the bold, HTML-unescaped subject on top
 * and the rendered message body below it.
 *
 * @param activity the activity used to create the views
 * @param textColor the colour applied to the subject and passed on to the body
 * @param textSize the text size applied to the subject and passed on to the body
 * @param showLinkButtons passed through to the body's view builder
 * @return the layout containing the subject and the body
 */
@Override
public View getBody(final AppCompatActivity activity, final Integer textColor, final Float textSize,
        final boolean showLinkButtons) {
    final LinearLayout container = new LinearLayout(activity);
    container.setOrientation(LinearLayout.VERTICAL);

    // A missing subject is shown as a fixed placeholder.
    final String subject = (src.subject == null) ? "(no subject)" : src.subject;

    final TextView subjectView = new TextView(activity);
    subjectView.setText(StringEscapeUtils.unescapeHtml4(subject));
    subjectView.setTextColor(textColor);
    subjectView.setTextSize(textSize);
    subjectView.setTypeface(null, Typeface.BOLD);
    container.addView(subjectView);

    final View bodyView = body.buildView(activity, textColor, textSize, showLinkButtons);
    container.addView(bodyView);

    return container;
}