List of usage examples for the method org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:com.jaeksoft.searchlib.parser.htmlParser.HtmlDocumentProvider.java
/**
 * Reads the {@code content} attribute of a node and decodes its HTML entities.
 *
 * @param node the node whose {@code content} attribute text is read
 * @return the attribute text with HTML 4 entities unescaped, or {@code null}
 *         when the node reports no text for that attribute
 */
final public static String getMetaContent(final HtmlNodeAbstract<?> node) {
    final String rawContent = node.getAttributeText("content");
    return (rawContent == null) ? null : StringEscapeUtils.unescapeHtml4(rawContent);
}
From source file:com.wellsandwhistles.android.redditsp.image.ImageInfo.java
public static ImageInfo parseImgur(final JsonBufferedObject object) throws IOException, InterruptedException { final JsonBufferedObject image = object.getObject("image"); final JsonBufferedObject links = object.getObject("links"); String urlOriginal = null;// w w w . ja v a2 s .c o m String urlBigSquare = null; String title = null; String caption = null; String type = null; boolean isAnimated = false; Long width = null; Long height = null; Long size = null; if (image != null) { title = image.getString("title"); caption = image.getString("caption"); type = image.getString("type"); isAnimated = "true".equals(image.getString("animated")); width = image.getLong("width"); height = image.getLong("height"); size = image.getLong("size"); } if (links != null) { urlOriginal = links.getString("original"); if (urlOriginal != null && isAnimated) urlOriginal = urlOriginal.replace(".gif", ".mp4"); urlBigSquare = links.getString("big_square"); } if (title != null) { title = StringEscapeUtils.unescapeHtml4(title); } if (caption != null) { caption = StringEscapeUtils.unescapeHtml4(caption); } return new ImageInfo(urlOriginal, urlBigSquare, title, caption, type, isAnimated, width, height, size, isAnimated ? MediaType.VIDEO : MediaType.IMAGE); }
From source file:msearch.tool.MSFunktionen.java
public static void unescape(DatenFilm film) { film.arr[DatenFilm.FILM_THEMA_NR] = StringEscapeUtils.unescapeXml(film.arr[DatenFilm.FILM_THEMA_NR].trim()); film.arr[DatenFilm.FILM_THEMA_NR] = StringEscapeUtils .unescapeHtml4(film.arr[DatenFilm.FILM_THEMA_NR].trim()); // Beschreibung film.arr[DatenFilm.FILM_BESCHREIBUNG_NR] = StringEscapeUtils .unescapeXml(film.arr[DatenFilm.FILM_BESCHREIBUNG_NR].trim()); film.arr[DatenFilm.FILM_BESCHREIBUNG_NR] = StringEscapeUtils .unescapeHtml4(film.arr[DatenFilm.FILM_BESCHREIBUNG_NR].trim()); film.arr[DatenFilm.FILM_BESCHREIBUNG_NR] = removeHtml(film.arr[DatenFilm.FILM_BESCHREIBUNG_NR]); // Titel/* w w w . j a va2 s . c o m*/ film.arr[DatenFilm.FILM_TITEL_NR] = StringEscapeUtils.unescapeXml(film.arr[DatenFilm.FILM_TITEL_NR].trim()); film.arr[DatenFilm.FILM_TITEL_NR] = StringEscapeUtils .unescapeHtml4(film.arr[DatenFilm.FILM_TITEL_NR].trim()); }
From source file:com.glowinteractive.reforger.Item.java
public synchronized void parse() { if (!_parsed) { String[] pair;// ww w.j a va 2s. c o m String[] elements; String attribute; URL url = null; StringBuilder wowhead = new StringBuilder("http://www.wowhead.com/"); TagNode ref = null; //<editor-fold defaultstate="collapsed" desc="Parse name."> ref = _data.findElementByAttValue("class", "name-shadow", true, true); assert ref != null && ref.getText() != null : "Error: unable to determine item name."; _name = (ref != null) ? StringEscapeUtils.unescapeHtml4(ref.getText().toString()) : ""; //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Parse item ID."> ref = _data.findElementByName("a", false); assert ref != null : "Error: unable to determine item attributes."; attribute = ref.getAttributeByName("href"); elements = attribute.split("/wow/en/item/"); assert elements.length == 2 : "Error: unexpected Armory data format."; wowhead.append("item=").append(elements[1]); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Extract data-item string."> ref = _data.findElementByName("a", false); assert ref != null : "Error: unable to determine item attributes."; attribute = ref.getAttributeByName("data-item"); elements = StringEscapeUtils.unescapeHtml4((attribute != null) ? 
attribute : "").split("&"); //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Parse Armory data-item attributes."> for (String e : elements) { pair = e.split("="); if ("e".equals(pair[0])) { // Permanent Enchantment wowhead.append("&ench=").append(pair[1]); } if ("re".equals(pair[0])) { // Reforge ID (not currently supported by Wowhead) wowhead.append("&rf=").append(pair[1]); } if ("es".equals(pair[0])) { // Additional Socket wowhead.append("&sock"); } if ("r".equals(pair[0])) { // Random Itemization wowhead.append("&rand=").append(pair[1]); } if ("set".equals(pair[0])) { // Set Pieces Equipped wowhead.append("&pcs=").append(pair[1].replace(',', ':')); } } //</editor-fold> //<editor-fold defaultstate="collapsed" desc="Parse Armory gem ID's."> TagNode[] gems = _data.getElementsByAttValue("class", "gem", true, true); final int GEM_COUNT = gems.length; if (GEM_COUNT != 0) { String suffix; wowhead.append("&gems="); for (int i = 0; i < GEM_COUNT; ++i) { suffix = gems[i].getAttributeByName("href").replace("/wow/en/item/", ""); wowhead.append(suffix); if (i + 1 < GEM_COUNT) { wowhead.append(":"); } } } //</editor-fold> wowhead.append("&power"); System.out.println(" " + _name); //<editor-fold defaultstate="collapsed" desc="Download and parse Wowhead JSON data."> try { url = new URL(wowhead.toString()); } catch (Exception e) { Logger.getLogger(Item.class.getSimpleName()).log(Level.SEVERE, null, e); } Pattern p = Pattern.compile(TOOLTIP_FORMAT); Matcher m = p.matcher(URLRetriever.fetchContents(url)); String itemPayload = (m.find()) ? m.group(1) : ""; HtmlCleaner parser = new HtmlCleaner(); CleanerTransformations transform = new CleanerTransformations(); TagTransformation strip = new TagTransformation("small"); transform.addTransformation(strip); parser.setTransformations(transform); TagNode root = parser.clean(itemPayload); //</editor-fold> root.traverse(this); System.out.println(); _parsed = true; } }
From source file:mSearch.tool.Functions.java
public static void unescape(DatenFilm film) { film.arr[DatenFilm.FILM_THEMA] = StringEscapeUtils.unescapeXml(film.arr[DatenFilm.FILM_THEMA].trim()); film.arr[DatenFilm.FILM_THEMA] = StringEscapeUtils.unescapeHtml4(film.arr[DatenFilm.FILM_THEMA].trim()); // Beschreibung film.arr[DatenFilm.FILM_BESCHREIBUNG] = StringEscapeUtils .unescapeXml(film.arr[DatenFilm.FILM_BESCHREIBUNG].trim()); film.arr[DatenFilm.FILM_BESCHREIBUNG] = StringEscapeUtils .unescapeHtml4(film.arr[DatenFilm.FILM_BESCHREIBUNG].trim()); film.arr[DatenFilm.FILM_BESCHREIBUNG] = removeHtml(film.arr[DatenFilm.FILM_BESCHREIBUNG]); // Titel//from w w w .ja v a 2s . c o m film.arr[DatenFilm.FILM_TITEL] = StringEscapeUtils.unescapeXml(film.arr[DatenFilm.FILM_TITEL].trim()); film.arr[DatenFilm.FILM_TITEL] = StringEscapeUtils.unescapeHtml4(film.arr[DatenFilm.FILM_TITEL].trim()); }
From source file:com.wellsandwhistles.android.redditsp.reddit.things.RedditComment.java
/**
 * Collects the links found in this comment's HTML body.
 *
 * @return the result of {@code LinkHandler.computeAllLinks} applied to the
 *         HTML-unescaped {@code body_html}
 */
public HashSet<String> computeAllLinks() {
    final String decodedBody = StringEscapeUtils.unescapeHtml4(body_html);
    return LinkHandler.computeAllLinks(decodedBody);
}
From source file:com.green.modules.cms.service.ArticleService.java
@Transactional(readOnly = false) public void save(Article article) { if (article.getArticleData().getContent() != null) { article.getArticleData()//from ww w. jav a 2 s. c o m .setContent(StringEscapeUtils.unescapeHtml4(article.getArticleData().getContent())); } // ???? if (!SecurityUtils.getSubject().isPermitted("cms:article:audit")) { article.setDelFlag(Article.DEL_FLAG_AUDIT); } // ????? if (article.getCategory() != null && StringUtils.isNotBlank(article.getCategory().getId())) { Category category = categoryDao.get(article.getCategory().getId()); if (!Article.YES.equals(category.getIsAudit())) { article.setDelFlag(Article.DEL_FLAG_NORMAL); } } article.setUpdateBy(UserUtils.getUser()); article.setUpdateDate(new Date()); if (StringUtils.isNotBlank(article.getViewConfig())) { article.setViewConfig(StringEscapeUtils.unescapeHtml4(article.getViewConfig())); } articleDao.clear(); articleDao.save(article); }
From source file:com.hongqiang.shop.modules.cms.service.ArticleService.java
@Transactional(readOnly = false) public void save(Article article) { if (article.getArticleData().getContent() != null) { article.getArticleData()// w ww . ja v a 2 s. co m .setContent(StringEscapeUtils.unescapeHtml4(article.getArticleData().getContent())); } // ???? if (!SecurityUtils.getSubject().isPermitted("cms:article:audit")) { article.setDelFlag(Article.DEL_FLAG_AUDIT); } // ????? if (article.getCategory() != null && article.getCategory().getId() != null) { Category category = categoryDao.findOne(article.getCategory().getId()); if (!Article.YES.equals(category.getIsAudit())) { article.setDelFlag(Article.DEL_FLAG_NORMAL); } } article.setUpdateBy(UserUtils.getUser()); article.setUpdateDate(new Date()); articleDao.clear(); articleDao.save(article); }
From source file:cn.kk.exia.MangaDownloader.java
private static String analyzeAndDownload(final String line, final int num, final Logger log, final boolean first, final String targetDir) throws IOException { String pageTitle = Helper.substringBetweenNarrow(line, "<h1>", "</h1>"); if (Helper.isNotEmptyOrNull(pageTitle)) { if (Helper.isNotEmptyOrNull(MangaDownloader.mangaTitle)) { // System.out.println("??-title: " + MangaDownloader.mangaTitle); pageTitle = Helper.escapeFileName(StringEscapeUtils.unescapeHtml4(MangaDownloader.mangaTitle)); } else {//from w w w . j av a2 s .co m // System.out.println("??-h1: " + pageTitle); pageTitle = Helper.escapeFileName(StringEscapeUtils.unescapeHtml4(pageTitle)); } final File dir = new File(targetDir + File.separator + pageTitle); if (first) { log.log("??" + pageTitle); } dir.mkdirs(); MangaDownloader.nextUrl = Helper.substringBetweenNarrow(line, "<a href=\"", "-" + num + "\"><img"); if (Helper.isNotEmptyOrNull(MangaDownloader.nextUrl)) { MangaDownloader.nextUrl += "-" + num; final String img = Helper.substringBetweenNarrow(line.substring(line.indexOf("<iframe")), "<img src=\"", "\" style=\""); if (Helper.isNotEmptyOrNull(img)) { try { String imgName = img.substring(img.lastIndexOf('/')); if (imgName.indexOf('=') != -1) { imgName = imgName.substring(imgName.lastIndexOf('=') + 1); } imgName = Helper.escapeFileName(imgName); String ext = ".jpg"; final int idx = imgName.lastIndexOf('.'); if (idx != -1) { ext = imgName.substring(idx); } if (null != MangaDownloader.download(img, new File(dir, pageTitle + "_" + num + ext).getAbsolutePath(), false, log)) { log.log("?" 
+ img); try { if (MangaDownloader.lineCounter++ >= 5) { MangaDownloader.lineCounter = 0; Thread.sleep((40 * MangaDownloader.sleepBase) + (int) (Math.random() * 12 * MangaDownloader.sleepBase)); // 12000 } else { Thread.sleep((8 * MangaDownloader.sleepBase) + (int) (Math.random() * 5 * MangaDownloader.sleepBase)); } } catch (final InterruptedException e) { e.printStackTrace(); log.err("" + e.toString()); } } else { log.log("" + img); try { Thread.sleep((5 * MangaDownloader.sleepBase) + (int) (Math.random() * 5 * MangaDownloader.sleepBase)); } catch (final InterruptedException e) { e.printStackTrace(); log.err("" + e.toString()); } } } catch (final IOException e) { // e.printStackTrace(); log.err("" + e.toString()); throw e; } return MangaDownloader.nextUrl; } } } return null; }
From source file:net.krautchan.data.KCThread.java
/**
 * Builds the short plain-text {@code digest} of this thread from a posting.
 *
 * <p>The posting content is cut to 250 characters, a trailing {@code '&'} is
 * dropped, line separators and repeated spaces are collapsed, HTML entities
 * are unescaped, spoiler spans, tags and {@code http(s)} URLs are removed, and
 * the result is cut to 200 characters and trimmed. One line per file UID of
 * the posting is then appended.
 *
 * <p>Null or empty content yields an empty text part instead of an exception.
 * The original ended with a loop that searched backwards for a space but never
 * used the position found; it is omitted here, so output for non-empty content
 * is unchanged. NOTE(review): that loop looks like an unfinished
 * word-boundary cut — confirm whether one is wanted.
 *
 * @param posting the posting to summarize; {@code null} leaves {@code digest} untouched
 */
private void makeDigest(KCPosting posting) {
    if (null == posting) {
        return;
    }

    final String content = posting.getContent();
    String text = (content == null) ? "" : content;

    if (text.length() > 250) {
        text = text.substring(0, 250);
    }
    // Drop a trailing '&' left at the end of the (possibly cut) text.
    if (text.endsWith("&")) {
        text = text.substring(0, text.length() - 1);
    }

    text = text.replaceAll("[\n\r\u0085\u2028\u2029]", " ").replaceAll(" +", " ").trim();
    text = StringEscapeUtils.unescapeHtml4(text);
    text = text.replaceAll("<span class=\"spoiler\">.+?</span>", "");
    text = text.replaceAll("\\<.*?\\>", " ");
    text = text.replaceAll("https?://.+? ", " ");
    text = text.replaceAll(" +", " ");

    if (text.length() > 200) {
        text = text.substring(0, 200);
    }
    // Remove a tag that was left unterminated by the cut above.
    text = text.replaceAll("\\<.*", "");
    text = text.trim();

    final StringBuilder result = new StringBuilder(text);
    Iterator<String> iter = posting.getFileUids().iterator();
    while (iter.hasNext()) {
        result.append("\n " + posting.getFile(iter.next()));
    }
    digest = result.toString();
}