Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

This page collects example usages of org.apache.commons.lang3 StringEscapeUtils.unescapeHtml4, taken from open-source projects.

Prototype

public static final String unescapeHtml4(final String input)

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:com.jaeksoft.searchlib.parser.htmlParser.HtmlDocumentProvider.java

/**
 * Reads the {@code content} attribute of a node and decodes the HTML entities it contains.
 *
 * @param node the node whose {@code content} attribute text is read
 * @return the attribute text with HTML 4 entities unescaped, or {@code null} when the node
 *         has no {@code content} attribute text
 */
final public static String getMetaContent(final HtmlNodeAbstract<?> node) {
    final String rawContent = node.getAttributeText("content");
    return (rawContent != null) ? StringEscapeUtils.unescapeHtml4(rawContent) : null;
}

From source file:com.wellsandwhistles.android.redditsp.image.ImageInfo.java

/**
 * Builds an {@link ImageInfo} from a JSON object that may carry an {@code image} child
 * (title, caption, type, animated flag, dimensions, size) and a {@code links} child
 * (original and big-square URLs).
 *
 * <p>Title and caption have their HTML 4 entities unescaped. When the image is flagged as
 * animated, {@code .gif} in the original URL is replaced by {@code .mp4} and the media type
 * is {@code VIDEO}; otherwise the media type is {@code IMAGE}.
 *
 * @param object the JSON object to read from
 * @return the populated image description; fields missing from the JSON stay {@code null}
 * @throws IOException          propagated from the JSON accessors
 * @throws InterruptedException propagated from the JSON accessors
 */
public static ImageInfo parseImgur(final JsonBufferedObject object) throws IOException, InterruptedException {

    final JsonBufferedObject imageNode = object.getObject("image");
    final JsonBufferedObject linksNode = object.getObject("links");

    String title = null;
    String caption = null;
    String type = null;
    boolean animated = false;
    Long width = null;
    Long height = null;
    Long size = null;

    if (imageNode != null) {
        final String rawTitle = imageNode.getString("title");
        final String rawCaption = imageNode.getString("caption");
        type = imageNode.getString("type");
        animated = "true".equals(imageNode.getString("animated"));
        width = imageNode.getLong("width");
        height = imageNode.getLong("height");
        size = imageNode.getLong("size");

        // Entity decoding only applies to values that are actually present.
        title = (rawTitle == null) ? null : StringEscapeUtils.unescapeHtml4(rawTitle);
        caption = (rawCaption == null) ? null : StringEscapeUtils.unescapeHtml4(rawCaption);
    }

    String urlOriginal = null;
    String urlBigSquare = null;

    if (linksNode != null) {
        final String original = linksNode.getString("original");
        // Animated images are fetched through the .mp4 variant of the original URL.
        urlOriginal = (animated && original != null) ? original.replace(".gif", ".mp4") : original;
        urlBigSquare = linksNode.getString("big_square");
    }

    final MediaType mediaType = animated ? MediaType.VIDEO : MediaType.IMAGE;

    return new ImageInfo(urlOriginal, urlBigSquare, title, caption, type, animated, width, height, size,
            mediaType);
}

From source file:msearch.tool.MSFunktionen.java

/**
 * Decodes XML and then HTML 4 entities in the topic, description and title slots of the
 * given film, in that order. Each value is trimmed before each decoding pass. The
 * description is additionally passed through {@code removeHtml}.
 *
 * @param film the film whose {@code arr} entries are rewritten in place
 */
public static void unescape(DatenFilm film) {
    final int[] slots = { DatenFilm.FILM_THEMA_NR, DatenFilm.FILM_BESCHREIBUNG_NR, DatenFilm.FILM_TITEL_NR };

    for (final int slot : slots) {
        final String xmlDecoded = StringEscapeUtils.unescapeXml(film.arr[slot].trim());
        film.arr[slot] = xmlDecoded;
        film.arr[slot] = StringEscapeUtils.unescapeHtml4(xmlDecoded.trim());

        if (slot == DatenFilm.FILM_BESCHREIBUNG_NR) {
            // Only the description gets the extra removeHtml pass.
            film.arr[slot] = removeHtml(film.arr[slot]);
        }
    }
}

From source file:com.glowinteractive.reforger.Item.java

/**
 * Parses this item once; later calls return immediately because {@code _parsed} is set.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>read the item name from the element with class {@code name-shadow};</li>
 *   <li>build a Wowhead URL from the first anchor's {@code href} (item ID), its
 *       {@code data-item} attribute (enchant, reforge, socket, random suffix, set pieces)
 *       and the {@code href} of every element with class {@code gem};</li>
 *   <li>fetch that URL, extract the payload matched by {@code TOOLTIP_FORMAT}, clean it with
 *       {@link HtmlCleaner} and pass this object to {@code traverse} on the resulting root.</li>
 * </ol>
 *
 * <p>Malformed {@code data-item} pairs (no key, or a key that needs a value but has none)
 * are skipped instead of raising {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>NOTE(review): a missing anchor, {@code href} or gem {@code href} is still only guarded
 * by {@code assert}; with assertions disabled those cases fail with a runtime exception.
 */
public synchronized void parse() {
    if (_parsed) {
        return;
    }

    final StringBuilder wowhead = new StringBuilder("http://www.wowhead.com/");

    //<editor-fold defaultstate="collapsed" desc="Parse name.">
    TagNode ref = _data.findElementByAttValue("class", "name-shadow", true, true);
    assert ref != null && ref.getText() != null : "Error: unable to determine item name.";
    _name = (ref != null) ? StringEscapeUtils.unescapeHtml4(ref.getText().toString()) : "";
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="Parse item ID.">
    ref = _data.findElementByName("a", false);
    assert ref != null : "Error: unable to determine item attributes.";
    String attribute = ref.getAttributeByName("href");
    String[] elements = attribute.split("/wow/en/item/");
    assert elements.length == 2 : "Error: unexpected Armory data format.";
    wowhead.append("item=").append(elements[1]);
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="Extract data-item string.">
    ref = _data.findElementByName("a", false);
    assert ref != null : "Error: unable to determine item attributes.";
    attribute = ref.getAttributeByName("data-item");
    elements = StringEscapeUtils.unescapeHtml4((attribute != null) ? attribute : "").split("&");
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="Parse Armory data-item attributes.">
    for (String e : elements) {
        final String[] pair = e.split("=");

        // "=".split("=") yields an empty array, so even the key may be missing.
        if (pair.length == 0) {
            continue;
        }

        final String key = pair[0];

        if ("es".equals(key)) {
            // Additional Socket (flag only, no value needed)
            wowhead.append("&sock");
            continue;
        }

        // Every remaining key needs a value; skip "key" or "key=" without one.
        if (pair.length < 2) {
            continue;
        }

        final String value = pair[1];

        if ("e".equals(key)) {
            // Permanent Enchantment
            wowhead.append("&ench=").append(value);
        } else if ("re".equals(key)) {
            // Reforge ID (not currently supported by Wowhead)
            wowhead.append("&rf=").append(value);
        } else if ("r".equals(key)) {
            // Random Itemization
            wowhead.append("&rand=").append(value);
        } else if ("set".equals(key)) {
            // Set Pieces Equipped
            wowhead.append("&pcs=").append(value.replace(',', ':'));
        }
    }
    //</editor-fold>

    //<editor-fold defaultstate="collapsed" desc="Parse Armory gem ID's.">
    final TagNode[] gems = _data.getElementsByAttValue("class", "gem", true, true);
    final int GEM_COUNT = gems.length;

    if (GEM_COUNT != 0) {
        wowhead.append("&gems=");

        for (int i = 0; i < GEM_COUNT; ++i) {
            final String suffix = gems[i].getAttributeByName("href").replace("/wow/en/item/", "");
            wowhead.append(suffix);
            // Gem IDs are colon-separated, with no trailing separator.
            if (i + 1 < GEM_COUNT) {
                wowhead.append(":");
            }
        }
    }
    //</editor-fold>

    wowhead.append("&power");

    System.out.println("  " + _name);

    //<editor-fold defaultstate="collapsed" desc="Download and parse Wowhead JSON data.">
    URL url = null;
    try {
        url = new URL(wowhead.toString());
    } catch (Exception e) {
        // NOTE(review): url stays null here and is still handed to URLRetriever below.
        Logger.getLogger(Item.class.getSimpleName()).log(Level.SEVERE, null, e);
    }

    final Pattern p = Pattern.compile(TOOLTIP_FORMAT);
    final Matcher m = p.matcher(URLRetriever.fetchContents(url));
    // An empty payload is cleaned when the tooltip pattern does not match.
    final String itemPayload = (m.find()) ? m.group(1) : "";

    final HtmlCleaner parser = new HtmlCleaner();
    final CleanerTransformations transform = new CleanerTransformations();
    final TagTransformation strip = new TagTransformation("small");
    transform.addTransformation(strip);
    parser.setTransformations(transform);

    final TagNode root = parser.clean(itemPayload);
    //</editor-fold>

    root.traverse(this);

    System.out.println();

    _parsed = true;
}

From source file:mSearch.tool.Functions.java

/**
 * Decodes XML and then HTML 4 entities in the topic, description and title slots of the
 * given film, in that order. Each value is trimmed before each decoding pass. The
 * description is additionally passed through {@code removeHtml}.
 *
 * @param film the film whose {@code arr} entries are rewritten in place
 */
public static void unescape(DatenFilm film) {
    final int[] slots = { DatenFilm.FILM_THEMA, DatenFilm.FILM_BESCHREIBUNG, DatenFilm.FILM_TITEL };

    for (final int slot : slots) {
        final String xmlDecoded = StringEscapeUtils.unescapeXml(film.arr[slot].trim());
        film.arr[slot] = xmlDecoded;
        film.arr[slot] = StringEscapeUtils.unescapeHtml4(xmlDecoded.trim());

        if (slot == DatenFilm.FILM_BESCHREIBUNG) {
            // Only the description gets the extra removeHtml pass.
            film.arr[slot] = removeHtml(film.arr[slot]);
        }
    }
}

From source file:com.wellsandwhistles.android.redditsp.reddit.things.RedditComment.java

/**
 * Unescapes the HTML 4 entities in {@code body_html} and hands the result to
 * {@code LinkHandler.computeAllLinks}.
 *
 * @return the set returned by {@code LinkHandler.computeAllLinks} for the decoded body
 */
public HashSet<String> computeAllLinks() {
    final String decodedBody = StringEscapeUtils.unescapeHtml4(body_html);
    return LinkHandler.computeAllLinks(decodedBody);
}

From source file:com.green.modules.cms.service.ArticleService.java

/**
 * Prepares and persists an article.
 *
 * <p>The article body and a non-blank view configuration have their HTML 4 entities
 * unescaped. The delete flag is set to {@code DEL_FLAG_AUDIT} when the current subject lacks
 * the {@code cms:article:audit} permission, and to {@code DEL_FLAG_NORMAL} when the article's
 * category does not have {@code isAudit} equal to {@code Article.YES}. Update user and date
 * are stamped before the DAO is cleared and the article saved.
 *
 * @param article the article to save
 */
@Transactional(readOnly = false)
public void save(Article article) {
    // Store the body with its HTML entities decoded.
    final String content = article.getArticleData().getContent();
    if (content != null) {
        article.getArticleData().setContent(StringEscapeUtils.unescapeHtml4(content));
    }

    // Subjects without the audit permission get the audit flag on their article.
    final boolean mayAudit = SecurityUtils.getSubject().isPermitted("cms:article:audit");
    if (!mayAudit) {
        article.setDelFlag(Article.DEL_FLAG_AUDIT);
    }

    // A category whose isAudit is not YES resets the flag to normal.
    final boolean hasCategoryId = article.getCategory() != null
            && StringUtils.isNotBlank(article.getCategory().getId());
    if (hasCategoryId) {
        final Category category = categoryDao.get(article.getCategory().getId());
        final boolean categoryAudited = Article.YES.equals(category.getIsAudit());
        if (!categoryAudited) {
            article.setDelFlag(Article.DEL_FLAG_NORMAL);
        }
    }

    article.setUpdateBy(UserUtils.getUser());
    article.setUpdateDate(new Date());

    final String viewConfig = article.getViewConfig();
    if (StringUtils.isNotBlank(viewConfig)) {
        article.setViewConfig(StringEscapeUtils.unescapeHtml4(viewConfig));
    }

    articleDao.clear();
    articleDao.save(article);
}

From source file:com.hongqiang.shop.modules.cms.service.ArticleService.java

/**
 * Prepares and persists an article.
 *
 * <p>The article body has its HTML 4 entities unescaped. The delete flag is set to
 * {@code DEL_FLAG_AUDIT} when the current subject lacks the {@code cms:article:audit}
 * permission, and to {@code DEL_FLAG_NORMAL} when the article's category does not have
 * {@code isAudit} equal to {@code Article.YES}. Update user and date are stamped before the
 * DAO is cleared and the article saved.
 *
 * @param article the article to save
 */
@Transactional(readOnly = false)
public void save(Article article) {
    // Store the body with its HTML entities decoded.
    final String content = article.getArticleData().getContent();
    if (content != null) {
        article.getArticleData().setContent(StringEscapeUtils.unescapeHtml4(content));
    }

    // Subjects without the audit permission get the audit flag on their article.
    final boolean mayAudit = SecurityUtils.getSubject().isPermitted("cms:article:audit");
    if (!mayAudit) {
        article.setDelFlag(Article.DEL_FLAG_AUDIT);
    }

    // A category whose isAudit is not YES resets the flag to normal.
    final boolean hasCategoryId = article.getCategory() != null && article.getCategory().getId() != null;
    if (hasCategoryId) {
        final Category category = categoryDao.findOne(article.getCategory().getId());
        final boolean categoryAudited = Article.YES.equals(category.getIsAudit());
        if (!categoryAudited) {
            article.setDelFlag(Article.DEL_FLAG_NORMAL);
        }
    }

    article.setUpdateBy(UserUtils.getUser());
    article.setUpdateDate(new Date());

    articleDao.clear();
    articleDao.save(article);
}

From source file:cn.kk.exia.MangaDownloader.java

/**
 * Analyzes one line of page markup, downloads the image it references into a directory named
 * after the title, and returns the URL of the next page.
 *
 * <p>The directory name comes from {@code MangaDownloader.mangaTitle} when that is set,
 * otherwise from the {@code <h1>} text of the line; either way HTML entities are unescaped
 * and the result is passed through {@code Helper.escapeFileName}. After each download attempt
 * the method sleeps for a randomized interval scaled by {@code MangaDownloader.sleepBase}.
 *
 * @param line      the markup to analyze
 * @param num       the page number, used in the next-page link and the image file name
 * @param log       receiver of progress and error messages
 * @param first     when {@code true}, the title is logged
 * @param targetDir the parent directory for the download directory
 * @return the next page URL (also stored in {@code MangaDownloader.nextUrl}), or {@code null}
 *         when the line has no title, no next-page link, no {@code <iframe>} or no image
 * @throws IOException if the download fails; the error is logged before it is rethrown
 */
private static String analyzeAndDownload(final String line, final int num, final Logger log,
        final boolean first, final String targetDir) throws IOException {
    String pageTitle = Helper.substringBetweenNarrow(line, "<h1>", "</h1>");
    if (!Helper.isNotEmptyOrNull(pageTitle)) {
        return null;
    }

    // An explicitly configured manga title wins over the page's own <h1>.
    final String titleSource = Helper.isNotEmptyOrNull(MangaDownloader.mangaTitle) ? MangaDownloader.mangaTitle
            : pageTitle;
    pageTitle = Helper.escapeFileName(StringEscapeUtils.unescapeHtml4(titleSource));

    final File dir = new File(targetDir + File.separator + pageTitle);
    if (first) {
        log.log("??" + pageTitle);
    }
    dir.mkdirs();

    MangaDownloader.nextUrl = Helper.substringBetweenNarrow(line, "<a href=\"", "-" + num + "\"><img");
    if (!Helper.isNotEmptyOrNull(MangaDownloader.nextUrl)) {
        return null;
    }
    MangaDownloader.nextUrl += "-" + num;

    // Without an <iframe> there is no image section; substring(-1) would throw here.
    final int iframeStart = line.indexOf("<iframe");
    if (iframeStart < 0) {
        return null;
    }
    final String img = Helper.substringBetweenNarrow(line.substring(iframeStart), "<img src=\"", "\" style=\"");
    if (!Helper.isNotEmptyOrNull(img)) {
        return null;
    }

    try {
        // Derive the file extension from the last path segment (or the last '=' parameter value).
        // Math.max keeps a slash-less URL from producing substring(-1).
        String imgName = img.substring(Math.max(img.lastIndexOf('/'), 0));
        if (imgName.indexOf('=') != -1) {
            imgName = imgName.substring(imgName.lastIndexOf('=') + 1);
        }
        imgName = Helper.escapeFileName(imgName);
        final int dot = imgName.lastIndexOf('.');
        final String ext = (dot != -1) ? imgName.substring(dot) : ".jpg";

        final String target = new File(dir, pageTitle + "_" + num + ext).getAbsolutePath();
        if (null != MangaDownloader.download(img, target, false, log)) {
            log.log("?" + img);
            // Every sixth successful download is followed by a longer pause.
            if (MangaDownloader.lineCounter++ >= 5) {
                MangaDownloader.lineCounter = 0;
                sleepBetweenRequests(40, 12, log);
            } else {
                sleepBetweenRequests(8, 5, log);
            }
        } else {
            log.log("" + img);
            sleepBetweenRequests(5, 5, log);
        }
    } catch (final IOException e) {
        log.err("" + e.toString());
        throw e;
    }
    return MangaDownloader.nextUrl;
}

/**
 * Sleeps for {@code fixedFactor * sleepBase} plus a random share of
 * {@code randomFactor * sleepBase}; on interruption, logs it and restores the interrupt flag.
 */
private static void sleepBetweenRequests(final int fixedFactor, final int randomFactor, final Logger log) {
    try {
        Thread.sleep((fixedFactor * MangaDownloader.sleepBase)
                + (int) (Math.random() * randomFactor * MangaDownloader.sleepBase));
    } catch (final InterruptedException e) {
        log.err("" + e.toString());
        // Keep the interruption visible to callers instead of swallowing it.
        Thread.currentThread().interrupt();
    }
}

From source file:net.krautchan.data.KCThread.java

/**
 * Builds the short text digest of a posting and stores it in {@code digest}.
 *
 * <p>The content is cut to 250 characters, a trailing {@code &} is dropped, line breaks and
 * runs of spaces are collapsed, HTML entities are unescaped, spoiler spans, tags and URLs are
 * removed, and the result is cut to 200 characters. One line per file UID of the posting is
 * then appended. Null or empty content yields a digest that contains only the file lines.
 *
 * @param posting the posting to summarize; {@code null} leaves {@code digest} untouched
 */
private void makeDigest(KCPosting posting) {
    if (null == posting) {
        return;
    }

    String text = posting.getContent();
    if (text == null) {
        text = "";
    }
    if (text.length() > 250) {
        text = text.substring(0, 250);
    }
    // Drop a dangling '&' left at the cut; endsWith is also safe on an empty string.
    if (text.endsWith("&")) {
        text = text.substring(0, text.length() - 1);
    }

    text = text.replaceAll("[\n\r\u0085\u2028\u2029]", " ").replaceAll(" +", " ").trim();
    text = StringEscapeUtils.unescapeHtml4(text);
    text = text.replaceAll("<span class=\"spoiler\">.+?</span>", "");
    text = text.replaceAll("\\<.*?\\>", " ");
    text = text.replaceAll("https?://.+? ", " ");
    text = text.replaceAll(" +", " ");
    if (text.length() > 200) {
        text = text.substring(0, 200);
    }
    // Remove a tag that was cut open by the truncation above.
    text = text.replaceAll("\\<.*", "");
    // NOTE(review): the original scanned back to the last space after index 150 but never
    // used the position; that dead loop (which threw on an empty digest) was removed.
    // Confirm whether a word-boundary cut was intended.
    text = text.trim();

    Iterator<String> iter = posting.getFileUids().iterator();
    while (iter.hasNext()) {
        text += "\n   " + posting.getFile(iter.next());
    }
    digest = text;
}