Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

This page collects example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4, taken from open-source projects.

Prototype

public static final String unescapeHtml4(final String input)

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:org.kawanfw.commons.util.HtmlConverter.java

/**
 * Converts HTML character entities in a string back to the characters they
 * stand for. <br>
 * Example: <code>"&amp;eacute;"</code> is converted to <code>"é"</code>.
 * <p>
 * The input is returned untouched when the {@code DO_NOTHING} switch is set,
 * when it is {@code null}, or when it contains no <code>'&amp;'</code> (and
 * therefore cannot contain an entity).
 *
 * @param string
 *            the String to convert from HTML to its original characters; may
 *            be {@code null}
 * @return the String with entities replaced by their characters, or the input
 *         itself in the pass-through cases described above
 */
public static String fromHtml(String string) {
    // Pass-through: conversion disabled, or nothing to convert.
    if (DO_NOTHING || string == null) {
        return string;
    }

    // Only pay for the unescape pass when an entity could be present.
    return string.contains("&") ? StringEscapeUtils.unescapeHtml4(string) : string;
}

From source file:org.kie.workbench.common.stunner.bpmn.backend.BaseDirectDiagramMarshaller.java

/**
 * Serializes the given resource into an in-memory buffer and returns the
 * result as a UTF-8 decoded string with HTML entities unescaped.
 *
 * @param resource the resource to save
 * @return the saved content, decoded as UTF-8 and passed through
 *         {@code StringEscapeUtils.unescapeHtml4}
 * @throws IOException if saving the resource or decoding the buffer fails
 */
private String renderToString(Bpmn2Resource resource) throws IOException {
    // NOTE(review): unescapeHtml4 also decodes markup escapes such as &lt; and
    // &amp; in the serialized output - confirm that consumers expect this.
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        resource.save(buffer, new HashMap<>());
        final String serialized = buffer.toString("UTF-8");
        return StringEscapeUtils.unescapeHtml4(serialized);
    }
}

From source file:org.kie.workbench.common.stunner.bpmn.backend.BPMNDirectDiagramMarshaller.java

/**
 * Saves the given resource into an in-memory buffer and returns the result
 * as a UTF-8 decoded string with HTML entities unescaped.
 *
 * @param resource the resource to save
 * @return the saved content, decoded as UTF-8 and passed through
 *         {@code StringEscapeUtils.unescapeHtml4}
 * @throws IOException if saving the resource or decoding the buffer fails
 */
private String renderToString(Bpmn2Resource resource) throws IOException {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    resource.save(buffer, new HashMap<>());

    final String serialized = buffer.toString("UTF-8");
    return StringEscapeUtils.unescapeHtml4(serialized);
}

From source file:org.kie.workbench.common.stunner.bpmn.backend.marshall.json.Bpmn2Marshaller.java

/**
 * Marshalls a diagram to text: converts it to a BPMN2 resource via
 * {@code marshallToBpmn2Resource}, saves that resource into an in-memory
 * buffer, and returns the buffer decoded as UTF-8 with HTML entities
 * unescaped.
 *
 * @param diagram the diagram to marshall
 * @return the saved resource content after
 *         {@code StringEscapeUtils.unescapeHtml4}
 * @throws IOException if saving the resource or decoding the buffer fails
 */
public String marshall(final Diagram<Graph, Metadata> diagram) throws IOException {
    final JBPMBpmn2ResourceImpl bpmnResource = marshallToBpmn2Resource(diagram);

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    bpmnResource.save(buffer, new HashMap<>());

    final String serialized = buffer.toString("UTF-8");
    return StringEscapeUtils.unescapeHtml4(serialized);
}

From source file:org.kuali.test.utils.Utils.java

/**
 * Builds display text for a node: collects the node's text with
 * {@code getCleanedText}, unescapes HTML entities in it, and passes the
 * result through {@code Utils.trimString}.
 *
 * @param node the node whose text is collected
 * @return the unescaped text as returned by {@code Utils.trimString}
 */
public static String cleanDisplayText(Node node) {
    final StringBuilder collected = new StringBuilder(128);
    getCleanedText(node, collected);

    final String unescaped = StringEscapeUtils.unescapeHtml4(collected.toString());
    return Utils.trimString(unescaped);
}

From source file:org.languagetool.dev.blogs.BlogChecker.java

/**
 * Reduces an HTML fragment to plain text.
 * <p>
 * Whitespace runs are collapsed to a single space first, so the only line
 * breaks in the result are the ones inserted here: a blank line after each
 * closing {@code div}, heading ({@code h1}-{@code h6}) and {@code p} tag, and
 * a single line break before each {@code li}. All remaining tags are removed,
 * HTML entities are unescaped, and non-breaking spaces are replaced by plain
 * spaces.
 *
 * @param content the HTML content to clean; must not be {@code null}
 * @return the plain-text version of {@code content}
 */
private String cleanup(String content) {
    String result = content.replaceAll("\\s+", " ").replaceAll("<div.*?>", "").replaceAll("</div>", "\n\n")
            .replaceAll("</h[1-6]>", "\n\n").replaceAll("<li>", "\n").replaceAll("<p.*?>", "")
            .replaceAll("</p>", "\n\n").replaceAll("<a.*?>", "").replaceAll("</a>", "")
            .replaceAll("<br\\s*/>", "").replaceAll("<br>", "").replaceAll("<.*?>", "");
    // Map the non-breaking space (U+00A0, e.g. from an unescaped &nbsp;) to a plain space.
    // The escape sequence is used on purpose: a literal, invisible character can be lost in
    // transit, and an empty search string would insert a space between every character.
    return StringEscapeUtils.unescapeHtml4(result).replace("\u00A0", " ");
}

From source file:org.languagetool.dev.wikipedia.TextConverter.java

/**
 * Handles an XML entity reference: registers it with {@code addMapping} and
 * writes the character(s) the entity stands for.
 *
 * @param er the entity reference being visited
 */
public void visit(XmlEntityRef er) {
    addMapping(er);

    if (!"nbsp".equals(er.getName())) {
        // Rebuild the "&name;" form and let the HTML unescaper resolve it.
        write(StringEscapeUtils.unescapeHtml4("&" + er.getName() + ";"));
        return;
    }

    // "nbsp" is written directly as the non-breaking space character.
    write('\u00A0');
}

From source file:org.lockss.daemon.OpenUrlResolver.java

/**
 * Get the URL for the specified key from the plugin.
 * @param plugin the plugin/*from   w  w  w.  jav  a  2s .com*/
 * @param pluginKeys the plugin keys
 * @param paramMap the param map
 * @return the URL for the specified key
 */
private OpenUrlInfo getPluginUrl(Plugin plugin, FeatureEntry[] pluginEntries, TypedEntryMap paramMap) {
    ExternalizableMap map;

    // get printf pattern for pluginKey property
    try {
        Method method = plugin.getClass().getMethod("getDefinitionMap", (new Class[0]));
        Object obj = method.invoke(plugin);
        if (!(obj instanceof ExternalizableMap)) {
            return noOpenUrlInfo;
        }
        map = (ExternalizableMap) obj;
    } catch (Exception ex) {
        log.error("getDefinitionMap", ex);
        return noOpenUrlInfo;
    }

    String proxySpec = null;
    try {
        proxySpec = paramMap.getString(ConfigParamDescr.CRAWL_PROXY.getKey());
    } catch (NoSuchElementException ex) {
        // no crawl_proxy param specified
    }

    for (FeatureEntry pluginEntry : pluginEntries) {
        // locate object value for plugin key path
        String pluginKey = pluginEntry.auFeatureKey;
        String[] pluginKeyPath = pluginKey.split("/");
        Object obj = map.getMapElement(pluginKeyPath[0]);
        for (int i = 1; (i < pluginKeyPath.length); i++) {
            if (obj instanceof Map) {
                obj = ((Map<String, ?>) obj).get(pluginKeyPath[i]);
            } else {
                // all path elements except last one must be a map;
                obj = null;
                break;
            }
        }

        if (obj instanceof Map) {
            // match TDB AU_FEATURE_KEY value to key in map 
            String auFeatureKey = "*"; // default entry
            try {
                auFeatureKey = paramMap.getString(AU_FEATURE_KEY);
            } catch (NoSuchElementException ex) {
            }

            // entry may have multiple keys; '*' is the default entry
            Object val = null;
            for (Map.Entry<String, ?> entry : ((Map<String, ?>) obj).entrySet()) {
                String key = entry.getKey();
                if (key.equals(auFeatureKey) || key.startsWith(auFeatureKey + ";")
                        || key.endsWith(";" + auFeatureKey) || (key.indexOf(";" + auFeatureKey + ";") >= 0)) {
                    val = entry.getValue();
                    break;
                }
            }
            obj = val;
            pluginKey += "/" + auFeatureKey;
        }

        if (obj == null) {
            log.debug("unknown plugin key: " + pluginKey);
            continue;
        }

        Collection<String> printfStrings = null;
        if (obj instanceof String) {
            // get single pattern for start url
            printfStrings = Collections.singleton((String) obj);
        } else if (obj instanceof Collection) {
            printfStrings = (Collection<String>) obj;
        } else {
            log.debug("unknown type for plugin key: " + pluginKey + ": " + obj.getClass().getName());
            continue;
        }

        log.debug3("Trying plugin key: " + pluginKey + " for plugin: " + plugin.getPluginId() + " with "
                + printfStrings.size() + " printf strings");

        // set up converter for use with feature URL printf strings
        UrlListConverter converter = PrintfConverter.newUrlListConverter(plugin, paramMap);
        converter.setAllowUntypedArgs(true);

        for (String s : printfStrings) {
            String url = null;
            s = StringEscapeUtils.unescapeHtml4(s);
            try {
                List<String> urls = converter.getUrlList(s);
                if ((urls != null) && !urls.isEmpty()) {
                    // if multiple urls match, the first one will do
                    url = urls.get(0);
                }
            } catch (Throwable ex) {
                log.debug("invalid  conversion for " + s, ex);
                continue;
            }

            // validate URL: either it's cached, or it can be reached
            if (!StringUtil.isNullString(url)) {
                log.debug3("Resolving from url: " + url);
                url = resolveUrl(url, proxySpec);
                if (url != null) {
                    return OpenUrlInfo.newInstance(url, proxySpec, pluginEntry.resolvedTo);
                }
            }
        }
    }

    return noOpenUrlInfo;
}

From source file:org.lockss.extractor.JsoupTagExtractor.java

/**
 * Regularizes a value taken from an HTML page so it can be stored in the
 * article metadata: embedded HTML tags are stripped, HTML entities are
 * unescaped, and every run of whitespace is collapsed to a single space.
 *
 * @param name the selector name (not used by the transformation itself)
 * @param value the raw value
 * @return the regularized value
 */
private String processHtml(final String name, String value) {
    final String withoutTags = HtmlUtil.stripHtmlTags(value);
    // replace character entities with the characters they denote
    final String unescaped = StringEscapeUtils.unescapeHtml4(withoutTags);
    // collapse each whitespace run into one space
    return unescaped.replaceAll("\\s+", " ");
}

From source file:org.lockss.extractor.SimpleHtmlMetaTagMetadataExtractor.java

/**
 * Cleans a meta tag's content and stores it as a raw value in the article
 * metadata under the given name.
 *
 * @param ret the article metadata to add the value to
 * @param name the name to store the value under
 * @param content the raw content of the meta tag
 */
protected void putValue(ArticleMetadata ret, String name, String content) {
    // publishers sometimes embed raw HTML tags in the content; strip them first
    String cleaned = HtmlUtil.stripHtmlTags(content);
    // replace character entities with the characters they denote
    cleaned = StringEscapeUtils.unescapeHtml4(cleaned);
    // replace every whiteSpacePat match with a single space
    // (pattern is declared elsewhere; presumably whitespace runs - confirm)
    cleaned = whiteSpacePat.matcher(cleaned).replaceAll(" ");

    if (log.isDebug3()) {
        log.debug3("Add: " + name + " = " + cleaned);
    }
    ret.putRaw(name, cleaned);
}