Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:org.kawanfw.commons.util.HtmlConverter.java
/**
 * Restores HTML character entities in a string to the characters they stand for.
 * <br>
 * Example: <code>"&amp;eacute;"</code> is converted to "é".
 * <p>
 *
 * @param string
 *            the text that may contain HTML entities
 * <p>
 * @return the unescaped text; the input itself is returned unchanged when
 *         conversion is switched off via DO_NOTHING, when the input is null,
 *         or when it contains no ampersand
 */
public static String fromHtml(String string) {
    // Short-circuit order matters: the null check must precede contains().
    boolean nothingToConvert = DO_NOTHING || string == null || !string.contains("&");
    return nothingToConvert ? string : StringEscapeUtils.unescapeHtml4(string);
}
From source file:org.kie.workbench.common.stunner.bpmn.backend.BaseDirectDiagramMarshaller.java
private String renderToString(Bpmn2Resource resource) throws IOException { ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); try {//from w ww . j ava 2s . co m resource.save(outputStream, new HashMap<>()); } finally { outputStream.close(); } return StringEscapeUtils.unescapeHtml4(outputStream.toString("UTF-8")); }
From source file:org.kie.workbench.common.stunner.bpmn.backend.BPMNDirectDiagramMarshaller.java
/**
 * Saves the given resource into an in-memory buffer, decodes the bytes as
 * UTF-8 and returns the text with HTML entities unescaped.
 *
 * @param resource the resource to save
 * @return the saved content after {@code StringEscapeUtils.unescapeHtml4}
 * @throws IOException if saving the resource or decoding the buffer fails
 */
private String renderToString(Bpmn2Resource resource) throws IOException {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    resource.save(buffer, new HashMap<>());

    final String serialized = buffer.toString("UTF-8");
    return StringEscapeUtils.unescapeHtml4(serialized);
}
From source file:org.kie.workbench.common.stunner.bpmn.backend.marshall.json.Bpmn2Marshaller.java
/**
 * Marshalls a diagram to text.
 * <p>
 * The diagram is first converted with {@code marshallToBpmn2Resource}, the
 * resulting resource is saved into an in-memory buffer, and the buffer is
 * decoded as UTF-8 and HTML-unescaped.
 *
 * @param diagram the diagram to marshall
 * @return the saved resource content after {@code StringEscapeUtils.unescapeHtml4}
 * @throws IOException if saving the resource or decoding the buffer fails
 */
public String marshall(final Diagram<Graph, Metadata> diagram) throws IOException {
    final JBPMBpmn2ResourceImpl bpmnResource = marshallToBpmn2Resource(diagram);

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    bpmnResource.save(buffer, new HashMap<>());

    final String serialized = buffer.toString("UTF-8");
    return StringEscapeUtils.unescapeHtml4(serialized);
}
From source file:org.kuali.test.utils.Utils.java
/**
 * Produces the display text for a node.
 * <p>
 * Text is gathered into a buffer by {@code getCleanedText}, HTML entities in
 * it are unescaped, and the result is passed through {@code Utils.trimString}.
 *
 * @param node the node whose text is collected
 * @return the unescaped text as returned by {@code Utils.trimString}
 */
public static String cleanDisplayText(Node node) {
    final StringBuilder collected = new StringBuilder(128);
    getCleanedText(node, collected);

    final String unescaped = StringEscapeUtils.unescapeHtml4(collected.toString());
    return Utils.trimString(unescaped);
}
From source file:org.languagetool.dev.blogs.BlogChecker.java
private String cleanup(String content) { String result = content.replaceAll("\\s+", " ").replaceAll("<div.*?>", "").replaceAll("</div>", "\n\n") .replaceAll("</h[1-6]>", "\n\n").replaceAll("<li>", "\n").replaceAll("<p.*?>", "") .replaceAll("</p>", "\n\n").replaceAll("<a.*?>", "").replaceAll("</a>", "") .replaceAll("<br\\s*/>", "").replaceAll("<br>", "").replaceAll("<.*?>", ""); return StringEscapeUtils.unescapeHtml4(result).replace("", " "); // nbsp }
From source file:org.languagetool.dev.wikipedia.TextConverter.java
public void visit(XmlEntityRef er) { addMapping(er);//from ww w.j a va 2 s . c om if ("nbsp".equals(er.getName())) { write('\u00A0'); // non-breaking space } else { String ch = StringEscapeUtils.unescapeHtml4("&" + er.getName() + ";"); write(ch); } }
From source file:org.lockss.daemon.OpenUrlResolver.java
/**
 * Get the URL for the first of the specified feature entries that can be
 * resolved from the plugin's definition map.
 * <p>
 * The definition map is obtained reflectively through the plugin's
 * {@code getDefinitionMap} method. For each feature entry, the entry's
 * {@code auFeatureKey} is split on "/" and followed through the map; the
 * value found (a single printf string or a collection of them) is
 * HTML-unescaped, expanded into URLs by a {@code UrlListConverter}, and the
 * first URL of each expansion is passed to {@code resolveUrl}. The first
 * non-null resolved URL wins.
 *
 * @param plugin the plugin
 * @param pluginEntries the feature entries to try, in order
 * @param paramMap the param map
 * @return the OpenUrlInfo for the first entry that resolves, or
 *         {@code noOpenUrlInfo} if the definition map cannot be obtained or
 *         no entry resolves
 */
private OpenUrlInfo getPluginUrl(Plugin plugin, FeatureEntry[] pluginEntries, TypedEntryMap paramMap) {
    ExternalizableMap map;
    // get printf pattern for pluginKey property
    try {
        // looked up reflectively; any failure here is logged and treated as "no URL"
        Method method = plugin.getClass().getMethod("getDefinitionMap", (new Class[0]));
        Object obj = method.invoke(plugin);
        if (!(obj instanceof ExternalizableMap)) {
            return noOpenUrlInfo;
        }
        map = (ExternalizableMap) obj;
    } catch (Exception ex) {
        log.error("getDefinitionMap", ex);
        return noOpenUrlInfo;
    }
    String proxySpec = null;
    try {
        proxySpec = paramMap.getString(ConfigParamDescr.CRAWL_PROXY.getKey());
    } catch (NoSuchElementException ex) {
        // no crawl_proxy param specified
    }
    for (FeatureEntry pluginEntry : pluginEntries) {
        // locate object value for plugin key path
        String pluginKey = pluginEntry.auFeatureKey;
        String[] pluginKeyPath = pluginKey.split("/");
        Object obj = map.getMapElement(pluginKeyPath[0]);
        for (int i = 1; (i < pluginKeyPath.length); i++) {
            if (obj instanceof Map) {
                obj = ((Map<String, ?>) obj).get(pluginKeyPath[i]);
            } else {
                // all path elements except last one must be a map;
                obj = null;
                break;
            }
        }
        if (obj instanceof Map) {
            // match TDB AU_FEATURE_KEY value to key in map
            String auFeatureKey = "*"; // default entry
            try {
                auFeatureKey = paramMap.getString(AU_FEATURE_KEY);
            } catch (NoSuchElementException ex) {
                // deliberately ignored: the "*" default above is kept
            }
            // entry may have multiple keys; '*' is the default entry
            Object val = null;
            for (Map.Entry<String, ?> entry : ((Map<String, ?>) obj).entrySet()) {
                String key = entry.getKey();
                // a map key is a ';'-separated list; match the feature key as the
                // whole key or as its first, last, or an interior list element
                if (key.equals(auFeatureKey) || key.startsWith(auFeatureKey + ";")
                        || key.endsWith(";" + auFeatureKey) || (key.indexOf(";" + auFeatureKey + ";") >= 0)) {
                    val = entry.getValue();
                    break;
                }
            }
            obj = val;
            // extend the key with the feature key that was looked up, for the log messages below
            pluginKey += "/" + auFeatureKey;
        }
        if (obj == null) {
            log.debug("unknown plugin key: " + pluginKey);
            continue;
        }
        Collection<String> printfStrings = null;
        if (obj instanceof String) {
            // get single pattern for start url
            printfStrings = Collections.singleton((String) obj);
        } else if (obj instanceof Collection) {
            // NOTE(review): unchecked cast; element type is assumed to be String - confirm
            printfStrings = (Collection<String>) obj;
        } else {
            log.debug("unknown type for plugin key: " + pluginKey + ": " + obj.getClass().getName());
            continue;
        }
        log.debug3("Trying plugin key: " + pluginKey + " for plugin: " + plugin.getPluginId() + " with "
                + printfStrings.size() + " printf strings");
        // set up converter for use with feature URL printf strings
        UrlListConverter converter = PrintfConverter.newUrlListConverter(plugin, paramMap);
        converter.setAllowUntypedArgs(true);
        for (String s : printfStrings) {
            String url = null;
            // the printf string may carry HTML entities; unescape before conversion
            s = StringEscapeUtils.unescapeHtml4(s);
            try {
                List<String> urls = converter.getUrlList(s);
                if ((urls != null) && !urls.isEmpty()) {
                    // if multiple urls match, the first one will do
                    url = urls.get(0);
                }
            } catch (Throwable ex) {
                // a failed conversion only skips this printf string
                log.debug("invalid conversion for " + s, ex);
                continue;
            }
            // validate URL: either it's cached, or it can be reached
            if (!StringUtil.isNullString(url)) {
                log.debug3("Resolving from url: " + url);
                url = resolveUrl(url, proxySpec);
                if (url != null) {
                    return OpenUrlInfo.newInstance(url, proxySpec, pluginEntry.resolvedTo);
                }
            }
        }
    }
    return noOpenUrlInfo;
}
From source file:org.lockss.extractor.JsoupTagExtractor.java
/** * take the value for a selector from an html page and perform the necessary * transformations to regularize it for storing in the article metadata. * this will strip embedded html selectors, unescape any escaped html and remove * any extra spaces.// w w w .jav a2s . c o m * * @param name the selector name * @param value the value * @return the regularized value */ private String processHtml(final String name, String value) { value = HtmlUtil.stripHtmlTags(value); // remove character entities from content value = StringEscapeUtils.unescapeHtml4(value); // normalize multiple whitespaces to a single space character value = value.replaceAll("\\s+", " "); return value; }
From source file:org.lockss.extractor.SimpleHtmlMetaTagMetadataExtractor.java
protected void putValue(ArticleMetadata ret, String name, String content) { // filter raw HTML tags embedded within content -- publishers get sloppy content = HtmlUtil.stripHtmlTags(content); // remove character entities from content content = StringEscapeUtils.unescapeHtml4(content); // normalize multiple whitespace characters to a single space character Matcher m = whiteSpacePat.matcher(content); content = m.replaceAll(" "); if (log.isDebug3()) log.debug3("Add: " + name + " = " + content); ret.putRaw(name, content);/*from w ww.ja v a 2 s . co m*/ }