List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:org.apache.uima.ruta.engine.HtmlConverter.java
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); inputViewName = (String) aContext.getConfigParameterValue(PARAM_INPUT_VIEW); inputViewName = StringUtils.isBlank(inputViewName) ? null : inputViewName; modifiedViewName = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_VIEW); modifiedViewName = StringUtils.isBlank(modifiedViewName) ? DEFAULT_MODIFIED_VIEW : modifiedViewName; replaceLinebreaks = (Boolean) aContext.getConfigParameterValue(PARAM_REPLACE_LINEBREAKS); replaceLinebreaks = replaceLinebreaks == null ? true : replaceLinebreaks; skipWhitespaces = (Boolean) aContext.getConfigParameterValue(PARAM_SKIP_WHITESPACES); skipWhitespaces = skipWhitespaces == null ? true : skipWhitespaces; processAll = (Boolean) aContext.getConfigParameterValue(PARAM_PROCESS_ALL); processAll = processAll == null ? true : processAll; linebreakReplacement = (String) aContext.getConfigParameterValue(PARAM_LINEBREAK_REPLACEMENT); linebreakReplacement = linebreakReplacement == null ? "" : linebreakReplacement; String conversionPolicy = (String) aContext.getConfigParameterValue(PARAM_CONVERSION_POLICY); if (StringUtils.isBlank(conversionPolicy) || conversionPolicy.equals("heuristic")) { conversionPolicy = "heuristic"; } else if (conversionPolicy.equals("explicit")) { } else if (conversionPolicy.equals("none")) { } else {// w ww. j a va 2s . 
c o m throw new ResourceInitializationException("illegal conversionPolicy parameter value", new Object[0]); } String[] nlTags = (String[]) aContext.getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAGS); if (nlTags == null) { newlineInducingTags = new String[] { "br", "p", "div", "ul", "ol", "dl", "li", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote" }; } // check assertions if (modifiedViewName.equals(inputViewName)) { throw new ResourceInitializationException("input and output view names must differ!", new Object[0]); } conversionPatterns = (String[]) aContext.getConfigParameterValue(PARAM_CONVERSION_PATTERNS); if (conversionPatterns == null) { conversionPatterns = new String[] { " ", "«", "»", """, "&", "<", ">", "'", "§", "¨", "©", "™", "®", "ö", "ä", "ü", " " }; } conversionReplacements = (String[]) aContext.getConfigParameterValue(PARAM_CONVERSION_REPLACEMENTS); if (conversionReplacements == null) { conversionReplacements = new String[conversionPatterns.length]; for (int i = 0; i < conversionPatterns.length; i++) { String c = conversionPatterns[i]; String rep = StringEscapeUtils.unescapeHtml4(c); conversionReplacements[i] = rep; } } gapText = (String) aContext.getConfigParameterValue(PARAM_GAP_TEXT); gapText = gapText == null ? "" : gapText; useSpaceGap = (Boolean) aContext.getConfigParameterValue(PARAM_USE_SPACE_GAP); useSpaceGap = useSpaceGap == null ? false : useSpaceGap; if (useSpaceGap) { gapText = " "; } gapInducingTags = (String[]) aContext.getConfigParameterValue(PARAM_GAP_INDUCING_TAGS); gapInducingTags = gapInducingTags == null ? new String[0] : gapInducingTags; expandOffsets = (Boolean) aContext.getConfigParameterValue(PARAM_EXPAND_OFFSETS); expandOffsets = expandOffsets == null ? false : expandOffsets; newlineInducingTagRegExp = (String) aContext.getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAG_REGEXP); }
From source file:org.apache.uima.ruta.engine.HtmlConverter.java
private SortedSet<HtmlConverterPSpan> htmlDecoding(SortedSet<HtmlConverterPSpan> visibleSpansSoFar) { TreeSet<HtmlConverterPSpan> copy = new TreeSet<HtmlConverterPSpan>(visibleSpansSoFar); Pattern patt = Pattern.compile("(&[a-zA-Z0-9]{2,6};)|(&#\\d{2,5};)"); for (HtmlConverterPSpan pSpan : visibleSpansSoFar) { String spanTxt = pSpan.getTxt(); Matcher matcher = patt.matcher(spanTxt); if (matcher.find()) { copy.remove(pSpan);/*from w w w . j a v a 2 s.c o m*/ int pSpanBegin = pSpan.getBegin(); int ioff = pSpan.getBegin(); do { String sourceString = matcher.group(); String replacement = StringEscapeUtils.unescapeHtml4(sourceString); HtmlConverterPSpanReplacement replacementSpan = new HtmlConverterPSpanReplacement( pSpanBegin + matcher.start(), pSpanBegin + matcher.end(), replacement); copy.add(replacementSpan); int replacementLength = sourceString.length(); if (pSpanBegin + matcher.end() > ioff + replacementLength) { int ib = ioff; int ie = pSpanBegin + matcher.start(); String newTxt = spanTxt.substring(ib - pSpanBegin, ie - pSpanBegin); copy.add(new HtmlConverterPSpan(ib, ie, newTxt)); ioff = ie; } ioff += replacementLength; // } while (matcher.find()); if (ioff < pSpan.getEnd()) { int ib = ioff; int ie = pSpan.getEnd(); String newTxt = spanTxt.substring(ib - pSpanBegin, ie - pSpanBegin); copy.add(new HtmlConverterPSpan(ioff, pSpan.getEnd(), newTxt)); } } } return copy; }
From source file:org.asciidoctor.RedPenTreeProcessor.java
/** * Process a paragraph of processed text. * <p/>/* ww w . j a v a 2 s . c om*/ * This method takes the processed form of the text generated by the Ruby RedPen AsciiDoctor backend found in AsciiDocParser.java * If the source form of the paragraph is also provided, it uses this to calculate the RedPen offsets for the characters in the text. * <p/> * The AsciiDoctor parser does not return the line numbers via the AsciiDoctorJ Block interface. Therefore, the custom RedPen AsciiDoctor backend * encodes the line number between ^A and ^B, if known. * <p/> * Hence the processed text for a paragraph is typically: * <p/> * ^Alinenumber^Bparagraph_text * <p/> * This method first breaks the paragraph on these line markers and adjusts the running lineNumber variable, * and then processes each sub-paragraph/sentence using standard RedPen sentence-end-position delimiting. * * @param paragraph the AsciiDoctor processed text, already converted by the RedPen AsciiDoctor backend * @param sourceText the raw source text * @param sentences A list of sentences discovered in the processed text */ protected void processParagraph(String paragraph, String sourceText, List<Sentence> sentences) { paragraph = paragraph == null ? "" : paragraph; sourceText = sourceText == null ? 
"" : sourceText; int offset = 0; String[] sublines = paragraph.split(String.valueOf(REDPEN_ASCIIDOCTOR_BACKEND_LINE_START)); for (String subline : sublines) { int lineNumberEndPos = subline.indexOf(REDPEN_ASCIIDOCTOR_BACKEND_LINENUMBER_DELIM); if (lineNumberEndPos != -1) { try { lineNumber = Integer.valueOf(subline.substring(0, lineNumberEndPos)); } catch (Exception e) { LOG.error("Error when parsing line number from converted AsciiDoc", e); } subline = subline.substring(lineNumberEndPos + 1); } subline = StringEscapeUtils.unescapeHtml4(subline); while (true) { int periodPosition = sentenceExtractor.getSentenceEndPosition(subline); if (periodPosition != -1) { String candidateSentence = subline.substring(0, periodPosition + 1); subline = subline.substring(periodPosition + 1); periodPosition = sentenceExtractor.getSentenceEndPosition(sourceText); String sourceSentence = ""; if (periodPosition != -1) { sourceSentence = sourceText.substring(0, periodPosition + 1); sourceText = sourceText.substring(periodPosition + 1); } LineOffset lineOffset = addSentence(new LineOffset(lineNumber, offset), candidateSentence, sourceSentence, sentenceExtractor, sentences); lineNumber = lineOffset.lineNum; offset = lineOffset.offset; } else { break; } } if (!subline.trim().isEmpty()) { addSentence(new LineOffset(lineNumber, offset), subline, sourceText, sentenceExtractor, sentences); } } lineNumber++; }
From source file:org.asqatasun.processing.ProcessRemarkServiceImplTest.java
/** * Test of setDocument method, of class ProcessRemarkServiceImpl. *//*from w ww. j av a2 s . com*/ public void testGetSnippetFromElement() { ProcessRemarkServiceImpl instance = new ProcessRemarkServiceImpl(null, null, null); //--------------------------------------------------------------------// //-----------------------Test1----------------------------------------// //--------------------------------------------------------------------// String rawHtml = "<label> <span>Rechercher:</span> " + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" " + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" /></label>"; Document document = Jsoup.parse(rawHtml); Element element = document.getElementsByTag("label").iterator().next(); String snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); String expectedSnippet = "<label> <span>Rechercher:</span> " + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" " + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" />[...]</label>"; assertEquals(expectedSnippet, snippet); //--------------------------------------------------------------------// //-----------------------Test2----------------------------------------// //--------------------------------------------------------------------// rawHtml = "<label> <span>New Rechercher:</span> " + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\" " + " id=\"searchfield\" class=\"myclass other-class1 other-class2\" > " + "anything</p></label>"; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("label").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = "<label> <span>New Rechercher:</span> " + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\"" + " id=\"searchfield\" class=\"myclass other-class1 other-class2\">" + "[...]</p>[...]</label>"; assertEquals(expectedSnippet, 
snippet); //--------------------------------------------------------------------// //-----------------------Test3----------------------------------------// //--------------------------------------------------------------------// rawHtml = "<iframe align=\"left\" width=\"315px\" " + "scrolling=\"no\" height=\"160px\" frameborder=\"0\" " + "id=\"link-meteo\" src=\"http://www.anyUrl.com/module/onelocationsearch?ShowSearch=true&StartDate=2012-06-01&Days=2&location=bruxelles&url=http://meteo1.lavenir.net&cssfile=http://lavenir.net/extra/weather/styles.css\">" + "</iframe> "; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("iframe").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = rawHtml.trim(); assertEquals(expectedSnippet, snippet); //--------------------------------------------------------------------// //-----------------------Test4----------------------------------------// //--------------------------------------------------------------------// rawHtml = " <center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> " + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\"></script> " + " <div class=\"adhese_300x250\"> <script src=\"http://1.adhesecdn.be/pool/lib/68641.js?t=1371729603000\"></script> " + "<script src=\"http://anyUrl.com/pagead/show_ads.js\" type=\"text/javascript\"></script>" + "<ins style=\"display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\">" + "<ins style=\"display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\" id=\"aswift_1_anchor\">" + "<iframe width=\"300\" scrolling=\"no\" height=\"250\" frameborder=\"0\" style=\"left:0;position:absolute;top:0;\" name=\"aswift_1\" 
id=\"aswift_1\" onload=\"var i=this.id,s=window.google_iframe_oncopy,H=s&&s.handlers,h=H&&H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&&d&&(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){w.location.replace(h)}}\" allowtransparency=\"true\" hspace=\"0\" vspace=\"0\" marginheight=\"0\" marginwidth=\"0\"></iframe>" + "</ins>" + "</ins>" + "</div> " + "</center> "; document = Jsoup.parse(rawHtml); element = document.getElementsByTag("center").iterator().next(); snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element)); expectedSnippet = "<center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> " + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\">[...]</script>" + "[...]</center>"; assertEquals(expectedSnippet, snippet); }
From source file:org.asqatasun.webapp.report.expression.I18nExpression.java
/**
 * Resolves the i18n value for the key retrieved from the given maps.
 * <p>
 * When no resource bundle is configured the key itself is returned. Otherwise the looked-up
 * value is returned HTML-escaped if {@code escapeHtml} is set, and HTML-unescaped if not.
 *
 * @param fields     passed through to the key retriever
 * @param variables  passed through to the key retriever
 * @param parameters passed through to the key retriever
 * @return the key, or the escaped/unescaped i18n value for it
 */
@Override
public Object evaluate(Map fields, Map variables, Map parameters) {
    final String key = keyRetriever.retrieveKey(fields, variables, parameters);
    if (resourceBundleList.isEmpty()) {
        return key;
    }
    final String i18nValue = retrieveI18nValue(key);
    return escapeHtml
            ? StringEscapeUtils.escapeHtml4(i18nValue)
            : StringEscapeUtils.unescapeHtml4(i18nValue);
}
From source file:org.asqatasun.webapp.report.expression.ResultStyleExpression.java
public ResultStyleExpression(String result, String bundleName, Locale locale) { if (bundleName != null) { ResourceBundle resourceBundle = ResourceBundle.getBundle(bundleName, locale); this.result = StringEscapeUtils.unescapeHtml4(resourceBundle.getString(result)); } else {// w ww . j a v a 2 s . c om this.result = result; } }
From source file:org.asqatasun.webapp.report.layout.builder.SubtitleBuilderImpl.java
/**
 * Builds the "referential / level" subtitle text for the given audit statistics.
 * <p>
 * The result is the concatenation of: the referential label, {@code DOUBLE_DOT_KEY}, the i18n
 * value of the audit's referential parameter, {@code SEPARATOR_KEY}, the level label,
 * {@code DOUBLE_DOT_KEY}, and the i18n value of the audit's level parameter (with ";" replaced
 * by "-" before lookup). Every piece taken from a bundle is HTML-unescaped.
 *
 * @param auditStatistics the statistics whose parameters map supplies the referential and level
 * @param locale          the locale used to load all resource bundles
 * @return the assembled subtitle text
 */
private String getRefAndLevel(AuditStatistics auditStatistics, Locale locale) {
    ResourceBundle refBundle = ResourceBundle.getBundle(refBundleName, locale);

    // Parameterized instead of a raw ArrayList to keep the assignment type-safe.
    Collection<ResourceBundle> refAndlevelValueBundleList = new ArrayList<ResourceBundle>();
    for (String bundle : refAndLevelValueBundleNameList) {
        refAndlevelValueBundleList.add(ResourceBundle.getBundle(bundle, locale));
    }

    ResourceBundle levelBundle = ResourceBundle.getBundle(levelBundleName, locale);

    StringBuilder refAndLevel = new StringBuilder();
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(refBundle.getString(REF_KEY)));
    refAndLevel.append(DOUBLE_DOT_KEY);
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(
            retrieveI18nValue(auditStatistics.getParametersMap().get(REF_KEY),
                    refAndlevelValueBundleList)));
    refAndLevel.append(SEPARATOR_KEY);
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(levelBundle.getString(LEVEL_KEY)));
    refAndLevel.append(DOUBLE_DOT_KEY);
    // NOTE(review): throws NullPointerException if the parameters map has no LEVEL_KEY entry -
    // confirm callers always provide it.
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(retrieveI18nValue(
            auditStatistics.getParametersMap().get(LEVEL_KEY).replace(";", "-"),
            refAndlevelValueBundleList)));
    return refAndLevel.toString();
}
From source file:org.asqatasun.webapp.report.layout.column.builder.ElementColumnBuilderImpl.java
@Override public AbstractColumn getElementColumn(Locale locale) { ColumnBuilder columnBuilder = ColumnBuilder.getNew(); columnBuilder.setWidth(columnWidth); if (propertyName != null && valueClassName != null) { columnBuilder.setColumnProperty(propertyName, valueClassName); }//from w w w .jav a2 s . co m if (customExpressionBuilder != null) { columnBuilder.setCustomExpression(customExpressionBuilder.build(locale)); } if (style != null) { columnBuilder.setStyle(style); } if (headerStyle != null) { columnBuilder.setHeaderStyle(headerStyle); } if (columnTitleBundleName != null) { ResourceBundle bundle = ResourceBundle.getBundle(columnTitleBundleName, locale); if (columnTitleKey != null) { columnBuilder.setTitle(StringEscapeUtils.unescapeHtml4(bundle.getString(columnTitleKey))); } } if (conditionalStyleBuilderList != null && !conditionalStyleBuilderList.isEmpty()) { columnBuilder.addConditionalStyles(buildConditionStyleList(locale)); } AbstractColumn elementColumn = null; try { elementColumn = columnBuilder.build(); } catch (ColumnBuilderException ex) { LOGGER.error(ex); } return elementColumn; }
From source file:org.cerberus.servlet.crud.testdata.ReadTestDataLib.java
/** * Auxiliary method that converts a test data library object to a JSON * object.// ww w . j a v a 2s. co m * * @param testDataLib test data library * @param unescapeXML indicates whether the XML retrieved in the Envelope * should be un-escaped or not. * @return JSON object * @throws JSONException */ private JSONObject convertTestDataLibToJSONObject(TestDataLib testDataLib, boolean unescapeContent) throws JSONException { if (unescapeContent) { //general testDataLib.setDescription(StringEscapeUtils.unescapeHtml4(testDataLib.getDescription())); //SQL testDataLib.setScript(StringEscapeUtils.unescapeHtml4(testDataLib.getScript())); //SOAP testDataLib.setServicePath(StringEscapeUtils.unescapeHtml4(testDataLib.getServicePath())); testDataLib.setMethod(StringEscapeUtils.unescapeHtml4(testDataLib.getMethod())); testDataLib.setEnvelope(StringEscapeUtils.unescapeXml(testDataLib.getEnvelope())); //CSV testDataLib.setCsvUrl(StringEscapeUtils.unescapeHtml4(testDataLib.getCsvUrl())); testDataLib.setSeparator(StringEscapeUtils.unescapeHtml4(testDataLib.getSeparator())); } Gson gson = new Gson(); JSONObject result = new JSONObject(gson.toJson(testDataLib)); return result; }
From source file:org.cerberus.util.ParameterParserUtil.java
/** * * @param inParam/*w w w .j ava2s . c o m*/ * @param defaultValue * @return * @throws UnsupportedEncodingException */ public static String parseStringParamAndSanitize(String inParam, String defaultValue) throws UnsupportedEncodingException { if (inParam == null) { return defaultValue; } else { return URLDecoder.decode(StringEscapeUtils.unescapeHtml4(POLICY.sanitize(inParam)), "UTF-8"); } }