Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:org.apache.uima.ruta.engine.HtmlConverter.java

@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    inputViewName = (String) aContext.getConfigParameterValue(PARAM_INPUT_VIEW);
    inputViewName = StringUtils.isBlank(inputViewName) ? null : inputViewName;
    modifiedViewName = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_VIEW);
    modifiedViewName = StringUtils.isBlank(modifiedViewName) ? DEFAULT_MODIFIED_VIEW : modifiedViewName;
    replaceLinebreaks = (Boolean) aContext.getConfigParameterValue(PARAM_REPLACE_LINEBREAKS);
    replaceLinebreaks = replaceLinebreaks == null ? true : replaceLinebreaks;
    skipWhitespaces = (Boolean) aContext.getConfigParameterValue(PARAM_SKIP_WHITESPACES);
    skipWhitespaces = skipWhitespaces == null ? true : skipWhitespaces;
    processAll = (Boolean) aContext.getConfigParameterValue(PARAM_PROCESS_ALL);
    processAll = processAll == null ? true : processAll;
    linebreakReplacement = (String) aContext.getConfigParameterValue(PARAM_LINEBREAK_REPLACEMENT);
    linebreakReplacement = linebreakReplacement == null ? "" : linebreakReplacement;
    String conversionPolicy = (String) aContext.getConfigParameterValue(PARAM_CONVERSION_POLICY);
    if (StringUtils.isBlank(conversionPolicy) || conversionPolicy.equals("heuristic")) {
        conversionPolicy = "heuristic";
    } else if (conversionPolicy.equals("explicit")) {
    } else if (conversionPolicy.equals("none")) {
    } else {//  w ww.  j  a  va 2s  .  c o m
        throw new ResourceInitializationException("illegal conversionPolicy parameter value", new Object[0]);
    }
    String[] nlTags = (String[]) aContext.getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAGS);
    if (nlTags == null) {
        newlineInducingTags = new String[] { "br", "p", "div", "ul", "ol", "dl", "li", "h1", "h2", "h3", "h4",
                "h5", "h6", "blockquote" };

    }
    // check assertions
    if (modifiedViewName.equals(inputViewName)) {
        throw new ResourceInitializationException("input and output view names must differ!", new Object[0]);
    }
    conversionPatterns = (String[]) aContext.getConfigParameterValue(PARAM_CONVERSION_PATTERNS);
    if (conversionPatterns == null) {
        conversionPatterns = new String[] { " ", "«", "»", """, "&", "<", ">",
                "'", "§", "¨", "©", "™", "®", "ö", "ä", "ü",
                " " };
    }
    conversionReplacements = (String[]) aContext.getConfigParameterValue(PARAM_CONVERSION_REPLACEMENTS);
    if (conversionReplacements == null) {
        conversionReplacements = new String[conversionPatterns.length];
        for (int i = 0; i < conversionPatterns.length; i++) {
            String c = conversionPatterns[i];
            String rep = StringEscapeUtils.unescapeHtml4(c);
            conversionReplacements[i] = rep;
        }
    }

    gapText = (String) aContext.getConfigParameterValue(PARAM_GAP_TEXT);
    gapText = gapText == null ? "" : gapText;

    useSpaceGap = (Boolean) aContext.getConfigParameterValue(PARAM_USE_SPACE_GAP);
    useSpaceGap = useSpaceGap == null ? false : useSpaceGap;

    if (useSpaceGap) {
        gapText = " ";
    }

    gapInducingTags = (String[]) aContext.getConfigParameterValue(PARAM_GAP_INDUCING_TAGS);
    gapInducingTags = gapInducingTags == null ? new String[0] : gapInducingTags;

    expandOffsets = (Boolean) aContext.getConfigParameterValue(PARAM_EXPAND_OFFSETS);
    expandOffsets = expandOffsets == null ? false : expandOffsets;

    newlineInducingTagRegExp = (String) aContext.getConfigParameterValue(PARAM_NEWLINE_INDUCING_TAG_REGEXP);
}

From source file:org.apache.uima.ruta.engine.HtmlConverter.java

/**
 * Splits every span whose text contains HTML entity references into replacement spans (one
 * per entity, carrying the unescaped text) and plain spans for the text around the entities.
 * Spans without entity references are kept unchanged.
 *
 * @param visibleSpansSoFar the spans to inspect; this set is only read
 * @return a new sorted set containing the untouched spans and the pieces of the split ones
 */
private SortedSet<HtmlConverterPSpan> htmlDecoding(SortedSet<HtmlConverterPSpan> visibleSpansSoFar) {
    TreeSet<HtmlConverterPSpan> result = new TreeSet<HtmlConverterPSpan>(visibleSpansSoFar);
    // Named entities of 2-6 characters, or decimal numeric references of 2-5 digits.
    Pattern entityPattern = Pattern.compile("(&[a-zA-Z0-9]{2,6};)|(&#\\d{2,5};)");

    for (HtmlConverterPSpan span : visibleSpansSoFar) {
        String text = span.getTxt();
        Matcher entityMatcher = entityPattern.matcher(text);
        if (!entityMatcher.find()) {
            continue;
        }

        // The span is replaced by its pieces.
        result.remove(span);
        int base = span.getBegin();
        // Absolute offset up to which the span's text has already been emitted.
        int cursor = base;
        do {
            String entity = entityMatcher.group();
            int entityBegin = base + entityMatcher.start();
            int entityEnd = base + entityMatcher.end();
            String decoded = StringEscapeUtils.unescapeHtml4(entity);
            result.add(new HtmlConverterPSpanReplacement(entityBegin, entityEnd, decoded));

            // Emit the plain text between the previous piece and this entity, if any.
            if (entityBegin > cursor) {
                String between = text.substring(cursor - base, entityBegin - base);
                result.add(new HtmlConverterPSpan(cursor, entityBegin, between));
            }
            cursor = entityEnd;
        } while (entityMatcher.find());

        // Emit the text that follows the last entity.
        int spanEnd = span.getEnd();
        if (cursor < spanEnd) {
            String tail = text.substring(cursor - base, spanEnd - base);
            result.add(new HtmlConverterPSpan(cursor, spanEnd, tail));
        }
    }
    return result;
}

From source file:org.asciidoctor.RedPenTreeProcessor.java

/**
 * Process a paragraph of processed text.
 * <p/>/*  ww w  . j a v  a  2  s .  c  om*/
 * This method takes the processed form of the text generated by the Ruby RedPen AsciiDoctor backend found in AsciiDocParser.java
 * If the source form of the paragraph is also provided, it uses this to calculate the RedPen offsets for the characters in the text.
 * <p/>
 * The AsciiDoctor parser does not return the line numbers via the AsciiDoctorJ Block interface. Therefore, the custom RedPen AsciiDoctor backend
 * encodes the line number between ^A and ^B, if known.
 * <p/>
 * Hence the processed text for a paragraph is typically:
 * <p/>
 * ^Alinenumber^Bparagraph_text
 * <p/>
 * This method first breaks the paragraph on these line markers and adjusts the running lineNumber variable,
 * and then processes each sub-paragraph/sentence using standard RedPen sentence-end-position delimiting.
 *
 * @param paragraph  the AsciiDoctor processed text, already converted by the RedPen AsciiDoctor backend
 * @param sourceText the raw source text
 * @param sentences  A list of sentences discovered in the processed text
 */
protected void processParagraph(String paragraph, String sourceText, List<Sentence> sentences) {

    paragraph = paragraph == null ? "" : paragraph;
    sourceText = sourceText == null ? "" : sourceText;
    int offset = 0;

    String[] sublines = paragraph.split(String.valueOf(REDPEN_ASCIIDOCTOR_BACKEND_LINE_START));

    for (String subline : sublines) {
        int lineNumberEndPos = subline.indexOf(REDPEN_ASCIIDOCTOR_BACKEND_LINENUMBER_DELIM);
        if (lineNumberEndPos != -1) {
            try {
                lineNumber = Integer.valueOf(subline.substring(0, lineNumberEndPos));
            } catch (Exception e) {
                LOG.error("Error when parsing line number from converted AsciiDoc", e);
            }
            subline = subline.substring(lineNumberEndPos + 1);
        }
        subline = StringEscapeUtils.unescapeHtml4(subline);

        while (true) {
            int periodPosition = sentenceExtractor.getSentenceEndPosition(subline);
            if (periodPosition != -1) {
                String candidateSentence = subline.substring(0, periodPosition + 1);
                subline = subline.substring(periodPosition + 1);
                periodPosition = sentenceExtractor.getSentenceEndPosition(sourceText);
                String sourceSentence = "";
                if (periodPosition != -1) {
                    sourceSentence = sourceText.substring(0, periodPosition + 1);
                    sourceText = sourceText.substring(periodPosition + 1);
                }
                LineOffset lineOffset = addSentence(new LineOffset(lineNumber, offset), candidateSentence,
                        sourceSentence, sentenceExtractor, sentences);
                lineNumber = lineOffset.lineNum;
                offset = lineOffset.offset;
            } else {
                break;
            }
        }
        if (!subline.trim().isEmpty()) {
            addSentence(new LineOffset(lineNumber, offset), subline, sourceText, sentenceExtractor, sentences);
        }
    }

    lineNumber++;
}

From source file:org.asqatasun.processing.ProcessRemarkServiceImplTest.java

/**
 * Test of getSnippetFromElement method, of class ProcessRemarkServiceImpl.
 * <p>
 * For four HTML fragments, the snippet of the first element with a given tag name is
 * HTML-unescaped and compared with the expected text.
 */
public void testGetSnippetFromElement() {
    ProcessRemarkServiceImpl instance = new ProcessRemarkServiceImpl(null, null, null);

    // Test 1: label wrapping a span and an input whose name contains a bare ampersand.
    String rawHtml = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" /></label>";
    String expectedSnippet = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&amp;qudsqqqssqdsqdsqdo\" />[...]</label>";
    assertEquals(expectedSnippet, unescapedSnippetOfFirst(instance, rawHtml, "label"));

    // Test 2: label wrapping a span and a paragraph with text content.
    rawHtml = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\" "
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\" > " + "anything</p></label>";
    expectedSnippet = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\""
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\">" + "[...]</p>[...]</label>";
    assertEquals(expectedSnippet, unescapedSnippetOfFirst(instance, rawHtml, "label"));

    // Test 3: empty iframe; the snippet is expected to equal the trimmed input.
    rawHtml = "<iframe align=\"left\" width=\"315px\" " + "scrolling=\"no\" height=\"160px\" frameborder=\"0\" "
            + "id=\"link-meteo\" src=\"http://www.anyUrl.com/module/onelocationsearch?ShowSearch=true&amp;StartDate=2012-06-01&amp;Days=2&amp;location=bruxelles&amp;url=http://meteo1.lavenir.net&amp;cssfile=http://lavenir.net/extra/weather/styles.css\">"
            + "</iframe> ";
    expectedSnippet = rawHtml.trim();
    assertEquals(expectedSnippet, unescapedSnippetOfFirst(instance, rawHtml, "iframe"));

    // Test 4: center element with nested script, div, ins and iframe children.
    rawHtml = " <center>  <script type=\"text/javascript\">    if (articledetail == false) initAdhese('IMU.SUPER.WIDE');     </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\"></script> "
            + " <div class=\"adhese_300x250\">  <script src=\"http://1.adhesecdn.be/pool/lib/68641.js?t=1371729603000\"></script> "
            + "<script src=\"http://anyUrl.com/pagead/show_ads.js\" type=\"text/javascript\"></script>"
            + "<ins style=\"display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\">"
            + "<ins style=\"display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\" id=\"aswift_1_anchor\">"
            + "<iframe width=\"300\" scrolling=\"no\" height=\"250\" frameborder=\"0\" style=\"left:0;position:absolute;top:0;\" name=\"aswift_1\" id=\"aswift_1\" onload=\"var i=this.id,s=window.google_iframe_oncopy,H=s&amp;&amp;s.handlers,h=H&amp;&amp;H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&amp;&amp;d&amp;&amp;(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){w.location.replace(h)}}\" allowtransparency=\"true\" hspace=\"0\" vspace=\"0\" marginheight=\"0\" marginwidth=\"0\"></iframe>"
            + "</ins>" + "</ins>" + "</div> " + "</center> ";
    expectedSnippet = "<center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\">[...]</script>"
            + "[...]</center>";
    assertEquals(expectedSnippet, unescapedSnippetOfFirst(instance, rawHtml, "center"));
}

/**
 * Parses the HTML, takes the first element with the given tag name and returns its snippet
 * with HTML entities unescaped.
 */
private static String unescapedSnippetOfFirst(ProcessRemarkServiceImpl service, String html, String tagName) {
    Element first = Jsoup.parse(html).getElementsByTag(tagName).iterator().next();
    return StringEscapeUtils.unescapeHtml4(service.getSnippetFromElement(first));
}

From source file:org.asqatasun.webapp.report.expression.I18nExpression.java

/**
 * Resolves the key for the current row and returns its internationalized value.
 * <p>
 * When no resource bundle is available the key itself is returned. Otherwise the looked-up
 * value is HTML-escaped if {@code escapeHtml} is set, and HTML-unescaped if it is not.
 */
@Override
public Object evaluate(Map fields, Map variables, Map parameters) {
    String key = keyRetriever.retrieveKey(fields, variables, parameters);
    if (resourceBundleList.isEmpty()) {
        return key;
    }
    String i18nValue = retrieveI18nValue(key);
    return escapeHtml ? StringEscapeUtils.escapeHtml4(i18nValue)
            : StringEscapeUtils.unescapeHtml4(i18nValue);
}

From source file:org.asqatasun.webapp.report.expression.ResultStyleExpression.java

/**
 * Creates the expression for a result value.
 *
 * @param result     the result; used as a bundle key when a bundle name is given, otherwise
 *                   stored as is
 * @param bundleName the resource bundle to look the result up in, or {@code null} to skip
 *                   the lookup
 * @param locale     the locale used to load the bundle
 */
public ResultStyleExpression(String result, String bundleName, Locale locale) {
    if (bundleName == null) {
        this.result = result;
        return;
    }
    // The bundle value may contain HTML entities; store it unescaped.
    String localized = ResourceBundle.getBundle(bundleName, locale).getString(result);
    this.result = StringEscapeUtils.unescapeHtml4(localized);
}

From source file:org.asqatasun.webapp.report.layout.builder.SubtitleBuilderImpl.java

/**
 * Builds the "reference / level" text for an audit: the reference label and its localized
 * value, a separator, then the level label and its localized value. Every part is
 * HTML-unescaped before it is appended.
 *
 * @param auditStatistics the statistics whose parameters map holds the reference and level
 * @param locale          the locale used to load all resource bundles
 * @return the assembled text
 */
private String getRefAndLevel(AuditStatistics auditStatistics, Locale locale) {
    ResourceBundle refBundle = ResourceBundle.getBundle(refBundleName, locale);
    // Parameterized instead of a raw ArrayList, which caused an unchecked assignment.
    Collection<ResourceBundle> refAndlevelValueBundleList = new ArrayList<ResourceBundle>();
    for (String bundle : refAndLevelValueBundleNameList) {
        refAndlevelValueBundleList.add(ResourceBundle.getBundle(bundle, locale));
    }
    ResourceBundle levelBundle = ResourceBundle.getBundle(levelBundleName, locale);

    StringBuilder refAndLevel = new StringBuilder();
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(refBundle.getString(REF_KEY)));
    refAndLevel.append(DOUBLE_DOT_KEY);
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(
            retrieveI18nValue(auditStatistics.getParametersMap().get(REF_KEY), refAndlevelValueBundleList)));
    refAndLevel.append(SEPARATOR_KEY);
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(levelBundle.getString(LEVEL_KEY)));
    refAndLevel.append(DOUBLE_DOT_KEY);
    // The level parameter is looked up with ';' replaced by '-'.
    refAndLevel.append(StringEscapeUtils.unescapeHtml4(retrieveI18nValue(
            auditStatistics.getParametersMap().get(LEVEL_KEY).replace(";", "-"), refAndlevelValueBundleList)));
    return refAndLevel.toString();
}

From source file:org.asqatasun.webapp.report.layout.column.builder.ElementColumnBuilderImpl.java

/**
 * Builds the report column from the settings of this builder.
 * <p>
 * Optional settings (property, custom expression, styles, title, conditional styles) are
 * only applied when they are set. The title is read from the title bundle for the given
 * locale and HTML-unescaped.
 *
 * @param locale the locale used for the custom expression, the title and conditional styles
 * @return the built column, or {@code null} if building fails (the failure is logged)
 */
@Override
public AbstractColumn getElementColumn(Locale locale) {
    ColumnBuilder builder = ColumnBuilder.getNew();
    builder.setWidth(columnWidth);

    if (propertyName != null && valueClassName != null) {
        builder.setColumnProperty(propertyName, valueClassName);
    }
    if (customExpressionBuilder != null) {
        builder.setCustomExpression(customExpressionBuilder.build(locale));
    }
    if (style != null) {
        builder.setStyle(style);
    }
    if (headerStyle != null) {
        builder.setHeaderStyle(headerStyle);
    }
    if (columnTitleBundleName != null) {
        // The bundle is loaded even when no title key is set.
        ResourceBundle titleBundle = ResourceBundle.getBundle(columnTitleBundleName, locale);
        if (columnTitleKey != null) {
            String title = titleBundle.getString(columnTitleKey);
            builder.setTitle(StringEscapeUtils.unescapeHtml4(title));
        }
    }
    if (conditionalStyleBuilderList != null && !conditionalStyleBuilderList.isEmpty()) {
        builder.addConditionalStyles(buildConditionStyleList(locale));
    }

    try {
        return builder.build();
    } catch (ColumnBuilderException ex) {
        LOGGER.error(ex);
        return null;
    }
}

From source file:org.cerberus.servlet.crud.testdata.ReadTestDataLib.java

/**
 * Auxiliary method that converts a test data library object to a JSON object.
 * <p>
 * When unescaping is requested, the text fields of the given object are overwritten in place
 * with their unescaped values before serialization: the envelope is XML-unescaped, all other
 * fields are HTML-unescaped.
 *
 * @param testDataLib     test data library; modified in place when {@code unescapeContent}
 *                        is true
 * @param unescapeContent indicates whether the text fields should be un-escaped or not
 * @return JSON object built from the Gson serialization of the library
 * @throws JSONException if the serialized text cannot be turned into a JSON object
 */
private JSONObject convertTestDataLibToJSONObject(TestDataLib testDataLib, boolean unescapeContent)
        throws JSONException {

    if (unescapeContent) {
        // general
        String description = StringEscapeUtils.unescapeHtml4(testDataLib.getDescription());
        testDataLib.setDescription(description);

        // SQL
        String script = StringEscapeUtils.unescapeHtml4(testDataLib.getScript());
        testDataLib.setScript(script);

        // SOAP
        String servicePath = StringEscapeUtils.unescapeHtml4(testDataLib.getServicePath());
        testDataLib.setServicePath(servicePath);
        String method = StringEscapeUtils.unescapeHtml4(testDataLib.getMethod());
        testDataLib.setMethod(method);
        String envelope = StringEscapeUtils.unescapeXml(testDataLib.getEnvelope());
        testDataLib.setEnvelope(envelope);

        // CSV
        String csvUrl = StringEscapeUtils.unescapeHtml4(testDataLib.getCsvUrl());
        testDataLib.setCsvUrl(csvUrl);
        String separator = StringEscapeUtils.unescapeHtml4(testDataLib.getSeparator());
        testDataLib.setSeparator(separator);
    }

    String json = new Gson().toJson(testDataLib);
    return new JSONObject(json);
}

From source file:org.cerberus.util.ParameterParserUtil.java

/**
 *
 * @param inParam/*w w w  .j  ava2s . c  o  m*/
 * @param defaultValue
 * @return
 * @throws UnsupportedEncodingException
 */
public static String parseStringParamAndSanitize(String inParam, String defaultValue)
        throws UnsupportedEncodingException {
    if (inParam == null) {
        return defaultValue;
    } else {
        return URLDecoder.decode(StringEscapeUtils.unescapeHtml4(POLICY.sanitize(inParam)), "UTF-8");
    }
}