Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:br.bireme.tb.RIPSA.java

/**
 * Given a cell, uses its info to fill the holes of the template String.
 * <p>
 * The edition is extracted from the cell's {@code qualifRec} value with {@code EDITION_PAT}
 * and the year from the four digits that follow "idb" in its {@code father} value. A
 * {@code null} title or subtitle is treated as an empty string, so it neither raises a
 * {@link NullPointerException} nor shows up as the text "null" in the generated page.
 *
 * @param cell a cell having the data to fill the holes
 * @return a String with holes replaced
 * @throws IOException if the template is not available or if {@code qualifRec} /
 *         {@code father} do not match their expected patterns
 */
static String cell2html(final Cell cell) throws IOException {
    assert cell != null;

    if (TEMPLATE == null) {
        throw new IOException("TEMPLATE is null");
    }
    final String qualifRec = cell.getElem().qualifRec.toString();
    final Matcher mat = EDITION_PAT.matcher(qualifRec);
    if (!mat.find()) {
        throw new IOException("out of pattern url [" + qualifRec + "]");
    }
    final String edition = mat.group(2);
    final String father = cell.getElem().father.toString();
    final Matcher matf = Pattern.compile("idb(\\d{4})").matcher(father);
    if (!matf.find()) {
        throw new IOException("out of pattern url [" + father + "]");
    }
    final String year = matf.group(1);

    String str = TEMPLATE;

    // Null-safe views of title and subtitle; every use below goes through them.
    final String title = cell.getTitle();
    final String title2 = (title == null) ? "" : title;
    final String subtitle = cell.getSubtitle();
    final String subtitle2 = (subtitle == null) ? "" : subtitle;

    str = str.replace("$$title$$", title2);
    str = str.replace("$$subtitle$$", subtitle2.isEmpty() ? "" : "<h2>" + subtitle2 + "</h2>");
    str = str.replace("$$description$$",
            "Clulas IDB - " + edition + " - " + year + " - " + title2 + " - " + subtitle2);

    // Keywords: the title followed by every table option that is an actual filter value.
    // Seeded with title2 (not title) because StringBuilder(String) rejects null.
    final StringBuilder keywords = new StringBuilder(title2);
    final Map<String, String> tableOptions = cell.getElem().tableOptions;
    final boolean hasTableOptions = (tableOptions != null) && (!tableOptions.isEmpty());

    if (hasTableOptions) {
        for (String opt : tableOptions.values()) {
            if ((!opt.equals("No ativa")) && (!opt.equals("Todas as categorias"))) {
                keywords.append(", ");
                keywords.append(StringEscapeUtils.unescapeHtml4(opt));
            }
        }
    }
    str = str.replace("$$keywords$$", keywords.toString());

    final List<String> scope = cell.getScope();
    if ((scope != null) && (!scope.isEmpty())) {
        str = str.replace("$$scope$$", "<h3>" + joinWithLineBreaks(scope) + "</h3>");
    } else {
        str = str.replace("$$scope$$", "");
    }

    final List<String> header = cell.getHeader();
    if ((header != null) && (!header.isEmpty())) {
        str = str.replace("$$celheader$$", joinWithLineBreaks(header));
    } else {
        str = str.replace("$$celheader$$", "");
    }

    final String row = cell.getRow();
    str = str.replace("$$celrow$$", (row == null) ? "" : row);

    final String value = cell.getValue();
    if ((value != null) && (!value.isEmpty())) {
        String celVal;
        try {
            // Normalise the number through NFMT; fall back to the raw text if it is not numeric.
            celVal = NFMT.format(NFMT.parse(value).floatValue());
        } catch (ParseException ex) {
            celVal = value;
        }
        str = str.replace("$$celval$$", celVal);
    } else {
        str = str.replace("$$celval$$", "");
    }

    str = str.replace("$$sources$$", buildNoteDiv("Fonte(s):", cell.getSources()));
    str = str.replace("$$labels$$", buildNoteDiv("Legenda(s):", cell.getLabels()));
    str = str.replace("$$notes$$", buildNoteDiv("Nota(s):", cell.getNotes()));

    str = str.replace("$$father$$", father);
    str = str.replace("$$qualifRec$$", qualifRec);

    if (hasTableOptions) {
        str = str.replace("$$tableHeader$$",
                "<strong>Filtros usados para a" + " gerao da tabela de dados do TabNet</strong>\n<ul>\n");
        final StringBuilder options = new StringBuilder();
        for (Map.Entry<String, String> option : tableOptions.entrySet()) {
            options.append("\t\t\t\t\t\t<li><label>");
            options.append(option.getKey());
            options.append(":</label> ");
            options.append(option.getValue());
            options.append("</li>\n");
        }
        str = str.replace("$$tableOptions$$", options.toString() + "\n</ul>");
    } else {
        str = str.replace("$$tableHeader$$", "");
        str = str.replace("$$tableOptions$$", "");
    }

    return str;
}

/** Joins the given lines, separating consecutive entries with an HTML line break. */
private static String joinWithLineBreaks(final List<String> lines) {
    final StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (String line : lines) {
        if (first) {
            first = false;
        } else {
            joined.append("<br/>\n");
        }
        joined.append(line);
    }
    return joined.toString();
}

/** Renders a "note" div with the given label and one paragraph per item; empty string if no items. */
private static String buildNoteDiv(final String label, final List<String> items) {
    if ((items == null) || items.isEmpty()) {
        return "";
    }
    final StringBuilder div = new StringBuilder();
    div.append("<div class=\"note\">\n");
    div.append("\t\t\t\t\t\t\t<label>").append(label).append("</label>\n");
    for (String item : items) {
        div.append("\t\t\t\t\t\t\t<p>");
        div.append(item);
        div.append("</p>\n");
    }
    div.append("\t\t\t\t\t\t</div>\n");
    return div.toString();
}

From source file:net.java.sip.communicator.impl.growlnotification.GrowlNotificationServiceImpl.java

/**
 * Implements <tt>PopupMessageHandler#showPopupMessage()</tt>: removes markup from the
 * message body and title, unescapes the remaining HTML entities and passes the result on
 * to Growl together with the message's icon and tag.
 *
 * @param popupMessage the message we will show
 */
public void showPopupMessage(PopupMessage popupMessage) {
    // Matches one opening or closing tag, e.g. "<b>" or "</a>".
    final String tagRegex = "</?\\w++[^>]*+>";

    final String rawBody = popupMessage.getMessage();
    final String rawTitle = popupMessage.getMessageTitle();

    // Strip the tags first, then turn entities such as "&amp;" back into characters.
    final String plainBody = StringEscapeUtils.unescapeHtml4(rawBody.replaceAll(tagRegex, ""));
    final String plainTitle = StringEscapeUtils.unescapeHtml4(rawTitle.replaceAll(tagRegex, ""));

    growl.notifyGrowlOf(plainTitle, plainBody, SHOW_POPUP_MESSAGE_TYPE, popupMessage.getIcon(),
            popupMessage.getTag());
}

From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java

/**
 * Runs {@code removeNonTextTags} on the input, rewrites every match of
 * {@code REMOVE_ATTRIBUTES_PATTERN} as {@code <$1$2>} (keeping only the first two captured
 * groups inside the angle brackets) and finally unescapes HTML entities.
 *
 * @param html the markup to clean
 * @return the cleaned and unescaped text
 */
public static String removeNonTextTagsAndAttributes(String html) {
    final String withoutNonTextTags = removeNonTextTags(html);

    final Matcher attributeMatcher = REMOVE_ATTRIBUTES_PATTERN.matcher(withoutNonTextTags);
    final String withoutAttributes = attributeMatcher.find()
            ? attributeMatcher.replaceAll("<$1$2>")
            : withoutNonTextTags;

    return StringEscapeUtils.unescapeHtml4(withoutAttributes);
}

From source file:common.Utilities.java

/**
 * Extracts the text between the first {@code REV_START} marker and the next {@code REV_END}
 * marker that follows it, with HTML entities unescaped.
 *
 * @param xmlString the text to search; may be {@code null}
 * @return the unescaped content, or {@code null} if the input is {@code null} or does not
 *         contain a start marker followed by an end marker
 */
public static String getWikiContent(String xmlString) {
    if (xmlString == null) {
        return null;
    }
    int startIndex = xmlString.indexOf(REV_START);
    if (startIndex == -1) {
        return null;
    }
    int contentStart = startIndex + REV_START.length();
    // Look for the end marker only after the start marker: an earlier occurrence would
    // otherwise yield an end index before the content start and make substring() throw.
    int endIndex = xmlString.indexOf(REV_END, contentStart);
    if (endIndex == -1) {
        return null;
    }
    return StringEscapeUtils.unescapeHtml4(xmlString.substring(contentStart, endIndex));
}

From source file:com.nttec.everychan.chans.dvach.DvachReader.java

/**
 * Feeds one character to six independent marker matchers (trip, "tina" trip, post number,
 * label open, label close, country ball).
 * <p>
 * Each matcher keeps its own position into its marker array. A matching character advances
 * the position; once the whole marker has been seen the associated action runs and the
 * position is reset to 0. On a mismatch the position restarts at 1 if the character equals
 * the marker's first element, otherwise at 0.
 *
 * @param ch the character code to examine
 * @throws IOException declared by the signature; presumably raised by the
 *         {@code readUntilSequence} calls - confirm against that helper
 */
@Override
protected void customFilters(int ch) throws IOException {
    // Between a label-open and a label-close marker every character is collected for the date.
    if (inDate)
        dateBuf.append((char) ch);

    // Trip marker: store the tag-stripped, unescaped, trimmed text read up to "</span>".
    if (ch == TRIP_FILTER[curTripPos]) {
        ++curTripPos;
        if (curTripPos == TRIP_FILTER.length) {
            currentPost.trip = StringEscapeUtils
                    .unescapeHtml4(RegexUtils.removeHtmlTags(readUntilSequence("</span>".toCharArray())))
                    .trim();
            curTripPos = 0;
        }
    } else {
        if (curTripPos != 0)
            curTripPos = ch == TRIP_FILTER[0] ? 1 : 0;
    }

    // "Tina" trip marker: same extraction as above, with one extra character appended.
    if (ch == TINATRIP_FILTER[curTinaTripPos]) {
        ++curTinaTripPos;
        if (curTinaTripPos == TINATRIP_FILTER.length) {
            currentPost.trip = StringEscapeUtils
                    .unescapeHtml4(RegexUtils.removeHtmlTags(readUntilSequence("</span>".toCharArray()))).trim()
                    + '\u2655';
            curTinaTripPos = 0;
        }
    } else {
        if (curTinaTripPos != 0)
            curTinaTripPos = ch == TINATRIP_FILTER[0] ? 1 : 0;
    }

    // Number marker: the post number is whatever is read up to the next double quote.
    if (ch == NUM_FILTER[curNumPos]) {
        ++curNumPos;
        if (curNumPos == NUM_FILTER.length) {
            currentPost.number = readUntilSequence("\"".toCharArray());
            curNumPos = 0;
        }
    } else {
        if (curNumPos != 0)
            curNumPos = ch == NUM_FILTER[0] ? 1 : 0;
    }

    // Label-open marker: start collecting date characters into an emptied buffer.
    if (ch == LABELOPEN_FILTER[curLabelOpenPos]) {
        ++curLabelOpenPos;
        if (curLabelOpenPos == LABELOPEN_FILTER.length) {
            inDate = true;
            dateBuf.setLength(0);
            curLabelOpenPos = 0;
        }
    } else {
        if (curLabelOpenPos != 0)
            curLabelOpenPos = ch == LABELOPEN_FILTER[0] ? 1 : 0;
    }

    // Label-close marker: stop collecting and hand the buffered text to the date parser.
    // NOTE(review): the buffer also holds the close marker's own characters, since they were
    // appended at the top of this method before the marker completed - confirm
    // parseDvachDate tolerates that.
    if (ch == LABELCLOSE_FILTER[curLabelClosePos]) {
        ++curLabelClosePos;
        if (curLabelClosePos == LABELCLOSE_FILTER.length) {
            inDate = false;
            parseDvachDate(dateBuf.toString());
            curLabelClosePos = 0;
        }
    } else {
        if (curLabelClosePos != 0)
            curLabelClosePos = ch == LABELCLOSE_FILTER[0] ? 1 : 0;
    }

    // Country-ball marker: pass everything read up to the next '>' to parseCountryBall.
    if (ch == COUNTRYBALL_FILTER[curCountryBallPos]) {
        ++curCountryBallPos;
        if (curCountryBallPos == COUNTRYBALL_FILTER.length) {
            parseCountryBall(readUntilSequence(">".toCharArray()));
            curCountryBallPos = 0;
        }
    } else {
        if (curCountryBallPos != 0)
            curCountryBallPos = ch == COUNTRYBALL_FILTER[0] ? 1 : 0;
    }
}

From source file:com.konakart.actions.SuggestedSearch.java

/**
 * Post-processes a Solr term result by unescaping the HTML entities it contains.
 * Results that are {@code null}, empty or contain no ampersand are returned untouched.
 *
 * @param result the raw term returned by Solr
 * @param rich when {@code true}, a stray "amp;" directly after {@code END_TAG} is removed
 *        as well
 * @return Returns the processed String
 */
private String processTermResult(String result, boolean rich) {
    final boolean nothingToUnescape = result == null || result.length() == 0 || result.indexOf('&') < 0;
    if (nothingToUnescape) {
        return result;
    }

    final String unescaped = StringEscapeUtils.unescapeHtml4(result);
    if (!rich) {
        return unescaped;
    }

    // If the search string ends in an ampersand, solr splits "&amp;" so that only the "&"
    // is highlighted, which leaves "amp;" behind the closing tag.
    return unescaped.replace(END_TAG + "amp;", END_TAG);
}

From source file:FBMsgExtractor.MessageFormatting.java

/**
 * Reads the trimmed message dump line by line, collects the fields of each message into
 * {@code messageDetails} and, on every "***END***" line, writes the collected fields to the
 * cleaned output file.
 * <p>
 * Any exception is printed to standard output and ends the processing. The reader and the
 * writer are closed in all cases.
 */
public MessageFormatting() {
    File inFile = new File(
            "J:\\Uni Work Backup\\Dropbox\\Alex Facebook Thread\\After Parsing\\TrimmedMessages.txt");
    try {
        leagueIDs = new HashMap<>();

        leagueIDs.put("36819082", "Ben Beowulf Reid");
        leagueIDs.put("514098273", "Reginald Amukoshi Emvula");
        leagueIDs.put("36818470", "Alex Holehouse");
        leagueIDs.put("61416976", "Gregory van der Donk");
        leagueIDs.put("503074462", "James Rees");
        leagueIDs.put("505420379", "Jonathan Cain");
        leagueIDs.put("286302858", "Nick Cool Swallow");
        leagueIDs.put("502555542", "Ben Morgan");
        leagueIDs.put("500535605", "Vernon Silson");

        Date date = new Date();

        messageDetails = new HashMap<>();

        outPut = new PrintWriter(
                "J:\\Uni Work Backup\\Dropbox\\Alex Facebook Thread\\After Parsing\\CleanedMessages.txt");

        // NOTE(review): reader and writer use the platform default charset - confirm intended.
        br = new BufferedReader(new InputStreamReader(new FileInputStream(inFile)));
        String line;

        while ((line = br.readLine()) != null) {

            if (line.trim().equals(""))
                continue;

            if (line.startsWith("***LEAGUETHREAD***")) {
                messageDetails.put("*ThreadNumber", line.replace("***LEAGUETHREAD***", "").trim());
                continue;
            }

            if (line.startsWith("***TITLE***")) {
                messageDetails.put("*Title", line.replace("***TITLE***", "").trim());
                continue;
            }

            if (line.startsWith("***MARKER***")) {
                messageDetails.put("*Marker", line.replace("***MARKER***", "").trim());
                continue;
            }

            if (line.startsWith("MessageID:")) {
                messageDetails.put("*MessageID", line.replace("MessageID:", "").trim());
                continue;
            }

            if (line.startsWith("Time:")) {
                line = line.replace("Time: ", "");
                long tempTime = Long.parseLong(line.trim());
                date.setTime(tempTime);
                messageDetails.put("*Time", date.toString());
                continue;
            }

            if (line.startsWith("Author:")) {
                line = line.replace("Author: ", "").trim();
                // Unknown ids map to null, which is later printed as "null".
                messageDetails.put("*Author", leagueIDs.get(line));
                continue;
            }

            if (line.startsWith("Message:")) {
                line = line.replace("Message:", "");
                // NOTE(review): the bytes are decoded with the platform default charset;
                // this looks like a mojibake repair that expects UTF-8 - confirm.
                String tempMessage = new String(StringEscapeUtils.unescapeHtml4(line).getBytes("ISO-8859-1"))
                        .trim();

                messageDetails.put("*Message", tempMessage);
                // NOTE(review): unlike the other branches there is no "continue" here, so the
                // stripped line is still tested against the prefixes below - confirm intended.
            }

            if (line.startsWith("Link:")) {
                line = line.replace("Link: ", "");
                line = line.replace("***LINK: \"", "***LINK: ");
                line = line.replace(" ImageShack</span>", "");
                line = line.replace("</span>", "").trim();

                line = StringEscapeUtils.unescapeHtml4(line);

                line = line.replace("http://lm.facebook.com/l.php?u=", "");
                line = line.replace("https://m.facebook.com/l.php?u=", "");

                // The link is the text between the "***LINK:" marker and the "||" separator;
                // empty when the separator is missing or precedes the marker.
                int linkStart = line.indexOf("***LINK:") + 8;
                int linkEnd = line.indexOf("||");
                String tempLink = (linkEnd > linkStart) ? line.substring(linkStart, linkEnd).trim() : "";

                // Swap the encoded link for its decoded form via a temporary placeholder.
                line = line.replace(tempLink, "***URL***");

                tempLink = URLDecoder.decode(tempLink, "ISO-8859-1");

                line = line.replace("***URL***", tempLink);

                messageDetails.put("*Link", line);
                messageDetails.put("*LinkURL", tempLink);
                continue;
            }

            if (line.startsWith("Image:")) {
                line = line.replace("Image:", "");
                line = line.replace("***IMAGE: \"", "***IMAGE: ").trim();

                line = StringEscapeUtils.unescapeHtml4(line);

                // The image URL is the text between the "***IMAGE:" marker and the first
                // double quote; empty when the quote is missing or precedes the marker.
                int imageStart = line.indexOf("***IMAGE:") + 9;
                int imageEnd = line.indexOf("\"");
                String tempLink = (imageEnd > imageStart) ? line.substring(imageStart, imageEnd).trim() : "";

                // Everything in front of the URL is kept as the line prefix.
                int prefixEnd = line.indexOf(tempLink);
                String tempLine = (prefixEnd > 0) ? line.substring(0, prefixEnd) : "";

                // Decode the URL and drop the thumbnail-size path segments and preview query.
                tempLink = URLDecoder.decode(tempLink, "ISO-8859-1");
                tempLink = tempLink.replace("s100x100/", "");
                tempLink = tempLink.replace("s75x225/", "");
                tempLink = tempLink.replace("p50x50/", "");
                tempLink = tempLink.replace("&preview=1&width=194&height=194", "");

                tempLine += tempLink;
                tempLine = tempLine.trim();

                messageDetails.put("*Image", tempLine);
                messageDetails.put("*ImageURL", tempLink);
                continue;
            }

            if (line.startsWith("***END***")) {
                // NOTE(review): contains("") is always true, so the marker is printed for
                // every message; the searched text may have been lost - confirm.
                if (line.contains(""))
                    System.out.println(messageDetails.get("*Marker"));

                if (messageDetails.containsKey("*ThreadNumber")) {
                    outPut.println("League Thread " + messageDetails.get("*ThreadNumber"));
                    System.out.println(messageDetails.get("*ThreadNumber") + "-" + messageDetails.get("*Time")
                            + "-" + messageDetails.get("*Title"));
                    outPut.println();
                }

                if (messageDetails.containsKey("*Title")) {
                    outPut.println("Title: " + messageDetails.get("*Title"));
                    outPut.println();
                }

                if (messageDetails.containsKey("*Marker")) {
                    outPut.println("*************");
                    outPut.println("Marker: " + messageDetails.get("*Marker"));
                }

                if (messageDetails.containsKey("*MessageID")) {
                    outPut.println("MessageID: " + messageDetails.get("*MessageID"));
                }

                if (messageDetails.containsKey("*Author")) {
                    outPut.println("Author: " + messageDetails.get("*Author"));
                }

                if (messageDetails.containsKey("*Time")) {
                    outPut.println("Time: " + messageDetails.get("*Time"));
                    outPut.println();
                }

                if (messageDetails.containsKey("*Image")) {
                    String tempImage = messageDetails.get("*Image");

                    // The image line may repeat the message text; print it only once.
                    if (messageDetails.containsKey("*Message")) {
                        tempImage = tempImage.replace(messageDetails.get("*Message"), "").trim();
                    }

                    outPut.println("Image: " + tempImage);
                    outPut.println("ImageURL: " + messageDetails.get("*ImageURL"));
                    outPut.println();
                }

                if (messageDetails.containsKey("*Link")) {
                    String tempLink = messageDetails.get("*Link");

                    // Same de-duplication for the link line.
                    if (messageDetails.containsKey("*Message")) {
                        tempLink = tempLink.replace(messageDetails.get("*Message"), "").trim();
                    }

                    outPut.println("Link: " + tempLink);
                    outPut.println("LinkURL: " + messageDetails.get("*LinkURL"));
                    outPut.println();
                }

                if (messageDetails.containsKey("*Message")) {
                    outPut.println("Message: " + messageDetails.get("*Message"));
                    outPut.println();
                }

                messageDetails.clear();
                outPut.println("***END***");
                outPut.println();
            }

            outPut.flush();
        }

    } catch (Exception e) {
        System.out.println(e);
    } finally {
        // Release both files whether or not processing succeeded.
        if (outPut != null) {
            outPut.close();
        }
        if (br != null) {
            try {
                br.close();
            } catch (Exception e) {
                System.out.println(e);
            }
        }
    }

}

From source file:com.nttec.everychan.chans.dvach.DvachBoardsListReader.java

/**
 * Reacts to a matched filter. For a category filter the text up to {@code DT_CLOSE} becomes
 * the current category, unless it contains a question mark, in which case {@code end} is
 * set. For a board filter the text up to {@code DD_CLOSE} is matched against
 * {@code BOARD_PATTERN} and, on success, a new board model is added to {@code boards}.
 * Any other filter value is ignored.
 *
 * @param filter the identifier of the filter that matched
 * @throws IOException declared by the signature; presumably raised by the skip/read
 *         helpers - confirm against them
 */
private void handleFilter(int filter) throws IOException {
    if (filter == FILTER_CATEGORY) {
        skipUntilSequence(CLOSE);
        final String categoryText = readUntilSequence(DT_CLOSE);
        if (categoryText.contains("?")) {
            end = true;
        } else {
            currentCategory = StringEscapeUtils.unescapeHtml4(categoryText);
        }
    } else if (filter == FILTER_BOARD) {
        skipUntilSequence(CLOSE);
        final String boardText = readUntilSequence(DD_CLOSE);
        final Matcher boardMatcher = BOARD_PATTERN.matcher(boardText);
        if (!boardMatcher.find()) {
            return;
        }
        final SimpleBoardModel boardModel = new SimpleBoardModel();
        boardModel.chan = DvachModule.CHAN_NAME;
        boardModel.boardName = boardMatcher.group(1);
        boardModel.boardDescription = boardMatcher.group(2);
        boardModel.boardCategory = currentCategory;
        // Every board that is not listed in SFW_BOARDS is flagged as nsfw.
        boardModel.nsfw = SFW_BOARDS.indexOf(boardModel.boardName) == -1;
        boards.add(boardModel);
    }
}

From source file:com.github.hronom.scrape.dat.rooms.core.grabbers.JxBrowserGrabber.java

/**
 * Loads the given URL in a browser instance, optionally through a proxy, and returns the
 * page's HTML with entities unescaped.
 * <p>
 * The browser is stopped and disposed in all cases, including when loading fails with an
 * exception or the waiting thread is interrupted.
 *
 * @param url the address to load
 * @param proxyHost the proxy host, or {@code null} for a direct connection
 * @param proxyPort the proxy port; values not greater than 0 mean a direct connection
 * @param proxyUsername the proxy user name, or {@code null} for no proxy authentication
 * @param proxyPassword the proxy password, or {@code null} for no proxy authentication
 * @return the unescaped HTML, or {@code null} if the browser could not be initialised or
 *         the thread was interrupted while waiting for the page to load
 */
@Override
public String grabContent(String url, String proxyHost, int proxyPort, String proxyUsername,
        String proxyPassword) {
    try {
        final Browser browser;

        // Set proxy.
        if (proxyHost != null && proxyPort > 0) {
            HostPortPair hostPortPair = new HostPortPair(proxyHost, proxyPort);
            CustomProxyConfig customProxyConfig = new CustomProxyConfig(hostPortPair, hostPortPair,
                    hostPortPair);
            browser = new Browser(customProxyConfig);
        } else {
            DirectProxyConfig directProxyConfig = new DirectProxyConfig();
            browser = new Browser(directProxyConfig);
        }

        try {
            if (proxyUsername != null && proxyPassword != null) {
                browser.getContext().getNetworkService().setNetworkDelegate(new DefaultNetworkDelegate() {
                    @Override
                    public boolean onAuthRequired(AuthRequiredParams params) {
                        // Supply credentials only for proxy authentication requests.
                        if (params.isProxy()) {
                            params.setUsername(proxyUsername);
                            params.setPassword(proxyPassword);
                            return false;
                        }
                        return true;
                    }
                });
            } else {
                browser.getContext().getNetworkService().setNetworkDelegate(new DefaultNetworkDelegate());
            }

            browser.loadURL(url);

            // Wait for loading.
            while (browser.isLoading()) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    logger.error(e);
                    // Restore the interrupt flag for the caller and give up instead of
                    // silently continuing to wait.
                    Thread.currentThread().interrupt();
                    return null;
                }
            }
            return StringEscapeUtils.unescapeHtml4(browser.getHTML());
        } finally {
            // Always release the browser, even if stop() itself fails.
            try {
                browser.stop();
            } finally {
                browser.dispose();
            }
        }
    } catch (ExceptionInInitializerError exceptionInInitializerError) {
        logger.error(exceptionInInitializerError);
        return null;
    }
}

From source file:com.betel.flowers.pdf.util.XMLtoHtml.java

/**
 * Parses the given markup with Jsoup, serialises it again using XML syntax and unescapes
 * the HTML entities of the result.
 *
 * @param htmlString the markup to normalise
 * @return the re-serialised, unescaped markup
 * @throws IOException never thrown by this implementation; kept in the signature for
 *         existing callers
 */
public String checkHTML(String htmlString) throws IOException {
    Document docHtml = Jsoup.parse(htmlString);
    docHtml.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    return StringEscapeUtils.unescapeHtml4(docHtml.html());
}