List of usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:br.bireme.tb.RIPSA.java
/** Extracts the four-digit group that follows {@code idb} in the father URL. */
private static final Pattern FATHER_YEAR_PAT = Pattern.compile("idb(\\d{4})");

/**
 * Given a cell, uses its info to fill the holes of the template String.
 *
 * <p>Every {@code $$name$$} placeholder of {@code TEMPLATE} is replaced, in a
 * fixed order, by the matching piece of cell data; placeholders whose data is
 * missing or empty are replaced by the empty string.
 *
 * <p>NOTE(review): the literals "Clulas", "No ativa" and "gerao" look like they
 * lost accented characters somewhere upstream; they are kept byte-for-byte
 * here. Confirm against the original project before changing them.
 *
 * @param cell a cell having the data to fill the holes
 * @return a String with holes replaced
 * @throws IOException if {@code TEMPLATE} is null, or if the qualifRec or
 *         father URL of the cell does not match the expected pattern
 */
static String cell2html(final Cell cell) throws IOException {
    assert cell != null;

    if (TEMPLATE == null) {
        throw new IOException("TEMPLATE is null");
    }

    final String qualifRec = cell.getElem().qualifRec.toString();
    final Matcher mat = EDITION_PAT.matcher(qualifRec);
    if (!mat.find()) {
        throw new IOException("out of pattern url [" + qualifRec + "]");
    }
    final String edition = mat.group(2);

    final String father = cell.getElem().father.toString();
    final Matcher matf = FATHER_YEAR_PAT.matcher(father);
    if (!matf.find()) {
        throw new IOException("out of pattern url [" + father + "]");
    }
    final String year = matf.group(1);

    String str = TEMPLATE;

    // Null-safe copies: the raw values may be null and must never reach
    // string concatenation or the StringBuilder constructor.
    final String title = cell.getTitle();
    final String title2 = (title == null) ? "" : title;
    final String subtitle = cell.getSubtitle();
    final String subtitle2 = (subtitle == null) ? "" : subtitle;

    str = str.replace("$$title$$", title2);
    if (!subtitle2.isEmpty()) {
        str = str.replace("$$subtitle$$", "<h2>" + subtitle2 + "</h2>");
    } else {
        str = str.replace("$$subtitle$$", "");
    }
    // subtitle2 (not subtitle) so that a missing subtitle does not show up
    // as the literal text "null" in the description.
    str = str.replace("$$description$$",
            "Clulas IDB - " + edition + " - " + year + " - " + title2 + " - " + subtitle2);

    // title2 (not title): new StringBuilder((String) null) throws NPE.
    final StringBuilder keywords = new StringBuilder(title2);
    final Map<String, String> tableOptions = cell.getElem().tableOptions;
    final boolean hasTableOptions = (tableOptions != null) && (!tableOptions.isEmpty());
    if (hasTableOptions) {
        for (String opt : tableOptions.values()) {
            if ((!opt.equals("No ativa")) && (!opt.equals("Todas as categorias"))) {
                keywords.append(", ");
                keywords.append(StringEscapeUtils.unescapeHtml4(opt));
            }
        }
    }
    str = str.replace("$$keywords$$", keywords.toString());

    final List<String> scope = cell.getScope();
    if ((scope != null) && (!scope.isEmpty())) {
        str = str.replace("$$scope$$", "<h3>" + joinWithBreaks(scope) + "</h3>");
    } else {
        str = str.replace("$$scope$$", "");
    }

    final List<String> header = cell.getHeader();
    if ((header != null) && (!header.isEmpty())) {
        str = str.replace("$$celheader$$", joinWithBreaks(header));
    } else {
        str = str.replace("$$celheader$$", "");
    }

    final String row = cell.getRow();
    str = str.replace("$$celrow$$", (row == null) ? "" : row);

    final String value = cell.getValue();
    if ((value != null) && (!value.isEmpty())) {
        String celVal;
        try {
            celVal = NFMT.format(NFMT.parse(value).floatValue());
        } catch (ParseException ex) {
            // Not a number NFMT understands: show the raw value instead.
            celVal = value;
        }
        str = str.replace("$$celval$$", celVal);
    } else {
        str = str.replace("$$celval$$", "");
    }

    str = str.replace("$$sources$$", noteBlock("Fonte(s):", cell.getSources()));
    str = str.replace("$$labels$$", noteBlock("Legenda(s):", cell.getLabels()));
    str = str.replace("$$notes$$", noteBlock("Nota(s):", cell.getNotes()));

    str = str.replace("$$father$$", father);
    str = str.replace("$$qualifRec$$", qualifRec);

    if (hasTableOptions) {
        str = str.replace("$$tableHeader$$",
                "<strong>Filtros usados para a" + " gerao da tabela de dados do TabNet</strong>\n<ul>\n");
        final StringBuilder options = new StringBuilder();
        for (Map.Entry<String, String> option : tableOptions.entrySet()) {
            options.append("\t\t\t\t\t\t<li><label>");
            options.append(option.getKey());
            options.append(":</label> ");
            options.append(option.getValue());
            options.append("</li>\n");
        }
        str = str.replace("$$tableOptions$$", options.toString() + "\n</ul>");
    } else {
        str = str.replace("$$tableHeader$$", "");
        str = str.replace("$$tableOptions$$", "");
    }

    return str;
}

/** Joins the given lines, separating consecutive ones with an HTML line break. */
private static String joinWithBreaks(final List<String> lines) {
    final StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (String line : lines) {
        if (first) {
            first = false;
        } else {
            joined.append("<br/>\n");
        }
        joined.append(line);
    }
    return joined.toString();
}

/** Renders a captioned "note" div with one paragraph per item, or "" when there are no items. */
private static String noteBlock(final String caption, final List<String> items) {
    if ((items == null) || items.isEmpty()) {
        return "";
    }
    final StringBuilder block = new StringBuilder();
    block.append("<div class=\"note\">\n");
    block.append("\t\t\t\t\t\t\t<label>").append(caption).append("</label>\n");
    for (String item : items) {
        block.append("\t\t\t\t\t\t\t<p>");
        block.append(item);
        block.append("</p>\n");
    }
    block.append("\t\t\t\t\t\t</div>\n");
    return block.toString();
}
From source file:net.java.sip.communicator.impl.growlnotification.GrowlNotificationServiceImpl.java
/** * Implements <tt>PopupMessageHandler#showPopupMessage()</tt> * * @param popupMessage the message we will show *///from w ww . ja v a2 s . com public void showPopupMessage(PopupMessage popupMessage) { String messageBody = popupMessage.getMessage(); String messageTitle = popupMessage.getMessageTitle(); // remove eventual HTML code before showing the pop-up message messageBody = messageBody.replaceAll("</?\\w++[^>]*+>", ""); messageTitle = messageTitle.replaceAll("</?\\w++[^>]*+>", ""); // unescape any chars that can be escaped inside the text messageBody = StringEscapeUtils.unescapeHtml4(messageBody); messageTitle = StringEscapeUtils.unescapeHtml4(messageTitle); growl.notifyGrowlOf(messageTitle, messageBody, SHOW_POPUP_MESSAGE_TYPE, popupMessage.getIcon(), popupMessage.getTag()); }
From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java
/**
 * Removes all non-text tags (Javascript, css etc) from a string along with
 * all the attributes from the tags.
 *
 * <p>Works in three steps: delegates to {@code removeNonTextTags}, rewrites
 * every match of {@code REMOVE_ATTRIBUTES_PATTERN} as {@code <$1$2>}, and
 * finally unescapes HTML entities in the result.
 *
 * @param html the HTML text to clean
 * @return the cleaned text with HTML entities unescaped
 */
public static String removeNonTextTagsAndAttributes(String html) {
    final String textTagsOnly = removeNonTextTags(html);

    final Matcher attributes = REMOVE_ATTRIBUTES_PATTERN.matcher(textTagsOnly);
    final String bareTags = attributes.find()
            ? attributes.replaceAll("<$1$2>")
            : textTagsOnly;

    return StringEscapeUtils.unescapeHtml4(bareTags);
}
From source file:common.Utilities.java
/**
 * Extracts the text found between the first {@code REV_START} marker and the
 * first {@code REV_END} marker that follows it, with HTML entities unescaped.
 *
 * <p>The end marker is searched for only after the start marker, so an end
 * marker that happens to appear earlier in the input (or overlaps the start
 * marker) no longer causes a {@link StringIndexOutOfBoundsException}.
 *
 * @param xmlString the text to search; may be null
 * @return the unescaped content between the markers, or {@code null} when the
 *         input is null or either marker is missing
 */
public static String getWikiContent(String xmlString) {
    if (xmlString == null) {
        return null;
    }
    final int startIndex = xmlString.indexOf(REV_START);
    if (startIndex == -1) {
        return null;
    }
    final int contentStart = startIndex + REV_START.length();
    final int endIndex = xmlString.indexOf(REV_END, contentStart);
    if (endIndex == -1) {
        return null;
    }
    return StringEscapeUtils.unescapeHtml4(xmlString.substring(contentStart, endIndex));
}
From source file:com.nttec.everychan.chans.dvach.DvachReader.java
@Override protected void customFilters(int ch) throws IOException { if (inDate)//from w w w.ja va2 s . co m dateBuf.append((char) ch); if (ch == TRIP_FILTER[curTripPos]) { ++curTripPos; if (curTripPos == TRIP_FILTER.length) { currentPost.trip = StringEscapeUtils .unescapeHtml4(RegexUtils.removeHtmlTags(readUntilSequence("</span>".toCharArray()))) .trim(); curTripPos = 0; } } else { if (curTripPos != 0) curTripPos = ch == TRIP_FILTER[0] ? 1 : 0; } if (ch == TINATRIP_FILTER[curTinaTripPos]) { ++curTinaTripPos; if (curTinaTripPos == TINATRIP_FILTER.length) { currentPost.trip = StringEscapeUtils .unescapeHtml4(RegexUtils.removeHtmlTags(readUntilSequence("</span>".toCharArray()))).trim() + '\u2655'; curTinaTripPos = 0; } } else { if (curTinaTripPos != 0) curTinaTripPos = ch == TINATRIP_FILTER[0] ? 1 : 0; } if (ch == NUM_FILTER[curNumPos]) { ++curNumPos; if (curNumPos == NUM_FILTER.length) { currentPost.number = readUntilSequence("\"".toCharArray()); curNumPos = 0; } } else { if (curNumPos != 0) curNumPos = ch == NUM_FILTER[0] ? 1 : 0; } if (ch == LABELOPEN_FILTER[curLabelOpenPos]) { ++curLabelOpenPos; if (curLabelOpenPos == LABELOPEN_FILTER.length) { inDate = true; dateBuf.setLength(0); curLabelOpenPos = 0; } } else { if (curLabelOpenPos != 0) curLabelOpenPos = ch == LABELOPEN_FILTER[0] ? 1 : 0; } if (ch == LABELCLOSE_FILTER[curLabelClosePos]) { ++curLabelClosePos; if (curLabelClosePos == LABELCLOSE_FILTER.length) { inDate = false; parseDvachDate(dateBuf.toString()); curLabelClosePos = 0; } } else { if (curLabelClosePos != 0) curLabelClosePos = ch == LABELCLOSE_FILTER[0] ? 1 : 0; } if (ch == COUNTRYBALL_FILTER[curCountryBallPos]) { ++curCountryBallPos; if (curCountryBallPos == COUNTRYBALL_FILTER.length) { parseCountryBall(readUntilSequence(">".toCharArray())); curCountryBallPos = 0; } } else { if (curCountryBallPos != 0) curCountryBallPos = ch == COUNTRYBALL_FILTER[0] ? 1 : 0; } }
From source file:com.konakart.actions.SuggestedSearch.java
/**
 * Post-processes a Solr term result by unescaping any HTML entities in it.
 *
 * <p>Results that are null, empty, or contain no ampersand cannot hold an
 * entity and are returned untouched.
 *
 * @param result the term result returned by Solr
 * @param rich   whether the result carries highlighting tags; when true, an
 *               {@code amp;} left directly behind {@code END_TAG} is removed
 * @return Returns the processed String
 */
private String processTermResult(String result, boolean rich) {
    final boolean mayHoldEntities = result != null && !result.isEmpty() && result.contains("&");
    if (!mayHoldEntities) {
        return result;
    }

    final String unescaped = StringEscapeUtils.unescapeHtml4(result);
    if (!rich) {
        return unescaped;
    }

    // When the search string ends in an ampersand, Solr splits the escaped
    // ampersand so that only the '&' character itself is highlighted, which
    // strands "amp;" right after the end tag.
    return unescaped.replace(END_TAG + "amp;", END_TAG);
}
From source file:FBMsgExtractor.MessageFormatting.java
public MessageFormatting() { File inFile = new File( "J:\\Uni Work Backup\\Dropbox\\Alex Facebook Thread\\After Parsing\\TrimmedMessages.txt"); try {//from www. j a v a 2s . com leagueIDs = new HashMap<>(); leagueIDs.put("36819082", "Ben Beowulf Reid"); leagueIDs.put("514098273", "Reginald Amukoshi Emvula"); leagueIDs.put("36818470", "Alex Holehouse"); leagueIDs.put("61416976", "Gregory van der Donk"); leagueIDs.put("503074462", "James Rees"); leagueIDs.put("505420379", "Jonathan Cain"); leagueIDs.put("286302858", "Nick Cool Swallow"); leagueIDs.put("502555542", "Ben Morgan"); leagueIDs.put("500535605", "Vernon Silson"); Date date = new Date(); messageDetails = new HashMap<>(); int counter = 0; outPut = new PrintWriter( "J:\\Uni Work Backup\\Dropbox\\Alex Facebook Thread\\After Parsing\\CleanedMessages.txt"); br = new BufferedReader(new InputStreamReader(new FileInputStream(inFile))); String line; while (((line = br.readLine()) != null)) //&& (count < 100)) { if (line.trim().equals("")) continue; if (line.startsWith("***LEAGUETHREAD***")) { messageDetails.put("*ThreadNumber", line.replace("***LEAGUETHREAD***", "").trim()); continue; } if (line.startsWith("***TITLE***")) { messageDetails.put("*Title", line.replace("***TITLE***", "").trim()); continue; } if (line.startsWith("***MARKER***")) { messageDetails.put("*Marker", line.replace("***MARKER***", "").trim()); continue; } if (line.startsWith("MessageID:")) { messageDetails.put("*MessageID", line.replace("MessageID:", "").trim()); continue; } if (line.startsWith("Time:")) { line = line.replace("Time: ", ""); long tempTime = Long.parseLong(line.trim()); date.setTime(tempTime); messageDetails.put("*Time", date.toString()); continue; } if (line.startsWith("Author:")) { line = line.replace("Author: ", "").trim(); messageDetails.put("*Author", leagueIDs.get(line)); continue; } if (line.startsWith("Message:")) { line = line.replace("Message:", ""); String tempMessage = new 
String(StringEscapeUtils.unescapeHtml4(line).getBytes("ISO-8859-1")) .trim(); messageDetails.put("*Message", tempMessage); } if (line.startsWith("Link:")) { line = line.replace("Link: ", ""); line = line.replace("***LINK: \"", "***LINK: "); line = line.replace(" ImageShack</span>", ""); line = line.replace("</span>", "").trim(); line = StringEscapeUtils.unescapeHtml4(line); line = line.replace("http://lm.facebook.com/l.php?u=", ""); line = line.replace("https://m.facebook.com/l.php?u=", ""); String tempLink = ""; char[] linkArray = line.toCharArray(); for (int i = (line.indexOf("***LINK:") + 8); i < line.indexOf("||"); i++) { tempLink += linkArray[i]; } tempLink = tempLink.trim(); line = line.replace(tempLink, "***URL***"); tempLink = URLDecoder.decode(tempLink, "ISO-8859-1"); line = line.replace("***URL***", tempLink); messageDetails.put("*Link", line); messageDetails.put("*LinkURL", tempLink); continue; } if (line.startsWith("Image:")) { line = line.replace("Image:", ""); line = line.replace("***IMAGE: \"", "***IMAGE: ").trim(); line = StringEscapeUtils.unescapeHtml4(line); String tempLink = ""; char[] linkArray = line.toCharArray(); for (int i = (line.indexOf("***IMAGE:") + 9); i < line.indexOf("\""); i++) { tempLink += linkArray[i]; } tempLink = tempLink.trim(); char[] cutLine = line.toCharArray(); String tempLine = ""; for (int i = 0; i < line.indexOf(tempLink); i++) { tempLine += cutLine[i]; } tempLink = URLDecoder.decode(tempLink, "ISO-8859-1"); tempLink = tempLink.replaceAll("s100x100/", ""); tempLink = tempLink.replaceAll("s75x225/", ""); tempLink = tempLink.replaceAll("p50x50/", ""); tempLink = tempLink.replaceAll("&preview=1&width=194&height=194", ""); tempLine += tempLink; tempLine = tempLine.trim(); messageDetails.put("*Image", tempLine); messageDetails.put("*ImageURL", tempLink); continue; } if (line.startsWith("***END***")) { if (line.contains("")) System.out.println(messageDetails.get("*Marker")); if (messageDetails.containsKey("*ThreadNumber")) { 
outPut.println("League Thread " + messageDetails.get("*ThreadNumber")); System.out.println(messageDetails.get("*ThreadNumber") + "-" + messageDetails.get("*Time") + "-" + messageDetails.get("*Title")); outPut.println(); } if (messageDetails.containsKey("*Title")) { outPut.println("Title: " + messageDetails.get("*Title")); outPut.println(); } if (messageDetails.containsKey("*Marker")) { outPut.println("*************"); outPut.println("Marker: " + messageDetails.get("*Marker")); } if (messageDetails.containsKey("*MessageID")) { outPut.println("MessageID: " + messageDetails.get("*MessageID")); } if (messageDetails.containsKey("*Author")) { outPut.println("Author: " + messageDetails.get("*Author")); } if (messageDetails.containsKey("*Time")) { outPut.println("Time: " + messageDetails.get("*Time")); outPut.println(); } //if(messageDetails.containsKey("*Image") && messageDetails.containsKey("*Link")) //System.out.println(messageDetails.get("*Marker")); if (messageDetails.containsKey("*Image")) { String tempImage = messageDetails.get("*Image"); if (messageDetails.containsKey("*Message")) { tempImage = tempImage.replace(messageDetails.get("*Message"), "").trim(); } outPut.println("Image: " + tempImage); outPut.println("ImageURL: " + messageDetails.get("*ImageURL")); outPut.println(); } if (messageDetails.containsKey("*Link")) { String tempLink = messageDetails.get("*Link"); if (messageDetails.containsKey("*Message")) { tempLink = tempLink.replace(messageDetails.get("*Message"), "").trim(); } outPut.println("Link: " + tempLink); outPut.println("LinkURL: " + messageDetails.get("*LinkURL")); outPut.println(); } if (messageDetails.containsKey("*Message")) { outPut.println("Message: " + messageDetails.get("*Message")); outPut.println(); } messageDetails.clear(); outPut.println("***END***"); outPut.println(); } outPut.flush(); } outPut.close(); } catch (Exception e) { System.out.println(e); } }
From source file:com.nttec.everychan.chans.dvach.DvachBoardsListReader.java
/**
 * Reacts to a filter that has just been matched in the boards list.
 *
 * <p>For {@code FILTER_CATEGORY} the text up to {@code DT_CLOSE} becomes the
 * current category, unless it contains a question mark, in which case the
 * {@code end} flag is raised instead. For {@code FILTER_BOARD} the text up to
 * {@code DD_CLOSE} is matched against {@code BOARD_PATTERN} and, on success,
 * a new board model is added to {@code boards}. Other filters are ignored.
 *
 * @param filter the identifier of the matched filter
 * @throws IOException if reading from the underlying input fails
 */
private void handleFilter(int filter) throws IOException {
    if (filter == FILTER_CATEGORY) {
        skipUntilSequence(CLOSE);
        final String categoryHtml = readUntilSequence(DT_CLOSE);
        if (categoryHtml.contains("?")) {
            end = true;
        } else {
            currentCategory = StringEscapeUtils.unescapeHtml4(categoryHtml);
        }
    } else if (filter == FILTER_BOARD) {
        skipUntilSequence(CLOSE);
        final String boardHtml = readUntilSequence(DD_CLOSE);
        final Matcher descriptor = BOARD_PATTERN.matcher(boardHtml);
        if (!descriptor.find()) {
            return;
        }
        final SimpleBoardModel entry = new SimpleBoardModel();
        entry.chan = DvachModule.CHAN_NAME;
        entry.boardName = descriptor.group(1);
        entry.boardDescription = descriptor.group(2);
        entry.boardCategory = currentCategory;
        // Anything not explicitly listed as safe-for-work is flagged NSFW.
        entry.nsfw = SFW_BOARDS.indexOf(entry.boardName) == -1;
        boards.add(entry);
    }
}
From source file:com.github.hronom.scrape.dat.rooms.core.grabbers.JxBrowserGrabber.java
@Override public String grabContent(String url, String proxyHost, int proxyPort, String proxyUsername, String proxyPassword) {// www . j a v a 2 s. c o m try { Browser browser; // Set proxy. if (proxyHost != null && proxyPort > 0) { HostPortPair hostPortPair = new HostPortPair(proxyHost, proxyPort); CustomProxyConfig customProxyConfig = new CustomProxyConfig(hostPortPair, hostPortPair, hostPortPair); browser = new Browser(customProxyConfig); } else { DirectProxyConfig directProxyConfig = new DirectProxyConfig(); browser = new Browser(directProxyConfig); } if (proxyUsername != null && proxyPassword != null) { browser.getContext().getNetworkService().setNetworkDelegate(new DefaultNetworkDelegate() { @Override public boolean onAuthRequired(AuthRequiredParams params) { if (params.isProxy()) { params.setUsername(proxyUsername); params.setPassword(proxyPassword); return false; } return true; } }); } else { browser.getContext().getNetworkService().setNetworkDelegate(new DefaultNetworkDelegate()); } browser.loadURL(url); // Wait for loading. while (browser.isLoading()) { try { Thread.sleep(1000); } catch (InterruptedException e) { logger.error(e); } } String html = browser.getHTML(); html = StringEscapeUtils.unescapeHtml4(html); browser.stop(); browser.dispose(); return html; } catch (ExceptionInInitializerError exceptionInInitializerError) { logger.error(exceptionInInitializerError); return null; } }
From source file:com.betel.flowers.pdf.util.XMLtoHtml.java
/**
 * Parses the given HTML with Jsoup, serialises it back using XML syntax, and
 * unescapes the HTML entities of the result.
 *
 * @param htmlString the HTML text to check
 * @return the re-serialised document with HTML entities unescaped
 * @throws IOException declared for callers; nothing in this body throws it directly
 */
public String checkHTML(String htmlString) throws IOException {
    final Document parsed = Jsoup.parse(htmlString);
    parsed.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    return StringEscapeUtils.unescapeHtml4(parsed.html());
}