List of usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:com.zestedesavoir.zestwriter.model.Content.java
public void saveToHtml(File file, MdTextController index) { try (FileOutputStream fos = new FileOutputStream(file)) { BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, "UTF8")); String mdValue = exportContentToMarkdown(0, getDepth()); String htmlValue = StringEscapeUtils.unescapeHtml4(index.markdownToHtml(mdValue)); htmlValue = normalizeHtml(htmlValue); writer.append(MainApp.getMdUtils().addHeaderAndFooterStrict(htmlValue, getTitle())); writer.flush();//from w w w. ja va 2s . c om } catch (Exception e) { MainApp.getLogger().error(e.getMessage(), e); } }
From source file:com.wellsandwhistles.android.redditsp.image.ImageInfo.java
public static ImageInfo parseImgurV3(final JsonBufferedObject object) throws IOException, InterruptedException { String id = null;// w w w .java 2 s . com String urlOriginal = null; String thumbnailUrl = null; String title = null; String caption = null; String type = null; boolean isAnimated = false; Long width = null; Long height = null; Long size = null; boolean mp4 = false; if (object != null) { id = object.getString("id"); title = object.getString("title"); caption = object.getString("description"); type = object.getString("type"); isAnimated = object.getBoolean("animated"); width = object.getLong("width"); height = object.getLong("height"); size = object.getLong("size"); if (object.getString("mp4") != null) { urlOriginal = object.getString("mp4"); mp4 = true; size = object.getLong("mp4_size"); } else { urlOriginal = object.getString("link"); } } if (title != null) { title = StringEscapeUtils.unescapeHtml4(title); } if (caption != null) { caption = StringEscapeUtils.unescapeHtml4(caption); } if (id != null) { thumbnailUrl = "https://i.imgur.com/" + id + "b.jpg"; } return new ImageInfo(urlOriginal, thumbnailUrl, title, caption, type, isAnimated, width, height, size, mp4 ? MediaType.VIDEO : MediaType.IMAGE); }
From source file:com.navercorp.pinpoint.web.controller.BusinessTransactionController.java
@RequestMapping(value = "/bind", method = RequestMethod.POST) @ResponseBody//from ww w . j a va2s .c o m public String metaDataBind(@RequestParam("type") String type, @RequestParam("metaData") String metaData, @RequestParam("bind") String bind) { if (logger.isDebugEnabled()) { logger.debug("POST /bind params {metaData={}, bind={}}", metaData, bind); } if (metaData == null) { return ""; } List<String> bindValues; String combinedResult = ""; if (type.equals("sql")) { bindValues = parameterParser.parseOutputParameter(bind); combinedResult = sqlParser.combineBindValues(metaData, bindValues); } else if (type.equals("mongoJson")) { bindValues = parameterJsonParser.parseOutputParameter(bind); combinedResult = mongoJsonParser.combineBindValues(metaData, bindValues); } if (logger.isDebugEnabled()) { logger.debug("Combined result={}", combinedResult); } if (type.equals("mongoJson")) { return StringEscapeUtils.unescapeHtml4(combinedResult); } return StringEscapeUtils.escapeHtml4(combinedResult); }
From source file:com.nttec.everychan.chans.makaba.MakabaJsonMapper.java
/**
 * Maps one post JSON object to a {@link PostModel}.
 *
 * @param source    JSON object describing the post
 * @param boardName board name, forwarded to {@code mapAttachmentModel} for each file
 * @return the populated post model
 * @throws JSONException if a mandatory field ("num", "timestamp") cannot be read
 */
static PostModel mapPostModel(JSONObject source, String boardName) throws JSONException {
    PostModel model = new PostModel();
    // "num" is read as a string first; if that fails it is read as a long instead.
    try {
        model.number = source.getString("num");
    } catch (JSONException e) {
        model.number = Long.toString(source.getLong("num"));
    }
    // Name and subject are stored HTML-unescaped; the comment is kept as returned.
    model.name = StringEscapeUtils
            .unescapeHtml4(RegexUtils.removeHtmlSpanTags(getStringSafe(source, "name", "")));
    model.subject = StringEscapeUtils.unescapeHtml4(getStringSafe(source, "subject", ""));
    model.comment = getStringSafe(source, "comment", "");
    model.email = getStringSafe(source, "email", "");
    // Strip the "mailto:" prefix (7 characters).
    if (model.email.startsWith("mailto:"))
        model.email = model.email.substring(7);
    model.trip = getStringSafe(source, "trip", "");
    // Replace special tripcode markers with display labels.
    if (model.trip != null) {
        if (model.trip.equals("!!%adm%!!"))
            model.trip = "## Abu ##";
        else if (model.trip.equals("!!%mod%!!"))
            model.trip = "## Mod ##";
        else if (model.trip.equals("!!%Inquisitor%!!"))
            model.trip = "## Applejack ##";
        else if (model.trip.equals("!!%coder%!!"))
            // NOTE(review): this label looks empty here; possibly text lost in extraction — confirm.
            model.trip = "## ##";
    }
    model.icons = parseIcons(getStringSafe(source, "icon", ""));
    model.op = getIntSafe(source, "op", 0) == 1;
    // A post counts as "sage" if the e-mail contains "sage" or the name contains "ID:<nbsp>Heaven".
    model.sage = model.email.toLowerCase(Locale.US).contains("sage") || model.name.contains("ID:\u00A0Heaven");
    // presumably converts seconds to milliseconds — TODO confirm the unit of "timestamp"
    model.timestamp = source.getLong("timestamp") * 1000;
    // A missing or "0" parent means the post is its own parent thread.
    model.parentThread = getStringSafe(source, "parent", model.number);
    if (model.parentThread.equals("0"))
        model.parentThread = model.number;
    if (source.has("files")) {
        JSONArray filesArray = source.getJSONArray("files");
        model.attachments = new AttachmentModel[filesArray.length()];
        for (int i = 0; i < filesArray.length(); ++i) {
            model.attachments[i] = mapAttachmentModel(filesArray.getJSONObject(i), boardName);
        }
    } else
        model.attachments = null;
    // For "banned" values 1 and 2 a red notice is appended to the comment HTML.
    // NOTE(review): both notice literals contain '?' placeholders and the second one is
    // split across two lines as it appears here; the text looks mangled — restore it from
    // the upstream file before compiling.
    int banned = getIntSafe(source, "banned", 0);
    switch (banned) {
    case 1:
        model.comment = model.comment + "<br/><em><font color=\"red\">(? ? ? . ?.)</font></em>";
        break;
    case 2:
        model.comment = model.comment + "<br/><em><font color=\"red\">(? ? ? 
.)</font></em>";
        break;
    }
    return model;
}
From source file:com.nttec.everychan.chans.krautchan.KrautCatalogReader.java
private void handleFilter(int filterIndex) throws IOException { switch (filterIndex) { case FILTER_THREAD_NUMBER: currentThread.posts[0].number = readUntilSequence(FILTERS_CLOSE[filterIndex]); break;//w w w. java 2 s . c o m case FILTER_THREAD_TITLE: String headerHtml = readUntilSequence(FILTERS_CLOSE[filterIndex]); int countryBallIndex = headerHtml.indexOf("<img class=\"post_country\" src=\"/images/balls/"); if (countryBallIndex == -1) { countryBallIndex = headerHtml.indexOf("<img class=\"post_country\" src=\"/images/warballs/"); } if (countryBallIndex != -1) { int start = countryBallIndex + 31; int end = headerHtml.indexOf('\"', start); if (end != -1) { BadgeIconModel icon = new BadgeIconModel(); icon.source = headerHtml.substring(start, end); currentThread.posts[0].icons = new BadgeIconModel[] { icon }; } } currentThread.posts[0].subject = StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlTags(headerHtml)) .trim(); break; case FILTER_THUMBNAIL: AttachmentModel attachment = new AttachmentModel(); attachment.type = AttachmentModel.TYPE_IMAGE_STATIC; attachment.size = -1; attachment.width = -1; attachment.height = -1; attachment.thumbnail = "/thumbnails/" + readUntilSequence(FILTERS_CLOSE[filterIndex]); attachment.path = attachment.thumbnail.replace("/thumbnails/", "/files/"); currentThread.posts[0].attachments = new AttachmentModel[] { attachment }; break; case FILTER_OMITTED: parseOmittedString(readUntilSequence(FILTERS_CLOSE[filterIndex])); break; case FILTER_POST: skipUntilSequence(SECTION_OPEN); currentThread.posts[0].comment = readUntilSequence(SECTION_CLOSE); break; case FILTER_THREAD_END: finalizeThread(); break; } }
From source file:com.github.hronom.scrape.dat.website.controllers.ScrapeButtonController.java
public void processByHtmlUnit() { // Disable fields in view. scrapeView.setWebsiteUrlTextFieldEnabled(false); scrapeView.setSelectorTextFieldEnabled(false); scrapeView.setScrapeButtonEnabled(false); scrapeView.setWorkInProgress(true);//from ww w . j ava2 s .c o m scrapeView.setOutput(""); scrapeView.setProgressBarTaskText("initializing"); logger.info("Start processing..."); long beginTime = System.currentTimeMillis(); // Output input parameters. if (!scrapeView.getWebsiteUrl().isEmpty() && !scrapeView.getSelector().isEmpty()) { logger.info("Input parameters: \"" + scrapeView.getWebsiteUrl() + "\", \"" + scrapeView.getSelector() + "\", \""); } // Process. try { URL url = new URL(scrapeView.getWebsiteUrl()); scrapeView.setProgressBarTaskText("requesting page"); logger.info("Requesting page..."); HtmlPage page = webClient.getPage(url); logger.info("Requesting of page completed."); scrapeView.setProgressBarTaskText("viewing page as XML"); logger.info("View page as XML"); String xml = page.asXml(); // Unescape html. 
scrapeView.setProgressBarTaskText("unescaping HTML"); logger.info("Unescape html"); xml = StringEscapeUtils.unescapeHtml4(xml); logger.info("Get selector"); String selector = scrapeView.getSelector(); if (!xml.isEmpty() && !selector.isEmpty()) { scrapeView.setProgressBarTaskText("parsing HTML"); logger.info("Parse HTML"); Document doc = Jsoup.parse(xml); scrapeView.setProgressBarTaskText("selecting elements in HTML"); logger.info("select elements in HTML"); Elements selectedElements = doc.select(selector); if (!selectedElements.isEmpty()) { scrapeView.setProgressBarTaskText("parsing selected elements"); logger.info("Parse extracted elements"); StringBuilder sb = new StringBuilder(); for (Element element : selectedElements) { String body = element.html(); sb.append(body); sb.append("\n"); sb.append("\n"); } scrapeView.setOutput(sb.toString()); } } } catch (Exception e) { logger.error(e); } webClient.close(); long endTime = System.currentTimeMillis(); logger.info("Process time: " + (endTime - beginTime) + " ms."); logger.info("Processing complete."); // Enable fields in view. scrapeView.setWorkInProgress(false); scrapeView.setScrapeButtonEnabled(true); scrapeView.setSelectorTextFieldEnabled(true); scrapeView.setWebsiteUrlTextFieldEnabled(true); }
From source file:de.dplatz.padersprinter.control.TripService.java
/**
 * Evaluates an XPath expression against a node as a string and HTML-unescapes the result.
 *
 * @param node  context node for the evaluation
 * @param expr  XPath expression to evaluate
 * @param xpath XPath evaluator to use
 * @return the unescaped string value of the expression
 * @throws XPathExpressionException if the expression cannot be evaluated
 */
static String parseStringNode(Node node, String expr, XPath xpath) throws XPathExpressionException {
    final Object evaluated = xpath.evaluate(expr, node, XPathConstants.STRING);
    return StringEscapeUtils.unescapeHtml4((String) evaluated);
}
From source file:mServer.crawler.sender.MediathekBr.java
private void getTheman() { final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html"; final String MUSTER_URL = "<a href=\"/mediathek/video/"; final String MUSTER_URL_1 = "sendungen/"; final String MUSTER_URL_2 = "video/"; listeThemen.clear();//from w ww .ja v a 2s . com MSStringBuilder seite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden()); seite = getUrlIo.getUri(SENDERNAME, ADRESSE, StandardCharsets.UTF_8, 5 /* versuche */, seite, ""); int pos1; int pos2; String url = ""; if ((pos1 = seite.indexOf("<ul class=\"clearFix\">")) != -1) { while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) { if (Config.getStop()) { break; } try { pos1 += MUSTER_URL.length(); if ((pos2 = seite.indexOf("\"", pos1)) != -1) { url = seite.substring(pos1, pos2); } String thema = seite.extract("<span>", "<", pos1); thema = StringEscapeUtils.unescapeXml(thema.trim()); thema = StringEscapeUtils.unescapeHtml4(thema.trim()); if (!listeAlleThemen.contains(thema)) { listeAlleThemen.add(thema); } if (url.isEmpty() || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) { continue; } /// der BR ist etwas zu langsam dafr???? // // in die Liste eintragen // String[] add; // if (MSearchConfig.senderAllesLaden) { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url + "#seriesMoreCount=10", ""}; // } else { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url, ""}; // } // in die Liste eintragen String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema }; listeThemen.addUrl(add); } catch (Exception ex) { Log.errorLog(821213698, ex); } } } }
From source file:com.nttec.everychan.chans.cirno.CirnoCatalogReader.java
private void handleFilter(int filterIndex) throws IOException { switch (filterIndex) { case FILTER_START: String start = readUntilSequence(FILTERS_CLOSE[filterIndex]).trim(); Matcher matcher = PATTERN_START.matcher(start); if (matcher.matches()) { currentThread.posts[0].number = matcher.group(1); try { currentThread.posts[0].timestamp = DateFormats.IICHAN_DATE_FORMAT.parse(matcher.group(2)) .getTime();//from w w w. ja v a 2s.com } catch (Exception e) { } } else { StringBuilder number = new StringBuilder(); char ch; int i = 0; while ((ch = start.charAt(i++)) >= '0' && ch <= '9') number.append(ch); if (number.length() > 0) currentThread.posts[0].number = number.toString(); } break; case FILTER_THUMBNAIL: AttachmentModel attachment = new AttachmentModel(); attachment.type = AttachmentModel.TYPE_IMAGE_STATIC; attachment.size = -1; attachment.width = -1; attachment.height = -1; attachment.thumbnail = readUntilSequence(FILTERS_CLOSE[filterIndex]); attachment.path = attachment.thumbnail.replace("/thumb/", "/src/").replaceAll("(\\d+)s\\.", "$1."); currentThread.posts[0].attachments = new AttachmentModel[] { attachment }; break; case FILTER_SUBJECT: currentThread.posts[0].subject = StringEscapeUtils .unescapeHtml4(readUntilSequence(FILTERS_CLOSE[filterIndex])).trim(); break; case FILTER_COMMENT: currentThread.posts[0].comment = readUntilSequence(FILTERS_CLOSE[filterIndex]); break; case FILTER_END: finalizeThread(); break; } }
From source file:com.seleniumtests.connectors.mails.ImapClient.java
/** * get list of all emails in folder/*w w w . ja v a 2s . com*/ * * @param folderName folder to read * @param firstMessageTime date from which we should get messages * @param firstMessageIndex index of the firste message to find * @throws MessagingException * @throws IOException */ @Override public List<Email> getEmails(String folderName, int firstMessageIndex, LocalDateTime firstMessageTime) throws MessagingException, IOException { if (folderName == null) { throw new MessagingException("folder ne doit pas tre vide"); } // Get folder Folder folder = store.getFolder(folderName); folder.open(Folder.READ_ONLY); // Get directory Message[] messages = folder.getMessages(); List<Message> preFilteredMessages = new ArrayList<>(); final LocalDateTime firstTime = firstMessageTime; // on filtre les message en fonction du mode de recherche if (searchMode == SearchMode.BY_INDEX || firstTime == null) { for (int i = firstMessageIndex, n = messages.length; i < n; i++) { preFilteredMessages.add(messages[i]); } } else { preFilteredMessages = Arrays.asList(folder.search(new SearchTerm() { private static final long serialVersionUID = 1L; @Override public boolean match(Message msg) { try { return !msg.getReceivedDate() .before(Date.from(firstTime.atZone(ZoneId.systemDefault()).toInstant())); } catch (MessagingException e) { return false; } } })); } List<Email> filteredEmails = new ArrayList<>(); lastMessageIndex = messages.length; for (Message message : preFilteredMessages) { String contentType = ""; try { contentType = message.getContentType(); } catch (MessagingException e) { MimeMessage msg = (MimeMessage) message; message = new MimeMessage(msg); contentType = message.getContentType(); } // decode content String messageContent = ""; List<String> attachments = new ArrayList<>(); if (contentType.toLowerCase().contains("text/html")) { messageContent += StringEscapeUtils.unescapeHtml4(message.getContent().toString()); } else if (contentType.toLowerCase().contains("multipart/")) { 
List<BodyPart> partList = getMessageParts((Multipart) message.getContent()); // store content in list for (BodyPart part : partList) { String partContentType = part.getContentType().toLowerCase(); if (partContentType.contains("text/html")) { messageContent = messageContent .concat(StringEscapeUtils.unescapeHtml4(part.getContent().toString())); } else if (partContentType.contains("text/") && !partContentType.contains("vcard")) { messageContent = messageContent.concat((String) part.getContent().toString()); } else if (partContentType.contains("image") || partContentType.contains("application/") || partContentType.contains("text/x-vcard")) { if (part.getFileName() != null) { attachments.add(part.getFileName()); } else { attachments.add(part.getDescription()); } } else { logger.debug("type: " + part.getContentType()); } } } // create a new email filteredEmails.add(new Email(message.getSubject(), messageContent, "", message.getReceivedDate().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(), attachments)); } folder.close(false); return filteredEmails; }