Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:msearch.filmeSuchen.sender.MediathekReader.java
/**
 * Decodes a topic name and checks it against the collected topic list.
 *
 * <p>The input is trimmed, XML entities are decoded, the result is trimmed again and
 * HTML 4 entities are decoded.
 *
 * @param thema raw topic text; must not be {@code null}
 * @return the decoded topic when {@code listeAllThemen} contains it, otherwise {@code sendername}
 */
String checkThema(String thema) {
    final String xmlDecoded = StringEscapeUtils.unescapeXml(thema.trim());
    final String decoded = StringEscapeUtils.unescapeHtml4(xmlDecoded.trim());
    return listeAllThemen.contains(decoded) ? decoded : sendername;
}
From source file:com.huguesjohnson.retroleague.rss.RssParserHandler.java
@Override public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); try {// w w w . ja va 2 s .c o m String tagName = localName; if ((tagName == null) || (tagName.length() < 1)) { tagName = qName; } if (inItemBlock) { if (tagName.equalsIgnoreCase(TAG_ITEM)) { if (this.currentEntry.getPostedDate().compareTo(this.minimumDate) > 0) { this.entryList.add(this.currentEntry); } this.currentEntry = null; readThis = false; inItemBlock = false; } else if (tagName.equalsIgnoreCase(TAG_TITLE)) { //check if this is the rss channel title or the item title if (this.currentEntry != null) { String title = this.characters.toString(); if (this.source == Sources.Facebook) { title = StringEscapeUtils.unescapeHtml4(title); } this.currentEntry.setTitle(title); } readThis = false; } else if (tagName.equalsIgnoreCase(TAG_DESCRIPTION)) { //check if this is the rss channel description or the item description if (this.currentEntry != null) { //TODO - hack to ensure content is html, maybe look for a less hacky solution StringBuffer content = new StringBuffer(this.characters.toString()); if ((!content.substring(0, 1).equals("<")) || (!content.substring(0, 4).equals("<"))) { content.insert(0, "<p>"); content.append("</p>"); } this.currentEntry.setContent(content.toString()); } readThis = false; } else if (tagName.equalsIgnoreCase(TAG_PUBDATE)) { String sDate = this.characters.toString(); this.currentEntry.setPostedDate(this.dateParser.parseDate(sDate)); readThis = false; } else if (tagName.equalsIgnoreCase(TAG_LINK)) { //check if this is the rss channel link or the item link if (this.currentEntry != null) { this.currentEntry.setUrl(this.characters.toString()); } readThis = false; } } else { if (tagName.equalsIgnoreCase(TAG_TITLE)) { //don't care about the rss channel title readThis = false; } } } catch (Exception x) { Log.e(TAG, "endElement", x); } }
From source file:com.wellsandwhistles.android.redditsp.reddit.things.RedditSubreddit.java
/**
 * Builds a standalone HTML page around the subreddit's sidebar description.
 *
 * <p>{@code description_html} is HTML-unescaped and placed inside the {@code <body>}.
 * A viewport meta tag is always emitted; in night mode a small inline stylesheet is
 * added to the head.
 *
 * @param nightMode when {@code true}, embeds a white-on-black stylesheet
 * @return the complete HTML document; the body is empty when {@code description_html}
 *         is {@code null}
 */
public String getSidebarHtml(boolean nightMode) {
    // unescapeHtml4(null) returns null, which previously caused an NPE on length().
    final String unescaped = description_html == null ? ""
            : StringEscapeUtils.unescapeHtml4(description_html);

    final StringBuilder result = new StringBuilder(unescaped.length() + 512);
    result.append("<html>");
    result.append("<head>");
    result.append("<meta name=\"viewport\" content=\"width=device-width, user-scalable=yes\">");
    if (nightMode) {
        result.append("<style>");
        result.append("body {color: white; background-color: black;}");
        // Hex colours need the leading '#'; without it the value is only accepted in quirks mode.
        result.append("a {color: #3399FF; background-color: #000033;}");
        result.append("</style>");
    }
    result.append("</head>");
    result.append("<body>");
    result.append(unescaped);
    result.append("</body>");
    result.append("</html>");
    return result.toString();
}
From source file:com.nttec.everychan.chans.sevenchan.SevenchanReader.java
/**
 * Per-character hook that feeds {@code ch} to a set of incremental sequence matchers.
 *
 * <p>While {@code inDate} is set, {@code ch} is first appended to {@code dateBuffer}.
 * Then, for every {@code *_FILTER} array, its own position counter is advanced when
 * {@code ch} equals the element at the current position. When the counter reaches the
 * array length the action below runs and the counter is reset to 0. On a mismatch a
 * non-zero counter is reset to 1 if {@code ch} equals the first element of that array,
 * otherwise to 0.
 *
 * <p>Actions on a complete match:
 * <ul>
 * <li>{@code NUMBER_FILTER}: reads up to the next double quote into {@code currentPost.number}.</li>
 * <li>{@code SUBJECT_FILTER}: reads up to {@code SPAN_CLOSE}, HTML-unescapes and trims the text,
 *     passes it through {@code CryptoUtils.fixCloudflareEmails} and stores it as the subject.</li>
 * <li>{@code ATTACHMENT_FILTER}: passes the text up to {@code P_CLOSE} to {@code parseAttachment}.</li>
 * <li>{@code ATTACHMENT_MULTI_FIRST_FILTER} / {@code ATTACHMENT_MULTI_FILTER}: pass the text up to
 *     {@code SPAN_CLOSE} to {@code parseAttachment}.</li>
 * <li>{@code COMMENT_FILTER}: stores {@code readPostComment()} as the comment, prepends a pending
 *     admin mark and then a pending mod mark to {@code currentPost.trip} (a null trip is treated
 *     as empty), clears both marks and calls {@code finalizePost()}.</li>
 * <li>{@code DATE_START_FILTER}: sets {@code inDate} and empties {@code dateBuffer}.</li>
 * <li>{@code DATE_END_FILTER}: applies {@code DATE_PATTERN} to the trimmed buffer and hands group 1
 *     to {@code parseDate}. If {@code currentPost.timestamp} is still 0 afterwards, a fallback
 *     HTML-unescapes the text, rebuilds a {@code xx/xx/xx xx:xx:xx} string from fixed character
 *     positions, subtracting 65248 from each, and parses it with {@code DATE_FORMAT_ALT}; any
 *     exception in the fallback is silently ignored. Finally {@code inDate} is cleared and the
 *     buffer emptied.</li>
 * <li>{@code ADMIN_FILTER} / {@code MOD_FILTER}: store the HTML-unescaped, trimmed text up to
 *     {@code SPAN_CLOSE} in {@code lastAdminMark} / {@code lastModMark}.</li>
 * </ul>
 *
 * <p>NOTE(review): 65248 is 0xFEE0, the distance between the Unicode fullwidth forms and their
 * ASCII counterparts, so the fallback presumably handles dates written with fullwidth digits -
 * confirm against real board output.
 *
 * @param ch the character just read from the stream
 * @throws IOException declared by the signature; presumably propagated from the read helpers
 */
@Override protected void customFilters(int ch) throws IOException { if (inDate) dateBuffer.append((char) ch); if (ch == NUMBER_FILTER[curNumberPos]) { ++curNumberPos; if (curNumberPos == NUMBER_FILTER.length) { currentPost.number = readUntilSequence("\"".toCharArray()); curNumberPos = 0; } } else { if (curNumberPos != 0) curNumberPos = ch == NUMBER_FILTER[0] ? 1 : 0; } if (ch == SUBJECT_FILTER[curSubjectPos]) { ++curSubjectPos; if (curSubjectPos == SUBJECT_FILTER.length) { currentPost.subject = CryptoUtils .fixCloudflareEmails(StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim()); curSubjectPos = 0; } } else { if (curSubjectPos != 0) curSubjectPos = ch == SUBJECT_FILTER[0] ? 1 : 0; } if (ch == ATTACHMENT_FILTER[curAttachmentPos]) { ++curAttachmentPos; if (curAttachmentPos == ATTACHMENT_FILTER.length) { parseAttachment(readUntilSequence(P_CLOSE)); curAttachmentPos = 0; } } else { if (curAttachmentPos != 0) curAttachmentPos = ch == ATTACHMENT_FILTER[0] ? 1 : 0; } if (ch == ATTACHMENT_MULTI_FIRST_FILTER[curAttachmentMultiFirstPos]) { ++curAttachmentMultiFirstPos; if (curAttachmentMultiFirstPos == ATTACHMENT_MULTI_FIRST_FILTER.length) { parseAttachment(readUntilSequence(SPAN_CLOSE)); curAttachmentMultiFirstPos = 0; } } else { if (curAttachmentMultiFirstPos != 0) curAttachmentMultiFirstPos = ch == ATTACHMENT_MULTI_FIRST_FILTER[0] ? 1 : 0; } if (ch == ATTACHMENT_MULTI_FILTER[curAttachmentMultiPos]) { ++curAttachmentMultiPos; if (curAttachmentMultiPos == ATTACHMENT_MULTI_FILTER.length) { parseAttachment(readUntilSequence(SPAN_CLOSE)); curAttachmentMultiPos = 0; } } else { if (curAttachmentMultiPos != 0) curAttachmentMultiPos = ch == ATTACHMENT_MULTI_FILTER[0] ? 1 : 0; } if (ch == COMMENT_FILTER[curCommentPos]) { ++curCommentPos; if (curCommentPos == COMMENT_FILTER.length) { currentPost.comment = readPostComment(); if (lastAdminMark != null) { currentPost.trip = lastAdminMark + (currentPost.trip == null ? 
"" : currentPost.trip); lastAdminMark = null; } if (lastModMark != null) { currentPost.trip = lastModMark + (currentPost.trip == null ? "" : currentPost.trip); lastModMark = null; } finalizePost(); curCommentPos = 0; } } else { if (curCommentPos != 0) curCommentPos = ch == COMMENT_FILTER[0] ? 1 : 0; } if (ch == DATE_START_FILTER[curDateStartPos]) { ++curDateStartPos; if (curDateStartPos == DATE_START_FILTER.length) { inDate = true; dateBuffer.setLength(0); curDateStartPos = 0; } } else { if (curDateStartPos != 0) curDateStartPos = ch == DATE_START_FILTER[0] ? 1 : 0; } if (ch == DATE_END_FILTER[curDateEndPos]) { ++curDateEndPos; if (curDateEndPos == DATE_END_FILTER.length) { Matcher m = DATE_PATTERN.matcher(dateBuffer.toString().trim()); if (m.find()) { String date = m.group(1); parseDate(date); if (currentPost.timestamp == 0) { try { date = StringEscapeUtils.unescapeHtml4(date); date = new StringBuilder().append((char) (date.charAt(2) - 65248)) .append((char) (date.charAt(3) - 65248)).append('/') .append((char) (date.charAt(5) - 65248)).append((char) (date.charAt(6) - 65248)) .append('/').append((char) (date.charAt(8) - 65248)) .append((char) (date.charAt(9) - 65248)).append(' ') .append((char) (date.charAt(15) - 65248)) .append((char) (date.charAt(16) - 65248)).append(':') .append((char) (date.charAt(18) - 65248)) .append((char) (date.charAt(19) - 65248)).append(':') .append((char) (date.charAt(21) - 65248)) .append((char) (date.charAt(22) - 65248)).toString(); currentPost.timestamp = DATE_FORMAT_ALT.parse(date).getTime(); } catch (Exception e) { } } } inDate = false; dateBuffer.setLength(0); curDateEndPos = 0; } } else { if (curDateEndPos != 0) curDateEndPos = ch == DATE_END_FILTER[0] ? 1 : 0; } if (ch == ADMIN_FILTER[curAdminPos]) { ++curAdminPos; if (curAdminPos == ADMIN_FILTER.length) { lastAdminMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim(); curAdminPos = 0; } } else { if (curAdminPos != 0) curAdminPos = ch == ADMIN_FILTER[0] ? 
1 : 0; } if (ch == MOD_FILTER[curModPos]) { ++curModPos; if (curModPos == MOD_FILTER.length) { lastModMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim(); curModPos = 0; } } else { if (curModPos != 0) curModPos = ch == MOD_FILTER[0] ? 1 : 0; } }
From source file:msearch.filmeSuchen.sender.MediathekBr.java
private void getTheman() { final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html"; final String MUSTER_URL = "<a href=\"/mediathek/video/"; final String MUSTER_URL_1 = "sendungen/"; final String MUSTER_URL_2 = "video/"; listeThemen.clear();//from www . j a va 2 s . c o m MSStringBuilder seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER); //seite = getUrlIo.getUri_Utf(SENDERNAME, ADRESSE, seite, ""); seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 5 /* versuche */, seite, ""); int pos1 = 0; int pos2; String url = ""; if ((pos1 = seite.indexOf("<ul class=\"clearFix\">")) != -1) { while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) { try { pos1 += MUSTER_URL.length(); if ((pos2 = seite.indexOf("\"", pos1)) != -1) { url = seite.substring(pos1, pos2); } String thema = seite.extract("<span>", "<", pos1); thema = StringEscapeUtils.unescapeXml(thema.trim()); thema = StringEscapeUtils.unescapeHtml4(thema.trim()); if (!listeAllThemen.contains(thema)) { listeAllThemen.add(thema); } if (url.equals("") || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) { continue; } /// der BR ist etwas zu langsam dafr???? // // in die Liste eintragen // String[] add; // if (MSearchConfig.senderAllesLaden) { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url + "#seriesMoreCount=10", ""}; // } else { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url, ""}; // } // in die Liste eintragen String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema }; listeThemen.addUrl(add); } catch (Exception ex) { MSLog.fehlerMeldung(-821213698, MSLog.FEHLER_ART_MREADER, this.getClass().getSimpleName(), ex); } } } }
From source file:com.ryan.ryanreader.reddit.prepared.RedditPreparedPost.java
public RedditPreparedPost(final Context context, final CacheManager cm, final int listId, final RedditPost post, final long timestamp, final boolean showSubreddit, final RedditSubreddit parentSubreddit, final boolean updateNeeded, final boolean showThumbnails, final boolean precacheImages, final RedditAccount user) { this.src = post; this.parentSubreddit = parentSubreddit; this.showSubreddit = showSubreddit; if (post.title == null) { title = "[null]"; } else {//from w ww . jav a2 s .c o m title = StringEscapeUtils.unescapeHtml4(post.title.replace('\n', ' ')).trim(); } idAlone = post.id; idAndType = post.name; url = post.url; commentCount = post.num_comments; if (post.likes == null) { voteDirection = 0; } else { voteDirection = Boolean.TRUE.equals(post.likes) ? 1 : -1; } imageUrl = LinkHandler.getImageUrl(post.url); thumbnailUrl = post.thumbnail; hasThumbnail = showThumbnails && (hasThumbnail(post) || imageUrl != null); // TODO parameterise final int thumbnailWidth = General.dpToPixels(context, 64); if (hasThumbnail && hasThumbnail(post)) { downloadThumbnail(context, thumbnailWidth, cm, listId, false); } if (imageUrl != null && precacheImages) { downloadThumbnail(context, thumbnailWidth, cm, listId, true); } // TODO precache comments (respect settings) lastChange = timestamp; if (voteDirection != 0 || saved || hidden) { RedditChangeDataManager.getInstance(context).update(parentSubreddit.url, user, this, true); } else if (updateNeeded) { RedditChangeDataManager.getInstance(context).update(parentSubreddit.url, user, this, false); } rebuildSubtitle(context); }
From source file:com.nebkat.plugin.url.URLPlugin.java
@EventHandler public void onMessage(PrivMessageEvent e) { // Filter targets and ignores if ((mConfig.channels != null && mConfig.channels.stream() .noneMatch((channel) -> channel.equalsIgnoreCase(e.getTarget().getName()))) || (mConfig.ignore != null//from w w w. j av a 2 s.c o m && mConfig.ignore.stream().anyMatch((ignore) -> e.getSource().match(ignore)))) { return; } Matcher matcher = URL_MATCHER.matcher(e.getMessage()); if (!matcher.find()) { return; } String url = matcher.group(); HttpGet get = new HttpGet(url); // Execute the request HttpContext context = new BasicHttpContext(); HttpResponse response; try { response = ConnectionManager.getHttpClient().execute(get, context); } catch (IOException ex) { get.abort(); return; } Header contentType = response.getEntity().getContentType(); if (contentType == null) { get.abort(); return; } String mimeType = contentType.getValue().split(";")[0].trim(); if (!mimeType.equals("text/html") || response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { get.abort(); return; } if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { get.abort(); return; } HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST); boolean redirected = context.getAttribute(ConnectionManager.REDIRECTED) != null; StringBuilder page = new StringBuilder(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(response.getEntity().getContent()))) { String line; while ((line = reader.readLine()) != null) { if (page.length() > 2 * 1024 * 1024) { reader.close(); get.abort(); return; } page.append(line); matcher = TITLE_MATCHER.matcher(page); if (matcher.find()) { String title = StringEscapeUtils.unescapeHtml4(matcher.group(1).trim()); if (title.length() <= 0) { return; } else if (title.length() > 100) { title = title.substring(0, 100) + "..."; } Irc.message(e.getSession(), e.getTarget(), "[Link] " + Irc.TEXT_BOLD + currentHost.toHostString() + Irc.TEXT_RESET + (redirected ? 
" [redirected]" : "") + ": " + title); return; } } } catch (IOException ex) { // Ignore } }
From source file:edu.jhu.hlt.concrete.stanford.ConcreteStanfordTokensSentenceAnalytic.java
@Override public TokenizedCommunication annotate(SectionedCommunication arg0) throws AnalyticException { final Communication cp = new Communication(arg0.getRoot()); if (!cp.isSetText()) throw new AnalyticException("communication.text must be set to run this analytic."); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(cp); AnalyticUUIDGenerator g = f.create(); List<Section> sList = arg0.getSections().stream() // temporary hack - filter out // any zero-length TextSpans. .filter(s -> {/*from ww w . java 2 s. c o m*/ final TextSpan ts = s.getTextSpan(); return ts.getStart() != ts.getEnding(); }) // temporary hack - filter out any // TextSpans that contain only whitespace. .filter(s -> { final TextSpan ts = s.getTextSpan(); final int b = ts.getStart(); final int e = ts.getEnding(); if (e < b) { LOGGER.warn("Invalid text span: end is less than start. Document: {}; TextSpan: {}", cp.getId(), ts.toString()); return false; } String txt = cp.getText().substring(b, e); // that isn't enough, could get HTML encoded blank spaces. 
if (txt.contains(" ")) txt = StringEscapeUtils.unescapeHtml4(txt); String slim = txt.trim().replaceAll("\\p{Zs}", ""); return !slim.isEmpty(); }).collect(Collectors.toList()); final int newSize = sList.size(); final int oSize = arg0.getSections().size(); if (newSize < oSize) LOGGER.info("Dropped {} section(s) because they were zero-length or contained only whitespace.", oSize - newSize); // for each section, run stanford tokenization and sentence splitting for (Section s : sList) { LOGGER.debug("Annotating section: {}", s.getUuid().getUuidString()); final TextSpan sts = s.getTextSpan(); final String sectTxt = cp.getText().substring(sts.getStart(), sts.getEnding()); // final String sectTxt = new SuperTextSpan(sts, cp).getText(); LOGGER.debug("Section text: {}", sectTxt); final Annotation sectAnnotation = new Annotation(sectTxt); LOGGER.debug("Got annotation keys:"); sectAnnotation.keySet().forEach(k -> LOGGER.debug("{}", k)); this.pipeline.annotate(sectAnnotation); LOGGER.trace("Post annotation annotation keys:"); sectAnnotation.keySet().forEach(k -> LOGGER.trace("{}", k)); List<CoreLabel> tokensOnly = sectAnnotation.get(TokensAnnotation.class); tokensOnly.forEach( cl -> LOGGER.trace("Got non-sent Stanford token: {}", cl.toShorterString(new String[0]))); // LOGGER.debug("Got first sentence text annotation: {}", sectAnnotation.get(SentencesAnnotation.class).get(0).get(TextAnnotation.class)); List<Sentence> stList = annotationToSentenceList(sectAnnotation, sts.getStart(), g); s.setSentenceList(stList); } cp.setSectionList(sList); try { return new CachedTokenizationCommunication(cp); } catch (MiscommunicationException e) { throw new AnalyticException(e); } }
From source file:com.hack23.cia.web.impl.ui.application.views.common.chartfactory.impl.DocumentChartDataManagerImpl.java
/** * Gets the view riksdagen org document daily summary map. * * @return the view riksdagen org document daily summary map */// w w w. j a v a2s. co m private Map<String, List<ViewRiksdagenOrgDocumentDailySummary>> getViewRiksdagenOrgDocumentDailySummaryMap() { final DataContainer<ViewRiksdagenOrgDocumentDailySummary, RiksdagenDocumentOrgSummaryEmbeddedId> politicianBallotSummaryDailyDataContainer = applicationManager .getDataContainer(ViewRiksdagenOrgDocumentDailySummary.class); return politicianBallotSummaryDailyDataContainer.getAll().parallelStream() .filter(t -> t != null && !t.getEmbeddedId().getPublicDate().startsWith(YEAR_PREFIX)) .collect(Collectors.groupingBy( t -> StringEscapeUtils.unescapeHtml4(t.getEmbeddedId().getOrg()).toUpperCase(Locale.ENGLISH) .replace(UNDER_SCORE, EMPTY_STRING).replace(MINUS_SIGN, EMPTY_STRING).trim())); }
From source file:com.green.modules.cms.service.CategoryService.java
@Transactional(readOnly = false) public void save(Category category) { category.setSite(new Site(Site.getCurrentSiteId())); category.setParent(this.get(category.getParent().getId())); String oldParentIds = category.getParentIds(); // ??parentIds?parentIds category.setParentIds(category.getParent().getParentIds() + category.getParent().getId() + ","); if (StringUtils.isNotBlank(category.getViewConfig())) { category.setViewConfig(StringEscapeUtils.unescapeHtml4(category.getViewConfig())); }/*w w w. j a v a 2 s . c o m*/ categoryDao.clear(); categoryDao.save(category); // ? parentIds List<Category> list = categoryDao.findByParentIdsLike("%," + category.getId() + ",%"); for (Category e : list) { e.setParentIds(e.getParentIds().replace(oldParentIds, category.getParentIds())); } categoryDao.save(list); UserUtils.removeCache(CACHE_CATEGORY_LIST); CmsUtils.removeCache("mainNavList_" + category.getSite().getId()); }