Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:msearch.filmeSuchen.sender.MediathekReader.java

/**
 * Decodes entity escapes in a topic name and checks it against the known topics.
 *
 * @param thema raw topic text; it is trimmed, XML-unescaped, trimmed again and HTML-unescaped
 * @return the decoded topic if {@code listeAllThemen} contains it, otherwise {@code sendername}
 */
String checkThema(String thema) {
    final String xmlDecoded = StringEscapeUtils.unescapeXml(thema.trim());
    final String decoded = StringEscapeUtils.unescapeHtml4(xmlDecoded.trim());
    return listeAllThemen.contains(decoded) ? decoded : sendername;
}

From source file:com.huguesjohnson.retroleague.rss.RssParserHandler.java

/**
 * Handles the end of an XML element while parsing an RSS feed.
 *
 * <p>Inside an item block the collected character data is copied into the current entry
 * (title, description, publication date or link). When the item element itself closes, the
 * entry is added to {@code entryList} if its posted date is after {@code minimumDate}.
 * Outside an item block only the channel title is recognised, and it is ignored.
 *
 * <p>Any exception raised while handling the element is logged and swallowed so that a
 * single malformed element does not abort the whole parse.
 *
 * @param uri       namespace URI of the element
 * @param localName local name of the element; may be null or empty
 * @param qName     qualified name, used when {@code localName} is null or empty
 * @throws SAXException if the superclass handler fails
 */
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
    super.endElement(uri, localName, qName);
    try {
        String tagName = localName;
        if ((tagName == null) || (tagName.length() < 1)) {
            tagName = qName;
        }
        if (inItemBlock) {
            if (tagName.equalsIgnoreCase(TAG_ITEM)) {
                if (this.currentEntry.getPostedDate().compareTo(this.minimumDate) > 0) {
                    this.entryList.add(this.currentEntry);
                }
                this.currentEntry = null;
                readThis = false;
                inItemBlock = false;
            } else if (tagName.equalsIgnoreCase(TAG_TITLE)) {
                //check if this is the rss channel title or the item title
                if (this.currentEntry != null) {
                    String title = this.characters.toString();
                    if (this.source == Sources.Facebook) {
                        title = StringEscapeUtils.unescapeHtml4(title);
                    }
                    this.currentEntry.setTitle(title);
                }
                readThis = false;
            } else if (tagName.equalsIgnoreCase(TAG_DESCRIPTION)) {
                //check if this is the rss channel description or the item description
                if (this.currentEntry != null) {
                    //TODO - hack to ensure content is html, maybe look for a less hacky solution
                    final String raw = this.characters.toString();
                    // Content counts as markup when it starts with a literal or an escaped '<'.
                    // The previous test joined the two negated checks with ||, which is always
                    // true, and its substring(0, 4) threw on content shorter than four chars.
                    final boolean looksLikeHtml = raw.startsWith("<") || raw.startsWith("&lt;");
                    this.currentEntry.setContent(looksLikeHtml ? raw : "<p>" + raw + "</p>");
                }
                readThis = false;
            } else if (tagName.equalsIgnoreCase(TAG_PUBDATE)) {
                String sDate = this.characters.toString();
                this.currentEntry.setPostedDate(this.dateParser.parseDate(sDate));
                readThis = false;
            } else if (tagName.equalsIgnoreCase(TAG_LINK)) {
                //check if this is the rss channel link or the item link
                if (this.currentEntry != null) {
                    this.currentEntry.setUrl(this.characters.toString());
                }
                readThis = false;
            }
        } else {
            if (tagName.equalsIgnoreCase(TAG_TITLE)) {
                //don't care about the rss channel title
                readThis = false;
            }
        }
    } catch (Exception x) {
        Log.e(TAG, "endElement", x);
    }
}

From source file:com.wellsandwhistles.android.redditsp.reddit.things.RedditSubreddit.java

/**
 * Builds a complete HTML document that wraps the subreddit's sidebar description.
 *
 * <p>The entity-escaped {@code description_html} is unescaped and placed inside the body.
 * A viewport meta tag is always emitted; in night mode an additional style sheet sets
 * light-on-dark colours for the body and for links.
 *
 * @param nightMode whether to include the dark colour scheme
 * @return the HTML document as a string
 */
public String getSidebarHtml(boolean nightMode) {
    final String unescaped = StringEscapeUtils.unescapeHtml4(description_html);

    final StringBuilder result = new StringBuilder(unescaped.length() + 512);

    result.append("<html>");

    result.append("<head>");
    result.append("<meta name=\"viewport\" content=\"width=device-width, user-scalable=yes\">");

    if (nightMode) {
        result.append("<style>");
        result.append("body {color: white; background-color: black;}");
        // Hex colours need the leading '#'; without it the declaration is not valid CSS.
        result.append("a {color: #3399FF; background-color: #000033;}");
        result.append("</style>");
    }

    result.append("</head>");

    result.append("<body>");
    result.append(unescaped);
    result.append("</body>");

    result.append("</html>");

    return result.toString();
}

From source file:com.nttec.everychan.chans.sevenchan.SevenchanReader.java

/**
 * Feeds one character of the input into a set of independent sequence matchers.
 *
 * <p>Every section below follows the same scheme: a position counter is advanced while the
 * incoming characters keep matching the corresponding filter array. When the whole filter has
 * been matched, the section's action runs and the counter is reset to 0. On a mismatch in the
 * middle of a filter the counter restarts at 1 if {@code ch} equals the filter's first element,
 * otherwise at 0.
 *
 * <p>The sections are evaluated in a fixed order for every character, and several actions
 * consume further input through {@code readUntilSequence} / {@code readPostComment}, so the
 * statement order matters.
 *
 * @param ch the character just read
 * @throws IOException declared by this override; presumably raised by the read helpers
 *                     called below (their signatures are not visible here)
 */
@Override
protected void customFilters(int ch) throws IOException {
    // While between the date start and date end markers, collect the raw date text.
    if (inDate)
        dateBuffer.append((char) ch);

    // Post number: read up to the next double quote.
    if (ch == NUMBER_FILTER[curNumberPos]) {
        ++curNumberPos;
        if (curNumberPos == NUMBER_FILTER.length) {
            currentPost.number = readUntilSequence("\"".toCharArray());
            curNumberPos = 0;
        }
    } else {
        if (curNumberPos != 0)
            curNumberPos = ch == NUMBER_FILTER[0] ? 1 : 0;
    }

    // Subject: read up to SPAN_CLOSE, unescape HTML entities, trim, then fix Cloudflare emails.
    if (ch == SUBJECT_FILTER[curSubjectPos]) {
        ++curSubjectPos;
        if (curSubjectPos == SUBJECT_FILTER.length) {
            currentPost.subject = CryptoUtils
                    .fixCloudflareEmails(StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim());
            curSubjectPos = 0;
        }
    } else {
        if (curSubjectPos != 0)
            curSubjectPos = ch == SUBJECT_FILTER[0] ? 1 : 0;
    }

    // Attachment: the text up to P_CLOSE is handed to parseAttachment.
    if (ch == ATTACHMENT_FILTER[curAttachmentPos]) {
        ++curAttachmentPos;
        if (curAttachmentPos == ATTACHMENT_FILTER.length) {
            parseAttachment(readUntilSequence(P_CLOSE));
            curAttachmentPos = 0;
        }
    } else {
        if (curAttachmentPos != 0)
            curAttachmentPos = ch == ATTACHMENT_FILTER[0] ? 1 : 0;
    }

    // Attachment matched by ATTACHMENT_MULTI_FIRST_FILTER: text up to SPAN_CLOSE is parsed.
    if (ch == ATTACHMENT_MULTI_FIRST_FILTER[curAttachmentMultiFirstPos]) {
        ++curAttachmentMultiFirstPos;
        if (curAttachmentMultiFirstPos == ATTACHMENT_MULTI_FIRST_FILTER.length) {
            parseAttachment(readUntilSequence(SPAN_CLOSE));
            curAttachmentMultiFirstPos = 0;
        }
    } else {
        if (curAttachmentMultiFirstPos != 0)
            curAttachmentMultiFirstPos = ch == ATTACHMENT_MULTI_FIRST_FILTER[0] ? 1 : 0;
    }

    // Attachment matched by ATTACHMENT_MULTI_FILTER: text up to SPAN_CLOSE is parsed.
    if (ch == ATTACHMENT_MULTI_FILTER[curAttachmentMultiPos]) {
        ++curAttachmentMultiPos;
        if (curAttachmentMultiPos == ATTACHMENT_MULTI_FILTER.length) {
            parseAttachment(readUntilSequence(SPAN_CLOSE));
            curAttachmentMultiPos = 0;
        }
    } else {
        if (curAttachmentMultiPos != 0)
            curAttachmentMultiPos = ch == ATTACHMENT_MULTI_FILTER[0] ? 1 : 0;
    }

    // Comment: reading the comment completes the post, so pending marks are applied
    // and the post is finalized here.
    if (ch == COMMENT_FILTER[curCommentPos]) {
        ++curCommentPos;
        if (curCommentPos == COMMENT_FILTER.length) {
            currentPost.comment = readPostComment();
            // Pending marks are prepended to the trip. The mod mark is prepended after the
            // admin mark, so it ends up in front when both are present.
            if (lastAdminMark != null) {
                currentPost.trip = lastAdminMark + (currentPost.trip == null ? "" : currentPost.trip);
                lastAdminMark = null;
            }
            if (lastModMark != null) {
                currentPost.trip = lastModMark + (currentPost.trip == null ? "" : currentPost.trip);
                lastModMark = null;
            }
            finalizePost();
            curCommentPos = 0;
        }
    } else {
        if (curCommentPos != 0)
            curCommentPos = ch == COMMENT_FILTER[0] ? 1 : 0;
    }

    // Date start marker: begin collecting characters into a freshly cleared dateBuffer.
    if (ch == DATE_START_FILTER[curDateStartPos]) {
        ++curDateStartPos;
        if (curDateStartPos == DATE_START_FILTER.length) {
            inDate = true;
            dateBuffer.setLength(0);
            curDateStartPos = 0;
        }
    } else {
        if (curDateStartPos != 0)
            curDateStartPos = ch == DATE_START_FILTER[0] ? 1 : 0;
    }

    // Date end marker: extract the date from the collected text and stop collecting.
    if (ch == DATE_END_FILTER[curDateEndPos]) {
        ++curDateEndPos;
        if (curDateEndPos == DATE_END_FILTER.length) {
            Matcher m = DATE_PATTERN.matcher(dateBuffer.toString().trim());
            if (m.find()) {
                String date = m.group(1);
                parseDate(date);
                // Fallback when the timestamp is still 0 after parseDate.
                if (currentPost.timestamp == 0) {
                    try {
                        date = StringEscapeUtils.unescapeHtml4(date);
                        // 65248 is 0xFEE0, the distance between a Unicode fullwidth form and
                        // its ASCII counterpart, so each selected char is shifted into ASCII.
                        // The picked positions are rebuilt as "xx/xx/xx xx:xx:xx" for
                        // DATE_FORMAT_ALT.
                        // NOTE(review): presumably the site sometimes renders the date in
                        // fullwidth characters — confirm against real input.
                        date = new StringBuilder().append((char) (date.charAt(2) - 65248))
                                .append((char) (date.charAt(3) - 65248)).append('/')
                                .append((char) (date.charAt(5) - 65248)).append((char) (date.charAt(6) - 65248))
                                .append('/').append((char) (date.charAt(8) - 65248))
                                .append((char) (date.charAt(9) - 65248)).append(' ')
                                .append((char) (date.charAt(15) - 65248))
                                .append((char) (date.charAt(16) - 65248)).append(':')
                                .append((char) (date.charAt(18) - 65248))
                                .append((char) (date.charAt(19) - 65248)).append(':')
                                .append((char) (date.charAt(21) - 65248))
                                .append((char) (date.charAt(22) - 65248)).toString();
                        currentPost.timestamp = DATE_FORMAT_ALT.parse(date).getTime();
                    } catch (Exception e) {
                        // Any failure in the fallback (too-short string, parse error) is
                        // ignored; the timestamp then stays 0.
                    }
                }
            }
            inDate = false;
            dateBuffer.setLength(0);
            curDateEndPos = 0;
        }
    } else {
        if (curDateEndPos != 0)
            curDateEndPos = ch == DATE_END_FILTER[0] ? 1 : 0;
    }

    // Admin mark: remembered until the post's comment is read (see the comment section).
    if (ch == ADMIN_FILTER[curAdminPos]) {
        ++curAdminPos;
        if (curAdminPos == ADMIN_FILTER.length) {
            lastAdminMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim();
            curAdminPos = 0;
        }
    } else {
        if (curAdminPos != 0)
            curAdminPos = ch == ADMIN_FILTER[0] ? 1 : 0;
    }

    // Mod mark: remembered until the post's comment is read (see the comment section).
    if (ch == MOD_FILTER[curModPos]) {
        ++curModPos;
        if (curModPos == MOD_FILTER.length) {
            lastModMark = StringEscapeUtils.unescapeHtml4(readUntilSequence(SPAN_CLOSE)).trim();
            curModPos = 0;
        }
    } else {
        if (curModPos != 0)
            curModPos = ch == MOD_FILTER[0] ? 1 : 0;
    }

}

From source file:msearch.filmeSuchen.sender.MediathekBr.java

/**
 * Loads the BR Mediathek programme index and fills the topic lists.
 *
 * <p>The index page is fetched, and every link that follows the
 * {@code <ul class="clearFix">} marker and starts with {@code MUSTER_URL} is examined.
 * The link's topic name is entity-unescaped and added to {@code listeAllThemen} if it is
 * not already present. Links whose path starts with {@code "sendungen/"} or
 * {@code "video/"} are additionally registered in {@code listeThemen} as a
 * {@code {url, topic}} pair.
 *
 * <p>An exception while processing one link is logged and does not stop the scan.
 */
private void getTheman() {
    final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html";
    final String MUSTER_URL = "<a href=\"/mediathek/video/";
    final String MUSTER_URL_1 = "sendungen/";
    final String MUSTER_URL_2 = "video/";
    listeThemen.clear();
    MSStringBuilder seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER);
    seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 5 /* versuche */, seite, "");
    int pos1 = seite.indexOf("<ul class=\"clearFix\">");
    if (pos1 == -1) {
        // The list marker is missing, so there is nothing to scan.
        return;
    }
    while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) {
        try {
            // Advance past the match first so the loop always makes progress.
            pos1 += MUSTER_URL.length();
            // The URL is scoped to this link. It used to live outside the loop, so a link
            // without a closing quote silently reused the previous link's URL.
            String url = "";
            final int pos2 = seite.indexOf("\"", pos1);
            if (pos2 != -1) {
                url = seite.substring(pos1, pos2);
            }
            String thema = seite.extract("<span>", "<", pos1);
            thema = StringEscapeUtils.unescapeXml(thema.trim());
            thema = StringEscapeUtils.unescapeHtml4(thema.trim());
            if (!listeAllThemen.contains(thema)) {
                listeAllThemen.add(thema);
            }
            if (url.equals("") || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) {
                continue;
            }
            // A variant that loaded the "sendungen/" page with "#seriesMoreCount=10" was
            // disabled by the original author ("BR is somewhat too slow for that").
            // Add the entry to the list.
            String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema };
            listeThemen.addUrl(add);
        } catch (Exception ex) {
            MSLog.fehlerMeldung(-821213698, MSLog.FEHLER_ART_MREADER, this.getClass().getSimpleName(), ex);
        }
    }
}

From source file:com.ryan.ryanreader.reddit.prepared.RedditPreparedPost.java

/**
 * Prepares a post for display: copies the fields needed from the raw post, starts any
 * thumbnail downloads, records local changes, and builds the subtitle.
 *
 * @param context         context used for pixel conversion, downloads and the change manager
 * @param cm              cache manager passed to the thumbnail downloads
 * @param listId          list identifier passed to the thumbnail downloads
 * @param post            the raw post
 * @param timestamp       stored as {@code lastChange}
 * @param showSubreddit   stored as {@code showSubreddit}
 * @param parentSubreddit the subreddit the post is listed under
 * @param updateNeeded    forces a change-manager update even without local changes
 * @param showThumbnails  whether thumbnails are enabled
 * @param precacheImages  whether a linked image should be fetched up front
 * @param user            account the change data is recorded for
 */
public RedditPreparedPost(final Context context, final CacheManager cm, final int listId, final RedditPost post,
        final long timestamp, final boolean showSubreddit, final RedditSubreddit parentSubreddit,
        final boolean updateNeeded, final boolean showThumbnails, final boolean precacheImages,
        final RedditAccount user) {

    this.src = post;
    this.parentSubreddit = parentSubreddit;
    this.showSubreddit = showSubreddit;

    // Titles are flattened to one line and entity-unescaped; a missing title gets a marker.
    title = post.title == null
            ? "[null]"
            : StringEscapeUtils.unescapeHtml4(post.title.replace('\n', ' ')).trim();

    idAlone = post.id;
    idAndType = post.name;
    url = post.url;
    commentCount = post.num_comments;

    // likes: null means no vote, TRUE an upvote, anything else a downvote.
    voteDirection = post.likes == null ? 0 : (Boolean.TRUE.equals(post.likes) ? 1 : -1);

    imageUrl = LinkHandler.getImageUrl(post.url);
    thumbnailUrl = post.thumbnail;
    hasThumbnail = showThumbnails && (hasThumbnail(post) || imageUrl != null);

    // TODO parameterise
    final int thumbnailWidth = General.dpToPixels(context, 64);

    if (hasThumbnail && hasThumbnail(post)) {
        downloadThumbnail(context, thumbnailWidth, cm, listId, false);
    }

    if (imageUrl != null && precacheImages) {
        downloadThumbnail(context, thumbnailWidth, cm, listId, true);
    }

    // TODO precache comments (respect settings)

    lastChange = timestamp;

    // The last argument of update() reflects whether the post carries a vote/saved/hidden state.
    final boolean hasLocalState = voteDirection != 0 || saved || hidden;
    if (hasLocalState || updateNeeded) {
        RedditChangeDataManager.getInstance(context).update(parentSubreddit.url, user, this, hasLocalState);
    }

    rebuildSubtitle(context);
}

From source file:com.nebkat.plugin.url.URLPlugin.java

/**
 * Announces the title of the first URL found in a channel message.
 *
 * <p>Messages are skipped when the target is not one of the configured channels or the
 * source matches an ignore entry. The URL is fetched, and only {@code 200 OK} responses
 * of type {@code text/html} are read. The body is scanned line by line until
 * {@code TITLE_MATCHER} finds a title or more than 2 MiB have been buffered. A non-empty
 * title (truncated to 100 characters) is posted back to the target together with the host
 * that finally served the page.
 *
 * @param e the received message event
 */
@EventHandler
public void onMessage(PrivMessageEvent e) {
    // Filter targets and ignores
    if ((mConfig.channels != null && mConfig.channels.stream()
            .noneMatch((channel) -> channel.equalsIgnoreCase(e.getTarget().getName())))
            || (mConfig.ignore != null
                    && mConfig.ignore.stream().anyMatch((ignore) -> e.getSource().match(ignore)))) {
        return;
    }

    Matcher matcher = URL_MATCHER.matcher(e.getMessage());
    if (!matcher.find()) {
        return;
    }
    String url = matcher.group();

    HttpGet get = new HttpGet(url);

    // Execute the request
    HttpContext context = new BasicHttpContext();
    HttpResponse response;
    try {
        response = ConnectionManager.getHttpClient().execute(get, context);
    } catch (IOException ex) {
        get.abort();
        return;
    }

    // A response without a body has no entity; there is nothing to extract a title from.
    if (response.getEntity() == null) {
        get.abort();
        return;
    }

    Header contentType = response.getEntity().getContentType();
    if (contentType == null) {
        get.abort();
        return;
    }
    // Only successful HTML responses are of interest; parameters such as charset are dropped.
    String mimeType = contentType.getValue().split(";")[0].trim();
    if (!mimeType.equals("text/html") || response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        get.abort();
        return;
    }

    HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
    boolean redirected = context.getAttribute(ConnectionManager.REDIRECTED) != null;

    StringBuilder page = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(response.getEntity().getContent()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // Give up on pages that have no title within the first 2 MiB.
            if (page.length() > 2 * 1024 * 1024) {
                // The reader itself is closed by try-with-resources.
                get.abort();
                return;
            }
            page.append(line);
            matcher = TITLE_MATCHER.matcher(page);
            if (matcher.find()) {
                String title = StringEscapeUtils.unescapeHtml4(matcher.group(1).trim());
                if (title.length() <= 0) {
                    return;
                } else if (title.length() > 100) {
                    title = title.substring(0, 100) + "...";
                }
                Irc.message(e.getSession(), e.getTarget(),
                        "[Link] " + Irc.TEXT_BOLD + currentHost.toHostString() + Irc.TEXT_RESET
                                + (redirected ? " [redirected]" : "") + ": " + title);
                return;
            }
        }
    } catch (IOException ex) {
        // Best effort: a page that cannot be read simply produces no announcement.
    }
}

From source file:edu.jhu.hlt.concrete.stanford.ConcreteStanfordTokensSentenceAnalytic.java

/**
 * Runs tokenization and sentence splitting over every usable section of a communication.
 *
 * <p>Works on a copy of the root communication. Sections whose text span is zero-length,
 * inverted (end before start, logged as a warning) or contains only whitespace are dropped.
 * Each remaining section is passed through the pipeline and receives the resulting sentence
 * list; the copy's section list is replaced by the remaining sections.
 *
 * @param arg0 the sectioned communication to annotate
 * @return the annotated copy wrapped as a tokenized communication
 * @throws AnalyticException if the communication has no text, or if the result cannot be
 *                           wrapped
 */
@Override
public TokenizedCommunication annotate(SectionedCommunication arg0) throws AnalyticException {
    final Communication copy = new Communication(arg0.getRoot());
    if (!copy.isSetText())
        throw new AnalyticException("communication.text must be set to run this analytic.");
    final AnalyticUUIDGenerator uuidGen = new AnalyticUUIDGeneratorFactory(copy).create();
    final List<Section> kept = arg0.getSections().stream()
            // temporary hack - filter out
            // any zero-length TextSpans.
            .filter(sect -> sect.getTextSpan().getStart() != sect.getTextSpan().getEnding())
            // temporary hack - filter out any
            // TextSpans that contain only whitespace.
            .filter(sect -> {
                final TextSpan span = sect.getTextSpan();
                final int begin = span.getStart();
                final int end = span.getEnding();
                if (end < begin) {
                    LOGGER.warn("Invalid text span: end is less than start. Document: {}; TextSpan: {}",
                            copy.getId(), span.toString());
                    return false;
                }
                final String raw = copy.getText().substring(begin, end);
                // that isn't enough, could get HTML encoded blank spaces.
                final String decoded = raw.contains("&nbsp") ? StringEscapeUtils.unescapeHtml4(raw) : raw;
                return !decoded.trim().replaceAll("\\p{Zs}", "").isEmpty();
            }).collect(Collectors.toList());
    final int keptCount = kept.size();
    final int originalCount = arg0.getSections().size();
    if (keptCount < originalCount)
        LOGGER.info("Dropped {} section(s) because they were zero-length or contained only whitespace.",
                originalCount - keptCount);
    // for each section, run stanford tokenization and sentence splitting
    for (Section sect : kept) {
        LOGGER.debug("Annotating section: {}", sect.getUuid().getUuidString());
        final TextSpan span = sect.getTextSpan();
        final String sectionText = copy.getText().substring(span.getStart(), span.getEnding());
        LOGGER.debug("Section text: {}", sectionText);
        final Annotation annotation = new Annotation(sectionText);
        LOGGER.debug("Got annotation keys:");
        annotation.keySet().forEach(key -> LOGGER.debug("{}", key));
        this.pipeline.annotate(annotation);
        LOGGER.trace("Post annotation annotation keys:");
        annotation.keySet().forEach(key -> LOGGER.trace("{}", key));

        final List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
        tokens.forEach(
                token -> LOGGER.trace("Got non-sent Stanford token: {}", token.toShorterString(new String[0])));
        // Sentence offsets are computed relative to the section's start in the full text.
        sect.setSentenceList(annotationToSentenceList(annotation, span.getStart(), uuidGen));
    }

    copy.setSectionList(kept);
    try {
        return new CachedTokenizationCommunication(copy);
    } catch (MiscommunicationException e) {
        throw new AnalyticException(e);
    }
}

From source file:com.hack23.cia.web.impl.ui.application.views.common.chartfactory.impl.DocumentChartDataManagerImpl.java

/**
 * Groups the daily document summaries by organisation.
 *
 * <p>Null entries and entries whose public date starts with {@code YEAR_PREFIX} are
 * skipped. The grouping key is the HTML-unescaped organisation name, upper-cased with
 * {@link Locale#ENGLISH}, with {@code UNDER_SCORE} and {@code MINUS_SIGN} removed, and
 * trimmed.
 *
 * @return the view riksdagen org document daily summary map, keyed by organisation
 */
private Map<String, List<ViewRiksdagenOrgDocumentDailySummary>> getViewRiksdagenOrgDocumentDailySummaryMap() {
    final DataContainer<ViewRiksdagenOrgDocumentDailySummary, RiksdagenDocumentOrgSummaryEmbeddedId> summaryContainer = applicationManager
            .getDataContainer(ViewRiksdagenOrgDocumentDailySummary.class);

    return summaryContainer.getAll().parallelStream().filter(summary -> {
        if (summary == null) {
            return false;
        }
        return !summary.getEmbeddedId().getPublicDate().startsWith(YEAR_PREFIX);
    }).collect(Collectors.groupingBy(summary -> {
        final String org = StringEscapeUtils.unescapeHtml4(summary.getEmbeddedId().getOrg());
        return org.toUpperCase(Locale.ENGLISH).replace(UNDER_SCORE, EMPTY_STRING)
                .replace(MINUS_SIGN, EMPTY_STRING).trim();
    }));
}

From source file:com.green.modules.cms.service.CategoryService.java

/**
 * Saves a category and rewrites the {@code parentIds} path of the categories below it.
 *
 * <p>The category is bound to the current site, its parent is reloaded by id and its own
 * {@code parentIds} is rebuilt from the parent's path. A non-blank view configuration is
 * HTML-unescaped before saving. Afterwards every category whose {@code parentIds} contains
 * this category's id has the old path replaced by the new one, and the related caches are
 * removed.
 *
 * @param category the category to save; its parent must be set
 */
@Transactional(readOnly = false)
public void save(Category category) {
    category.setSite(new Site(Site.getCurrentSiteId()));
    category.setParent(this.get(category.getParent().getId()));

    // Keep the previous path so that it can be replaced in the categories found below.
    final String previousParentIds = category.getParentIds();
    final String rebuiltParentIds = category.getParent().getParentIds() + category.getParent().getId() + ",";
    category.setParentIds(rebuiltParentIds);

    final String viewConfig = category.getViewConfig();
    if (StringUtils.isNotBlank(viewConfig)) {
        category.setViewConfig(StringEscapeUtils.unescapeHtml4(viewConfig));
    }

    categoryDao.clear();
    categoryDao.save(category);

    // Update the parentIds of every category that has this category in its path.
    final List<Category> descendants = categoryDao.findByParentIdsLike("%," + category.getId() + ",%");
    final String currentParentIds = category.getParentIds();
    for (Category descendant : descendants) {
        descendant.setParentIds(descendant.getParentIds().replace(previousParentIds, currentParentIds));
    }
    categoryDao.save(descendants);

    UserUtils.removeCache(CACHE_CATEGORY_LIST);
    CmsUtils.removeCache("mainNavList_" + category.getSite().getId());
}