Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:nya.miku.wishmaster.chans.nullchan.AbstractInstant0chan.java

/**
 * Downloads the board index and flattens it into an array of board models.
 *
 * <p>The index is fetched from {@code getUsingUrl() + "boards10.json"} and is read as an array of
 * category objects, each with a {@code "name"} and a {@code "boards"} array. For every board the
 * {@code "dir"} value becomes the board name and the HTML-unescaped {@code "desc"} value becomes
 * the description, falling back to the board name when {@code "desc"} is absent. A board is
 * flagged NSFW when its name is exactly {@code "b"} or its category equals {@code "adult"}
 * ignoring case.
 *
 * @param listener progress listener handed to the download call
 * @param task cancellable task handed to the download call
 * @param oldBoardsList previously known list; returned unchanged when the download yields
 *        {@code null}. Whether it is non-null is also passed to the download call
 *        (presumably to allow a "not modified" short cut - verify against downloadJSONArray).
 * @return the parsed boards, {@code oldBoardsList} if nothing was downloaded, or an empty array
 *         if the JSON could not be parsed
 * @throws Exception if the download fails
 */
@Override
public SimpleBoardModel[] getBoardsList(ProgressListener listener, CancellableTask task,
        SimpleBoardModel[] oldBoardsList) throws Exception {
    final String indexUrl = getUsingUrl() + "boards10.json";
    try {
        final JSONArray categories = downloadJSONArray(indexUrl, oldBoardsList != null, listener, task);
        if (categories == null) {
            return oldBoardsList;
        }

        final List<SimpleBoardModel> result = new ArrayList<>();
        for (int categoryIndex = 0; categoryIndex < categories.length(); ++categoryIndex) {
            final String categoryName = categories.getJSONObject(categoryIndex).optString("name");
            final JSONArray categoryBoards = categories.getJSONObject(categoryIndex).getJSONArray("boards");

            for (int boardIndex = 0; boardIndex < categoryBoards.length(); ++boardIndex) {
                final SimpleBoardModel board = new SimpleBoardModel();
                board.chan = getChanName();
                board.boardName = categoryBoards.getJSONObject(boardIndex).getString("dir");

                // Descriptions arrive HTML-escaped; the board name doubles as the fallback text.
                final String rawDescription = categoryBoards.getJSONObject(boardIndex).optString("desc",
                        board.boardName);
                board.boardDescription = StringEscapeUtils.unescapeHtml4(rawDescription);

                board.boardCategory = categoryName;
                board.nsfw = board.boardName.equals("b") || categoryName.equalsIgnoreCase("adult");
                result.add(board);
            }
        }
        return result.toArray(new SimpleBoardModel[result.size()]);
    } catch (JSONException e) {
        // Malformed index: report "no boards" rather than failing the caller.
        return new SimpleBoardModel[0];
    }
}

From source file:nya.miku.wishmaster.chans.nullchan.AbstractInstant0chan.java

/**
 * Builds a thread model, containing only the opening post, from one catalog JSON entry.
 *
 * <p>The attachment type is derived from the {@code "file_type"} value. The special type
 * {@code "you"} is mapped to YouTube thumbnail/watch URLs built from the {@code "file"} value;
 * every other type yields board-relative {@code /<board>/thumb/} and {@code /<board>/src/}
 * paths. An attachment is only attached to the post when {@code "file"} is non-empty.
 *
 * @param json catalog entry describing a single thread
 * @param boardName board directory name used to build attachment paths
 * @return thread model whose {@code posts} array holds exactly the opening post
 * @throws RuntimeException if the entry has no {@code "id"}
 */
private ThreadModel mapCatalogThreadModel(JSONObject json, String boardName) {
    ThreadModel model = new ThreadModel();
    model.threadNumber = json.optString("id", null);
    if (model.threadNumber == null) {
        throw new RuntimeException("catalog entry has no thread id");
    }
    // A missing counter falls back to -2, so the stored value becomes -1 after the +1.
    model.postsCount = json.optInt("reply_count", -2) + 1;
    model.attachmentsCount = json.optInt("images", -2) + 1;
    model.isClosed = json.optInt("locked", 0) != 0;
    model.isSticky = json.optInt("stickied", 0) != 0;

    PostModel opPost = new PostModel();
    opPost.number = model.threadNumber;
    opPost.name = StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlSpanTags(json.optString("name")));
    opPost.subject = StringEscapeUtils.unescapeHtml4(json.optString("subject"));
    opPost.comment = json.optString("message");
    opPost.trip = json.optString("tripcode");
    // NOTE(review): presumably converts seconds to milliseconds - confirm against the API.
    opPost.timestamp = json.optLong("timestamp") * 1000;
    opPost.parentThread = model.threadNumber;

    String ext = json.optString("file_type", "");
    if (!ext.isEmpty()) {
        AttachmentModel attachment = new AttachmentModel();
        switch (ext) {
        case "jpg":
        case "jpeg":
        case "png":
            attachment.type = AttachmentModel.TYPE_IMAGE_STATIC;
            break;
        case "gif":
            attachment.type = AttachmentModel.TYPE_IMAGE_GIF;
            break;
        case "mp3":
        case "ogg":
            attachment.type = AttachmentModel.TYPE_AUDIO;
            break;
        case "webm":
        case "mp4":
            attachment.type = AttachmentModel.TYPE_VIDEO;
            break;
        case "you":
            attachment.type = AttachmentModel.TYPE_OTHER_NOTFILE;
            break;
        default:
            attachment.type = AttachmentModel.TYPE_OTHER_FILE;
            break;
        }
        attachment.width = json.optInt("image_w", -1);
        attachment.height = json.optInt("image_h", -1);
        attachment.size = -1;

        String fileName = json.optString("file", "");
        if (!fileName.isEmpty()) {
            if (ext.equals("you")) {
                // For the "you" type the file value is used as the YouTube video id.
                String scheme = useHttps() ? "https" : "http";
                attachment.thumbnail = scheme + "://img.youtube.com/vi/" + fileName + "/default.jpg";
                attachment.path = scheme + "://youtube.com/watch?v=" + fileName;
            } else {
                attachment.thumbnail = "/" + boardName + "/thumb/" + fileName + "s." + ext;
                attachment.path = "/" + boardName + "/src/" + fileName + "." + ext;
            }
            opPost.attachments = new AttachmentModel[] { attachment };
        }
    }
    model.posts = new PostModel[] { opPost };
    return model;
}

From source file:opengovcrawler.DB.java

/**
 * Replaces HTML entity escapes in the given text with the characters they represent.
 *
 * <p>Despite its name, this method <em>unescapes</em>: it delegates to
 * {@code StringEscapeUtils.unescapeHtml4}.
 *
 * @param htmlText text that may contain HTML entities
 * @return the text with entities unescaped
 */
public static String EscapeHtml(String htmlText) {
    return StringEscapeUtils.unescapeHtml4(htmlText);
}

From source file:org.andstatus.app.msg.MessageViewItem.java

/**
 * Normalizes a message body into a lower-case, single-line form.
 *
 * <p>The body is passed through {@code MyHtml.fromHtml}, lower-cased, HTML-unescaped, has each
 * newline replaced by a space and each pair of spaces replaced by one space (single pass), and
 * finally has everything before the last "favorited something by" phrase removed.
 *
 * @param body raw message body
 * @return the cleaned body
 */
@NonNull
private String getCleanedBody(String body) {
    // Lower-case with a fixed locale: under e.g. a Turkish default locale "I" would become a
    // dotless "ı" and the ASCII phrase matched below would no longer be found.
    String out = MyHtml.fromHtml(body).toLowerCase(java.util.Locale.ROOT);
    out = StringEscapeUtils.unescapeHtml4(out);
    // Both replacements are literal, so no regular expression is needed for them.
    return out.replace("\n", " ").replace("  ", " ").replaceFirst(".*(favorited something by.*)", "$1");
}

From source file:org.apache.jmeter.extractor.RegexExtractor.java

/**
 * Selects the part of the sample result that this extractor should be applied to.
 *
 * <p>The options are checked in a fixed order and the first one that is enabled wins: URL,
 * response headers, request headers, response code, response message, HTML-unescaped body,
 * body as document text, and finally the plain response body.
 *
 * @param result the sample result to read from
 * @return the selected input text
 */
private String getInputString(SampleResult result) {
    final String inputString;
    if (useUrl()) {
        inputString = result.getUrlAsString(); // Bug 39707
    } else if (useHeaders()) {
        inputString = result.getResponseHeaders();
    } else if (useRequestHeaders()) {
        inputString = result.getRequestHeaders();
    } else if (useCode()) {
        inputString = result.getResponseCode(); // Bug 43451
    } else if (useMessage()) {
        inputString = result.getResponseMessage(); // Bug 43451
    } else if (useUnescapedBody()) {
        inputString = StringEscapeUtils.unescapeHtml4(result.getResponseDataAsString());
    } else if (useBodyAsDocument()) {
        inputString = Document.getTextFromDocument(result.getResponseData());
    } else {
        inputString = result.getResponseDataAsString(); // Bug 36898
    }

    if (log.isDebugEnabled()) {
        log.debug("Input = " + inputString);
    }
    return inputString;
}

From source file:org.apache.jmeter.functions.UnEscapeHtml.java

/** {@inheritDoc} */
@Override
public String execute(SampleResult previousResult, Sampler currentSampler) throws InvalidVariableException {
    // Only the first parameter is read: it is evaluated and the result is HTML-unescaped.
    final CompoundVariable argument = (CompoundVariable) values[0];
    final String escapedText = argument.execute();
    return StringEscapeUtils.unescapeHtml4(escapedText);
}

From source file:org.apache.lucene.analysis.kr.test.KoreanAnalyzerTest.java

/**
 * Prints the result of unescaping a numeric HTML entity and of Java-escaping a string literal.
 * Nothing is asserted; the output is only written to standard out.
 */
public void testJavaEscape() throws Exception {
    final String decodedEntity = StringEscapeUtils.unescapeHtml4("&#48085;");
    System.out.println(decodedEntity);

    // NOTE(review): the unescapeJava result is overwritten on the next line, and the "?" literal
    // (like the original "??" comment) looks like a character lost to an encoding problem -
    // confirm against the upstream source.
    String javaText = StringEscapeUtils.unescapeJava("0x3400");
    javaText = StringEscapeUtils.escapeJava("?");

    System.out.println(javaText);
}

From source file:org.apache.mahout.text.wikipedia.WikipediaDatasetCreatorMapper.java

/**
 * Emits one record per input document that matches a known category.
 *
 * <p>The opening text tag (first match) and all closing text tags are stripped from the value,
 * the remainder is HTML-unescaped and matched against the categories. Documents whose category
 * is {@code "Unknown"} are skipped. For the rest, the document is tokenized with the configured
 * analyzer and written out with the category (non-alpha runs replaced by {@code "_"}) as key and
 * the space-separated tokens as value.
 *
 * @param key input key (not used)
 * @param value raw document text
 * @param context sink for the emitted key/value pair
 * @throws IOException if tokenizing or writing fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String document = value.toString();
    document = StringEscapeUtils.unescapeHtml4(CLOSE_TEXT_TAG_PATTERN
            .matcher(OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
    String catMatch = findMatchingCategory(document);
    if ("Unknown".equals(catMatch)) {
        return;
    }

    StringBuilder contents = new StringBuilder(1000);
    TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
    try {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
        }
        context.write(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
                new Text(contents.toString()));
        stream.end();
    } finally {
        // Always release the stream, even when tokenizing or writing fails; otherwise the
        // stream stays open after an exception.
        Closeables.close(stream, true);
    }
}

From source file:org.apache.mahout.text.wikipedia.WikipediaMapper.java

/**
 * Emits a document keyed by {@code /<category>/<title>}.
 *
 * <p>Inputs containing the redirect marker are skipped, as are inputs whose document or title
 * cannot be extracted. Unless {@code all} is set, documents whose category is {@code "Unknown"}
 * are skipped too. The document is HTML-unescaped and, when {@code removeLabels} is set, has its
 * categories removed; a {@code null} result from that step drops the document.
 *
 * @param key input key (not used)
 * @param value raw page content
 * @param context sink for the emitted key/value pair
 * @throws IOException if writing fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final String rawContent = value.toString();
    if (rawContent.contains(REDIRECT)) {
        return;
    }

    String document;
    final String title;
    try {
        document = getDocument(rawContent);
        title = getTitle(rawContent);
    } catch (RuntimeException e) {
        // TODO: reporter.getCounter("Wikipedia", "Parse errors").increment(1);
        return;
    }

    final String catMatch = findMatchingCategory(document);
    if (!all && "Unknown".equals(catMatch)) {
        return;
    }

    document = StringEscapeUtils.unescapeHtml4(document);
    if (removeLabels) {
        document = removeCategoriesFromText(document);
        if (document == null) {
            // Reject documents with malformed tags
            return;
        }
    }

    // write out in Bayes input style: key: /Category/document_name
    final String normalizedTitle = SPACE_NON_ALPHA_PATTERN.matcher(title).replaceAll("_");
    final String category = "/" + catMatch.toLowerCase(Locale.ENGLISH) + "/" + normalizedTitle;

    context.write(new Text(category), new Text(document));
}

From source file:org.apache.tika.parser.pdf.EnhancedPDFParser.java

/**
 * Returns the requested capture group, trimmed (empty if the group is {@code null}) and with
 * HTML entities unescaped.
 *
 * @param m matcher that has already matched
 * @param group index of the capture group to read
 * @return the cleaned group text
 */
private String getMatchGroup(Matcher m, int group) {
    final String trimmed = StringUtils.trimToEmpty(m.group(group));
    return StringEscapeUtils.unescapeHtml4(trimmed);
}