List of usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:nya.miku.wishmaster.chans.nullchan.AbstractInstant0chan.java
/**
 * Builds the list of boards from the JSON array obtained for {@code getUsingUrl() + "boards10.json"}.
 *
 * <p>Each element of the array is read as a category with a {@code "name"} and a
 * {@code "boards"} array; every board entry becomes one {@link SimpleBoardModel}.
 *
 * @param listener progress listener handed to {@code downloadJSONArray}
 * @param task cancellation handle handed to {@code downloadJSONArray}
 * @param oldBoardsList previously known boards; returned as-is when the download yields
 *     {@code null}, and its non-nullness is passed to the download call as a flag
 * @return the parsed boards, {@code oldBoardsList} when nothing was downloaded, or an empty
 *     array when a {@code JSONException} is raised
 * @throws Exception if anything other than a {@code JSONException} is raised
 */
@Override
public SimpleBoardModel[] getBoardsList(ProgressListener listener, CancellableTask task,
        SimpleBoardModel[] oldBoardsList) throws Exception {
    final String boardsUrl = getUsingUrl() + "boards10.json";
    try {
        final boolean hasPreviousList = oldBoardsList != null;
        JSONArray categories = downloadJSONArray(boardsUrl, hasPreviousList, listener, task);
        if (categories == null) {
            return oldBoardsList;
        }

        List<SimpleBoardModel> result = new ArrayList<>();
        for (int c = 0; c < categories.length(); ++c) {
            String categoryName = categories.getJSONObject(c).optString("name");
            JSONArray categoryBoards = categories.getJSONObject(c).getJSONArray("boards");
            for (int b = 0; b < categoryBoards.length(); ++b) {
                SimpleBoardModel board = new SimpleBoardModel();
                board.chan = getChanName();
                board.boardName = categoryBoards.getJSONObject(b).getString("dir");
                // The description falls back to the board name when "desc" is absent.
                String rawDescription = categoryBoards.getJSONObject(b).optString("desc", board.boardName);
                board.boardDescription = StringEscapeUtils.unescapeHtml4(rawDescription);
                board.boardCategory = categoryName;
                board.nsfw = board.boardName.equals("b") || categoryName.equalsIgnoreCase("adult");
                result.add(board);
            }
        }
        return result.toArray(new SimpleBoardModel[0]);
    } catch (JSONException e) {
        // Unreadable JSON is reported to the caller as "no boards".
        return new SimpleBoardModel[0];
    }
}
From source file:nya.miku.wishmaster.chans.nullchan.AbstractInstant0chan.java
/**
 * Converts one catalog JSON entry into a {@link ThreadModel} that holds only the opening post.
 *
 * @param json catalog entry; must contain an {@code "id"} field
 * @param boardName board directory used to build the relative thumbnail and source paths
 * @return the thread model with a single-element {@code posts} array
 * @throws RuntimeException if the entry has no {@code "id"} field
 */
private ThreadModel mapCatalogThreadModel(JSONObject json, String boardName) {
    ThreadModel model = new ThreadModel();
    model.threadNumber = json.optString("id", null);
    if (model.threadNumber == null) {
        throw new RuntimeException("Catalog entry has no thread id (board: " + boardName + ")");
    }
    // The -2 default plus one yields -1 when the counter is missing from the JSON.
    model.postsCount = json.optInt("reply_count", -2) + 1;
    model.attachmentsCount = json.optInt("images", -2) + 1;
    model.isClosed = json.optInt("locked", 0) != 0;
    model.isSticky = json.optInt("stickied", 0) != 0;

    PostModel opPost = new PostModel();
    opPost.number = model.threadNumber;
    opPost.name = StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlSpanTags(json.optString("name")));
    opPost.subject = StringEscapeUtils.unescapeHtml4(json.optString("subject"));
    opPost.comment = json.optString("message");
    opPost.trip = json.optString("tripcode");
    // The JSON value is multiplied by 1000 before being stored.
    opPost.timestamp = json.optLong("timestamp") * 1000;
    opPost.parentThread = model.threadNumber;

    String ext = json.optString("file_type", "");
    if (!ext.isEmpty()) {
        AttachmentModel attachment = new AttachmentModel();
        switch (ext) {
            case "jpg":
            case "jpeg":
            case "png":
                attachment.type = AttachmentModel.TYPE_IMAGE_STATIC;
                break;
            case "gif":
                attachment.type = AttachmentModel.TYPE_IMAGE_GIF;
                break;
            case "mp3":
            case "ogg":
                attachment.type = AttachmentModel.TYPE_AUDIO;
                break;
            case "webm":
            case "mp4":
                attachment.type = AttachmentModel.TYPE_VIDEO;
                break;
            case "you":
                attachment.type = AttachmentModel.TYPE_OTHER_NOTFILE;
                break;
            default:
                attachment.type = AttachmentModel.TYPE_OTHER_FILE;
        }
        attachment.width = json.optInt("image_w", -1);
        attachment.height = json.optInt("image_h", -1);
        attachment.size = -1;

        String fileName = json.optString("file", "");
        // The attachment is only attached to the post when a file name is present.
        if (!fileName.isEmpty()) {
            if (ext.equals("you")) {
                // For the "you" type the file name is used as a YouTube video id.
                String scheme = useHttps() ? "https" : "http";
                attachment.thumbnail = scheme + "://img.youtube.com/vi/" + fileName + "/default.jpg";
                attachment.path = scheme + "://youtube.com/watch?v=" + fileName;
            } else {
                attachment.thumbnail = "/" + boardName + "/thumb/" + fileName + "s." + ext;
                attachment.path = "/" + boardName + "/src/" + fileName + "." + ext;
            }
            opPost.attachments = new AttachmentModel[] { attachment };
        }
    }

    model.posts = new PostModel[] { opPost };
    return model;
}
From source file:opengovcrawler.DB.java
public static String EscapeHtml(String htmlText) { String escapedText = StringEscapeUtils.unescapeHtml4(htmlText);//escapeHtml(htmlText); return escapedText; }
From source file:org.andstatus.app.msg.MessageViewItem.java
/**
 * Produces a normalized form of a message body: HTML converted through {@code MyHtml.fromHtml},
 * lower-cased, HTML entities unescaped, newlines turned into spaces, and everything before a
 * "favorited something by" phrase removed.
 *
 * @param body the raw message body
 * @return the normalized body
 */
@NonNull
private String getCleanedBody(String body) {
    // Locale.ROOT keeps the lower-casing independent of the default locale; with a Turkish
    // default locale "I" would become a dotless "ı" and the literal match below would fail.
    String out = MyHtml.fromHtml(body).toLowerCase(java.util.Locale.ROOT);
    out = StringEscapeUtils.unescapeHtml4(out);
    return out.replaceAll("\n", " ")
            // NOTE(review): as written this replaces a single space with a single space and
            // changes nothing; possibly meant to collapse double spaces - confirm.
            .replaceAll(" ", " ")
            // Keep only the text starting at the phrase, dropping whatever precedes it.
            .replaceFirst(".*(favorited something by.*)", "$1");
}
From source file:org.apache.jmeter.extractor.RegexExtractor.java
private String getInputString(SampleResult result) { String inputString = useUrl() ? result.getUrlAsString() // Bug 39707 : useHeaders() ? result.getResponseHeaders() : useRequestHeaders() ? result.getRequestHeaders() : useCode() ? result.getResponseCode() // Bug 43451 : useMessage() ? result.getResponseMessage() // Bug 43451 : useUnescapedBody() ? StringEscapeUtils .unescapeHtml4(result.getResponseDataAsString()) : useBodyAsDocument() ? Document.getTextFromDocument( result.getResponseData()) : result.getResponseDataAsString() // Bug 36898 ;/*w w w . j a v a 2 s . co m*/ if (log.isDebugEnabled()) { log.debug("Input = " + inputString); } return inputString; }
From source file:org.apache.jmeter.functions.UnEscapeHtml.java
/**
 * {@inheritDoc}
 *
 * <p>Evaluates the first entry of {@code values} and returns it with HTML entity escapes
 * unescaped. Neither argument is read by this implementation.
 */
@Override
public String execute(SampleResult previousResult, Sampler currentSampler)
        throws InvalidVariableException {
    final CompoundVariable firstParameter = (CompoundVariable) values[0];
    final String escapedText = firstParameter.execute();
    return StringEscapeUtils.unescapeHtml4(escapedText);
}
From source file:org.apache.lucene.analysis.kr.test.KoreanAnalyzerTest.java
public void testJavaEscape() throws Exception { String str = StringEscapeUtils.unescapeHtml4("믕"); System.out.println(str);/*from w w w . j a v a 2 s . c o m*/ //?? String han = StringEscapeUtils.unescapeJava("0x3400"); han = StringEscapeUtils.escapeJava("?"); System.out.println(han); }
From source file:org.apache.mahout.text.wikipedia.WikipediaDatasetCreatorMapper.java
/**
 * Cleans one input document and, when it matches a category other than {@code "Unknown"},
 * writes its analyzed tokens keyed by that category.
 *
 * <p>The first match of {@code OPEN_TEXT_TAG_PATTERN} and every match of
 * {@code CLOSE_TEXT_TAG_PATTERN} are removed, then HTML entities are unescaped. The output
 * value is the token terms joined by single spaces (with a trailing space); the output key is
 * the category with {@code SPACE_NON_ALPHA_PATTERN} matches replaced by underscores.
 *
 * @param key input key; not read
 * @param value the raw document text
 * @param context receives the category/tokens pair
 * @throws IOException if tokenizing or writing fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String document = value.toString();
    document = StringEscapeUtils.unescapeHtml4(CLOSE_TEXT_TAG_PATTERN
            .matcher(OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst("")).replaceAll(""));
    String catMatch = findMatchingCategory(document);
    if (!"Unknown".equals(catMatch)) {
        StringBuilder contents = new StringBuilder(1000);
        TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
        try {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                contents.append(termAtt.buffer(), 0, termAtt.length()).append(' ');
            }
            context.write(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")),
                    new Text(contents.toString()));
            stream.end();
        } finally {
            // Close in finally so the stream is released even when tokenizing or writing throws.
            Closeables.close(stream, true);
        }
    }
}
From source file:org.apache.mahout.text.wikipedia.WikipediaMapper.java
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String content = value.toString(); if (content.contains(REDIRECT)) { return;/*from w w w .j ava 2s . c o m*/ } String document; String title; try { document = getDocument(content); title = getTitle(content); } catch (RuntimeException e) { // TODO: reporter.getCounter("Wikipedia", "Parse errors").increment(1); return; } String catMatch = findMatchingCategory(document); if (!all) { if ("Unknown".equals(catMatch)) { return; } } document = StringEscapeUtils.unescapeHtml4(document); if (removeLabels) { document = removeCategoriesFromText(document); // Reject documents with malformed tags if (document == null) { return; } } // write out in Bayes input style: key: /Category/document_name String category = "/" + catMatch.toLowerCase(Locale.ENGLISH) + "/" + SPACE_NON_ALPHA_PATTERN.matcher(title).replaceAll("_"); context.write(new Text(category), new Text(document)); }
From source file:org.apache.tika.parser.pdf.EnhancedPDFParser.java
/**
 * Reads one capture group from a matcher, passes it through {@code StringUtils.trimToEmpty},
 * and unescapes its HTML entities.
 *
 * @param m matcher to read the group from
 * @param group index of the capture group
 * @return the processed group text
 */
private String getMatchGroup(Matcher m, int group) {
    final String captured = m.group(group);
    final String trimmed = StringUtils.trimToEmpty(captured);
    return StringEscapeUtils.unescapeHtml4(trimmed);
}