List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:net.krautchan.data.KCPosting.java
/**
 * Stores the posting body after removing the leading posting-number marker,
 * decoding HTML entities and stripping paragraph tags.
 *
 * @param content the raw posting text
 */
public void setContent(String content) {
    // Remove the first occurrence of kcNummer followed by "> and any whitespace after it.
    final String withoutMarker = content.replaceFirst(kcNummer + "\">\\s*", "");
    // Entities are decoded before the paragraph tags are handled: opening tags
    // disappear, closing tags become a single space.
    final String flattened = StringEscapeUtils.unescapeHtml4(withoutMarker)
            .replaceAll("<p>", "")
            .replaceAll("</p>", " ");
    this.content = sanitizeContent(flattened);
}
From source file:ee.ria.xroad.opmonitordaemon.HealthDataRequestHandler.java
private static ServiceId convertIdentifier(String id) { // Construct a valid service identifier so we can compare it's // provider to the optionally provided exchange partner's client ID String[] idParts = id.split(IDENTIFIER_SEPARATOR, SERVICE_ID_NUM_PARTS); for (int i = 0; i < idParts.length; i++) { idParts[i] = StringEscapeUtils.unescapeHtml4(idParts[i]); if (StringUtils.isBlank(idParts[i])) { idParts[i] = null;// w ww . j a v a2s. com } } return idParts.length > SERVICE_ID_NUM_PARTS - 1 ? ServiceId.create(idParts[0], idParts[1], idParts[2], idParts[SERVICE_ID_SUBSYSTEM_PART], idParts[SERVICE_ID_CODE_PART], idParts[SERVICE_ID_VERSION_PART]) : ServiceId.create(idParts[0], idParts[1], idParts[2], idParts[SERVICE_ID_SUBSYSTEM_PART], idParts[SERVICE_ID_CODE_PART]); }
From source file:eu.nerdz.api.impl.reverse.messages.ReverseConversationHandler.java
/**
 * Parses the sender's username from a raw message.
 *
 * <p>The username is the text between the last {@code '>'} that precedes the
 * closing {@code </a>} tag and that closing tag itself. The closing tag is
 * searched backwards from the last occurrence of {@code "<time"}.
 *
 * @param messageString the raw message string parsed by splitMessage
 * @return A String containing the sender's username, with HTML entities unescaped
 * @throws ContentException if the closing {@code </a>} tag, or the {@code '>'}
 *         that should precede the username, cannot be found
 */
private String parseSender(String messageString) throws ContentException {
    int closeLinkPosition = messageString.lastIndexOf("</a>", messageString.lastIndexOf("<time"));
    if (closeLinkPosition < 0) {
        throw new ContentException("malformed response: " + messageString);
    }

    // Check the raw index before adding the offset: lastIndexOf returns -1 when
    // nothing is found, so a test performed after "+ 1" could never be negative.
    int openTagEnd = messageString.lastIndexOf('>', closeLinkPosition);
    if (openTagEnd < 0) {
        throw new ContentException("malformed response: " + messageString);
    }

    return StringEscapeUtils.unescapeHtml4(messageString.substring(openTagEnd + 1, closeLinkPosition));
}
From source file:com.jaeksoft.searchlib.parser.HtmlParser.java
/**
 * Recursively walks an HTML node tree, accumulating the text of text nodes in
 * {@code sb} and flushing the buffer into {@code result}.
 *
 * <p>Comment nodes, nodes contained in {@code nodeExclusionsSet}, the elements
 * {@code script}, {@code style}, {@code object} and {@code title}, {@code oss}
 * elements with {@code ignore="yes"}, and elements carrying the
 * {@code OPENSEARCHSERVER_IGNORE} class are skipped together with their children.
 *
 * @param result            receives the flushed text
 * @param sb                buffer holding the text collected since the last flush
 * @param node              the node to process
 * @param bAddBlock         when {@code false} the buffer is never flushed by this method
 * @param directFields      target passed to {@code result.addDirectFields} when non-null;
 *                          replaced for this node and its children when the node declares
 *                          a class starting with {@code OPENSEARCHSERVER_FIELD}
 * @param recursion         remaining depth budget; at 0 a warning is logged and the node is skipped
 * @param nodeExclusionsSet underlying nodes to skip, may be {@code null}
 */
private void getBodyTextContent(ParserResultItem result, StringBuilder sb, HtmlNodeAbstract<?> node,
        boolean bAddBlock, String[] directFields, int recursion, Set<Object> nodeExclusionsSet) {
    if (recursion == 0) {
        Logging.warn("Max recursion reached (getBodyTextContent)");
        return;
    }
    if (nodeExclusionsSet != null && nodeExclusionsSet.contains(node.node)) {
        return;
    }
    recursion--;
    if (node.isComment()) {
        return;
    }

    String nodeName = node.getNodeName();
    if ("script".equalsIgnoreCase(nodeName)) {
        return;
    }
    if ("style".equalsIgnoreCase(nodeName)) {
        return;
    }
    if ("object".equalsIgnoreCase(nodeName)) {
        return;
    }
    if ("title".equalsIgnoreCase(nodeName)) {
        return;
    }
    if ("oss".equalsIgnoreCase(nodeName) && "yes".equalsIgnoreCase(node.getAttribute("ignore"))) {
        return;
    }

    boolean bEnterDirectField = false;
    String classNameAttribute = node.getAttribute("class");
    if (classNameAttribute != null) {
        String[] classNames = org.apache.commons.lang.StringUtils.split(classNameAttribute);
        if (classNames != null) {
            for (String className : classNames) {
                if (OPENSEARCHSERVER_IGNORE.equalsIgnoreCase(className)) {
                    return;
                }
                if (className.startsWith(OPENSEARCHSERVER_FIELD)) {
                    // Take the field name from the matching class token. Slicing the whole
                    // attribute gave a wrong name whenever the element had several classes.
                    // NOTE(review): assumes OPENSEARCHSERVER_FIELD_LENGTH equals
                    // OPENSEARCHSERVER_FIELD.length() - confirm against the constants.
                    String directField = className.substring(OPENSEARCHSERVER_FIELD_LENGTH);
                    if (directField.length() > 0) {
                        directFields = directField.split("\\.");
                        bEnterDirectField = directFields.length > 0;
                    }
                }
            }
        }
    }

    if (node.isTextNode()) {
        // Normalise line breaks and runs of spaces before unescaping the entities.
        String text = node.getText();
        text = text.replaceAll("\\r", " ");
        text = text.replaceAll("\\n", " ");
        text = StringUtils.replaceConsecutiveSpaces(text, " ");
        text = text.trim();
        if (text.length() > 0) {
            text = StringEscapeUtils.unescapeHtml4(text);
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(text);
        }
    }

    List<HtmlNodeAbstract<?>> children = node.getChildNodes();
    if (children != null) {
        for (HtmlNodeAbstract<?> htmlNode : children) {
            getBodyTextContent(result, sb, htmlNode, bAddBlock, directFields, recursion, nodeExclusionsSet);
        }
    }

    if (bAddBlock && nodeName != null && sb.length() > 0) {
        String currentTag = nodeName.toLowerCase();
        // Flush when this tag is in sentenceTagSet and the buffer does not already
        // end with a period, or when this node opened a direct field.
        boolean bForSentence = sb.charAt(sb.length() - 1) != '.' && sentenceTagSet.contains(currentTag);
        if (bForSentence || bEnterDirectField) {
            if (directFields != null) {
                result.addDirectFields(directFields, sb.toString());
            } else {
                addFieldBody(result, currentTag, sb.toString());
            }
            sb.setLength(0);
        }
    }
}
From source file:cx.fbn.nevernote.sql.REnSearch.java
private boolean matchContentAny(Note n) { if (todo.size() == 0 && resource.size() == 0 && searchPhrases.size() == 0) return true; // pull back the record n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false); // Check for search phrases String text = StringEscapeUtils.unescapeHtml4(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase(); boolean negative = false; for (int i = 0; i < searchPhrases.size(); i++) { String phrase = searchPhrases.get(i); if (phrase.startsWith("-")) { negative = true;//from ww w. j ava2 s. com phrase = phrase.substring(1); } else negative = false; phrase = phrase.substring(1); phrase = phrase.substring(0, phrase.length() - 1); if (text.indexOf(phrase) >= 0) { if (negative) return false; else return true; } if (text.indexOf(phrase) < 0 && negative) return true; } for (int i = 0; i < todo.size(); i++) { String value = todo.get(i); value = value.replace("\"", ""); boolean desiredState; if (!value.endsWith(":false") && !value.endsWith(":true") && !value.endsWith(":*") && !value.endsWith("*")) return false; if (value.endsWith(":false")) desiredState = false; else desiredState = true; if (value.startsWith("-")) desiredState = !desiredState; int pos = n.getContent().indexOf("<en-todo"); if (pos == -1 && value.startsWith("-") && (value.endsWith("*") || value.endsWith(":"))) return true; if (value.endsWith("*")) return true; while (pos > -1) { int endPos = n.getContent().indexOf("/>", pos); String segment = n.getContent().substring(pos, endPos); boolean currentState; if (segment.toLowerCase().indexOf("checked=\"true\"") == -1) currentState = false; else currentState = true; if (desiredState == currentState) return true; pos = n.getContent().indexOf("<en-todo", pos + 1); } } // Check resources for (int i = 0; i < resource.size(); i++) { String resourceString = resource.get(i); resourceString = resourceString.replace("\"", ""); if (resourceString.startsWith("-")) negative = true; resourceString = 
resourceString.substring(resourceString.indexOf(":") + 1); for (int j = 0; j < n.getResourcesSize(); j++) { boolean match = stringMatch(n.getResources().get(j).getMime(), resourceString, negative); if (match) return true; } } return false; }
From source file:eu.nerdz.api.impl.reverse.messages.ReverseConversationHandler.java
/** * This method parses the message from a raw message string. * * @param messageString the raw message string parsed by splitMessage * @return A message/*from ww w. j ava 2 s.co m*/ * @throws ContentException */ private String parseMessage(String messageString) throws ContentException { int msgStart = messageString.lastIndexOf("1pt solid #FFF\">") + 16; if (msgStart <= 0) { throw new ContentException("malformed message string: " + messageString); } return StringEscapeUtils.unescapeHtml4(this.removeTags(messageString.substring(msgStart))); }
From source file:net.krautchan.data.KCPosting.java
public void setField(Fields fields, String arg) throws ParseException { switch (fields) { case KC_NUM: { if (null != arg) { kcNummer = Long.parseLong(arg); }//w w w . j a va 2 s .co m break; } case TITLE: { setTitle(StringEscapeUtils.unescapeHtml4(arg)); break; } case USER: { user = arg; break; } case DATE: { setDate(arg); break; } case URI: { Matcher m = uriPat.matcher(arg); if (m.find()) { this.setUri(m.group(1)); } break; } case IMAGES: { Matcher m = imgPat.matcher(arg); int i = 0; while (m.find()) { files.put(m.group(1), m.group(2)); i++; } break; } case CONTENT: { content = arg.replaceFirst(kcNummer + "\">\\s*", "").trim(); if (content.endsWith("</p>")) { content = content.substring(0, content.length() - 4); } content = content.replaceAll("</p>\\s+<p", "</p><p"); originalContent = content.replaceAll("<br>", "<br />"); content = sanitizeContent(content); break; } case SAGE: { sage = true; break; } default: throw new IllegalStateException("Illegal State in KCPosting:setField"); } }
From source file:by.heap.remark.convert.TextCleaner.java
/** * Clean the given input text based on the original configuration Options. * Optionally, don't escape special characters. * * @param oinput The text to be cleaned. Can be any object. JSoup nodes are handled specially. * @param normalText If false, don't escape special characters. This is usually only used for * inline code or code blocks, because they don't need to be escaped. * @return The cleaned text.//from www . j a v a 2 s. c o m */ private String clean(Object oinput, boolean normalText) { String input; if (oinput instanceof TextNode) { input = getTextNodeText((TextNode) oinput, normalText); } else if (oinput instanceof Element) { if (normalText) { input = ((Element) oinput).text(); } else { input = getPreformattedText((Element) oinput); } } else { input = oinput.toString(); } String result; if (input.length() == 0) { // not seen, so just return an empty string. result = ""; } else if (normalText) { // For non-code text, newlines are _never_ allowed. // Replace one or more set of whitespace chars followed by a newline with a single space. input = LINEBREAK_REMOVER.matcher(input).replaceAll(" "); // now escape special characters. for (final Escape rep : escapes) { input = rep.pattern.matcher(input).replaceAll(rep.replacement); } StringBuffer output = doReplacements(input, entityReplacementsPattern); if (unicodeReplacementsPattern != null) { output = doReplacements(output, unicodeReplacementsPattern); } result = output.toString(); } else { // we have to revert ALL HTML entities for code, because they will end up // double-encoded by markdown // we also don't need to worry about escaping anything // note: we have to manually replace ' because it is ignored by StringEscapeUtils for some reason. result = StringEscapeUtils.unescapeHtml4(input.replace("'", "'")); } return result; }
From source file:com.github.naoghuman.cm.model.subcategory.SubCategoryModel.java
/**
 * Restores this model from {@code in}.
 *
 * <p>Values are read in this order: id, matrix id, category id and generation
 * time as longs, then title and description as objects. The two objects are
 * converted with {@code String.valueOf} and HTML-unescaped before being set.
 *
 * @param in the stream to read from
 * @throws IOException            if reading from the stream fails
 * @throws ClassNotFoundException if the class of a serialized object cannot be found
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    // Each value is applied as soon as it is read, so the stream order must not change.
    final long id = in.readLong();
    this.setId(id);

    final long matrixId = in.readLong();
    this.setMatrixId(matrixId);

    final long categoryId = in.readLong();
    this.setCategoryId(categoryId);

    final long generationTime = in.readLong();
    this.setGenerationTime(generationTime);

    final String rawTitle = String.valueOf(in.readObject());
    this.setTitle(StringEscapeUtils.unescapeHtml4(rawTitle));

    final String rawDescription = String.valueOf(in.readObject());
    this.setDescription(StringEscapeUtils.unescapeHtml4(rawDescription));
}
From source file:fr.mcc.ginco.rest.services.ThesaurusRestService.java
/** * Public method used to publish thesaurus * * @throws IOException// w w w .j a v a2 s . c o m */ @GET @Path("/publishVocabulary") @Consumes({ MediaType.APPLICATION_JSON }) @Produces(MediaType.TEXT_HTML) @PreAuthorize("hasPermission(#thesaurusId, '0')") public String publishVocabulary(@QueryParam("thesaurusId") String thesaurusId, @QueryParam("userId") String userId) throws IOException { Thesaurus object = thesaurusService.getThesaurusById(thesaurusId); if (object != null) { thesaurusService.publishThesaurus(object); thesaurusVersionHistoryService.publishThesaurus(object, userId); //Update vocabulary date thesaurusService.updateThesaurusDate(object); } ObjectMapper mapper = new ObjectMapper(); String serialized = mapper.writeValueAsString(new ExtJsonFormLoadData(object)); return StringEscapeUtils.unescapeHtml4(serialized); }