Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

This page collects example usages of org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4, taken from open-source projects.

Prototype

public static final String unescapeHtml4(final String input)

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:net.krautchan.data.KCPosting.java

/**
 * Stores the posting content after removing the first occurrence of the post
 * number followed by {@code ">} and any whitespace, unescaping HTML entities,
 * dropping opening paragraph tags and turning closing paragraph tags into spaces.
 * The result is passed through {@code sanitizeContent} before being stored.
 *
 * @param content the raw posting markup
 */
public void setContent(String content) {
    final String withoutNumber = content.replaceFirst(kcNummer + "\">\\s*", "");
    final String unescaped = StringEscapeUtils.unescapeHtml4(withoutNumber);
    // The tags contain no regex metacharacters, so a literal replace is equivalent.
    final String withoutParagraphs = unescaped.replace("<p>", "").replace("</p>", " ");
    this.content = sanitizeContent(withoutParagraphs);
}

From source file:ee.ria.xroad.opmonitordaemon.HealthDataRequestHandler.java

/**
 * Builds a {@code ServiceId} from its string form. The string is split on
 * {@code IDENTIFIER_SEPARATOR} into at most {@code SERVICE_ID_NUM_PARTS} parts,
 * each part is HTML-unescaped, and blank parts are replaced by {@code null}.
 *
 * @param id the service identifier string
 * @return the service identifier created from the parts
 */
private static ServiceId convertIdentifier(String id) {
    // Construct a valid service identifier so we can compare its
    // provider to the optionally provided exchange partner's client ID
    final String[] parts = id.split(IDENTIFIER_SEPARATOR, SERVICE_ID_NUM_PARTS);

    for (int idx = 0; idx < parts.length; idx++) {
        final String unescaped = StringEscapeUtils.unescapeHtml4(parts[idx]);
        parts[idx] = StringUtils.isBlank(unescaped) ? null : unescaped;
    }

    // Only pass the version part when the split produced the full number of parts.
    final boolean allPartsPresent = parts.length > SERVICE_ID_NUM_PARTS - 1;
    if (allPartsPresent) {
        return ServiceId.create(parts[0], parts[1], parts[2], parts[SERVICE_ID_SUBSYSTEM_PART],
                parts[SERVICE_ID_CODE_PART], parts[SERVICE_ID_VERSION_PART]);
    }
    return ServiceId.create(parts[0], parts[1], parts[2], parts[SERVICE_ID_SUBSYSTEM_PART],
            parts[SERVICE_ID_CODE_PART]);
}

From source file:eu.nerdz.api.impl.reverse.messages.ReverseConversationHandler.java

/**
 * This method parses the sender's username from a raw message.
 * <p>
 * The username is the text between the last {@code '>'} and the last
 * {@code </a>} that precede the final {@code <time} marker, with HTML
 * entities unescaped.
 *
 * @param messageString the raw message string parsed by splitMessage
 * @return A String containing the sender's username
 * @throws ContentException if no {@code </a>} is found before the last {@code <time}
 *                          marker, or if no {@code '>'} precedes that closing tag
 */
private String parseSender(String messageString) throws ContentException {

    int closeLinkPosition = messageString.lastIndexOf("</a>", messageString.lastIndexOf("<time"));
    if (closeLinkPosition < 0) {
        throw new ContentException("malformed response: " + messageString);
    }

    // Test the raw index before adding 1: "not found" (-1) would otherwise
    // become 0 and slip past the check.
    int openTagEnd = messageString.lastIndexOf('>', closeLinkPosition);
    if (openTagEnd < 0) {
        throw new ContentException("malformed response: " + messageString);
    }
    int nickStart = openTagEnd + 1;

    return StringEscapeUtils.unescapeHtml4(messageString.substring(nickStart, closeLinkPosition));

}

From source file:com.jaeksoft.searchlib.parser.HtmlParser.java

/**
 * Recursively collects the text of {@code node} and its descendants into {@code sb},
 * flushing the buffer into {@code result} when a block boundary is reached.
 * <p>
 * Nothing is collected for: nodes contained in {@code nodeExclusionsSet}, comment nodes,
 * {@code script}/{@code style}/{@code object}/{@code title} elements, {@code oss} elements
 * with {@code ignore="yes"}, and elements carrying the {@code OPENSEARCHSERVER_IGNORE} class.
 *
 * @param result            receives the collected text (direct fields or body field)
 * @param sb                buffer accumulating text between flushes
 * @param node              the node to process
 * @param bAddBlock         when true, the buffer may be flushed to {@code result} after this node
 * @param directFields      direct field names inherited from an ancestor, or null
 * @param recursion         remaining depth budget; processing stops (with a warning) at 0
 * @param nodeExclusionsSet underlying nodes to skip, or null
 */
private void getBodyTextContent(ParserResultItem result, StringBuilder sb, HtmlNodeAbstract<?> node,
        boolean bAddBlock, String[] directFields, int recursion, Set<Object> nodeExclusionsSet) {
    if (recursion == 0) {
        Logging.warn("Max recursion reached (getBodyTextContent)");
        return;
    }
    if (nodeExclusionsSet != null)
        if (nodeExclusionsSet.contains(node.node))
            return;

    recursion--;
    if (node.isComment())
        return;
    String nodeName = node.getNodeName();
    if ("script".equalsIgnoreCase(nodeName))
        return;
    if ("style".equalsIgnoreCase(nodeName))
        return;
    if ("object".equalsIgnoreCase(nodeName))
        return;
    if ("title".equalsIgnoreCase(nodeName))
        return;
    if ("oss".equalsIgnoreCase(nodeName)) {
        if ("yes".equalsIgnoreCase(node.getAttribute("ignore")))
            return;
    }

    boolean bEnterDirectField = false;
    String classNameAttribute = node.getAttribute("class");
    if (classNameAttribute != null) {
        String[] classNames = org.apache.commons.lang.StringUtils.split(classNameAttribute);
        if (classNames != null) {
            for (String className : classNames) {
                if (OPENSEARCHSERVER_IGNORE.equalsIgnoreCase(className))
                    return;
                if (className.startsWith(OPENSEARCHSERVER_FIELD)) {
                    // Take the suffix from the matching class token, not from the whole
                    // attribute: with several classes the attribute does not start with
                    // the prefix and would yield a wrong field name.
                    // Presumably OPENSEARCHSERVER_FIELD_LENGTH is the prefix length - confirm.
                    String directField = className.substring(OPENSEARCHSERVER_FIELD_LENGTH);
                    if (directField.length() > 0) {
                        directFields = directField.split("\\.");
                        bEnterDirectField = directFields.length > 0;
                    }
                }
            }
        }
    }

    if (node.isTextNode()) {
        // Flatten line breaks and repeated spaces before unescaping entities.
        String text = node.getText();
        text = text.replaceAll("\\r", " ");
        text = text.replaceAll("\\n", " ");
        text = StringUtils.replaceConsecutiveSpaces(text, " ");
        text = text.trim();
        if (text.length() > 0) {
            text = StringEscapeUtils.unescapeHtml4(text);
            if (sb.length() > 0)
                sb.append(' ');
            sb.append(text);
        }
    }
    List<HtmlNodeAbstract<?>> children = node.getChildNodes();
    if (children != null)
        for (HtmlNodeAbstract<?> htmlNode : children)
            getBodyTextContent(result, sb, htmlNode, bAddBlock, directFields, recursion, nodeExclusionsSet);

    if (bAddBlock && nodeName != null && sb.length() > 0) {
        String currentTag = nodeName.toLowerCase();
        // Flush when a sentence-level tag ends without a trailing '.', or when this
        // node opened a direct field.
        boolean bForSentence = sb.charAt(sb.length() - 1) != '.' && sentenceTagSet.contains(currentTag);
        if (bForSentence || bEnterDirectField) {
            if (directFields != null)
                result.addDirectFields(directFields, sb.toString());
            else
                addFieldBody(result, currentTag, sb.toString());
            sb.setLength(0);
        }
    }
}

From source file:cx.fbn.nevernote.sql.REnSearch.java

/**
 * Checks whether a note satisfies at least one of the content criteria held in
 * {@code searchPhrases}, {@code todo} and {@code resource}.
 * <p>
 * Returns {@code true} immediately when all three lists are empty. Otherwise the
 * note is reloaded through {@code conn} and the criteria are evaluated in the
 * order phrases, todo terms, resource terms; the first decisive criterion
 * determines the result.
 *
 * @param n the note to test; it is replaced by the record reloaded via its GUID
 * @return {@code true} if a criterion matches, {@code false} otherwise
 */
private boolean matchContentAny(Note n) {
    if (todo.size() == 0 && resource.size() == 0 && searchPhrases.size() == 0)
        return true;

    // pull back the record
    n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false);

    // Check for search phrases against the tag-stripped, unescaped, lower-cased content
    String text = StringEscapeUtils.unescapeHtml4(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase();
    boolean negative = false;
    for (int i = 0; i < searchPhrases.size(); i++) {
        String phrase = searchPhrases.get(i);
        if (phrase.startsWith("-")) {
            negative = true;
            phrase = phrase.substring(1);
        } else
            negative = false;
        // Drop the first and last character - presumably the surrounding quotes; TODO confirm
        phrase = phrase.substring(1);
        phrase = phrase.substring(0, phrase.length() - 1);
        if (text.indexOf(phrase) >= 0) {
            if (negative)
                return false;
            else
                return true;
        }
        if (text.indexOf(phrase) < 0 && negative)
            return true;
    }

    for (int i = 0; i < todo.size(); i++) {
        String value = todo.get(i);
        value = value.replace("\"", "");
        boolean desiredState;
        if (!value.endsWith(":false") && !value.endsWith(":true") && !value.endsWith(":*")
                && !value.endsWith("*"))
            return false;
        if (value.endsWith(":false"))
            desiredState = false;
        else
            desiredState = true;
        if (value.startsWith("-"))
            desiredState = !desiredState;
        int pos = n.getContent().indexOf("<en-todo");
        if (pos == -1 && value.startsWith("-") && (value.endsWith("*") || value.endsWith(":")))
            return true;
        if (value.endsWith("*"))
            return true;
        while (pos > -1) {
            // NOTE(review): endPos is -1 when the tag is not closed with "/>", which
            // makes substring throw - confirm whether the content is always well-formed.
            int endPos = n.getContent().indexOf("/>", pos);
            String segment = n.getContent().substring(pos, endPos);
            boolean currentState;
            if (segment.toLowerCase().indexOf("checked=\"true\"") == -1)
                currentState = false;
            else
                currentState = true;
            if (desiredState == currentState)
                return true;

            pos = n.getContent().indexOf("<en-todo", pos + 1);
        }
    }

    // Check resources
    for (int i = 0; i < resource.size(); i++) {
        String resourceString = resource.get(i);
        resourceString = resourceString.replace("\"", "");
        // Decide negation per term; previously the flag was only ever set to true,
        // so a plain term following a "-" term was treated as negated as well.
        negative = resourceString.startsWith("-");
        resourceString = resourceString.substring(resourceString.indexOf(":") + 1);
        for (int j = 0; j < n.getResourcesSize(); j++) {
            boolean match = stringMatch(n.getResources().get(j).getMime(), resourceString, negative);
            if (match)
                return true;
        }
    }
    return false;
}

From source file:eu.nerdz.api.impl.reverse.messages.ReverseConversationHandler.java

/**
 * This method parses the message from a raw message string.
 *
 * @param messageString the raw message string parsed by splitMessage
 * @return A message/*from   ww  w. j ava 2  s.co m*/
 * @throws ContentException
 */
private String parseMessage(String messageString) throws ContentException {

    int msgStart = messageString.lastIndexOf("1pt solid #FFF\">") + 16;
    if (msgStart <= 0) {
        throw new ContentException("malformed message string: " + messageString);
    }

    return StringEscapeUtils.unescapeHtml4(this.removeTags(messageString.substring(msgStart)));

}

From source file:net.krautchan.data.KCPosting.java

/**
 * Sets one field of this posting from its raw string value.
 *
 * @param fields selects which field to set
 * @param arg    the raw value; ignored for {@code SAGE}, and only {@code KC_NUM}
 *               checks it for null
 * @throws ParseException        declared by this method; presumably raised while
 *                               parsing the date - confirm against {@code setDate}
 * @throws IllegalStateException if {@code fields} is not one of the handled values
 */
public void setField(Fields fields, String arg) throws ParseException {
    switch (fields) {
    case KC_NUM: {
        if (null != arg) {
            kcNummer = Long.parseLong(arg);
        }
        break;
    }
    case TITLE: {
        setTitle(StringEscapeUtils.unescapeHtml4(arg));
        break;
    }
    case USER: {
        user = arg;
        break;
    }
    case DATE: {
        setDate(arg);
        break;
    }
    case URI: {
        Matcher m = uriPat.matcher(arg);
        if (m.find()) {
            this.setUri(m.group(1));
        }
        break;
    }
    case IMAGES: {
        // Record every match: group 1 is the map key, group 2 the value.
        Matcher m = imgPat.matcher(arg);
        while (m.find()) {
            files.put(m.group(1), m.group(2));
        }
        break;
    }
    case CONTENT: {
        // Strip the post-number prefix and a trailing "</p>", then collapse
        // whitespace between adjacent paragraphs.
        content = arg.replaceFirst(kcNummer + "\">\\s*", "").trim();
        if (content.endsWith("</p>")) {
            content = content.substring(0, content.length() - 4);
        }
        content = content.replaceAll("</p>\\s+<p", "</p><p");
        // Keep the unsanitized markup (with self-closing breaks) separately.
        originalContent = content.replaceAll("<br>", "<br />");
        content = sanitizeContent(content);
        break;
    }
    case SAGE: {
        sage = true;
        break;
    }
    default:
        throw new IllegalStateException("Illegal State in KCPosting:setField");
    }
}

From source file:by.heap.remark.convert.TextCleaner.java

/**
 * Cleans the given input according to the configured options, optionally
 * leaving special characters unescaped.
 *
 * @param oinput The text to be cleaned. Can be any object. JSoup nodes are handled specially.
 * @param normalText If false, don't escape special characters.  This is usually only used for
 *                 inline code or code blocks, because they don't need to be escaped.
 * @return The cleaned text.
 */
private String clean(Object oinput, boolean normalText) {
    // Step 1: obtain the raw text, depending on what kind of object was passed in.
    final String raw;
    if (oinput instanceof TextNode) {
        raw = getTextNodeText((TextNode) oinput, normalText);
    } else if (oinput instanceof Element) {
        final Element element = (Element) oinput;
        raw = normalText ? element.text() : getPreformattedText(element);
    } else {
        raw = oinput.toString();
    }

    // Nothing to clean.
    if (raw.length() == 0) {
        return "";
    }

    if (!normalText) {
        // Code: revert ALL HTML entities, because markdown would otherwise end up
        // double-encoding them; nothing needs escaping here.
        // &apos; is replaced manually because StringEscapeUtils ignores it.
        return StringEscapeUtils.unescapeHtml4(raw.replace("&apos;", "'"));
    }

    // Non-code text: newlines are never allowed, so whitespace followed by a
    // newline collapses into a single space.
    String escaped = LINEBREAK_REMOVER.matcher(raw).replaceAll(" ");

    // Escape special characters.
    for (final Escape rule : escapes) {
        escaped = rule.pattern.matcher(escaped).replaceAll(rule.replacement);
    }

    StringBuffer replaced = doReplacements(escaped, entityReplacementsPattern);
    if (unicodeReplacementsPattern != null) {
        replaced = doReplacements(replaced, unicodeReplacementsPattern);
    }
    return replaced.toString();
}

From source file:com.github.naoghuman.cm.model.subcategory.SubCategoryModel.java

/**
 * Restores this model from {@code in}: four {@code long} values (id, matrix id,
 * category id, generation time) followed by two objects (title, description)
 * whose string forms are HTML-unescaped before being stored.
 *
 * @param in the stream to read from
 * @throws IOException            if reading from the stream fails
 * @throws ClassNotFoundException if the class of a serialized object cannot be found
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    // Each value is read and applied immediately, keeping the stream order.
    final long id = in.readLong();
    this.setId(id);

    final long matrixId = in.readLong();
    this.setMatrixId(matrixId);

    final long categoryId = in.readLong();
    this.setCategoryId(categoryId);

    final long generationTime = in.readLong();
    this.setGenerationTime(generationTime);

    final Object rawTitle = in.readObject();
    this.setTitle(StringEscapeUtils.unescapeHtml4(String.valueOf(rawTitle)));

    final Object rawDescription = in.readObject();
    this.setDescription(StringEscapeUtils.unescapeHtml4(String.valueOf(rawDescription)));
}

From source file:fr.mcc.ginco.rest.services.ThesaurusRestService.java

/**
 * Public method used to publish a thesaurus.
 * <p>
 * When a thesaurus with the given id exists, it is published, the publication
 * is recorded in the version history for {@code userId}, and the thesaurus date
 * is updated. The lookup result (possibly {@code null}) is then wrapped in an
 * {@code ExtJsonFormLoadData}, serialized to JSON, and HTML-unescaped.
 *
 * @param thesaurusId id of the thesaurus to publish
 * @param userId      id of the user performing the publication
 * @return the serialized response with HTML entities unescaped
 * @throws IOException declared for the JSON serialization step
 */
@GET
@Path("/publishVocabulary")
@Consumes({ MediaType.APPLICATION_JSON })
@Produces(MediaType.TEXT_HTML)
@PreAuthorize("hasPermission(#thesaurusId, '0')")
public String publishVocabulary(@QueryParam("thesaurusId") String thesaurusId,
        @QueryParam("userId") String userId) throws IOException {
    final Thesaurus thesaurus = thesaurusService.getThesaurusById(thesaurusId);

    final boolean found = thesaurus != null;
    if (found) {
        thesaurusService.publishThesaurus(thesaurus);
        thesaurusVersionHistoryService.publishThesaurus(thesaurus, userId);

        // Update vocabulary date
        thesaurusService.updateThesaurusDate(thesaurus);
    }

    final String json = new ObjectMapper().writeValueAsString(new ExtJsonFormLoadData(thesaurus));
    return StringEscapeUtils.unescapeHtml4(json);
}