Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeXml

Introduction

This page collects example usages of the unescapeXml method of org.apache.commons.lang3.StringEscapeUtils.

Prototype

public static final String unescapeXml(final String input)

Source Link

Document

Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Supports only the five basic XML entities (gt, lt, quot, amp, apos).

Usage

From source file:de.fatalix.book.importer.BookMigrator.java

/**
 * Reads an OPF metadata file line by line and copies the Dublin Core values it
 * recognises (title, creator, description, publisher, date, language, ISBN) into
 * {@code bmd}.
 *
 * <p>This is a line-oriented scan, not an XML parse: every element except
 * {@code dc:description} is expected to sit on a single line. A description may
 * span several lines; its lines are concatenated as-is (no separator is inserted)
 * until a line containing {@code <} is reached, and the result is XML-unescaped.
 *
 * <p>Lines whose tag carries no text on the same line (for example a self-closing
 * tag) are skipped instead of raising {@link ArrayIndexOutOfBoundsException}.
 *
 * @param pathToOPF the OPF file to read, decoded as UTF-8
 * @param bmd       the entry to populate
 * @return the same {@code bmd} instance that was passed in
 * @throws IOException if the file cannot be read
 */
private static BookEntry parseOPF(File pathToOPF, BookEntry bmd) throws IOException {
    List<String> lines = Files.readAllLines(pathToOPF.toPath(), Charset.forName("UTF-8"));
    boolean multiLineDescription = false;
    String description = "";
    for (String line : lines) {
        if (multiLineDescription) {
            String[] beforeTag = line.split("<");
            if (beforeTag.length == 1) {
                // No tag on this line, so the description keeps going.
                description = description + line;
            } else {
                // First tag reached: keep the text in front of it and finish.
                multiLineDescription = false;
                description = description + (beforeTag.length == 0 ? "" : beforeTag[0]);
                description = StringEscapeUtils.unescapeXml(description);
                bmd.setDescription(description);
            }
        } else if (line.contains("dc:title")) {
            String title = opfElementText(line);
            if (title != null) {
                bmd.setTitle(title);
            }
        } else if (line.contains("dc:creator")) {
            String creator = opfElementText(line);
            if (creator != null) {
                bmd.setAuthor(creator);
            }
        } else if (line.contains("dc:description")) {
            String[] afterTag = line.split(">");
            if (afterTag.length < 2) {
                // Only a tag on this line. An opening tag starts a multi-line
                // description; a self-closing or stray closing tag carries nothing.
                String tag = afterTag.length == 0 ? "" : afterTag[0].trim();
                if (!tag.endsWith("/") && !tag.startsWith("</")) {
                    multiLineDescription = true;
                    description = "";
                }
            } else {
                String value = afterTag[1];
                String[] beforeClose = value.split("<");
                if (beforeClose.length == 1) {
                    // No closing tag on this line: continue on the following lines.
                    multiLineDescription = true;
                    description = value;
                } else {
                    value = beforeClose.length == 0 ? "" : beforeClose[0];
                    value = StringEscapeUtils.unescapeXml(value);
                    bmd.setDescription(value);
                }
            }
        } else if (line.contains("dc:publisher")) {
            String value = opfElementText(line);
            if (value != null) {
                bmd.setPublisher(value);
            }
        } else if (line.contains("dc:date")) {
            String value = opfElementText(line);
            if (value != null) {
                DateTime dtReleaseDate = new DateTime(value, DateTimeZone.UTC);
                // NOTE(review): year 101 looks like a "no date" placeholder written
                // by the exporting tool - confirm against the data source.
                if (dtReleaseDate.getYear() != 101) {
                    bmd.setReleaseDate(dtReleaseDate.toDate());
                }
            }
        } else if (line.contains("dc:language")) {
            String value = opfElementText(line);
            if (value != null) {
                bmd.setLanguage(value);
            }
        } else if (line.contains("opf:scheme=\"ISBN\"")) {
            String value = opfElementText(line);
            if (value != null) {
                bmd.setIsbn(value);
            }
        }
    }
    return bmd;
}

/**
 * Returns the text between the first {@code >} and the next {@code <} on the line,
 * or {@code null} when nothing follows the first tag on this line.
 */
private static String opfElementText(String line) {
    String[] afterTag = line.split(">");
    if (afterTag.length < 2) {
        return null;
    }
    // split() drops trailing empty strings, so the result can be empty.
    String[] beforeClose = afterTag[1].split("<");
    return beforeClose.length == 0 ? "" : beforeClose[0];
}

From source file:com.daphne.es.maintain.staticresource.web.controller.StaticResourceVersionController.java

/**
 * Rewrites the first line of the file at {@code fileRealPath} that equals the
 * (XML-unescaped) {@code content}, replacing it with a version produced from the
 * script/link pattern groups, then writes all lines back to the file.
 *
 * @param fileRealPath path of the file to rewrite
 * @param content      XML-escaped line to look for
 * @param newVersion   version text to insert, or {@code null} to build the
 *                     replacement without the version groups
 * @return the rewritten line, or the unescaped {@code content} when no line matched
 * @throws IOException if the file cannot be read or written
 */
private String versionedStaticResourceContent(String fileRealPath, String content, String newVersion)
        throws IOException {

    content = StringEscapeUtils.unescapeXml(content);
    if ("1".equals(newVersion)) {
        newVersion = "?" + newVersion;
    }

    File target = new File(fileRealPath);
    List<String> fileLines = FileUtils.readLines(target);

    for (int index = 0; index < fileLines.size(); index++) {
        if (!content.equals(fileLines.get(index))) {
            continue;
        }

        // Prefer the script pattern; fall back to the link pattern.
        Matcher matcher = scriptPattern.matcher(content);
        if (!matcher.matches()) {
            matcher = linkPattern.matcher(content);
        }

        String replacement = (newVersion == null) ? "$1$2$5" : "$1$2$3" + newVersion + "$5";
        content = matcher.replaceAll(replacement);
        fileLines.set(index, content);
        break; // only the first matching line is rewritten
    }

    // The file is written back even when no line matched.
    FileUtils.writeLines(target, fileLines);
    return content;
}

From source file:com.zyz.mobile.book.UserBookData.java

/**
 * SAX callback: records which element has been entered in {@code mStatus} and
 * reads the element's attributes by position.
 *
 * <p>Attribute values that fail to parse as integers are ignored; whatever was
 * applied before the failing attribute stays applied.
 */
@SuppressWarnings("UnusedAssignment")
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
        throws SAXException {
    if (localName.equals(Element.BOOK)) {
        mStatus.in_book = true;
        try {
            // Attribute 0 is the offset, attribute 1 the version.
            if (atts.getLength() >= 1) {
                setOffset(Integer.parseInt(atts.getValue(0)));
            }
            if (atts.getLength() >= 2) {
                mVersion = Integer.parseInt(atts.getValue(1));
            }
        } catch (NumberFormatException ignored) {
            // best effort: keep whatever was parsed so far
        }
        return;
    }

    if (localName.equals(Element.SPANS)) {
        mStatus.in_spans = true;
        return;
    }

    if (localName.equals(Element.SPAN)) {
        mStatus.in_span = true;
        try {
            // Attributes 0..3 are type, color, start, end; attribute 4 is the description.
            if (atts.getLength() >= 4) {
                int spanType = Integer.parseInt(atts.getValue(0));
                int spanColor = Integer.parseInt(atts.getValue(1));
                int spanStart = Integer.parseInt(atts.getValue(2));
                int spanEnd = Integer.parseInt(atts.getValue(3));

                mCurrentSpanObj = new UserSpan(UserSpanType.toEnum(spanType), spanColor, spanStart, spanEnd);
                insertSpan(mCurrentSpanObj);
            }
            if (atts.getLength() >= 5) {
                mCurrentSpanObj.setDescription(StringEscapeUtils.unescapeXml(atts.getValue(4)));
            }
        } catch (NumberFormatException ignored) {
            // should not happen unless the file is corrupted or modified incorrectly
        }
        return;
    }

    if (localName.equals(Element.NOTE)) {
        mStatus.in_note = true;
        return;
    }

    if (localName.equals(Element.BOOKMARKS)) {
        mStatus.in_bookmarks = true;
        return;
    }

    if (localName.equals(Element.BOOKMARK)) {
        mStatus.in_bookmark = true;
        try {
            if (mStatus.in_bookmarks) {
                // Inside <bookmarks>: attribute 0 is the start, attribute 1 the description.
                if (atts.getLength() >= 1) {
                    int bookmarkStart = Integer.parseInt(atts.getValue(0));
                    mCurrentSpanObj = (new UserSpan.Builder()).setType(UserSpanType.BOOKMARK)
                            .setStart(bookmarkStart).create();
                    insertSpan(mCurrentSpanObj);
                }
                if (atts.getLength() >= 2) {
                    mCurrentSpanObj.setDescription(StringEscapeUtils.unescapeXml(atts.getValue(1)));
                }
            } else if (mStatus.in_history) {
                // Inside <history>: attribute 0 is a location offset.
                if (atts.getLength() >= 1) {
                    int historyOffset = Integer.parseInt(atts.getValue(0));
                    mLocationHistory.add(historyOffset);
                }
            }
        } catch (NumberFormatException ignored) {
            // best effort: skip entries that cannot be parsed
        }
        return;
    }

    if (localName.equals(Element.HISTORY)) {
        mStatus.in_history = true;
    }
}

From source file:com.erbjuder.logger.server.soap.services.LogMessageServiceBase.java

/**
 * Extracts label, MIME type and content from one transaction log entry and binds
 * them to {@code logMessage}.
 *
 * <p>Label and MIME type are trimmed and lower-cased; label and content are
 * XML-unescaped. When the MIME type equals {@code MimeTypes.BASE64} (ignoring case)
 * the content is passed through {@code XMLFormatter} first. The reported size is
 * the byte length of the final content in the platform default charset.
 *
 * @param transactionLogData the log entry to read
 * @param logMessage         the message to update
 * @return the result of {@code updateLogMessage}
 */
private LogMessage addTransactioLogData(Transactions.Transaction.TransactionLogData transactionLogData,
        LogMessage logMessage) {

    String normalizedLabel = transactionLogData.getContentLabel().trim().toLowerCase();
    String label = StringEscapeUtils.unescapeXml(normalizedLabel);
    String mimeType = transactionLogData.getContentMimeType().trim().toLowerCase();
    String trimmedContent = transactionLogData.getContent().trim();
    String content = StringEscapeUtils.unescapeXml(trimmedContent);

    if (MimeTypes.BASE64.equalsIgnoreCase(mimeType)) {
        content = this.XMLFormatter(content);
    }
    // Size is measured after any reformatting.
    long size = content.getBytes().length;

    // Bind data to logmessage
    return this.updateLogMessage(label, mimeType, content, size, logMessage);
}

From source file:android.databinding.tool.util.XmlEditor.java

/**
 * Computes the replacement text for a binding-expression attribute from the
 * expression's {@code default} clause.
 *
 * @param attr the attribute whose quoted value is inspected
 * @return {@code null} when the value is not of the form {@code @{...}} or has no
 *         defaults clause; the XML-escaped contents of the default string literal
 *         when the default is a string; otherwise the raw text of the default constant
 */
private static String defaultReplacement(XMLParser.AttributeContext attr) {
    String quotedValue = attr.attrValue.getText();
    // Drop the surrounding quote characters.
    String rawValue = quotedValue.substring(1, quotedValue.length() - 1);
    boolean isBindingExpression = rawValue.startsWith("@{") && rawValue.endsWith("}");
    if (!isBindingExpression) {
        return null;
    }

    // Parse what sits between "@{" and "}".
    String expression = StringEscapeUtils.unescapeXml(rawValue.substring(2, rawValue.length() - 1));
    BindingExpressionLexer lexer = new BindingExpressionLexer(new ANTLRInputStream(expression));
    BindingExpressionParser parser = new BindingExpressionParser(new CommonTokenStream(lexer));
    BindingExpressionParser.DefaultsContext defaults = parser.bindingSyntax().defaults();
    if (defaults == null) {
        return null;
    }

    BindingExpressionParser.ConstantValueContext constantValue = defaults.constantValue();
    BindingExpressionParser.LiteralContext literal = constantValue.literal();
    BindingExpressionParser.StringLiteralContext stringLiteral =
            (literal == null) ? null : literal.stringLiteral();
    if (stringLiteral == null) {
        // Non-string default: use its text verbatim.
        return constantValue.getText();
    }

    TerminalNode doubleQuoted = stringLiteral.DoubleQuoteString();
    if (doubleQuoted != null) {
        String token = doubleQuoted.getText();
        return StringEscapeUtils.escapeXml10(token.substring(1, token.length() - 1));
    }

    // Single-quoted literal: escape double quotes and un-escape back-ticks first.
    String token = stringLiteral.SingleQuoteString().getText();
    String inner = token.substring(1, token.length() - 1);
    String normalized = inner.replace("\"", "\\\"").replace("\\`", "`");
    return StringEscapeUtils.escapeXml10(normalized);
}

From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java

/**
 * Processes the text of a {@code <style>} node: XML-unescapes it, passes it
 * through {@code checkCSSContent}, and replaces the node's children with the
 * result when it differs from the original text.
 *
 * <p>Nodes are skipped when they are not {@code style} elements, when a
 * {@code type} other than {@code text/css} is declared, or when a {@code media}
 * other than {@code screen} or {@code all} is declared.
 */
final private void checkStyleCSS(TagNode node) throws ClientProtocolException, IllegalStateException,
        IOException, SearchLibException, URISyntaxException {
    if (!"style".equalsIgnoreCase(node.getName())) {
        return;
    }

    String type = node.getAttributeByName("type");
    boolean typeAccepted = StringUtils.isEmpty(type) || "text/css".equalsIgnoreCase(type);
    if (!typeAccepted) {
        return;
    }

    String media = node.getAttributeByName("media");
    boolean mediaAccepted = StringUtils.isEmpty(media) || "screen".equalsIgnoreCase(media)
            || "all".equalsIgnoreCase(media);
    if (!mediaAccepted) {
        return;
    }

    StringBuilder text = (StringBuilder) node.getText();
    if (text == null) {
        return;
    }

    String original = text.toString();
    String updated = StringEscapeUtils.unescapeXml(original);
    StringBuffer rewritten = checkCSSContent(baseUrl, updated);
    if (rewritten != null) {
        updated = rewritten.toString();
    }

    // Only touch the tree when something actually changed.
    if (!updated.equals(original)) {
        node.removeAllChildren();
        node.addChild(new ContentNode(updated));
    }
}

From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java

/**
 * Processes a {@code <script>} node: removes it from the tree when it has an
 * ancestor match in {@code disableScriptNodeSet}; otherwise XML-unescapes its text
 * and replaces the node's children when the unescaped text differs.
 *
 * @param node                 the node to inspect; non-script nodes are ignored
 * @param disableScriptNodeSet nodes used for the ancestor check, may be {@code null}
 */
final private void checkScriptContent(TagNode node, Set<TagNode> disableScriptNodeSet) {
    if (!"script".equalsIgnoreCase(node.getName())) {
        return;
    }

    boolean disabled = disableScriptNodeSet != null && hasAncestorXPath(disableScriptNodeSet, node);
    if (disabled) {
        node.removeFromTree();
        return;
    }

    StringBuilder text = (StringBuilder) node.getText();
    if (text == null) {
        return;
    }
    String original = text.toString();
    if (original == null) {
        return;
    }

    String unescaped = StringEscapeUtils.unescapeXml(original);
    // Only touch the tree when unescaping changed something.
    if (!unescaped.equals(original)) {
        node.removeAllChildren();
        node.addChild(new ContentNode(unescaped));
    }
}

From source file:com.screenslicer.core.scrape.type.Result.java

/**
 * Registers a candidate link found at {@code node} and decides what role it plays:
 * the alternate (image) URL, the primary URL, a fallback URL, or summary text only.
 *
 * <p>The title is stripped, unescaped (XML first, then HTML4) and cleaned with
 * {@code titleJunk}; a title consisting only of punctuation is treated as empty.
 * Candidates with an empty href, an href of {@code "#"}, or an empty title are
 * counted in {@code numUrls} but otherwise ignored.
 *
 * @param node           the node the link was found at
 * @param href           the raw link target
 * @param title          the raw link title
 * @param textSibling    context flag; a {@code false} value adds one point to the score
 * @param anchorSibling  context flag; a {@code false} value adds one point to the score
 * @param loneBlock      context flag; a {@code true} value adds one point to the score
 * @param image          context flag stored as {@code titleHasImage} when this
 *                       candidate becomes the primary URL
 */
public void addUrl(Node node, String href, String title, boolean textSibling, boolean anchorSibling,
        boolean loneBlock, boolean image) {
    ++numUrls;
    String cleanHref = CommonUtil.strip(href, false);
    String cleanTitle = StringEscapeUtils
            .unescapeHtml4(StringEscapeUtils.unescapeXml(CommonUtil.strip(title, false)));
    cleanTitle = titleJunk.matcher(cleanTitle).replaceAll("");
    // A title with nothing left once punctuation is removed counts as empty.
    String noPunctTitle = cleanTitle.replaceAll("\\p{Punct}", "");
    if (CommonUtil.isEmpty(noPunctTitle)) {
        cleanTitle = "";
    }
    // NOTE(review): if the first replace() literal is a plain space, the following
    // " ..." replace and the contains(" ") check below can never match - confirm
    // which whitespace character was intended.
    String coreTitle = cleanTitle.replace(" ", "").replace(" ...", "");
    if (!cleanHref.isEmpty() && !"#".equals(cleanHref) && !cleanTitle.isEmpty()
            && (!coreTitle.contains("/") || coreTitle.contains(" ") || !coreTitle.contains("."))) {
        // Score of the currently stored primary title, from the flags recorded with it.
        int existingScore = 0;
        existingScore += titleHasTextSibling ? 0 : 1;
        existingScore += titleHasAnchorSibling ? 0 : 1;
        existingScore += !titleHasLoneBlock ? 0 : 1;
        existingScore += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? -10 : 0;
        // Score of this candidate, computed the same way from its own flags.
        int curScore = 0;
        curScore += textSibling ? 0 : 1;
        curScore += anchorSibling ? 0 : 1;
        curScore += !loneBlock ? 0 : 1;
        curScore += cleanTitle.matches("^\\p{Punct}.*$") ? -10 : 0;
        // Relative preference of the candidate over the stored primary URL;
        // stays 0 when no primary URL has been stored yet.
        int curCompare = 0;
        if (url != null) {
            curCompare += cleanTitle.matches("^\\p{Punct}.*$") ? -5 : 0;
            curCompare += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? 9 : 0;
            curCompare += curScore < existingScore ? -2 : curScore > existingScore ? 2 : 0;
            curCompare += curScore <= 1 && curScore <= existingScore ? -1 : 0;
            // Compare title lengths: half or less is penalised, more than double is rewarded.
            curCompare += cleanTitle.length() <= urlTitle.length() / 2 ? -2
                    : cleanTitle.length() > urlTitle.length() * 2 ? 2 : 0;
            // An href containing '#' loses against one without it.
            curCompare += cleanHref.contains("#") && !url.contains("#") ? -2
                    : !cleanHref.contains("#") && url.contains("#") ? 2 : 0;
            // A smaller nearestBlock value than the stored URL's node is rewarded.
            int nearestBlock = Util.nearestBlock(node);
            int existingBlock = Util.nearestBlock(urlNodes.get(url));
            curCompare += nearestBlock > existingBlock ? -2 : nearestBlock == existingBlock ? 0 : 2;
        }
        if (!hasImgUrl && CommonUtil.isEmpty(altUrl) && CommonUtil.isEmpty(altUrlTitle)
                && (isImg(cleanTitle) || isImg(cleanHref))) {
            // First image-like candidate becomes the alternate URL.
            altUrl = cleanHref;
            altUrlTitle = cleanTitle;
            urlNodes.put(cleanHref, node);
            hasImgUrl = true;
        } else if (url == null || !image && titleHasImage || curCompare > 1) {
            // Candidate becomes the primary URL (note: && binds tighter than ||).
            // Any previous primary is kept as prior/fallback and added to the summary.
            if (url != null) {
                priorUrl = url;
                priorUrlTitle = urlTitle;
                addToSummary(urlTitle, true, urlNodes.get(url));
                fallbackUrls.add(url);
                fallbackUrlTitles.add(urlTitle);
            }
            url = cleanHref;
            urlNodes.put(url, node);
            urlTitle = cleanTitle;
            titleHasTextSibling = textSibling;
            titleHasAnchorSibling = anchorSibling;
            titleHasLoneBlock = loneBlock;
            titleHasImage = image;
        } else if (curCompare > -1) {
            // Roughly on par with the primary: keep as a fallback and add to the summary.
            fallbackUrls.add(cleanHref);
            urlNodes.put(cleanHref, node);
            fallbackUrlTitles.add(cleanTitle);
            addToSummary(cleanTitle, true, node);
        } else {
            // Clearly worse than the primary: only the title goes to the summary.
            addToSummary(cleanTitle, true, node);
        }
    }
}

From source file:com.screenslicer.core.scrape.type.ScrapeResult.java

/**
 * Registers a candidate link found at {@code node} and decides what role it plays:
 * the alternate (image) URL, the primary URL, a fallback URL, or summary text only.
 *
 * <p>The title is stripped, unescaped (XML first, then HTML4) and cleaned with
 * {@code titleJunk}; a title consisting only of punctuation is treated as empty.
 * Candidates with an empty href, an href of {@code "#"}, or an empty title are
 * counted in {@code numUrls} but otherwise ignored.
 *
 * @param node           the node the link was found at
 * @param href           the raw link target
 * @param title          the raw link title
 * @param textSibling    context flag; a {@code false} value adds one point to the score
 * @param anchorSibling  context flag; a {@code false} value adds one point to the score
 * @param loneBlock      context flag; a {@code true} value adds one point to the score
 * @param image          context flag stored as {@code titleHasImage} when this
 *                       candidate becomes the primary URL
 */
public void addUrl(Node node, String href, String title, boolean textSibling, boolean anchorSibling,
        boolean loneBlock, boolean image) {
    ++numUrls;
    String cleanHref = CommonUtil.strip(href, false);
    String cleanTitle = StringEscapeUtils
            .unescapeHtml4(StringEscapeUtils.unescapeXml(CommonUtil.strip(title, false)));
    cleanTitle = titleJunk.matcher(cleanTitle).replaceAll("");
    // A title with nothing left once punctuation is removed counts as empty.
    String noPunctTitle = cleanTitle.replaceAll("\\p{Punct}", "");
    if (CommonUtil.isEmpty(noPunctTitle)) {
        cleanTitle = "";
    }
    // NOTE(review): if the first replace() literal is a plain space, the following
    // " ..." replace and the contains(" ") check below can never match - confirm
    // which whitespace character was intended.
    String coreTitle = cleanTitle.replace(" ", "").replace(" ...", "");
    if (!cleanHref.isEmpty() && !"#".equals(cleanHref) && !cleanTitle.isEmpty()
            && (!coreTitle.contains("/") || coreTitle.contains(" ") || !coreTitle.contains("."))) {
        // Score of the currently stored primary title, from the flags recorded with it.
        int existingScore = 0;
        existingScore += titleHasTextSibling ? 0 : 1;
        existingScore += titleHasAnchorSibling ? 0 : 1;
        existingScore += !titleHasLoneBlock ? 0 : 1;
        existingScore += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? -10 : 0;
        // Score of this candidate, computed the same way from its own flags.
        int curScore = 0;
        curScore += textSibling ? 0 : 1;
        curScore += anchorSibling ? 0 : 1;
        curScore += !loneBlock ? 0 : 1;
        curScore += cleanTitle.matches("^\\p{Punct}.*$") ? -10 : 0;
        // Relative preference of the candidate over the stored primary URL;
        // stays 0 when no primary URL has been stored yet.
        int curCompare = 0;
        if (url != null) {
            curCompare += cleanTitle.matches("^\\p{Punct}.*$") ? -5 : 0;
            curCompare += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? 9 : 0;
            curCompare += curScore < existingScore ? -2 : curScore > existingScore ? 2 : 0;
            curCompare += curScore <= 1 && curScore <= existingScore ? -1 : 0;
            // Compare title lengths: half or less is penalised, more than double is rewarded.
            curCompare += cleanTitle.length() <= urlTitle.length() / 2 ? -2
                    : cleanTitle.length() > urlTitle.length() * 2 ? 2 : 0;
            // An href containing '#' loses against one without it.
            curCompare += cleanHref.contains("#") && !url.contains("#") ? -2
                    : !cleanHref.contains("#") && url.contains("#") ? 2 : 0;
            // A smaller nearestBlock value than the stored URL's node is rewarded.
            int nearestBlock = NodeUtil.nearestBlock(node);
            int existingBlock = NodeUtil.nearestBlock(urlNodes.get(url));
            curCompare += nearestBlock > existingBlock ? -2 : nearestBlock == existingBlock ? 0 : 2;
        }
        if (!hasImgUrl && CommonUtil.isEmpty(altUrl) && CommonUtil.isEmpty(altUrlTitle)
                && (isImg(cleanTitle) || isImg(cleanHref))) {
            // First image-like candidate becomes the alternate URL.
            altUrl = cleanHref;
            altUrlTitle = cleanTitle;
            urlNodes.put(cleanHref, node);
            hasImgUrl = true;
        } else if (url == null || !image && titleHasImage || curCompare > 1) {
            // Candidate becomes the primary URL (note: && binds tighter than ||).
            // Any previous primary is kept as prior/fallback and added to the summary.
            if (url != null) {
                priorUrl = url;
                priorUrlTitle = urlTitle;
                addToSummary(urlTitle, true, urlNodes.get(url));
                fallbackUrls.add(url);
                fallbackUrlTitles.add(urlTitle);
            }
            url = cleanHref;
            urlNodes.put(url, node);
            urlTitle = cleanTitle;
            titleHasTextSibling = textSibling;
            titleHasAnchorSibling = anchorSibling;
            titleHasLoneBlock = loneBlock;
            titleHasImage = image;
        } else if (curCompare > -1) {
            // Roughly on par with the primary: keep as a fallback and add to the summary.
            fallbackUrls.add(cleanHref);
            urlNodes.put(cleanHref, node);
            fallbackUrlTitles.add(cleanTitle);
            addToSummary(cleanTitle, true, node);
        } else {
            // Clearly worse than the primary: only the title goes to the summary.
            addToSummary(cleanTitle, true, node);
        }
    }
}

From source file:edu.isistan.carcha.plugin.editors.TraceabilityEditor.java

/**
 * Gets the line number for a given cross cutting concern or design decision.
 *
 * @param name the name/*from   w  w w.j a va 2 s.  c o  m*/
 * @param kind the kind
 * @return line number
 */
int getLineFor(String name, String kind) {
    int i = 0;
    String editorText = editor.getDocumentProvider().getDocument(editor.getEditorInput()).get();
    String[] lines = editorText.split(System.getProperty("line.separator"));

    int temp = 1;
    String lineEscaped;
    boolean find = false;
    for (int j = 0; j < lines.length && !find; j++) {
        lineEscaped = StringEscapeUtils.unescapeXml(lines[j]);
        if (lineEscaped.contains(name) && lineEscaped.contains(kind)) {
            i = temp;
            find = true;
        }
        temp++;
    }
    return i;
}