List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeXml
public static final String unescapeXml(final String input)
Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
From source file:de.fatalix.book.importer.BookMigrator.java
private static BookEntry parseOPF(File pathToOPF, BookEntry bmd) throws IOException { List<String> lines = Files.readAllLines(pathToOPF.toPath(), Charset.forName("UTF-8")); boolean multiLineDescription = false; String description = ""; for (String line : lines) { if (multiLineDescription) { multiLineDescription = false; if (line.split("<").length == 1) { multiLineDescription = true; description = description + line; } else { description = description + line.split("<")[0]; description = StringEscapeUtils.unescapeXml(description); bmd.setDescription(description); }//from www. j a v a 2s . c o m } else if (line.contains("dc:title")) { String title = line.split(">")[1].split("<")[0]; bmd.setTitle(title); } else if (line.contains("dc:creator")) { String creator = line.split(">")[1].split("<")[0]; bmd.setAuthor(creator); } else if (line.contains("dc:description")) { String value = line.split(">")[1]; if (value.split("<").length == 1) { multiLineDescription = true; description = value; } else { value = value.split("<")[0]; value = StringEscapeUtils.unescapeXml(value); bmd.setDescription(value); } } else if (line.contains("dc:publisher")) { String value = line.split(">")[1].split("<")[0]; bmd.setPublisher(value); } else if (line.contains("dc:date")) { String value = line.split(">")[1].split("<")[0]; DateTime dtReleaseDate = new DateTime(value, DateTimeZone.UTC); if (dtReleaseDate.getYear() != 101) { bmd.setReleaseDate(dtReleaseDate.toDate()); } } else if (line.contains("dc:language")) { String value = line.split(">")[1].split("<")[0]; bmd.setLanguage(value); } else if (line.contains("opf:scheme=\"ISBN\"")) { String value = line.split(">")[1].split("<")[0]; bmd.setIsbn(value); } } return bmd; }
From source file:com.daphne.es.maintain.staticresource.web.controller.StaticResourceVersionController.java
private String versionedStaticResourceContent(String fileRealPath, String content, String newVersion) throws IOException { content = StringEscapeUtils.unescapeXml(content); if (newVersion != null && newVersion.equals("1")) { newVersion = "?" + newVersion; }/*w w w .ja va 2 s . c o m*/ File file = new File(fileRealPath); List<String> contents = FileUtils.readLines(file); for (int i = 0, l = contents.size(); i < l; i++) { String fileContent = contents.get(i); if (content.equals(fileContent)) { Matcher matcher = scriptPattern.matcher(content); if (!matcher.matches()) { matcher = linkPattern.matcher(content); } if (newVersion == null) { // content = matcher.replaceAll("$1$2$5"); } else { content = matcher.replaceAll("$1$2$3" + newVersion + "$5"); } contents.set(i, content); break; } } FileUtils.writeLines(file, contents); return content; }
From source file:com.zyz.mobile.book.UserBookData.java
@SuppressWarnings("UnusedAssignment") @Override//ww w. j av a 2 s . c o m public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (localName.equals(Element.BOOK)) { mStatus.in_book = true; try { if (atts.getLength() >= 1) { setOffset(Integer.parseInt(atts.getValue(0))); } if (atts.getLength() >= 2) { mVersion = Integer.parseInt(atts.getValue(1)); } } catch (NumberFormatException e) { } } else if (localName.equals(Element.SPANS)) { mStatus.in_spans = true; } else if (localName.equals(Element.SPAN)) { mStatus.in_span = true; try { int i = 0; if (atts.getLength() >= 4) { int type = Integer.parseInt(atts.getValue(i++)); int color = Integer.parseInt(atts.getValue(i++)); int start = Integer.parseInt(atts.getValue(i++)); int end = Integer.parseInt(atts.getValue(i++)); mCurrentSpanObj = new UserSpan(UserSpanType.toEnum(type), color, start, end); insertSpan(mCurrentSpanObj); } if (atts.getLength() >= 5) { mCurrentSpanObj.setDescription(StringEscapeUtils.unescapeXml(atts.getValue(i))); } } catch (NumberFormatException e) { // should not happen unless the file is corrupted or modified incorrectly } } else if (localName.equals(Element.NOTE)) { mStatus.in_note = true; } else if (localName.equals(Element.BOOKMARKS)) { mStatus.in_bookmarks = true; } else if (localName.equals(Element.BOOKMARK)) { mStatus.in_bookmark = true; int i = 0; try { if (mStatus.in_bookmarks) { if (atts.getLength() >= 1) { int start = Integer.parseInt(atts.getValue(i++)); mCurrentSpanObj = (new UserSpan.Builder()).setType(UserSpanType.BOOKMARK).setStart(start) .create(); insertSpan(mCurrentSpanObj); } if (atts.getLength() >= 2) { mCurrentSpanObj.setDescription(StringEscapeUtils.unescapeXml(atts.getValue(i))); } } else if (mStatus.in_history) { if (atts.getLength() >= 1) { int offset = Integer.parseInt(atts.getValue(i++)); mLocationHistory.add(offset); } } } catch (NumberFormatException e) { } } else if (localName.equals(Element.HISTORY)) { 
mStatus.in_history = true; } }
From source file:com.erbjuder.logger.server.soap.services.LogMessageServiceBase.java
private LogMessage addTransactioLogData(Transactions.Transaction.TransactionLogData transactionLogData, LogMessage logMessage) {//w w w .j av a 2 s .c o m String base64 = MimeTypes.BASE64; String label = StringEscapeUtils.unescapeXml(transactionLogData.getContentLabel().trim().toLowerCase()); String mimeType = transactionLogData.getContentMimeType().trim().toLowerCase(); String content = StringEscapeUtils.unescapeXml(transactionLogData.getContent().trim()); long size = 0; if (base64.equalsIgnoreCase(mimeType)) { content = this.XMLFormatter(content); size = content.getBytes().length; } else { size = content.getBytes().length; } // // Bind data to logmessage return this.updateLogMessage(label, mimeType, content, size, logMessage); }
From source file:android.databinding.tool.util.XmlEditor.java
/**
 * Computes the replacement text for a binding-expression attribute from its default value.
 *
 * <p>The attribute value (without its surrounding quotes) must have the form
 * <code>@{...}</code>; otherwise {@code null} is returned. The expression inside is
 * XML-unescaped and parsed. If it declares a default, the result is:
 * <ul>
 *   <li>for a string literal, its content without the quotes, XML 1.0 escaped;</li>
 *   <li>for any other constant, the constant's text.</li>
 * </ul>
 *
 * @param attr the attribute to inspect
 * @return the replacement text, or {@code null} if the attribute is not a binding expression
 *         or declares no default
 */
private static String defaultReplacement(XMLParser.AttributeContext attr) {
    final String quotedValue = attr.attrValue.getText();
    final String rawValue = quotedValue.substring(1, quotedValue.length() - 1);
    final boolean isBinding = rawValue.startsWith("@{") && rawValue.endsWith("}");
    if (!isBinding) {
        return null;
    }

    final String expression = StringEscapeUtils.unescapeXml(rawValue.substring(2, rawValue.length() - 1));
    final BindingExpressionLexer lexer = new BindingExpressionLexer(new ANTLRInputStream(expression));
    final BindingExpressionParser parser = new BindingExpressionParser(new CommonTokenStream(lexer));
    final BindingExpressionParser.DefaultsContext defaults = parser.bindingSyntax().defaults();
    if (defaults == null) {
        return null;
    }

    final BindingExpressionParser.ConstantValueContext constantValue = defaults.constantValue();
    final BindingExpressionParser.LiteralContext literal = constantValue.literal();
    final BindingExpressionParser.StringLiteralContext stringLiteral =
            literal == null ? null : literal.stringLiteral();
    if (stringLiteral == null) {
        // Not a string literal: use the constant as written.
        return constantValue.getText();
    }

    final TerminalNode doubleQuote = stringLiteral.DoubleQuoteString();
    if (doubleQuote != null) {
        final String withQuotes = doubleQuote.getText();
        return StringEscapeUtils.escapeXml10(withQuotes.substring(1, withQuotes.length() - 1));
    }

    final String withQuotes = stringLiteral.SingleQuoteString().getText();
    final String inner = withQuotes.substring(1, withQuotes.length() - 1);
    // Escape embedded double quotes and turn escaped backticks into plain ones.
    return StringEscapeUtils.escapeXml10(inner.replace("\"", "\\\"").replace("\\`", "`"));
}
From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java
final private void checkStyleCSS(TagNode node) throws ClientProtocolException, IllegalStateException, IOException, SearchLibException, URISyntaxException { if (!("style".equalsIgnoreCase(node.getName()))) return;//from w w w. j a v a2 s.c o m String attr = node.getAttributeByName("type"); if (!StringUtils.isEmpty(attr) && !"text/css".equalsIgnoreCase(attr)) return; attr = node.getAttributeByName("media"); if (!StringUtils.isEmpty(attr) && !"screen".equalsIgnoreCase(attr) && !"all".equalsIgnoreCase(attr)) return; StringBuilder builder = (StringBuilder) node.getText(); if (builder == null) return; String content = builder.toString(); String newContent = StringEscapeUtils.unescapeXml(content); StringBuffer sb = checkCSSContent(baseUrl, newContent); if (sb != null) newContent = sb.toString(); if (newContent.equals(content)) return; node.removeAllChildren(); node.addChild(new ContentNode(newContent)); }
From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java
/**
 * Normalizes the content of a {@code <script>} element.
 *
 * <p>A script node that has an ancestor matched by {@code disableScriptNodeSet} is removed
 * from the tree. Otherwise its text is XML-unescaped and, if that changes anything, the
 * node's children are replaced by a single content node holding the unescaped text.
 *
 * @param node the node to inspect
 * @param disableScriptNodeSet nodes under which scripts are removed; may be {@code null}
 */
final private void checkScriptContent(TagNode node, Set<TagNode> disableScriptNodeSet) {
    if (!"script".equalsIgnoreCase(node.getName())) {
        return;
    }

    final boolean disabled = disableScriptNodeSet != null && hasAncestorXPath(disableScriptNodeSet, node);
    if (disabled) {
        node.removeFromTree();
        return;
    }

    final StringBuilder text = (StringBuilder) node.getText();
    if (text == null) {
        return;
    }

    final String original = text.toString();
    final String unescaped = StringEscapeUtils.unescapeXml(original);
    if (!unescaped.equals(original)) {
        node.removeAllChildren();
        node.addChild(new ContentNode(unescaped));
    }
}
From source file:com.screenslicer.core.scrape.type.Result.java
/**
 * Registers a candidate link (href + title) for this result and decides how it ranks against
 * the link currently held in {@code url} / {@code urlTitle}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>{@code numUrls} is incremented for every call, accepted or not.</li>
 *   <li>The href is stripped; the title is stripped, XML-unescaped, then HTML4-unescaped, and
 *       everything matched by {@code titleJunk} is removed. A title made only of punctuation
 *       becomes empty.</li>
 *   <li>The candidate is ignored when the href is empty or {@code "#"}, when the title is
 *       empty, or when the title contains both {@code "/"} and {@code "."} and no space.</li>
 *   <li>A score is built for the existing and the new title from the sibling / lone-block
 *       flags, with a -10 penalty for a missing title or one starting with punctuation. When a
 *       {@code url} is already set, {@code curCompare} additionally weighs the two scores,
 *       title length, presence of {@code "#"} in the href and {@code nearestBlock} values.</li>
 *   <li>Outcome: the first image-like candidate (per {@code isImg}) seen while no alternative
 *       URL is recorded becomes {@code altUrl}; otherwise the candidate replaces the current
 *       URL when there is none, when the current title came from an image and this one does
 *       not, or when {@code curCompare > 1} (the old URL moves to the fallback lists and the
 *       summary); with {@code curCompare > -1} the candidate is added to the fallback lists
 *       and the summary; otherwise only to the summary.</li>
 * </ol>
 *
 * <p>NOTE(review): the first {@code replace(...)} applied to {@code cleanTitle} when building
 * {@code coreTitle} takes a one-character whitespace literal. If that is an ordinary space, the
 * later {@code coreTitle.contains(" ")} test can never be true; confirm which character was
 * intended (possibly a non-breaking space).
 *
 * <p>NOTE(review): {@code urlTitle} is null-checked in some expressions but dereferenced
 * without a check in the length comparison; confirm it is always non-null when {@code url} is.
 *
 * @param node the node the link was found on
 * @param href the raw link target
 * @param title the raw link title
 * @param textSibling sibling flag used in scoring (a set flag contributes no point)
 * @param anchorSibling sibling flag used in scoring (a set flag contributes no point)
 * @param loneBlock block flag used in scoring (a set flag contributes one point)
 * @param image whether the title comes from an image
 */
public void addUrl(Node node, String href, String title, boolean textSibling, boolean anchorSibling, boolean loneBlock, boolean image) { ++numUrls;//from w w w.j a va2 s. co m String cleanHref = CommonUtil.strip(href, false); String cleanTitle = StringEscapeUtils .unescapeHtml4(StringEscapeUtils.unescapeXml(CommonUtil.strip(title, false))); cleanTitle = titleJunk.matcher(cleanTitle).replaceAll(""); String noPunctTitle = cleanTitle.replaceAll("\\p{Punct}", ""); if (CommonUtil.isEmpty(noPunctTitle)) { cleanTitle = ""; } String coreTitle = cleanTitle.replace(" ", "").replace(" ...", ""); if (!cleanHref.isEmpty() && !"#".equals(cleanHref) && !cleanTitle.isEmpty() && (!coreTitle.contains("/") || coreTitle.contains(" ") || !coreTitle.contains("."))) { int existingScore = 0; existingScore += titleHasTextSibling ? 0 : 1; existingScore += titleHasAnchorSibling ? 0 : 1; existingScore += !titleHasLoneBlock ? 0 : 1; existingScore += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? -10 : 0; int curScore = 0; curScore += textSibling ? 0 : 1; curScore += anchorSibling ? 0 : 1; curScore += !loneBlock ? 0 : 1; curScore += cleanTitle.matches("^\\p{Punct}.*$") ? -10 : 0; int curCompare = 0; if (url != null) { curCompare += cleanTitle.matches("^\\p{Punct}.*$") ? -5 : 0; curCompare += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? 9 : 0; curCompare += curScore < existingScore ? -2 : curScore > existingScore ? 2 : 0; curCompare += curScore <= 1 && curScore <= existingScore ? -1 : 0; curCompare += cleanTitle.length() <= urlTitle.length() / 2 ? -2 : cleanTitle.length() > urlTitle.length() * 2 ? 2 : 0; curCompare += cleanHref.contains("#") && !url.contains("#") ? -2 : !cleanHref.contains("#") && url.contains("#") ? 2 : 0; int nearestBlock = Util.nearestBlock(node); int existingBlock = Util.nearestBlock(urlNodes.get(url)); curCompare += nearestBlock > existingBlock ? -2 : nearestBlock == existingBlock ? 
0 : 2; } if (!hasImgUrl && CommonUtil.isEmpty(altUrl) && CommonUtil.isEmpty(altUrlTitle) && (isImg(cleanTitle) || isImg(cleanHref))) { altUrl = cleanHref; altUrlTitle = cleanTitle; urlNodes.put(cleanHref, node); hasImgUrl = true; } else if (url == null || !image && titleHasImage || curCompare > 1) { if (url != null) { priorUrl = url; priorUrlTitle = urlTitle; addToSummary(urlTitle, true, urlNodes.get(url)); fallbackUrls.add(url); fallbackUrlTitles.add(urlTitle); } url = cleanHref; urlNodes.put(url, node); urlTitle = cleanTitle; titleHasTextSibling = textSibling; titleHasAnchorSibling = anchorSibling; titleHasLoneBlock = loneBlock; titleHasImage = image; } else if (curCompare > -1) { fallbackUrls.add(cleanHref); urlNodes.put(cleanHref, node); fallbackUrlTitles.add(cleanTitle); addToSummary(cleanTitle, true, node); } else { addToSummary(cleanTitle, true, node); } } }
From source file:com.screenslicer.core.scrape.type.ScrapeResult.java
/**
 * Registers a candidate link (href + title) for this scrape result and decides how it ranks
 * against the link currently held in {@code url} / {@code urlTitle}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>{@code numUrls} is incremented for every call, accepted or not.</li>
 *   <li>The href is stripped; the title is stripped, XML-unescaped, then HTML4-unescaped, and
 *       everything matched by {@code titleJunk} is removed. A title made only of punctuation
 *       becomes empty.</li>
 *   <li>The candidate is ignored when the href is empty or {@code "#"}, when the title is
 *       empty, or when the title contains both {@code "/"} and {@code "."} and no space.</li>
 *   <li>A score is built for the existing and the new title from the sibling / lone-block
 *       flags, with a -10 penalty for a missing title or one starting with punctuation. When a
 *       {@code url} is already set, {@code curCompare} additionally weighs the two scores,
 *       title length, presence of {@code "#"} in the href and {@code NodeUtil.nearestBlock}
 *       values.</li>
 *   <li>Outcome: the first image-like candidate (per {@code isImg}) seen while no alternative
 *       URL is recorded becomes {@code altUrl}; otherwise the candidate replaces the current
 *       URL when there is none, when the current title came from an image and this one does
 *       not, or when {@code curCompare > 1} (the old URL moves to the fallback lists and the
 *       summary); with {@code curCompare > -1} the candidate is added to the fallback lists
 *       and the summary; otherwise only to the summary.</li>
 * </ol>
 *
 * <p>NOTE(review): the first {@code replace(...)} applied to {@code cleanTitle} when building
 * {@code coreTitle} takes a one-character whitespace literal. If that is an ordinary space, the
 * later {@code coreTitle.contains(" ")} test can never be true; confirm which character was
 * intended (possibly a non-breaking space).
 *
 * <p>NOTE(review): {@code urlTitle} is null-checked in some expressions but dereferenced
 * without a check in the length comparison; confirm it is always non-null when {@code url} is.
 *
 * @param node the node the link was found on
 * @param href the raw link target
 * @param title the raw link title
 * @param textSibling sibling flag used in scoring (a set flag contributes no point)
 * @param anchorSibling sibling flag used in scoring (a set flag contributes no point)
 * @param loneBlock block flag used in scoring (a set flag contributes one point)
 * @param image whether the title comes from an image
 */
public void addUrl(Node node, String href, String title, boolean textSibling, boolean anchorSibling, boolean loneBlock, boolean image) { ++numUrls;/*from w w w . j a va 2 s. c om*/ String cleanHref = CommonUtil.strip(href, false); String cleanTitle = StringEscapeUtils .unescapeHtml4(StringEscapeUtils.unescapeXml(CommonUtil.strip(title, false))); cleanTitle = titleJunk.matcher(cleanTitle).replaceAll(""); String noPunctTitle = cleanTitle.replaceAll("\\p{Punct}", ""); if (CommonUtil.isEmpty(noPunctTitle)) { cleanTitle = ""; } String coreTitle = cleanTitle.replace(" ", "").replace(" ...", ""); if (!cleanHref.isEmpty() && !"#".equals(cleanHref) && !cleanTitle.isEmpty() && (!coreTitle.contains("/") || coreTitle.contains(" ") || !coreTitle.contains("."))) { int existingScore = 0; existingScore += titleHasTextSibling ? 0 : 1; existingScore += titleHasAnchorSibling ? 0 : 1; existingScore += !titleHasLoneBlock ? 0 : 1; existingScore += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? -10 : 0; int curScore = 0; curScore += textSibling ? 0 : 1; curScore += anchorSibling ? 0 : 1; curScore += !loneBlock ? 0 : 1; curScore += cleanTitle.matches("^\\p{Punct}.*$") ? -10 : 0; int curCompare = 0; if (url != null) { curCompare += cleanTitle.matches("^\\p{Punct}.*$") ? -5 : 0; curCompare += urlTitle == null || urlTitle.matches("^\\p{Punct}.*$") ? 9 : 0; curCompare += curScore < existingScore ? -2 : curScore > existingScore ? 2 : 0; curCompare += curScore <= 1 && curScore <= existingScore ? -1 : 0; curCompare += cleanTitle.length() <= urlTitle.length() / 2 ? -2 : cleanTitle.length() > urlTitle.length() * 2 ? 2 : 0; curCompare += cleanHref.contains("#") && !url.contains("#") ? -2 : !cleanHref.contains("#") && url.contains("#") ? 2 : 0; int nearestBlock = NodeUtil.nearestBlock(node); int existingBlock = NodeUtil.nearestBlock(urlNodes.get(url)); curCompare += nearestBlock > existingBlock ? -2 : nearestBlock == existingBlock ? 
0 : 2; } if (!hasImgUrl && CommonUtil.isEmpty(altUrl) && CommonUtil.isEmpty(altUrlTitle) && (isImg(cleanTitle) || isImg(cleanHref))) { altUrl = cleanHref; altUrlTitle = cleanTitle; urlNodes.put(cleanHref, node); hasImgUrl = true; } else if (url == null || !image && titleHasImage || curCompare > 1) { if (url != null) { priorUrl = url; priorUrlTitle = urlTitle; addToSummary(urlTitle, true, urlNodes.get(url)); fallbackUrls.add(url); fallbackUrlTitles.add(urlTitle); } url = cleanHref; urlNodes.put(url, node); urlTitle = cleanTitle; titleHasTextSibling = textSibling; titleHasAnchorSibling = anchorSibling; titleHasLoneBlock = loneBlock; titleHasImage = image; } else if (curCompare > -1) { fallbackUrls.add(cleanHref); urlNodes.put(cleanHref, node); fallbackUrlTitles.add(cleanTitle); addToSummary(cleanTitle, true, node); } else { addToSummary(cleanTitle, true, node); } } }
From source file:edu.isistan.carcha.plugin.editors.TraceabilityEditor.java
/** * Gets the line number for a given cross cutting concern or design decision. * * @param name the name/*from w w w.j a va 2 s. c o m*/ * @param kind the kind * @return line number */ int getLineFor(String name, String kind) { int i = 0; String editorText = editor.getDocumentProvider().getDocument(editor.getEditorInput()).get(); String[] lines = editorText.split(System.getProperty("line.separator")); int temp = 1; String lineEscaped; boolean find = false; for (int j = 0; j < lines.length && !find; j++) { lineEscaped = StringEscapeUtils.unescapeXml(lines[j]); if (lineEscaped.contains(name) && lineEscaped.contains(kind)) { i = temp; find = true; } temp++; } return i; }