Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeXml
public static final String unescapeXml(final String input)
Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
From source file:com.itude.mobile.mobbl.core.model.parser.MBXmlDocumentParser.java
private void endValidElement(String uri, String localName, String qName) throws SAXException { String string = StringEscapeUtils.unescapeXml(_characters.toString().trim()); if (string.length() > 0) { if (_stack.peek() instanceof MBElement && ((MBElement) _stack.peek()).isValidAttribute("text()")) { ((MBElement) _stack.peek()).setAttributeValue(string, "text()"); } else {//from w w w.j a va 2 s .c o m MBLog.w(MBConstants.APPLICATION_NAME, "MBXmlDocumentParser.endElement: Text (" + string + ") specified in body of element " + localName + " is ignored because the element has no text() attribute defined"); } } if (_stack.size() > 1) { _stack.pop(); _pathStack.pop(); } }
From source file:com.github.koraktor.steamcondenser.community.XMLData.java
/** * Returns the string value of the element with the given name (or path) * with converted XML escaped characters * * @param names The name of the elements representing the path to the * target element/*from www . jav a 2s .c o m*/ * @return The unescaped string value of the named element */ public String getUnescapedString(String... names) { return StringEscapeUtils.unescapeXml(this.getString(names)); }
From source file:edu.lternet.pasta.portal.search.AuthorSearch.java
/**
 * Extracts the values of one field from Solr query results by matching each line of the XML
 * text against a regular expression (no XML parser is involved).
 *
 * <p>A line contributes a value only when, apart from surrounding whitespace, it consists of
 * exactly one {@code <fieldName>...</fieldName>} element. Each value is unescaped, has its
 * whitespace runs collapsed to single spaces, is re-escaped, and is collected into a sorted
 * set, so the result is de-duplicated and in natural string order.
 *
 * @param xml        the Solr query results, an XML document string
 * @param fieldName  the field name to parse out of the XML, e.g. "author"
 * @return a String array of field values parsed from the XML, or {@code null} when
 *         {@code xml} is {@code null}
 */
private static String[] parseQueryResults(String xml, String fieldName) {
    // Compiled before the null check on purpose: the pattern is built regardless of xml.
    final Pattern fieldLine = Pattern.compile(
            String.format("^\\s*<%s>(.+)</%s>\\s*$", fieldName, fieldName));

    if (xml == null) {
        return null;
    }

    final TreeSet<String> sortedValues = new TreeSet<String>();
    for (final String candidate : xml.split("\n")) {
        final Matcher hit = fieldLine.matcher(candidate);
        if (!hit.matches()) {
            continue;
        }

        String value = StringEscapeUtils.unescapeXml(hit.group(1).trim());
        value = value.replace("\r", " ").replace("\n", " ").replaceAll("\\s+", " ").trim();
        sortedValues.add(StringEscapeUtils.escapeXml(value));
    }

    return sortedValues.toArray(new String[sortedValues.size()]);
}
From source file:lv.coref.io.MmaxReaderWriter.java
public void readWords(Text text, String fileName) { try {//from ww w . j a va 2 s . c om File file = new File(fileName); DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = dBuilder.parse(file); NodeList markables = doc.getElementsByTagName("word"); words = new ArrayList<String>(); for (int i = 0; i < markables.getLength(); i++) { Node markable = markables.item(i); String word = markable.getFirstChild().getNodeValue(); word = StringEscapeUtils.unescapeXml(word); // String idString = markable.getAttributes().getNamedItem("id").getNodeValue(); words.add(word); } } catch (Exception e) { log.log(Level.SEVERE, "Error reading " + fileName, e); } }
From source file:jCMPL.CmplMsg.java
/** * Reads the CmplMessages into the jCMPL structures from th corresponding CmplMessage file or a CmplMessage string * @param msgStr CmplMessage string /*w w w . jav a 2 s. c o m*/ * @throws CmplException */ protected void readCmplMessages(String msgStr) throws CmplException { if (msgStr.isEmpty() && _msgFile.isEmpty()) { throw new CmplException("Neither cmplMessageFile nor cmplMessageString defined"); } ArrayList<String> lines = null; if (!_msgFile.isEmpty()) { String line = ""; try { BufferedReader in = new BufferedReader(new FileReader(_msgFile)); lines = new ArrayList<String>(); while ((line = in.readLine()) != null) { lines.add(line); } in.close(); } catch (IOException err) { throw new CmplException("Cannot read message file :" + _msgFile + " ->" + err); } } else { lines = new ArrayList<String>(Arrays.asList(msgStr.split("\n"))); } int lineNr = 1; Boolean generalSection = false; Boolean msgSection = false; for (String line : lines) { if (lineNr == 1) { if (line.contains("<?xml version")) { lineNr++; continue; } else { throw new CmplException("Cant't read cmplMessage file - Not a XML file !"); } } if (lineNr == 2) { if (line.contains("<CmplMessages")) { lineNr++; continue; } else { throw new CmplException("Cant't read cmplMessage file - Not a CmplMessages file !"); } } if (line.contains("<general>")) { generalSection = true; msgSection = false; continue; } if (line.contains("</general>")) { generalSection = false; continue; } if (line.contains("<messages")) { generalSection = false; msgSection = true; continue; } if (line.contains("</messages")) { msgSection = false; continue; } if (generalSection) { if (line.contains("<generalStatus")) { _cmplStatus = CmplTools.xmlStrToString(line, "<generalStatus>([^\"]*)</generalStatus>"); continue; } if (line.contains("<message")) { _cmplMessage = CmplTools.xmlStrToString(line, "<message>([^\"]*)</message>"); continue; } if (line.contains("<cmplVersion")) { _cmplVersion = CmplTools.xmlStrToString(line, 
"<cmplVersion>([^\"]*)</cmplVersion>"); continue; } } if (msgSection) { if (line.contains("<message")) { ArrayList<String> tmpList = new ArrayList<String>(); String patternStr = "\"([^\"]*)\""; Pattern pattern = Pattern.compile(patternStr); Matcher matcher = pattern.matcher(line); while (matcher.find()) { tmpList.add(matcher.group(1)); } CmplMsg x = new CmplMsg(); x.setType(tmpList.get(0)); x.setFile(tmpList.get(1)); x.setLine(tmpList.get(2)); x.setDesrciption(StringEscapeUtils.unescapeXml(tmpList.get(3))); _cmplMessageList.add(x); } } } }
From source file:com.daphne.es.maintain.staticresource.web.controller.StaticResourceVersionController.java
private StaticResource switchStaticResourceContent(String rootRealPath, String versionedResourceRealPath, String fileName, String content, boolean isMin) throws IOException { StaticResource resource = extractResource(fileName, content); String filePath = resource.getUrl(); filePath = filePath.replace("${ctx}", rootRealPath); if (isMin) {//from w ww.j av a2s . com File file = new File(YuiCompressorUtils.getCompressFileName(filePath)); if (!file.exists()) { throw new RuntimeException("" + resource.getUrl()); } } else { File file = new File(YuiCompressorUtils.getNoneCompressFileName(filePath)); if (!file.exists()) { throw new RuntimeException("?" + resource.getUrl()); } } content = StringEscapeUtils.unescapeXml(content); File file = new File(versionedResourceRealPath + fileName); List<String> contents = FileUtils.readLines(file); for (int i = 0, l = contents.size(); i < l; i++) { String fileContent = contents.get(i); if (content.equals(fileContent)) { Matcher matcher = scriptPattern.matcher(content); if (!matcher.matches()) { matcher = linkPattern.matcher(content); } String newUrl = isMin ? YuiCompressorUtils.getCompressFileName(resource.getUrl()) : YuiCompressorUtils.getNoneCompressFileName(resource.getUrl()); content = matcher.replaceAll("$1" + Matcher.quoteReplacement(newUrl) + "$3$4$5"); contents.set(i, content); resource.setContent(content); resource.setUrl(newUrl); break; } } FileUtils.writeLines(file, contents); return resource; }
From source file:com.streamsets.pipeline.lib.el.StringEL.java
/**
 * Expression-language function {@code str:unescapeXML}: delegates to
 * {@link StringEscapeUtils#unescapeXml(String)} and returns its result unchanged.
 *
 * @param string the text containing XML escapes
 * @return whatever {@code StringEscapeUtils.unescapeXml} returns for {@code string}
 */
@ElFunction(
        prefix = "str",
        name = "unescapeXML",
        description = "Returns an unescaped string from a string with XML special characters escaped.")
public static String unescapeXml(@ElParam("string") String string) {
    final String unescaped = StringEscapeUtils.unescapeXml(string);
    return unescaped;
}
From source file:com.hygenics.parser.JDump.java
private void toFile() { ArrayList<String> archs = new ArrayList<String>(); List<Future<ArrayList<String>>> qfutures; Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4); ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum)); int dumped = 0; if (archive) { log.info("Cleaning"); for (String k : fpaths.keySet()) { String fpath = ""; for (String ofp : fpaths.get(k).keySet()) { fpath = ofp;//from w w w . j av a 2s.c o m } if (fpath.length() > 0) { String[] barr = fpath.split("\\/"); String basefile = ""; Archiver zip = new Archiver(); for (int i = 0; i > barr.length - 1; i++) { basefile += (i == 0) ? barr[i] : "/" + barr[i]; } if (basefile.trim().length() > 0) { zip.setBasedirectory(basefile); zip.setZipDirectory(basefile + "archive.zip"); zip.setAvoidanceString(".zip|archive"); zip.setDelFiles(true); zip.run(); } } } } log.info("Dumping"); for (String table : fpaths.keySet()) { int offset = 0; if (template.checkTable(table, table.split("\\.")[0])) { if (template.getCount(table) > 0) { log.info("Dumping for " + table); // get header String select = "SELECT * FROM " + table; String fpath = null; ArrayList<String> jsons; String condition; int w = 0; int start = offset; int chunksize = (int) Math.ceil(pullsize / qnum); // get fpath for (String ofp : fpaths.get(table).keySet()) { start = fpaths.get(table).get(ofp); fpath = ofp; } // perform write if (headers != null && fpath != null) { List<String> headersList = headers.get(table); String output = null; boolean existed = true; if (addFileDate) { fpath = fpath + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "") + ".txt"; } // check to see if file should be created if (!new File(fpath).exists()) { try { new File(fpath).createNewFile(); } catch (IOException e) { e.printStackTrace(); } existed = false; } // check to see if file must be recreated if (!append) { File f = new File(fpath); f.delete(); try { 
f.createNewFile(); } catch (IOException e) { e.printStackTrace(); } } if (headersList != null && (append == false || existed == false)) { for (String header : headersList) { output = (output == null) ? StringEscapeUtils.unescapeXml(header) : output + delimeter + StringEscapeUtils.unescapeXml(header); } } do { // get records jsons = new ArrayList<String>(pullsize); log.info("Looking for Pages."); for (int conn = 0; conn < qnum; conn++) { // create condition condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND " + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1))); if (extracondition != null) { condition += " " + extracondition.trim(); } // get queries qcollect.add(new SplitQuery(template, (select + condition))); log.info("Fetching " + select + condition); } start += (chunksize * qnum); qfutures = fjp.invokeAll(qcollect); w = 0; while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) { w++; } log.info("Waited for " + w + " cycles"); for (Future<ArrayList<String>> f : qfutures) { try { ArrayList<String> test = f.get(); if (test != null) { if (test.size() > 0) { jsons.addAll(test); } } if (f.isDone() == false) { f.cancel(true); } f = null; } catch (Exception e) { log.warn("Encoding Error!"); e.printStackTrace(); } } qcollect = new HashSet<Callable<ArrayList<String>>>(4); qfutures = null; log.info("Finished Getting Pages"); // post records to the file try (FileWriter fw = new FileWriter(new File(fpath), true)) { // get and write headers if (jsons.size() > 0) { fw.write(output + "\n"); // write data for (String json : jsons) { output = null; JsonObject jo = JsonObject.readFrom(json); if (jo.size() >= headersList.size()) {// allows // trimming // of // table // to // key // aspects output = null; for (String key : headers.get(table)) { if (jo.get(key.toLowerCase()) != null) { String data = StringEscapeUtils .unescapeXml(jo.get(key.toLowerCase()).asString()); if (replacementPattern != null) { data = 
data.replaceAll(replacementPattern, ""); data = data.replace(delimeter, delimreplace); } output = (output == null) ? data.replaceAll("[^\u0020-\u0070 ]+", "") : output + delimeter + data.replaceAll("[^\u0020-\u0070 ]+", ""); } else { output += delimeter; } } if (output != null && output.trim().length() > headersList.size()) { fw.write(output + "\n"); } } else { if (jsons.size() == 0) { Log.info( "Number of Headers and Keys from Json Array and Headers List Impossible to Match"); try { throw new MismatchException( "Number of Headers: " + headersList.size() + " && Number of Keys: " + jo.size()); } catch (MismatchException e) { e.printStackTrace(); } } } output = null; } } else { log.info("EOF FOUND! No New Records in This Iteration....Stopping."); } } catch (IOException e) { e.printStackTrace(); } } while (jsons.size() > 0); } else { try { throw new NullPointerException( "No Headers Input to Class. Please Create the Requisite Map."); } catch (NullPointerException e) { e.printStackTrace(); } } dumped += 1; } else { try { throw new NoDataException("No Data Found in Table " + table); } catch (NoDataException e) { e.printStackTrace(); } } } else { log.info("Missing Table " + table); try { throw new NullPointerException("Table " + table + " Does Not Exist!!!"); } catch (NullPointerException e) { e.printStackTrace(); } } } // end LOOP if (!fjp.isShutdown()) { fjp.shutdownNow(); } if (dumped == 0) { log.error("No Data Found in Any Table"); System.exit(-1); } }
From source file:com.hygenics.parser.JDumpWithReference.java
private void toFile() { List<Future<ArrayList<String>>> qfutures; Set<Callable<ArrayList<String>>> qcollect = new HashSet<Callable<ArrayList<String>>>(4); ForkJoinPool fjp = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * procnum)); int dumped = 0; if (archive) { log.info("Cleaning"); for (String k : fpaths.keySet()) { String fpath = ""; for (String ofp : fpaths.get(k).keySet()) { fpath = ofp;/*from w w w . j a v a 2 s . c om*/ } if (fpath.length() > 0) { String[] barr = fpath.split("\\/"); String basefile = ""; Archiver zip = new Archiver(); for (int i = 0; i > barr.length - 1; i++) { basefile += (i == 0) ? barr[i] : "/" + barr[i]; } if (basefile.trim().length() > 0) { zip.setBasedirectory(basefile); zip.setZipDirectory(basefile + "archive.zip"); zip.setAvoidanceString(".zip|archive"); zip.setDelFiles(true); zip.run(); } } } } log.info("Dumping"); for (String table : fpaths.keySet()) { int offset = 0; if (template.checkTable(this.baseschema + "." + table, this.baseschema)) { if (template.getCount(this.baseschema + "." + table) > 0) { log.info("Dumping for " + table); // get header String select = "SELECT * FROM " + this.baseschema + "." 
+ table; String fpath = null; ArrayList<String> jsons; String condition; int w = 0; int start = offset; int chunksize = (int) Math.ceil(pullsize / qnum); // get fpath for (String ofp : fpaths.get(table).keySet()) { start = fpaths.get(table).get(ofp); fpath = ofp; } // perform write if (headers != null && fpath != null) { List<String> headersList = headers.get(table); String output = null; boolean existed = true; if (addFileDate) { fpath = fpath + Calendar.getInstance().getTime().toString().trim().replaceAll(":|\\s", "") + ".txt"; } // check to see if file should be created if (!new File(fpath).exists()) { try { new File(this.baseFilePath + fpath).createNewFile(); } catch (IOException e) { e.printStackTrace(); } existed = false; } // check to see if file must be recreated if (!append) { File f = new File(this.baseFilePath + fpath); f.delete(); try { f.createNewFile(); } catch (IOException e) { e.printStackTrace(); } } if (headersList != null && (append == false || existed == false)) { for (String header : headersList) { output = (output == null) ? 
StringEscapeUtils.unescapeXml(header) : output + delimeter + StringEscapeUtils.unescapeXml(header); } } do { // get records jsons = new ArrayList<String>(pullsize); log.info("Looking for Pages."); for (int conn = 0; conn < qnum; conn++) { // create condition condition = " WHERE " + pullid + " >= " + (start + (conn * chunksize)) + " AND " + pullid + " < " + Integer.toString(start + (chunksize * (conn + 1))); if (extracondition != null) { condition += " " + extracondition.trim(); } // get queries qcollect.add(new SplitQuery(template, (select + condition))); log.info("Fetching " + select + condition); } start += (chunksize * qnum); qfutures = fjp.invokeAll(qcollect); w = 0; while (fjp.getActiveThreadCount() > 0 && fjp.isQuiescent() == false) { w++; } log.info("Waited for " + w + " cycles"); for (Future<ArrayList<String>> f : qfutures) { try { ArrayList<String> test = f.get(); if (test != null) { if (test.size() > 0) { jsons.addAll(test); } } if (f.isDone() == false) { f.cancel(true); } f = null; } catch (Exception e) { log.warn("Encoding Error!"); e.printStackTrace(); } } qcollect = new HashSet<Callable<ArrayList<String>>>(4); qfutures = null; log.info("Finished Getting Pages"); // post records to the file try (FileWriter fw = new FileWriter(new File(this.baseFilePath + fpath), true)) { // get and write headers if (jsons.size() > 0) { fw.write(output + "\n"); // write data for (String json : jsons) { output = null; JsonObject jo = JsonObject.readFrom(json); if (jo.size() >= headersList.size()) {// allows // trimming // of // table // to // key // aspects output = null; for (String key : headers.get(table)) { if (jo.get(key.toLowerCase()) != null) { String data = StringEscapeUtils .unescapeXml(jo.get(key.toLowerCase()).asString()); if (replacementPattern != null) { data = data.replaceAll(replacementPattern, ""); data = data.replace(delimeter, delimreplace); } output = (output == null) ? 
data.replaceAll("[^\u0020-\u007E ]+", "") : output + delimeter + data.replaceAll("[^\u0020-\u007E ]+", ""); } else { output += delimeter; } } if (output != null && output.trim().length() > headersList.size()) { fw.write(output + "\n"); } } else { if (jsons.size() == 0) { Log.info( "Number of Headers and Keys from Json Array and Headers List Impossible to Match"); try { throw new MismatchException( "Number of Headers: " + headersList.size() + " && Number of Keys: " + jo.size()); } catch (MismatchException e) { e.printStackTrace(); } } } output = null; } } else { log.info("EOF FOUND! No New Records in This Iteration....Stopping."); } } catch (IOException e) { e.printStackTrace(); } } while (jsons.size() > 0); } else { try { throw new NullPointerException( "No Headers Input to Class. Please Create the Requisite Map."); } catch (NullPointerException e) { e.printStackTrace(); } } dumped += 1; } else { try { throw new NoDataException("No Data in Table " + table); } catch (NoDataException e) { e.printStackTrace(); } } } else { log.info("Missing Table " + table); try { throw new NullPointerException("Table " + table + " Does Not Exist!!!"); } catch (NullPointerException e) { e.printStackTrace(); } } } // end LOOP if (!fjp.isShutdown()) { fjp.shutdownNow(); } if (dumped == 0) { log.error("No Data found in Any Tables"); System.exit(-1); } }
From source file:lv.coref.io.MmaxReaderWriter.java
public void readCoreferences(Text text, String fileName) { try {//from w ww . j av a 2s . c o m File file = new File(fileName); DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = dBuilder.parse(file); NodeList markables = doc.getElementsByTagName("markable"); int mentionId = 1; int twinlessMCId = 10000; // ids for twinless mentions for (int i = 0; i < markables.getLength(); i++) { Node markable = markables.item(i); String spanString = (markable.getAttributes().getNamedItem("span") != null) ? markable.getAttributes().getNamedItem("span").getNodeValue() : null; String category = (markable.getAttributes().getNamedItem("category") != null) ? markable.getAttributes().getNamedItem("category").getNodeValue() : null; String type = (markable.getAttributes().getNamedItem("type") != null) ? markable.getAttributes().getNamedItem("type").getNodeValue() : null; String rule = (markable.getAttributes().getNamedItem("rule") != null) ? markable.getAttributes().getNamedItem("rule").getNodeValue() : null; String headString = (markable.getAttributes().getNamedItem("heads") != null) ? 
markable.getAttributes().getNamedItem("heads").getNodeValue() : ""; headString = StringEscapeUtils.unescapeXml(headString); String corefString = markable.getAttributes().getNamedItem("coref_class").getNodeValue(); String id = null; if (corefString.startsWith("set_")) id = corefString.substring(4); else id = Integer.toString(twinlessMCId++); Pair<Integer, Integer> span = getSpanFromString(spanString, "word_"); int start = span.first - 1; int end = span.second - 1; Token startToken = text.getToken(start); Token endToken = text.getToken(end); List<Token> tokens = startToken.getSentence().subList(startToken.getPosition(), endToken.getPosition() + 1); List<Token> heads = getHeadsFromString(headString, tokens); Mention m = new Mention(Integer.toString(mentionId++), tokens, heads); m.setCategory(category); m.setType(Constants.Type.valueOf(type.toUpperCase())); startToken.getSentence().addMention(m); MentionChain mc = text.getMentionChain(id); if (mc == null) { mc = new MentionChain(id, m); startToken.getSentence().getText().addMentionChain(mc); } else { mc.add(m); } } } catch (Exception e) { log.log(Level.SEVERE, "Error reading " + fileName, e); } }