Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeXml
public static final String unescapeXml(final String input)
Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
From source file:org.openaccessbutton.openaccessbutton.blog.RssParser.java
/** * Extract text from a tag//from w w w. j av a 2 s. c om */ private String readText(XmlPullParser parser) throws IOException, XmlPullParserException { String result = ""; if (parser.next() == XmlPullParser.TEXT) { result = parser.getText(); parser.nextTag(); } // Unicode characters are escaped in the XML, so we unescape those return StringEscapeUtils.unescapeXml(result); }
From source file:org.opendatakit.aggregate.form.PropertyMapSerializer.java
/**
 * Deserializes the XML representation of a key-value map.
 *
 * <p>The document must start with {@code K_XML_BEGIN_PARAMETERS}, contain zero or more
 * key/value entries delimited by {@code K_XML_BEGIN_PARAMETER_BEGIN_KEY},
 * {@code K_XML_END_KEY_BEGIN_VALUE} and {@code K_XML_END_VALUE_END_PARAMETER}, and end with
 * {@code K_XML_END_PARAMETERS} with nothing after it. Keys and values are XML-unescaped.
 *
 * @param parameterDocument the serialized map; {@code null} yields an empty map
 * @return parameter map as a {@code Map<String,String>} key-value map
 * @throws IllegalArgumentException if the document does not follow the expected layout
 */
public static Map<String, String> deserializeRequestParameters(String parameterDocument) {
    final Map<String, String> result = new HashMap<String, String>();
    if (parameterDocument == null) {
        return result;
    }

    if (!parameterDocument.startsWith(K_XML_BEGIN_PARAMETERS)) {
        throw new IllegalArgumentException(
                "bad parameter list -- not beginning with " + K_XML_BEGIN_PARAMETERS);
    }

    // cursor always points at the first character that has not been consumed yet.
    int cursor = K_XML_BEGIN_PARAMETERS.length();
    while (parameterDocument.startsWith(K_XML_BEGIN_PARAMETER_BEGIN_KEY, cursor)) {
        final int keyStart = cursor + K_XML_BEGIN_PARAMETER_BEGIN_KEY.length();
        final int keyEnd = parameterDocument.indexOf(K_XML_END_KEY_BEGIN_VALUE, keyStart);
        if (keyEnd < 0) {
            throw new IllegalArgumentException("bad parameter list -- end-key-begin-value not found");
        }
        final String key = StringEscapeUtils.unescapeXml(parameterDocument.substring(keyStart, keyEnd));

        final int valueStart = keyEnd + K_XML_END_KEY_BEGIN_VALUE.length();
        final int valueEnd = parameterDocument.indexOf(K_XML_END_VALUE_END_PARAMETER, valueStart);
        if (valueEnd < 0) {
            throw new IllegalArgumentException("bad parameter list -- end-value-end-parameter not found");
        }
        final String value = StringEscapeUtils.unescapeXml(parameterDocument.substring(valueStart, valueEnd));

        result.put(key, value);
        cursor = valueEnd + K_XML_END_VALUE_END_PARAMETER.length();
    }

    if (!parameterDocument.startsWith(K_XML_END_PARAMETERS, cursor)) {
        throw new IllegalArgumentException("bad parameter list -- end-parameters not found");
    }
    if (cursor + K_XML_END_PARAMETERS.length() != parameterDocument.length()) {
        throw new IllegalArgumentException("bad parameter list -- extra characters found");
    }
    return result;
}
From source file:org.tensin.sonos.model.Entry.java
/**
 * Returns the album art URI with XML escapes decoded.
 *
 * @return the stored album art URI after XML unescaping
 */
public String getAlbumArtUri() {
    final String escapedUri = this.albumArtUri;
    return StringEscapeUtils.unescapeXml(escapedUri);
}
From source file:org.tinymediamanager.core.Settings.java
/** * Gets the proxy password./*from w w w .j a v a2 s. c o m*/ * * @return the proxy password */ @XmlElement(name = PROXY_PASSWORD) @XmlJavaTypeAdapter(EncryptedStringXmlAdapter.class) public String getProxyPassword() { return StringEscapeUtils.unescapeXml(proxyPassword); }
From source file:pl.datamatica.traccar.api.GPXParser.java
public Result parse(InputStream inputStream, Device device) throws XMLStreamException, ParseException, IOException { Result result = new Result(); TimeZone tz = TimeZone.getTimeZone("UTC"); DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); DateFormat dateFormatWithMS = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); dateFormat.setTimeZone(tz);//from ww w. ja v a 2 s .com dateFormatWithMS.setTimeZone(tz); XMLStreamReader xsr = XMLInputFactory.newFactory().createXMLStreamReader(inputStream); ObjectMapper jsonMapper = new ObjectMapper(); result.positions = new LinkedList<>(); Position position = null; Stack<String> extensionsElements = new Stack<>(); boolean extensionsStarted = false; Map<String, Object> other = null; while (xsr.hasNext()) { xsr.next(); if (xsr.getEventType() == XMLStreamReader.START_ELEMENT) { if (xsr.getLocalName().equalsIgnoreCase("trkpt")) { position = new Position(); position.setLongitude(Double.parseDouble(xsr.getAttributeValue(null, "lon"))); position.setLatitude(Double.parseDouble(xsr.getAttributeValue(null, "lat"))); position.setValid(Boolean.TRUE); position.setDevice(device); } else if (xsr.getLocalName().equalsIgnoreCase("time")) { if (position != null) { String strTime = xsr.getElementText(); if (strTime.length() == 20) { position.setTime(dateFormat.parse(strTime)); } else { position.setTime(dateFormatWithMS.parse(strTime)); } } } else if (xsr.getLocalName().equalsIgnoreCase("ele") && position != null) { position.setAltitude(Double.parseDouble(xsr.getElementText())); } else if (xsr.getLocalName().equalsIgnoreCase("address") && position != null) { position.setAddress(StringEscapeUtils.unescapeXml(xsr.getElementText())); } else if (xsr.getLocalName().equalsIgnoreCase("protocol") && position != null) { position.setProtocol(xsr.getElementText()); } else if (xsr.getLocalName().equalsIgnoreCase("speed") && position != null) { position.setSpeed(Double.parseDouble(xsr.getElementText())); } else if 
(xsr.getLocalName().equalsIgnoreCase("power") && position != null) { position.setPower(Double.parseDouble(xsr.getElementText())); } else if (xsr.getLocalName().equalsIgnoreCase("course") && position != null) { position.setCourse(Double.parseDouble(xsr.getElementText())); } else if (xsr.getLocalName().equalsIgnoreCase("other") && position != null) { position.setOther(StringEscapeUtils.unescapeXml(xsr.getElementText())); } else if (xsr.getLocalName().equalsIgnoreCase("extensions")) { other = new LinkedHashMap<>(); extensionsStarted = true; } else if (position != null && extensionsStarted && other != null) { extensionsElements.push(xsr.getLocalName()); } } else if (xsr.getEventType() == XMLStreamReader.END_ELEMENT) { if (xsr.getLocalName().equalsIgnoreCase("trkpt")) { if (other == null) { other = new HashMap<>(); } if (position.getOther() != null) { if (position.getOther().startsWith("<")) { XMLStreamReader otherReader = XMLInputFactory.newFactory() .createXMLStreamReader(new StringReader(position.getOther())); while (otherReader.hasNext()) { if (otherReader.next() == XMLStreamReader.START_ELEMENT && !otherReader.getLocalName().equals("info")) { other.put(otherReader.getLocalName(), otherReader.getElementText()); } } } else { Map<String, Object> parsedOther = jsonMapper.readValue(position.getOther(), LinkedHashMap.class); other.putAll(parsedOther); } } if (other.containsKey("protocol") && position.getProtocol() == null) { position.setProtocol(other.get("protocol").toString()); } else if (!other.containsKey("protocol") && position.getProtocol() == null) { position.setProtocol("gpx_import"); } other.put("import_type", (result.positions.isEmpty() ? 
"import_start" : "import")); position.setOther(jsonMapper.writeValueAsString(other)); result.positions.add(position); if (result.latestPosition == null || result.latestPosition.getTime().compareTo(position.getTime()) < 0) { result.latestPosition = position; } position = null; other = null; } else if (xsr.getLocalName().equalsIgnoreCase("extensions")) { extensionsStarted = false; } else if (extensionsStarted) { extensionsElements.pop(); } } else if (extensionsStarted && other != null && xsr.getEventType() == XMLStreamReader.CHARACTERS && !xsr.getText().trim().isEmpty() && !extensionsElements.empty()) { String name = ""; for (int i = 0; i < extensionsElements.size(); i++) { name += (name.length() > 0 ? "-" : "") + extensionsElements.get(i); } other.put(name, xsr.getText()); } } if (result.positions.size() > 1) { Position last = ((LinkedList<Position>) result.positions).getLast(); Map<String, Object> parsedOther = jsonMapper.readValue(last.getOther(), LinkedHashMap.class); parsedOther.put("import_type", "import_end"); last.setOther(jsonMapper.writeValueAsString(parsedOther)); } return result; }
From source file:pt.ua.tm.neji.core.module.BaseReader.java
public String unescapeXML(String text) { StringEscapeUtils.unescapeXml(text); // New lines, are new ROIs // unescapedText = unescapedText.replaceAll("\n", "</roi>\n<roi>"); // unescapedText = Regex.replace(unescapedText, "\n", "</roi>\n<roi>"); return Regex.replace(text, roiRun); }
From source file:pt.ua.tm.neji.evaluation.whatizit.XML2A1Module.java
private void addTrigger(StringBuffer yytext, int start, String tag, String group, boolean add) { String annotationTag = yytext.substring(startTag, start + tag.length() + 3); String annotation = annotationTag.substring(annotationTag.indexOf(">") + 1, annotationTag.lastIndexOf("<")); annotation = StringEscapeUtils.unescapeXml(annotation); int startAnnotation = text.length(); int endAnnotation = text.length() + annotation.length(); text.append(annotation);//from w w w .ja va2 s.c o m if (add) { Concept concept = new Concept(startAnnotation, endAnnotation, group, annotation); concept.getIdentifiers().addAll(getIDs(annotationTag, group)); conceptList.add(concept); } previousEnd = start + tag.length() + 3; }
From source file:pt.ua.tm.neji.util.obo.OBO2TSV.java
private static String format(final String in) { String name = in;// w ww .j av a2 s. c o m name = name.replaceAll("_", ""); name = name.replaceAll("\\s+\\((Japanese|Spanish)\\)", ""); name = name.replaceAll("(&#[0-9]+)", "$1;"); name = StringEscapeUtils.unescapeXml(name); name = name.toLowerCase(); return name; }
From source file:pt.ua.tm.neji.util.XMLParsing.java
public static String solveXMLEscapingProblems(String text, DfaRun nbsRun, DfaRun ampRun) { // Remove no breaking white spaces text = Regex.replace(text, nbsRun);//w w w .j a va 2 s . c o m // text = text.replaceAll("\u2002", " "); // Solve MEDLINE bug of HTML codes escaped twice text = StringEscapeUtils.unescapeXml(text); // Solve MEDLINE bug that escapes HTML4 hex codes text = Regex.replace(text, ampRun); return text; }
From source file:pt.ua.tm.neji.util.XMLParsing.java
public static String solveXMLEscapingProblems(String text) { // Remove no breaking white spaces // text = text.replaceAll("\\u00A0", " "); text = Regex.replace(text, "\u00A0", " "); // text = Regex.replace(text, nbsRun); // Solve MEDLINE bug that puts an XML tag with various lines // text = text.replaceAll("\n\\s+", " "); // text = text.replaceAll("\n", " "); // Solve MEDLINE bug of HTML codes escaped twice text = StringEscapeUtils.unescapeXml(text); // Solve MEDLINE bug that escapes HTML4 hex codes // text = text.replaceAll("&#", "&#"); text = Regex.replace(text, "&#", "&#"); // text = Regex.replace(text, ampRun); return text;//from www . j av a 2 s . c o m }