List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeXml
public static final String unescapeXml(final String input)
Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
From source file:hoot.services.controllers.wps.MarkItemsReviewedWpsTest.java
@Override protected String verifyWpsResponse(final String responseStr) throws Exception { final Document responseData = XmlDocumentBuilder.parse(responseStr, false); Assert.assertNotNull(responseData);//from w ww. ja v a 2s.c o m System.out.println(XmlDocumentBuilder.toString(responseData)); XPath xpath = XmlDocumentBuilder.createXPath(); long changesetId = -1; try { Assert.assertEquals(processId, xpath.evaluate(".//Process/Identifier", responseData)); NodeList returnedNodes = XPathAPI.selectNodeList(responseData, ".//ProcessOutputs/Output"); Assert.assertEquals(3, returnedNodes.getLength()); Assert.assertEquals("changesetUploadResponse", xpath.evaluate(".//ProcessOutputs/Output[1]/Identifier", responseData)); Assert.assertEquals("string", xpath.evaluate(".//ProcessOutputs/Output[1]/Data/LiteralData/@dataType", responseData)); final Document changesetResponse = XmlDocumentBuilder.parse(StringEscapeUtils .unescapeXml(xpath.evaluate(".//ProcessOutputs/Output[1]/Data/LiteralData", responseData))); Assert.assertNotNull(changesetResponse); //Changeset uploading is heavily tested in the OSM controller tests, so not testing that here; Assert.assertEquals("numItemsMarkedReviewed", xpath.evaluate(".//ProcessOutputs/Output[2]/Identifier", responseData)); Assert.assertEquals("integer", xpath.evaluate(".//ProcessOutputs/Output[2]/Data/LiteralData/@dataType", responseData)); Assert.assertEquals(5, Integer.parseInt(xpath.evaluate(".//ProcessOutputs/Output[2]/Data/LiteralData", responseData))); Assert.assertEquals("changesetId", xpath.evaluate(".//ProcessOutputs/Output[3]/Identifier", responseData)); Assert.assertEquals("integer", xpath.evaluate(".//ProcessOutputs/Output[3]/Data/LiteralData/@dataType", responseData)); changesetId = Integer .parseInt(xpath.evaluate(".//ProcessOutputs/Output[3]/Data/LiteralData", responseData)); Assert.assertTrue(changesetId > -1); } catch (XPathExpressionException e) { Assert.fail("Error parsing response document: " + e.getMessage()); } //verify 
the changeset data was written ReviewTestUtils.changesetId = changesetId; ReviewTestUtils.verifyDataMarkedAsReviewed(true); return null; }
From source file:mServer.crawler.sender.MediathekBr.java
private void getTheman() { final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html"; final String MUSTER_URL = "<a href=\"/mediathek/video/"; final String MUSTER_URL_1 = "sendungen/"; final String MUSTER_URL_2 = "video/"; listeThemen.clear();// w w w . ja v a 2 s. c o m MSStringBuilder seite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden()); seite = getUrlIo.getUri(SENDERNAME, ADRESSE, StandardCharsets.UTF_8, 5 /* versuche */, seite, ""); int pos1; int pos2; String url = ""; if ((pos1 = seite.indexOf("<ul class=\"clearFix\">")) != -1) { while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) { if (Config.getStop()) { break; } try { pos1 += MUSTER_URL.length(); if ((pos2 = seite.indexOf("\"", pos1)) != -1) { url = seite.substring(pos1, pos2); } String thema = seite.extract("<span>", "<", pos1); thema = StringEscapeUtils.unescapeXml(thema.trim()); thema = StringEscapeUtils.unescapeHtml4(thema.trim()); if (!listeAlleThemen.contains(thema)) { listeAlleThemen.add(thema); } if (url.isEmpty() || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) { continue; } /// der BR ist etwas zu langsam dafr???? // // in die Liste eintragen // String[] add; // if (MSearchConfig.senderAllesLaden) { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url + "#seriesMoreCount=10", ""}; // } else { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url, ""}; // } // in die Liste eintragen String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema }; listeThemen.addUrl(add); } catch (Exception ex) { Log.errorLog(821213698, ex); } } } }
From source file:fr.mcc.ginco.audit.commands.TermCommandBuilder.java
/**
 * Builds the list of command lines for preferred terms added between two revisions.
 *
 * A term counts as added when it is preferred and its lexical value is not present in
 * the previous revision. An added term that has non-preferred terms attached gets a line
 * prefixed with {@code CommandLine.STARS}. An added term without non-preferred terms
 * gets a plain line, but only when its concept has no parent concepts and is not itself
 * a parent concept of any current term.
 *
 * @param previousTerms the terms of the previous revision
 * @param currentTerms the terms of the current revision
 * @return the command lines describing the added preferred terms, possibly empty
 */
public List<CommandLine> buildAddedPrefTermsLines(List<ThesaurusTerm> previousTerms,
        List<ThesaurusTerm> currentTerms) {
    List<CommandLine> termsOperations = new ArrayList<CommandLine>();
    Map<String, ThesaurusTerm> oldLexicalValues = mistralStructuresBuilder.getTermVersionsView(previousTerms);
    Map<String, List<ThesaurusTerm>> newNotPreferredTermsByTerm = mistralStructuresBuilder
            .getNotPreferredTermsByTerm(currentTerms);

    // Depends only on currentTerms, so it is built lazily and at most once instead of
    // being rebuilt for every qualifying term.
    Set<ThesaurusConcept> allParents = null;

    for (ThesaurusTerm currentTerm : currentTerms) {
        String lexicalValue = currentTerm.getLexicalValue();
        if (oldLexicalValues.containsKey(lexicalValue) || !currentTerm.getPrefered()) {
            continue;
        }

        if (!newNotPreferredTermsByTerm.get(lexicalValue).isEmpty()) {
            CommandLine additionLine = new CommandLine();
            additionLine.setValue(CommandLine.STARS + StringEscapeUtils.unescapeXml(lexicalValue));
            termsOperations.add(additionLine);
            continue;
        }

        if (allParents == null) {
            allParents = collectParentConcepts(currentTerms);
        }
        ThesaurusConcept concept = currentTerm.getConcept();
        if (concept.getParentConcepts().isEmpty() && !allParents.contains(concept)) {
            CommandLine additionLine = new CommandLine();
            additionLine.setValue(StringEscapeUtils.unescapeXml(lexicalValue));
            termsOperations.add(additionLine);
        }
    }
    return termsOperations;
}

/** Collects the parent concepts of the concepts of all given terms. */
private Set<ThesaurusConcept> collectParentConcepts(List<ThesaurusTerm> terms) {
    Set<ThesaurusConcept> parents = new HashSet<ThesaurusConcept>();
    for (ThesaurusTerm term : terms) {
        parents.addAll(term.getConcept().getParentConcepts());
    }
    return parents;
}
From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java
final private String downloadObject(URL parentUrl, String src, String contentType) throws ClientProtocolException, IllegalStateException, IOException, SearchLibException, URISyntaxException {//from w ww. j a v a2 s . co m RecursiveEntry recursiveEntry = recursiveSecurity.enter(); if (recursiveEntry == null) { Logging.warn("Max recursion reached - " + recursiveSecurity + " src: " + src + " url: " + parentUrl); return src; } try { src = StringEscapeUtils.unescapeXml(src); URL objectURL = LinkUtils.getLink(parentUrl, src, null, false); if (objectURL == null) return src; if (objectURL.equals(pageUrl)) { return "index.html"; } String urlString = objectURL.toExternalForm(); String fileName = urlFileMap.get(urlString); if (fileName != null) return getLocalPath(parentUrl, fileName); DownloadItem downloadItem = null; try { downloadItem = downloader.get(objectURL.toURI(), null); } catch (IOException e) { Logging.warn("IO Exception on " + objectURL.toURI(), e); return src; } fileName = downloadItem.getFileName(); if (fileName == null || fileName.length() == 0) return src; downloadItem.checkNoErrorRange(200, 300); String baseName = FilenameUtils.getBaseName(fileName); String extension = FilenameUtils.getExtension(fileName); if (contentType == null) contentType = downloadItem.getContentBaseType(); if ("text/html".equalsIgnoreCase(contentType)) extension = "html"; else if ("text/javascript".equalsIgnoreCase(contentType)) extension = "js"; else if ("text/css".equalsIgnoreCase(contentType)) extension = "css"; else if ("application/x-shockwave-flash".equalsIgnoreCase(contentType)) extension = "swf"; else if ("image/png".equalsIgnoreCase(contentType)) extension = "png"; else if ("image/gif".equalsIgnoreCase(contentType)) extension = "gif"; else if ("image/jpeg".equalsIgnoreCase(contentType)) extension = "jpg"; else if ("image/jpg".equalsIgnoreCase(contentType)) extension = "jpg"; File destFile = getAndRegisterDestFile(urlString, baseName, extension); if ("css".equals(extension)) { 
String cssContent = downloadItem.getContentAsString(); StringBuffer sb = checkCSSContent(objectURL, cssContent); if (sb != null && sb.length() > 0) cssContent = sb.toString(); FileUtils.write(destFile, cssContent); } else downloadItem.writeToFile(destFile); return getLocalPath(parentUrl, destFile.getName()); } catch (HttpHostConnectException e) { Logging.warn(e); return src; } catch (UnknownHostException e) { Logging.warn(e); return src; } catch (WrongStatusCodeException e) { Logging.warn(e); return src; } finally { recursiveEntry.release(); } }
From source file:fr.mcc.ginco.exports.skos.SKOSConceptExporter.java
/**
 * Exports the minimal information of a concept into the given model.
 *
 * Adds the scheme membership, the creation and modification dates, the notation (when
 * one is set, XML-unescaped) and the status of the concept as statements about
 * {@code conceptResource}. A "status" datatype property labelled "status" is also
 * created in the ontology model.
 *
 * @param concept the concept to export
 * @param conceptResource the resource representing the concept in the model
 * @param model the model receiving the statements
 * @param ontModel the ontology model in which the status property is declared
 * @return the model passed in, after the statements were added
 */
private Model exportConceptInformation(ThesaurusConcept concept, Resource conceptResource, Model model,
        OntModel ontModel) {
    Resource scheme = model.createResource(concept.getThesaurus().getIdentifier());
    model.add(conceptResource, SKOS.IN_SCHEME, scheme);

    String createdDate = DateUtil.toISO8601String(concept.getCreated());
    model.add(conceptResource, DCTerms.created, createdDate);

    String modifiedDate = DateUtil.toISO8601String(concept.getModified());
    model.add(conceptResource, DCTerms.modified, modifiedDate);

    boolean hasNotation = concept.getNotation() != null && !concept.getNotation().isEmpty();
    if (hasNotation) {
        model.add(conceptResource, SKOS.NOTATION, StringEscapeUtils.unescapeXml(concept.getNotation()));
    }

    // Declare the status property in the ontology before attaching the status value.
    DatatypeProperty statusProperty = ontModel.createDatatypeProperty(ISOTHES.getURI() + "status");
    Literal statusLabel = ontModel.createLiteral("status");
    statusProperty.addLabel(statusLabel);

    model.add(conceptResource, ISOTHES.STATUS, concept.getStatus().toString());
    return model;
}
From source file:com.itude.mobile.mobbl.core.model.parser.MBXmlDocumentParser.java
@Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { MBElementContainer element = null;//from w ww . j a va 2 s .c o m boolean copyAttributes = true; // check that we have the correct document type if (_stack.size() == 0) { if (!localName.equals(_rootElementName)) { String message = "Error parsing document " + _definition.getName() + ": encountered an element with name " + localName + " but expected " + _rootElementName; throw new MBInvalidDocumentException(message); } element = _rootElement; copyAttributes = _copyRootAttributes; } else if (isValidPath(getCurrentPath())) { _pathStack.add(localName); try { MBElementDefinition elemDef = _definition.getElementWithPath(getCurrentPath()); element = new MBElement(elemDef); _stack.peek().addElement((MBElement) element); } catch (MBInvalidElementNameException e) { MBLog.w(MBConstants.APPLICATION_NAME, "Skipping element with name " + localName + ". Element is not in definition " + _definition.getName()); _ignoredPaths.add(getCurrentPath()); } } // add name to pathStack if a child element has the same name as an element that's already on an ignored path else if (localName.equals(_pathStack.peek())) { _pathStack.add(localName); _ignoredPaths.add(getCurrentPath()); } if (element != null) { // Do not process elements that are not defined; so also check for nil definition if (copyAttributes && element.getDefinition() != null) { for (int i = 0; i < attributes.getLength(); i++) { String unescapedXml = StringEscapeUtils.unescapeXml(attributes.getValue(i)); ((MBElement) element).setAttributeValue(unescapedXml, attributes.getLocalName(i), false); } } _stack.add(element); } }
From source file:com.nike.cerberus.auth.connector.onelogin.OneLoginAuthConnector.java
/** * Takes the list of ldapGroups received from OneLogin and parses them in to a set of Strings * @param ldapGroups A string consisting of ldap groups received from OneLogin * @return A set of Strings consisting of the ldap groups that were parsed from the provided string *//* www .j ava2 s . co m*/ protected Set<String> parseLdapGroups(final String ldapGroups) { Set<String> groups = new HashSet<>(); if (ldapGroups == null) { return groups; } // One Login double xml escapes entries String escapedLdapGroups = StringEscapeUtils.unescapeXml(StringEscapeUtils.unescapeXml(ldapGroups)); Iterable<String> canonicalNameIterable; Iterable<String> piecesIterable; Iterable<String> canonicalNames = Splitter.on(";").split(escapedLdapGroups); for (String canonicalName : canonicalNames) { canonicalNameIterable = Splitter.on(",").split(canonicalName); String[] pieces = Iterables.toArray(canonicalNameIterable, String.class); piecesIterable = Splitter.on("=").split(pieces[0]); String[] parts = Iterables.toArray(piecesIterable, String.class); if (parts.length >= 2) { groups.add(parts[1]); } else { throw ApiException.newBuilder().withApiErrors(DefaultApiError.SERVICE_UNAVAILABLE) .withExceptionMessage("OneLogin user info member-of field is malformed!").build(); } } return groups; }
From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java
private Set<String> getConceptLabel(Set<ThesaurusConcept> concepts) { Set<String> conceptLabels = new HashSet<String>(); for (ThesaurusConcept concept : concepts) { String conceptId = concept.getIdentifier(); String conceptLexicalValue = ""; try {//from w ww .jav a 2 s . c om conceptLexicalValue = StringEscapeUtils .unescapeXml(thesaurusConceptService.getConceptLabel(concept.getIdentifier())); } catch (BusinessException bex) { } conceptLabels.add(conceptLexicalValue + " (" + conceptId + ")"); } return conceptLabels; }
From source file:de.gesundkrank.wikipedia.hadoop.parser.Parser.java
/**
 * Reads the text of the current revision, starting at {@code line}.
 *
 * Nothing is read when the text was already found or when {@code line} does not match
 * the start of a text section. Otherwise lines are consumed from {@code in} until one
 * matches {@code TEXT_END_PATTERN} or the input ends. Every line is XML-unescaped,
 * including the fragment captured from the closing line, which was previously appended
 * with its escapes intact.
 *
 * @param line the current input line
 * @param in the reader supplying the following lines
 * @return {@code true} if the text was read and stored on the revision, {@code false} otherwise
 * @throws IOException if reading from {@code in} fails
 */
private boolean readText(String line, BufferedReader in) throws IOException {
    if (foundText) {
        return false;
    }
    line = matchTextBegin(line);
    if (line == null) {
        return false;
    }

    StringBuilder text = new StringBuilder();
    do {
        Matcher textEMatcher = TEXT_END_PATTERN.matcher(line);
        if (textEMatcher.matches()) {
            // The closing line carries escaped content too; unescape it like every other line.
            text.append(StringEscapeUtils.unescapeXml(textEMatcher.group(1)));
            break;
        }
        text.append(StringEscapeUtils.unescapeXml(line)).append(NEWLINE);
        line = in.readLine();
    } while (line != null);

    foundText = true;
    revision.setText(text.toString());
    return true;
}
From source file:au.gov.ga.earthsci.discovery.csw.CSWDiscoveryResult.java
/**
 * Creates a discovery result from a CSW record element.
 *
 * Reads the title and description (both XML-unescaped; newlines in the description are
 * normalized to {@code \n}), the reference URLs together with their schemes (references
 * whose text is not a valid URL are skipped) and the bounding box corners.
 *
 * @param discovery the discovery this result belongs to
 * @param index the index of this result
 * @param cswRecordElement the CSW record element to read from
 * @throws XPathExpressionException if one of the XPath expressions cannot be evaluated
 */
public CSWDiscoveryResult(CSWDiscovery discovery, int index, Element cswRecordElement)
        throws XPathExpressionException {
    super(discovery, index);

    XPath xpath = WWXML.makeXPath();

    String recordTitle = (String) xpath.compile("title/text()").evaluate(cswRecordElement, //$NON-NLS-1$
            XPathConstants.STRING);
    recordTitle = StringEscapeUtils.unescapeXml(recordTitle);

    String recordDescription = (String) xpath.compile("description/text()").evaluate(cswRecordElement, //$NON-NLS-1$
            XPathConstants.STRING);
    recordDescription = StringEscapeUtils.unescapeXml(recordDescription);
    // Normalize newlines.
    recordDescription = recordDescription.replace("\r\n", "\n"); //$NON-NLS-1$ //$NON-NLS-2$
    recordDescription = recordDescription.replace("\r", "\n"); //$NON-NLS-1$ //$NON-NLS-2$

    this.title = recordTitle;
    this.description = recordDescription;

    NodeList referenceElements = (NodeList) xpath.compile("references/reference").evaluate(cswRecordElement, //$NON-NLS-1$
            XPathConstants.NODESET);
    final int referenceCount = referenceElements.getLength();
    for (int i = 0; i < referenceCount; i++) {
        Element referenceElement = (Element) referenceElements.item(i);
        String scheme = referenceElement.getAttribute("scheme"); //$NON-NLS-1$
        try {
            URL url = new URL(referenceElement.getTextContent());
            references.add(url);
            referenceSchemes.add(scheme);
        } catch (MalformedURLException e) {
            // References that are not valid URLs are skipped.
        }
    }

    String lowerCorner = (String) xpath.compile("boundingBox/lowerCorner/text()").evaluate(cswRecordElement, //$NON-NLS-1$
            XPathConstants.STRING);
    String upperCorner = (String) xpath.compile("boundingBox/upperCorner/text()").evaluate(cswRecordElement, //$NON-NLS-1$
            XPathConstants.STRING);
    this.bounds = Bounds.fromSector(parseBoundingSector(lowerCorner, upperCorner));
}

/**
 * Parses the lower and upper corner strings of a bounding box into a sector.
 * Each corner is expected to hold two whitespace-separated numbers, read as longitude
 * followed by latitude. Returns {@code null} when either corner is blank or does not
 * match that format.
 */
private static Sector parseBoundingSector(String min, String max) {
    if (Util.isBlank(min) || Util.isBlank(max)) {
        return null;
    }
    min = StringEscapeUtils.unescapeXml(min);
    max = StringEscapeUtils.unescapeXml(max);

    String doubleGroup = "([-+]?(?:\\d*\\.?\\d+)|(?:\\d+\\.))"; //$NON-NLS-1$
    Pattern cornerPattern = Pattern.compile("\\s*" + doubleGroup + "\\s+" + doubleGroup + "\\s*"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    Matcher minMatcher = cornerPattern.matcher(min);
    Matcher maxMatcher = cornerPattern.matcher(max);
    if (!minMatcher.matches() || !maxMatcher.matches()) {
        return null;
    }

    double minLon = Double.parseDouble(minMatcher.group(1));
    double minLat = Double.parseDouble(minMatcher.group(2));
    double maxLon = Double.parseDouble(maxMatcher.group(1));
    double maxLat = Double.parseDouble(maxMatcher.group(2));
    return Sector.fromDegrees(minLat, maxLat, minLon, maxLon);
}