Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeXml

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeXml

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeXml.

Prototype

public static final String unescapeXml(final String input) 

Source Link

Document

Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Supports only the five basic XML entities (gt, lt, quot, amp, apos).

Usage

From source file:hoot.services.controllers.wps.MarkItemsReviewedWpsTest.java

/**
 * Checks the WPS response produced by the "mark items reviewed" process.
 *
 * <p>The response must carry the expected process identifier and exactly three outputs:
 * {@code changesetUploadResponse} (a string holding an XML-escaped document),
 * {@code numItemsMarkedReviewed} (the integer 5) and {@code changesetId} (an integer
 * greater than -1). The parsed changeset id is handed to {@code ReviewTestUtils}, which then
 * verifies that the data was written as reviewed.
 *
 * @param responseStr the raw WPS response
 * @return always {@code null}
 * @throws Exception if the response cannot be parsed or queried
 */
@Override
protected String verifyWpsResponse(final String responseStr) throws Exception {
    final Document responseDoc = XmlDocumentBuilder.parse(responseStr, false);
    Assert.assertNotNull(responseDoc);
    System.out.println(XmlDocumentBuilder.toString(responseDoc));

    final XPath xpath = XmlDocumentBuilder.createXPath();
    long parsedChangesetId = -1;
    try {
        final String actualProcessId = xpath.evaluate(".//Process/Identifier", responseDoc);
        Assert.assertEquals(processId, actualProcessId);
        final NodeList outputs = XPathAPI.selectNodeList(responseDoc, ".//ProcessOutputs/Output");
        Assert.assertEquals(3, outputs.getLength());

        // Output 1: the changeset upload response, delivered as an XML-escaped string.
        final String firstIdentifier = xpath.evaluate(".//ProcessOutputs/Output[1]/Identifier", responseDoc);
        Assert.assertEquals("changesetUploadResponse", firstIdentifier);
        final String firstDataType = xpath.evaluate(".//ProcessOutputs/Output[1]/Data/LiteralData/@dataType",
                responseDoc);
        Assert.assertEquals("string", firstDataType);
        final String escapedChangeset = xpath.evaluate(".//ProcessOutputs/Output[1]/Data/LiteralData",
                responseDoc);
        final Document changesetResponse = XmlDocumentBuilder
                .parse(StringEscapeUtils.unescapeXml(escapedChangeset));
        Assert.assertNotNull(changesetResponse);
        // Changeset uploading is heavily tested in the OSM controller tests, so it is not
        // inspected any further here.

        // Output 2: number of items marked as reviewed.
        final String secondIdentifier = xpath.evaluate(".//ProcessOutputs/Output[2]/Identifier", responseDoc);
        Assert.assertEquals("numItemsMarkedReviewed", secondIdentifier);
        final String secondDataType = xpath.evaluate(".//ProcessOutputs/Output[2]/Data/LiteralData/@dataType",
                responseDoc);
        Assert.assertEquals("integer", secondDataType);
        final String reviewedCount = xpath.evaluate(".//ProcessOutputs/Output[2]/Data/LiteralData", responseDoc);
        Assert.assertEquals(5, Integer.parseInt(reviewedCount));

        // Output 3: id of the changeset that was created.
        final String thirdIdentifier = xpath.evaluate(".//ProcessOutputs/Output[3]/Identifier", responseDoc);
        Assert.assertEquals("changesetId", thirdIdentifier);
        final String thirdDataType = xpath.evaluate(".//ProcessOutputs/Output[3]/Data/LiteralData/@dataType",
                responseDoc);
        Assert.assertEquals("integer", thirdDataType);
        final String changesetIdText = xpath.evaluate(".//ProcessOutputs/Output[3]/Data/LiteralData",
                responseDoc);
        parsedChangesetId = Integer.parseInt(changesetIdText);
        Assert.assertTrue(parsedChangesetId > -1);
    } catch (XPathExpressionException e) {
        Assert.fail("Error parsing response document: " + e.getMessage());
    }

    // Verify that the changeset data was actually written.
    ReviewTestUtils.changesetId = parsedChangesetId;
    ReviewTestUtils.verifyDataMarkedAsReviewed(true);

    return null;
}

From source file:mServer.crawler.sender.MediathekBr.java

/**
 * Rebuilds {@code listeThemen} from the BR Mediathek programme index page.
 *
 * <p>The page is downloaded and, starting at the {@code <ul class="clearFix">} marker, scanned
 * for links matching {@code MUSTER_URL}. For every link the relative URL (up to the closing
 * quote) and the topic name (content of the following {@code <span>}) are extracted. The topic
 * name is XML- and HTML-unescaped and recorded in {@code listeAlleThemen} if not yet known.
 * Links whose relative URL starts with {@code "sendungen/"} or {@code "video/"} are added to
 * {@code listeThemen}. Scanning stops early when {@code Config.getStop()} reports a stop.
 *
 * <p>Errors while handling a single link are logged and the scan continues with the next one.
 */
private void getTheman() {
    final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html";
    final String MUSTER_URL = "<a href=\"/mediathek/video/";
    final String MUSTER_URL_1 = "sendungen/";
    final String MUSTER_URL_2 = "video/";
    listeThemen.clear();
    MSStringBuilder seite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
    GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
    seite = getUrlIo.getUri(SENDERNAME, ADRESSE, StandardCharsets.UTF_8, 5 /* attempts */, seite, "");
    int pos1;
    if ((pos1 = seite.indexOf("<ul class=\"clearFix\">")) != -1) {
        while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) {
            if (Config.getStop()) {
                break;
            }

            try {
                pos1 += MUSTER_URL.length();
                // The URL is scoped to this iteration: if no closing quote is found it stays
                // empty and the link is skipped, instead of silently reusing the URL of the
                // previously processed link.
                String url = "";
                final int pos2 = seite.indexOf("\"", pos1);
                if (pos2 != -1) {
                    url = seite.substring(pos1, pos2);
                }
                String thema = seite.extract("<span>", "<", pos1);
                thema = StringEscapeUtils.unescapeXml(thema.trim());
                thema = StringEscapeUtils.unescapeHtml4(thema.trim());
                if (!listeAlleThemen.contains(thema)) {
                    listeAlleThemen.add(thema);
                }
                if (url.isEmpty() || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) {
                    continue;
                }
                // Note: an earlier variant appended "#seriesMoreCount=10" to the
                // ".../video/sendungen/" URL when everything was to be loaded; it was
                // disabled because the BR server is a bit too slow for that.
                // Add the entry to the list.
                String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema };
                listeThemen.addUrl(add);
            } catch (Exception ex) {
                Log.errorLog(821213698, ex);
            }
        }
    }
}

From source file:fr.mcc.ginco.audit.commands.TermCommandBuilder.java

/**
 * Builds the list of command lines for preferred terms added between two revisions.
 *
 * <p>A term of {@code currentTerms} is considered added when it is preferred and its lexical
 * value is not present in the view built from {@code previousTerms}. If the term has
 * non-preferred terms attached, its line is prefixed with {@code CommandLine.STARS}. Otherwise a
 * plain line is emitted only when the term's concept has no parent concepts and is not itself a
 * parent of any concept referenced by {@code currentTerms}.
 *
 * @param previousTerms terms of the previous revision
 * @param currentTerms terms of the current revision
 * @return the command lines describing the added preferred terms, in iteration order
 */
public List<CommandLine> buildAddedPrefTermsLines(List<ThesaurusTerm> previousTerms,
        List<ThesaurusTerm> currentTerms) {
    List<CommandLine> termsOperations = new ArrayList<CommandLine>();

    Map<String, ThesaurusTerm> oldLexicalValues = mistralStructuresBuilder.getTermVersionsView(previousTerms);

    Map<String, List<ThesaurusTerm>> newNotPreferredTermsByTerm = mistralStructuresBuilder
            .getNotPreferredTermsByTerm(currentTerms);

    // The set of all parent concepts depends only on currentTerms, so it is built at most once
    // (on first use) instead of being rebuilt for every added term.
    Set<ThesaurusConcept> allParents = null;

    for (ThesaurusTerm currentTerm : currentTerms) {
        if (!oldLexicalValues.containsKey(currentTerm.getLexicalValue()) && currentTerm.getPrefered()) {
            CommandLine additionLine = new CommandLine();
            if (!newNotPreferredTermsByTerm.get(currentTerm.getLexicalValue()).isEmpty()) {
                additionLine.setValue(
                        CommandLine.STARS + StringEscapeUtils.unescapeXml(currentTerm.getLexicalValue()));
                termsOperations.add(additionLine);
            } else {
                if (allParents == null) {
                    allParents = new HashSet<ThesaurusConcept>();
                    for (ThesaurusTerm currentChildTerm : currentTerms) {
                        allParents.addAll(currentChildTerm.getConcept().getParentConcepts());
                    }
                }
                if (currentTerm.getConcept().getParentConcepts().isEmpty()
                        && !allParents.contains(currentTerm.getConcept())) {
                    additionLine.setValue(StringEscapeUtils.unescapeXml(currentTerm.getLexicalValue()));
                    termsOperations.add(additionLine);
                }
            }
        }
    }
    return termsOperations;
}

From source file:com.jaeksoft.searchlib.crawler.web.spider.HtmlArchiver.java

/**
 * Downloads an object referenced by a page and returns the local path to use in its place.
 *
 * <p>The reference is XML-unescaped and resolved against {@code parentUrl}. A reference to the
 * archived page itself maps to {@code "index.html"}, and a URL already present in
 * {@code urlFileMap} maps to its registered file. Otherwise the object is downloaded, its file
 * extension is derived from the content type where it is a known one, and the content is
 * written to a newly registered destination file (CSS content is passed through
 * {@code checkCSSContent} first).
 *
 * <p>When the recursion guard refuses entry the original {@code src} is returned; when the link
 * cannot be resolved, the download fails, or no file name is available, the unescaped
 * {@code src} is returned.
 *
 * @param parentUrl URL of the document containing the reference
 * @param src the reference as found in the document
 * @param contentType expected content type, or {@code null} to use the one reported by the download
 * @return the local path of the downloaded object, or {@code src} if nothing was downloaded
 */
final private String downloadObject(URL parentUrl, String src, String contentType)
        throws ClientProtocolException, IllegalStateException, IOException, SearchLibException,
        URISyntaxException {
    RecursiveEntry recursionGuard = recursiveSecurity.enter();
    if (recursionGuard == null) {
        Logging.warn("Max recursion reached - " + recursiveSecurity + " src: " + src + " url: " + parentUrl);
        return src;
    }
    try {
        src = StringEscapeUtils.unescapeXml(src);
        URL objectURL = LinkUtils.getLink(parentUrl, src, null, false);
        if (objectURL == null) {
            return src;
        }
        if (objectURL.equals(pageUrl)) {
            return "index.html";
        }

        // Reuse the file of an object that has already been archived.
        String externalUrl = objectURL.toExternalForm();
        String registeredName = urlFileMap.get(externalUrl);
        if (registeredName != null) {
            return getLocalPath(parentUrl, registeredName);
        }

        DownloadItem item;
        try {
            item = downloader.get(objectURL.toURI(), null);
        } catch (IOException e) {
            Logging.warn("IO Exception on " + objectURL.toURI(), e);
            return src;
        }
        String downloadedName = item.getFileName();
        if (downloadedName == null || downloadedName.isEmpty()) {
            return src;
        }
        item.checkNoErrorRange(200, 300);

        String baseName = FilenameUtils.getBaseName(downloadedName);
        String extension = FilenameUtils.getExtension(downloadedName);
        if (contentType == null) {
            contentType = item.getContentBaseType();
        }
        // Known content types override the extension taken from the file name.
        final String[][] extensionByContentType = {
                { "text/html", "html" },
                { "text/javascript", "js" },
                { "text/css", "css" },
                { "application/x-shockwave-flash", "swf" },
                { "image/png", "png" },
                { "image/gif", "gif" },
                { "image/jpeg", "jpg" },
                { "image/jpg", "jpg" } };
        for (String[] mapping : extensionByContentType) {
            if (mapping[0].equalsIgnoreCase(contentType)) {
                extension = mapping[1];
                break;
            }
        }

        File destFile = getAndRegisterDestFile(externalUrl, baseName, extension);
        if (!"css".equals(extension)) {
            item.writeToFile(destFile);
        } else {
            // Use the checked CSS content when the check produced a non-empty result.
            String cssContent = item.getContentAsString();
            StringBuffer checkedCss = checkCSSContent(objectURL, cssContent);
            if (checkedCss != null && checkedCss.length() > 0) {
                cssContent = checkedCss.toString();
            }
            FileUtils.write(destFile, cssContent);
        }

        return getLocalPath(parentUrl, destFile.getName());
    } catch (HttpHostConnectException e) {
        Logging.warn(e);
        return src;
    } catch (UnknownHostException e) {
        Logging.warn(e);
        return src;
    } catch (WrongStatusCodeException e) {
        Logging.warn(e);
        return src;
    } finally {
        recursionGuard.release();
    }
}

From source file:fr.mcc.ginco.exports.skos.SKOSConceptExporter.java

/**
 * Exports the minimal information of a concept into the given model.
 *
 * <p>Adds to {@code conceptResource} the scheme it belongs to (the identifier of the concept's
 * thesaurus), its creation and modification dates, its notation (XML-unescaped, only when
 * present and non-empty) and its status. A {@code status} datatype property labelled
 * {@code "status"} is also declared in {@code ontModel}.
 *
 * @param concept the concept to export
 * @param conceptResource the resource the statements are attached to
 * @param model the model receiving the statements
 * @param ontModel the ontology model in which the status property is declared
 * @return the {@code model} that was passed in
 */
private Model exportConceptInformation(ThesaurusConcept concept, Resource conceptResource, Model model,
        OntModel ontModel) {

    Resource scheme = model.createResource(concept.getThesaurus().getIdentifier());
    model.add(conceptResource, SKOS.IN_SCHEME, scheme);

    model.add(conceptResource, DCTerms.created, DateUtil.toISO8601String(concept.getCreated()));
    model.add(conceptResource, DCTerms.modified, DateUtil.toISO8601String(concept.getModified()));

    String notation = concept.getNotation();
    boolean hasNotation = notation != null && !notation.isEmpty();
    if (hasNotation) {
        model.add(conceptResource, SKOS.NOTATION, StringEscapeUtils.unescapeXml(notation));
    }

    // Declare the ISO-THES status property, with its label, in the ontology model.
    DatatypeProperty statusProperty = ontModel.createDatatypeProperty(ISOTHES.getURI() + "status");
    Literal statusLabel = ontModel.createLiteral("status");
    statusProperty.addLabel(statusLabel);

    model.add(conceptResource, ISOTHES.STATUS, concept.getStatus().toString());

    return model;
}

From source file:com.itude.mobile.mobbl.core.model.parser.MBXmlDocumentParser.java

/**
 * Handles the start of an XML element while building the document.
 *
 * <p>The first element must be named like the expected root element, otherwise an
 * {@code MBInvalidDocumentException} is thrown. Elements below a valid path are created from
 * the document definition and attached to the element on top of the stack; elements unknown to
 * the definition are logged and their path is marked as ignored. A child that has the same name
 * as the element on top of the path stack, while on an ignored path, is tracked as ignored too.
 *
 * <p>When an element was resolved, its attributes are copied (XML-unescaped) unless attribute
 * copying is disabled for the root or the element has no definition, and the element is pushed
 * on the stack.
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException {

    MBElementContainer current = null;
    boolean copyAttributes = true;

    if (_stack.size() == 0) {
        // First element: it has to be the expected document root.
        if (localName.equals(_rootElementName)) {
            current = _rootElement;
            copyAttributes = _copyRootAttributes;
        } else {
            String message = "Error parsing document " + _definition.getName()
                    + ": encountered an element with name " + localName + " but expected " + _rootElementName;
            throw new MBInvalidDocumentException(message);
        }
    } else if (isValidPath(getCurrentPath())) {
        _pathStack.add(localName);
        try {
            MBElementDefinition childDefinition = _definition.getElementWithPath(getCurrentPath());
            MBElement child = new MBElement(childDefinition);
            // Keep the reference before attaching it, so the element is still processed below
            // if attaching it to the parent fails.
            current = child;
            _stack.peek().addElement(child);
        } catch (MBInvalidElementNameException e) {
            MBLog.w(MBConstants.APPLICATION_NAME, "Skipping element with name " + localName
                    + ". Element is not in definition " + _definition.getName());
            _ignoredPaths.add(getCurrentPath());
        }
    } else if (localName.equals(_pathStack.peek())) {
        // A child with the same name as an element that is already on an ignored path: track
        // it on the path stack as well.
        _pathStack.add(localName);
        _ignoredPaths.add(getCurrentPath());
    }

    if (current == null) {
        return;
    }

    // Elements without a definition are not processed, so attributes are copied only when a
    // definition is available.
    if (copyAttributes && current.getDefinition() != null) {
        for (int index = 0; index < attributes.getLength(); index++) {
            String attributeValue = StringEscapeUtils.unescapeXml(attributes.getValue(index));
            ((MBElement) current).setAttributeValue(attributeValue, attributes.getLocalName(index), false);
        }
    }
    _stack.add(current);
}

From source file:com.nike.cerberus.auth.connector.onelogin.OneLoginAuthConnector.java

/**
 * Parses the LDAP groups string received from OneLogin into a set of group names.
 *
 * <p>The value is XML-unescaped twice and split on {@code ';'} into canonical names. For each
 * canonical name only the first {@code ','}-separated piece is used; it is split on {@code '='}
 * and its second part is taken as the group name.
 *
 * @param ldapGroups a string consisting of LDAP groups received from OneLogin, may be {@code null}
 * @return the group names parsed from the provided string; empty when {@code ldapGroups} is
 *         {@code null}
 * @throws ApiException if the first piece of a canonical name contains no {@code '='}
 */
protected Set<String> parseLdapGroups(final String ldapGroups) {
    final Set<String> groupNames = new HashSet<>();
    if (ldapGroups == null) {
        return groupNames;
    }

    // OneLogin XML-escapes the entries twice, so two unescape passes are needed.
    final String unescapedOnce = StringEscapeUtils.unescapeXml(ldapGroups);
    final String unescapedGroups = StringEscapeUtils.unescapeXml(unescapedOnce);

    for (String canonicalName : Splitter.on(";").split(unescapedGroups)) {
        final String[] pieces = Iterables.toArray(Splitter.on(",").split(canonicalName), String.class);
        final String[] keyAndValue = Iterables.toArray(Splitter.on("=").split(pieces[0]), String.class);

        if (keyAndValue.length < 2) {
            throw ApiException.newBuilder().withApiErrors(DefaultApiError.SERVICE_UNAVAILABLE)
                    .withExceptionMessage("OneLogin user info member-of field is malformed!").build();
        }
        groupNames.add(keyAndValue[1]);
    }

    return groupNames;
}

From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java

/**
 * Builds a display label of the form {@code "<label> (<identifier>)"} for each concept.
 *
 * <p>The label is looked up through {@code thesaurusConceptService} and XML-unescaped. When the
 * lookup fails with a {@code BusinessException} the label part is left empty.
 *
 * @param concepts the concepts to label
 * @return the set of display labels
 */
private Set<String> getConceptLabel(Set<ThesaurusConcept> concepts) {
    Set<String> labels = new HashSet<String>();
    for (ThesaurusConcept concept : concepts) {
        String identifier = concept.getIdentifier();
        String label;
        try {
            String rawLabel = thesaurusConceptService.getConceptLabel(identifier);
            label = StringEscapeUtils.unescapeXml(rawLabel);
        } catch (BusinessException ignored) {
            // Best effort: a concept whose label cannot be resolved is listed by identifier only.
            label = "";
        }
        labels.add(label + " (" + identifier + ")");
    }
    return labels;
}

From source file:de.gesundkrank.wikipedia.hadoop.parser.Parser.java

/**
 * Reads the text of a revision, starting at {@code line} and continuing from {@code in}.
 *
 * <p>Nothing is read when the text was already found or when {@code matchTextBegin} does not
 * recognise {@code line} as the beginning of the text. Otherwise lines are collected until one
 * matches {@code TEXT_END_PATTERN} or the reader is exhausted. Every collected line, including
 * the content captured on the closing line, is XML-unescaped. The result is stored on the
 * current revision.
 *
 * @param line the current input line
 * @param in the reader supplying the following lines
 * @return {@code true} if the text was read and stored, {@code false} otherwise
 * @throws IOException if reading from {@code in} fails
 */
private boolean readText(String line, BufferedReader in) throws IOException {
    if (foundText) {
        return false;
    }

    line = matchTextBegin(line);
    if (line == null) {
        return false;
    }

    StringBuilder text = new StringBuilder();
    do {
        Matcher textEMatcher = TEXT_END_PATTERN.matcher(line);
        if (textEMatcher.matches()) {
            // The closing line carries escaped content as well; unescape it like every other
            // line so the end of the text is not left with raw XML entities.
            text.append(StringEscapeUtils.unescapeXml(textEMatcher.group(1)));
            break;
        }
        line = StringEscapeUtils.unescapeXml(line);

        text.append(line).append(NEWLINE);
        line = in.readLine();
    } while (line != null);

    foundText = true;
    revision.setText(text.toString());
    return true;
}

From source file:au.gov.ga.earthsci.discovery.csw.CSWDiscoveryResult.java

/**
 * Creates a discovery result from a CSW record element.
 *
 * <p>Reads the record's title and description (XML-unescaped, with newlines in the description
 * normalized to {@code \n}), its references together with their {@code scheme} attribute, and
 * its bounding box. References whose text is not a valid URL are skipped. The bounding box is
 * only set when both corners are present and each consists of two numbers separated by
 * whitespace; the first number is read as longitude and the second as latitude.
 *
 * @param discovery the discovery this result belongs to
 * @param index the index of this result
 * @param cswRecordElement the CSW record element to read from
 * @throws XPathExpressionException if one of the XPath expressions cannot be evaluated
 */
public CSWDiscoveryResult(CSWDiscovery discovery, int index, Element cswRecordElement)
        throws XPathExpressionException {
    super(discovery, index);

    final XPath xpath = WWXML.makeXPath();

    final String rawTitle = (String) xpath.compile("title/text()") //$NON-NLS-1$
            .evaluate(cswRecordElement, XPathConstants.STRING);
    final String recordTitle = StringEscapeUtils.unescapeXml(rawTitle);

    final String rawDescription = (String) xpath.compile("description/text()") //$NON-NLS-1$
            .evaluate(cswRecordElement, XPathConstants.STRING);
    // Unescape, then normalize newlines.
    final String recordDescription = StringEscapeUtils.unescapeXml(rawDescription)
            .replace("\r\n", "\n") //$NON-NLS-1$ //$NON-NLS-2$
            .replace("\r", "\n"); //$NON-NLS-1$ //$NON-NLS-2$

    this.title = recordTitle;
    this.description = recordDescription;

    final NodeList referenceNodes = (NodeList) xpath.compile("references/reference") //$NON-NLS-1$
            .evaluate(cswRecordElement, XPathConstants.NODESET);
    final int referenceCount = referenceNodes.getLength();
    for (int i = 0; i < referenceCount; i++) {
        final Element referenceNode = (Element) referenceNodes.item(i);
        final String scheme = referenceNode.getAttribute("scheme"); //$NON-NLS-1$
        try {
            final URL referenceUrl = new URL(referenceNode.getTextContent());
            references.add(referenceUrl);
            referenceSchemes.add(scheme);
        } catch (MalformedURLException ignored) {
            // A reference that is not a valid URL is left out.
        }
    }

    final String lowerCornerText = (String) xpath.compile("boundingBox/lowerCorner/text()") //$NON-NLS-1$
            .evaluate(cswRecordElement, XPathConstants.STRING);
    final String upperCornerText = (String) xpath.compile("boundingBox/upperCorner/text()") //$NON-NLS-1$
            .evaluate(cswRecordElement, XPathConstants.STRING);

    Sector sector = null;
    if (!Util.isBlank(lowerCornerText) && !Util.isBlank(upperCornerText)) {
        final String lowerCorner = StringEscapeUtils.unescapeXml(lowerCornerText);
        final String upperCorner = StringEscapeUtils.unescapeXml(upperCornerText);

        // Each corner is expected to be two numbers separated by whitespace.
        final String doubleGroup = "([-+]?(?:\\d*\\.?\\d+)|(?:\\d+\\.))"; //$NON-NLS-1$
        final Pattern cornerPattern = Pattern
                .compile("\\s*" + doubleGroup + "\\s+" + doubleGroup + "\\s*"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        final Matcher lowerMatcher = cornerPattern.matcher(lowerCorner);
        final Matcher upperMatcher = cornerPattern.matcher(upperCorner);
        if (lowerMatcher.matches() && upperMatcher.matches()) {
            final double minLon = Double.parseDouble(lowerMatcher.group(1));
            final double minLat = Double.parseDouble(lowerMatcher.group(2));
            final double maxLon = Double.parseDouble(upperMatcher.group(1));
            final double maxLat = Double.parseDouble(upperMatcher.group(2));
            sector = Sector.fromDegrees(minLat, maxLat, minLon, maxLon);
        }
    }
    this.bounds = Bounds.fromSector(sector);
}