Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeXml

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeXml

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeXml.

Prototype

public static final String unescapeXml(final String input) 

Source Link

Document

Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Supports only the five basic XML entities (gt, lt, quot, amp, apos).

Usage

From source file:edu.illinois.cs.cogcomp.wikifier.wiki.importing.WikipediaRedirectExtractor.java

/**
 * Extracts the page title from a raw title line and normalises it.
 *
 * If the line contains a closing {@code </title>} tag, the text between the
 * first {@code titlePattern.length()} characters and that tag is used;
 * otherwise the whole input is used. XML entities are then unescaped and
 * every space is replaced by an underscore.
 *
 * @param title raw line holding the title
 * @return the unescaped title with spaces replaced by underscores
 */
private String cleanupTitle(String title) {
    final int closingTag = title.indexOf("</title>");
    String raw = title;
    if (closingTag != -1) {
        raw = title.substring(titlePattern.length(), closingTag);
    }
    return StringEscapeUtils.unescapeXml(raw).replace(' ', '_');
}

From source file:msearch.filmeSuchen.sender.MediathekKika.java

/**
 * Downloads the KiKA start page and fills {@code listeThemen} with one entry
 * per episode link found on it.
 *
 * The page is scanned for topic headers ({@code MUSTER_THEMA}); for every
 * topic, all following links ({@code MUSTER_URL}) up to the next topic header
 * are collected. Each entry added is
 * {@code { ADRESSE + url, thema, titel, datum }}.
 * Exceptions raised while processing one topic are logged and the scan
 * continues with the next topic.
 */
void addToListNormal() {
    // Sample of the markup being parsed:
    //<strong style="margin-left:10px;">Sesamstraße präsentiert: Eine Möhre für Zwei</strong><br />
    //<a style="margin-left:20px;" href="?programm=168&amp;id=14487&amp;ag=5" title="Sendung vom 10.10.2012" class="overlay_link">42. Ordnung ist das halbe Chaos</a><br />
    //<a style="margin-left:20px;" href="?programm=168&amp;id=14485&amp;ag=5" title="Sendung vom 10.10.2012" class="overlay_link">41. Über den Wolken</a><br />
    final String ADRESSE = "http://kikaplus.net/clients/kika/kikaplus/";
    final String MUSTER_URL = "<a style=\"margin-left:20px;\" href=\"";
    final String MUSTER_THEMA = "<strong style=\"margin-left:10px;\">";
    final String MUSTER_DATUM = "title=\"Sendung vom ";
    listeThemen.clear();
    seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER);
    seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 3, seite, "KiKA: Startseite");
    int pos = 0;
    int pos1, pos2, stop, pDatum1, pDatum2, pTitel1, pTitel2;
    String url, thema, datum, titel;
    // outer loop: one iteration per topic header
    while ((pos = seite.indexOf(MUSTER_THEMA, pos)) != -1) {
        try {
            thema = "";
            pos += MUSTER_THEMA.length();
            // start of the next topic header, or -1 when this is the last topic
            stop = seite.indexOf(MUSTER_THEMA, pos);
            pos1 = pos;
            // topic name is the text up to the next tag
            if ((pos2 = seite.indexOf("<", pos1)) != -1) {
                thema = seite.substring(pos1, pos2);
            }
            // inner loop: one iteration per link belonging to this topic
            while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) {
                titel = "";
                datum = "";
                if (stop != -1 && pos1 > stop) {
                    // this link already belongs to the next topic
                    break;
                }
                pos1 += MUSTER_URL.length();
                // href value runs up to the closing quote
                if ((pos2 = seite.indexOf("\"", pos1)) != -1) {
                    url = seite.substring(pos1, pos2);
                    //if (!url.equals("")) {
                    // the href is XML-escaped in the page (e.g. &amp;)
                    url = StringEscapeUtils.unescapeXml(url);
                    // only relative links are used; empty, absolute and root-relative ones are skipped
                    if (!url.equals("") && !url.startsWith("http://") && !url.startsWith("/")) {
                        // date: text after MUSTER_DATUM up to the closing quote
                        if ((pDatum1 = seite.indexOf(MUSTER_DATUM, pos2)) != -1) {
                            pDatum1 += MUSTER_DATUM.length();
                            if ((pDatum2 = seite.indexOf("\"", pDatum1)) != -1) {
                                // NOTE(review): when stop == -1 (last topic on the page) this
                                // condition is false, so datum stays empty — confirm this is intended
                                if (stop != -1 && pDatum1 < stop && pDatum2 < stop) {
                                    // the date still lies inside the current topic
                                    datum = seite.substring(pDatum1, pDatum2);
                                }
                            }
                        }

                        // title: link text between the end of the opening tag and the next "<"
                        if ((pTitel1 = seite.indexOf(">", pos2)) != -1) {
                            pTitel1 += 1;
                            if ((pTitel2 = seite.indexOf("<", pTitel1)) != -1) {
                                //if (stop != -1 && pTitel1 > stop && pTitel2 > stop) {
                                // NOTE(review): same stop != -1 restriction as for the date above
                                if (stop != -1 && pTitel1 < stop && pTitel2 < stop) {
                                    titel = seite.substring(pTitel1, pTitel2);
                                }
                            }
                        }
                        // add the entry to the list
                        String[] add = new String[] { ADRESSE + url, thema, titel, datum };
                        listeThemen.addUrl(add);
                    }
                }
            }
        } catch (Exception ex) {
            MSLog.fehlerMeldung(-302025469, MSLog.FEHLER_ART_MREADER, "MediathekKiKA.addToList", ex, "");
        }
    }
}

From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java

/**
 * Builds the list of revision lines for the event of term lexical value
 * change/*  ww w. j a v  a2  s  .c om*/
 *
 * @param term
 * @param revision
 * @param oldLexicalValue
 * @return
 */
public JournalLine buildTermLexicalValueChangedLine(ThesaurusTerm term, GincoRevEntity revision,
        String oldLexicalValue) {
    JournalLine journal = buildLineBase(JournalEventsEnum.THESAURUSTERM_LEXICAL_VALUE_UPDATE, revision);
    journal.setTermId(term.getIdentifier());
    if (term.getConcept() != null) {
        journal.setConceptId(term.getConcept().getIdentifier());
    }
    journal.setNewLexicalValue(StringEscapeUtils.unescapeXml(term.getLexicalValue()));
    journal.setOldLexicalValue(StringEscapeUtils.unescapeXml(oldLexicalValue));

    return journal;
}

From source file:de.fatalix.book.importer.CalibriImporter.java

/**
 * Reads an OPF metadata file line by line and copies the Dublin Core values
 * it finds (title, creator, description, publisher, date, language, ISBN)
 * into the given book entry.
 *
 * This is a line-oriented scan, not an XML parse: each value is taken from
 * the text between the first {@code >} of the line and the following
 * {@code <}. A description may continue over several lines; it is collected
 * until a line containing {@code <} is reached. Only the description is
 * XML-unescaped.
 *
 * Lines whose tag is not followed by any text (tag at end of line, or an
 * empty self-closing element) yield an empty value instead of raising
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param pathToOPF path of the OPF file, read as UTF-8
 * @param bmd       entry to populate
 * @return the same {@code bmd} instance
 * @throws IOException if the file cannot be read
 */
private static BookEntry parseOPF(Path pathToOPF, BookEntry bmd) throws IOException {
    List<String> lines = Files.readAllLines(pathToOPF, Charset.forName("UTF-8"));
    boolean multiLineDescription = false;
    String description = "";
    for (String line : lines) {
        if (multiLineDescription) {
            String[] parts = line.split("<");
            if (parts.length == 1) {
                // no tag on this line: the description continues
                description = description + line;
            } else {
                // first tag ends the description; parts is empty for a line made of '<' only
                multiLineDescription = false;
                description = description + (parts.length == 0 ? "" : parts[0]);
                description = StringEscapeUtils.unescapeXml(description);
                bmd.setDescription(description);
            }
        } else {
            if (line.contains("dc:title")) {
                bmd.setTitle(elementText(line));
            } else if (line.contains("dc:creator")) {
                bmd.setAuthor(elementText(line));
            } else if (line.contains("dc:description")) {
                String value = textAfterFirstTag(line);
                if (value.isEmpty() && line.trim().endsWith("/>")) {
                    // empty self-closing element: there is no description to collect
                    continue;
                }
                String[] parts = value.split("<");
                if (parts.length == 1) {
                    // no closing tag on this line: keep collecting on the following lines
                    multiLineDescription = true;
                    description = value;
                } else {
                    value = parts.length == 0 ? "" : parts[0];
                    value = StringEscapeUtils.unescapeXml(value);
                    bmd.setDescription(value);
                }
            } else if (line.contains("dc:publisher")) {
                bmd.setPublisher(elementText(line));
            } else if (line.contains("dc:date")) {
                DateTime dtReleaseDate = new DateTime(elementText(line));
                // year 101 is skipped — presumably a placeholder for "no date"; verify against the producer
                if (dtReleaseDate.getYear() != 101) {
                    bmd.setReleaseDate(dtReleaseDate.toDate());
                }
            } else if (line.contains("dc:language")) {
                bmd.setLanguage(elementText(line));
            } else if (line.contains("opf:scheme=\"ISBN\"")) {
                bmd.setIsbn(elementText(line));
            }
        }
    }
    return bmd;
}

/** Returns the text between the first and the second {@code >} of the line, or "" if there is none. */
private static String textAfterFirstTag(String line) {
    int open = line.indexOf('>');
    if (open == -1) {
        return "";
    }
    int next = line.indexOf('>', open + 1);
    return next == -1 ? line.substring(open + 1) : line.substring(open + 1, next);
}

/** Returns the element text of a single-line element: the text after the first tag up to the next {@code <}. */
private static String elementText(String line) {
    String[] parts = textAfterFirstTag(line).split("<");
    return parts.length == 0 ? "" : parts[0];
}

From source file:net.java.sip.communicator.impl.protocol.jabber.InfoRetreiver.java

/**
 * Loads the vCard of a contact and converts its fields into a list of details.
 *
 * The resulting list is stored in {@code retreivedDetails} under the contact
 * address before it is returned. If anything fails while loading or
 * converting, the error is logged and the details collected so far are
 * stored and returned.
 *
 * @param contactAddress the address to search for.
 * @return the details (possibly an empty list), or {@code null} when there is
 *         no authenticated connection; in that case nothing is stored.
 */
protected List<GenericDetail> retrieveDetails(String contactAddress) {
    List<GenericDetail> result = new LinkedList<GenericDetail>();
    try {
        XMPPConnection connection = jabberProvider.getConnection();

        // without an authenticated connection no vCard can be requested
        if (connection == null || !connection.isAuthenticated())
            return null;

        VCard card = new VCard();

        // if there is no value or is equals to the default one
        // load vcard using smack load method
        if (vcardTimeoutReply == -1 || vcardTimeoutReply == SmackConfiguration.getPacketReplyTimeout())
            card.load(connection, contactAddress);
        else
            load(card, connection, contactAddress, vcardTimeoutReply);

        String tmp;

        // Name fields: these are the only values that get XML-unescaped
        tmp = checkForFullName(card);
        if (tmp != null)
            result.add(new DisplayNameDetail(StringEscapeUtils.unescapeXml(tmp)));

        tmp = card.getFirstName();
        if (tmp != null)
            result.add(new FirstNameDetail(StringEscapeUtils.unescapeXml(tmp)));

        tmp = card.getMiddleName();
        if (tmp != null)
            result.add(new MiddleNameDetail(StringEscapeUtils.unescapeXml(tmp)));

        tmp = card.getLastName();
        if (tmp != null)
            result.add(new LastNameDetail(StringEscapeUtils.unescapeXml(tmp)));

        tmp = card.getNickName();
        if (tmp != null)
            result.add(new NicknameDetail(StringEscapeUtils.unescapeXml(tmp)));

        // Birthday: parsed with the format configured under plugin.accountinfo.BDAY_FORMAT
        tmp = card.getField("BDAY");
        if (tmp != null) {
            try {
                Calendar birthDateCalendar = Calendar.getInstance();
                DateFormat dateFormat = new SimpleDateFormat(
                        JabberActivator.getResources().getI18NString("plugin.accountinfo.BDAY_FORMAT"));
                Date birthDate = dateFormat.parse(tmp);
                birthDateCalendar.setTime(birthDate);
                BirthDateDetail bd = new BirthDateDetail(birthDateCalendar);
                result.add(bd);
            } catch (ParseException e) {
                // a birthday that does not match the format is silently left out
            }
        }
        // Home Details
        // addrField one of
        // POSTAL, PARCEL, (DOM | INTL), PREF, POBOX, EXTADR, STREET,
        // LOCALITY, REGION, PCODE, CTRY
        tmp = card.getAddressFieldHome("STREET");
        if (tmp != null)
            result.add(new AddressDetail(tmp));

        tmp = card.getAddressFieldHome("LOCALITY");
        if (tmp != null)
            result.add(new CityDetail(tmp));

        tmp = card.getAddressFieldHome("REGION");
        if (tmp != null)
            result.add(new ProvinceDetail(tmp));

        tmp = card.getAddressFieldHome("PCODE");
        if (tmp != null)
            result.add(new PostalCodeDetail(tmp));

        tmp = card.getAddressFieldHome("CTRY");
        if (tmp != null)
            result.add(new CountryDetail(tmp));

        // phoneType one of
        //VOICE, FAX, PAGER, MSG, CELL, VIDEO, BBS, MODEM, ISDN, PCS, PREF

        tmp = card.getPhoneHome("VOICE");
        if (tmp != null)
            result.add(new PhoneNumberDetail(tmp));

        tmp = card.getPhoneHome("VIDEO");
        if (tmp != null)
            result.add(new VideoDetail(tmp));

        tmp = card.getPhoneHome("FAX");
        if (tmp != null)
            result.add(new FaxDetail(tmp));

        tmp = card.getPhoneHome("PAGER");
        if (tmp != null)
            result.add(new PagerDetail(tmp));

        tmp = card.getPhoneHome("CELL");
        if (tmp != null)
            result.add(new MobilePhoneDetail(tmp));

        // "TEXT" numbers are also reported as mobile phone details
        tmp = card.getPhoneHome("TEXT");
        if (tmp != null)
            result.add(new MobilePhoneDetail(tmp));

        tmp = card.getEmailHome();
        if (tmp != null)
            result.add(new EmailAddressDetail(tmp));

        // Work Details
        // addrField one of
        // POSTAL, PARCEL, (DOM | INTL), PREF, POBOX, EXTADR, STREET,
        // LOCALITY, REGION, PCODE, CTRY
        tmp = card.getAddressFieldWork("STREET");
        if (tmp != null)
            result.add(new WorkAddressDetail(tmp));

        tmp = card.getAddressFieldWork("LOCALITY");
        if (tmp != null)
            result.add(new WorkCityDetail(tmp));

        tmp = card.getAddressFieldWork("REGION");
        if (tmp != null)
            result.add(new WorkProvinceDetail(tmp));

        tmp = card.getAddressFieldWork("PCODE");
        if (tmp != null)
            result.add(new WorkPostalCodeDetail(tmp));

        // the work country is currently not exported (disabled code kept below)
        //                tmp = card.getAddressFieldWork("CTRY");
        //                if(tmp != null)
        //                    result.add(new WorkCountryDetail(tmp);

        // phoneType one of
        //VOICE, FAX, PAGER, MSG, CELL, VIDEO, BBS, MODEM, ISDN, PCS, PREF

        tmp = card.getPhoneWork("VOICE");
        if (tmp != null)
            result.add(new WorkPhoneDetail(tmp));

        tmp = card.getPhoneWork("VIDEO");
        if (tmp != null)
            result.add(new WorkVideoDetail(tmp));

        tmp = card.getPhoneWork("FAX");
        if (tmp != null)
            result.add(new WorkFaxDetail(tmp));

        tmp = card.getPhoneWork("PAGER");
        if (tmp != null)
            result.add(new WorkPagerDetail(tmp));

        tmp = card.getPhoneWork("CELL");
        if (tmp != null)
            result.add(new WorkMobilePhoneDetail(tmp));

        // "TEXT" numbers are also reported as work mobile phone details
        tmp = card.getPhoneWork("TEXT");
        if (tmp != null)
            result.add(new WorkMobilePhoneDetail(tmp));

        tmp = card.getEmailWork();
        if (tmp != null)
            result.add(new WorkEmailAddressDetail(tmp));

        tmp = card.getOrganization();
        if (tmp != null)
            result.add(new WorkOrganizationNameDetail(tmp));

        tmp = card.getOrganizationUnit();
        if (tmp != null)
            result.add(new WorkDepartmentNameDetail(tmp));

        tmp = card.getField("TITLE");
        if (tmp != null)
            result.add(new JobTitleDetail(tmp));

        tmp = card.getField("ABOUTME");
        if (tmp != null)
            result.add(new AboutMeDetail(tmp));

        // avatar is only added when it actually contains data
        byte[] imageBytes = card.getAvatar();
        if (imageBytes != null && imageBytes.length > 0) {
            result.add(new ImageDetail("Image", imageBytes));
        }

        try {
            tmp = card.getField("URL");
            if (tmp != null)
                result.add(new URLDetail("URL", new URL(tmp)));
        } catch (MalformedURLException e) {
            // a URL field that is not a valid URL is silently left out
        }
    } catch (Throwable exc) {
        // any failure is logged; the stack trace is included only when trace logging is on
        String msg = "Cannot load details for contact " + contactAddress + " : " + exc.getMessage();
        if (logger.isTraceEnabled())
            logger.error(msg, exc);
        else
            logger.error(msg);
    }

    // remember the (possibly partial) result for this contact
    retreivedDetails.put(contactAddress, result);

    return result;
}

From source file:com.sencha.gxt.core.rebind.XTemplateParser.java

/**
 * Parses a template string into a tree of chunks rooted at a new
 * {@code TemplateModel}.
 *
 * The template is scanned for non-literal parts ({@code NON_LITERAL_PATTERN}).
 * Text between them becomes literal content; each non-literal part is handled
 * as an opening tpl tag (pushes a new {@code ControlChunk}), a closing tpl tag
 * (pops the current container), a code reference or a parameter reference.
 *
 * @param template the template source
 * @return the root model containing all parsed chunks
 * @throws UnableToCompleteException if the opening and closing tpl tags are
 *         not balanced; the reason is logged first
 */
public TemplateModel parse(String template) throws UnableToCompleteException {
    // look for parameters or tags (Consider combining into one pattern)
    TemplateModel model = new TemplateModel();
    Stack<ContainerTemplateChunk> stack = new Stack<ContainerTemplateChunk>();
    stack.push(model);
    Matcher m = NON_LITERAL_PATTERN.matcher(template);
    int lastMatchEnd = 0;
    while (m.find()) {
        // range of the current non-literal
        int begin = m.start(), end = m.end();
        String currentMatch = template.substring(begin, end);

        // if there was content since the last non-literal chunk, track it
        if (lastMatchEnd < begin) {
            ContentChunk c = literal(template.substring(lastMatchEnd, begin));
            stack.peek().children.add(c);
            log(c);
        }

        // move the last match pointer
        lastMatchEnd = end;

        // tpl tag starting
        Matcher tagOpenMatch = TAG_PATTERN.matcher(currentMatch);
        if (tagOpenMatch.matches()) {
            ControlChunk c = new ControlChunk();
            c.controls = new HashMap<String, String>();
            String attrs = tagOpenMatch.group(1).trim();
            Matcher attrMatcher = ATTR_PATTERN.matcher(attrs);
            while (attrMatcher.find()) {
                // should be if or for
                String key = attrMatcher.group(1);
                // must be html-decoded; the value is in group 2 or, failing that, group 3
                String encodedValue = attrMatcher.group(2) == null ? attrMatcher.group(3)
                        : attrMatcher.group(2);
                String value = StringEscapeUtils.unescapeXml(encodedValue);
                c.controls.put(key, value);
            }
            stack.peek().children.add(c);
            stack.push(c);
            log(c);
            continue;
        }

        // tpl tag ending
        Matcher tagCloseMatch = TAG_CLOSE_PATTERN.matcher(currentMatch);
        if (tagCloseMatch.matches()) {
            // The bottom element is the root model and must never be popped:
            // a close tag with no matching open tag is reported right here
            // instead of surfacing later as an unchecked EmptyStackException.
            if (stack.size() <= 1) {
                logger.log(Type.ERROR, "Too many </tpl> tags");
                throw new UnableToCompleteException();
            }
            TemplateChunk c = stack.pop();
            log(c);
            continue;
        }

        // reference (code)
        Matcher codeMatch = INVOKE_PATTERN.matcher(currentMatch);
        if (codeMatch.matches()) {
            ContentChunk c = new ContentChunk();
            c.type = ContentType.CODE;
            c.content = codeMatch.group(1);
            stack.peek().children.add(c);
            log(c);
            continue;
        }

        // reference (param)
        Matcher paramMatch = PARAM_PATTERN.matcher(currentMatch);
        if (paramMatch.matches()) {
            ContentChunk c = new ContentChunk();
            c.type = ContentType.REFERENCE;
            c.content = paramMatch.group(1);
            stack.peek().children.add(c);
            log(c);
            continue;
        }
    }
    // handle trailing content
    if (lastMatchEnd < template.length()) {
        ContentChunk c = literal(template.substring(lastMatchEnd));
        log(c);
        model.children.add(c);
    }
    // anything other than the root left on top means an unclosed tpl tag
    if (model != stack.peek()) {
        logger.log(Type.ERROR, "Too few </tpl> tags");
        throw new UnableToCompleteException();
    }
    return model;
}

From source file:msearch.filmeSuchen.sender.MediathekReader.java

/**
 * Decodes a topic name and checks it against the list of known topics.
 *
 * The name is trimmed and XML-unescaped, then trimmed and HTML-unescaped.
 *
 * @param thema raw topic name
 * @return the decoded topic if {@code listeAllThemen} contains it,
 *         otherwise {@code sendername}
 */
String checkThema(String thema) {
    String decoded = StringEscapeUtils.unescapeXml(thema.trim());
    decoded = StringEscapeUtils.unescapeHtml4(decoded.trim());
    return listeAllThemen.contains(decoded) ? decoded : sendername;
}

From source file:msearch.filmeSuchen.sender.MediathekBr.java

/**
 * Downloads the BR programme index and rebuilds {@code listeThemen}.
 *
 * Scanning starts at the {@code <ul class="clearFix">} list; if that marker
 * is missing nothing is added. For every link found, the topic name (text of
 * the following {@code <span>}) is decoded and registered in
 * {@code listeAllThemen}; links whose path starts with {@code sendungen/} or
 * {@code video/} are additionally added to {@code listeThemen} as
 * {@code { absolute url, topic }}. Errors for a single link are logged and
 * the scan goes on.
 */
private void getTheman() {
    final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html";
    final String MUSTER_URL = "<a href=\"/mediathek/video/";
    final String MUSTER_URL_1 = "sendungen/";
    final String MUSTER_URL_2 = "video/";
    listeThemen.clear();
    MSStringBuilder seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER);
    seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 5 /* attempts */, seite, "");

    int cursor = seite.indexOf("<ul class=\"clearFix\">");
    if (cursor == -1) {
        return;
    }

    // kept across iterations: a link without a closing quote reuses the previous value
    String url = "";
    while ((cursor = seite.indexOf(MUSTER_URL, cursor)) != -1) {
        try {
            cursor += MUSTER_URL.length();
            int closingQuote = seite.indexOf("\"", cursor);
            if (closingQuote != -1) {
                url = seite.substring(cursor, closingQuote);
            }

            String rawThema = seite.extract("<span>", "<", cursor);
            String thema = StringEscapeUtils.unescapeXml(rawThema.trim());
            thema = StringEscapeUtils.unescapeHtml4(thema.trim());
            if (!listeAllThemen.contains(thema)) {
                listeAllThemen.add(thema);
            }

            boolean wanted = !url.equals("")
                    && (url.startsWith(MUSTER_URL_1) || url.startsWith(MUSTER_URL_2));
            if (wanted) {
                // add the entry to the list
                listeThemen.addUrl(new String[] { "http://www.br.de/mediathek/video/" + url, thema });
            }
        } catch (Exception ex) {
            MSLog.fehlerMeldung(-821213698, MSLog.FEHLER_ART_MREADER, this.getClass().getSimpleName(), ex);
        }
    }
}

From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java

/**
 * Builds the journal line for the event of a term being attached to a
 * (different) concept.
 *
 * The concept id is only set when the term is attached to a concept, in line
 * with {@code buildTermLexicalValueChangedLine}. Both the new and the old
 * lexical value are taken from {@code term} and XML-unescaped.
 *
 * @param term          the term whose concept attachment changed
 * @param revision      the revision the event belongs to
 * @param preferredTerm preferred term of the term concept in the previous version
 * @return the populated journal line
 */
public JournalLine buildTermAttachmentChangedLine(ThesaurusTerm term, GincoRevEntity revision,
        ThesaurusTerm preferredTerm) {
    JournalLine journal = buildLineBase(JournalEventsEnum.THESAURUSTERM_LINKED_TO_CONCEPT, revision);
    journal.setTermId(term.getIdentifier());
    // guard against a detached term instead of failing with a NullPointerException
    if (term.getConcept() != null) {
        journal.setConceptId(term.getConcept().getIdentifier());
    }
    String lexicalValue = StringEscapeUtils.unescapeXml(term.getLexicalValue());
    journal.setNewLexicalValue(lexicalValue);
    // NOTE(review): preferredTerm is not used; the old value mirrors the new one.
    // Confirm whether the old value should come from preferredTerm instead.
    journal.setOldLexicalValue(lexicalValue);
    return journal;
}

From source file:ac.ucy.cs.spdx.service.SpdxViolationAnalysis.java

/**
 * Rewrites the declared license of an SPDX document and returns the
 * corrected file.
 *
 * The request body is a JSON object such as
 * {@code {"filename":"anomos","declared":"Apache-2.0","content":""}}.
 * The content is XML-unescaped, converted to an RDF file via
 * {@code ParseRdf.parseToRdf}, and the {@code rdf:resource} attribute of the
 * first {@code licenseDeclared} element is set to the SPDX license URL of
 * the declared license. The file is rewritten in place and sent back as an
 * attachment.
 *
 * A body that is not valid JSON, lacks one of the three text fields, or whose
 * document has no {@code licenseDeclared} element is answered with
 * 400 Bad Request instead of failing with a NullPointerException.
 *
 * @param jsonString the JSON request body
 * @return the corrected document as attachment, or a 400 response
 * @throws Exception if the RDF conversion, XML parsing or serialisation fails
 */
@POST
@Path("/correct/")
@Consumes(MediaType.TEXT_PLAIN)
@Produces(MediaType.TEXT_XML)
public Response correctSpdx(String jsonString) throws Exception {

    ObjectMapper mapper = new ObjectMapper();
    JsonNode fileNode;
    try {
        fileNode = mapper.readTree(jsonString);
    } catch (IOException e) {
        // covers JsonProcessingException as well (it is an IOException)
        return badRequest("Request body is not valid JSON: " + e.getMessage());
    }
    if (fileNode == null) {
        return badRequest("Request body is empty");
    }

    JsonNode fileNameNode = fileNode.get("filename");
    JsonNode contentNode = fileNode.get("content");
    JsonNode declaredNode = fileNode.get("declared");
    if (fileNameNode == null || !fileNameNode.isTextual() || contentNode == null
            || !contentNode.isTextual() || declaredNode == null || !declaredNode.isTextual()) {
        return badRequest("Fields \"filename\", \"content\" and \"declared\" are required");
    }

    // toString() yields the quoted JSON form; the surrounding quotes are cut off
    String fileName = fileNameNode.toString();
    fileName = fileName.substring(1, fileName.length() - 1);

    final String LICENSE_HTML = "http://spdx.org/licenses/";

    String contentXML = contentNode.toString();
    contentXML = StringEscapeUtils.unescapeXml(contentXML);
    contentXML = contentXML.substring(1, contentXML.length() - 1);

    String newDeclared = declaredNode.toString();
    newDeclared = newDeclared.substring(1, newDeclared.length() - 1);

    String fullpath = ParseRdf.parseToRdf(fileName, contentXML);
    setLastCorrected(fullpath);

    File xmlFile = new File(fullpath);

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    // the document originates from the request: do not resolve external entities (XXE)
    dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(xmlFile);

    Element licenseDeclared = (Element) doc.getElementsByTagName("licenseDeclared").item(0);
    if (licenseDeclared == null) {
        return badRequest("Document contains no licenseDeclared element");
    }
    // creates the attribute or replaces its current value
    licenseDeclared.setAttribute("rdf:resource", LICENSE_HTML + newDeclared);

    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    Transformer transformer = transformerFactory.newTransformer();
    DOMSource source = new DOMSource(doc);

    // empty the file before the transformed document is written to it
    PrintWriter writer = new PrintWriter(xmlFile);
    writer.print("");
    writer.close();

    StreamResult result = new StreamResult(xmlFile);

    transformer.transform(source, result);

    ResponseBuilder response = Response.ok((Object) xmlFile);
    response.header("Content-Disposition", "attachment; filename=" + fileName);
    return response.build();
}

/** Builds a plain-text 400 Bad Request response carrying the given message. */
private static Response badRequest(String message) {
    return Response.status(Response.Status.BAD_REQUEST).entity(message).type(MediaType.TEXT_PLAIN).build();
}