Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeXml
public static final String unescapeXml(final String input)
Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
Supports only the five basic XML entities (gt, lt, quot, amp, apos).
From source file:edu.illinois.cs.cogcomp.wikifier.wiki.importing.WikipediaRedirectExtractor.java
/**
 * Turns a raw title line into a normalised title string.
 *
 * <p>If the input contains {@code </title>}, the text between offset
 * {@code titlePattern.length()} and that closing tag is used; otherwise the whole input is
 * used unchanged. The result is XML-unescaped and every space is replaced by an underscore.
 *
 * <p>NOTE(review): {@code titlePattern} is declared outside this method; presumably it is the
 * opening-tag prefix of the line -- confirm.
 *
 * @param title the raw line holding the title
 * @return the unescaped title with spaces replaced by underscores
 */
private String cleanupTitle(String title) {
    final int closingTagIndex = title.indexOf("</title>");
    final String rawTitle;
    if (closingTagIndex == -1) {
        rawTitle = title;
    } else {
        rawTitle = title.substring(titlePattern.length(), closingTagIndex);
    }
    final String unescaped = StringEscapeUtils.unescapeXml(rawTitle);
    return unescaped.replace(' ', '_');
}
From source file:msearch.filmeSuchen.sender.MediathekKika.java
void addToListNormal() { //<strong style="margin-left:10px;">Sesamstrae prsentiert: Eine Mhre fr Zwei</strong><br /> //<a style="margin-left:20px;" href="?programm=168&id=14487&ag=5" title="Sendung vom 10.10.2012" class="overlay_link">42. Ordnung ist das halbe Chaos</a><br /> //<a style="margin-left:20px;" href="?programm=168&id=14485&ag=5" title="Sendung vom 10.10.2012" class="overlay_link">41. ber den Wolken</a><br /> final String ADRESSE = "http://kikaplus.net/clients/kika/kikaplus/"; final String MUSTER_URL = "<a style=\"margin-left:20px;\" href=\""; final String MUSTER_THEMA = "<strong style=\"margin-left:10px;\">"; final String MUSTER_DATUM = "title=\"Sendung vom "; listeThemen.clear();//from www. ja v a 2 s. com seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER); seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 3, seite, "KiKA: Startseite"); int pos = 0; int pos1, pos2, stop, pDatum1, pDatum2, pTitel1, pTitel2; String url, thema, datum, titel; while ((pos = seite.indexOf(MUSTER_THEMA, pos)) != -1) { try { thema = ""; pos += MUSTER_THEMA.length(); stop = seite.indexOf(MUSTER_THEMA, pos); pos1 = pos; if ((pos2 = seite.indexOf("<", pos1)) != -1) { thema = seite.substring(pos1, pos2); } while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) { titel = ""; datum = ""; if (stop != -1 && pos1 > stop) { // dann schon das nchste Thema break; } pos1 += MUSTER_URL.length(); if ((pos2 = seite.indexOf("\"", pos1)) != -1) { url = seite.substring(pos1, pos2); //if (!url.equals("")) { url = StringEscapeUtils.unescapeXml(url); if (!url.equals("") && !url.startsWith("http://") && !url.startsWith("/")) { // Datum if ((pDatum1 = seite.indexOf(MUSTER_DATUM, pos2)) != -1) { pDatum1 += MUSTER_DATUM.length(); if ((pDatum2 = seite.indexOf("\"", pDatum1)) != -1) { if (stop != -1 && pDatum1 < stop && pDatum2 < stop) { // dann schon das nchste Thema datum = seite.substring(pDatum1, pDatum2); } } } // Titel if ((pTitel1 = seite.indexOf(">", pos2)) != -1) { 
pTitel1 += 1; if ((pTitel2 = seite.indexOf("<", pTitel1)) != -1) { //if (stop != -1 && pTitel1 > stop && pTitel2 > stop) { if (stop != -1 && pTitel1 < stop && pTitel2 < stop) { titel = seite.substring(pTitel1, pTitel2); } } } // in die Liste eintragen String[] add = new String[] { ADRESSE + url, thema, titel, datum }; listeThemen.addUrl(add); } } } } catch (Exception ex) { MSLog.fehlerMeldung(-302025469, MSLog.FEHLER_ART_MREADER, "MediathekKiKA.addToList", ex, ""); } } }
From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java
/** * Builds the list of revision lines for the event of term lexical value * change/* ww w. j a v a2 s .c om*/ * * @param term * @param revision * @param oldLexicalValue * @return */ public JournalLine buildTermLexicalValueChangedLine(ThesaurusTerm term, GincoRevEntity revision, String oldLexicalValue) { JournalLine journal = buildLineBase(JournalEventsEnum.THESAURUSTERM_LEXICAL_VALUE_UPDATE, revision); journal.setTermId(term.getIdentifier()); if (term.getConcept() != null) { journal.setConceptId(term.getConcept().getIdentifier()); } journal.setNewLexicalValue(StringEscapeUtils.unescapeXml(term.getLexicalValue())); journal.setOldLexicalValue(StringEscapeUtils.unescapeXml(oldLexicalValue)); return journal; }
From source file:de.fatalix.book.importer.CalibriImporter.java
private static BookEntry parseOPF(Path pathToOPF, BookEntry bmd) throws IOException { List<String> lines = Files.readAllLines(pathToOPF, Charset.forName("UTF-8")); boolean multiLineDescription = false; String description = ""; for (String line : lines) { if (multiLineDescription) { multiLineDescription = false; if (line.split("<").length == 1) { multiLineDescription = true; description = description + line; } else { description = description + line.split("<")[0]; description = StringEscapeUtils.unescapeXml(description); bmd.setDescription(description); }//from w ww .j a v a 2s .c o m } else { if (line.contains("dc:title")) { String title = line.split(">")[1].split("<")[0]; bmd.setTitle(title); } else if (line.contains("dc:creator")) { String creator = line.split(">")[1].split("<")[0]; bmd.setAuthor(creator); } else if (line.contains("dc:description")) { String value = line.split(">")[1]; if (value.split("<").length == 1) { multiLineDescription = true; description = value; } else { value = value.split("<")[0]; value = StringEscapeUtils.unescapeXml(value); bmd.setDescription(value); } } else if (line.contains("dc:publisher")) { String value = line.split(">")[1].split("<")[0]; bmd.setPublisher(value); } else if (line.contains("dc:date")) { String value = line.split(">")[1].split("<")[0]; DateTime dtReleaseDate = new DateTime(value); if (dtReleaseDate.getYear() != 101) { bmd.setReleaseDate(dtReleaseDate.toDate()); } } else if (line.contains("dc:language")) { String value = line.split(">")[1].split("<")[0]; bmd.setLanguage(value); } else if (line.contains("opf:scheme=\"ISBN\"")) { String value = line.split(">")[1].split("<")[0]; bmd.setIsbn(value); } } } return bmd; }
From source file:net.java.sip.communicator.impl.protocol.jabber.InfoRetreiver.java
/** * Retrieve details and return them or if missing return an empty list. * @param contactAddress the address to search for. * @return the details or empty list.// w w w . j a va 2 s . c o m */ protected List<GenericDetail> retrieveDetails(String contactAddress) { List<GenericDetail> result = new LinkedList<GenericDetail>(); try { XMPPConnection connection = jabberProvider.getConnection(); if (connection == null || !connection.isAuthenticated()) return null; VCard card = new VCard(); // if there is no value or is equals to the default one // load vcard using smack load method if (vcardTimeoutReply == -1 || vcardTimeoutReply == SmackConfiguration.getPacketReplyTimeout()) card.load(connection, contactAddress); else load(card, connection, contactAddress, vcardTimeoutReply); String tmp; tmp = checkForFullName(card); if (tmp != null) result.add(new DisplayNameDetail(StringEscapeUtils.unescapeXml(tmp))); tmp = card.getFirstName(); if (tmp != null) result.add(new FirstNameDetail(StringEscapeUtils.unescapeXml(tmp))); tmp = card.getMiddleName(); if (tmp != null) result.add(new MiddleNameDetail(StringEscapeUtils.unescapeXml(tmp))); tmp = card.getLastName(); if (tmp != null) result.add(new LastNameDetail(StringEscapeUtils.unescapeXml(tmp))); tmp = card.getNickName(); if (tmp != null) result.add(new NicknameDetail(StringEscapeUtils.unescapeXml(tmp))); tmp = card.getField("BDAY"); if (tmp != null) { try { Calendar birthDateCalendar = Calendar.getInstance(); DateFormat dateFormat = new SimpleDateFormat( JabberActivator.getResources().getI18NString("plugin.accountinfo.BDAY_FORMAT")); Date birthDate = dateFormat.parse(tmp); birthDateCalendar.setTime(birthDate); BirthDateDetail bd = new BirthDateDetail(birthDateCalendar); result.add(bd); } catch (ParseException e) { } } // Home Details // addrField one of // POSTAL, PARCEL, (DOM | INTL), PREF, POBOX, EXTADR, STREET, // LOCALITY, REGION, PCODE, CTRY tmp = card.getAddressFieldHome("STREET"); if (tmp != null) result.add(new 
AddressDetail(tmp)); tmp = card.getAddressFieldHome("LOCALITY"); if (tmp != null) result.add(new CityDetail(tmp)); tmp = card.getAddressFieldHome("REGION"); if (tmp != null) result.add(new ProvinceDetail(tmp)); tmp = card.getAddressFieldHome("PCODE"); if (tmp != null) result.add(new PostalCodeDetail(tmp)); tmp = card.getAddressFieldHome("CTRY"); if (tmp != null) result.add(new CountryDetail(tmp)); // phoneType one of //VOICE, FAX, PAGER, MSG, CELL, VIDEO, BBS, MODEM, ISDN, PCS, PREF tmp = card.getPhoneHome("VOICE"); if (tmp != null) result.add(new PhoneNumberDetail(tmp)); tmp = card.getPhoneHome("VIDEO"); if (tmp != null) result.add(new VideoDetail(tmp)); tmp = card.getPhoneHome("FAX"); if (tmp != null) result.add(new FaxDetail(tmp)); tmp = card.getPhoneHome("PAGER"); if (tmp != null) result.add(new PagerDetail(tmp)); tmp = card.getPhoneHome("CELL"); if (tmp != null) result.add(new MobilePhoneDetail(tmp)); tmp = card.getPhoneHome("TEXT"); if (tmp != null) result.add(new MobilePhoneDetail(tmp)); tmp = card.getEmailHome(); if (tmp != null) result.add(new EmailAddressDetail(tmp)); // Work Details // addrField one of // POSTAL, PARCEL, (DOM | INTL), PREF, POBOX, EXTADR, STREET, // LOCALITY, REGION, PCODE, CTRY tmp = card.getAddressFieldWork("STREET"); if (tmp != null) result.add(new WorkAddressDetail(tmp)); tmp = card.getAddressFieldWork("LOCALITY"); if (tmp != null) result.add(new WorkCityDetail(tmp)); tmp = card.getAddressFieldWork("REGION"); if (tmp != null) result.add(new WorkProvinceDetail(tmp)); tmp = card.getAddressFieldWork("PCODE"); if (tmp != null) result.add(new WorkPostalCodeDetail(tmp)); // tmp = card.getAddressFieldWork("CTRY"); // if(tmp != null) // result.add(new WorkCountryDetail(tmp); // phoneType one of //VOICE, FAX, PAGER, MSG, CELL, VIDEO, BBS, MODEM, ISDN, PCS, PREF tmp = card.getPhoneWork("VOICE"); if (tmp != null) result.add(new WorkPhoneDetail(tmp)); tmp = card.getPhoneWork("VIDEO"); if (tmp != null) result.add(new WorkVideoDetail(tmp)); tmp = 
card.getPhoneWork("FAX"); if (tmp != null) result.add(new WorkFaxDetail(tmp)); tmp = card.getPhoneWork("PAGER"); if (tmp != null) result.add(new WorkPagerDetail(tmp)); tmp = card.getPhoneWork("CELL"); if (tmp != null) result.add(new WorkMobilePhoneDetail(tmp)); tmp = card.getPhoneWork("TEXT"); if (tmp != null) result.add(new WorkMobilePhoneDetail(tmp)); tmp = card.getEmailWork(); if (tmp != null) result.add(new WorkEmailAddressDetail(tmp)); tmp = card.getOrganization(); if (tmp != null) result.add(new WorkOrganizationNameDetail(tmp)); tmp = card.getOrganizationUnit(); if (tmp != null) result.add(new WorkDepartmentNameDetail(tmp)); tmp = card.getField("TITLE"); if (tmp != null) result.add(new JobTitleDetail(tmp)); tmp = card.getField("ABOUTME"); if (tmp != null) result.add(new AboutMeDetail(tmp)); byte[] imageBytes = card.getAvatar(); if (imageBytes != null && imageBytes.length > 0) { result.add(new ImageDetail("Image", imageBytes)); } try { tmp = card.getField("URL"); if (tmp != null) result.add(new URLDetail("URL", new URL(tmp))); } catch (MalformedURLException e) { } } catch (Throwable exc) { String msg = "Cannot load details for contact " + contactAddress + " : " + exc.getMessage(); if (logger.isTraceEnabled()) logger.error(msg, exc); else logger.error(msg); } retreivedDetails.put(contactAddress, result); return result; }
From source file:com.sencha.gxt.core.rebind.XTemplateParser.java
public TemplateModel parse(String template) throws UnableToCompleteException { // look for parameters or tags (Consider combining into one pattern) TemplateModel model = new TemplateModel(); Stack<ContainerTemplateChunk> stack = new Stack<ContainerTemplateChunk>(); stack.push(model);//from www . ja v a 2 s . c o m Matcher m = NON_LITERAL_PATTERN.matcher(template); int lastMatchEnd = 0; while (m.find()) { // range of the current non-literal int begin = m.start(), end = m.end(); String currentMatch = template.substring(begin, end); // if there was content since the last non-literal chunk, track it if (lastMatchEnd < begin) { ContentChunk c = literal(template.substring(lastMatchEnd, begin)); stack.peek().children.add(c); log(c); } // move the last match pointer lastMatchEnd = end; // tpl tag starting Matcher tagOpenMatch = TAG_PATTERN.matcher(currentMatch); if (tagOpenMatch.matches()) { ControlChunk c = new ControlChunk(); c.controls = new HashMap<String, String>(); String attrs = tagOpenMatch.group(1).trim(); Matcher attrMatcher = ATTR_PATTERN.matcher(attrs); while (attrMatcher.find()) { // should be if or for String key = attrMatcher.group(1); // must be html-decoded String encodedValue = attrMatcher.group(2) == null ? 
attrMatcher.group(3) : attrMatcher.group(2); String value = StringEscapeUtils.unescapeXml(encodedValue); c.controls.put(key, value); } stack.peek().children.add(c); stack.push(c); log(c); continue; } // tpl tag ending Matcher tagCloseMatch = TAG_CLOSE_PATTERN.matcher(currentMatch); if (tagCloseMatch.matches()) { TemplateChunk c; try { c = stack.pop(); } catch (EmptyStackException ex) { logger.log(Type.ERROR, "Too many </tpl> tags"); throw new UnableToCompleteException(); } log(c); continue; } // reference (code) Matcher codeMatch = INVOKE_PATTERN.matcher(currentMatch); if (codeMatch.matches()) { ContentChunk c = new ContentChunk(); c.type = ContentType.CODE; c.content = codeMatch.group(1); stack.peek().children.add(c); log(c); continue; } // reference (param) Matcher paramMatch = PARAM_PATTERN.matcher(currentMatch); if (paramMatch.matches()) { ContentChunk c = new ContentChunk(); c.type = ContentType.REFERENCE; c.content = paramMatch.group(1); stack.peek().children.add(c); log(c); continue; } } // handle trailing content if (lastMatchEnd < template.length()) { ContentChunk c = literal(template.substring(lastMatchEnd)); log(c); model.children.add(c); } if (model != stack.peek()) { logger.log(Type.ERROR, "Too few </tpl> tags"); throw new UnableToCompleteException(); } return model; }
From source file:msearch.filmeSuchen.sender.MediathekReader.java
/**
 * Decodes a topic name and checks it against the known topics.
 *
 * <p>The input is trimmed and XML-unescaped, then trimmed again and HTML4-unescaped.
 *
 * @param thema the raw topic name
 * @return the decoded topic if {@code listeAllThemen} contains it, otherwise {@code sendername}
 */
String checkThema(String thema) {
    final String xmlDecoded = StringEscapeUtils.unescapeXml(thema.trim());
    final String decoded = StringEscapeUtils.unescapeHtml4(xmlDecoded.trim());
    return listeAllThemen.contains(decoded) ? decoded : sendername;
}
From source file:msearch.filmeSuchen.sender.MediathekBr.java
private void getTheman() { final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html"; final String MUSTER_URL = "<a href=\"/mediathek/video/"; final String MUSTER_URL_1 = "sendungen/"; final String MUSTER_URL_2 = "video/"; listeThemen.clear();/*ww w . ja va 2 s . co m*/ MSStringBuilder seite = new MSStringBuilder(MSConst.STRING_BUFFER_START_BUFFER); //seite = getUrlIo.getUri_Utf(SENDERNAME, ADRESSE, seite, ""); seite = getUrlIo.getUri(SENDERNAME, ADRESSE, MSConst.KODIERUNG_UTF, 5 /* versuche */, seite, ""); int pos1 = 0; int pos2; String url = ""; if ((pos1 = seite.indexOf("<ul class=\"clearFix\">")) != -1) { while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) { try { pos1 += MUSTER_URL.length(); if ((pos2 = seite.indexOf("\"", pos1)) != -1) { url = seite.substring(pos1, pos2); } String thema = seite.extract("<span>", "<", pos1); thema = StringEscapeUtils.unescapeXml(thema.trim()); thema = StringEscapeUtils.unescapeHtml4(thema.trim()); if (!listeAllThemen.contains(thema)) { listeAllThemen.add(thema); } if (url.equals("") || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) { continue; } /// der BR ist etwas zu langsam dafr???? // // in die Liste eintragen // String[] add; // if (MSearchConfig.senderAllesLaden) { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url + "#seriesMoreCount=10", ""}; // } else { // add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url, ""}; // } // in die Liste eintragen String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema }; listeThemen.addUrl(add); } catch (Exception ex) { MSLog.fehlerMeldung(-821213698, MSLog.FEHLER_ART_MREADER, this.getClass().getSimpleName(), ex); } } } }
From source file:fr.mcc.ginco.audit.csv.JournalLineBuilder.java
/**
 * Creates the journal line for a {@code THESAURUSTERM_LINKED_TO_CONCEPT} event.
 *
 * <p>The line carries the term identifier, the identifier of the term's concept, and the
 * term's XML-unescaped lexical value as both the new and the old lexical value.
 *
 * <p>NOTE(review): {@code preferredTerm} is never read, and old and new lexical values are
 * always identical -- confirm whether the old value was meant to come from
 * {@code preferredTerm}.
 *
 * @param term the term that was attached; its concept must not be {@code null}
 * @param revision the revision handed to {@code buildLineBase}
 * @param preferredTerm preferred term of the term concept in the previous version (unused)
 * @return the populated journal line
 */
public JournalLine buildTermAttachmentChangedLine(ThesaurusTerm term, GincoRevEntity revision,
        ThesaurusTerm preferredTerm) {
    final JournalLine line = buildLineBase(JournalEventsEnum.THESAURUSTERM_LINKED_TO_CONCEPT, revision);
    line.setTermId(term.getIdentifier());
    line.setConceptId(term.getConcept().getIdentifier());

    final String newValue = StringEscapeUtils.unescapeXml(term.getLexicalValue());
    line.setNewLexicalValue(newValue);
    final String oldValue = StringEscapeUtils.unescapeXml(term.getLexicalValue());
    line.setOldLexicalValue(oldValue);
    return line;
}
From source file:ac.ucy.cs.spdx.service.SpdxViolationAnalysis.java
@POST @Path("/correct/") @Consumes(MediaType.TEXT_PLAIN)// w ww . jav a2s . c o m @Produces(MediaType.TEXT_XML) public Response correctSpdx(String jsonString) throws Exception { ObjectMapper mapper = new ObjectMapper(); JsonNode fileNode = null; try { fileNode = mapper.readTree(jsonString); } catch (JsonProcessingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } String fileName = fileNode.get("filename").toString(); fileName = fileName.substring(1, fileName.length() - 1); final String LICENSE_HTML = "http://spdx.org/licenses/"; String contentXML = fileNode.get("content").toString(); contentXML = StringEscapeUtils.unescapeXml(contentXML); contentXML = contentXML.substring(1, contentXML.length() - 1); String newDeclared = fileNode.get("declared").toString(); newDeclared = newDeclared.substring(1, newDeclared.length() - 1); String fullpath = ParseRdf.parseToRdf(fileName, contentXML); setLastCorrected(fullpath); File xmlFile = new File(fullpath); DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(xmlFile); if (doc.getElementsByTagName("licenseDeclared").item(0).getAttributes() .getNamedItem("rdf:resource") == null) { Element e = (Element) doc.getElementsByTagName("licenseDeclared").item(0); e.setAttribute("rdf:resource", LICENSE_HTML + newDeclared); } else { doc.getElementsByTagName("licenseDeclared").item(0).getAttributes().getNamedItem("rdf:resource") .setNodeValue(LICENSE_HTML + newDeclared); } TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); DOMSource source = new DOMSource(doc); PrintWriter writer = new PrintWriter(xmlFile); writer.print(""); writer.close(); StreamResult result = new StreamResult(xmlFile); transformer.transform(source, result); ResponseBuilder response = Response.ok((Object) xmlFile); 
response.header("Content-Disposition", "attachment; filename=" + fileName); return response.build();// {"filename":"anomos","declared":"Apache-2.0","content":""} }