List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:com.openlopd.agpd.nota.ws.NotaWebservice.java
public String registrarXml(FileDataBase xmlFile) throws Exception { String endpoint = "https://www.aespd.es:443/agenciapd/axis/SolicitudService?wsdl"; if (logger.isInfoEnabled()) { logger.info("Iniciando notificacin en el entorno de {}.", entorno); }/*from w w w . j a v a2 s . c om*/ Service service = new Service(); Call call = (Call) service.createCall(); call.setTargetEndpointAddress(new java.net.URL(endpoint)); if (entorno.equals(Entornos.exp.name())) { // Operacin para el sistema en produccin. call.setOperationName(new QName("http://soapinterop.org/", "registrarXml")); } else { // Operacin para el sistema en pruebas. call.setOperationName(new QName("http://soapinterop.org/", "probarXml")); } // Codificacin y envo del fichero. String codecFile = Base64.encode(xmlFile.getFile()); String ret = (String) call.invoke(new Object[] { codecFile }); // TODO: Hay que verificar que la firma de esto es correcta. return StringEscapeUtils.unescapeHtml4(new String(Base64.decode(ret), "ISO-8859-1")); //return new String(Base64.decode(ret), "ISO-8859-1"); }
From source file:com.nttec.everychan.chans.cirno.Chan410Reader.java
@Override protected void parseDate(String date) { super.parseDate(date); if (currentPost.timestamp == 0) { Matcher matcher = SPAN_ADMIN_PATTERN.matcher(date); if (matcher.matches()) { currentPost.trip = (currentPost.trip == null ? "" : currentPost.trip) + StringEscapeUtils.unescapeHtml4(matcher.group(1).trim()); super.parseDate(matcher.group(2)); }// w w w . j av a 2 s. c om } }
From source file:com.wellsandwhistles.android.redditsp.reddit.prepared.RedditParsedPost.java
/**
 * Returns the self text of the underlying post with HTML entities unescaped.
 *
 * @return the result of {@code StringEscapeUtils.unescapeHtml4} applied to {@code mSrc.selftext}
 */
public String getUnescapedSelfText() {
    final String rawSelfText = mSrc.selftext;
    return StringEscapeUtils.unescapeHtml4(rawSelfText);
}
From source file:gr.demokritos.iit.cru.creativity.reasoning.semantic.WebMiner.java
public static String WebMiner(String seed, int difficulty, String language, boolean compactForm) throws ClassNotFoundException, SQLException, IOException, InstantiationException, IllegalAccessException {/*from w ww .j a va2 s .co m*/ Gson gson = new Gson(); Connect c = new Connect(language); RandomWordGenerator r = new RandomWordGenerator(c); String randomPhrase = r.selectRandomWord(seed, difficulty).replace(",", " "); InfoSummarization inf = new InfoSummarization(c); LinkedHashMap<String, Double> TagCloud = new LinkedHashMap<String, Double>(); Set<String> pages = new HashSet<String>(); ArrayList<String> urls = new ArrayList<String>(); ArrayList<String> urls_temp = new ArrayList<String>(); if (language.equalsIgnoreCase("en")) { if (randomPhrase.length() == 0) { randomPhrase = seed; } String bingAppId = c.getBingAppId(); BingCrawler bc = new BingCrawler(bingAppId, language); urls_temp = bc.crawl(randomPhrase); int url_loop = 0; while ((url_loop < 5) && (url_loop < urls_temp.size())) { urls.add(urls_temp.get(url_loop)); url_loop++; } } else if (language.equalsIgnoreCase("el")) { String bingAppId = c.getBingAppId(); BingCrawler bc = new BingCrawler(bingAppId, language); urls_temp = bc.crawl(randomPhrase); int url_loop = 0; while ((url_loop < 5) && (url_loop < urls_temp.size())) { urls.add(urls_temp.get(url_loop)); url_loop++; } } else if (language.equalsIgnoreCase("de")) {//keep only the first word of the random phrase for search if (randomPhrase.length() == 0) { randomPhrase = seed; } urls_temp = HTMLUtilities.linkExtractor( "http://www.fragfinn.de/kinderliste/suche?start=0&query=" + randomPhrase.split(" ")[0], "UTF-8", 0); for (String url : urls_temp) { urls.add(StringEscapeUtils.unescapeHtml4(url)); if (urls.size() == 5) { break; } } } String delims = "[{} .,;?!():\"]+"; String[] words = randomPhrase.split(","); String[] user_keywords = seed.split(delims); if (urls.size() > 0) { ExecutorService threadPool = Executors.newFixedThreadPool(urls.size()); for (String url 
: urls) { threadPool.submit(new HTMLPages(url, pages, language)); //stopWordSet, tokensHashMap,language)); // threadPool.submit(HTMLTokenizer()); } threadPool.shutdown(); while (!threadPool.isTerminated()) { } LinkedHashMap<ArrayList<String>, Double> temp = inf.TopTermsBing(pages, compactForm); HashMap<String, Double> temp2 = new HashMap<String, Double>(); for (ArrayList<String> stems : temp.keySet()) { for (int j = 0; j < stems.size(); j++) { String s = stems.get(j).split("\\{")[0]; s = s.replace(",", " "); s = s.trim(); boolean wordnet = true; //if term is not one of the initial random phrase for (int i = 0; i < words.length; i++) { if (s.equalsIgnoreCase(words[i])) { wordnet = false; } } //and if it 's not in the initial words of user for (int i = 0; i < user_keywords.length; i++) { if (s.equalsIgnoreCase(user_keywords[i])) { wordnet = false; } } //in german or greek, ignore english words from search english words if (language.equalsIgnoreCase("de") || language.equalsIgnoreCase("el")) { if (c.getWn().getCommonPos(s) != null) { continue; } } //return it with its stem's weight if (wordnet) { //for every stem, put each of its corresponding terms to tagCloud with the stem's tf temp2.put(stems.get(j), temp.get(stems)); } } } TagCloud = inf.sortHashMapByValues(temp2); threadPool.shutdownNow(); } String json = gson.toJson(TagCloud); c.CloseConnection(); return json; }
From source file:com.joey.Fujikom.common.mapper.JsonMapper.java
public JsonMapper(Include include) { // ?/*from ww w. j av a 2 s . c o m*/ if (include != null) { this.setSerializationInclusion(include); } // ????????? this.enableSimple(); // JSONJava this.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); // ? this.getSerializerProvider().setNullValueSerializer(new JsonSerializer<Object>() { @Override public void serialize(Object value, JsonGenerator jgen, SerializerProvider provider) throws IOException, JsonProcessingException { jgen.writeString(""); } }); // HTML? this.registerModule(new SimpleModule().addSerializer(String.class, new JsonSerializer<String>() { @Override public void serialize(String value, JsonGenerator jgen, SerializerProvider provider) throws IOException, JsonProcessingException { jgen.writeString(StringEscapeUtils.unescapeHtml4(value)); } })); // this.setTimeZone(TimeZone.getDefault());//getTimeZone("GMT+8:00") }
From source file:fr.mcc.ginco.solr.TermSolrConverter.java
/** * Convert a Thesaurus Term into a SolrDocument * * @param thesaurusTerm/*from w ww . j a v a 2 s. com*/ * @return SolrInputDocument */ public SolrInputDocument convertSolrTerm(ThesaurusTerm thesaurusTerm) { SolrInputDocument doc = new SolrInputDocument(); doc.addField(SolrField.THESAURUSID, thesaurusTerm.getThesaurusId()); doc.addField(SolrField.THESAURUSTITLE, thesaurusTerm.getThesaurus().getTitle()); doc.addField(SolrField.IDENTIFIER, thesaurusTerm.getIdentifier()); doc.addField(SolrField.LEXICALVALUE, StringEscapeUtils.unescapeHtml4(thesaurusTerm.getLexicalValue().replace("'", "'"))); doc.addField(SolrField.TYPE, ThesaurusTerm.class.getSimpleName()); doc.addField(SolrField.LANGUAGE, thesaurusTerm.getLanguage().getId()); if (thesaurusTerm.getConcept() != null) { doc.addField(SolrField.CONCEPTID, thesaurusTerm.getConcept().getIdentifier()); } boolean preferred; if (thesaurusTerm.getPrefered() == null) { preferred = false; } else { preferred = thesaurusTerm.getPrefered(); } if (preferred) { doc.addField(SolrField.EXT_TYPE, ExtEntityType.TERM_PREF); } else { doc.addField(SolrField.EXT_TYPE, ExtEntityType.TERM_NON_PREF); } Timestamp modifiedDate = new Timestamp(thesaurusTerm.getModified().getTime()); doc.addField(SolrField.MODIFIED, modifiedDate); Timestamp createdDate = new Timestamp(thesaurusTerm.getCreated().getTime()); doc.addField(SolrField.CREATED, createdDate); doc.addField(SolrField.STATUS, thesaurusTerm.getStatus()); List<Note> notes = noteService.getTermNotePaginatedList(thesaurusTerm.getIdentifier(), 0, 0); for (Note note : notes) { doc.addField(SolrField.NOTES, note.getLexicalValue()); } return doc; }
From source file:gov.llnl.ontology.text.corpora.UkWacDocumentReader.java
/** * {@inheritDoc}/* w w w . ja v a 2 s. c o m*/ */ public gov.llnl.ontology.text.Document readDocument(String doc, String corpusName) { String[] lines = doc.split("\\n"); // Find the title. int titleStart = lines[0].indexOf("id=\"") + 4; int titleEnd = lines[0].lastIndexOf("\">"); String key = lines[0].substring(titleStart, titleEnd); long id = key.hashCode(); StringBuilder builder = new StringBuilder(); for (int i = 1; i < lines.length - 1; ++i) { // Skip empty lines and xml tags. if (lines[i].length() == 0 || lines[i].endsWith("s>")) continue; lines[i] = StringEscapeUtils.unescapeHtml4(lines[i]); String[] toks = lines[i].split("\\s+"); builder.append(toks[0]).append(" "); } return new SimpleDocument(corpusName, builder.toString(), doc, key, id, key, new HashSet<String>()); }
From source file:com.dalthed.tucan.TucanMobile.java
/**
 * Extracts the event name from a combined event string; for example, for a string like
 * "04-00-0126-vu Mathematik 1 (für ET)" it returns "Mathematik 1 (für ET)".
 *
 * <p>The input is split with {@code nbspPat}. When the split yields exactly two parts, the
 * second part is returned with its HTML entities unescaped; otherwise the input is returned
 * unchanged.
 *
 * @param evNameString the combined event string
 * @return the event name, or {@code evNameString} itself when it does not split into two parts
 */
public static String getEventNameByString(String evNameString) {
    final String[] parts = nbspPat.split(evNameString);
    if (parts.length != 2) {
        return evNameString;
    }
    return StringEscapeUtils.unescapeHtml4(parts[1]);
}
From source file:com.datumbox.framework.utilities.text.cleaners.HTMLCleaner.java
public static String extractText(String text) { //return Jsoup.parse(text).text(); text = replaceImgWithAlt(text);//from w ww . ja v a2 s .c o m text = safeRemoveAllTags(text); text = StringEscapeUtils.unescapeHtml4(text); return text; }
From source file:gr.demokritos.iit.textforms.TextForm.java
/** * Remove HTML strings from the text/*from w w w. j ava 2 s . com*/ */ protected void removeHTML() { this.text = this.text.replaceAll("&", "and"); this.text = StringEscapeUtils.unescapeHtml4(this.text); }