List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:cx.fbn.nevernote.sql.REnSearch.java
private boolean matchContentAll(Note n) { if (todo.size() == 0 && resource.size() == 0 && searchPhrases.size() == 0) return true; n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false); // Check for search phrases String text = StringEscapeUtils.unescapeHtml4(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase(); boolean negative = false; for (int i = 0; i < searchPhrases.size(); i++) { String phrase = searchPhrases.get(i); if (phrase.startsWith("-")) { negative = true;/*ww w . j a v a 2 s .co m*/ phrase = phrase.substring(1); } else negative = false; phrase = phrase.substring(1); phrase = phrase.substring(0, phrase.length() - 1); if (text.indexOf(phrase) >= 0 && negative) { return false; } if (text.indexOf(phrase) < 0 && !negative) return false; } for (int i = 0; i < todo.size(); i++) { String value = todo.get(i); value = value.replace("\"", ""); boolean desiredState; if (!value.endsWith(":false") && !value.endsWith(":true") && !value.endsWith(":*") && !value.endsWith("*")) return false; if (value.endsWith(":false")) desiredState = false; else desiredState = true; if (value.startsWith("-")) desiredState = !desiredState; int pos = n.getContent().indexOf("<en-todo"); if (pos == -1 && !value.startsWith("-")) return false; if (pos > -1 && value.startsWith("-") && (value.endsWith("*") || value.endsWith(":"))) return false; if (pos == -1 && !value.startsWith("-")) return false; boolean returnTodo = false; while (pos > -1) { int endPos = n.getContent().indexOf(">", pos); String segment = n.getContent().substring(pos, endPos); boolean currentState; if (segment.toLowerCase().indexOf("checked=\"true\"") == -1) currentState = false; else currentState = true; if (desiredState == currentState) returnTodo = true; if (value.endsWith("*") || value.endsWith(":")) returnTodo = true; pos = n.getContent().indexOf("<en-todo", pos + 1); } if (!returnTodo) return false; } // Check resources for (int i = 0; i < resource.size(); i++) { String resourceString = 
resource.get(i); resourceString = resourceString.replace("\"", ""); negative = false; if (resourceString.startsWith("-")) negative = true; resourceString = resourceString.substring(resourceString.indexOf(":") + 1); if (resourceString.equals("")) return false; for (int j = 0; j < n.getResourcesSize(); j++) { boolean match = stringMatch(n.getResources().get(j).getMime(), resourceString, negative); if (!match && !negative) return false; if (match && negative) return false; } } return true; }
From source file:com.nttec.everychan.chans.krautchan.KrautModule.java
@Override public String deletePost(DeletePostModel model, ProgressListener listener, CancellableTask task) throws Exception { String url = (useHttps() ? "https://" : "http://") + CHAN_DOMAIN + "/delete"; List<NameValuePair> pairs = new ArrayList<NameValuePair>(); pairs.add(new BasicNameValuePair("post_" + model.postNumber, "delete")); pairs.add(new BasicNameValuePair("password", model.password)); pairs.add(new BasicNameValuePair("board", model.boardName)); HttpRequestModel request = HttpRequestModel.builder().setPOST(new UrlEncodedFormEntity(pairs, "UTF-8")) .setNoRedirect(true).build(); HttpResponseModel response = null;// ww w .java 2 s.com try { response = HttpStreamer.getInstance().getFromUrl(url, request, httpClient, null, task); if (response.statusCode == 302) { for (Header header : response.headers) { if (header != null && HttpHeaders.LOCATION.equalsIgnoreCase(header.getName())) { String location = header.getValue(); if (location.contains("banned")) throw new Exception("You are banned"); break; } } return null; } else if (response.statusCode == 200) { ByteArrayOutputStream output = new ByteArrayOutputStream(1024); IOUtils.copyStream(response.stream, output); String htmlResponse = output.toString("UTF-8"); int messageNoticePos = htmlResponse.indexOf("class=\"message_notice"); if (messageNoticePos == -1) return null; int p2 = htmlResponse.indexOf('>', messageNoticePos); if (p2 != -1) { String errorMessage = htmlResponse.substring(p2 + 1); int p3 = errorMessage.indexOf("</tr>"); if (p3 != -1) errorMessage = errorMessage.substring(0, p3); errorMessage = RegexUtils.trimToSpace( StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlTags(errorMessage)).trim()); throw new Exception(errorMessage); } } throw new HttpWrongStatusCodeException(response.statusCode, response.statusCode + " - " + response.statusReason); } finally { if (response != null) response.release(); } }
From source file:edu.illinois.cs.cogcomp.ner.Main.java
/**
 * Processes a single input file: reads its text, decodes HTML entities, runs the
 * NER annotator on it and publishes the rendered result under the input file's name.
 *
 * <p>Where the output ends up is decided by the result processor.
 *
 * @param infile the file to annotate
 * @throws Exception if reading, annotating or publishing fails
 */
private void processInputFile(File infile) throws Exception {
    final String rawText = InFile.readFileText(infile.toString());
    final String decodedText = StringEscapeUtils.unescapeHtml4(rawText);
    final TextAnnotation annotation = tab.createTextAnnotation(decodedText);
    final String rendered = this.produceOutput(this.nerAnnotator.getView(annotation), annotation);
    this.getResultProcessor().publish(rendered, infile.getName());
}
From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { try {//from w ww. java 2s . c o m // wir iterieren schn ber die page-Eintrge. Darin gibt es dann title, timestamp, <contributor> => <username> und text. den text mssen // wir noch bereinigen. dazu nehmen wir eine Vorverarbeitung mit bliki - dazu mssen wir aber selbst nochmal den String vorbereiten und // nachbereinigen. Leider. WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class); if (wikipediaDumpParserConfig == null) { Logger.getLogger(WikipediaDumpParser.class.getName()) .info("No wikipedia parser config found. Will take the default one."); wikipediaDumpParserConfig = new WikipediaDumpParserConfig(); } TikaInputStream tikaStream = TikaInputStream.get(stream); File fWikipediaDumpFile4Stream = tikaStream.getFile(); MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>(); if (wikipediaDumpParserConfig.determinePageRedirects) hsPageTitle2Redirects = getPageTitle2Redirects(new FileInputStream(fWikipediaDumpFile4Stream)); HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values()); String strCleanedText = ""; String strBaseURL = null; XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); XMLEventReader xmlEventReader = xmlInputFactory .createXMLEventReader(new FileInputStream(fWikipediaDumpFile4Stream), "Utf-8"); while (xmlEventReader.hasNext()) { XMLEvent xmlEvent = xmlEventReader.nextEvent(); if (xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) { if (metadata.size() == 0) continue; // den mimetype wollen wir auch noch in den Metadaten haben metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml"); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("p"); 
xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length()); xhtml.endElement("p"); xhtml.endDocument(); } if (!xmlEvent.isStartElement()) continue; // ##### die siteinfo if (strBaseURL == null && xmlEvent.asStartElement().getName().getLocalPart().equals("base")) { // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite =>http://de.wikipedia.org/wiki/ strBaseURL = readNextCharEventsText(xmlEventReader); strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1); } // ##### die page if (xmlEvent.asStartElement().getName().getLocalPart().equals("page")) { for (String strKey : metadata.names()) metadata.remove(strKey); } // ##### der Title if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) { // wir merken uns immer den aktuellen Titel String strCurrentTitle = readNextCharEventsText(xmlEventReader); if (strCurrentTitle.equalsIgnoreCase("DuckDuckGo")) { int fasd = 8; } if (strCurrentTitle.toLowerCase().contains("duck") && strCurrentTitle.toLowerCase().contains("go")) { int is = 666; } // wenn der Titel eine redirect-Page ist, dann tragen wir die ganze Page aus der EventQueue aus, springen an das endPage, und // haben somit diese Seite ignoriert. 
Ferner ignorieren wir auch spezielle wikipedia-Seiten String strSmallTitle = strCurrentTitle.trim().toLowerCase(); if (hsRedirectPageTitles.contains(strCurrentTitle) || hsRedirectPageTitles.contains(strSmallTitle) || hsRedirectPageTitles.contains(strCurrentTitle.trim()) || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:") || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:") || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:") || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:") || strSmallTitle.startsWith("mediawiki:")) { while (true) { XMLEvent nextXmlEvent = xmlEventReader.nextEvent(); if (nextXmlEvent.isEndElement() && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page")) break; } } else { metadata.add(Metadata.TITLE, strCurrentTitle); metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle); for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) { // wir ignorieren Titel, die sich lediglich durch gro/kleinschreibung unterscheiden if (!StringUtils.containsIgnoreCase(strRedirect, metadata.getValues(Metadata.TITLE))) metadata.add(Metadata.TITLE, strRedirect); } } continue; } // ##### der text if (xmlEvent.asStartElement().getName().getLocalPart().equals("text")) { String strText = readNextCharEventsText(xmlEventReader); if (wikipediaDumpParserConfig.parseLinksAndCategories) parseLinksAndCategories(strText, strBaseURL, metadata, handler); if (wikipediaDumpParserConfig.parseInfoBoxes) parseInfoBox(strText, metadata, handler); if (wikipediaDumpParserConfig.parseGeoCoordinates) parseGeoCoordinates(strText, metadata); // aufgrund einiger Defizite in dem verwendeten cleaner mssen wir hier leider noch zu-und nacharbeiten strText = strText.replaceAll("==\n", "==\n\n"); strText = strText.replaceAll("\n==", "\n\n=="); strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText); strCleanedText = 
strCleanedText.replaceAll("\\{\\{", " "); strCleanedText = strCleanedText.replaceAll("\\}\\}", " "); strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText); continue; } // ##### der timestamp if (xmlEvent.asStartElement().getName().getLocalPart().equals("timestamp")) { String strTimestamp = readNextCharEventsText(xmlEventReader); metadata.add(Metadata.MODIFIED, strTimestamp); continue; } // ##### der username if (xmlEvent.asStartElement().getName().getLocalPart().equals("username")) { String strUsername = readNextCharEventsText(xmlEventReader); metadata.add(Metadata.CREATOR, strUsername); continue; } } } catch (Exception e) { Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e); } }
From source file:com.wegas.core.Helper.java
/**
 * Case-insensitive "contains" on HTML-escaped text.
 *
 * <p>HTML entities in {@code text} are decoded first; {@code criteria} is then
 * searched literally (no regex meta characters are interpreted) and without regard
 * to case. Because decoding can produce non-ASCII letters, matching is
 * Unicode-aware rather than limited to US-ASCII.
 *
 * @param text     text to search in, may contain HTML entities
 * @param criteria literal text to search for
 * @return {@code true} if the decoded text contains the criteria; {@code false}
 *         if it does not or if either argument is {@code null}
 */
public static Boolean insensitiveContains(String text, String criteria) {
    // Pattern.quote(null) would throw; treat a missing criteria like a missing text.
    if (text == null || criteria == null) {
        return false;
    }
    return Pattern.compile(Pattern.quote(criteria), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE)
            .matcher(StringEscapeUtils.unescapeHtml4(text))
            .find();
}
From source file:cgeo.geocaching.connector.gc.GCParser.java
static SearchResult parseCacheFromText(final String pageIn, final CancellableHandler handler) { CancellableHandler.sendLoadProgressDetail(handler, R.string.cache_dialog_loading_details_status_details); if (StringUtils.isBlank(pageIn)) { Log.e("GCParser.parseCache: No page given"); return null; }//ww w . j ava 2 s. c o m final SearchResult searchResult = new SearchResult(); if (pageIn.contains(GCConstants.STRING_UNPUBLISHED_OTHER) || pageIn.contains(GCConstants.STRING_UNPUBLISHED_FROM_SEARCH)) { searchResult.setError(StatusCode.UNPUBLISHED_CACHE); return searchResult; } if (pageIn.contains(GCConstants.STRING_PREMIUMONLY_1) || pageIn.contains(GCConstants.STRING_PREMIUMONLY_2)) { searchResult.setError(StatusCode.PREMIUM_ONLY); return searchResult; } final String cacheName = Html.fromHtml(TextUtils.getMatch(pageIn, GCConstants.PATTERN_NAME, true, "")) .toString(); if (GCConstants.STRING_UNKNOWN_ERROR.equalsIgnoreCase(cacheName)) { searchResult.setError(StatusCode.UNKNOWN_ERROR); return searchResult; } // first handle the content with line breaks, then trim everything for easier matching and reduced memory consumption in parsed fields String personalNoteWithLineBreaks = ""; MatcherWrapper matcher = new MatcherWrapper(GCConstants.PATTERN_PERSONALNOTE, pageIn); if (matcher.find()) { personalNoteWithLineBreaks = matcher.group(1).trim(); } final String page = TextUtils.replaceWhitespace(pageIn); final Geocache cache = new Geocache(); cache.setDisabled(page.contains(GCConstants.STRING_DISABLED)); cache.setArchived(page.contains(GCConstants.STRING_ARCHIVED)); cache.setPremiumMembersOnly(TextUtils.matches(page, GCConstants.PATTERN_PREMIUMMEMBERS)); cache.setFavorite(TextUtils.matches(page, GCConstants.PATTERN_FAVORITE)); // cache geocode cache.setGeocode(TextUtils.getMatch(page, GCConstants.PATTERN_GEOCODE, true, cache.getGeocode())); // cache id cache.setCacheId(TextUtils.getMatch(page, GCConstants.PATTERN_CACHEID, true, cache.getCacheId())); // cache guid 
cache.setGuid(TextUtils.getMatch(page, GCConstants.PATTERN_GUID, true, cache.getGuid())); // name cache.setName(cacheName); // owner real name cache.setOwnerUserId(Network .decode(TextUtils.getMatch(page, GCConstants.PATTERN_OWNER_USERID, true, cache.getOwnerUserId()))); cache.setUserModifiedCoords(false); String tableInside = page; final int pos = tableInside.indexOf(GCConstants.STRING_CACHEDETAILS); if (pos == -1) { Log.e("GCParser.parseCache: ID \"cacheDetails\" not found on page"); return null; } tableInside = tableInside.substring(pos); if (StringUtils.isNotBlank(tableInside)) { // cache terrain String result = TextUtils.getMatch(tableInside, GCConstants.PATTERN_TERRAIN, true, null); if (result != null) { try { cache.setTerrain(Float.parseFloat(StringUtils.replaceChars(result, '_', '.'))); } catch (final NumberFormatException e) { Log.e("Error parsing terrain value", e); } } // cache difficulty result = TextUtils.getMatch(tableInside, GCConstants.PATTERN_DIFFICULTY, true, null); if (result != null) { try { cache.setDifficulty(Float.parseFloat(StringUtils.replaceChars(result, '_', '.'))); } catch (final NumberFormatException e) { Log.e("Error parsing difficulty value", e); } } // owner cache.setOwnerDisplayName(StringEscapeUtils.unescapeHtml4(TextUtils.getMatch(tableInside, GCConstants.PATTERN_OWNER_DISPLAYNAME, true, cache.getOwnerDisplayName()))); // hidden try { String hiddenString = TextUtils.getMatch(tableInside, GCConstants.PATTERN_HIDDEN, true, null); if (StringUtils.isNotBlank(hiddenString)) { cache.setHidden(GCLogin.parseGcCustomDate(hiddenString)); } if (cache.getHiddenDate() == null) { // event date hiddenString = TextUtils.getMatch(tableInside, GCConstants.PATTERN_HIDDENEVENT, true, null); if (StringUtils.isNotBlank(hiddenString)) { cache.setHidden(GCLogin.parseGcCustomDate(hiddenString)); } } } catch (final ParseException e) { // failed to parse cache hidden date Log.w("GCParser.parseCache: Failed to parse cache hidden (event) date"); } // favorite 
try { cache.setFavoritePoints(Integer .parseInt(TextUtils.getMatch(tableInside, GCConstants.PATTERN_FAVORITECOUNT, true, "0"))); } catch (final NumberFormatException e) { Log.e("Error parsing favorite count", e); } // cache size cache.setSize(CacheSize.getById( TextUtils.getMatch(tableInside, GCConstants.PATTERN_SIZE, true, CacheSize.NOT_CHOSEN.id))); } // cache found cache.setFound(TextUtils.matches(page, GCConstants.PATTERN_FOUND) || TextUtils.matches(page, GCConstants.PATTERN_FOUND_ALTERNATIVE)); // cache found date try { final String foundDateString = TextUtils.getMatch(page, GCConstants.PATTERN_FOUND_DATE, true, null); if (StringUtils.isNotBlank(foundDateString)) { cache.setVisitedDate(GCLogin.parseGcCustomDate(foundDateString).getTime()); } } catch (final ParseException e) { // failed to parse cache found date Log.w("GCParser.parseCache: Failed to parse cache found date"); } // cache type cache.setType(CacheType .getByPattern(TextUtils.getMatch(page, GCConstants.PATTERN_TYPE, true, cache.getType().id))); // on watchlist cache.setOnWatchlist(TextUtils.matches(page, GCConstants.PATTERN_WATCHLIST)); // latitude and longitude. 
Can only be retrieved if user is logged in String latlon = TextUtils.getMatch(page, GCConstants.PATTERN_LATLON, true, ""); if (StringUtils.isNotEmpty(latlon)) { try { cache.setCoords(new Geopoint(latlon)); cache.setReliableLatLon(true); } catch (final Geopoint.GeopointException e) { Log.w("GCParser.parseCache: Failed to parse cache coordinates", e); } } // cache location cache.setLocation(TextUtils.getMatch(page, GCConstants.PATTERN_LOCATION, true, "")); // cache hint final String result = TextUtils.getMatch(page, GCConstants.PATTERN_HINT, false, null); if (result != null) { // replace linebreak and paragraph tags final String hint = GCConstants.PATTERN_LINEBREAK.matcher(result).replaceAll("\n"); cache.setHint(StringUtils.replace(hint, "</p>", "").trim()); } cache.checkFields(); // cache personal note cache.setPersonalNote(personalNoteWithLineBreaks); // cache short description cache.setShortDescription(TextUtils.getMatch(page, GCConstants.PATTERN_SHORTDESC, true, "")); // cache description cache.setDescription(TextUtils.getMatch(page, GCConstants.PATTERN_DESC, true, "")); // cache attributes try { final String attributesPre = TextUtils.getMatch(page, GCConstants.PATTERN_ATTRIBUTES, true, null); if (null != attributesPre) { final MatcherWrapper matcherAttributesInside = new MatcherWrapper( GCConstants.PATTERN_ATTRIBUTESINSIDE, attributesPre); final ArrayList<String> attributes = new ArrayList<String>(); while (matcherAttributesInside.find()) { if (matcherAttributesInside.groupCount() > 1 && !matcherAttributesInside.group(2).equalsIgnoreCase("blank")) { // by default, use the tooltip of the attribute String attribute = matcherAttributesInside.group(2).toLowerCase(Locale.US); // if the image name can be recognized, use the image name as attribute final String imageName = matcherAttributesInside.group(1).trim(); if (StringUtils.isNotEmpty(imageName)) { final int start = imageName.lastIndexOf('/'); final int end = imageName.lastIndexOf('.'); if (start >= 0 && end >= 0) 
{ attribute = imageName.substring(start + 1, end).replace('-', '_') .toLowerCase(Locale.US); } } attributes.add(attribute); } } cache.setAttributes(attributes); } } catch (final RuntimeException e) { // failed to parse cache attributes Log.w("GCParser.parseCache: Failed to parse cache attributes"); } // cache spoilers try { if (CancellableHandler.isCancelled(handler)) { return null; } CancellableHandler.sendLoadProgressDetail(handler, R.string.cache_dialog_loading_details_status_spoilers); final MatcherWrapper matcherSpoilersInside = new MatcherWrapper(GCConstants.PATTERN_SPOILER_IMAGE, page); while (matcherSpoilersInside.find()) { // the original spoiler URL (include .../display/... contains a low-resolution image // if we shorten the URL we get the original-resolution image final String url = matcherSpoilersInside.group(1).replace("/display", ""); String title = null; if (matcherSpoilersInside.group(3) != null) { title = matcherSpoilersInside.group(3); } String description = null; if (matcherSpoilersInside.group(4) != null) { description = matcherSpoilersInside.group(4); } cache.addSpoiler(new Image(url, title, description)); } } catch (final RuntimeException e) { // failed to parse cache spoilers Log.w("GCParser.parseCache: Failed to parse cache spoilers"); } // cache inventory try { cache.setInventoryItems(0); final MatcherWrapper matcherInventory = new MatcherWrapper(GCConstants.PATTERN_INVENTORY, page); if (matcherInventory.find()) { if (cache.getInventory() == null) { cache.setInventory(new ArrayList<Trackable>()); } if (matcherInventory.groupCount() > 1) { final String inventoryPre = matcherInventory.group(2); if (StringUtils.isNotBlank(inventoryPre)) { final MatcherWrapper matcherInventoryInside = new MatcherWrapper( GCConstants.PATTERN_INVENTORYINSIDE, inventoryPre); while (matcherInventoryInside.find()) { if (matcherInventoryInside.groupCount() > 0) { final Trackable inventoryItem = new Trackable(); inventoryItem.setGuid(matcherInventoryInside.group(1)); 
inventoryItem.setName(matcherInventoryInside.group(2)); cache.getInventory().add(inventoryItem); cache.setInventoryItems(cache.getInventoryItems() + 1); } } } } } } catch (final RuntimeException e) { // failed to parse cache inventory Log.w("GCParser.parseCache: Failed to parse cache inventory (2)"); } // cache logs counts try { final String countlogs = TextUtils.getMatch(page, GCConstants.PATTERN_COUNTLOGS, true, null); if (null != countlogs) { final MatcherWrapper matcherLog = new MatcherWrapper(GCConstants.PATTERN_COUNTLOG, countlogs); while (matcherLog.find()) { final String typeStr = matcherLog.group(1); final String countStr = getNumberString(matcherLog.group(2)); if (StringUtils.isNotBlank(typeStr) && LogType.UNKNOWN != LogType.getByIconName(typeStr) && StringUtils.isNotBlank(countStr)) { cache.getLogCounts().put(LogType.getByIconName(typeStr), Integer.parseInt(countStr)); } } } } catch (final NumberFormatException e) { // failed to parse logs Log.w("GCParser.parseCache: Failed to parse cache log count"); } // waypoints - reset collection cache.setWaypoints(Collections.<Waypoint>emptyList(), false); // add waypoint for original coordinates in case of user-modified listing-coordinates try { final String originalCoords = TextUtils.getMatch(page, GCConstants.PATTERN_LATLON_ORIG, false, null); if (null != originalCoords) { final Waypoint waypoint = new Waypoint( CgeoApplication.getInstance().getString(R.string.cache_coordinates_original), WaypointType.ORIGINAL, false); waypoint.setCoords(new Geopoint(originalCoords)); cache.addOrChangeWaypoint(waypoint, false); cache.setUserModifiedCoords(true); } } catch (final Geopoint.GeopointException e) { } int wpBegin = page.indexOf("<table class=\"Table\" id=\"ctl00_ContentBody_Waypoints\">"); if (wpBegin != -1) { // parse waypoints if (CancellableHandler.isCancelled(handler)) { return null; } CancellableHandler.sendLoadProgressDetail(handler, R.string.cache_dialog_loading_details_status_waypoints); String wpList = 
page.substring(wpBegin); int wpEnd = wpList.indexOf("</p>"); if (wpEnd > -1 && wpEnd <= wpList.length()) { wpList = wpList.substring(0, wpEnd); } if (!wpList.contains("No additional waypoints to display.")) { wpEnd = wpList.indexOf("</table>"); wpList = wpList.substring(0, wpEnd); wpBegin = wpList.indexOf("<tbody>"); wpEnd = wpList.indexOf("</tbody>"); if (wpBegin >= 0 && wpEnd >= 0 && wpEnd <= wpList.length()) { wpList = wpList.substring(wpBegin + 7, wpEnd); } final String[] wpItems = wpList.split("<tr"); for (int j = 1; j < wpItems.length; j++) { String[] wp = wpItems[j].split("<td"); // waypoint name // res is null during the unit tests final String name = TextUtils.getMatch(wp[6], GCConstants.PATTERN_WPNAME, true, 1, CgeoApplication.getInstance().getString(R.string.waypoint), true); // waypoint type final String resulttype = TextUtils.getMatch(wp[3], GCConstants.PATTERN_WPTYPE, null); final Waypoint waypoint = new Waypoint(name, WaypointType.findById(resulttype), false); // waypoint prefix waypoint.setPrefix(TextUtils.getMatch(wp[4], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON, true, 2, waypoint.getPrefix(), false)); // waypoint lookup waypoint.setLookup(TextUtils.getMatch(wp[5], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON, true, 2, waypoint.getLookup(), false)); // waypoint latitude and longitude latlon = Html.fromHtml(TextUtils.getMatch(wp[7], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON, false, 2, "", false)).toString().trim(); if (!StringUtils.startsWith(latlon, "???")) { waypoint.setLatlon(latlon); waypoint.setCoords(new Geopoint(latlon)); } j++; if (wpItems.length > j) { wp = wpItems[j].split("<td"); } // waypoint note waypoint.setNote(TextUtils.getMatch(wp[3], GCConstants.PATTERN_WPNOTE, waypoint.getNote())); cache.addOrChangeWaypoint(waypoint, false); } } } cache.parseWaypointsFromNote(); // logs cache.setLogs(getLogsFromDetails(page, false)); // last check for necessary cache conditions if (StringUtils.isBlank(cache.getGeocode())) { 
searchResult.setError(StatusCode.UNKNOWN_ERROR); return searchResult; } cache.setDetailedUpdatedNow(); searchResult.addAndPutInCache(Collections.singletonList(cache)); return searchResult; }
From source file:com.rockagen.commons.util.CommUtil.java
/**
 * Decodes HTML 4.0 entities in a string by delegating to
 * {@code StringEscapeUtils.unescapeHtml4(str)}.
 *
 * <p>For example {@code &quot;bread&quot; &amp; &quot;butter&quot;} becomes
 * {@code "bread" & "butter"}. The apostrophe escape {@code &apos;} is not a legal
 * HTML 4.0 entity and is therefore not supported.
 *
 * @param str the value to decode
 * @return the decoded string, or {@code str} itself when it is blank
 */
public static String unescapeHtml4(String str) {
    return isBlank(str) ? str : StringEscapeUtils.unescapeHtml4(str);
}
From source file:info.icefilms.icestream.browse.Location.java
protected static String CleanString(String string) { // Remove any html tags string = Pattern.compile("<.*?>", Pattern.DOTALL).matcher(string).replaceAll(""); // Replace any special HTML characters string = StringEscapeUtils.unescapeHtml4(string); return string; }
From source file:de.akra.idocit.java.services.SimpleJavadocParser.java
/**
 * Converts the {@code <br/>} and {@code <tab/>} markers in a text into a line
 * break and a tab character, then decodes the remaining HTML entities.
 *
 * @param text the text to convert
 * @return the converted, entity-decoded text
 */
private String formatText(final String text) {
    final String withLineBreaks = text.replaceAll("<br/>", StringUtils.NEW_LINE);
    final String withTabs = withLineBreaks.replaceAll("<tab/>", "\t");
    return StringEscapeUtils.unescapeHtml4(withTabs);
}
From source file:com.nttec.everychan.api.util.WakabaReader.java
/** * ? ? , HTML-?, ? ? <span class="filesize">. * ? , ?? {@link #currentAttachments} * ?? {link {@link ThreadModel#attachmentsCount}) {@link #currentThread}.<br> * ? ?? /*from w w w .j a v a2s .c om*/ * (? , ?? , , ??, ?, ? ?). */ protected void parseAttachment(String html) { AttachmentModel attachment = new AttachmentModel(); attachment.size = -1; int startHref, endHref; if ((startHref = html.indexOf("href=\"")) != -1 && (endHref = html.indexOf('\"', startHref + 6)) != -1) { attachment.path = html.substring(startHref + 6, endHref); String pathLower = attachment.path.toLowerCase(Locale.US); if (pathLower.endsWith(".jpg") || pathLower.endsWith(".jpeg") || pathLower.endsWith(".png")) attachment.type = AttachmentModel.TYPE_IMAGE_STATIC; else if (pathLower.endsWith(".gif")) attachment.type = AttachmentModel.TYPE_IMAGE_GIF; else if (pathLower.endsWith(".svg") || pathLower.endsWith(".svgz")) attachment.type = AttachmentModel.TYPE_IMAGE_SVG; else if (pathLower.endsWith(".webm") || pathLower.endsWith(".mp4") || pathLower.endsWith(".ogv")) attachment.type = AttachmentModel.TYPE_VIDEO; else if (pathLower.endsWith(".mp3") || pathLower.endsWith(".ogg")) attachment.type = AttachmentModel.TYPE_AUDIO; else if (pathLower.startsWith("http") && (pathLower.contains("youtube."))) attachment.type = AttachmentModel.TYPE_OTHER_NOTFILE; else attachment.type = AttachmentModel.TYPE_OTHER_FILE; } else { return; } Matcher byteSizeMatcher = ATTACHMENT_SIZE_PATTERN.matcher(html); while (byteSizeMatcher.find()) { try { String digits = byteSizeMatcher.group(1).replace(',', '.'); int multiplier = 1; String prefix = byteSizeMatcher.group(2); if (prefix != null) { if (prefix.equalsIgnoreCase("") || prefix.equalsIgnoreCase("k")) multiplier = 1024; else if (prefix.equalsIgnoreCase("") || prefix.equalsIgnoreCase("m")) multiplier = 1024 * 1024; } int value = Math.round(Float.parseFloat(digits) / 1024 * multiplier); attachment.size = value; char nextChar = ' '; int index = byteSizeMatcher.end(); while 
(index < html.length() && nextChar <= ' ') nextChar = html.charAt(index++); if (nextChar == ',') break; } catch (NumberFormatException e) { } } Matcher pxSizeMatcher = ATTACHMENT_PX_SIZE_PATTERN.matcher(html); int indexEndPxSize = -1; while (pxSizeMatcher.find()) { try { int width = Integer.parseInt(pxSizeMatcher.group(1)); int height = Integer.parseInt(pxSizeMatcher.group(2)); attachment.width = width; attachment.height = height; indexEndPxSize = pxSizeMatcher.end(); char nextChar = ' '; int index = pxSizeMatcher.end(); while (index < html.length() && nextChar <= ' ') nextChar = html.charAt(index++); if (nextChar == ',') break; } catch (NumberFormatException e) { } } if (indexEndPxSize != -1) { Matcher originalNameMatcher = ATTACHMENT_ORIGINAL_NAME_PATTERN.matcher(html); if (originalNameMatcher.find(indexEndPxSize)) { String originalName = originalNameMatcher.group(1).trim(); if (originalName != null && originalName.length() > 0) { attachment.originalName = StringEscapeUtils.unescapeHtml4(originalName); } } } ++currentThread.attachmentsCount; currentAttachments.add(attachment); }