List of usage examples for java.util.regex Pattern MULTILINE
int MULTILINE
To view the source code for java.util.regex Pattern MULTILINE, click the Source Link.
From source file:com.liferay.blade.cli.command.CreateCommandTest.java
/**
 * Asserts that the text of {@code file} is NOT matched, as a whole, by {@code regex}.
 *
 * <p>The expression is compiled with {@link Pattern#MULTILINE} and {@link Pattern#DOTALL},
 * and the check uses {@link java.util.regex.Matcher#matches()}, so the assertion only fails
 * when the expression matches the entire file content, not merely a fragment of it.
 *
 * @param file the file whose content is read through {@code FileUtil.read}
 * @param regex the regular expression the content must not match
 * @throws Exception if reading the file or compiling the expression fails
 */
private void _lacks(File file, String regex) throws Exception {
    // Read first, compile second: same evaluation order as before.
    final String fileText = FileUtil.read(file);

    final int flags = Pattern.MULTILINE | Pattern.DOTALL;
    final boolean matchesWholeFile = Pattern.compile(regex, flags).matcher(fileText).matches();

    Assert.assertFalse(matchesWholeFile);
}
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
private static String extractRegexFromXpath(String original_xpath) { Pattern addedRegex = Pattern.compile("regex\\(.*\\)\\s*$", Pattern.MULTILINE | Pattern.DOTALL); Matcher matcher = addedRegex.matcher(original_xpath); boolean matchFound = matcher.find(); if (matchFound) { try {//ww w .j a v a 2 s . c o m return matcher.group(); } catch (Exception e) { return null; } } return null; }
From source file:org.apache.hadoop.hive.ql.QTestUtil.java
private String getCommand(String tname) { String commands = qMap.get(tname); StringBuilder newCommands = new StringBuilder(commands.length()); int lastMatchEnd = 0; Matcher commentMatcher = Pattern.compile("^--.*$", Pattern.MULTILINE).matcher(commands); // remove the comments while (commentMatcher.find()) { newCommands.append(commands.substring(lastMatchEnd, commentMatcher.start())); lastMatchEnd = commentMatcher.end(); }//from w w w . ja v a 2 s. c o m newCommands.append(commands.substring(lastMatchEnd, commands.length())); commands = newCommands.toString(); return commands; }
From source file:carnero.cgeo.original.libs.Base.java
/**
 * Parses the HTML text of a single cache detail page into a {@code Cache} and returns it
 * wrapped in a {@code CacheWrap}.
 *
 * <p>Processing order, as implemented below:
 * <ol>
 *   <li>Returns {@code null} immediately when {@code page} is {@code null} or empty.</li>
 *   <li>Compiles all regular expressions used for extraction as local variables (this
 *       happens on every call).</li>
 *   <li>Returns a {@code CacheWrap} whose {@code error} field is set, and whose cache list
 *       is left untouched, when the page contains the "Cache is Unpublished" marker or one
 *       of the two "Premium Members only" markers.</li>
 *   <li>Sets {@code disabled}, {@code archived} and {@code members} from fixed marker
 *       strings and stores {@code reason} in {@code cache.reason}.</li>
 *   <li>Extracts geocode, cache id, guid, name and real owner name from the whole page,
 *       and sets {@code cache.own} when the real owner equals the configured user name
 *       (case-insensitive).</li>
 *   <li>Cuts out the section between {@code id="cacheDetails"} and
 *       {@code <div class="CacheInformationTable"}; returns {@code null} when either
 *       marker is missing. Terrain, difficulty, owner, hidden/event date, favourite count
 *       and size are read from that section only.</li>
 *   <li>Extracts found flag, type, coordinates, location, hint, short and long
 *       description, attributes, spoilers, inventory, log counts and logs from the whole
 *       page.</li>
 *   <li>Parses the additional-waypoints table when it is present; each waypoint is read
 *       from a pair of consecutive table rows (data row, then note row).</li>
 *   <li>Looks up the elevation through {@code getElevation} when latitude and longitude
 *       were both parsed, stamps {@code updated} and {@code detailedUpdate} with the
 *       current time, marks the cache as {@code detailed}, and adds it to
 *       {@code caches.cacheList}.</li>
 * </ol>
 *
 * <p>Most individual extraction steps are wrapped in their own {@code try/catch} that logs
 * a warning through {@code Log.w} and continues, so a failure in one field does not abort
 * the remaining steps. When a pattern matches more than once, the last match wins because
 * each {@code while (matcher.find())} loop overwrites the field.
 *
 * <p>NOTE(review): the waypoint table slicing (the {@code substring} calls on
 * {@code wpList}) is not inside a {@code try} block - confirm the page always contains the
 * expected closing tags.
 *
 * @param page the HTML text of the cache detail page
 * @param reason value copied unchanged into {@code cache.reason}
 * @return a {@code CacheWrap} holding the parsed cache, a {@code CacheWrap} with only
 *         {@code error} set for unpublished or premium-only listings, or {@code null} when
 *         no page was given or the expected page sections were not found
 */
public CacheWrap parseCache(String page, int reason) { if (page == null || page.length() == 0) { Log.e(Settings.tag, "cgeoBase.parseCache: No page given"); return null; }// w w w . j a va2 s. c o m final Pattern patternGeocode = Pattern.compile( "<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternCacheId = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternCacheGuid = Pattern.compile( "<link rel=\"alternate\" href=\"[^\"]*/datastore/rss_galleryimages\\.ashx\\?guid=([0-9a-z\\-]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternType = Pattern.compile( "<img src=\"[^\"]*/WptTypes/\\d+\\.gif\" alt=\"([^\"]+)\" (title=\"[^\"]*\" )?width=\"32\" height=\"32\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternName = Pattern.compile( "<h2[^>]*>[^<]*<span id=\"ctl00_ContentBody_CacheName\">([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternSize = Pattern.compile( "<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternDifficulty = Pattern.compile( "<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternTerrain = Pattern.compile( "<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternOwner = Pattern.compile( "<span class=\"minorCacheDetails\">[^\\w]*An?([^\\w]*Event)?[^\\w]*cache[^\\w]*by[^<]*<a 
href=\"[^\"]+\">([^<]+)</a>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternOwnerReal = Pattern.compile( "<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=*([^\"]+)\">[^<]+</a>", Pattern.CASE_INSENSITIVE); final Pattern patternHidden = Pattern.compile( "<span[^>]*>[^\\w]*Hidden[^:]*:[^\\d]*((\\d+)\\/(\\d+)\\/(\\d+))[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternHiddenEvent = Pattern.compile( "<span[^>]*>[^\\w]*Event[^\\w]*Date[^:]*:[^\\w]*[a-zA-Z]+,[^\\d]*((\\d+)[^\\w]*(\\w+)[^\\d]*(\\d+))[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternFavourite = Pattern.compile( "<a id=\"uxFavContainerLink\"[^>]*>[^<]*<div[^<]*<span class=\"favorite-value\">[^\\d]*([0-9]+)[^\\d^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternFound = Pattern.compile( "<p>[^<]*<a id=\"ctl00_ContentBody_hlFoundItLog\"[^<]*<img src=\".*/images/stockholm/16x16/check\\.gif\"[^>]*>[^<]*</a>[^<]*</p>", Pattern.CASE_INSENSITIVE); final Pattern patternLatLon = Pattern.compile("<span id=\"uxLatLon\"[^>]*>([^<]*)<\\/span>", Pattern.CASE_INSENSITIVE); final Pattern patternLocation = Pattern.compile("<span id=\"ctl00_ContentBody_Location\"[^>]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); final Pattern patternHint = Pattern.compile( "<p>([^<]*<strong>)?[^\\w]*Additional Hints([^<]*<\\/strong>)?[^\\(]*\\(<a[^>]+>Encrypt</a>\\)[^<]*<\\/p>[^<]*<div id=\"div_hint\"[^>]*>(.*)</div>[^<]*<div id=[\\'|\"]dk[\\'|\"]", Pattern.CASE_INSENSITIVE); final Pattern patternDescShort = Pattern.compile( "<div class=\"UserSuppliedContent\">[^<]*<span id=\"ctl00_ContentBody_ShortDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^\\w^<]*</div>", Pattern.CASE_INSENSITIVE); final Pattern patternDesc = Pattern.compile( "<div class=\"UserSuppliedContent\">[^<]*<span 
id=\"ctl00_ContentBody_LongDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>[^\\w]*Additional Hints</strong>", Pattern.CASE_INSENSITIVE); final Pattern patternCountLogs = Pattern.compile( "<span id=\"ctl00_ContentBody_lblFindCounts\"><p>(.*)<\\/p><\\/span>", Pattern.CASE_INSENSITIVE); final Pattern patternCountLog = Pattern.compile( " src=\"\\/images\\/icons\\/([^\\.]*).gif\" alt=\"[^\"]*\" title=\"[^\"]*\" />([0-9]*)[^0-9]+", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // final Pattern patternLogs = Pattern.compile("<table class=\"LogsTable[^\"]*\"[^>]*>((?:(?!</table>).)*)</table>[^<]*<p", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogs = Pattern.compile("<table class=\"LogsTable[^\"]*\"[^>]*>(.*)</table>[^<]*<p", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); /* * <tr> * <td class="Nothing"> * <div class="FloatLeft LogDisplayLeft"> * <p class="logOwnerProfileName"> * <strong><a href="/profile/?guid=96c94662-f2c9-450a-99ef-4c034dcde72b" id="184034162">CERV.cz</a></strong> * </p> * <p class="logOwnerBadge"><img src='/images/icons/prem_user.gif' title='Premium Member' /> Premium Member</p> * <p class="logOwnerAvatar"><a href="/profile/?guid=96c94662-f2c9-450a-99ef-4c034dcde72b"><img src="/images/default_avatar.jpg" height='48' width='48' /></a></p> * <p class="logOwnerStats"> * <img src="/images/icons/icon_smile.png" title="Caches Found" /> 567</div> * <div class="FloatLeft LogDisplayRight"> * <div class="HalfLeft LogType"> * <strong><img src="http://www.geocaching.com/images/icons/icon_smile.gif" alt="Found it" title="Found it" /> Found it</strong> * </div> * <div class="HalfRight AlignRight"> * <span class="minorDetails LogDate">09/03/2011</span> * </div> * <div class="Clear LogContent"> * <p class="LogText">13:29 diky za kes!</p> * <div class="AlignRight"> * <small><a href="log.aspx?LUID=8da01276-7881-4ec9-8d23-8938d7f2984e" title="View Log">View Log</a></small> * 
</div></div></div> * </td> * </tr> */ final Pattern patternLogUser = Pattern.compile( "<p class=\"logOwnerProfileName\">[^<]*<strong>[^<]*<a[^>]*>([^<]+)</a>[^<]*</strong>[^<]*</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogFounds = Pattern.compile( "<p class=\"logOwnerStats\"><img[^>]*>[^\\d]*(\\d+)[^<]*</div>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogIcon = Pattern.compile( "<strong>[^<]*<img src=\"[^\"]*/images/icons/([^\"]+)\\.gif\"[^>]*>[^<]+</strong>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogDate = Pattern.compile( "<span class=\"minorDetails LogDate\">([0-9]+/[0-9]+/[0-9]+)[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogText = Pattern.compile("<p class=\"LogText\">((?:(?!</p>).)*)</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternAttributes = Pattern.compile( "<h3 class=\"WidgetHeader\">[^<]*<img[^>]+>[^\\w]*Attributes[^<]*</h3>[^<]*<div class=\"WidgetBody\">(([^<]*<img src=\"[^\"]+\" alt=\"[^\"]+\"[^>]*>)+)[^<]*<p", Pattern.CASE_INSENSITIVE); final Pattern patternAttributesInside = Pattern.compile("[^<]*<img src=\"([^\"]+)\" alt=\"([^\"]+)\"[^>]*>", Pattern.CASE_INSENSITIVE); final Pattern patternSpoilers = Pattern.compile( "<span id=\"ctl00_ContentBody_Images\">((<a href=\"[^\"]+\"[^>]*>[^<]*<img[^>]+>[^<]*<span>[^>]+</span>[^<]*</a>[^<]*<br[^>]*>([^<]*(<br[^>]*>)+)?)+)[^<]*</span>", Pattern.CASE_INSENSITIVE); final Pattern patternSpoilersInside = Pattern.compile( "[^<]*<a href=\"([^\"]+)\"[^>]*>[^<]*<img[^>]+>[^<]*<span>([^>]+)</span>[^<]*</a>[^<]*<br[^>]*>(([^<]*)(<br[^<]*>)+)?", Pattern.CASE_INSENSITIVE); final Pattern patternInventory = Pattern.compile( "<span id=\"ctl00_ContentBody_uxTravelBugList_uxInventoryLabel\">[^\\w]*Inventory[^<]*</span>[^<]*</h3>[^<]*<div class=\"WidgetBody\">([^<]*<ul>(([^<]*<li>[^<]*<a href=\"[^\"]+\"[^>]*>[^<]*<img 
src=\"[^\"]+\"[^>]*>[^<]*<span>[^<]+<\\/span>[^<]*<\\/a>[^<]*<\\/li>)+)[^<]*<\\/ul>)?", Pattern.CASE_INSENSITIVE); final Pattern patternInventoryInside = Pattern.compile( "[^<]*<li>[^<]*<a href=\"[a-z0-9\\-\\_\\.\\?\\/\\:\\@]*\\/track\\/details\\.aspx\\?guid=([0-9a-z\\-]+)[^\"]*\"[^>]*>[^<]*<img src=\"[^\"]+\"[^>]*>[^<]*<span>([^<]+)<\\/span>[^<]*<\\/a>[^<]*<\\/li>", Pattern.CASE_INSENSITIVE); final CacheWrap caches = new CacheWrap(); final Cache cache = new Cache(); if (page.indexOf("Cache is Unpublished") > -1) { caches.error = "cache was unpublished"; return caches; } if (page.indexOf("Sorry, the owner of this listing has made it viewable to Premium Members only.") != -1) { caches.error = "requested cache is for premium members only"; return caches; } if (page.indexOf("has chosen to make this cache listing visible to Premium Members only.") != -1) { caches.error = "requested cache is for premium members only"; return caches; } if (page.indexOf("<li>This cache is temporarily unavailable.") != -1) { cache.disabled = true; } else { cache.disabled = false; } if (page.indexOf("<li>This cache has been archived,") != -1) { cache.archived = true; } else { cache.archived = false; } if (page.indexOf("<p class=\"Warning\">This is a Premium Member Only cache.</p>") != -1) { cache.members = true; } else { cache.members = false; } cache.reason = reason; // cache geocode try { final Matcher matcherGeocode = patternGeocode.matcher(page); while (matcherGeocode.find()) { if (matcherGeocode.groupCount() > 0) { cache.geocode = (String) matcherGeocode.group(1); } } } catch (Exception e) { // failed to parse cache geocode Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache geocode"); } // cache id try { final Matcher matcherCacheId = patternCacheId.matcher(page); while (matcherCacheId.find()) { if (matcherCacheId.groupCount() > 0) { cache.cacheid = (String) matcherCacheId.group(1); } } } catch (Exception e) { // failed to parse cache id Log.w(Settings.tag, 
"cgeoBase.parseCache: Failed to parse cache id"); } // cache guid try { final Matcher matcherCacheGuid = patternCacheGuid.matcher(page); while (matcherCacheGuid.find()) { if (matcherCacheGuid.groupCount() > 0) { cache.guid = (String) matcherCacheGuid.group(1); } } } catch (Exception e) { // failed to parse cache guid Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache guid"); } // name try { final Matcher matcherName = patternName.matcher(page); while (matcherName.find()) { if (matcherName.groupCount() > 0) { cache.name = Html.fromHtml(matcherName.group(1)).toString(); } } } catch (Exception e) { // failed to parse cache name Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache name"); } // owner real name try { final Matcher matcherOwnerReal = patternOwnerReal.matcher(page); while (matcherOwnerReal.find()) { if (matcherOwnerReal.groupCount() > 0) { cache.ownerReal = URLDecoder.decode(matcherOwnerReal.group(1)); } } } catch (Exception e) { // failed to parse owner real name Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache owner real name"); } final String username = settings.getUsername(); if (cache.ownerReal != null && username != null && cache.ownerReal.equalsIgnoreCase(username)) { cache.own = true; } int pos = -1; String tableInside = page; pos = tableInside.indexOf("id=\"cacheDetails\""); if (pos == -1) { Log.e(Settings.tag, "cgeoBase.parseCache: ID \"cacheDetails\" not found on page"); return null; } tableInside = tableInside.substring(pos); pos = tableInside.indexOf("<div class=\"CacheInformationTable\""); if (pos == -1) { Log.e(Settings.tag, "cgeoBase.parseCache: ID \"CacheInformationTable\" not found on page"); return null; } tableInside = tableInside.substring(0, pos); if (tableInside != null && tableInside.length() > 0) { // cache terrain try { final Matcher matcherTerrain = patternTerrain.matcher(tableInside); while (matcherTerrain.find()) { if (matcherTerrain.groupCount() > 0) { cache.terrain = new Float( 
Pattern.compile("_").matcher(matcherTerrain.group(1)).replaceAll(".")); } } } catch (Exception e) { // failed to parse terrain Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache terrain"); } // cache difficulty try { final Matcher matcherDifficulty = patternDifficulty.matcher(tableInside); while (matcherDifficulty.find()) { if (matcherDifficulty.groupCount() > 0) { cache.difficulty = new Float( Pattern.compile("_").matcher(matcherDifficulty.group(1)).replaceAll(".")); } } } catch (Exception e) { // failed to parse difficulty Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache difficulty"); } // owner try { final Matcher matcherOwner = patternOwner.matcher(tableInside); while (matcherOwner.find()) { if (matcherOwner.groupCount() > 0) { cache.owner = Html.fromHtml(matcherOwner.group(2)).toString(); } } } catch (Exception e) { // failed to parse owner Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache owner"); } // hidden try { final Matcher matcherHidden = patternHidden.matcher(tableInside); while (matcherHidden.find()) { if (matcherHidden.groupCount() > 0) { cache.hidden = dateIn.parse(matcherHidden.group(1)); } } } catch (Exception e) { // failed to parse cache hidden date Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache hidden date"); } if (cache.hidden == null) { // event date try { final Matcher matcherHiddenEvent = patternHiddenEvent.matcher(tableInside); while (matcherHiddenEvent.find()) { if (matcherHiddenEvent.groupCount() > 0) { cache.hidden = dateEvIn.parse(matcherHiddenEvent.group(1)); } } } catch (Exception e) { // failed to parse cache event date Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache event date"); } } // favourite try { final Matcher matcherFavourite = patternFavourite.matcher(tableInside); while (matcherFavourite.find()) { if (matcherFavourite.groupCount() > 0) { cache.favouriteCnt = Integer.parseInt(matcherFavourite.group(1)); } } } catch (Exception e) { // failed to parse 
favourite count Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse favourite count"); } // cache size try { final Matcher matcherSize = patternSize.matcher(tableInside); while (matcherSize.find()) { if (matcherSize.groupCount() > 0) { cache.size = matcherSize.group(1).toLowerCase(); } } } catch (Exception e) { // failed to parse size Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache size"); } } // cache found try { final Matcher matcherFound = patternFound.matcher(page); while (matcherFound.find()) { if (matcherFound.group() != null && matcherFound.group().length() > 0) { cache.found = true; } } } catch (Exception e) { // failed to parse found Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse found"); } // cache type try { final Matcher matcherType = patternType.matcher(page); while (matcherType.find()) { if (matcherType.groupCount() > 0) { cache.type = cacheTypes.get(matcherType.group(1).toLowerCase()); } } } catch (Exception e) { // failed to parse type Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache type"); } // latitude and logitude try { final Matcher matcherLatLon = patternLatLon.matcher(page); while (matcherLatLon.find()) { if (matcherLatLon.groupCount() > 0) { cache.latlon = matcherLatLon.group(1); HashMap<String, Object> tmp = this.parseLatlon(cache.latlon); if (tmp.size() > 0) { cache.latitude = (Double) tmp.get("latitude"); cache.longitude = (Double) tmp.get("longitude"); cache.latitudeString = (String) tmp.get("latitudeString"); cache.longitudeString = (String) tmp.get("longitudeString"); cache.reliableLatLon = true; } tmp = null; } } } catch (Exception e) { // failed to parse latitude and/or longitude Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache coordinates"); } // cache location try { final Matcher matcherLocation = patternLocation.matcher(page); while (matcherLocation.find()) { if (matcherLocation.groupCount() > 0) { cache.location = matcherLocation.group(1); } } } catch (Exception e) { 
// failed to parse location Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache location"); } // cache hint try { final Matcher matcherHint = patternHint.matcher(page); while (matcherHint.find()) { if (matcherHint.groupCount() > 2 && matcherHint.group(3) != null) { // replace linebreak and paragraph tags String hint = Pattern.compile("<(br|p)[^>]*>").matcher(matcherHint.group(3)).replaceAll("\n"); if (hint != null) { cache.hint = hint.replaceAll(Pattern.quote("</p>"), "").trim(); } } } } catch (Exception e) { // failed to parse hint Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache hint"); } // cache short description try { final Matcher matcherDescShort = patternDescShort.matcher(page); while (matcherDescShort.find()) { if (matcherDescShort.groupCount() > 0) { cache.shortdesc = matcherDescShort.group(1).trim(); } } } catch (Exception e) { // failed to parse short description Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache short description"); } // cache description try { final Matcher matcherDesc = patternDesc.matcher(page); while (matcherDesc.find()) { if (matcherDesc.groupCount() > 0) { cache.description = matcherDesc.group(1); } } } catch (Exception e) { // failed to parse short description Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache description"); } // cache attributes try { final Matcher matcherAttributes = patternAttributes.matcher(page); while (matcherAttributes.find()) { if (matcherAttributes.groupCount() > 0) { final String attributesPre = matcherAttributes.group(1); final Matcher matcherAttributesInside = patternAttributesInside.matcher(attributesPre); while (matcherAttributesInside.find()) { if (matcherAttributesInside.groupCount() > 1 && matcherAttributesInside.group(2).equalsIgnoreCase("blank") != true) { if (cache.attributes == null) { cache.attributes = new ArrayList<String>(); } // by default, use the tooltip of the attribute String attribute = 
matcherAttributesInside.group(2).toLowerCase(); // if the image name can be recognized, use the image name as attribute String imageName = matcherAttributesInside.group(1).trim(); if (imageName.length() > 0) { int start = imageName.lastIndexOf('/'); int end = imageName.lastIndexOf('.'); if (start >= 0 && end >= 0) { attribute = imageName.substring(start + 1, end).replace('-', '_'); } } cache.attributes.add(attribute); } } } } } catch (Exception e) { // failed to parse cache attributes Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache attributes"); } // cache spoilers try { final Matcher matcherSpoilers = patternSpoilers.matcher(page); while (matcherSpoilers.find()) { if (matcherSpoilers.groupCount() > 0) { final String spoilersPre = matcherSpoilers.group(1); final Matcher matcherSpoilersInside = patternSpoilersInside.matcher(spoilersPre); while (matcherSpoilersInside.find()) { if (matcherSpoilersInside.groupCount() > 0) { final Spoiler spoiler = new Spoiler(); spoiler.url = matcherSpoilersInside.group(1); if (matcherSpoilersInside.group(2) != null) { spoiler.title = matcherSpoilersInside.group(2); } if (matcherSpoilersInside.group(4) != null) { spoiler.description = matcherSpoilersInside.group(4); } if (cache.spoilers == null) { cache.spoilers = new ArrayList<Spoiler>(); } cache.spoilers.add(spoiler); } } } } } catch (Exception e) { // failed to parse cache spoilers Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache spoilers"); } // cache inventory try { cache.inventoryItems = 0; final Matcher matcherInventory = patternInventory.matcher(page); while (matcherInventory.find()) { if (cache.inventory == null) { cache.inventory = new ArrayList<Trackable>(); } if (matcherInventory.groupCount() > 1) { final String inventoryPre = matcherInventory.group(2); if (inventoryPre != null && inventoryPre.length() > 0) { final Matcher matcherInventoryInside = patternInventoryInside.matcher(inventoryPre); while (matcherInventoryInside.find()) { if 
(matcherInventoryInside.groupCount() > 0) { final Trackable inventoryItem = new Trackable(); inventoryItem.guid = matcherInventoryInside.group(1); inventoryItem.name = matcherInventoryInside.group(2); cache.inventory.add(inventoryItem); cache.inventoryItems++; } } } } } } catch (Exception e) { // failed to parse cache inventory Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache inventory (2)"); } // cache logs counts try { final Matcher matcherLogCounts = patternCountLogs.matcher(page); while (matcherLogCounts.find()) { if (matcherLogCounts.groupCount() > 0) { final String[] logs = matcherLogCounts.group(1).split("<img"); final int logsCnt = logs.length; for (int k = 1; k < logsCnt; k++) { Integer type = null; Integer count = null; final Matcher matcherLog = patternCountLog.matcher(logs[k]); if (matcherLog.find()) { String typeStr = matcherLog.group(1); String countStr = matcherLog.group(2); if (typeStr != null && typeStr.length() > 0) { if (logTypes.containsKey(typeStr.toLowerCase()) == true) { type = logTypes.get(typeStr.toLowerCase()); } } if (countStr != null && countStr.length() > 0) { count = Integer.parseInt(countStr); } if (type != null && count != null) { cache.logCounts.put(type, count); } } } } } } catch (Exception e) { // failed to parse logs Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache log count"); } // cache logs try { final Matcher matcherLogs = patternLogs.matcher(page); while (matcherLogs.find()) { Log.d(">>>>", "cnt: " + matcherLogs.groupCount()); if (matcherLogs.groupCount() > 0) { final String[] logs = matcherLogs.group(1).split("</tr><tr>"); final int logsCnt = logs.length; for (int k = 0; k < logsCnt; k++) { final CacheLog logDone = new CacheLog(); Matcher matcher; matcher = patternLogUser.matcher(logs[k]); if (matcher.find() && matcher.groupCount() > 0) { logDone.author = matcher.group(1).trim(); logDone.author = Html.fromHtml(logDone.author).toString(); } matcher = patternLogFounds.matcher(logs[k]); if 
(matcher.find() && matcher.groupCount() > 0) { try { logDone.found = Integer.parseInt(matcher.group(1).trim()); } catch (Exception e) { // NaN } } matcher = patternLogIcon.matcher(logs[k]); if (matcher.find() && matcher.groupCount() > 0) { if (logTypes.containsKey(matcher.group(1).toLowerCase()) == true) { logDone.type = logTypes.get(matcher.group(1).toLowerCase()); } else { logDone.type = logTypes.get("icon_note"); } } matcher = patternLogDate.matcher(logs[k]); if (matcher.find() && matcher.groupCount() > 0) { Date logDate = dateLogIn.parse(matcher.group(1)); if (logDate != null) { logDone.date = logDate.getTime(); } } matcher = patternLogText.matcher(logs[k]); if (matcher.find() && matcher.groupCount() > 0) { logDone.log = matcher.group(1).trim(); } if (cache.logs == null) { cache.logs = new ArrayList<CacheLog>(); } cache.logs.add(logDone); } } } } catch (Exception e) { // failed to parse logs Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse cache logs"); } int wpBegin = 0; int wpEnd = 0; wpBegin = page.indexOf("<table class=\"Table\" id=\"ctl00_ContentBody_Waypoints\">"); if (wpBegin != -1) { // parse waypoints final Pattern patternWpType = Pattern.compile("\\/wpttypes\\/sm\\/(.+)\\.jpg", Pattern.CASE_INSENSITIVE); final Pattern patternWpPrefixOrLookupOrLatlon = Pattern .compile(">([^<]*<[^>]+>)?([^<]+)(<[^>]+>[^<]*)?<\\/td>", Pattern.CASE_INSENSITIVE); final Pattern patternWpName = Pattern.compile(">[^<]*<a[^>]+>([^<]*)<\\/a>", Pattern.CASE_INSENSITIVE); final Pattern patternWpNote = Pattern.compile("colspan=\"6\">(.*)<\\/td>", Pattern.CASE_INSENSITIVE); String wpList = page.substring(wpBegin); wpEnd = wpList.indexOf("</p>"); if (wpEnd > -1 && wpEnd <= wpList.length()) { wpList = wpList.substring(0, wpEnd); } if (wpList.indexOf("No additional waypoints to display.") == -1) { wpEnd = wpList.indexOf("</table>"); wpList = wpList.substring(0, wpEnd); wpBegin = wpList.indexOf("<tbody>"); wpEnd = wpList.indexOf("</tbody>"); if (wpBegin >= 0 && wpEnd >= 0 && 
wpEnd <= wpList.length()) { wpList = wpList.substring(wpBegin + 7, wpEnd); } final String[] wpItems = wpList.split("<tr"); String[] wp; for (int j = 1; j < wpItems.length; j++) { final Waypoint waypoint = new Waypoint(); wp = wpItems[j].split("<td"); // waypoint type try { final Matcher matcherWpType = patternWpType.matcher(wp[3]); while (matcherWpType.find()) { if (matcherWpType.groupCount() > 0) { waypoint.type = matcherWpType.group(1); if (waypoint.type != null) { waypoint.type = waypoint.type.trim(); } } } } catch (Exception e) { // failed to parse type Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint type"); } // waypoint prefix try { final Matcher matcherWpPrefix = patternWpPrefixOrLookupOrLatlon.matcher(wp[4]); while (matcherWpPrefix.find()) { if (matcherWpPrefix.groupCount() > 1) { waypoint.prefix = matcherWpPrefix.group(2); if (waypoint.prefix != null) { waypoint.prefix = waypoint.prefix.trim(); } } } } catch (Exception e) { // failed to parse prefix Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint prefix"); } // waypoint lookup try { final Matcher matcherWpLookup = patternWpPrefixOrLookupOrLatlon.matcher(wp[5]); while (matcherWpLookup.find()) { if (matcherWpLookup.groupCount() > 1) { waypoint.lookup = matcherWpLookup.group(2); if (waypoint.lookup != null) { waypoint.lookup = waypoint.lookup.trim(); } } } } catch (Exception e) { // failed to parse lookup Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint lookup"); } // waypoint name try { final Matcher matcherWpName = patternWpName.matcher(wp[6]); while (matcherWpName.find()) { if (matcherWpName.groupCount() > 0) { waypoint.name = matcherWpName.group(1); if (waypoint.name != null) { waypoint.name = waypoint.name.trim(); } } } } catch (Exception e) { // failed to parse name Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint name"); } // waypoint latitude and logitude try { final Matcher matcherWpLatLon = 
patternWpPrefixOrLookupOrLatlon.matcher(wp[7]); while (matcherWpLatLon.find()) { if (matcherWpLatLon.groupCount() > 1) { waypoint.latlon = Html.fromHtml(matcherWpLatLon.group(2)).toString(); final HashMap<String, Object> tmp = this.parseLatlon(waypoint.latlon); if (tmp.size() > 0) { waypoint.latitude = (Double) tmp.get("latitude"); waypoint.longitude = (Double) tmp.get("longitude"); waypoint.latitudeString = (String) tmp.get("latitudeString"); waypoint.longitudeString = (String) tmp.get("longitudeString"); } } } } catch (Exception e) { // failed to parse latitude and/or longitude Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint coordinates"); } j++; if (wpItems.length > j) { wp = wpItems[j].split("<td"); } // waypoint note try { final Matcher matcherWpNote = patternWpNote.matcher(wp[3]); while (matcherWpNote.find()) { if (matcherWpNote.groupCount() > 0) { waypoint.note = matcherWpNote.group(1); if (waypoint.note != null) { waypoint.note = waypoint.note.trim(); } } } } catch (Exception e) { // failed to parse note Log.w(Settings.tag, "cgeoBase.parseCache: Failed to parse waypoint note"); } if (cache.waypoints == null) cache.waypoints = new ArrayList<Waypoint>(); cache.waypoints.add(waypoint); } } } if (cache.latitude != null && cache.longitude != null) { cache.elevation = getElevation(cache.latitude, cache.longitude); } cache.updated = System.currentTimeMillis(); cache.detailedUpdate = System.currentTimeMillis(); cache.detailed = true; caches.cacheList.add(cache); return caches; }
From source file:carnero.cgeo.cgBase.java
public cgCacheWrap parseCache(String page, int reason) { if (page == null || page.length() == 0) { Log.e(cgSettings.tag, "cgeoBase.parseCache: No page given"); return null; }//from w w w . jav a 2s . c o m final Pattern patternGeocode = Pattern.compile( "<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternCacheId = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternCacheGuid = Pattern.compile( "<link rel=\"alternate\" href=\"[^\"]*/datastore/rss_galleryimages\\.ashx\\?guid=([0-9a-z\\-]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternType = Pattern.compile( "<img src=\"[^\"]*/WptTypes/\\d+\\.gif\" alt=\"([^\"]+)\" (title=\"[^\"]*\" )?width=\"32\" height=\"32\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternName = Pattern.compile( "<h2[^>]*>[^<]*<span id=\"ctl00_ContentBody_CacheName\">([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternSize = Pattern.compile( "<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternDifficulty = Pattern.compile( "<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternTerrain = Pattern.compile( "<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternOwner = Pattern.compile( "<span class=\"minorCacheDetails\">[^\\w]*An?([^\\w]*Event)?[^\\w]*cache[^\\w]*by[^<]*<a 
href=\"[^\"]+\">([^<]+)</a>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternOwnerReal = Pattern.compile( "<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=*([^\"]+)\">[^<]+</a>", Pattern.CASE_INSENSITIVE); final Pattern patternHidden = Pattern.compile( "<span[^>]*>[^\\w]*Hidden[^:]*:[^\\d]*((\\d+)\\/(\\d+)\\/(\\d+))[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternHiddenEvent = Pattern.compile( "<span[^>]*>[^\\w]*Event[^\\w]*Date[^:]*:[^\\w]*[a-zA-Z]+,[^\\d]*((\\d+)[^\\w]*(\\w+)[^\\d]*(\\d+))[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternFavourite = Pattern.compile( "<a id=\"uxFavContainerLink\"[^>]*>[^<]*<div[^<]*<span class=\"favorite-value\">[^\\d]*([0-9]+)[^\\d^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternFound = Pattern.compile( "<p>[^<]*<a id=\"ctl00_ContentBody_hlFoundItLog\"[^<]*<img src=\".*/images/stockholm/16x16/check\\.gif\"[^>]*>[^<]*</a>[^<]*</p>", Pattern.CASE_INSENSITIVE); final Pattern patternLatLon = Pattern.compile( "<span id=\"ctl00_ContentBody_LatLon\"[^>]*>(<b>)?([^<]*)(<\\/b>)?<\\/span>", Pattern.CASE_INSENSITIVE); final Pattern patternLocation = Pattern.compile("<span id=\"ctl00_ContentBody_Location\"[^>]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); final Pattern patternHint = Pattern.compile( "<p>([^<]*<strong>)?[^\\w]*Additional Hints([^<]*<\\/strong>)?[^\\(]*\\(<a[^>]+>Encrypt</a>\\)[^<]*<\\/p>[^<]*<div id=\"div_hint\"[^>]*>(.*)</div>[^<]*<div id=[\\'|\"]dk[\\'|\"]", Pattern.CASE_INSENSITIVE); final Pattern patternDescShort = Pattern.compile( "<div class=\"UserSuppliedContent\">[^<]*<span id=\"ctl00_ContentBody_ShortDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^\\w^<]*</div>", Pattern.CASE_INSENSITIVE); final Pattern patternDesc = Pattern.compile( "<div class=\"UserSuppliedContent\">[^<]*<span 
id=\"ctl00_ContentBody_LongDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>[^\\w]*Additional Hints</strong>", Pattern.CASE_INSENSITIVE); final Pattern patternCountLogs = Pattern.compile( "<span id=\"ctl00_ContentBody_lblFindCounts\"><p>(.*)<\\/p><\\/span>", Pattern.CASE_INSENSITIVE); final Pattern patternCountLog = Pattern.compile( " src=\"\\/images\\/icons\\/([^\\.]*).gif\" alt=\"[^\"]*\" title=\"[^\"]*\" />([0-9]*)[^0-9]+", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLogs = Pattern.compile( "<table class=\"LogsTable[^\"]*\"[^>]*>[^<]*<tr>(.*)</tr>[^<]*</table>[^<]*<p", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternLog = Pattern.compile( "<td[^>]*>[^<]*<strong>[^<]*<img src=\"[^\"]*/images/icons/([^\\.]+)\\.[a-z]{2,5}\"[^>]*> ([a-zA-Z]+) (\\d+)(, (\\d+))? by <a href=[^>]+>([^<]+)</a>[<^]*</strong>([^\\(]*\\((\\d+) found\\))?(<br[^>]*>)+((?:(?!<small>).)*)(<br[^>]*>)+<small>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternAttributes = Pattern.compile( "<h3 class=\"WidgetHeader\">[^<]*<img[^>]+>[^\\w]*Attributes[^<]*</h3>[^<]*<div class=\"WidgetBody\">(([^<]*<img src=\"[^\"]+\" alt=\"[^\"]+\"[^>]*>)+)[^<]*<p", Pattern.CASE_INSENSITIVE); final Pattern patternAttributesInside = Pattern.compile("[^<]*<img src=\"([^\"]+)\" alt=\"([^\"]+)\"[^>]*>", Pattern.CASE_INSENSITIVE); final Pattern patternSpoilers = Pattern.compile( "<span id=\"ctl00_ContentBody_Images\">((<a href=\"[^\"]+\"[^>]*>[^<]*<img[^>]+>[^<]*<span>[^>]+</span>[^<]*</a>[^<]*<br[^>]*>([^<]*(<br[^>]*>)+)?)+)[^<]*</span>", Pattern.CASE_INSENSITIVE); final Pattern patternSpoilersInside = Pattern.compile( "[^<]*<a href=\"([^\"]+)\"[^>]*>[^<]*<img[^>]+>[^<]*<span>([^>]+)</span>[^<]*</a>[^<]*<br[^>]*>(([^<]*)(<br[^<]*>)+)?", Pattern.CASE_INSENSITIVE); final Pattern patternInventory = Pattern.compile( "<span 
id=\"ctl00_ContentBody_uxTravelBugList_uxInventoryLabel\">[^\\w]*Inventory[^<]*</span>[^<]*</h3>[^<]*<div class=\"WidgetBody\">([^<]*<ul>(([^<]*<li>[^<]*<a href=\"[^\"]+\"[^>]*>[^<]*<img src=\"[^\"]+\"[^>]*>[^<]*<span>[^<]+<\\/span>[^<]*<\\/a>[^<]*<\\/li>)+)[^<]*<\\/ul>)?", Pattern.CASE_INSENSITIVE); final Pattern patternInventoryInside = Pattern.compile( "[^<]*<li>[^<]*<a href=\"[a-z0-9\\-\\_\\.\\?\\/\\:\\@]*\\/track\\/details\\.aspx\\?guid=([0-9a-z\\-]+)[^\"]*\"[^>]*>[^<]*<img src=\"[^\"]+\"[^>]*>[^<]*<span>([^<]+)<\\/span>[^<]*<\\/a>[^<]*<\\/li>", Pattern.CASE_INSENSITIVE); final cgCacheWrap caches = new cgCacheWrap(); final cgCache cache = new cgCache(); if (page.indexOf("Cache is Unpublished") > -1) { caches.error = "cache was unpublished"; return caches; } if (page.indexOf("Sorry, the owner of this listing has made it viewable to Premium Members only.") != -1) { caches.error = "requested cache is for premium members only"; return caches; } if (page.indexOf("has chosen to make this cache listing visible to Premium Members only.") != -1) { caches.error = "requested cache is for premium members only"; return caches; } if (page.indexOf("<li>This cache is temporarily unavailable.") != -1) { cache.disabled = true; } else { cache.disabled = false; } if (page.indexOf("<li>This cache has been archived,") != -1) { cache.archived = true; } else { cache.archived = false; } if (page.indexOf("<p class=\"Warning\">This is a Premium Member Only cache.</p>") != -1) { cache.members = true; } else { cache.members = false; } cache.reason = reason; // cache geocode try { final Matcher matcherGeocode = patternGeocode.matcher(page); while (matcherGeocode.find()) { if (matcherGeocode.groupCount() > 0) { cache.geocode = (String) matcherGeocode.group(1); } } } catch (Exception e) { // failed to parse cache geocode Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache geocode"); } // cache id try { final Matcher matcherCacheId = patternCacheId.matcher(page); while 
(matcherCacheId.find()) { if (matcherCacheId.groupCount() > 0) { cache.cacheid = (String) matcherCacheId.group(1); } } } catch (Exception e) { // failed to parse cache id Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache id"); } // cache guid try { final Matcher matcherCacheGuid = patternCacheGuid.matcher(page); while (matcherCacheGuid.find()) { if (matcherCacheGuid.groupCount() > 0) { cache.guid = (String) matcherCacheGuid.group(1); } } } catch (Exception e) { // failed to parse cache guid Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache guid"); } // name try { final Matcher matcherName = patternName.matcher(page); while (matcherName.find()) { if (matcherName.groupCount() > 0) { cache.name = Html.fromHtml(matcherName.group(1)).toString(); } } } catch (Exception e) { // failed to parse cache name Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache name"); } // owner real name try { final Matcher matcherOwnerReal = patternOwnerReal.matcher(page); while (matcherOwnerReal.find()) { if (matcherOwnerReal.groupCount() > 0) { cache.ownerReal = URLDecoder.decode(matcherOwnerReal.group(1)); } } } catch (Exception e) { // failed to parse owner real name Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache owner real name"); } final String username = settings.getUsername(); if (cache.ownerReal != null && username != null && cache.ownerReal.equalsIgnoreCase(username)) { cache.own = true; } int pos = -1; String tableInside = page; pos = tableInside.indexOf("id=\"cacheDetails\""); if (pos == -1) { Log.e(cgSettings.tag, "cgeoBase.parseCache: ID \"cacheDetails\" not found on page"); return null; } tableInside = tableInside.substring(pos); pos = tableInside.indexOf("<div class=\"CacheInformationTable\""); if (pos == -1) { Log.e(cgSettings.tag, "cgeoBase.parseCache: ID \"CacheInformationTable\" not found on page"); return null; } tableInside = tableInside.substring(0, pos); if (tableInside != null && tableInside.length() > 
0) { // cache terrain try { final Matcher matcherTerrain = patternTerrain.matcher(tableInside); while (matcherTerrain.find()) { if (matcherTerrain.groupCount() > 0) { cache.terrain = new Float( Pattern.compile("_").matcher(matcherTerrain.group(1)).replaceAll(".")); } } } catch (Exception e) { // failed to parse terrain Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache terrain"); } // cache difficulty try { final Matcher matcherDifficulty = patternDifficulty.matcher(tableInside); while (matcherDifficulty.find()) { if (matcherDifficulty.groupCount() > 0) { cache.difficulty = new Float( Pattern.compile("_").matcher(matcherDifficulty.group(1)).replaceAll(".")); } } } catch (Exception e) { // failed to parse difficulty Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache difficulty"); } // owner try { final Matcher matcherOwner = patternOwner.matcher(tableInside); while (matcherOwner.find()) { if (matcherOwner.groupCount() > 0) { cache.owner = Html.fromHtml(matcherOwner.group(2)).toString(); } } } catch (Exception e) { // failed to parse owner Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache owner"); } // hidden try { final Matcher matcherHidden = patternHidden.matcher(tableInside); while (matcherHidden.find()) { if (matcherHidden.groupCount() > 0) { cache.hidden = dateIn.parse(matcherHidden.group(1)); } } } catch (Exception e) { // failed to parse cache hidden date Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache hidden date"); } if (cache.hidden == null) { // event date try { final Matcher matcherHiddenEvent = patternHiddenEvent.matcher(tableInside); while (matcherHiddenEvent.find()) { if (matcherHiddenEvent.groupCount() > 0) { cache.hidden = dateEvIn.parse(matcherHiddenEvent.group(1)); } } } catch (Exception e) { // failed to parse cache event date Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache event date"); } } // favourite try { final Matcher matcherFavourite = 
patternFavourite.matcher(tableInside); while (matcherFavourite.find()) { if (matcherFavourite.groupCount() > 0) { cache.favouriteCnt = Integer.parseInt(matcherFavourite.group(1)); } } } catch (Exception e) { // failed to parse favourite count Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse favourite count"); } // cache size try { final Matcher matcherSize = patternSize.matcher(tableInside); while (matcherSize.find()) { if (matcherSize.groupCount() > 0) { cache.size = matcherSize.group(1).toLowerCase(); } } } catch (Exception e) { // failed to parse size Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache size"); } } // cache found try { final Matcher matcherFound = patternFound.matcher(page); while (matcherFound.find()) { if (matcherFound.group() != null && matcherFound.group().length() > 0) { cache.found = true; } } } catch (Exception e) { // failed to parse found Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse found"); } // cache type try { final Matcher matcherType = patternType.matcher(page); while (matcherType.find()) { if (matcherType.groupCount() > 0) { cache.type = cacheTypes.get(matcherType.group(1).toLowerCase()); } } } catch (Exception e) { // failed to parse type Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache type"); } // latitude and logitude try { final Matcher matcherLatLon = patternLatLon.matcher(page); while (matcherLatLon.find()) { if (matcherLatLon.groupCount() > 0) { cache.latlon = matcherLatLon.group(2); // first is <b> HashMap<String, Object> tmp = this.parseLatlon(cache.latlon); if (tmp.size() > 0) { cache.latitude = (Double) tmp.get("latitude"); cache.longitude = (Double) tmp.get("longitude"); cache.latitudeString = (String) tmp.get("latitudeString"); cache.longitudeString = (String) tmp.get("longitudeString"); cache.reliableLatLon = true; } tmp = null; } } } catch (Exception e) { // failed to parse latitude and/or longitude Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse 
cache coordinates"); } // cache location try { final Matcher matcherLocation = patternLocation.matcher(page); while (matcherLocation.find()) { if (matcherLocation.groupCount() > 0) { cache.location = matcherLocation.group(1); } } } catch (Exception e) { // failed to parse location Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache location"); } // cache hint try { final Matcher matcherHint = patternHint.matcher(page); while (matcherHint.find()) { if (matcherHint.groupCount() > 2 && matcherHint.group(3) != null) { // replace linebreak and paragraph tags String hint = Pattern.compile("<(br|p)[^>]*>").matcher(matcherHint.group(3)).replaceAll("\n"); if (hint != null) { cache.hint = hint.replaceAll(Pattern.quote("</p>"), "").trim(); } } } } catch (Exception e) { // failed to parse hint Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache hint"); } /* // short info debug Log.d(cgSettings.tag, "gc-code: " + cache.geocode); Log.d(cgSettings.tag, "id: " + cache.cacheid); Log.d(cgSettings.tag, "guid: " + cache.guid); Log.d(cgSettings.tag, "name: " + cache.name); Log.d(cgSettings.tag, "terrain: " + cache.terrain); Log.d(cgSettings.tag, "difficulty: " + cache.difficulty); Log.d(cgSettings.tag, "owner: " + cache.owner); Log.d(cgSettings.tag, "owner (real): " + cache.ownerReal); Log.d(cgSettings.tag, "hidden: " + dateOutShort.format(cache.hidden)); Log.d(cgSettings.tag, "favorite: " + cache.favouriteCnt); Log.d(cgSettings.tag, "size: " + cache.size); if (cache.found) { Log.d(cgSettings.tag, "found!"); } else { Log.d(cgSettings.tag, "not found"); } Log.d(cgSettings.tag, "type: " + cache.type); Log.d(cgSettings.tag, "latitude: " + String.format("%.6f", cache.latitude)); Log.d(cgSettings.tag, "longitude: " + String.format("%.6f", cache.longitude)); Log.d(cgSettings.tag, "location: " + cache.location); Log.d(cgSettings.tag, "hint: " + cache.hint); */ // cache short description try { final Matcher matcherDescShort = patternDescShort.matcher(page); while 
(matcherDescShort.find()) { if (matcherDescShort.groupCount() > 0) { cache.shortdesc = matcherDescShort.group(1).trim(); } } } catch (Exception e) { // failed to parse short description Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache short description"); } // cache description try { final Matcher matcherDesc = patternDesc.matcher(page); while (matcherDesc.find()) { if (matcherDesc.groupCount() > 0) { cache.description = matcherDesc.group(1); } } } catch (Exception e) { // failed to parse short description Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache description"); } // cache attributes try { final Matcher matcherAttributes = patternAttributes.matcher(page); while (matcherAttributes.find()) { if (matcherAttributes.groupCount() > 0) { final String attributesPre = matcherAttributes.group(1); final Matcher matcherAttributesInside = patternAttributesInside.matcher(attributesPre); while (matcherAttributesInside.find()) { if (matcherAttributesInside.groupCount() > 1 && matcherAttributesInside.group(2).equalsIgnoreCase("blank") != true) { if (cache.attributes == null) { cache.attributes = new ArrayList<String>(); } // by default, use the tooltip of the attribute String attribute = matcherAttributesInside.group(2).toLowerCase(); // if the image name can be recognized, use the image name as attribute String imageName = matcherAttributesInside.group(1).trim(); if (imageName.length() > 0) { int start = imageName.lastIndexOf('/'); int end = imageName.lastIndexOf('.'); if (start >= 0 && end >= 0) { attribute = imageName.substring(start + 1, end).replace('-', '_'); } } cache.attributes.add(attribute); } } } } } catch (Exception e) { // failed to parse cache attributes Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache attributes"); } // cache spoilers try { final Matcher matcherSpoilers = patternSpoilers.matcher(page); while (matcherSpoilers.find()) { if (matcherSpoilers.groupCount() > 0) { final String spoilersPre = 
matcherSpoilers.group(1); final Matcher matcherSpoilersInside = patternSpoilersInside.matcher(spoilersPre); while (matcherSpoilersInside.find()) { if (matcherSpoilersInside.groupCount() > 0) { final cgSpoiler spoiler = new cgSpoiler(); spoiler.url = matcherSpoilersInside.group(1); if (matcherSpoilersInside.group(2) != null) { spoiler.title = matcherSpoilersInside.group(2); } if (matcherSpoilersInside.group(4) != null) { spoiler.description = matcherSpoilersInside.group(4); } if (cache.spoilers == null) { cache.spoilers = new ArrayList<cgSpoiler>(); } cache.spoilers.add(spoiler); } } } } } catch (Exception e) { // failed to parse cache spoilers Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache spoilers"); } // cache inventory try { cache.inventoryItems = 0; final Matcher matcherInventory = patternInventory.matcher(page); while (matcherInventory.find()) { if (cache.inventory == null) { cache.inventory = new ArrayList<cgTrackable>(); } if (matcherInventory.groupCount() > 1) { final String inventoryPre = matcherInventory.group(2); if (inventoryPre != null && inventoryPre.length() > 0) { final Matcher matcherInventoryInside = patternInventoryInside.matcher(inventoryPre); while (matcherInventoryInside.find()) { if (matcherInventoryInside.groupCount() > 0) { final cgTrackable inventoryItem = new cgTrackable(); inventoryItem.guid = matcherInventoryInside.group(1); inventoryItem.name = matcherInventoryInside.group(2); cache.inventory.add(inventoryItem); cache.inventoryItems++; } } } } } } catch (Exception e) { // failed to parse cache inventory Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache inventory (2)"); } // cache logs counts try { final Matcher matcherLogCounts = patternCountLogs.matcher(page); while (matcherLogCounts.find()) { if (matcherLogCounts.groupCount() > 0) { final String[] logs = matcherLogCounts.group(1).split("<img"); final int logsCnt = logs.length; for (int k = 1; k < logsCnt; k++) { Integer type = null; Integer count = 
null; final Matcher matcherLog = patternCountLog.matcher(logs[k]); if (matcherLog.find()) { String typeStr = matcherLog.group(1); String countStr = matcherLog.group(2); if (typeStr != null && typeStr.length() > 0) { if (logTypes.containsKey(typeStr.toLowerCase()) == true) { type = logTypes.get(typeStr.toLowerCase()); } } if (countStr != null && countStr.length() > 0) { count = Integer.parseInt(countStr); } if (type != null && count != null) { cache.logCounts.put(type, count); } } } } } } catch (Exception e) { // failed to parse logs Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache log count"); } // cache logs try { final Matcher matcherLogs = patternLogs.matcher(page); while (matcherLogs.find()) { if (matcherLogs.groupCount() > 0) { final String[] logs = matcherLogs.group(1).split("</tr><tr>"); final int logsCnt = logs.length; for (int k = 0; k < logsCnt; k++) { final Matcher matcherLog = patternLog.matcher(logs[k]); if (matcherLog.find()) { final cgLog logDone = new cgLog(); String logTmp = matcherLog.group(10); int day = -1; try { day = Integer.parseInt(matcherLog.group(3)); } catch (Exception e) { Log.w(cgSettings.tag, "Failed to parse logs date (day): " + e.toString()); } int month = -1; // January | February | March | April | May | June | July | August | September | October | November | December if (matcherLog.group(2).equalsIgnoreCase("January")) { month = 0; } else if (matcherLog.group(2).equalsIgnoreCase("February")) { month = 1; } else if (matcherLog.group(2).equalsIgnoreCase("March")) { month = 2; } else if (matcherLog.group(2).equalsIgnoreCase("April")) { month = 3; } else if (matcherLog.group(2).equalsIgnoreCase("May")) { month = 4; } else if (matcherLog.group(2).equalsIgnoreCase("June")) { month = 5; } else if (matcherLog.group(2).equalsIgnoreCase("July")) { month = 6; } else if (matcherLog.group(2).equalsIgnoreCase("August")) { month = 7; } else if (matcherLog.group(2).equalsIgnoreCase("September")) { month = 8; } else if 
(matcherLog.group(2).equalsIgnoreCase("October")) { month = 9; } else if (matcherLog.group(2).equalsIgnoreCase("November")) { month = 10; } else if (matcherLog.group(2).equalsIgnoreCase("December")) { month = 11; } else { Log.w(cgSettings.tag, "Failed to parse logs date (month)."); } int year = -1; final String yearPre = matcherLog.group(5); if (yearPre == null) { Calendar date = Calendar.getInstance(); year = date.get(Calendar.YEAR); } else { try { year = Integer.parseInt(matcherLog.group(5)); } catch (Exception e) { Log.w(cgSettings.tag, "Failed to parse logs date (year): " + e.toString()); } } long logDate; if (year > 0 && month >= 0 && day > 0) { Calendar date = Calendar.getInstance(); date.set(year, month, day, 12, 0, 0); logDate = date.getTimeInMillis(); logDate = (long) (Math.ceil(logDate / 1000)) * 1000; } else { logDate = 0; } if (logTypes.containsKey(matcherLog.group(1).toLowerCase()) == true) { logDone.type = logTypes.get(matcherLog.group(1).toLowerCase()); } else { logDone.type = logTypes.get("icon_note"); } logDone.author = Html.fromHtml(matcherLog.group(6)).toString(); logDone.date = logDate; if (matcherLog.group(8) != null) { logDone.found = new Integer(matcherLog.group(8)); } logDone.log = logTmp; if (cache.logs == null) { cache.logs = new ArrayList<cgLog>(); } cache.logs.add(logDone); } } } } } catch (Exception e) { // failed to parse logs Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache logs"); } int wpBegin = 0; int wpEnd = 0; wpBegin = page.indexOf("<table class=\"Table\" id=\"ctl00_ContentBody_Waypoints\">"); if (wpBegin != -1) { // parse waypoints final Pattern patternWpType = Pattern.compile("\\/wpttypes\\/sm\\/(.+)\\.jpg", Pattern.CASE_INSENSITIVE); final Pattern patternWpPrefixOrLookupOrLatlon = Pattern .compile(">([^<]*<[^>]+>)?([^<]+)(<[^>]+>[^<]*)?<\\/td>", Pattern.CASE_INSENSITIVE); final Pattern patternWpName = Pattern.compile(">[^<]*<a[^>]+>([^<]*)<\\/a>", Pattern.CASE_INSENSITIVE); final Pattern patternWpNote = 
Pattern.compile("colspan=\"6\">(.*)<\\/td>", Pattern.CASE_INSENSITIVE); String wpList = page.substring(wpBegin); wpEnd = wpList.indexOf("</p>"); if (wpEnd > -1 && wpEnd <= wpList.length()) { wpList = wpList.substring(0, wpEnd); } if (wpList.indexOf("No additional waypoints to display.") == -1) { wpEnd = wpList.indexOf("</table>"); wpList = wpList.substring(0, wpEnd); wpBegin = wpList.indexOf("<tbody>"); wpEnd = wpList.indexOf("</tbody>"); if (wpBegin >= 0 && wpEnd >= 0 && wpEnd <= wpList.length()) { wpList = wpList.substring(wpBegin + 7, wpEnd); } final String[] wpItems = wpList.split("<tr"); String[] wp; for (int j = 1; j < wpItems.length; j++) { final cgWaypoint waypoint = new cgWaypoint(); wp = wpItems[j].split("<td"); // waypoint type try { final Matcher matcherWpType = patternWpType.matcher(wp[3]); while (matcherWpType.find()) { if (matcherWpType.groupCount() > 0) { waypoint.type = matcherWpType.group(1); if (waypoint.type != null) { waypoint.type = waypoint.type.trim(); } } } } catch (Exception e) { // failed to parse type Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint type"); } // waypoint prefix try { final Matcher matcherWpPrefix = patternWpPrefixOrLookupOrLatlon.matcher(wp[4]); while (matcherWpPrefix.find()) { if (matcherWpPrefix.groupCount() > 1) { waypoint.prefix = matcherWpPrefix.group(2); if (waypoint.prefix != null) { waypoint.prefix = waypoint.prefix.trim(); } } } } catch (Exception e) { // failed to parse prefix Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint prefix"); } // waypoint lookup try { final Matcher matcherWpLookup = patternWpPrefixOrLookupOrLatlon.matcher(wp[5]); while (matcherWpLookup.find()) { if (matcherWpLookup.groupCount() > 1) { waypoint.lookup = matcherWpLookup.group(2); if (waypoint.lookup != null) { waypoint.lookup = waypoint.lookup.trim(); } } } } catch (Exception e) { // failed to parse lookup Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint lookup"); } // waypoint 
name try { final Matcher matcherWpName = patternWpName.matcher(wp[6]); while (matcherWpName.find()) { if (matcherWpName.groupCount() > 0) { waypoint.name = matcherWpName.group(1); if (waypoint.name != null) { waypoint.name = waypoint.name.trim(); } } } } catch (Exception e) { // failed to parse name Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint name"); } // waypoint latitude and logitude try { final Matcher matcherWpLatLon = patternWpPrefixOrLookupOrLatlon.matcher(wp[7]); while (matcherWpLatLon.find()) { if (matcherWpLatLon.groupCount() > 1) { waypoint.latlon = Html.fromHtml(matcherWpLatLon.group(2)).toString(); final HashMap<String, Object> tmp = this.parseLatlon(waypoint.latlon); if (tmp.size() > 0) { waypoint.latitude = (Double) tmp.get("latitude"); waypoint.longitude = (Double) tmp.get("longitude"); waypoint.latitudeString = (String) tmp.get("latitudeString"); waypoint.longitudeString = (String) tmp.get("longitudeString"); } } } } catch (Exception e) { // failed to parse latitude and/or longitude Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint coordinates"); } j++; if (wpItems.length > j) { wp = wpItems[j].split("<td"); } // waypoint note try { final Matcher matcherWpNote = patternWpNote.matcher(wp[3]); while (matcherWpNote.find()) { if (matcherWpNote.groupCount() > 0) { waypoint.note = matcherWpNote.group(1); if (waypoint.note != null) { waypoint.note = waypoint.note.trim(); } } } } catch (Exception e) { // failed to parse note Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse waypoint note"); } if (cache.waypoints == null) cache.waypoints = new ArrayList<cgWaypoint>(); cache.waypoints.add(waypoint); } } } if (cache.latitude != null && cache.longitude != null) { cache.elevation = getElevation(cache.latitude, cache.longitude); } final cgRating rating = getRating(cache.guid, cache.geocode); if (rating != null) { cache.rating = rating.rating; cache.votes = rating.votes; cache.myVote = rating.myVote; } 
cache.updated = System.currentTimeMillis(); cache.detailedUpdate = System.currentTimeMillis(); cache.detailed = true; caches.cacheList.add(cache); return caches; }
From source file:de.geeksfactory.opacclient.apis.SISIS.java
@Override public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException { start(); // TODO: Is this necessary? int resultNum; if (!login(acc)) { return null; }//from ww w.j av a 2s . com // Geliehene Medien String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=1", ENCODING); List<LentItem> medien = new ArrayList<>(); Document doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_medialist(medien, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { continue; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_medialist(medien, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (doc.select("#label1").size() > 0) { resultNum = 0; String rNum = doc.select("#label1").first().text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } assert (resultNum == medien.size()); } // Ordered media ("Bestellungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=6", ENCODING); List<ReservedItem> reserved = new ArrayList<>(); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("6", reserved, doc, 1); Elements label6 = doc.select("#label6"); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("6", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } // Prebooked media 
("Vormerkungen") html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=7", ENCODING); doc = Jsoup.parse(html); doc.setBaseUri(opac_url); parse_reslist("7", reserved, doc, 1); if (doc.select(".box-right").size() > 0) { for (Element link : doc.select(".box-right").first().select("a")) { String href = link.attr("abs:href"); Map<String, String> hrefq = getQueryParamsFirst(href); if (hrefq == null || hrefq.get("methodToCall") == null) { break; } if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) { html = httpGet(href, ENCODING); parse_reslist("7", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos"))); } } } if (label6.size() > 0 && doc.select("#label7").size() > 0) { resultNum = 0; String rNum = label6.text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum = Integer.parseInt(rNum); } rNum = doc.select("#label7").text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1"); if (rNum.length() > 0) { resultNum += Integer.parseInt(rNum); } assert (resultNum == reserved.size()); } AccountData res = new AccountData(acc.getId()); if (doc.select("#label8").size() > 0) { String text = doc.select("#label8").first().text().trim(); if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) { text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2"); res.setPendingFees(text); } } Pattern p = Pattern.compile("[^0-9.]*", Pattern.MULTILINE); if (doc.select(".box3").size() > 0) { for (Element box : doc.select(".box3")) { if (box.select("strong").size() == 1) { String text = box.select("strong").text(); if (text.equals("Jahresgebhren")) { text = box.text(); text = p.matcher(text).replaceAll(""); res.setValidUntil(text); } } } } res.setLent(medien); res.setReservations(reserved); return res; }
From source file:csiro.pidsvc.mappingstore.Manager.java
public String resolveLookupValue(String ns, String key) { LookupMapDescriptor lookupDescriptor = getLookupMapType(ns); if (lookupDescriptor == null) return null; try {//w w w.ja va 2 s. c o m if (lookupDescriptor.isStatic()) { String ret = getLookupValue(ns, key); return ret == null ? lookupDescriptor.getDefaultValue(key) : ret; } else if (lookupDescriptor.isHttpResolver()) { final Pattern reType = Pattern.compile("^T:(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern reExtract = Pattern.compile("^E:(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern reNamespace = Pattern.compile("^NS:(.+?):(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); Matcher m; String endpoint, extractorType, extractor, content; String[] dynKeyValue = getLookupKeyValue(ns); if (dynKeyValue == null) return null; // Endpoint. endpoint = dynKeyValue[0]; if (endpoint.contains("$0")) endpoint = endpoint.replace("$0", key); else endpoint += key; // Type. m = reType.matcher(dynKeyValue[1]); m.find(); extractorType = m.group(1); // Extractor. m = reExtract.matcher(dynKeyValue[1]); m.find(); extractor = m.group(1); // Execute HTTP GET request. content = Http.simpleGetRequestStrict(endpoint); if (content == null) return lookupDescriptor.getDefaultValue(key); // Retrieve data. if (extractor.equals("")) return content; if (extractorType.equalsIgnoreCase("Regex")) { Pattern re = Pattern.compile(extractor); m = re.matcher(content); if (m.find()) return m.groupCount() > 0 ? m.group(1) : m.group(); } else if (extractorType.equalsIgnoreCase("XPath")) { Processor processor = new Processor(false); XPathCompiler xpathCompiler = processor.newXPathCompiler(); // Declare XML namespaces. m = reNamespace.matcher(dynKeyValue[1]); while (m.find()) xpathCompiler.declareNamespace(m.group(1), m.group(2)); // Evaluate XPath expression. 
XdmItem node = xpathCompiler.evaluateSingle(extractor, processor.newDocumentBuilder().build(new StreamSource(new StringReader(content)))); return node == null ? lookupDescriptor.getDefaultValue(key) : node.getStringValue(); } } } catch (Exception e) { _logger.error(e); } return lookupDescriptor.getDefaultValue(key); }
From source file:com.startupbidder.dao.MockDataBuilder.java
private List<Listing> getStartuplyListings(int fromId, int toId) { List<Listing> listings = new ArrayList<Listing>(); List<String> startuplyIds = getStartuplyIds(); log.info("Loading " + startuplyIds.size() + " Startuply listings"); try {/* w ww . jav a 2s .c om*/ Pattern namePattern = Pattern.compile("<h1 id=\"companyNameHeader\"[^>]*>([^<]*)", Pattern.MULTILINE); Pattern websitePattern = Pattern.compile("<a href=\"http://([^\"]*)\">\\1", Pattern.MULTILINE); Pattern addressPattern = Pattern.compile( "<table id=\"branchTable\".*<td class=\"SortedColumn\">[^<]*</td>\\s*<td[^>]*>([^<]*)", Pattern.MULTILINE | Pattern.DOTALL); Pattern logoPattern = Pattern.compile("<img\\s+src=\"([^\"]*)\"\\s+id=\"ctl00_Content_Logo\"", Pattern.MULTILINE); Pattern industriesPattern = Pattern.compile( "<div[^>]*>Industries</div>\\s*<div[^>]*>\\s*<a[^>]*>([^<]*)</a>\\s*", Pattern.MULTILINE); Pattern industries2Pattern = Pattern.compile("\\s*,\\s*<a[^>]*>([^<]*)</a>", Pattern.MULTILINE); Pattern missionPattern = Pattern.compile("<h1[^>]*>[^<]* Mission</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern mantraPattern = Pattern.compile("([^\\.]+)"); Pattern productsPattern = Pattern.compile("<h1[^>]*>[^<]* Products</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern teamPattern = Pattern.compile("<h1[^>]*>[^<]* Team</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern lifePattern = Pattern.compile("<h1[^>]*>Life [^<]</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); int counter = 0; for (String startuplyId : startuplyIds) { counter++; if (counter <= fromId || counter >= toId) { continue; } String startuplyPath = STARTUPLY_ROOT + "/Companies/" + startuplyId + ".aspx"; StartuplyCache startuplyCache = null; try { startuplyCache = getOfy().get(StartuplyCache.class, startuplyPath); } catch (NotFoundException e) { ; } if (startuplyCache == null) { try { URL url = new URL(startuplyPath); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 
connection.setDoOutput(true); connection.setRequestMethod("GET"); StringWriter stringWriter = new StringWriter(); IOUtils.copy(connection.getInputStream(), stringWriter, "UTF-8"); String startuplyPage = stringWriter.toString(); connection.disconnect(); if (!StringUtils.isEmpty(startuplyPage)) { startuplyPage = startuplyPage.replaceAll("<br />", ""); // messes up multiline desc if we don't do this startuplyCache = new StartuplyCache(startuplyPath, startuplyPage); getOfy().put(startuplyCache); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } if (startuplyCache == null) { log.info("Could not load Startuply cache for id: " + startuplyId); } else if (StringUtils.isEmpty(startuplyCache.page)) { log.info("Unable to import, empty response for Startuply cache page for id: " + startuplyId); } else { try { //log.info(startuplyCache.page); String name = ""; Matcher nameMatcher = namePattern.matcher(startuplyCache.page); if (nameMatcher.find()) { name = nameMatcher.group(1); } String address = ""; Matcher addressMatcher = addressPattern.matcher(startuplyCache.page); if (addressMatcher.find()) { address = addressMatcher.group(1); } String website = ""; Matcher websiteMatcher = websitePattern.matcher(startuplyCache.page); if (websiteMatcher.find()) { website = "http://" + websiteMatcher.group(1); } String logo = ""; Matcher logoMatcher = logoPattern.matcher(startuplyCache.page); if (logoMatcher.find()) { logo = STARTUPLY_ROOT + logoMatcher.group(1); } String industries = ""; Matcher industriesMatcher = industriesPattern.matcher(startuplyCache.page); if (industriesMatcher.find()) { industries = industriesMatcher.group(1); industriesMatcher.usePattern(industries2Pattern); while (industriesMatcher.find()) { industries += " " + industriesMatcher.group(1); } } String description = ""; String mantra = ""; Matcher missionMatcher = missionPattern.matcher(startuplyCache.page); if (missionMatcher.find()) { description = 
missionMatcher.group(1); Matcher mantraMatcher = mantraPattern.matcher(description); if (mantraMatcher.find()) { mantra = mantraMatcher.group(1); } } Matcher productsMatcher = productsPattern.matcher(startuplyCache.page); if (productsMatcher.find()) { String products = productsMatcher.group(1); description += " " + products; } Matcher teamMatcher = teamPattern.matcher(startuplyCache.page); if (teamMatcher.find()) { String team = teamMatcher.group(1); description += " " + team; } Matcher lifeMatcher = lifePattern.matcher(startuplyCache.page); if (lifeMatcher.find()) { String life = lifeMatcher.group(1); description += " " + life; } if (StringUtils.isEmpty(name)) { log.info("Unable to import, couldn't find name for Startuply id: " + startuplyId); } else { String type = bestGuessListingType(name, industries, description); //log.info("Matched name:[" + name + "] address:["+address + "] website:["+website + "] logo:["+logo + "] industries:["+industries+"] mantra:["+mantra+"] description:["+description+"]"); int askamt = 5 * new Random().nextInt(20) * 1000; int askpct = 5 + 5 * new Random().nextInt(9); if (askamt < 10000) { askamt = 0; } if (StringUtils.isEmpty(mantra)) { mantra = industries; } if (StringUtils.isEmpty(description)) { description = "In summary, " + name + " is a great company in the " + industries + " space."; } Listing listing = prepareListing(STARTUPLY, // DtoToVoConverter.convert(user), name, Listing.State.ACTIVE, type, askamt, askpct, mantra, description, website, null, null, logo, address); listings.add(listing); //log.info("Added Startuply listing: "+listing); log.info("Added Startuply listing " + counter + " of " + startuplyIds.size() + " name: " + name); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } } } catch (Exception e) { e.printStackTrace(); } return listings; }
From source file:csiro.pidsvc.mappingstore.Manager.java
protected String exportLookupImpl(String ns) throws SQLException { PreparedStatement pst = null; ResultSet rs = null, rsMap = null; String ret = ""; try {//w ww . j a v a2 s. c o m if (ns == null) { // Export all lookup maps. pst = _connection .prepareStatement("SELECT ns, type, behaviour_type, behaviour_value FROM lookup_ns;"); } else { // Export a particular lookup map. pst = _connection.prepareStatement( "SELECT ns, type, behaviour_type, behaviour_value FROM lookup_ns WHERE ns = ?;"); pst.setString(1, ns); } if (pst.execute()) { rs = pst.getResultSet(); boolean dataAvailable = rs.next(); // Backups may be empty. Otherwise throw an exception. if (ns != null && !dataAvailable) throw new SQLException("Lookup map configuration cannot be exported. Data may be corrupted."); if (dataAvailable) { do { String lookupNamespace = rs.getString("ns"); String lookupType = rs.getString("type"); ret += "<lookup xmlns=\"urn:csiro:xmlns:pidsvc:backup:1.0\">"; ret += "<ns>" + StringEscapeUtils.escapeXml(lookupNamespace) + "</ns>"; String behaviourValue = rs.getString("behaviour_value"); ret += "<default type=\"" + StringEscapeUtils.escapeXml(rs.getString("behaviour_type")) + "\">" + (behaviourValue == null ? "" : StringEscapeUtils.escapeXml(behaviourValue)) + "</default>"; pst = _connection.prepareStatement("SELECT key, value FROM lookup WHERE ns = ?;"); pst.setString(1, lookupNamespace); if (!pst.execute()) throw new SQLException( "Lookup map configuration cannot be exported. 
Data may be corrupted."); rsMap = pst.getResultSet(); if (lookupType.equalsIgnoreCase("Static")) { ret += "<Static>"; while (rsMap.next()) { ret += "<pair>"; ret += "<key>" + StringEscapeUtils.escapeXml(rsMap.getString(1)) + "</key>"; ret += "<value>" + StringEscapeUtils.escapeXml(rsMap.getString(2)) + "</value>"; ret += "</pair>"; } ret += "</Static>"; } else if (lookupType.equalsIgnoreCase("HttpResolver")) { ret += "<HttpResolver>"; if (!rsMap.next()) throw new SQLException( "Lookup map configuration cannot be exported. Data is corrupted."); final Pattern reType = Pattern.compile("^T:(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern reExtract = Pattern.compile("^E:(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern reNamespace = Pattern.compile("^NS:(.+?):(.+)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); Matcher m; String namespaces = ""; String buf = rsMap.getString(2); ret += "<endpoint>" + StringEscapeUtils.escapeXml(rsMap.getString(1)) + "</endpoint>"; // Type. m = reType.matcher(buf); m.find(); ret += "<type>" + m.group(1) + "</type>"; // Extractor. m = reExtract.matcher(buf); m.find(); ret += "<extractor>" + StringEscapeUtils.escapeXml(m.group(1)) + "</extractor>"; // Namespaces. m = reNamespace.matcher(buf); while (m.find()) namespaces += "<ns prefix=\"" + StringEscapeUtils.escapeXml(m.group(1)) + "\">" + StringEscapeUtils.escapeXml(m.group(2)) + "</ns>"; if (!namespaces.isEmpty()) ret += "<namespaces>" + namespaces + "</namespaces>"; ret += "</HttpResolver>"; } ret += "</lookup>"; } while (rs.next()); } } } finally { if (rsMap != null) rsMap.close(); if (rs != null) rs.close(); if (pst != null) pst.close(); } return ret; }
From source file:com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine.java
private String performImportPythonScript(String importScript, String entityValue, AdvancedQueryPojo query, Boolean isSrcAdmin, LinkedList<String> debugLog) { String modCode = importScript; try {//from w ww .ja v a2s .c o m // Create a return value String fnName = "var" + new ObjectId(); // Pull all the imports, evaluate them outside the security manager Pattern importRegex = Pattern.compile("^\\s*(?:import|from)\\s[^\n]+", Pattern.MULTILINE); Matcher m = importRegex.matcher(importScript); StringBuffer sbImports = new StringBuffer(); while (m.find()) { sbImports.append(m.group()).append('\n'); } if (null != query) { _pyEngine.put("_query", query.toApi()); // full query } if (null != entityValue) { _pyEngine.put("_entityValue", entityValue); // allow either entityValue } _pyEngine.eval(sbImports.toString()); // Logging function if (null != debugLog) { String logName = "log" + new ObjectId(); _pyEngine.put(logName, debugLog); _pyEngine.eval("def ikanow_log(logmsg):\n " + logName + ".add(logmsg)\n\n"); } else { _pyEngine.eval("def ikanow_log(logmsg):\n pass\n\n"); } // Enable SSL everywhere (https://wiki.python.org/jython/NewSocketModule#SSLSupport) //http://tech.pedersen-live.com/2010/10/trusting-all-certificates-in-jython/ //didn't work: http://jython.xhaus.com/installing-an-all-trusting-security-provider-on-java-and-jython/ _pyEngine.eval(IOUtils .toString(SimpleFederatedQueryEngine.class.getResourceAsStream("JythonTrustManager.py"))); // Now run the script modCode = m.replaceAll(""); modCode = modCode.replaceAll("\n([^\n]+)$", "\n" + fnName + " = " + "$1"); if ((null == isSrcAdmin) || !isSrcAdmin) { _scriptingSecurityManager.eval(_pyEngine, modCode); } else { _pyEngine.eval(modCode); } // Get return value Object result = _pyEngine.get(fnName.toString()); //DEBUG if (_DEBUG) _logger.debug("DEB: T1: Return val from script: " + result); if (null == result) { throw new RuntimeException( "Null return from script - final line needs to evaluate expression to return, eg 
'response.read()' or 'varname'"); } //TESTED (by hand) return result.toString(); } catch (Exception e) { throw new RuntimeException(e); } catch (Error ee) { throw new RuntimeException(ee); } }