List of usage examples for java.util.regex Pattern MULTILINE
int MULTILINE
To view the source code for java.util.regex Pattern MULTILINE, click the Source Link.
From source file:fr.inria.oak.paxquery.pact.operations.xml.navigation.SingleDocumentExtractor.java
/** * Checks if the match produced by the current node, on the stack s, has * all required child matches (that is, at least a required child match for * each required child).//from w w w. j a v a 2 s. c om * * It also fills in the match with the children that it had by transitivity * only * * @param s * @param endingNode */ private final void checkPruneAndFillIn(ExtractorMatchStack s, int endingNode) { ExtractorMatch se = s.findEntry(endingNode); if (se != null) { NavigationTreePatternNode pns = this.nodesByStacks.get(s); // this means all required children have been matched boolean childrenPresent = true; boolean correctValue = true; // checking value condition first if (pns.selectsValue()) { if (pns.getStringValue() != null) { String thisVal = pns.getStringValue(); if (se.getVal() == null) { correctValue = false; } else { if (thisVal.startsWith("~")) { Pattern p = Pattern.compile("(^|\\s+|[^a-zA-Z0-9]+)" + thisVal.substring(1, thisVal.length()) + "($|\\s+|[^a-zA-Z0-9]+)", Pattern.MULTILINE); Matcher m = p.matcher(se.getVal()); if (!m.find()) correctValue = false; } else { String otherVal = se.getVal(); switch (pns.getSelectOnValuePredicate()) { case PREDICATE_EQUAL: correctValue = otherVal.compareTo(thisVal) == 0; break; case PREDICATE_GREATEROREQUALTHAN: correctValue = otherVal.compareTo(thisVal) >= 0; break; case PREDICATE_GREATERTHAN: correctValue = otherVal.compareTo(thisVal) > 0; break; case PREDICATE_NOTEQUAL: correctValue = otherVal.compareTo(thisVal) != 0; break; case PREDICATE_SMALLEROREQUALTHAN: correctValue = otherVal.compareTo(thisVal) <= 0; break; case PREDICATE_SMALLERTHAN: correctValue = otherVal.compareTo(thisVal) < 0; break; default: logger.error("Predicate not supported in tree pattern!"); } } } } else { double thisVal = pns.getDoubleValue(); if (se.getVal() == null) { correctValue = false; } else { double otherVal = Double.parseDouble(se.getVal()); switch (pns.getSelectOnValuePredicate()) { case PREDICATE_EQUAL: correctValue = otherVal == 
thisVal; break; case PREDICATE_GREATEROREQUALTHAN: correctValue = otherVal >= thisVal; break; case PREDICATE_GREATERTHAN: correctValue = otherVal > thisVal; break; case PREDICATE_NOTEQUAL: correctValue = otherVal != thisVal; break; case PREDICATE_SMALLEROREQUALTHAN: correctValue = otherVal <= thisVal; break; case PREDICATE_SMALLERTHAN: correctValue = otherVal < thisVal; break; default: logger.error("Predicate not supported in tree pattern!"); } } } } if (correctValue) { Iterator<NavigationTreePatternEdge> iChildren = pns.getEdges().iterator(); while (iChildren.hasNext()) { NavigationTreePatternEdge thisEdge = iChildren.next(); // Only if it is not optional if (!thisEdge.isOptional()) { NavigationTreePatternNode nChild = thisEdge.n2; // stack for this child ExtractorMatchStack sChild = stacksByNodes.get(nChild); boolean hasChildInThisStack = (findChildInStack(se, sChild, thisEdge.isParent()) != null); if (!hasChildInThisStack) { childrenPresent = false; } else { boolean thisChildPresent = false; ArrayList<ExtractorMatch> o = se.childrenByStack.get(sChild); Iterator<ExtractorMatch> itChildrenThisStack = o.iterator(); while (itChildrenThisStack.hasNext()) { ExtractorMatch emChild = itChildrenThisStack.next(); if (emChild.erased) { } else { thisChildPresent = true; } } if (!thisChildPresent) { childrenPresent = false; } } } } } // connecting to the last open match in this stack, if necessary if (s.dnop == se) { s.dnop = se.ownParent; } // dropping this match if some required child is absent if ((!childrenPresent) || (!correctValue)) { se.erased = true; // dropping also what is underneath s.removeEntry(se); // clearing context if (s.dnop == null) { this.currentContexts.remove(pns); } // if se has no ownParent, then we can erase all its // descendant matches. 
if (se.ownParent == null) { // erase se's children Iterator<ArrayList<ExtractorMatch>> itChildren = se.childrenByStack.values().iterator(); while (itChildren.hasNext()) { Iterator<ExtractorMatch> it4 = itChildren.next().iterator(); while (it4.hasNext()) { ExtractorMatch sChild = it4.next(); sChild.recErase(); } } } else { // se.ownParent is not null // go see the children and connect them to the ownParent // tell the parent that these are its children int cnt = 0; Iterator<ArrayList<ExtractorMatch>> itChildren = se.childrenByStack.values().iterator(); while (itChildren.hasNext()) { cnt += itChildren.next().size(); } if (cnt > 0) { itChildren = se.childrenByStack.values().iterator(); while (itChildren.hasNext()) { Iterator<ExtractorMatch> it4 = itChildren.next().iterator(); while (it4.hasNext()) { ExtractorMatch sChild = it4.next(); ExtractorMatchStack theChildsStack = sChild.theStack; NavigationTreePatternNode sesNode = this.nodesByStacks.get(se.theStack); NavigationTreePatternNode sChildsNode = this.nodesByStacks.get(theChildsStack); // learn if this matches for the child node were supposed to be direct // descendants of their parent: boolean wasParentEdge = false; if (sesNode.getEdges() != null) { Iterator<NavigationTreePatternEdge> itEdges = sesNode.getEdges().iterator(); while (itEdges.hasNext()) { NavigationTreePatternEdge pe = itEdges.next(); if (pe.n2 == sChildsNode) { if (pe.isParent()) { wasParentEdge = true; } } } // now establish if it is OK to reconnect the children to their // parent's own parent if (!wasParentEdge || (se.ownParent.depth + 1 == sChild.depth)) { sChild.ownParent = se.ownParent; se.ownParent.addChild(sChild, sChild.theStack); } } } } } } } } }
From source file:org.extensiblecatalog.ncip.v2.millennium.MillenniumRemoteServiceManager.java
/** * getTesting/*from ww w . j ava 2 s . c om*/ * This class is created for tesing the return value of any pattern * * @param html * @return string */ public String getTesting(String html) { String strReturn = ""; String searchPattern = "^(.*?)<tr class=\"patFuncFinesEntryTitle\">(?s)(.*?)<td(.*?)class=\"patFuncFinesEntryTitle\">(.*?)" + "</td>(?s)(.*?)</tr>(?s)(.*?)<tr class=\"patFuncFinesEntryDetail\">(?s)(.*?)<td>(.*?)</td>(?s)(.*?)<td class=\"patFuncFinesDetailType\">" + "(.*?)</td>(?s)(.*?)<td(.*?)class=\"patFuncFinesDetailAmt\">(.*?)</td>$"; //String searchPattern = "^(.*?)<td(.*?)class=\"patFuncFinesTotalAmt\">(.*?)</em></td>$"; //LOG.debug("SearchingPattern: " + searchPattern); Pattern iPattern = Pattern.compile(searchPattern, Pattern.MULTILINE + Pattern.CASE_INSENSITIVE); Matcher iPatternMatch = iPattern.matcher(html); int i = 0; while (iPatternMatch.find()) { //LOG.debug("Testing: [" + i + "] - group(1): " + iPatternMatch.group(1)); LOG.debug("Testing: [" + i + "] - group(2): " + iPatternMatch.group(2)); LOG.debug("Testing: [" + i + "] - group(3): " + iPatternMatch.group(3)); LOG.debug("Testing: [" + i + "] - group(4): " + iPatternMatch.group(4)); LOG.debug("Testing: [" + i + "] - group(5): " + iPatternMatch.group(5)); LOG.debug("Testing: [" + i + "] - group(6): " + iPatternMatch.group(6)); LOG.debug("Testing: [" + i + "] - group(7): " + iPatternMatch.group(7)); LOG.debug("Testing: [" + i + "] - group(8): " + iPatternMatch.group(8)); LOG.debug("Testing: [" + i + "] - group(9): " + iPatternMatch.group(9)); LOG.debug("Testing: [" + i + "] - group(10): " + iPatternMatch.group(10)); LOG.debug("Testing: [" + i + "] - group(11): " + iPatternMatch.group(11)); LOG.debug("Testing: [" + i + "] - group(12): " + iPatternMatch.group(12)); LOG.debug("Testing: [" + i + "] - group(13): " + iPatternMatch.group(13)); strReturn = iPatternMatch.group(3).replace("\n", ""); i++; } if (iPatternMatch.find()) { strReturn = iPatternMatch.group(3).replace("\n", ""); 
LOG.debug("Millennium - Testing: " + strReturn); } return strReturn; }
From source file:org.graphwalker.ModelBasedTesting.java
private String parseManualInstructions(String manualInstructions) { if (!(getMachine() instanceof ExtendedFiniteStateMachine)) { return manualInstructions; }// w ww. j av a 2s . c om ExtendedFiniteStateMachine efsm = (ExtendedFiniteStateMachine) getMachine(); if (!(efsm.isJsEnabled() || efsm.isBeanShellEnabled())) { return manualInstructions; } String parsedStr = manualInstructions; Pattern p = Pattern.compile("\\{\\$(\\w+)\\}", Pattern.MULTILINE); Matcher m = p.matcher(manualInstructions); while (m.find()) { String data = m.group(1); parsedStr = manualInstructions.replaceAll("\\{\\$" + data + "\\}", efsm.getDataValue(data)); manualInstructions = parsedStr; } return parsedStr; }
From source file:org.alfresco.repo.content.transform.TransformerDebug.java
/** * Strips the leading number in a reference *///from w ww.ja va2 s. c om private String stripLeadingNumber(StringBuilder sb) { return sb == null ? null : Pattern.compile("^\\d+\\.", Pattern.MULTILINE).matcher(sb).replaceAll(""); }
From source file:cgeo.geocaching.cgBase.java
public cgCacheWrap parseSearch(cgSearchThread thread, String url, String page, boolean showCaptcha) { if (StringUtils.isBlank(page)) { Log.e(cgSettings.tag, "cgeoBase.parseSearch: No page given"); return null; }//from w w w . j av a 2 s .c om final cgCacheWrap caches = new cgCacheWrap(); final List<String> cids = new ArrayList<String>(); final List<String> guids = new ArrayList<String>(); String recaptchaChallenge = null; String recaptchaText = null; caches.url = url; final Pattern patternCacheType = Pattern.compile( "<td class=\"Merge\">[^<]*<a href=\"[^\"]*/seek/cache_details\\.aspx\\?guid=[^\"]+\"[^>]+>[^<]*<img src=\"[^\"]*/images/wpttypes/[^.]+\\.gif\" alt=\"([^\"]+)\" title=\"[^\"]+\"[^>]*>[^<]*</a>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternGuidAndDisabled = Pattern.compile( "<img src=\"[^\"]*/images/wpttypes/[^>]*>[^<]*</a></td><td class=\"Merge\">[^<]*<a href=\"[^\"]*/seek/cache_details\\.aspx\\?guid=([a-z0-9\\-]+)\" class=\"lnk([^\"]*)\">([^<]*<span>)?([^<]*)(</span>[^<]*)?</a>[^<]+<br />([^<]*)<span[^>]+>([^<]*)</span>([^<]*<img[^>]+>)?[^<]*<br />[^<]*</td>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternTbs = Pattern.compile( "<a id=\"ctl00_ContentBody_dlResults_ctl[0-9]+_uxTravelBugList\" class=\"tblist\" data-tbcount=\"([0-9]+)\" data-id=\"[^\"]*\"[^>]*>(.*)</a>", Pattern.CASE_INSENSITIVE); final Pattern patternTbsInside = Pattern.compile( "(<img src=\"[^\"]+\" alt=\"([^\"]+)\" title=\"[^\"]*\" />[^<]*)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternDirection = Pattern.compile( "<img id=\"ctl00_ContentBody_dlResults_ctl[0-9]+_uxDistanceAndHeading\" title=\"[^\"]*\" src=\"[^\"]*/seek/CacheDir\\.ashx\\?k=([^\"]+)\"[^>]*>", Pattern.CASE_INSENSITIVE); final Pattern patternCode = Pattern.compile("\\|\\W*(GC[a-z0-9]+)[^\\|]*\\|", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); final Pattern patternId = Pattern.compile("name=\"CID\"[^v]*value=\"([0-9]+)\"", Pattern.CASE_INSENSITIVE); 
final Pattern patternFavourite = Pattern.compile( "<span id=\"ctl00_ContentBody_dlResults_ctl[0-9]+_uxFavoritesValue\" title=\"[^\"]*\" class=\"favorite-rank\">([0-9]+)</span>", Pattern.CASE_INSENSITIVE); final Pattern patternTotalCnt = Pattern.compile( "<td class=\"PageBuilderWidget\"><span>Total Records[^<]*<b>(\\d+)<\\/b>", Pattern.CASE_INSENSITIVE); final Pattern patternRecaptcha = Pattern.compile( "<script[^>]*src=\"[^\"]*/recaptcha/api/challenge\\?k=([^\"]+)\"[^>]*>", Pattern.CASE_INSENSITIVE); final Pattern patternRecaptchaChallenge = Pattern.compile("challenge : '([^']+)'", Pattern.CASE_INSENSITIVE); caches.viewstates = getViewstates(page); // recaptcha if (showCaptcha) { try { String recaptchaJsParam = null; final Matcher matcherRecaptcha = patternRecaptcha.matcher(page); while (matcherRecaptcha.find()) { if (matcherRecaptcha.groupCount() > 0) { recaptchaJsParam = matcherRecaptcha.group(1); } } if (recaptchaJsParam != null) { final String recaptchaJs = request(false, "www.google.com", "/recaptcha/api/challenge", "GET", "k=" + urlencode_rfc3986(recaptchaJsParam.trim()), 0, true).getData(); if (StringUtils.isNotBlank(recaptchaJs)) { final Matcher matcherRecaptchaChallenge = patternRecaptchaChallenge.matcher(recaptchaJs); while (matcherRecaptchaChallenge.find()) { if (matcherRecaptchaChallenge.groupCount() > 0) { recaptchaChallenge = matcherRecaptchaChallenge.group(1).trim(); } } } } } catch (Exception e) { // failed to parse recaptcha challenge Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse recaptcha challenge"); } if (thread != null && StringUtils.isNotBlank(recaptchaChallenge)) { thread.setChallenge(recaptchaChallenge); thread.notifyNeed(); } } if (!page.contains("SearchResultsTable")) { // there are no results. 
aborting here avoids a wrong error log in the next parsing step return caches; } int startPos = page.indexOf("<div id=\"ctl00_ContentBody_ResultsPanel\""); if (startPos == -1) { Log.e(cgSettings.tag, "cgeoBase.parseSearch: ID \"ctl00_ContentBody_dlResults\" not found on page"); return null; } page = page.substring(startPos); // cut on <table startPos = page.indexOf(">"); int endPos = page.indexOf("ctl00_ContentBody_UnitTxt"); if (startPos == -1 || endPos == -1) { Log.e(cgSettings.tag, "cgeoBase.parseSearch: ID \"ctl00_ContentBody_UnitTxt\" not found on page"); return null; } page = page.substring(startPos + 1, endPos - startPos + 1); // cut between <table> and </table> final String[] rows = page.split("<tr class="); final int rows_count = rows.length; for (int z = 1; z < rows_count; z++) { cgCache cache = new cgCache(); String row = rows[z]; // check for cache type presence if (!row.contains("images/wpttypes")) { continue; } try { final Matcher matcherGuidAndDisabled = patternGuidAndDisabled.matcher(row); while (matcherGuidAndDisabled.find()) { if (matcherGuidAndDisabled.groupCount() > 0) { guids.add(matcherGuidAndDisabled.group(1)); cache.guid = matcherGuidAndDisabled.group(1); if (matcherGuidAndDisabled.group(4) != null) { cache.name = Html.fromHtml(matcherGuidAndDisabled.group(4).trim()).toString(); } if (matcherGuidAndDisabled.group(6) != null) { cache.location = Html.fromHtml(matcherGuidAndDisabled.group(6).trim()).toString(); } final String attr = matcherGuidAndDisabled.group(2); if (attr != null) { if (attr.contains("Strike")) { cache.disabled = true; } else { cache.disabled = false; } if (attr.contains("OldWarning")) { cache.archived = true; } else { cache.archived = false; } } } } } catch (Exception e) { // failed to parse GUID and/or Disabled Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse GUID and/or Disabled data"); } if (settings.excludeDisabled == 1 && (cache.disabled || cache.archived)) { // skip disabled and archived caches cache = 
null; continue; } String inventoryPre = null; // GC* code try { final Matcher matcherCode = patternCode.matcher(row); while (matcherCode.find()) { if (matcherCode.groupCount() > 0) { cache.geocode = matcherCode.group(1).toUpperCase(); } } } catch (Exception e) { // failed to parse code Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache code"); } // cache type try { final Matcher matcherCacheType = patternCacheType.matcher(row); while (matcherCacheType.find()) { if (matcherCacheType.groupCount() > 0) { cache.type = cacheTypes.get(matcherCacheType.group(1).toLowerCase()); } } } catch (Exception e) { // failed to parse type Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache type"); } // cache direction - image if (settings.getLoadDirImg()) { try { final Matcher matcherDirection = patternDirection.matcher(row); while (matcherDirection.find()) { if (matcherDirection.groupCount() > 0) { cache.directionImg = matcherDirection.group(1); } } } catch (Exception e) { // failed to parse direction image Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache direction image"); } } // cache inventory try { final Matcher matcherTbs = patternTbs.matcher(row); while (matcherTbs.find()) { if (matcherTbs.groupCount() > 0) { cache.inventoryItems = Integer.parseInt(matcherTbs.group(1)); inventoryPre = matcherTbs.group(2); } } } catch (Exception e) { // failed to parse inventory Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache inventory (1)"); } if (StringUtils.isNotBlank(inventoryPre)) { try { final Matcher matcherTbsInside = patternTbsInside.matcher(inventoryPre); while (matcherTbsInside.find()) { if (matcherTbsInside.groupCount() == 2 && matcherTbsInside.group(2) != null) { final String inventoryItem = matcherTbsInside.group(2).toLowerCase(); if (inventoryItem.equals("premium member only cache")) { continue; } else { if (cache.inventoryItems <= 0) { cache.inventoryItems = 1; } } } } } catch (Exception e) { // failed to parse 
cache inventory info Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache inventory info"); } } // premium cache cache.members = row.contains("/images/small_profile.gif"); // found it cache.found = row.contains("/images/icons/icon_smile"); // own it cache.own = row.contains("/images/silk/star.png"); // id try { final Matcher matcherId = patternId.matcher(row); while (matcherId.find()) { if (matcherId.groupCount() > 0) { cache.cacheId = matcherId.group(1); cids.add(cache.cacheId); } } } catch (Exception e) { // failed to parse cache id Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache id"); } // favourite count try { final Matcher matcherFavourite = patternFavourite.matcher(row); while (matcherFavourite.find()) { if (matcherFavourite.groupCount() > 0) { cache.favouriteCnt = Integer.parseInt(matcherFavourite.group(1)); } } } catch (Exception e) { // failed to parse favourite count Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse favourite count"); } if (cache.nameSp == null) { cache.nameSp = (new Spannable.Factory()).newSpannable(cache.name); if (cache.disabled || cache.archived) { // strike cache.nameSp.setSpan(new StrikethroughSpan(), 0, cache.nameSp.toString().length(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } } caches.cacheList.add(cache); } // total caches found try { final Matcher matcherTotalCnt = patternTotalCnt.matcher(page); while (matcherTotalCnt.find()) { if (matcherTotalCnt.groupCount() > 0) { if (matcherTotalCnt.group(1) != null) { caches.totalCnt = Integer.valueOf(matcherTotalCnt.group(1)); } } } } catch (Exception e) { // failed to parse cache count Log.w(cgSettings.tag, "cgeoBase.parseSearch: Failed to parse cache count"); } if (thread != null && recaptchaChallenge != null) { if (thread.getText() == null) { thread.waitForUser(); } recaptchaText = thread.getText(); } if (cids.size() > 0 && (recaptchaChallenge == null || (recaptchaChallenge != null && StringUtils.isNotBlank(recaptchaText)))) { 
Log.i(cgSettings.tag, "Trying to get .loc for " + cids.size() + " caches"); try { // get coordinates for parsed caches final String host = "www.geocaching.com"; final String path = "/seek/nearest.aspx"; final StringBuilder params = new StringBuilder(); params.append("__EVENTTARGET=&__EVENTARGUMENT="); if (ArrayUtils.isNotEmpty(caches.viewstates)) { params.append("&__VIEWSTATE="); params.append(urlencode_rfc3986(caches.viewstates[0])); if (caches.viewstates.length > 1) { for (int i = 1; i < caches.viewstates.length; i++) { params.append("&__VIEWSTATE" + i + "="); params.append(urlencode_rfc3986(caches.viewstates[i])); } params.append("&__VIEWSTATEFIELDCOUNT=" + caches.viewstates.length); } } for (String cid : cids) { params.append("&CID="); params.append(urlencode_rfc3986(cid)); } if (recaptchaChallenge != null && StringUtils.isNotBlank(recaptchaText)) { params.append("&recaptcha_challenge_field="); params.append(urlencode_rfc3986(recaptchaChallenge)); params.append("&recaptcha_response_field="); params.append(urlencode_rfc3986(recaptchaText)); } params.append("&ctl00%24ContentBody%24uxDownloadLoc=Download+Waypoints"); final String coordinates = request(false, host, path, "POST", params.toString(), 0, true).getData(); if (StringUtils.isNotBlank(coordinates)) { if (coordinates.contains( "You have not agreed to the license agreement. The license agreement is required before you can start downloading GPX or LOC files from Geocaching.com")) { Log.i(cgSettings.tag, "User has not agreed to the license agreement. 
Can\'t download .loc file."); caches.error = errorRetrieve.get(-7); return caches; } } LocParser.parseLoc(caches, coordinates); } catch (Exception e) { Log.e(cgSettings.tag, "cgBase.parseSearch.CIDs: " + e.toString()); } } // get direction images if (settings.getLoadDirImg()) { for (cgCache oneCache : caches.cacheList) { if (oneCache.coords == null && oneCache.directionImg != null) { cgDirectionImg.getDrawable(oneCache.geocode, oneCache.directionImg); } } } // get ratings if (guids.size() > 0) { Log.i(cgSettings.tag, "Trying to get ratings for " + cids.size() + " caches"); try { final Map<String, cgRating> ratings = getRating(guids, null); if (CollectionUtils.isNotEmpty(ratings)) { // save found cache coordinates for (cgCache oneCache : caches.cacheList) { if (ratings.containsKey(oneCache.guid)) { cgRating thisRating = ratings.get(oneCache.guid); oneCache.rating = thisRating.rating; oneCache.votes = thisRating.votes; oneCache.myVote = thisRating.myVote; } } } } catch (Exception e) { Log.e(cgSettings.tag, "cgBase.parseSearch.GCvote: " + e.toString()); } } return caches; }
From source file:it.drwolf.ridire.session.CrawlerManager.java
private long getURICount(Job job, String whichCount, User currentUser) throws IOException, HeritrixException, DocumentException, XPathExpressionException, SAXException { // this.updateJobsList(currentUser); Pattern pURICount = Pattern.compile( "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)", Pattern.MULTILINE); String jobName = job.getName(); Job j = this.getPersistedJob(jobName); if (j == null) { return 0L; }/* w ww . j a v a2 s. c om*/ if (job.getChildJobName() != null && job.getChildJobName().length() > 0) { jobName = job.getChildJobName(); } String dir = this.entityManager.find(Parameter.class, Parameter.JOBS_DIR.getKey()).getValue(); long uriCountFromCrawlReport = 0L; long queuedURICount = 0L; long discoveredURICount = 0L; HttpMethod method = null; String jobStatus = this.getJobStatus(jobName); // jobName = jobName.replaceAll(" ", "\\\\ "); try { while (true) { if (jobStatus.equals(CrawlStatus.RUNNING.toString())) { RandomAccessFile progressStatistics = null; try { progressStatistics = new RandomAccessFile(this.jobsDir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR + "logs" + CrawlerManager.FILE_SEPARATOR + "progress-statistics.log", "r"); if (progressStatistics != null) { progressStatistics.seek(Math.max(0, progressStatistics.length() - 3000)); String line = progressStatistics.readLine(); StringBuffer buffer = new StringBuffer(); while (line != null) { buffer.append(line + "\n"); line = progressStatistics.readLine(); } String progressStatisticsContent = buffer.toString(); Matcher m = pURICount.matcher(progressStatisticsContent); int start = 0; long queuedURICountTemp = 0L; long discoveredURICountTemp = 0L; long uriCountFromCrawlReportTemp = 0L; while (m.find(start)) { start = m.end(); queuedURICountTemp = Long.parseLong(m.group(2)); discoveredURICountTemp = Long.parseLong(m.group(1)); uriCountFromCrawlReportTemp = Long.parseLong(m.group(3)); } queuedURICount += queuedURICountTemp; discoveredURICount = 
discoveredURICountTemp; uriCountFromCrawlReport = uriCountFromCrawlReportTemp; } } catch (FileNotFoundException e) { // TODO: handle exception } finally { if (progressStatistics != null) { progressStatistics.close(); } } break; } else if (whichCount.equalsIgnoreCase("finishedURICount")) { File reportFile = new File( dir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR + "reports" + CrawlerManager.FILE_SEPARATOR + "crawl-report.txt"); if (reportFile.exists() && reportFile.canRead()) { String content = FileUtils.readFileToString(reportFile); Matcher m = CrawlerManager.pFinishedURICount.matcher(content); if (m.find()) { String bytes = m.group(1); uriCountFromCrawlReport += Long.parseLong(bytes); } } Matcher m = CrawlerManager.childJobPattern.matcher(jobName); if (m.matches()) { Integer count = Integer.parseInt(m.group(1)); if (count > 1) { count--; jobName = jobName.substring(0, jobName.indexOf("__")) + "__" + count; } else if (count == 1) { jobName = jobName.substring(0, jobName.indexOf("__")); } else { break; } } else { break; } } else { return 0L; } } } finally { if (method != null) { method.releaseConnection(); } } if (whichCount.equals("discoveredUriCount")) { return discoveredURICount; } if (whichCount.equals("queuedUriCount")) { return queuedURICount; } return uriCountFromCrawlReport; }
From source file:com.amazonaws.a2s.AmazonA2SClient.java
/**
 * Looks for an Errors section in the response. When one is found, builds
 * an AmazonA2SException from the request id, error code, error message and
 * error XML, logs them and throws the exception; otherwise returns normally.
 *
 * @param responseString raw response text
 * @param status status value passed through to the exception
 * @throws AmazonA2SException when the response contains an error
 */
private void throwIfErrors(String responseString, int status) throws AmazonA2SException {
    final int flags = Pattern.MULTILINE | Pattern.DOTALL;
    final Matcher found = Pattern.compile(
            ".*\\<RequestId>(.*)\\</RequestId>.*"
                    + "(\\<Error>\\<Code>(.*)\\</Code>\\<Message>(.*)\\</Message>\\</Error>).*(\\<Error>)?.*",
            flags).matcher(responseString);

    // nothing to report unless the whole response fits the error layout
    if (!found.matches()) {
        return;
    }

    final String requestId = found.group(1);
    final String errorXml = found.group(2);
    final String errorCode = found.group(3);
    final String errorMessage = found.group(4);

    final AmazonA2SException failure = new AmazonA2SException(errorMessage, status, errorCode, requestId,
            errorXml);
    log.debug("Error found in the response: " + "Error code: " + errorCode + "; " + "Error message: "
            + errorMessage + "; " + "Response XML: " + errorXml + "; " + "Request ID : " + requestId + "; ");
    throw failure;
}
From source file:com.liferay.blade.cli.CreateCommandTest.java
/**
 * Asserts that the whole of {@code content} matches the regular expression
 * {@code pattern}, compiled in multi-line, dot-matches-all mode.
 */
private void contains(String content, String pattern) throws Exception {
    final int flags = Pattern.DOTALL | Pattern.MULTILINE;
    final boolean wholeContentMatches = Pattern.compile(pattern, flags).matcher(content).matches();

    assertTrue(wholeContentMatches);
}
From source file:com.liferay.blade.cli.CreateCommandTest.java
/**
 * Asserts that the content of {@code file} does NOT match the regular
 * expression {@code pattern} as a whole (multi-line, dot-matches-all mode).
 */
private void lacks(File file, String pattern) throws Exception {
    // Decode explicitly as UTF-8: new String(byte[]) would use the platform default charset,
    // which makes the check depend on the machine running the test.
    String content = new String(IO.read(file), java.nio.charset.StandardCharsets.UTF_8);

    assertFalse(Pattern.compile(pattern, Pattern.MULTILINE | Pattern.DOTALL).matcher(content).matches());
}
From source file:com.liferay.blade.cli.command.CreateCommandTest.java
/**
 * Asserts that the whole of {@code content} matches {@code regex},
 * compiled in multi-line, dot-matches-all mode.
 */
private void _contains(String content, String regex) throws Exception {
    final int flags = Pattern.DOTALL | Pattern.MULTILINE;
    final boolean wholeContentMatches = Pattern.compile(regex, flags).matcher(content).matches();

    Assert.assertTrue(wholeContentMatches);
}