List of usage examples for java.util.regex.Matcher.usePattern
public Matcher usePattern(Pattern newPattern)
From source file:com.startupbidder.dao.MockDataBuilder.java
private List<Listing> getStartuplyListings(int fromId, int toId) { List<Listing> listings = new ArrayList<Listing>(); List<String> startuplyIds = getStartuplyIds(); log.info("Loading " + startuplyIds.size() + " Startuply listings"); try {// ww w . ja va 2s .c o m Pattern namePattern = Pattern.compile("<h1 id=\"companyNameHeader\"[^>]*>([^<]*)", Pattern.MULTILINE); Pattern websitePattern = Pattern.compile("<a href=\"http://([^\"]*)\">\\1", Pattern.MULTILINE); Pattern addressPattern = Pattern.compile( "<table id=\"branchTable\".*<td class=\"SortedColumn\">[^<]*</td>\\s*<td[^>]*>([^<]*)", Pattern.MULTILINE | Pattern.DOTALL); Pattern logoPattern = Pattern.compile("<img\\s+src=\"([^\"]*)\"\\s+id=\"ctl00_Content_Logo\"", Pattern.MULTILINE); Pattern industriesPattern = Pattern.compile( "<div[^>]*>Industries</div>\\s*<div[^>]*>\\s*<a[^>]*>([^<]*)</a>\\s*", Pattern.MULTILINE); Pattern industries2Pattern = Pattern.compile("\\s*,\\s*<a[^>]*>([^<]*)</a>", Pattern.MULTILINE); Pattern missionPattern = Pattern.compile("<h1[^>]*>[^<]* Mission</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern mantraPattern = Pattern.compile("([^\\.]+)"); Pattern productsPattern = Pattern.compile("<h1[^>]*>[^<]* Products</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern teamPattern = Pattern.compile("<h1[^>]*>[^<]* Team</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern lifePattern = Pattern.compile("<h1[^>]*>Life [^<]</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); int counter = 0; for (String startuplyId : startuplyIds) { counter++; if (counter <= fromId || counter >= toId) { continue; } String startuplyPath = STARTUPLY_ROOT + "/Companies/" + startuplyId + ".aspx"; StartuplyCache startuplyCache = null; try { startuplyCache = getOfy().get(StartuplyCache.class, startuplyPath); } catch (NotFoundException e) { ; } if (startuplyCache == null) { try { URL url = new URL(startuplyPath); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 
connection.setDoOutput(true); connection.setRequestMethod("GET"); StringWriter stringWriter = new StringWriter(); IOUtils.copy(connection.getInputStream(), stringWriter, "UTF-8"); String startuplyPage = stringWriter.toString(); connection.disconnect(); if (!StringUtils.isEmpty(startuplyPage)) { startuplyPage = startuplyPage.replaceAll("<br />", ""); // messes up multiline desc if we don't do this startuplyCache = new StartuplyCache(startuplyPath, startuplyPage); getOfy().put(startuplyCache); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } if (startuplyCache == null) { log.info("Could not load Startuply cache for id: " + startuplyId); } else if (StringUtils.isEmpty(startuplyCache.page)) { log.info("Unable to import, empty response for Startuply cache page for id: " + startuplyId); } else { try { //log.info(startuplyCache.page); String name = ""; Matcher nameMatcher = namePattern.matcher(startuplyCache.page); if (nameMatcher.find()) { name = nameMatcher.group(1); } String address = ""; Matcher addressMatcher = addressPattern.matcher(startuplyCache.page); if (addressMatcher.find()) { address = addressMatcher.group(1); } String website = ""; Matcher websiteMatcher = websitePattern.matcher(startuplyCache.page); if (websiteMatcher.find()) { website = "http://" + websiteMatcher.group(1); } String logo = ""; Matcher logoMatcher = logoPattern.matcher(startuplyCache.page); if (logoMatcher.find()) { logo = STARTUPLY_ROOT + logoMatcher.group(1); } String industries = ""; Matcher industriesMatcher = industriesPattern.matcher(startuplyCache.page); if (industriesMatcher.find()) { industries = industriesMatcher.group(1); industriesMatcher.usePattern(industries2Pattern); while (industriesMatcher.find()) { industries += " " + industriesMatcher.group(1); } } String description = ""; String mantra = ""; Matcher missionMatcher = missionPattern.matcher(startuplyCache.page); if (missionMatcher.find()) { description = 
missionMatcher.group(1); Matcher mantraMatcher = mantraPattern.matcher(description); if (mantraMatcher.find()) { mantra = mantraMatcher.group(1); } } Matcher productsMatcher = productsPattern.matcher(startuplyCache.page); if (productsMatcher.find()) { String products = productsMatcher.group(1); description += " " + products; } Matcher teamMatcher = teamPattern.matcher(startuplyCache.page); if (teamMatcher.find()) { String team = teamMatcher.group(1); description += " " + team; } Matcher lifeMatcher = lifePattern.matcher(startuplyCache.page); if (lifeMatcher.find()) { String life = lifeMatcher.group(1); description += " " + life; } if (StringUtils.isEmpty(name)) { log.info("Unable to import, couldn't find name for Startuply id: " + startuplyId); } else { String type = bestGuessListingType(name, industries, description); //log.info("Matched name:[" + name + "] address:["+address + "] website:["+website + "] logo:["+logo + "] industries:["+industries+"] mantra:["+mantra+"] description:["+description+"]"); int askamt = 5 * new Random().nextInt(20) * 1000; int askpct = 5 + 5 * new Random().nextInt(9); if (askamt < 10000) { askamt = 0; } if (StringUtils.isEmpty(mantra)) { mantra = industries; } if (StringUtils.isEmpty(description)) { description = "In summary, " + name + " is a great company in the " + industries + " space."; } Listing listing = prepareListing(STARTUPLY, // DtoToVoConverter.convert(user), name, Listing.State.ACTIVE, type, askamt, askpct, mantra, description, website, null, null, logo, address); listings.add(listing); //log.info("Added Startuply listing: "+listing); log.info("Added Startuply listing " + counter + " of " + startuplyIds.size() + " name: " + name); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } } } catch (Exception e) { e.printStackTrace(); } return listings; }
From source file:eu.finwest.dao.MockDataBuilder.java
private List<Listing> getStartuplyListings(int fromId, int toId) { List<Listing> listings = new ArrayList<Listing>(); List<String> startuplyIds = getStartuplyIds(); log.info("Loading " + startuplyIds.size() + " Startuply listings"); try {/*from ww w . ja v a2 s .c o m*/ Pattern namePattern = Pattern.compile("<h1 id=\"companyNameHeader\"[^>]*>([^<]*)", Pattern.MULTILINE); Pattern websitePattern = Pattern.compile("<a href=\"http://([^\"]*)\">\\1", Pattern.MULTILINE); Pattern addressPattern = Pattern.compile( "<table id=\"branchTable\".*<td class=\"SortedColumn\">[^<]*</td>\\s*<td[^>]*>([^<]*)", Pattern.MULTILINE | Pattern.DOTALL); Pattern logoPattern = Pattern.compile("<img\\s+src=\"([^\"]*)\"\\s+id=\"ctl00_Content_Logo\"", Pattern.MULTILINE); Pattern industriesPattern = Pattern.compile( "<div[^>]*>Industries</div>\\s*<div[^>]*>\\s*<a[^>]*>([^<]*)</a>\\s*", Pattern.MULTILINE); Pattern industries2Pattern = Pattern.compile("\\s*,\\s*<a[^>]*>([^<]*)</a>", Pattern.MULTILINE); Pattern missionPattern = Pattern.compile("<h1[^>]*>[^<]* Mission</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern mantraPattern = Pattern.compile("([^\\.]+)"); Pattern productsPattern = Pattern.compile("<h1[^>]*>[^<]* Products</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern teamPattern = Pattern.compile("<h1[^>]*>[^<]* Team</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); Pattern lifePattern = Pattern.compile("<h1[^>]*>Life [^<]</h1>\\s*<div[^>]*>\\s*([^<]*)", Pattern.MULTILINE); int counter = 0; for (String startuplyId : startuplyIds) { counter++; if (counter <= fromId || counter >= toId) { continue; } String startuplyPath = STARTUPLY_ROOT + "/Companies/" + startuplyId + ".aspx"; StartuplyCache startuplyCache = null; try { startuplyCache = getOfy().get(StartuplyCache.class, startuplyPath); } catch (NotFoundException e) { ; } if (startuplyCache == null) { try { URL url = new URL(startuplyPath); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 
connection.setDoOutput(true); connection.setRequestMethod("GET"); StringWriter stringWriter = new StringWriter(); IOUtils.copy(connection.getInputStream(), stringWriter, "UTF-8"); String startuplyPage = stringWriter.toString(); connection.disconnect(); if (!StringUtils.isEmpty(startuplyPage)) { startuplyPage = startuplyPage.replaceAll("<br />", ""); // messes up multiline desc if we don't do this startuplyCache = new StartuplyCache(startuplyPath, startuplyPage); getOfy().put(startuplyCache); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } if (startuplyCache == null) { log.info("Could not load Startuply cache for id: " + startuplyId); } else if (StringUtils.isEmpty(startuplyCache.page)) { log.info("Unable to import, empty response for Startuply cache page for id: " + startuplyId); } else { try { //log.info(startuplyCache.page); String name = ""; Matcher nameMatcher = namePattern.matcher(startuplyCache.page); if (nameMatcher.find()) { name = nameMatcher.group(1); } String address = ""; Matcher addressMatcher = addressPattern.matcher(startuplyCache.page); if (addressMatcher.find()) { address = addressMatcher.group(1); } String website = ""; Matcher websiteMatcher = websitePattern.matcher(startuplyCache.page); if (websiteMatcher.find()) { website = "http://" + websiteMatcher.group(1); } String logo = ""; Matcher logoMatcher = logoPattern.matcher(startuplyCache.page); if (logoMatcher.find()) { logo = STARTUPLY_ROOT + logoMatcher.group(1); } String industries = ""; Matcher industriesMatcher = industriesPattern.matcher(startuplyCache.page); if (industriesMatcher.find()) { industries = industriesMatcher.group(1); industriesMatcher.usePattern(industries2Pattern); while (industriesMatcher.find()) { industries += " " + industriesMatcher.group(1); } } String description = ""; String mantra = ""; Matcher missionMatcher = missionPattern.matcher(startuplyCache.page); if (missionMatcher.find()) { description = 
missionMatcher.group(1); Matcher mantraMatcher = mantraPattern.matcher(description); if (mantraMatcher.find()) { mantra = mantraMatcher.group(1); } } Matcher productsMatcher = productsPattern.matcher(startuplyCache.page); if (productsMatcher.find()) { String products = productsMatcher.group(1); description += " " + products; } Matcher teamMatcher = teamPattern.matcher(startuplyCache.page); if (teamMatcher.find()) { String team = teamMatcher.group(1); description += " " + team; } Matcher lifeMatcher = lifePattern.matcher(startuplyCache.page); if (lifeMatcher.find()) { String life = lifeMatcher.group(1); description += " " + life; } if (StringUtils.isEmpty(name)) { log.info("Unable to import, couldn't find name for Startuply id: " + startuplyId); } else { String type = bestGuessListingType(name, industries, description); //log.info("Matched name:[" + name + "] address:["+address + "] website:["+website + "] logo:["+logo + "] industries:["+industries+"] mantra:["+mantra+"] description:["+description+"]"); int askamt = 5 * new Random().nextInt(20) * 1000; int askpct = 5 + 5 * new Random().nextInt(9); if (askamt < 10000) { askamt = 0; } if (StringUtils.isEmpty(mantra)) { mantra = industries; } if (StringUtils.isEmpty(description)) { description = "In summary, " + name + " is a great company in the " + industries + " space."; } Listing listing = prepareListing(EN, STARTUPLY, // DtoToVoConverter.convert(user), name, Listing.State.ACTIVE, type, askamt, askpct, mantra, description, website, null, null, logo, address, false, false); listings.add(listing); //log.info("Added Startuply listing: "+listing); log.info("Added Startuply listing " + counter + " of " + startuplyIds.size() + " name: " + name); } } catch (Exception e) { log.log(Level.WARNING, "Exception while importing Startuply startup: " + startuplyId, e); } } } } catch (Exception e) { e.printStackTrace(); } return listings; }
From source file:com.boylesoftware.web.impl.AbstractRouterConfiguration.java
@Override public RouterRequest findRoute(final HttpServletRequest request, final HttpServletResponse response) throws MethodNotAllowedException, ServletException { // check if we have mappings if (this.mappings.length == 0) return null; // try to find the matching route mapping final Lock readLock = this.mappingsLock.readLock(); readLock.lock();/*from w w w . j a v a2 s . c o m*/ try { // test request URI against the mappings RouteImpl mapping = this.mappings[0]; final String requestURI = request.getRequestURI(); // TODO: reusable matcher? final Matcher m = mapping.getURIPattern().matcher(requestURI); int mappingInd = 0; do { // try to match the mapping if (m.matches()) { // log the match if (this.log.isDebugEnabled()) this.log.debug("found mapping for URI " + requestURI + " on attempt " + (mappingInd + 1)); // move the mapping higher if matched more frequently final long numMatched = mapping.incrementNumMatched(); if (mappingInd > 0) { final RouteImpl prevMapping = this.mappings[mappingInd - 1]; if (numMatched > prevMapping.getNumMatched()) { final Lock writeLock = this.mappingsLock.writeLock(); readLock.unlock(); writeLock.lock(); try { this.mappings[mappingInd] = prevMapping; this.mappings[mappingInd - 1] = mapping; } finally { readLock.lock(); writeLock.unlock(); } } } // wrap the request final RouterRequestImpl routerRequest = this.routerRequestPool.getSync(); boolean success = false; try { // initialize the router request routerRequest.wrap(request, response, mapping, this.isAuthenticationRequired(requestURI)); // add parameters made from the URI components final int numURIParams = m.groupCount(); for (int i = 0; i < numURIParams; i++) { final String uriParamName = mapping.getURIParamName(i); if (uriParamName != null) routerRequest.addParameter(uriParamName, m.group(i + 1)); } // convert flash attributes cookie to request attributes routerRequest.flashCookieToAttributes(); // return the router request success = true; return routerRequest; } finally { if 
(!success) routerRequest.recycle(); } } // next mapping for next iteration if (++mappingInd >= this.mappings.length) break; mapping = this.mappings[mappingInd]; // reuse the matcher m.reset(); m.usePattern(mapping.getURIPattern()); } while (true); } finally { readLock.unlock(); } // no mapping matched return null; }