Usage examples for the java.util.regex.Matcher#end() method, collected from open-source projects.
public int end()
From source file:com.thoughtworks.go.domain.materials.Modification.java
/**
 * Extracts the card/issue numbers referenced in this modification's comment.
 * A card number is the run of digits following a '#', e.g. "#123" yields "123".
 *
 * @return a sorted, de-duplicated set of the digit strings found in the comment
 *         (empty set when the comment is null or contains no "#digits" tokens)
 */
public Set<String> getCardNumbersFromComment() {
    Set<String> cardNumbers = new TreeSet<>();
    Pattern pattern = Pattern.compile("#(\\d+)");
    String comment = this.comment == null ? "" : this.comment;
    Matcher matcher = pattern.matcher(comment);
    while (hasMatch(matcher)) {
        cardNumbers.add(id(matcher));
        // NOTE(review): the original also called matcher.end() here and discarded
        // the result; Matcher.end() has no side effects, so that statement was
        // dead code and has been removed. Advancing the matcher is presumably
        // done inside hasMatch() — TODO confirm against that helper.
    }
    return cardNumbers;
}
From source file:babel.content.pages.PageVersion.java
/** * Used to construct a page version from a set of nutch page related objects. * Should only be used by NutchPageExtractor. *///from w w w .j av a 2s . c o m public PageVersion(String segmentId, List<NutchChunk> chunks, Page page) { this(); Writable curVal; CrawlDatum curCD; Content curCT; ParseData curPD; ParseText curPT; // Store Segment ID m_verProps.set(PROP_SEGMENT_ID, segmentId); // Unwrap all of the page related information for (NutchChunk chunk : chunks) { curVal = chunk.get(); if (curVal instanceof CrawlDatum) { // Get fetch information curCD = (CrawlDatum) curVal; if (curCD.getStatus() == CrawlDatum.STATUS_FETCH_SUCCESS) { m_verProps.set(PROP_FETCH_TIME, Long.toString(curCD.getFetchTime())); m_verProps.set(PROP_MODIFIED_TIME, Long.toString(curCD.getModifiedTime())); } } else if (curVal instanceof Content) { // Get the original unparsed content curCT = (Content) curVal; try { String str = new String(curCT.getContent(), DEFAULT_CHARSET); Matcher m = Pattern.compile("<html[^>]*>", Pattern.CASE_INSENSITIVE).matcher(str); if (m.find()) { str = str.substring(m.start(), m.end()); m = Pattern.compile("\\slang\\s*=\\s*\"*([^\\s=\"]+)\"*", Pattern.CASE_INSENSITIVE) .matcher(str); if (m.find()) { str = str.substring(m.start(1), m.end(1)).trim().toLowerCase(); if (str.length() > 0) { page.setLanguage(Language.fromString(str)); } } } } catch (Exception e) { } } else if (curVal instanceof ParseData) { // Get data extracted from page content curPD = (ParseData) curVal; if (curPD.getStatus().isSuccess()) { m_verProps.set(PROP_TITLE, curPD.getTitle()); m_parseMeta.setAll(curPD.getParseMeta()); m_contentMeta.setAll(curPD.getContentMeta()); m_outLinks = curPD.getOutlinks(); } } else if (curVal instanceof ParseText) { // Get parsed content curPT = (ParseText) curVal; m_content = setStr(curPT.getText()); } else if (LOG.isWarnEnabled()) { LOG.warn("Unrecognized type: " + curVal.getClass()); } } }
From source file:com.spend.spendService.DomainLearning.java
private void GetNewQuery() { try {/*www .ja v a 2 s . c o m*/ TimerTask timertask = new TimerTask() { public void run() { try { domainList = new ArrayList<String>(); String[] seList = getSearchEngineNamesArray(); /* get urls from seedurlraw table */ PreparedStatement psmt = con.prepareStatement("SELECT url FROM seedurlraw"); ResultSet rs = psmt.executeQuery(); String regex = "[/]"; String regex2 = "[.]"; String PLDomain; while (rs.next()) { PLDomain = rs.getString("url"); PLDomain = PLDomain.replaceAll("http://|https://", ""); Pattern p = Pattern.compile(regex); Matcher m = p.matcher(PLDomain); if (m.find()) { PLDomain = PLDomain.substring(0, m.start()); } Pattern p2 = Pattern.compile(regex2); Matcher m2 = p2.matcher(PLDomain); int count = 0; while (m2.find()) { count++; } m2 = p2.matcher(PLDomain); if (count > 1 && m2.find()) { PLDomain = PLDomain.substring(m2.end()); } //System.out.println(PLDomain); if (!domainList.contains(PLDomain)) { domainList.add(PLDomain); newQuery = "sparql endpoint site:" + PLDomain; for (Object se : seList) { PreparedStatement psmt1 = con.prepareStatement( "INSERT INTO searchqueue(searchText,disabled,searchEngineName) VALUES(?,0,?);"); psmt1.setString(1, newQuery); psmt1.setString(2, se.toString()); psmt1.executeUpdate(); psmt1.close(); } } } } catch (Exception ex) { System.out .println("DomainLearning.java timertask run function SQL ERROR " + ex.getMessage()); } } }; Timer timer = new Timer(); DateFormat dateformat = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss"); Date date = dateformat.parse("20-07-2017 00:00:00"); // set date and time timer.schedule(timertask, date, 1000 * 60 * 60 * 24 * 7); // for a week 1000*60*60*24*7 } catch (Exception ex) { System.out.println("DomainLearning.java GetNewQuery function ERROR " + ex.getMessage()); } }
From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java
/**
 * Rewrites the buffered query so that a leading "select distinct" becomes a
 * plain "select".
 *
 * @return {@code true} if a distinct clause was found and removed,
 *         {@code false} if the buffer was left untouched
 */
@Override
public boolean removeDistinct() {
    Matcher m = SELECT_DISTINCT_PATTERN.matcher(buffer);
    if (!m.find()) {
        return false;
    }
    // Replace the whole matched "select distinct" span with "select".
    buffer.replace(m.start(), m.end(), "select");
    return true;
}
From source file:com.epam.reportportal.extension.bugtracking.jira.JiraStrategy.java
/** * Parse ticket description and find binary data * * @param issueInput/*from w ww .j a v a2 s . c om*/ * @return */ private Map<String, String> findBinaryData(IssueInput issueInput) { Map<String, String> binary = new HashMap<>(); String description = issueInput.getField(IssueFieldId.DESCRIPTION_FIELD.id).getValue().toString(); if (null != description) { // !54086a2c3c0c7d4446beb3e6.jpg| or [^54086a2c3c0c7d4446beb3e6.xml] String regex = "(!|\\[\\^).{24}.{0,5}(\\||\\])"; Matcher matcher = Pattern.compile(regex).matcher(description); while (matcher.find()) { String rawValue = description.subSequence(matcher.start(), matcher.end()).toString(); String binaryDataName = rawValue.replace("!", "").replace("[", "").replace("]", "").replace("^", "") .replace("|", ""); String binaryDataId = binaryDataName.split("\\.")[0]; binary.put(binaryDataId, binaryDataName); } } return binary; }
From source file:com.kenshoo.freemarker.util.DataModelParser.java
/**
 * Parses a data-model specification of the form {@code name=value} pairs into
 * an ordered map, where each value runs until the start of the next assignment.
 *
 * @param src      the raw specification text; blank input yields an empty map
 * @param timeZone time zone passed through to {@code parseValue} for temporal values
 * @return insertion-ordered map of name to parsed value
 * @throws DataModelParsingException if the text does not start with an
 *         assignment, or a value fails to parse
 */
public static Map<String, Object> parse(String src, TimeZone timeZone) throws DataModelParsingException {
    if (!StringUtils.hasText(src)) {
        return Collections.emptyMap();
    }

    Map<String, Object> dataModel = new LinkedHashMap<>();
    String lastName = null;
    int lastAssignmentStartEnd = 0;
    final Matcher assignmentStart = ASSIGNMENT_START.matcher(src);
    findAssignments: while (true) {
        // Look for the next "name=" marker; the previous value ends where it begins.
        boolean hasNextAssignment = assignmentStart.find(lastAssignmentStartEnd);
        if (lastName != null) {
            // Everything between the previous assignment's "=" and the next
            // assignment (or end of input) is the previous name's raw value.
            String value = src.substring(lastAssignmentStartEnd,
                    hasNextAssignment ? assignmentStart.start() : src.length()).trim();
            final Object parsedValue;
            try {
                parsedValue = parseValue(value, timeZone);
            } catch (DataModelParsingException e) {
                // Re-wrap with the offending name for a better error message,
                // preserving the original cause.
                throw new DataModelParsingException(
                        "Failed to parse the value of \"" + lastName + "\":\n" + e.getMessage(), e.getCause());
            }
            dataModel.put(lastName, parsedValue);
        }
        // On the first iteration the input must begin with an assignment at offset 0.
        if (lastName == null && (!hasNextAssignment || assignmentStart.start() != 0)) {
            throw new DataModelParsingException(
                    "The data model specification must start with an assignment (name=value).");
        }
        if (!hasNextAssignment) {
            break findAssignments;
        }
        lastName = assignmentStart.group(1).trim();
        lastAssignmentStartEnd = assignmentStart.end();
    }
    return dataModel;
}
From source file:com.hp.alm.ali.idea.entity.tree.EntityNode.java
public String toString() { String name = entity.getPropertyValue("name"); if (loading) { name += "..."; // TODO: show as icon }// www .jav a2 s . c o m String filter = model.getFilter(); if (!filter.isEmpty()) { Matcher matcher = Pattern.compile(wildcardToRegex(filter), Pattern.CASE_INSENSITIVE).matcher(name); List<Pair<Integer, Integer>> list = new LinkedList<Pair<Integer, Integer>>(); while (matcher.find()) { list.add(new Pair<Integer, Integer>(matcher.start(), matcher.end())); } if (!list.isEmpty()) { Collections.reverse(list); for (Pair<Integer, Integer> match : list) { name = name.substring(0, match.first) + "<b>" + name.substring(match.first, match.second) + "</b>" + name.substring(match.second); } return "<html>" + name + "</html>"; } } return name; }
From source file:org.ala.harvester.WaissHarvester.java
/**
 * Scrapes the WAISS index page with hand-built regexes: first collects all
 * "Photo: <name>" creator credits, then walks every species entry (image link,
 * common name, scientific name) and prints what it found.
 *
 * NOTE(review): the per-species image processing call is commented out, so this
 * currently only logs; the creatorArray is fixed at 9 slots — presumably the
 * page has at most 9 credits, TODO confirm.
 *
 * @see org.ala.harvester.Harvester#start()
 */
@SuppressWarnings("unchecked")
@Override
public void start(int infosourceId) throws Exception {
    // TODO Auto-generated method stub
    Thread.sleep(timeGap);

    // Obtains the image listing on the page number specified.
    // Instance variable `currentResDom` will have new
    // DOM representation of the result.
    String indexStr = getIndexPageStr();

    //        String xpathToImageUrls = "//div[@align='center']/a[@target='_blank']/@href";
    //
    //        Document document = getDocument(indexStr.getBytes());
    //        XPathFactory factory = XPathFactory.newInstance();
    //        XPath xpath = factory.newXPath();
    //
    //        NodeList nodes = (NodeList) xpath.evaluate(xpathToImageUrls, document, XPathConstants.NODESET);
    //
    //        for (int i = 0; i < nodes.getLength(); i++) {
    //            String imageUrl = (nodes.item(i)).getNodeValue();
    //            imageUrl = StringUtils.trimToNull(imageUrl);
    //
    //            if (imageUrl != null && !"".equals(imageUrl)) {
    //                imageUrl = "http://www.museum.wa.gov.au/waiss/pages/" + imageUrl;
    //                System.out.println(imageUrl);
    //            }
    //        }

    // Groups: (1) relative image-page link, (2) common name, (3) scientific name.
    Pattern speciesPattern = Pattern.compile("(?:<a href=\")" + "(images/[a-zA-Z0-9\\.]*)"
            + "(?:\" target=\"_blank\"><img src=\")" + "(?:\\.\\./a_data/[a-zA-Z0-9_]{1,}\\.jpg)"
            + "(?:\" width=\"[0-9]{1,}\" height=\"[0-9]{1,}\" border=\"[0-9]{1,}\"></a></div></td>[\\s]{0,}<td[ width=\"422]*>[\\s]*[<div align=\"center\">]*<span class=\"style4\"><strong>)"
            + "([a-zA-Z \\-']*)" + "(?:</strong>[ ]*\\([ ]*<em>)" + "([a-zA-Z ]*)");

    // Group (1): the photographer's name after "Photo: ".
    Pattern creatorPattern = Pattern.compile("(?:Photo: )" + "([a-zA-Z ]{1,})");

    Matcher m = speciesPattern.matcher(indexStr);

    String[] creatorArray = new String[9];
    Matcher m2 = creatorPattern.matcher(indexStr);
    int searchIdx2 = 0;
    int creatorCounter = 0;
    // First pass: collect creator credits in page order; each search resumes
    // at the end offset of the previous match.
    while (m2.find(searchIdx2)) {
        int endIdx2 = m2.end();
        String creator = m2.group(1);
        if (creator != null && !"".equals(creator)) {
            creatorArray[creatorCounter] = creator;
            creatorCounter++;
        }
        searchIdx2 = endIdx2;
    }

    int speciesCounter = 0;
    int searchIdx = 0;

    // get all the family links
    // Second pass: walk every species entry; same resume-at-end idiom.
    while (m.find(searchIdx)) {
        int endIdx = m.end();

        //            String found = content.substring(startIdx, endIdx);
        String url = "http://www.museum.wa.gov.au/waiss/pages/" + m.group(1);
        String commonName = m.group(2);
        String sciName = m.group(3);
        //            String generatedUrl = this.endpoint + url;
        //            System.out.println("URL:" + url);
        System.out.println("Common Name: " + commonName);
        System.out.println("Sci Name:" + sciName);

        //            processSingleImage(url.trim(), commonName.trim(), sciName.trim(), infosourceId, creatorArray[speciesCounter]);

        speciesCounter++;
        //
        //            identifiers.add(generatedUrl);

        searchIdx = endIdx;
    }
    System.out.println(speciesCounter);
}
From source file:org.ala.harvester.BlueTierHarvester.java
private Map<String, String> matchStr(Pattern p, String str, int imageUrlPosition, int sciNamePosition) { Matcher m = p.matcher(str); Map<String, String> urlNameMap = new HashMap<String, String>(); int searchIdx = 0; while (m.find(searchIdx)) { int endIdx = m.end(); // String found = content.substring(startIdx, endIdx); String imageUrl = m.group(imageUrlPosition); String sciName = m.group(sciNamePosition); imageUrl = imageUrl.replaceFirst("\\.\\./", ""); String generatedUrl = baseUrl + imageUrl; // System.out.println("URL:" + generatedUrl); // System.out.println("Name:" + sciName); // storeImageDoc(generatedUrl, sciName); urlNameMap.put(generatedUrl, sciName); searchIdx = endIdx;/*from www . jav a2s. c o m*/ } return urlNameMap; }