List of usage examples for the java.util.regex.Matcher.start() method
public int start()
From source file:com.google.flightmap.parsing.faa.afd.AfdCommParser.java
private void addCommData() throws SQLException { Pattern commSectionRegex = Pattern .compile("\\((\\S+?)\\)\\s+?\\d+\\s*(?:N|E|W|S|NE|NW|SE|SW).+?UTC\\s+.+?COMMUNICATIONS\\:"); // "\\((\\S+?)\\)\\s+?\\d+.+?UTC\\s+.+?COMMUNICATIONS\\:"); Pattern freqRegex = Pattern.compile("([A-Z]+(?:[A-Z]| |/)+?)(\\d+\\.\\d+)\\s+(?:\\((.+?)\\))?"); int start = 0; Matcher commSectionMatcher = commSectionRegex.matcher(afd); Matcher freqMatcher = freqRegex.matcher(afd); while (commSectionMatcher.find(start)) { start = commSectionMatcher.start(); final String iata = commSectionMatcher.group(1); System.out.println(iata); // Determine potential end of this COMMUNICATIONS end int nextColon = afd.indexOf(":", commSectionMatcher.end()); if (nextColon == -1) { nextColon = afd.length();//w ww. j a va2s.c om } int nextMatch; if (commSectionMatcher.find(commSectionMatcher.end())) { nextMatch = commSectionMatcher.start(); } else { nextMatch = afd.length(); } final int stop = Math.min(nextColon, nextMatch); while (freqMatcher.find(start)) { if (freqMatcher.start() > stop) { start = stop; break; } System.out.println(" -> " + freqMatcher.group(0)); final String identifier = freqMatcher.group(1).trim(); final String frequency = freqMatcher.group(2).trim(); String remarks = freqMatcher.group(3); if (remarks != null) { remarks = remarks.trim(); } addAirportCommToDb(iata, identifier, frequency, remarks); start = freqMatcher.end(); } } }
From source file:com.sonicle.webtop.mail.ICalendarRequest.java
/** * <p>//from ww w . j a v a 2s.c o m * Finds all "URL"s in the given _rawText, wraps them in * HTML link tags and returns the result (with the rest of the text * html encoded). * </p> * <p> * We employ the procedure described at: * http://www.codinghorror.com/blog/2008/10/the-problem-with-urls.html * which is a <b>must-read</b>. * </p> * Basically, we allow any number of left parenthesis (which will get stripped away) * followed by http:// or https://. Then any number of permitted URL characters * (based on http://www.ietf.org/rfc/rfc1738.txt) followed by a single character * of that set (basically, those minus typical punctuation). We remove all sets of * matching left & right parentheses which surround the URL. *</p> * <p> * This method *must* be called from a tag/component which will NOT * end up escaping the output. For example: * <PRE> * <h:outputText ... escape="false" value="#{core:hyperlinkText(textThatMayHaveURLs, '_blank')}"/> * </pre> * </p> * <p> * Reason: we are adding <code><a href="..."></code> tags to the output *and* * encoding the rest of the string. So, encoding the outupt will result in * double-encoding data which was already encoded - and encoding the <code>a href</code> * (which will render it useless). * </p> * <p> * * @param _rawText - if <code>null</code>, returns <code>""</code> (empty string). * @param _target - if not <code>null</code> or <code>""</code>, adds a target attributed to the generated link, using _target as the attribute value. */ public static final String hyperlinkText(final String _rawText, final String _target) { String returnValue = null; if (!StringUtils.isBlank(_rawText)) { final Matcher matcher = URI_FINDER_PATTERN.matcher(_rawText); if (matcher.find()) { final int originalLength = _rawText.length(); final String targetText = (StringUtils.isBlank(_target)) ? 
"" : " target=\"" + _target.trim() + "\""; final int targetLength = targetText.length(); // Counted 15 characters aside from the target + 2 of the URL (max if the whole string is URL) // Rough guess, but should keep us from expanding the Builder too many times. final StringBuilder returnBuffer = new StringBuilder(originalLength * 2 + targetLength + 15); int currentStart; int currentEnd; int lastEnd = 0; String currentURL; do { currentStart = matcher.start(); currentEnd = matcher.end(); currentURL = matcher.group(); // Adjust for URLs wrapped in ()'s ... move start/end markers // and substring the _rawText for new URL value. while (currentURL.startsWith("(") && currentURL.endsWith(")")) { currentStart = currentStart + 1; currentEnd = currentEnd - 1; currentURL = _rawText.substring(currentStart, currentEnd); } while (currentURL.startsWith("(")) { currentStart = currentStart + 1; currentURL = _rawText.substring(currentStart, currentEnd); } // Text since last match returnBuffer.append(StringEscapeUtils.escapeHtml4(_rawText.substring(lastEnd, currentStart))); // Wrap matched URL returnBuffer.append("<a href=\"" + currentURL + "\"" + targetText + ">" + currentURL + "</a>"); lastEnd = currentEnd; } while (matcher.find()); if (lastEnd < originalLength) { returnBuffer.append(StringEscapeUtils.escapeHtml4(_rawText.substring(lastEnd))); } returnValue = returnBuffer.toString(); } } if (returnValue == null) { returnValue = StringEscapeUtils.escapeHtml4(_rawText); } return returnValue; }
From source file:android.databinding.compilationTest.BaseCompilationTest.java
/**
 * Writes the classpath resource {@code name} to {@code targetFile}, replacing every
 * match of {@code VARIABLES} with the value that {@code replacements} holds for the
 * match's first capture group. Matches whose key has no replacement are removed
 * from the output.
 *
 * @param name resource name, resolved via {@code getClass().getResourceAsStream}
 * @param targetFile file that receives the processed contents
 * @param replacements values keyed by variable name
 * @throws IOException if the resource does not exist or reading/writing fails
 */
protected void copyResourceTo(String name, File targetFile, Map<String, String> replacements)
        throws IOException {
    if (replacements.isEmpty()) {
        // NOTE(review): there is no return here, so the substitution pass below still
        // runs and rewrites targetFile (dropping every variable). Kept as-is because
        // adding a return would change the produced file - confirm the intent.
        copyResourceTo(name, targetFile);
    }

    final InputStream inputStream = getClass().getResourceAsStream(name);
    if (inputStream == null) {
        // Fail with a descriptive, declared exception instead of a bare NPE.
        throw new IOException("Resource not found: " + name);
    }
    final String contents;
    try {
        contents = IOUtils.toString(inputStream);
    } finally {
        // Close even when reading fails so the stream is never leaked.
        IOUtils.closeQuietly(inputStream);
    }

    final StringBuilder out = new StringBuilder(contents.length());
    final Matcher matcher = VARIABLES.matcher(contents);
    int location = 0;
    while (matcher.find()) {
        final int start = matcher.start();
        if (start > location) {
            // Copy the literal text between the previous variable and this one.
            out.append(contents, location, start);
        }
        final String replacement = replacements.get(matcher.group(1));
        if (replacement != null) {
            out.append(replacement);
        }
        location = matcher.end();
    }
    if (location < contents.length()) {
        out.append(contents, location, contents.length());
    }
    FileUtils.writeStringToFile(targetFile, out.toString());
}
From source file:be.makercafe.apps.makerbench.editors.JFXScadEditor.java
/**
 * Computes the style spans for {@code text}: every match of {@code PATTERN} gets the
 * style class of the first non-null named group (checked in the order KEYWORD, PAREN,
 * BRACE, BRACKET, SEMICOLON, STRING, COMMENT); the text between matches gets an
 * empty style list.
 */
private static StyleSpans<Collection<String>> computeHighlighting(String text) {
    final Matcher tokens = PATTERN.matcher(text);
    final StyleSpansBuilder<Collection<String>> builder = new StyleSpansBuilder<>();
    int cursor = 0;

    while (tokens.find()) {
        final String styleClass;
        if (tokens.group("KEYWORD") != null) {
            styleClass = "keyword";
        } else if (tokens.group("PAREN") != null) {
            styleClass = "paren";
        } else if (tokens.group("BRACE") != null) {
            styleClass = "brace";
        } else if (tokens.group("BRACKET") != null) {
            styleClass = "bracket";
        } else if (tokens.group("SEMICOLON") != null) {
            styleClass = "semicolon";
        } else if (tokens.group("STRING") != null) {
            styleClass = "string";
        } else if (tokens.group("COMMENT") != null) {
            styleClass = "comment";
        } else {
            styleClass = null; /* never happens */
        }
        assert styleClass != null;

        final int tokenStart = tokens.start();
        final int tokenEnd = tokens.end();
        // Unstyled gap since the previous token, then the token itself.
        builder.add(Collections.emptyList(), tokenStart - cursor);
        builder.add(Collections.singleton(styleClass), tokenEnd - tokenStart);
        cursor = tokenEnd;
    }

    // Unstyled remainder after the last token.
    builder.add(Collections.emptyList(), text.length() - cursor);
    return builder.create();
}
From source file:com.xpn.xwiki.render.XWikiMacrosMappingRenderer.java
private String convertSingleLines(String content, XWikiContext context) { StringBuffer result = new StringBuffer(); Matcher m = SINGLE_LINE_MACRO_PATTERN.matcher(content); int current = 0; while (m.find()) { result.append(content.substring(current, m.start())); current = m.end();//from w ww .j av a 2s . c om String macroname = m.group(1); String params = m.group(3); String allcontent = m.group(0); XWikiVirtualMacro macro = this.macros_mappings.get(macroname); if ((macro != null) && (macro.isSingleLine())) { result.append(context.getWiki().getRenderingEngine().convertSingleLine(macroname, params, allcontent, macro, context)); } else { result.append(allcontent); } } if (current == 0) { return content; } result.append(content.substring(current)); return result.toString(); }
From source file:com.xpn.xwiki.render.XWikiMacrosMappingRenderer.java
/**
 * Rewrites every match of {@code MULTI_LINE_MACRO_PATTERN} in {@code content}.
 * A match whose name (group 1) maps to a virtual macro flagged as multi-line is
 * replaced by the rendering engine's {@code convertMultiLine} result, which receives
 * the parameters (group 3) and body (group 4); any other match is copied through
 * unchanged. Returns {@code content} itself when the scan position never moved past
 * index 0.
 */
private String convertMultiLines(String content, XWikiContext context) {
    final StringBuilder out = new StringBuilder();
    final Matcher macroMatcher = MULTI_LINE_MACRO_PATTERN.matcher(content);
    int consumed = 0;

    while (macroMatcher.find()) {
        // Literal text between the previous match and this one.
        out.append(content, consumed, macroMatcher.start());
        consumed = macroMatcher.end();

        final String name = macroMatcher.group(1);
        final String arguments = macroMatcher.group(3);
        final String body = macroMatcher.group(4);
        final String wholeMatch = macroMatcher.group(0);
        final XWikiVirtualMacro mapping = this.macros_mappings.get(name);

        if (mapping == null || !mapping.isMultiLine()) {
            out.append(wholeMatch);
            continue;
        }
        out.append(context.getWiki().getRenderingEngine()
                .convertMultiLine(name, arguments, body, wholeMatch, mapping, context));
    }

    if (consumed == 0) {
        return content;
    }
    out.append(content.substring(consumed));
    return out.toString();
}
From source file:com.ushahidi.chambua.service.EntityExtractorService.java
/** * Extracts named entities from the provided text. The first * step is to determine the content type of the text. * //from ww w . j ava 2s . c om * @param text * @return com.ushahidi.swiftriver.tagger.dto.APIResponseDTO */ public APIResponseDTO getEntities(String text) { String cleanedContent = null; try { cleanedContent = articleExtractor.getText(text); } catch (BoilerpipeProcessingException e) { LOGGER.error("An error occurred while cleaning the input: {}", e.getMessage()); } String labeledText = classifier.classifyWithInlineXML(cleanedContent); // Entity types/classes available in the classifier e.g. PERSON, LOCATION, ORGANIZATION Set<String> tags = classifier.labels(); String background = classifier.backgroundSymbol(); // Build out the regex string String tagPattern = ""; for (String tag : tags) { if (background.equals(tag)) continue; if (tagPattern.length() > 0) { tagPattern += "|"; } tagPattern += tag; } // Patterns for extracting the labeled text Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>"); Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>"); // Map to store the extracted entities/tags Map<String, Set<String>> entityMap = new HashMap<String, Set<String>>(); // Begin extraction Matcher m = startPattern.matcher(labeledText); while (m.find()) { int start = m.start(); labeledText = m.replaceFirst(""); m = endPattern.matcher(labeledText); if (m.find()) { int end = m.start(); String tag = m.group(1).toLowerCase(); labeledText = m.replaceFirst(""); String entity = labeledText.substring(start, end); if (entityMap.containsKey(tag)) { Set<String> current = entityMap.get(tag); current.add(entity); entityMap.put(tag, current); } else { Set<String> entities = new HashSet<String>(); entities.add(entity); entityMap.put(tag, entities); } } // Adjust the matcher m = startPattern.matcher(labeledText); } DocumentData apiResponse = new DocumentData(); if (entityMap.containsKey("person")) { apiResponse.setPeople(new 
ArrayList<String>(entityMap.get("person"))); } if (entityMap.containsKey("organization")) { apiResponse.setOrganizations(new ArrayList<String>(entityMap.get("organization"))); } // Geocode the location entities via the Gisgraphy REST API if (entityMap.containsKey("location")) { List<Place> places = new ArrayList<DocumentData.Place>(); try { places = geocodePlaceNames(entityMap.get("location")); apiResponse.setPlaces(places); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } return beanMapper.map(apiResponse, APIResponseDTO.class); }
From source file:at.ac.tuwien.inso.subcat.utility.commentparser.Parser.java
License:asdf
private void parseParagraphs(List<ContentNode<T>> ast, String commentFragment) { Matcher pm = pPara.matcher(commentFragment); int lastEnd = 0; while (pm.find()) { if (lastEnd != pm.start()) { int count = StringUtils.countMatches(pm.group(0), "\n") - 1; parseParagraph(ast, commentFragment.substring(lastEnd, pm.start()), count); }// w w w. j av a 2 s. c om lastEnd = pm.end(); } if (lastEnd != commentFragment.length()) { String frag = commentFragment.substring(lastEnd, commentFragment.length()); if (frag.trim().length() > 0) { parseParagraph(ast, frag, 0); } } }
From source file:net.sf.jabref.openoffice.BstWrapper.java
private Map<String, String> parseResult(String result) { Map<String, String> map = new HashMap<>(); // Look through for instances of \bibitem : Matcher m = BstWrapper.bibitemTag.matcher(result); ArrayList<Integer> indices = new ArrayList<>(); ArrayList<Integer> endIndices = new ArrayList<>(); ArrayList<String> keys = new ArrayList<>(); while (m.find()) { if (!indices.isEmpty()) { endIndices.add(m.start()); }/*from w w w. ja v a2s .c o m*/ LOGGER.debug(m.start() + " " + m.end()); String tag = m.group(); String key = tag.substring(9, tag.length() - 1); indices.add(m.end()); keys.add(key); } int lastI = result.lastIndexOf("\\end{thebibliography}"); if ((lastI > 0) && (lastI > indices.get(indices.size() - 1))) { endIndices.add(lastI); } for (int i = 0; i < keys.size(); i++) { String key = keys.get(i); int index = indices.get(i); int endIndex = endIndices.get(i); String part = result.substring(index, endIndex); map.put(key, formatter.format(part.trim().replaceAll("\\\\newblock ", " "))); } return map; }
From source file:com.digitalpebble.storm.crawler.indexing.AbstractIndexerBolt.java
/** Returns a mapping field name / values for the metadata to index **/ protected Map<String, String[]> filterMetadata(Metadata meta) { Pattern indexValuePattern = Pattern.compile("\\[(\\d+)\\]"); Map<String, String[]> fieldVals = new HashMap<String, String[]>(); Iterator<Entry<String, String>> iter = metadata2field.entrySet().iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); // check whether we want a specific value or all of them? int index = -1; String key = entry.getKey(); Matcher match = indexValuePattern.matcher(key); if (match.find()) { index = Integer.parseInt(match.group(1)); key = key.substring(0, match.start()); }//from www. j a v a 2s . com String[] values = meta.getValues(key); // not found if (values == null || values.length == 0) continue; // want a value index that it outside the range given if (index >= values.length) continue; // store all values available if (index == -1) fieldVals.put(entry.getValue(), values); // or only the one we want else fieldVals.put(entry.getValue(), new String[] { values[index] }); } return fieldVals; }