List of usage examples for java.util.regex.Matcher.replaceFirst
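public String replaceFirst(String replacement) (the overload used by every example below; see also public String replaceFirst(Function<MatchResult, String> replacer))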
From source file:org.lockss.extractor.SimpleHtmlMetaTagMetadataExtractor.java
public ArticleMetadata extract(MetadataTarget target, CachedUrl cu) throws IOException { if (cu == null) { throw new IllegalArgumentException("extract() called with null CachedUrl"); }/*from w w w . ja v a2s. c o m*/ ArticleMetadata ret = new ArticleMetadata(); BufferedReader bReader = null; try { bReader = new BufferedReader(cu.openForReading()); for (String line = bReader.readLine(); line != null; line = bReader.readLine()) { int i = StringUtil.indexOfIgnoreCase(line, "<meta "); while (i >= 0) { // recognize end of tag character preceded by optional '/', // preceded by a double-quote that is separated by zero or more // whitespace characters int j = i + 1; while (true) { j = StringUtil.indexOfIgnoreCase(line, ">", j); if (j < 0) break; String s = line.substring(i, j); if (s.endsWith("/")) { s = s.substring(0, s.length() - 1); } if (s.trim().endsWith("\"")) { break; } j++; } if (j < 0) { // join next line with tag end String nextLine = bReader.readLine(); if (nextLine == null) { break; } if (line.endsWith("=") && nextLine.startsWith(" ")) { // here we trim leading spaces from nextLine Matcher m = whiteSpacePat.matcher(nextLine); nextLine = m.replaceFirst(""); } line += nextLine; continue; } String meta = line.substring(i, j + 1); if (log.isDebug3()) log.debug3("meta: " + meta); addTag(meta, ret); i = StringUtil.indexOfIgnoreCase(line, "<meta ", j + 1); } } } finally { IOUtil.safeClose(bReader); } return ret; }
From source file:de.uzk.hki.da.format.CLIConversionStrategy.java
/** * Tokenizes commandLine and replaces certain strings. * "input" and "output" get replaced by paths of source and destination file. * strings beginning with "{" and ending with "}" get replaced by the contents of additionalParams of the ConversionInstruction. * Each of the {}-surrounded string gets replaced by exactly one token of additional params. * * @param ci the ci// ww w . j av a 2 s . c o m * @param repName the rep name * @return The processed command as list of tokens. The tokenized string has the right format * for a call in Runtime.getRuntime().exec(commandToExecute). This holds especially true * for filenames (which replace the input/output parameters) that are separated by * whitespaces. "file 2.jpg" is represented as one token only. */ protected String[] assemble(ConversionInstruction ci, String repName) { String commandLine_ = commandLine; // replace additional params List<String> ap = tokenize(ci.getAdditional_params(), ","); for (String s : ap) { Pattern pattern = Pattern.compile("\\{.*?\\}"); Matcher matcher = pattern.matcher(commandLine_); commandLine_ = matcher.replaceFirst(s); } // tokenize before replacement to group original tokens together // (to prevent wrong tokenization like two tokens for "file" "2.jpg" // which can result from replacement) String[] tokenizedCmd = tokenize(commandLine_); String targetSuffix = ci.getConversion_routine().getTarget_suffix(); if (targetSuffix.equals("*")) targetSuffix = FilenameUtils.getExtension(ci.getSource_file().toRegularFile().getAbsolutePath()); Utilities.replace(tokenizedCmd, "input", ci.getSource_file().toRegularFile().getAbsolutePath()); Utilities.replace(tokenizedCmd, "output", object.getDataPath() + "/" + repName + "/" + Utilities.slashize(ci.getTarget_folder()) + FilenameUtils.removeExtension(Matcher.quoteReplacement( FilenameUtils.getName(ci.getSource_file().toRegularFile().getAbsolutePath()))) + "." + targetSuffix); return tokenizedCmd; }
From source file:org.talend.dq.dbms.MSSqlDbmsLanguage.java
@Override public String getTopNQuery(String query, int n) { Matcher m = SELECT_PATTERN.matcher(query); return m.replaceFirst("SELECT TOP " + n + PluginConstant.SPACE_STRING); //$NON-NLS-1$ }
From source file:com.almende.util.NamespaceUtil.java
/** * _get.// ww w . j av a 2s . com * * @param destination * the destination * @param path * the path * @return the call tuple * @throws IllegalAccessException * the illegal access exception * @throws InvocationTargetException * the invocation target exception * @throws NoSuchMethodException * the no such method exception */ private CallTuple _get(final Object destination, final String path) throws IllegalAccessException, InvocationTargetException, NoSuchMethodException { final CallTuple result = new CallTuple(); String reducedPath = ""; String reducedMethod = path; if (path.indexOf('.') >= 0) { reducedPath = destination.getClass().getName() + "." + path; final Matcher matcher = PATTERN.matcher(reducedPath); reducedPath = matcher.replaceFirst(""); reducedMethod = matcher.group().substring(1); } if (!cache.containsKey(reducedPath)) { final AnnotatedMethod[] methods = new AnnotatedMethod[1]; final String newSteps = destination.getClass().getName(); cache.put("", new AnnotatedMethod[0]); populateCache(destination, newSteps, methods); } if (!cache.containsKey(reducedPath)) { try { throw new IllegalStateException("Non resolveable path given:'" + path + "' \n checked:" + JOM.getInstance().writeValueAsString(cache)); } catch (final JsonProcessingException e) { throw new IllegalStateException("Non resolveable path given:'" + path + "' \n checked:" + cache); } } final AnnotatedMethod[] methodPath = cache.get(reducedPath); Object newDestination = destination; for (final AnnotatedMethod method : methodPath) { if (method != null) { newDestination = method.getActualMethod().invoke(destination, (Object[]) null); } } if (newDestination == null) { // Oops, namespace getter returned null pointer! return result; } result.setDestination(newDestination); final AnnotatedClass newClazz = AnnotationUtil.get(newDestination.getClass()); final List<AnnotatedMethod> methods = newClazz.getMethods(reducedMethod); if (!methods.isEmpty()) { result.setMethod(methods.get(0)); } return result; }
From source file:org.goko.core.rs274ngcv3.parser.GCodeLexer.java
/**
 * Recursively splits the given command string into tokens.
 * <p>
 * On each call the patterns are tried in a fixed order (whitespace, word,
 * line number, multiline comment, simple comment). The first one that finds
 * a match has its match removed from the string (whitespace is simply
 * dropped, everything else goes through {@code extractToken}) and the method
 * recurses on the remaining string.
 *
 * @param stringCommand the string command still to be tokenized
 * @param tokens the list receiving the tokens
 * @return the {@code tokens} list that was passed in
 * @throws GkException if none of the patterns matches the remaining string
 */
protected List<GCodeToken> createTokens(String stringCommand, List<GCodeToken> tokens) throws GkException {
    if (StringUtils.isBlank(stringCommand)) {
        return tokens;
    }

    final String remainder;
    Matcher candidate = spacePattern.matcher(stringCommand);
    if (candidate.find()) {
        // Whitespace produces no token, it is only stripped.
        remainder = candidate.replaceFirst(StringUtils.EMPTY);
    } else if ((candidate = wordPattern.matcher(stringCommand)).find()) {
        remainder = extractToken(candidate, tokens, GCodeTokenType.WORD);
    } else if ((candidate = lineNumberPattern.matcher(stringCommand)).find()) {
        remainder = extractToken(candidate, tokens, GCodeTokenType.LINE_NUMBER);
    } else if ((candidate = multilineCommentPattern.matcher(stringCommand)).find()) {
        remainder = extractToken(candidate, tokens, GCodeTokenType.MULTILINE_COMMENT);
    } else if ((candidate = simpleCommentPattern.matcher(stringCommand)).find()) {
        remainder = extractToken(candidate, tokens, GCodeTokenType.SIMPLE_COMMENT);
    } else {
        throw new GkFunctionalException("Unexpected character : " + stringCommand);
    }
    return createTokens(remainder, tokens);
}
From source file:org.silverpeas.core.contribution.attachment.util.SharingContext.java
/**
 * Reads a text content in order to identify all attachment URIs and to transform
 * them into shared attachment URIs.
 * <p>
 * The text is rescanned from the start after every replacement, until
 * {@code REGEXPR_SHARED_ATTACHMENT} no longer matches.
 *
 * @param text the text content to modify
 * @return the text containing all attachment URI conversions.
 */
public String applyOn(String text) {
    String newStr = text;
    Matcher matcher = REGEXPR_SHARED_ATTACHMENT.matcher(newStr);
    while (matcher.find()) {
        String currentURL = matcher.group(1);
        String replacement = "src=\"" + convertURLToSharedOne(currentURL);
        // The converted URL is literal text: quote it so that a '$' or '\' in the
        // URL is not interpreted as a group reference / escape by replaceFirst.
        newStr = matcher.replaceFirst(Matcher.quoteReplacement(replacement));
        matcher = REGEXPR_SHARED_ATTACHMENT.matcher(newStr);
    }
    return newStr;
}
From source file:de.uzk.hki.da.convert.CLIConversionStrategy.java
/** * Tokenizes commandLine and replaces certain strings. * "input" and "output" get replaced by paths of source and destination file. * strings beginning with "{" and ending with "}" get replaced by the contents of additionalParams of the ConversionInstruction. * Each of the {}-surrounded string gets replaced by exactly one token of additional params. * * @param ci the ci/*from www . java 2 s. com*/ * @param repName the rep name * @return The processed command as list of tokens. The tokenized string has the right format * for a call in Runtime.getRuntime().exec(commandToExecute). This holds especially true * for filenames (which replace the input/output parameters) that are separated by * whitespaces. "file 2.jpg" is represented as one token only. */ protected String[] assemble(WorkArea wa, ConversionInstruction ci, String repName) { String commandLine_ = commandLine; // replace additional params List<String> ap = tokenize(ci.getAdditional_params(), ","); for (String s : ap) { Pattern pattern = Pattern.compile("\\{.*?\\}"); Matcher matcher = pattern.matcher(commandLine_); commandLine_ = matcher.replaceFirst(s); } // tokenize before replacement to group original tokens together // (to prevent wrong tokenization like two tokens for "file" "2.jpg" // which can result from replacement) String[] tokenizedCmd = tokenize(commandLine_); String targetSuffix = ci.getConversion_routine().getTarget_suffix(); if (targetSuffix.equals("*")) targetSuffix = FilenameUtils.getExtension(wa.toFile(ci.getSource_file()).getAbsolutePath()); StringUtilities.replace(tokenizedCmd, "input", wa.toFile(ci.getSource_file()).getAbsolutePath()); StringUtilities.replace(tokenizedCmd, "output", wa.dataPath() + "/" + repName + "/" + StringUtilities.slashize(ci.getTarget_folder()) + FilenameUtils.removeExtension(Matcher.quoteReplacement( FilenameUtils.getName(wa.toFile(ci.getSource_file()).getAbsolutePath()))) + "." + targetSuffix); return tokenizedCmd; }
From source file:org.goko.core.gcode.rs274ngcv3.parser.GCodeLexer.java
/** * Extract the first token from the given matcher * @param matcher the matcher/*from w w w. ja va 2 s . co m*/ * @param tokens the list of tokens * @param type the type of token to create * @return the remaining String after the token extraction */ protected String extractToken(Matcher matcher, List<GCodeToken> tokens, GCodeTokenType type) { tokens.add(new GCodeToken(type, matcher.group())); return matcher.replaceFirst(StringUtils.EMPTY); }
From source file:org.kuali.kfs.sys.document.validation.impl.BusinessObjectDataDictionaryValidation.java
/**
 * Determines the prefix from the "businessObjectForValidation" parameter property if possible.
 * <p>
 * When the parameter's source event property contains
 * {@code KFSPropertyConstants.DOCUMENT}, the first match of {@code PREFIX_FINDER}
 * in it is replaced by {@code KFSPropertyConstants.DOCUMENT}.
 *
 * @return the cleaned source event property, or null when there is no such parameter or
 *         its source event property does not contain {@code KFSPropertyConstants.DOCUMENT}
 */
protected String determinePrefixIfPossible() {
    ValidationFieldConvertible convertible = findParameterForBusinessObjectForValidation();
    if (convertible == null) {
        return null;
    }

    String sourceEventProperty = convertible.getSourceEventProperty();
    if (!sourceEventProperty.contains(KFSPropertyConstants.DOCUMENT)) {
        return null;
    }

    // Pattern.matcher() never returns null, so no null check is needed; when the
    // pattern does not match, replaceFirst returns the property unchanged.
    return PREFIX_FINDER.matcher(sourceEventProperty).replaceFirst(KFSPropertyConstants.DOCUMENT);
}
From source file:ac.simons.tweetarchive.web.ArchiveHandlingController.java
/**
 * Imports the tweets contained in an uploaded Twitter archive.
 * <br>
 * As you can see, it gets nasty here: Twitter4j doesn't offer an official way
 * to parse Twitter's JSON, so this brute forces its way into the
 * twitter4j.StatusJSONImpl implementation of Status.
 * <br>
 * And even if there was an official way, the JSON files inside the
 * official(!) Twitter archive differ from the API, even if they are said to
 * be identical. Others
 * <a href="https://twittercommunity.com/t/why-does-twitter-json-archive-have-a-different-format-than-the-rest-api-1-1/35530">noticed
 * that</a>, too.
 * <br>
 * For every archive entry below {@code data/js/tweets/} the first line is
 * skipped, the first match of {@code PATTERN_CREATED_AT} on each line is
 * reformatted from {@code DATE_FORMAT_IN} to {@code DATE_FORMAT_OUT}, and
 * every {@code "sizes"} array is replaced by an empty object before the
 * content is parsed as a JSON array and each element is stored.
 * <br>
 * Furthermore, there is no error handling beyond logging unparseable dates.
 *
 * @param archive The uploaded archive
 * @param redirectAttributes receives the flash attribute {@code message}
 * @return the redirect {@code redirect:/upload}
 * @throws java.io.IOException
 * @throws twitter4j.JSONException
 */
@PostMapping
public String store(@NotNull final MultipartFile archive, final RedirectAttributes redirectAttributes)
        throws IOException, JSONException {
    try (final ZipInputStream archiv = new ZipInputStream(archive.getInputStream())) {
        for (ZipEntry entry = archiv.getNextEntry(); entry != null; entry = archiv.getNextEntry()) {
            final boolean tweetFile = entry.getName().startsWith("data/js/tweets/") && !entry.isDirectory();
            if (!tweetFile) {
                continue;
            }
            log.debug("Reading archive entry {}...", entry.getName());

            // The reader is not closed here; closing it would also close the
            // zip stream that is still needed for the following entries.
            final BufferedReader entryReader = new BufferedReader(
                    new InputStreamReader(archiv, StandardCharsets.UTF_8));
            final String joinedLines = entryReader.lines().skip(1).map(line -> {
                final Matcher m = PATTERN_CREATED_AT.matcher(line);
                if (!m.find()) {
                    return line;
                }
                try {
                    final String reformatted = DATE_FORMAT_OUT.format(DATE_FORMAT_IN.parse(m.group(2)));
                    return m.replaceFirst("$1\"" + reformatted + "\"");
                } catch (ParseException ex) {
                    log.warn("Unexpected date format in twitter archive", ex);
                    return line;
                }
            }).collect(Collectors.joining(""));
            final String content = joinedLines.replaceAll("\"sizes\" : \\[.+?\\],", "\"sizes\" : {},");

            final JSONArray statuses = new JSONArray(content);
            for (int idx = 0; idx < statuses.length(); ++idx) {
                final JSONObject rawJSON = statuses.getJSONObject(idx);
                // https://twitter.com/lukaseder/status/772772372990586882 ;)
                final Status status = statusFactory.create(rawJSON).as(Status.class);
                this.tweetStorageService.store(status, rawJSON.toString());
            }
        }
    }
    redirectAttributes.addFlashAttribute("message", "Done.");
    return "redirect:/upload";
}