Java tutorial
/* * Copyright 2016 The Bazel Authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.idea.blaze.base.lang.buildfile.globbing; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.util.concurrent.ForwardingListenableFuture; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; import com.google.idea.blaze.base.io.FileAttributeProvider; import com.google.idea.blaze.base.lang.buildfile.validation.GlobPatternValidator; import com.intellij.openapi.application.ApplicationManager; import com.intellij.openapi.progress.ProgressManager; import com.intellij.openapi.vfs.LocalFileSystem; import com.intellij.openapi.vfs.VirtualFile; import com.intellij.openapi.vfs.VirtualFileSystem; import com.intellij.openapi.vfs.ex.temp.TempFileSystem; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Predicate; import java.util.regex.Pattern; import javax.annotation.Nullable; /** * Implementation of a subset of UNIX-style file globbing, expanding "*" and "?" as wildcards, but * not [a-z] ranges. * * <p><code>**</code> gets special treatment in include patterns. If it is used as a complete path * segment it matches the filenames in subdirectories recursively. * * <p>Largely copied from {@link com.google.devtools.build.lib.vfs.UnixGlob} */ public final class UnixGlob { private UnixGlob() { } private static List<File> globInternal(File base, Collection<String> patterns, Collection<String> excludePatterns, boolean excludeDirectories, Predicate<File> dirPred, ThreadPoolExecutor threadPool) throws IOException, InterruptedException { GlobVisitor visitor = (threadPool == null) ? new GlobVisitor() : new GlobVisitor(threadPool); return visitor.glob(base, patterns, excludePatterns, excludeDirectories, dirPred); } private static Future<List<File>> globAsyncInternal(File base, Collection<String> patterns, Collection<String> excludePatterns, boolean excludeDirectories, Predicate<File> dirPred, ThreadPoolExecutor threadPool) { Preconditions.checkNotNull(threadPool, "%s %s", base, patterns); try { return new GlobVisitor(threadPool).globAsync(base, patterns, excludePatterns, excludeDirectories, dirPred); } catch (IOException e) { // We are evaluating asynchronously, so no exceptions should be thrown until the future is // retrieved. throw new IllegalStateException(e); } } /** * Checks that each pattern is valid, splits it into segments and checks that each segment * contains only valid wildcards. * * @return list of segment arrays */ private static List<String[]> checkAndSplitPatterns(Collection<String> patterns) { List<String[]> list = Lists.newArrayListWithCapacity(patterns.size()); for (String pattern : patterns) { String error = GlobPatternValidator.validate(pattern); if (error != null) { throw new IllegalArgumentException(error); } Iterable<String> segments = Splitter.on('/').split(pattern); list.add(Iterables.toArray(segments, String.class)); } return list; } private static boolean excludedOnMatch(File path, List<String[]> excludePatterns, int idx, Cache<String, Pattern> cache) { for (String[] excludePattern : excludePatterns) { String text = path.getName(); if (idx == excludePattern.length && matches(excludePattern[idx - 1], text, cache)) { return true; } } return false; } /** * Returns the exclude patterns in {@code excludePatterns} which could apply to the children of * {@code base} * * @param idx index into {@code excludePatterns} for the part of the pattern which might match * {@code base} */ private static List<String[]> getRelevantExcludes(final File base, List<String[]> excludePatterns, final int idx, final Cache<String, Pattern> cache) { if (excludePatterns.isEmpty()) { return excludePatterns; } List<String[]> list = new ArrayList<>(); for (String[] patterns : excludePatterns) { if (excludePatternMatches(patterns, idx, base, cache)) { list.add(patterns); } } return list; } /** * @param patterns a list of patterns * @param idx index into {@code patterns} */ private static boolean excludePatternMatches(String[] patterns, int idx, File base, Cache<String, Pattern> cache) { if (idx == 0) { return true; } String text = base.getName(); return patterns.length > idx && matches(patterns[idx - 1], text, cache); } /** Calls {@link #matches(String, String, Cache) matches(pattern, str, null)} */ public static boolean matches(String pattern, String str) { return matches(pattern, str, null); } /** * Returns whether {@code str} matches the glob pattern {@code pattern}. This method may use the * {@code patternCache} to speed up the matching process. * * @param pattern a glob pattern * @param str the string to match * @param patternCache a cache from patterns to compiled Pattern objects, or {@code null} to skip * caching */ public static boolean matches(String pattern, String str, Cache<String, Pattern> patternCache) { if (pattern.length() == 0 || str.length() == 0) { return false; } // Common case: ** if (pattern.equals("**")) { return true; } // Common case: * if (pattern.equals("*")) { return true; } // If a filename starts with '.', this char must be matched explicitly. if (str.charAt(0) == '.' && pattern.charAt(0) != '.') { return false; } // Common case: *.xyz if (pattern.charAt(0) == '*' && pattern.lastIndexOf('*') == 0) { return str.endsWith(pattern.substring(1)); } // Common case: xyz* int lastIndex = pattern.length() - 1; // The first clause of this if statement is unnecessary, but is an // optimization--charAt runs faster than indexOf. if (pattern.charAt(lastIndex) == '*' && pattern.indexOf('*') == lastIndex) { return str.startsWith(pattern.substring(0, lastIndex)); } Pattern regex = patternCache == null ? null : patternCache.getIfPresent(pattern); if (regex == null) { regex = makePatternFromWildcard(pattern); if (patternCache != null) { patternCache.put(pattern, regex); } } return regex.matcher(str).matches(); } /** * Returns a regular expression implementing a matcher for "pattern", in which "*" and "?" are * wildcards. * * <p>e.g. "foo*bar?.java" -> "foo.*bar.\\.java" */ private static Pattern makePatternFromWildcard(String pattern) { StringBuilder regexp = new StringBuilder(); for (int i = 0, len = pattern.length(); i < len; i++) { char c = pattern.charAt(i); switch (c) { case '*': int toIncrement = 0; if (len > i + 1 && pattern.charAt(i + 1) == '*') { // The pattern '**' is interpreted to match 0 or more directory separators, not 1 or // more. We skip the next * and then find a trailing/leading '/' and get rid of it. toIncrement = 1; if (len > i + 2 && pattern.charAt(i + 2) == '/') { // We have '**/' -- skip the '/'. toIncrement = 2; } else if (len == i + 2 && i > 0 && pattern.charAt(i - 1) == '/') { // We have '/**' -- remove the '/'. regexp.delete(regexp.length() - 1, regexp.length()); } } regexp.append(".*"); i += toIncrement; break; case '?': regexp.append('.'); break; //escape the regexp special characters that are allowed in wildcards case '^': case '$': case '|': case '+': case '{': case '}': case '[': case ']': case '\\': case '.': regexp.append('\\'); regexp.append(c); break; default: regexp.append(c); break; } } return Pattern.compile(regexp.toString()); } public static Builder forPath(File path) { return new Builder(path); } /** Builder class for UnixGlob. */ public static class Builder { private File base; private List<String> patterns; private List<String> excludes; private boolean excludeDirectories; private Predicate<File> pathFilter; private ThreadPoolExecutor threadPool; /** Creates a glob builder with the given base path. */ public Builder(File base) { this.base = base; this.patterns = Lists.newArrayList(); this.excludes = Lists.newArrayList(); this.excludeDirectories = false; this.pathFilter = file -> true; } /** * Adds a pattern to include to the glob builder. * * <p>For a description of the syntax of the patterns, see {@link UnixGlob}. */ public Builder addPattern(String pattern) { this.patterns.add(pattern); return this; } /** * Adds a pattern to include to the glob builder. * * <p>For a description of the syntax of the patterns, see {@link UnixGlob}. */ public Builder addPatterns(String... patterns) { Collections.addAll(this.patterns, patterns); return this; } /** * Adds a pattern to include to the glob builder. * * <p>For a description of the syntax of the patterns, see {@link UnixGlob}. */ public Builder addPatterns(Collection<String> patterns) { this.patterns.addAll(patterns); return this; } /** * Adds patterns to exclude from the results to the glob builder. * * <p>For a description of the syntax of the patterns, see {@link UnixGlob}. */ public Builder addExcludes(String... excludes) { Collections.addAll(this.excludes, excludes); return this; } /** * Adds patterns to exclude from the results to the glob builder. * * <p>For a description of the syntax of the patterns, see {@link UnixGlob}. */ public Builder addExcludes(Collection<String> excludes) { this.excludes.addAll(excludes); return this; } /** If set to true, directories are not returned in the glob result. */ public Builder setExcludeDirectories(boolean excludeDirectories) { this.excludeDirectories = excludeDirectories; return this; } /** * Sets the threadpool to use for parallel glob evaluation. If unset, evaluation is done * in-thread. */ public Builder setThreadPool(ThreadPoolExecutor pool) { this.threadPool = pool; return this; } /** * If set, the given predicate is called for every directory encountered. If it returns false, * the corresponding item is not returned in the output and directories are not traversed * either. */ public Builder setDirectoryFilter(Predicate<File> pathFilter) { this.pathFilter = pathFilter; return this; } /** * Executes the glob. * * @throws InterruptedException if the thread is interrupted. */ public List<File> glob() throws IOException, InterruptedException { return globInternal(base, patterns, excludes, excludeDirectories, pathFilter, threadPool); } /** * Executes the glob asynchronously. {@link #setThreadPool} must have been called already with a * non-null argument. * * @param checkForInterrupt if the returned future may throw InterruptedException. */ public Future<List<File>> globAsync() { return globAsyncInternal(base, patterns, excludes, excludeDirectories, pathFilter, threadPool); } } /** Adapts the result of the glob visitation as a Future. */ private static class GlobFuture extends ForwardingListenableFuture<List<File>> { private final GlobVisitor visitor; private final SettableFuture<List<File>> delegate = SettableFuture.create(); public GlobFuture(GlobVisitor visitor) { this.visitor = visitor; } @Override public List<File> get() throws InterruptedException, ExecutionException { return super.get(); } @Override protected ListenableFuture<List<File>> delegate() { return delegate; } public void setException(IOException exception) { delegate.setException(exception); } public void set(List<File> paths) { delegate.set(paths); } @Override public boolean cancel(boolean mayInterruptIfRunning) { // Best-effort interrupt of the in-flight visitation. visitor.cancel(); return true; } public void markCanceled() { super.cancel(true); } } /** * GlobVisitor executes a glob using parallelism, which is useful when the glob() requires many * readdir() calls on high latency filesystems. */ private static final class GlobVisitor { // These collections are used across workers and must therefore be thread-safe. private final Collection<File> results = Sets.newConcurrentHashSet(); private final Cache<String, Pattern> cache = CacheBuilder.newBuilder() .build(new CacheLoader<String, Pattern>() { @Override public Pattern load(String wildcard) { return makePatternFromWildcard(wildcard); } }); private final GlobFuture result; private final ThreadPoolExecutor executor; private final AtomicLong pendingOps = new AtomicLong(0); private final AtomicReference<IOException> failure = new AtomicReference<>(); private final FileAttributeProvider fileAttributeProvider = FileAttributeProvider.getInstance(); private volatile boolean canceled = false; public GlobVisitor(ThreadPoolExecutor executor) { this.executor = executor; this.result = new GlobFuture(this); } public GlobVisitor() { this(null); } /** * Performs wildcard globbing: returns the sorted list of filenames that match any of {@code * patterns} relative to {@code base}, but which do not match {@code excludePatterns}. * Directories are traversed if and only if they match {@code dirPred}. The predicate is also * called for the root of the traversal. * * <p>Patterns may include "*" and "?", but not "[a-z]". * * <p><code>**</code> gets special treatment in include patterns. If it is used as a complete * path segment it matches the filenames in subdirectories recursively. * * @throws IllegalArgumentException if any glob or exclude pattern {@linkplain * #checkPatternForError(String) contains errors} or if any exclude pattern segment contains * <code>**</code> or if any include pattern segment contains <code>**</code> but not equal * to it. */ public List<File> glob(File base, Collection<String> patterns, Collection<String> excludePatterns, boolean excludeDirectories, Predicate<File> dirPred) throws IOException, InterruptedException { try { return globAsync(base, patterns, excludePatterns, excludeDirectories, dirPred).get(); } catch (ExecutionException e) { Throwable cause = e.getCause(); Throwables.propagateIfPossible(cause, IOException.class); throw new RuntimeException(e); } } public Future<List<File>> globAsync(File base, Collection<String> patterns, Collection<String> excludePatterns, boolean excludeDirectories, Predicate<File> dirPred) throws IOException { if (!fileAttributeProvider.exists(base) || patterns.isEmpty()) { return Futures.immediateFuture(Collections.emptyList()); } boolean baseIsDirectory = fileAttributeProvider.isDirectory(base); List<String[]> splitPatterns = checkAndSplitPatterns(patterns); List<String[]> splitExcludes = checkAndSplitPatterns(excludePatterns); // We do a dumb loop, even though it will likely duplicate work // (e.g., readdir calls). In order to optimize, we would need // to keep track of which patterns shared sub-patterns and which did not // (for example consider the glob [*/*.java, sub/*.java, */*.txt]). pendingOps.incrementAndGet(); try { for (String[] splitPattern : splitPatterns) { queueGlob(base, baseIsDirectory, splitPattern, 0, excludeDirectories, splitExcludes, 0, results, cache, dirPred); } } finally { decrementAndCheckDone(); } return result; } private void queueGlob(File base, boolean baseIsDirectory, String[] patternParts, int idx, boolean excludeDirectories, List<String[]> excludePatterns, int excludeIdx, Collection<File> results, Cache<String, Pattern> cache, Predicate<File> dirPred) throws IOException { enqueue(() -> { try { reallyGlob(base, baseIsDirectory, patternParts, idx, excludeDirectories, excludePatterns, excludeIdx, results, cache, dirPred); } catch (IOException e) { failure.set(e); } }); } protected void enqueue(final Runnable r) { pendingOps.incrementAndGet(); Runnable wrapped = () -> { try { if (!canceled && failure.get() == null) { r.run(); } } finally { decrementAndCheckDone(); } }; if (executor == null) { wrapped.run(); } else { executor.execute(wrapped); } } protected void cancel() { this.canceled = true; } private void decrementAndCheckDone() { if (pendingOps.decrementAndGet() == 0) { // We get to 0 iff we are done all the relevant work. This is because we always increment // the pending ops count as we're enqueuing, and don't decrement until the task is complete // (which includes accounting for any additional tasks that one enqueues). if (canceled) { result.markCanceled(); } else if (failure.get() != null) { result.setException(failure.get()); } else { result.set(Ordering.<File>natural().immutableSortedCopy(results)); } } } /** * Expressed in Haskell: * * <pre> * reallyGlob base [] = { base } * reallyGlob base [x:xs] = union { reallyGlob(f, xs) | f results "base/x" } * </pre> */ private void reallyGlob(File base, boolean baseIsDirectory, String[] patternParts, int idx, boolean excludeDirectories, List<String[]> excludePatterns, int excludeIdx, Collection<File> results, Cache<String, Pattern> cache, Predicate<File> dirPred) throws IOException { ProgressManager.checkCanceled(); if (baseIsDirectory && !dirPred.test(base)) { return; } if (idx == patternParts.length) { // Base case. if (!(excludeDirectories && baseIsDirectory) && !excludedOnMatch(base, excludePatterns, excludeIdx, cache)) { results.add(base); } return; } if (!baseIsDirectory) { // Nothing to find here. return; } List<String[]> relevantExcludes = getRelevantExcludes(base, excludePatterns, excludeIdx, cache); final String pattern = patternParts[idx]; // ** is special: it can match nothing at all. // For example, x/** matches x, **/y matches y, and x/**/y matches x/y. if ("**".equals(pattern)) { queueGlob(base, baseIsDirectory, patternParts, idx + 1, excludeDirectories, excludePatterns, excludeIdx, results, cache, dirPred); } if (!pattern.contains("*") && !pattern.contains("?")) { // We do not need to do a readdir in this case, just a stat. File child = new File(base, pattern); boolean childIsDir = fileAttributeProvider.isDirectory(child); if (!childIsDir && !fileAttributeProvider.isFile(child)) { // The file is a dangling symlink, fifo, does not exist, etc. return; } queueGlob(child, childIsDir, patternParts, idx + 1, excludeDirectories, relevantExcludes, excludeIdx + 1, results, cache, dirPred); return; } File[] children = getChildren(base); if (children == null) { return; } for (File child : children) { boolean childIsDir = fileAttributeProvider.isDirectory(child); if ("**".equals(pattern)) { // Recurse without shifting the pattern. if (childIsDir) { queueGlob(child, childIsDir, patternParts, idx, excludeDirectories, relevantExcludes, excludeIdx + 1, results, cache, dirPred); } } if (matches(pattern, child.getName(), cache)) { // Recurse and consume one segment of the pattern. if (childIsDir) { queueGlob(child, childIsDir, patternParts, idx + 1, excludeDirectories, relevantExcludes, excludeIdx + 1, results, cache, dirPred); } else { // Instead of using an async call, just repeat the base case above. if (idx + 1 == patternParts.length && !excludedOnMatch(child, relevantExcludes, excludeIdx + 1, cache)) { results.add(child); } } } } } private static VirtualFileSystem getFileSystem() { if (ApplicationManager.getApplication().isUnitTestMode()) { return TempFileSystem.getInstance(); } return LocalFileSystem.getInstance(); } @Nullable private File[] getChildren(File file) { if (!ApplicationManager.getApplication().isUnitTestMode()) { return file.listFiles(); } TempFileSystem fs = TempFileSystem.getInstance(); VirtualFile vf = fs.findFileByIoFile(file); VirtualFile[] children = vf.getChildren(); if (children == null) { return null; } return Arrays.stream(children).map(VirtualFile::getPath).map(File::new).toArray(File[]::new); } } }