Java tutorial
/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.runtime.util.cache; import java.io.FileNotFoundException; import java.io.IOException; import java.text.MessageFormat; import java.util.zip.CRC32; import java.util.zip.Checksum; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.asakusafw.runtime.util.lock.LockObject; import com.asakusafw.runtime.util.lock.LockProvider; import com.asakusafw.runtime.util.lock.RetryObject; import com.asakusafw.runtime.util.lock.RetryStrategy; /** * Manages cache files on Hadoop file system. * @since 0.7.0 */ public class HadoopFileCacheRepository implements FileCacheRepository { static final Log LOG = LogFactory.getLog(HadoopFileCacheRepository.class); static final String KEY_CHECK_BEFORE_DELETE = "com.asakusafw.cache.hadoop.deleteOnlyIfExists"; //$NON-NLS-1$ static final boolean DEFAULT_CHECK_BEFORE_DELETE = true; private final Configuration configuration; private final Path repository; private final LockProvider<? super Path> lockProvider; private final RetryStrategy retryStrategy; private final boolean checkBeforeDelete; private final ThreadLocal<byte[]> byteBuffers = ThreadLocal.withInitial(() -> new byte[1024]); /** * Creates a new instance. * @param configuration the current configuration * @param repository the cache root path (must be absolute) * @param lockProvider the cache lock provider * @param retryStrategy the retry strategy */ public HadoopFileCacheRepository(Configuration configuration, Path repository, LockProvider<? super Path> lockProvider, RetryStrategy retryStrategy) { if (repository.toUri().getScheme() == null) { throw new IllegalArgumentException( MessageFormat.format("Cache repository location must contan the scheme: {0}", repository)); } this.configuration = configuration; this.repository = repository; this.lockProvider = lockProvider; this.retryStrategy = retryStrategy; this.checkBeforeDelete = configuration.getBoolean(KEY_CHECK_BEFORE_DELETE, DEFAULT_CHECK_BEFORE_DELETE); } @Override public Path resolve(Path file) throws IOException, InterruptedException { FileSystem fs = file.getFileSystem(configuration); Path qualified = fs.makeQualified(file); return doResolve(qualified); } private Path doResolve(Path sourcePath) throws IOException, InterruptedException { assert sourcePath.isAbsolute(); FileSystem fs = sourcePath.getFileSystem(configuration); if (fs.exists(sourcePath) == false) { throw new FileNotFoundException(sourcePath.toString()); } long sourceChecksum = computeChecksum(fs, sourcePath); Path cachePath = computeCachePath(sourcePath); Path cacheChecksumPath = computeCacheChecksumPath(cachePath); IOException firstException = null; RetryObject retry = retryStrategy .newInstance(MessageFormat.format("preparing cache ({0} -> {1})", sourcePath, cachePath)); do { try (LockObject<? super Path> lock = lockProvider.tryLock(cachePath)) { // TODO reduce lock scope? if (lock == null) { continue; } if (isCached(cachePath, cacheChecksumPath, sourceChecksum)) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("cache hit: {0} -> {1}", //$NON-NLS-1$ sourcePath, cachePath)); } // just returns cached file } else { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("cache miss: {0} -> {1}", //$NON-NLS-1$ sourcePath, cachePath)); } updateCache(sourcePath, sourceChecksum, cachePath, cacheChecksumPath); } return cachePath; } catch (IOException e) { LOG.warn(MessageFormat.format("Failed to prepare cache: {0} -> {1}", sourcePath, cachePath), e); if (firstException == null) { firstException = e; } } } while (retry.waitForNextAttempt()); if (firstException == null) { throw new IOException(MessageFormat.format("Failed to acquire a lock for remote cache file: {0} ({1})", sourcePath, cachePath)); } throw firstException; } private long computeChecksum(FileSystem fs, Path file) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Computing checksum: {0}", //$NON-NLS-1$ file)); } Checksum checksum = new CRC32(); byte[] buf = byteBuffers.get(); try (FSDataInputStream input = fs.open(file)) { while (true) { int read = input.read(buf); if (read < 0) { break; } checksum.update(buf, 0, read); } } return checksum.getValue(); } private Path computeCachePath(Path file) { assert repository != null; String directoryName; Path parent = file.getParent(); if (parent == null) { directoryName = String.format("%08x", 0); //$NON-NLS-1$ } else { directoryName = String.format("%08x", parent.toString().hashCode()); //$NON-NLS-1$ } Path directory = new Path(repository, directoryName); Path target = new Path(directory, file.getName()); return target; } private Path computeCacheChecksumPath(Path cachePath) { Path parent = cachePath.getParent(); String name = String.format("%s.acrc", cachePath.getName()); //$NON-NLS-1$ return new Path(parent, name); } private boolean isCached(Path cacheFilePath, Path cacheChecksumPath, long checksum) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("checking remote cache: {0}", //$NON-NLS-1$ cacheFilePath)); } FileSystem fs = cacheChecksumPath.getFileSystem(configuration); if (fs.exists(cacheChecksumPath) == false || fs.exists(cacheFilePath) == false) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("remote cache is not found: {0}", //$NON-NLS-1$ cacheFilePath)); } return false; } else { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("reading remote cache checksum: {0}", //$NON-NLS-1$ cacheFilePath)); } long other; try (FSDataInputStream input = fs.open(cacheChecksumPath)) { other = input.readLong(); } return checksum == other; } } private void updateCache(Path file, long checksum, Path cachePath, Path cacheChecksumPath) throws IOException { if (LOG.isInfoEnabled()) { LOG.info(MessageFormat.format("updating library cache: {0} -> {1}", file, cachePath)); } FileSystem sourceFs = file.getFileSystem(configuration); FileSystem cacheFs = cachePath.getFileSystem(configuration); // remove checksum file -> cachePath delete(cacheFs, cacheChecksumPath); delete(cacheFs, cachePath); // sync source file to cache file try (FSDataOutputStream checksumOutput = cacheFs.create(cacheChecksumPath, false)) { checksumOutput.writeLong(checksum); syncFile(sourceFs, file, cacheFs, cachePath); } } private void delete(FileSystem fs, Path path) throws IOException { if (checkBeforeDelete && fs.exists(path) == false) { return; } fs.delete(path, false); } private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile) throws IOException { byte[] buf = byteBuffers.get(); try (FSDataOutputStream output = targetFs.create(targetFile, false); FSDataInputStream input = sourceFs.open(sourceFile)) { while (true) { int read = input.read(buf); if (read < 0) { break; } output.write(buf, 0, read); } } } }