Java tutorial
/** * Copyright (c) 2016. Qubole Inc * Licensed under the Apache License, Version 2.0 (the License); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an AS IS BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. See accompanying LICENSE file. */ package com.qubole.rubix.core; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.qubole.rubix.bookkeeper.Location; import com.qubole.rubix.bookkeeper.RetryingBookkeeperClient; import com.qubole.rubix.spi.CacheConfig; import com.qubole.rubix.spi.ClusterType; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static com.qubole.rubix.bookkeeper.RetryingBookkeeperClient.createBookKeeperClient; /** * Created by stagra on 29/12/15. */ public class CachingInputStream extends FSInputStream { private FSDataInputStream inputStream; private long nextReadPosition; private long nextReadBlock; private int blockSize; private RandomAccessFile localFileForReading = null; private CachingFileSystemStats statsMbean; private static ListeningExecutorService readService = MoreExecutors .listeningDecorator(Executors.newCachedThreadPool()); private static final Log log = LogFactory.getLog(CachingInputStream.class); private String remotePath; private long fileSize; private String localPath; private long lastModified; private RetryingBookkeeperClient bookKeeperClient; private Configuration conf; private boolean strictMode = false; private long splitSize; ClusterType clusterType; public CachingInputStream(FSDataInputStream parentInputStream, FileSystem parentFs, Path backendPath, Configuration conf, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType) throws IOException { this.remotePath = backendPath.toString(); this.fileSize = parentFs.getLength(backendPath); lastModified = parentFs.getFileStatus(backendPath).getModificationTime(); initialize(parentInputStream, conf); this.statsMbean = statsMbean; this.splitSize = splitSize; this.clusterType = clusterType; } @VisibleForTesting public CachingInputStream(FSDataInputStream parentInputStream, Configuration conf, Path backendPath, long size, long lastModified, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType) throws IOException { this.remotePath = backendPath.toString(); this.fileSize = size; this.lastModified = lastModified; initialize(parentInputStream, conf); this.statsMbean = statsMbean; this.splitSize = splitSize; this.clusterType = clusterType; } private void initialize(FSDataInputStream parentInputStream, Configuration conf) { this.conf = conf; this.strictMode = CacheConfig.isStrictMode(conf); try { this.bookKeeperClient = createBookKeeperClient(conf); } catch (Exception e) { if (strictMode) { throw Throwables.propagate(e); } log.warn("Could not create BookKeeper Client " + Throwables.getStackTraceAsString(e)); bookKeeperClient = null; } this.inputStream = checkNotNull(parentInputStream, "ParentInputStream is null"); this.blockSize = CacheConfig.getBlockSize(conf); this.localPath = CacheConfig.getLocalPath(remotePath, conf); try { this.localFileForReading = new RandomAccessFile(localPath, "r"); } catch (FileNotFoundException e) { log.info("Creating local file " + localPath); File file = new File(localPath); try { file.createNewFile(); this.localFileForReading = new RandomAccessFile(file, "rw"); } catch (IOException e1) { log.error("Error in creating local file " + localPath, e1); // reset bookkeeper client so that we take direct route this.bookKeeperClient = null; } } } @Override public void seek(long pos) throws IOException { checkState(pos >= 0, "Negative Position"); log.debug(String.format("Seek request, currentPos: %d currentBlock: %d", nextReadPosition, nextReadBlock)); this.nextReadPosition = pos; setNextReadBlock(); log.debug(String.format("Seek to %d, setting block location %d", nextReadPosition, nextReadBlock)); } @Override public long getPos() throws IOException { return nextReadPosition; } @Override public boolean seekToNewSource(long l) throws IOException { return false; } @Override public int read() throws IOException { // This stream is wrapped with BufferedInputStream, so this method should never be called throw new UnsupportedOperationException(); } @Override public int read(byte[] buffer, int offset, int length) throws IOException { log.debug(String.format("Got Read, currentPos: %d currentBlock: %d bufferOffset: %d length: %d", nextReadPosition, nextReadBlock, offset, length)); if (nextReadPosition >= fileSize) { log.debug("Already at eof, returning"); return -1; } // Get the last block final long endBlock = ((nextReadPosition + (length - 1)) / blockSize) + 1; // this block will not be read // Create read requests final List<ReadRequestChain> readRequestChains = setupReadRequestChains(buffer, offset, endBlock, length); log.debug("Executing Chains"); // start read requests ImmutableList.Builder builder = ImmutableList.builder(); int sizeRead = 0; for (ReadRequestChain readRequestChain : readRequestChains) { readRequestChain.lock(); builder.add(readService.submit(readRequestChain)); } List<ListenableFuture<Integer>> futures = builder.build(); try { for (ListenableFuture<Integer> future : futures) { sizeRead += future.get(); } } catch (InterruptedException e) { throw Throwables.propagate(e); } catch (ExecutionException e) { throw Throwables.propagate(e); } // mark all read blocks cached // We can let this is happen in background final long lastBlock = nextReadBlock; readService.execute(new Runnable() { @Override public void run() { ReadRequestChainStats stats = new ReadRequestChainStats(); for (ReadRequestChain readRequestChain : readRequestChains) { readRequestChain.updateCacheStatus(remotePath, fileSize, lastModified, blockSize, conf); stats = stats.add(readRequestChain.getStats()); } statsMbean.addReadRequestChainStats(stats); } }); log.debug(String.format("Read %d bytes", sizeRead)); if (sizeRead > 0) { nextReadPosition += sizeRead; setNextReadBlock(); log.debug(String.format("New nextReadPosition: %d nextReadBlock: %d", nextReadPosition, nextReadBlock)); } return sizeRead; } private List<ReadRequestChain> setupReadRequestChains(byte[] buffer, int offset, long endBlock, int length) { DirectReadRequestChain directReadRequestChain = null; RemoteReadRequestChain remoteReadRequestChain = null; CachedReadRequestChain cachedReadRequestChain = null; NonLocalReadRequestChain nonLocalReadRequestChain = null; ImmutableList.Builder chainedReadRequestChainBuilder = ImmutableList.builder(); int lengthAlreadyConsidered = 0; List<Location> isCached = null; try { if (bookKeeperClient != null) { isCached = bookKeeperClient.getCacheStatus(remotePath, fileSize, lastModified, nextReadBlock, endBlock, clusterType.ordinal()); } } catch (Exception e) { if (strictMode) { throw Throwables.propagate(e); } log.info("Could not get cache status from server " + Throwables.getStackTraceAsString(e)); } int idx = 0; for (long blockNum = nextReadBlock; blockNum < endBlock; blockNum++, idx++) { long backendReadStart = blockNum * blockSize; long backendReadEnd = (blockNum + 1) * blockSize; // if backendReadStart is after EOF, then return. It can happen while reading last block and enf of read covers multiple blocks after EOF if (backendReadStart >= fileSize) { log.debug("Reached EOF, returning"); break; } if (backendReadEnd >= fileSize) { backendReadEnd = fileSize; } long actualReadStart = (blockNum == nextReadBlock ? nextReadPosition : backendReadStart); long actualReadEnd = (blockNum == (endBlock - 1) ? (nextReadPosition + length) : backendReadEnd); if (actualReadEnd >= fileSize) { actualReadEnd = fileSize; } int bufferOffest = offset + lengthAlreadyConsidered; ReadRequest readRequest = new ReadRequest(backendReadStart, backendReadEnd, actualReadStart, actualReadEnd, buffer, bufferOffest, fileSize); lengthAlreadyConsidered += readRequest.getActualReadLength(); if (isCached == null) { log.debug(String.format("Sending block %d to DirectReadRequestChain", blockNum)); if (directReadRequestChain == null) { directReadRequestChain = new DirectReadRequestChain(inputStream); } directReadRequestChain.addReadRequest(readRequest); } else if (isCached.get(idx) == Location.CACHED) { log.debug(String.format("Sending cached block %d to cachedReadRequestChain", blockNum)); if (cachedReadRequestChain == null) { cachedReadRequestChain = new CachedReadRequestChain(localFileForReading); } cachedReadRequestChain.addReadRequest(readRequest); } else { if (isCached.get(idx) == Location.NON_LOCAL) { log.debug(String.format("Sending block %d to NonLocalReadRequestChain", blockNum)); if (nonLocalReadRequestChain == null) { nonLocalReadRequestChain = new NonLocalReadRequestChain(inputStream); } nonLocalReadRequestChain.addReadRequest(readRequest); } else { log.debug(String.format("Sending block %d to remoteReadRequestChain", blockNum)); if (remoteReadRequestChain == null) { remoteReadRequestChain = new RemoteReadRequestChain(inputStream, localPath); } remoteReadRequestChain.addReadRequest(readRequest); } } } if (cachedReadRequestChain != null) { chainedReadRequestChainBuilder .add(new ChainedReadRequestChain().addReadRequestChain(cachedReadRequestChain)); } if (nonLocalReadRequestChain != null || directReadRequestChain != null || remoteReadRequestChain != null) { ChainedReadRequestChain shared = new ChainedReadRequestChain(); if (remoteReadRequestChain != null) { shared.addReadRequestChain(remoteReadRequestChain); } if (nonLocalReadRequestChain != null) { shared.addReadRequestChain(nonLocalReadRequestChain); } if (directReadRequestChain != null) { shared.addReadRequestChain(directReadRequestChain); } chainedReadRequestChainBuilder.add(shared); } return chainedReadRequestChainBuilder.build(); } private void setNextReadBlock() { this.nextReadBlock = this.nextReadPosition / blockSize; } @Override public void close() { try { inputStream.close(); if (localFileForReading != null) { localFileForReading.close(); } if (bookKeeperClient != null) { bookKeeperClient.close(); } } catch (IOException e) { throw Throwables.propagate(e); } } }