Java tutorial
/* Copyright 2014 Norconex Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.norconex.commons.lang.io;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.norconex.commons.lang.file.FileUtil;
import com.norconex.commons.lang.io.CachedStreamFactory.MemoryTracker;

/**
 * {@link InputStream} wrapper that can be re-read any number of times.  This
 * class will cache the wrapped input stream content the first time it is read,
 * and subsequent reads will use the cache.
 * <br><br>
 * To create new instances of {@link CachedInputStream}, use the
 * {@link CachedStreamFactory} class.   Reusing the same factory
 * will ensure all {@link CachedInputStream} instances created share the same
 * combined maximum memory.  Invoking one of the
 * <code>newInputStream(...)</code> methods on this class has the same effect.
 * <br><br>
 * In order to re-use this InputStream, you must call {@link #rewind()} first
 * on it. Once done reading the stream, you will get the -1 character as
 * expected, and it will remain at that until you rewind or dispose.
 * <br><br>
 * Starting reading the stream again will start reading bytes from the
 * beginning (re)using its internal cache.
 * <br><br>
 * Calling {@link #close()} has
 * no effect, and the cache data remains available for subsequent read.
 * <br><br>
 * To explicitly dispose of resources allocated to the cache, you can
 * use the {@link #dispose()} method.
 * Attempting to read a disposed instance will throw an {@link IOException}.
 * It is recommended you explicitly dispose of <code>CachedInputStream</code>
 * instances to speed up the release of resources. Otherwise, resources are
 * de-allocated automatically when the instance is finalized.
 * <br><br>
 * The internal cache stores read bytes into memory, up to the
 * specified maximum cache size. If content exceeds
 * the cache limit, the cache transforms itself into a fast file-based cache
 * of unlimited size. Default memory cache size is 128 KB.
 * <br><br>
 * <b>Starting with 1.6.0</b>, <code>mark(int)</code> is supported. The mark
 * limit is always unlimited so the method argument is ignored.
 * <br><br>
 * @author Pascal Essiembre
 * @since 1.5.0
 * @see CachedStreamFactory
 */
public class CachedInputStream extends InputStream implements ICachedStream {

    private static final Logger LOG =
            LogManager.getLogger(CachedInputStream.class);

    private final CachedStreamFactory factory;
    private final MemoryTracker tracker;

    private InputStream inputStream;

    // In-memory cache once the stream has been rewound (read-only snapshot).
    private byte[] memCache;
    // In-memory cache being filled during the first read.
    private ByteArrayOutputStream memOutputStream;

    // File-based cache, used once the memory limit is exceeded.
    private File fileCache;
    private RandomAccessFile randomAccessFile;

    private boolean firstRead = true;
    private boolean needNewStream = false;
    private boolean cacheEmpty = true;
    private boolean disposed = false;

    private final File cacheDirectory;

    private int count;        // total number of bytes read so far
    private int pos = 0;      // byte position we are in
    private int markpos = 0;  // position we want to go back to

    /**
     * Caches the wrapped InputStream.
     * @param factory factory this instance was created from
     * @param is InputStream to cache
     * @param cacheDirectory directory where to store large content
     */
    /*default*/ CachedInputStream(CachedStreamFactory factory,
            InputStream is, File cacheDirectory) {
        super();
        this.factory = factory;
        this.tracker = factory.new MemoryTracker();

        memOutputStream = new ByteArrayOutputStream();

        if (is instanceof BufferedInputStream) {
            this.inputStream = is;
        } else {
            this.inputStream = new BufferedInputStream(is);
        }
        if (cacheDirectory == null) {
            this.cacheDirectory = FileUtils.getTempDirectory();
        } else {
            this.cacheDirectory = cacheDirectory;
        }
    }

    /**
     * Creates an input stream with an existing memory cache.
     * @param factory factory this instance was created from
     * @param memCache the InputStream cache (a copy is kept)
     */
    /*default*/ CachedInputStream(
            CachedStreamFactory factory, byte[] memCache) {
        this.factory = factory;
        this.tracker = factory.new MemoryTracker();
        this.memCache = ArrayUtils.clone(memCache);
        this.cacheDirectory = null;
        firstRead = false;
        needNewStream = true;
    }

    /**
     * Creates an input stream with an existing file cache.
     * @param factory factory this instance was created from
     * @param cacheFile the file cache
     */
    /*default*/ CachedInputStream(
            CachedStreamFactory factory, File cacheFile) {
        this.factory = factory;
        this.tracker = factory.new MemoryTracker();
        this.fileCache = cacheFile;
        this.cacheDirectory = null;
        firstRead = false;
        needNewStream = true;
    }

    /**
     * Always <code>true</code> since 1.6.0.
     * @return <code>true</code>
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * The read limit value is ignored. Limit is always unlimited.
     * Supported since 1.6.0.
     * @param readlimit any value (ignored)
     */
    @Override
    public synchronized void mark(int readlimit) {
        markpos = pos;
    }

    /**
     * If no mark has previously been set, it resets to the beginning.
     * Supported since 1.6.0.
     */
    @Override
    public synchronized void reset() throws IOException {
        pos = markpos;
    }

    /**
     * Whether caching is done in memory for this instance for what has been
     * read so far. Otherwise, file-based caching is used.
     * @return <code>true</code> if caching is in memory.
     */
    public boolean isInMemory() {
        return fileCache == null;
    }

    /**
     * Reads the next byte, serving it from the cache when the current
     * position is behind what was already read (i.e. after a
     * {@link #reset()}), or from the underlying stream otherwise.
     * @return the next byte as a value from 0 to 255, or -1 at end of stream
     * @throws IOException if this instance was disposed or reading failed
     */
    @Override
    public int read() throws IOException {
        if (disposed) {
            throw new IOException("CachedInputStream has been disposed.");
        }
        int cursor = pos;
        if (cursor < count) {
            int val = -1;
            if (isInMemory()) {
                if (memOutputStream != null) {
                    // NOTE(review): assumes getByte returns an unsigned
                    // value or -1 -- confirm against ByteArrayOutputStream.
                    val = memOutputStream.getByte(cursor);
                } else if (cursor < memCache.length) {
                    // Mask the sign bit: bytes >= 0x80 must be returned as
                    // 128..255, otherwise 0xFF would look like end of stream.
                    val = memCache[cursor] & 0xFF;
                }
            } else {
                randomAccessFile.seek(cursor);
                val = randomAccessFile.read();
            }
            if (val != -1) {
                pos++;
            }
            return val;
        }
        int b = realRead();
        if (b != -1) {
            pos++;
            count++;
        }
        return b;
    }

    /**
     * Reads one byte from the underlying stream, writing it to the cache
     * when this is the first pass over the wrapped stream.
     */
    private int realRead() throws IOException {
        if (needNewStream) {
            createInputStreamFromCache();
        }
        if (firstRead) {
            int read = inputStream.read();
            if (read == -1) {
                return read;
            }
            if (randomAccessFile != null) {
                // Write to file cache
                randomAccessFile.write(read);
            } else if (!tracker.hasEnoughAvailableMemory(
                    memOutputStream, 1)) {
                // Too big: create file cache and write to it.
                cacheToFile();
                randomAccessFile.write(read);
            } else {
                // Write to memory cache
                memOutputStream.write(read);
            }
            cacheEmpty = false;
            return read;
        }
        int read = inputStream.read();
        cacheEmpty = false;
        return read;
    }

    /**
     * Reads up to <code>len</code> bytes, first from the cache for the
     * portion located before what was already read, then from the
     * underlying stream for the remainder.
     * @param b destination buffer
     * @param off offset in the destination buffer
     * @param len maximum number of bytes to read
     * @return total number of bytes read (cached and newly read combined),
     *         or -1 if no byte could be read because the end was reached
     * @throws IOException if this instance was disposed or reading failed
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (disposed) {
            throw new IOException("CachedInputStream has been disposed.");
        }
        if (len == 0) {
            // InputStream contract: a zero-length request reads nothing.
            return 0;
        }
        int cursor = pos;
        int read = 0;
        if (cursor < count) {
            int toRead = Math.min(len, count - cursor);
            if (isInMemory()) {
                if (memOutputStream != null) {
                    byte[] bytes = new byte[toRead];
                    read = memOutputStream.getBytes(bytes, cursor);
                    System.arraycopy(bytes, 0, b, off, toRead);
                } else {
                    if (cursor >= memCache.length) {
                        read = -1;
                    } else {
                        System.arraycopy(memCache, cursor, b, off, toRead);
                        read = toRead;
                    }
                }
            } else {
                randomAccessFile.seek(cursor);
                read = randomAccessFile.read(b, off, toRead);
            }
            if (read != -1) {
                pos += read;
            }
        }
        if (read != -1 && read < len) {
            int num = realRead(b, off + read, len - read);
            if (num != -1) {
                pos += num;
                count += num;
                // Report cached + fresh bytes, not just the fresh ones.
                read += num;
            } else if (read == 0) {
                // Nothing from the cache and nothing left in the stream.
                read = -1;
            }
            // else: keep the number of bytes already served from the cache.
        }
        return read;
    }

    /**
     * Reads bytes straight from the underlying stream (bypassing the
     * position/count bookkeeping), writing them to the cache when this is
     * the first pass over the wrapped stream.
     * @param b destination buffer
     * @param off offset in the destination buffer
     * @param len maximum number of bytes to read
     * @return number of bytes read, or -1 at end of stream
     * @throws IOException if reading or caching failed
     */
    public int realRead(byte[] b, int off, int len) throws IOException {
        if (needNewStream) {
            createInputStreamFromCache();
        }
        int num = inputStream.read(b, off, len);
        cacheEmpty = false;
        if (num == -1) {
            return num;
        }
        if (firstRead) {
            if (randomAccessFile != null) {
                randomAccessFile.write(b, off, num);
            } else if (!tracker.hasEnoughAvailableMemory(
                    memOutputStream, num)) {
                cacheToFile();
                randomAccessFile.write(b, off, num);
            } else {
                memOutputStream.write(b, off, num);
            }
        }
        return num;
    }

    /**
     * If not already fully cached, forces the inner input stream to be
     * fully cached.
     * @throws IOException could not enforce full caching
     */
    public void enforceFullCaching() throws IOException {
        if (firstRead) {
            IOUtils.copy(this, new NullOutputStream());
        }
    }

    /**
     * Rewinds this stream so it can be read again from the beginning.
     * If this input stream was not fully read at least once, it will
     * be fully read first, so its entirety is cached properly.
     */
    public void rewind() {
        if (!cacheEmpty) {
            // Rewinding a stream that was not fully read would truncate
            // it. We finish reading it all to avoid that.
            if (firstRead) {
                try {
                    enforceFullCaching();
                } catch (IOException e) {
                    //TODO handle better
                    throw new RuntimeException(e);
                }
            }

            // Rewind
            IOUtils.closeQuietly(inputStream);
            IOUtils.closeQuietly(memOutputStream);
            IOUtils.closeQuietly(randomAccessFile);
            randomAccessFile = null;
            firstRead = false;
            needNewStream = true;
            if (memOutputStream != null) {
                LOG.debug("Creating memory cache from cached stream.");
                memCache = memOutputStream.toByteArray();
                memOutputStream = null;
            }

            // Reset marking
            pos = 0;
            markpos = 0;
            count = 0;
        }
    }

    /**
     * Releases the resources held by this instance: drops the memory cache,
     * closes open streams and deletes the cache file, if any. Reading after
     * this call throws an {@link IOException}.
     * @throws IOException could not release resources
     */
    public void dispose() throws IOException {
        if (memCache != null) {
            memCache = null;
        }
        if (inputStream != null) {
            inputStream.close();
            inputStream = null;
        }
        if (memOutputStream != null) {
            memOutputStream.flush();
            memOutputStream.close();
            memOutputStream = null;
        }
        if (randomAccessFile != null) {
            randomAccessFile.close();
            randomAccessFile = null;
        }
        if (fileCache != null) {
            FileUtil.delete(fileCache);
            LOG.debug("Deleted cache file: " + fileCache);
        }
        disposed = true;
        cacheEmpty = true;
    }

    @Override
    public int available() throws IOException {
        if (needNewStream) {
            createInputStreamFromCache();
        }
        if (inputStream == null) {
            return 0;
        }
        return inputStream.available();
    }

    /**
     * Gets the cache directory where temporary cache files are created.
     * @return the cache directory
     */
    public final File getCacheDirectory() {
        return cacheDirectory;
    }

    /**
     * Returns <code>true</code> if there was nothing to cache (no writing
     * was performed) or if the stream was disposed.
     * @return <code>true</code> if empty
     */
    public boolean isCacheEmpty() {
        return cacheEmpty;
    }

    /**
     * Whether {@link #dispose()} was invoked on this instance.
     * @return <code>true</code> if disposed
     */
    public boolean isDisposed() {
        return disposed;
    }

    @Override
    public long getMemCacheSize() {
        if (memCache != null) {
            return memCache.length;
        }
        if (memOutputStream != null) {
            return memOutputStream.size();
        }
        return 0;
    }

    /**
     * Creates a new {@link CachedInputStream} using the same factory settings
     * that were used to create this instance.
     * @param file file to create the input stream from
     * @return cached input stream
     */
    public CachedInputStream newInputStream(File file) {
        return factory.newInputStream(file);
    }

    /**
     * Creates a new {@link CachedInputStream} using the same factory settings
     * that were used to create this instance.
     * @param is input stream
     * @return cached input stream
     */
    public CachedInputStream newInputStream(InputStream is) {
        return factory.newInputStream(is);
    }

    /**
     * Gets the factory this instance was created with.
     * @return stream factory
     */
    public CachedStreamFactory getStreamFactory() {
        return factory;
    }

    /**
     * Switches from the memory cache to a temporary file cache, copying
     * over what was cached in memory so far.
     */
    private void cacheToFile() throws IOException {
        fileCache = File.createTempFile(
                "CachedInputStream-", "-temp", cacheDirectory);
        fileCache.deleteOnExit();
        LOG.debug("Reached max cache size. Swapping to file: " + fileCache);
        randomAccessFile = new RandomAccessFile(fileCache, "rw");
        randomAccessFile.write(memOutputStream.toByteArray());
        memOutputStream = null;
    }

    /**
     * Opens a fresh input stream over the existing cache (file or memory).
     */
    @SuppressWarnings("resource")
    private void createInputStreamFromCache() throws FileNotFoundException {
        if (fileCache != null) {
            LOG.debug("Creating new input stream from file cache.");
            // Keep the file handle in the field: read() and read(byte[]...)
            // seek on it to serve bytes located before the current count
            // (after a reset()). It shares its position with the channel
            // stream below, just like during the first read.
            randomAccessFile = new RandomAccessFile(fileCache, "r");
            FileChannel channel = randomAccessFile.getChannel();
            inputStream = Channels.newInputStream(channel);
        } else {
            LOG.debug("Creating new input stream from memory cache.");
            inputStream = new ByteArrayInputStream(memCache);
        }
        needNewStream = false;
    }

    @Override
    protected void finalize() throws Throwable {
        dispose();
        super.finalize();
    }
}