Java tutorial
package org.apache.lucene.store; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.File; import java.io.FileNotFoundException; import java.io.FilenameFilter; import java.io.IOException; import java.io.RandomAccessFile; import java.text.SimpleDateFormat; import java.util.Collection; import static java.util.Collections.synchronizedSet; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.Set; import java.util.concurrent.Future; import java.util.zip.CRC32; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.Constants; import org.apache.lucene.util.cache.Cache; import org.apache.lucene.util.cache.SimpleLRUCache; import com.alimama.mdrill.buffer.CacheKeyBuffer; import com.alimama.mdrill.hdfsDirectory.FileSystemDirectory; /** * <a name="subclasses"/> * Base class for Directory implementations that store index * files in the file system. There are currently three core * subclasses: * * <ul> * * <li> {@link SimpleFSDirectory} is a straightforward * implementation using java.io.RandomAccessFile. * However, it has poor concurrent performance * (multiple threads will bottleneck) as it * synchronizes when multiple threads read from the * same file. * * <li> {@link NIOFSDirectory} uses java.nio's * FileChannel's positional io when reading to avoid * synchronization when reading from the same file. * Unfortunately, due to a Windows-only <a * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734">Sun * JRE bug</a> this is a poor choice for Windows, but * on all other platforms this is the preferred * choice. Applications using {@link Thread#interrupt()} or * {@link Future#cancel(boolean)} should use * {@link SimpleFSDirectory} instead. See {@link NIOFSDirectory} java doc * for details. * * * * <li> {@link MMapDirectory} uses memory-mapped IO when * reading. This is a good choice if you have plenty * of virtual memory relative to your index size, eg * if you are running on a 64 bit JRE, or you are * running on a 32 bit JRE but your index sizes are * small enough to fit into the virtual memory space. * Java has currently the limitation of not being able to * unmap files from user code. The files are unmapped, when GC * releases the byte buffers. Due to * <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038"> * this bug</a> in Sun's JRE, MMapDirectory's {@link IndexInput#close} * is unable to close the underlying OS file handle. Only when * GC finally collects the underlying objects, which could be * quite some time later, will the file handle be closed. * This will consume additional transient disk usage: on Windows, * attempts to delete or overwrite the files will result in an * exception; on other platforms, which typically have a "delete on * last close" semantics, while such operations will succeed, the bytes * are still consuming space on disk. For many applications this * limitation is not a problem (e.g. if you have plenty of disk space, * and you don't rely on overwriting files on Windows) but it's still * an important limitation to be aware of. This class supplies a * (possibly dangerous) workaround mentioned in the bug report, * which may fail on non-Sun JVMs. * * Applications using {@link Thread#interrupt()} or * {@link Future#cancel(boolean)} should use * {@link SimpleFSDirectory} instead. See {@link MMapDirectory} * java doc for details. * </ul> * * Unfortunately, because of system peculiarities, there is * no single overall best implementation. Therefore, we've * added the {@link #open} method, to allow Lucene to choose * the best FSDirectory implementation given your * environment, and the known limitations of each * implementation. For users who have no reason to prefer a * specific implementation, it's best to simply use {@link * #open}. For all others, you should instantiate the * desired implementation directly. * * <p>The locking implementation is by default {@link * NativeFSLockFactory}, but can be changed by * passing in a custom {@link LockFactory} instance. * * @see Directory */ public abstract class FSDirectory extends Directory { private static final Log logger = LogFactory.getLog(FSDirectory.class); /** * Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's * <code>Integer.MAX_VALUE</code>. * * @see #setReadChunkSize */ public static final int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? Integer.MAX_VALUE : 100 * 1024 * 1024; public final File directory; // The underlying filesystem directory protected final Set<String> staleFiles = synchronizedSet(new HashSet<String>()); // Files written, but not yet sync'ed private int chunkSize = DEFAULT_READ_CHUNK_SIZE; // LUCENE-1566 // returns the canonical version of the directory, creating it if it doesn't exist. private static File getCanonicalPath(File file) throws IOException { return new File(file.getCanonicalPath()); } private Cache<CacheKeyBuffer, String> CACHE_BUFFER = Cache .synchronizedCache(new SimpleLRUCache<CacheKeyBuffer, String>(64)); public synchronized String getCacheKey() { CacheKeyBuffer kkk = new CacheKeyBuffer(new String[0], this.dir_uuid, System.currentTimeMillis() / 600000l, this.getP()); String rtn = CACHE_BUFFER.get(kkk); if (rtn == null) { String[] filelist = null; try { filelist = this.listAll(); } catch (Throwable e) { logger.error("listAll", e); } rtn = getCacheKey(filelist); CACHE_BUFFER.put(kkk, rtn); } return rtn; } private static SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmss"); public synchronized String getCacheKey(String[] filelist) { CacheKeyBuffer kkk = new CacheKeyBuffer(filelist, this.dir_uuid, System.currentTimeMillis() / 600000l, this.getP()); String rtn = CACHE_BUFFER.get(kkk); if (rtn == null) { StringBuffer buff = new StringBuffer(); buff.append(this.getClass().getName()).append("_"); buff.append(this.directory.getAbsolutePath()).append("_"); CRC32 crc32 = new CRC32(); crc32.update(0); long filesize = 0; long filemodify = 0; if (filelist != null) { buff.append(filelist.length).append("_"); for (String s : filelist) { crc32.update(new String(s).getBytes()); try { filesize += this.fileLength(s); } catch (Throwable e) { logger.error("filelength", e); } try { filemodify = Math.max(filemodify, this.fileModified(s)); } catch (Throwable e) { logger.error("filelength", e); } } } long crcvalue = crc32.getValue(); buff.append(crcvalue).append("_"); buff.append(filesize).append("_"); buff.append(filemodify).append("_"); buff.append(format.format(new Date(filemodify))); rtn = buff.toString(); CACHE_BUFFER.put(kkk, rtn); } return rtn; } /** Create a new FSDirectory for the named location (ctor for subclasses). * @param path the path of the directory * @param lockFactory the lock factory to use, or null for the default * ({@link NativeFSLockFactory}); * @throws IOException */ protected FSDirectory(File path, LockFactory lockFactory) throws IOException { // new ctors use always NativeFSLockFactory as default: if (lockFactory == null) { lockFactory = new NativeFSLockFactory(); } directory = getCanonicalPath(path); if (directory.exists() && !directory.isDirectory()) throw new NoSuchDirectoryException("file '" + directory + "' exists but is not a directory"); setLockFactory(lockFactory); } /** Creates an FSDirectory instance, trying to pick the * best implementation given the current environment. * The directory returned uses the {@link NativeFSLockFactory}. * * <p>Currently this returns {@link MMapDirectory} for most Solaris * and Windows 64-bit JREs, {@link NIOFSDirectory} for other * non-Windows JREs, and {@link SimpleFSDirectory} for other * JREs on Windows. It is highly recommended that you consult the * implementation's documentation for your platform before * using this method. * * <p><b>NOTE</b>: this method may suddenly change which * implementation is returned from release to release, in * the event that higher performance defaults become * possible; if the precise implementation is important to * your application, please instantiate it directly, * instead. For optimal performance you should consider using * {@link MMapDirectory} on 64 bit JVMs. * * <p>See <a href="#subclasses">above</a> */ public static FSDirectory open(File path) throws IOException { return open(path, null); } /** Just like {@link #open(File)}, but allows you to * also specify a custom {@link LockFactory}. */ public static FSDirectory open(File path, LockFactory lockFactory) throws IOException { if ((Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX) && Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) { return new MMapDirectory(path, lockFactory); } else if (Constants.WINDOWS) { return new SimpleFSDirectory(path, lockFactory); } else { return new NIOFSDirectory(path, lockFactory); } } @Override public void setLockFactory(LockFactory lockFactory) throws IOException { super.setLockFactory(lockFactory); // for filesystem based LockFactory, delete the lockPrefix, if the locks are placed // in index dir. If no index dir is given, set ourselves if (lockFactory instanceof FSLockFactory) { final FSLockFactory lf = (FSLockFactory) lockFactory; final File dir = lf.getLockDir(); // if the lock factory has no lockDir set, use the this directory as lockDir if (dir == null) { lf.setLockDir(directory); lf.setLockPrefix(null); } else if (dir.getCanonicalPath().equals(directory.getCanonicalPath())) { lf.setLockPrefix(null); } } } /** Lists all files (not subdirectories) in the * directory. This method never returns null (throws * {@link IOException} instead). * * @throws NoSuchDirectoryException if the directory * does not exist, or does exist but is not a * directory. * @throws IOException if list() returns null */ public static String[] listAll(File dir) throws IOException { if (!dir.exists()) throw new NoSuchDirectoryException("directory '" + dir + "' does not exist"); else if (!dir.isDirectory()) throw new NoSuchDirectoryException("file '" + dir + "' exists but is not a directory"); // Exclude subdirs String[] result = dir.list(new FilenameFilter() { public boolean accept(File dir, String file) { return !new File(dir, file).isDirectory(); } }); if (result == null) throw new IOException("directory '" + dir + "' exists and is a directory, but cannot be listed: list() returned null"); return result; } /** Lists all files (not subdirectories) in the * directory. * @see #listAll(File) */ @Override public String[] listAll() throws IOException { ensureOpen(); return listAll(directory); } /** Returns true iff a file with the given name exists. */ @Override public boolean fileExists(String name) { ensureOpen(); File file = new File(directory, name); return file.exists(); } /** Returns the time the named file was last modified. */ @Override public long fileModified(String name) { ensureOpen(); File file = new File(directory, name); return file.lastModified(); } /** Returns the time the named file was last modified. */ public static long fileModified(File directory, String name) { File file = new File(directory, name); return file.lastModified(); } /** Set the modified time of an existing file to now. * @deprecated Lucene never uses this API; it will be * removed in 4.0. */ @Override @Deprecated public void touchFile(String name) { ensureOpen(); File file = new File(directory, name); file.setLastModified(System.currentTimeMillis()); } /** Returns the length in bytes of a file in the directory. */ @Override public long fileLength(String name) throws IOException { ensureOpen(); File file = new File(directory, name); final long len = file.length(); if (len == 0 && !file.exists()) { throw new FileNotFoundException(name); } else { return len; } } /** Removes an existing file in the directory. */ @Override public void deleteFile(String name) throws IOException { ensureOpen(); File file = new File(directory, name); if (!file.delete()) throw new IOException("Cannot delete " + file); staleFiles.remove(name); } /** Creates an IndexOutput for the file with the given name. */ @Override public IndexOutput createOutput(String name) throws IOException { ensureOpen(); ensureCanWrite(name); return new FSIndexOutput(this, name); } protected void ensureCanWrite(String name) throws IOException { if (!directory.exists()) if (!directory.mkdirs()) throw new IOException("Cannot create directory: " + directory); File file = new File(directory, name); if (file.exists() && !file.delete()) // delete existing, if any throw new IOException("Cannot overwrite: " + file); } protected void onIndexOutputClosed(FSIndexOutput io) { staleFiles.add(io.name); } @Deprecated @Override public void sync(String name) throws IOException { sync(Collections.singleton(name)); } @Override public void sync(Collection<String> names) throws IOException { ensureOpen(); Set<String> toSync = new HashSet<String>(names); toSync.retainAll(staleFiles); for (String name : toSync) fsync(name); staleFiles.removeAll(toSync); } // Inherit javadoc @Override public IndexInput openInput(String name) throws IOException { ensureOpen(); return openInput(name, BufferedIndexInput.BUFFER_SIZE); } @Override public String getLockID() { ensureOpen(); String dirName; // name to be hashed try { dirName = directory.getCanonicalPath(); } catch (IOException e) { throw new RuntimeException(e.toString(), e); } int digest = 0; for (int charIDX = 0; charIDX < dirName.length(); charIDX++) { final char ch = dirName.charAt(charIDX); digest = 31 * digest + ch; } return "lucene-" + Integer.toHexString(digest); } /** Closes the store to future operations. */ @Override public synchronized void close() { isOpen = false; } /** @deprecated Use {@link #getDirectory} instead. */ @Deprecated public File getFile() { return getDirectory(); } /** @return the underlying filesystem directory */ public File getDirectory() { ensureOpen(); return directory; } /** For debug output. */ @Override public String toString() { return this.getClass().getName() + "@" + directory + " lockFactory=" + getLockFactory(); } /** * Sets the maximum number of bytes read at once from the * underlying file during {@link IndexInput#readBytes}. * The default value is {@link #DEFAULT_READ_CHUNK_SIZE}; * * <p> This was introduced due to <a * href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546">Sun * JVM Bug 6478546</a>, which throws an incorrect * OutOfMemoryError when attempting to read too many bytes * at once. It only happens on 32bit JVMs with a large * maximum heap size.</p> * * <p>Changes to this value will not impact any * already-opened {@link IndexInput}s. You should call * this before attempting to open an index on the * directory.</p> * * <p> <b>NOTE</b>: This value should be as large as * possible to reduce any possible performance impact. If * you still encounter an incorrect OutOfMemoryError, * trying lowering the chunk size.</p> */ public final void setReadChunkSize(int chunkSize) { // LUCENE-1566 if (chunkSize <= 0) { throw new IllegalArgumentException("chunkSize must be positive"); } if (!Constants.JRE_IS_64BIT) { this.chunkSize = chunkSize; } } /** * The maximum number of bytes to read at once from the * underlying file during {@link IndexInput#readBytes}. * @see #setReadChunkSize */ public final int getReadChunkSize() { // LUCENE-1566 return chunkSize; } protected static class FSIndexOutput extends BufferedIndexOutput { private final FSDirectory parent; private final String name; private final RandomAccessFile file; private volatile boolean isOpen; // remember if the file is open, so that we don't try to close it more than once public FSIndexOutput(FSDirectory parent, String name) throws IOException { this.parent = parent; this.name = name; file = new RandomAccessFile(new File(parent.directory, name), "rw"); isOpen = true; } /** output methods: */ @Override public void flushBuffer(byte[] b, int offset, int size) throws IOException { file.write(b, offset, size); } @Override public void close() throws IOException { parent.onIndexOutputClosed(this); // only close the file if it has not been closed yet if (isOpen) { boolean success = false; try { super.close(); success = true; } finally { isOpen = false; if (!success) { try { file.close(); } catch (Throwable t) { // Suppress so we don't mask original exception } } else { file.close(); } } } } //// /** Random-access methods */ //// @Override // public void seek(long pos) throws IOException { // super.seek(pos); // file.seek(pos); // } @Override public void seek(long pos) { throw new RuntimeException("not allowed"); } @Override public long length() throws IOException { return file.length(); } @Override public void setLength(long length) throws IOException { file.setLength(length); } } protected void fsync(String name) throws IOException { File fullFile = new File(directory, name); this.fsync(fullFile); } protected void fsync(File fullFile) throws IOException { boolean success = false; int retryCount = 0; IOException exc = null; while (!success && retryCount < 5) { retryCount++; RandomAccessFile file = null; try { try { file = new RandomAccessFile(fullFile, "rw"); file.getFD().sync(); success = true; } finally { if (file != null) file.close(); } } catch (IOException ioe) { if (exc == null) exc = ioe; try { // Pause 5 msec Thread.sleep(5); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } } if (!success) // Throw original exception throw exc; } }