Java tutorial
package org.commoncrawl.util.shared; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.io.File; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.BufferUnderflowException; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; import java.security.AccessController; import java.security.PrivilegedExceptionAction; import java.security.PrivilegedActionException; import java.lang.reflect.Method; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.Seekable; /** * some utility classes to do memory mapped io in java * * @author rana * */ public class MMapUtils { static final Log LOG = LogFactory.getLog(MMapUtils.class); public static final int DEFAULT_MAX_BUFF = 256 * 1024; /** * <code>true</code>, if this platform supports unmapping mmapped files. */ public static final boolean UNMAP_SUPPORTED; static { boolean v; try { Class.forName("sun.misc.Cleaner"); Class.forName("java.nio.DirectByteBuffer").getMethod("cleaner"); v = true; } catch (Exception e) { v = false; } UNMAP_SUPPORTED = v; } /** * Returns <code>true</code>, if the unmap workaround is enabled. * @see #setUseUnmap */ public static boolean getUseUnmap() { return UNMAP_SUPPORTED; } /** * Try to unmap the buffer, this method silently fails if no support * for that in the JVM. On Windows, this leads to the fact, * that mmapped files cannot be modified or deleted. */ final static void cleanMapping(final ByteBuffer buffer) throws IOException { if (getUseUnmap()) { try { AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() { public Object run() throws Exception { final Method getCleanerMethod = buffer.getClass().getMethod("cleaner"); getCleanerMethod.setAccessible(true); final Object cleaner = getCleanerMethod.invoke(buffer); if (cleaner != null) { cleaner.getClass().getMethod("clean").invoke(cleaner); } return null; } }); } catch (PrivilegedActionException e) { final IOException ioe = new IOException("unable to unmap the mapped buffer"); ioe.initCause(e.getCause()); throw ioe; } } } /** * Returns the current mmap chunk size. * @see #setMaxChunkSize */ public static int getMaxChunkSize() { return DEFAULT_MAX_BUFF; } public static class MMapFile { long length = -1; ByteBuffer buffers[] = null; int bufSizes[] = null; private int refCount = 0; private boolean closePending = false; public MMapFile(File input) throws IOException { RandomAccessFile raf = new RandomAccessFile(input, "r"); try { this.length = raf.length(); if ((length / getMaxChunkSize()) > Integer.MAX_VALUE) throw new IllegalArgumentException( "RandomAccessFile too big for maximum buffer size: " + raf.toString()); int nrBuffers = (int) (length / getMaxChunkSize()); if (((long) nrBuffers * getMaxChunkSize()) <= length) nrBuffers++; this.buffers = new ByteBuffer[nrBuffers]; this.bufSizes = new int[nrBuffers]; long bufferStart = 0; FileChannel rafc = raf.getChannel(); for (int bufNr = 0; bufNr < nrBuffers; bufNr++) { int bufSize = (length > (bufferStart + getMaxChunkSize())) ? getMaxChunkSize() : (int) (length - bufferStart); this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY, bufferStart, bufSize); this.bufSizes[bufNr] = bufSize; bufferStart += bufSize; } LOG.info("Initialized MapFile from file:" + input.getAbsolutePath() + " NumBuffers:" + buffers.length + " TotalLength:" + this.length); } finally { raf.close(); } } public FSDataInputStream newInputStream() throws IOException { FSInputStream stream = new MMapFileInputStream(); FSDataInputStream dataStream = new FSDataInputStream(stream); return dataStream; } private synchronized void addRef() { refCount++; } private synchronized void release() { if (--refCount == 0 && closePending) { try { close(); } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); } } } public synchronized void close() throws IOException { if (refCount == 0) { if (buffers != null) { for (ByteBuffer buffer : buffers) { cleanMapping(buffer); } } buffers = null; } else { closePending = true; } } public long getLength() { return length; } // Because Java's ByteBuffer uses an int to address the // values, it's necessary to access a file > // Integer.MAX_VALUE in size using multiple byte buffers. public class MMapFileInputStream extends FSInputStream { private int curBufIndex = 0; private final int maxBufSize = getMaxChunkSize(); private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex] public MMapFileInputStream() throws IOException { addRef(); seek(0L); } @Override public int read() throws IOException { try { return curBuf.get() & 0xff; } catch (BufferUnderflowException e) { curBufIndex++; if (curBufIndex >= buffers.length) throw new IOException("read past EOF"); curBuf = buffers[curBufIndex].slice(); curBuf.position(0); return curBuf.get() & 0xff; } } @Override public int read(byte[] bytes, int offset, int len) throws IOException { try { curBuf.get(bytes, offset, len); return len; } catch (BufferUnderflowException e) { int bytesRead = 0; int curAvail = curBuf.remaining(); while (len > curAvail) { curBuf.get(bytes, offset, curAvail); bytesRead += curAvail; len -= curAvail; offset += curAvail; curBufIndex++; if (curBufIndex >= buffers.length) { return bytesRead; } curBuf = buffers[curBufIndex].slice(); curBuf.position(0); curAvail = curBuf.remaining(); } curBuf.get(bytes, offset, len); return bytesRead + len; } } @Override public void close() throws IOException { release(); } @Override public int available() throws IOException { long amtAvailable = (length() - getPos()); return (amtAvailable <= Integer.MAX_VALUE) ? (int) amtAvailable : Integer.MAX_VALUE; }; @Override public void seek(long pos) throws IOException { int bufferIndex = (int) (pos / maxBufSize); if (curBuf == null || bufferIndex != curBufIndex) { curBufIndex = bufferIndex; curBuf = buffers[curBufIndex].slice(); } int bufOffset = (int) (pos - ((long) curBufIndex * maxBufSize)); curBuf.position(bufOffset); } public long length() { return length; } @Override public long getPos() throws IOException { return ((long) curBufIndex * maxBufSize) + curBuf.position(); } @Override public boolean seekToNewSource(long targetPos) throws IOException { seek(targetPos); return false; } public short readShort() throws IOException { try { return curBuf.getShort(); } catch (BufferUnderflowException e) { return (short) (((read() & 0xFF) << 8) | (read() & 0xFF)); } } public int readInt() throws IOException { try { return curBuf.getInt(); } catch (BufferUnderflowException e) { return ((read() & 0xFF) << 24) | ((read() & 0xFF) << 16) | ((read() & 0xFF) << 8) | (read() & 0xFF); } } public long readLong() throws IOException { try { return curBuf.getLong(); } catch (BufferUnderflowException e) { return (((long) readInt()) << 32) | (readInt() & 0xFFFFFFFFL); } } /** Reads an int stored in variable-length format. Reads between one and * five bytes. Smaller values take fewer bytes. Negative numbers are not * supported. * @see DataOutput#writeVInt(int) */ public int readVInt() throws IOException { int b = read(); int i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { b = read(); i |= (b & 0x7F) << shift; } return i; } /** Reads a long stored in variable-length format. Reads between one and * nine bytes. Smaller values take fewer bytes. Negative numbers are not * supported. */ public long readVLong() throws IOException { int b = read(); long i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { b = read(); i |= (b & 0x7FL) << shift; } return i; } } } }