org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.java Source code

Java tutorial

Introduction

Here is the source code for the class org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm (file ShortCircuitShm.java).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.shortcircuit;

import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.security.SecureRandom;
import java.util.BitSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Random;

import org.apache.commons.lang.builder.EqualsBuilder;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.InvalidRequestException;
import org.apache.hadoop.hdfs.ExtendedBlockId;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.io.nativeio.NativeIO.POSIX;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.primitives.Ints;

import sun.misc.Unsafe;

/**
 * A shared memory segment used to implement short-circuit reads.
 */
public class ShortCircuitShm {
    /**
     * Class-wide logger.  It is declared before {@code unsafe} on purpose:
     * safetyDance(), which runs while {@code unsafe} is being initialized,
     * reports failures through LOG.
     */
    private static final Log LOG = LogFactory.getLog(ShortCircuitShm.class);

    /**
     * Size in bytes of a single slot.  Slot i starts at byte offset
     * i * BYTES_PER_SLOT from the base address of the mapping.
     */
    protected static final int BYTES_PER_SLOT = 64;

    /**
     * Handle used for volatile and compare-and-swap access to slot memory, or
     * null if it could not be obtained (the constructor rejects that case).
     */
    private static final Unsafe unsafe = safetyDance();

    /**
     * Obtain the sun.misc.Unsafe singleton by reflectively reading its private
     * static "theUnsafe" field.
     *
     * Any failure is logged and swallowed so that loading this class never
     * fails; callers must cope with a null result.
     *
     * @return The Unsafe instance, or null if it could not be loaded.
     */
    private static Unsafe safetyDance() {
        Unsafe loaded = null;
        try {
            final Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            // The field is static, so no receiver object is needed.
            loaded = (Unsafe) theUnsafe.get(null);
        } catch (Throwable t) {
            LOG.error("failed to load misc.Unsafe", t);
        }
        return loaded;
    }

    /**
     * Compute how many bytes of a shared memory segment can actually be used.
     *
     * The size of the underlying file is truncated to a whole number of slots.
     * A segment that cannot hold even one slot is rejected.
     *
     * @param stream The stream backing the shared memory segment.
     * @return       The usable size in bytes, a multiple of BYTES_PER_SLOT.
     * @throws IOException If the size cannot be read, or the segment is
     *                     smaller than one slot.
     */
    private static int getUsableLength(FileInputStream stream) throws IOException {
        // checkedCast rejects sizes that do not fit in an int.
        final int fileLength = Ints.checkedCast(stream.getChannel().size());
        // Drop the trailing partial slot, if any.
        final int usableLength = fileLength - (fileLength % BYTES_PER_SLOT);
        if (usableLength == 0) {
            throw new IOException("size of shared memory segment was " + fileLength
                    + ", but that is not enough to hold even one slot.");
        }
        return usableLength;
    }

    /**
     * Identifies a DfsClientShm.
     *
     * An id is an immutable pair of 64-bit values.  Ids are ordered first by
     * the high word and then by the low word, and that ordering is consistent
     * with {@link #equals(Object)}.
     */
    public static class ShmId implements Comparable<ShmId> {
        /**
         * Source of randomness for {@link #createRandom()}.
         *
         * A SecureRandom is used because the ids are meant to be unguessable;
         * the output of a plain java.util.Random can be predicted from a few
         * observed values.
         */
        private static final Random random = new SecureRandom();
        private final long hi;
        private final long lo;

        /**
         * Generate a random ShmId.
         * 
         * We generate ShmIds randomly to prevent a malicious client from
         * successfully guessing one and using that to interfere with another
         * client.
         *
         * @return A new ShmId whose two words are drawn from a
         *         cryptographically strong random number generator.
         */
        public static ShmId createRandom() {
            return new ShmId(random.nextLong(), random.nextLong());
        }

        /**
         * Create a ShmId from its two 64-bit halves.
         *
         * @param hi The high word of the id.
         * @param lo The low word of the id.
         */
        public ShmId(long hi, long lo) {
            this.hi = hi;
            this.lo = lo;
        }

        /**
         * @return The high word of the id.
         */
        public long getHi() {
            return hi;
        }

        /**
         * @return The low word of the id.
         */
        public long getLo() {
            return lo;
        }

        /**
         * Two ShmIds are equal when they are of exactly the same class and
         * both words match.
         */
        @Override
        public boolean equals(Object o) {
            if ((o == null) || (o.getClass() != this.getClass())) {
                return false;
            }
            ShmId other = (ShmId) o;
            return new EqualsBuilder().append(hi, other.hi).append(lo, other.lo).isEquals();
        }

        @Override
        public int hashCode() {
            return new HashCodeBuilder().append(this.hi).append(this.lo).toHashCode();
        }

        /**
         * @return The id as 32 lower-case hexadecimal digits, high word first.
         */
        @Override
        public String toString() {
            return String.format("%016x%016x", hi, lo);
        }

        /**
         * Order by the high word, then by the low word (signed comparison).
         */
        @Override
        public int compareTo(ShmId other) {
            return ComparisonChain.start().compare(hi, other.hi).compare(lo, other.lo).result();
        }
    }

    /**
     * Uniquely identifies a slot: the id of the shared memory segment plus the
     * index of the slot inside that segment.
     */
    public static class SlotId {
        private final ShmId shmId;
        private final int slotIdx;

        /**
         * @param shmId   Id of the segment that contains the slot.
         * @param slotIdx Index of the slot within that segment.
         */
        public SlotId(ShmId shmId, int slotIdx) {
            this.shmId = shmId;
            this.slotIdx = slotIdx;
        }

        /**
         * @return Id of the segment that contains the slot.
         */
        public ShmId getShmId() {
            return this.shmId;
        }

        /**
         * @return Index of the slot within its segment.
         */
        public int getSlotIdx() {
            return this.slotIdx;
        }

        /**
         * Two SlotIds are equal when they are of exactly the same class and
         * both the segment id and the slot index match.
         */
        @Override
        public boolean equals(Object o) {
            if (o == null) {
                return false;
            }
            if (getClass() != o.getClass()) {
                return false;
            }
            final SlotId that = (SlotId) o;
            final EqualsBuilder comparison = new EqualsBuilder();
            comparison.append(shmId, that.shmId);
            comparison.append(slotIdx, that.slotIdx);
            return comparison.isEquals();
        }

        @Override
        public int hashCode() {
            final HashCodeBuilder hash = new HashCodeBuilder();
            hash.append(shmId);
            hash.append(slotIdx);
            return hash.toHashCode();
        }

        @Override
        public String toString() {
            final String segment = shmId.toString();
            return String.format("SlotId(%s:%d)", segment, slotIdx);
        }
    }

    /**
     * Iterates over the slots registered in the enclosing segment, in
     * increasing slot-index order.
     *
     * Each call takes the enclosing ShortCircuitShm's monitor on its own, so
     * the set of registered slots may change between a call to hasNext() and
     * the following call to next().
     */
    public class SlotIterator implements Iterator<Slot> {
        /**
         * Index of the slot most recently returned by next(), or -1 if next()
         * has not returned anything yet.
         */
        int slotIdx = -1;

        @Override
        public boolean hasNext() {
            final int upcoming;
            synchronized (ShortCircuitShm.this) {
                upcoming = allocatedSlots.nextSetBit(slotIdx + 1);
            }
            // nextSetBit reports "no further set bit" as -1.
            return upcoming >= 0;
        }

        @Override
        public Slot next() {
            synchronized (ShortCircuitShm.this) {
                final int upcoming = allocatedSlots.nextSetBit(slotIdx + 1);
                if (upcoming < 0) {
                    throw new NoSuchElementException();
                }
                slotIdx = upcoming;
                return slots[upcoming];
            }
        }

        /**
         * Removal is not supported; this always throws.
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("SlotIterator doesn't support removal");
        }
    }

    /**
     * A slot containing information about a replica.
     *
     * Each slot occupies BYTES_PER_SLOT bytes of the shared memory segment.
     * This class only touches the first 64-bit word, located at the slot
     * address.  With bit 0 being the least significant bit, that word holds:
     *   bit 63      VALID_FLAG
     *   bit 62      ANCHORABLE_FLAG
     *   bits 31:61  Not used by this class.
     *   bits 0:30   Anchor count.
     * The remaining words of the slot are reserved for future use, such as
     * statistics.  The padding is also useful for avoiding false sharing.
     *
     * Little-endian versus big-endian is not relevant here since both the client
     * and the server reside on the same computer and use the same orientation.
     */
    public class Slot {
        /**
         * Flag indicating that the slot is valid.  
         * 
         * The DFSClient sets this flag when it allocates a new slot within one of
         * its shared memory regions.
         * 
         * The DataNode clears this flag when the replica associated with this slot
         * is no longer valid.  The client itself also clears this flag when it
         * believes that the DataNode is no longer using this slot to communicate.
         */
        private static final long VALID_FLAG = 1L << 63;

        /**
         * Flag indicating that the slot can be anchored.
         */
        private static final long ANCHORABLE_FLAG = 1L << 62;

        /**
         * Mask selecting the anchor count, i.e. the low 31 bits of the word.
         * It is also the largest anchor count a slot can hold.
         */
        private static final long ANCHOR_COUNT_MASK = 0x7fffffffL;

        /**
         * The slot address in memory.
         */
        private final long slotAddress;

        /**
         * BlockId of the block this slot is used for.
         */
        private final ExtendedBlockId blockId;

        /**
         * @param slotAddress Address of the first byte of the slot.
         * @param blockId     Block this slot is used for.
         */
        Slot(long slotAddress, ExtendedBlockId blockId) {
            this.slotAddress = slotAddress;
            this.blockId = blockId;
        }

        /**
         * Volatile read of the slot's first word.
         */
        private long readWord() {
            return unsafe.getLongVolatile(null, slotAddress);
        }

        /**
         * Atomically replace the slot's first word if it still holds the
         * expected value.
         *
         * @return True if the word was replaced.
         */
        private boolean replaceWord(long expected, long updated) {
            return unsafe.compareAndSwapLong(null, slotAddress, expected, updated);
        }

        /**
         * Get the short-circuit memory segment associated with this Slot.
         *
         * @return      The enclosing short-circuit memory segment.
         */
        public ShortCircuitShm getShm() {
            return ShortCircuitShm.this;
        }

        /**
         * Get the ExtendedBlockId associated with this slot.
         *
         * @return      The ExtendedBlockId of this slot.
         */
        public ExtendedBlockId getBlockId() {
            return this.blockId;
        }

        /**
         * Get the SlotId of this slot, containing both shmId and slotIdx.
         *
         * @return      A newly created SlotId for this slot.
         */
        public SlotId getSlotId() {
            final int index = getSlotIdx();
            return new SlotId(getShmId(), index);
        }

        /**
         * Get the Slot index, derived from the distance between the slot
         * address and the base address of the mapping.
         *
         * @return      The index of this slot.
         */
        public int getSlotIdx() {
            final long byteOffset = slotAddress - baseAddress;
            return Ints.checkedCast(byteOffset / BYTES_PER_SLOT);
        }

        /**
         * Clear the slot: zero its first word, which drops both flags and the
         * anchor count.
         */
        void clear() {
            unsafe.putLongVolatile(null, slotAddress, 0L);
        }

        /**
         * @return True if any bit of the given flag is currently set.
         */
        private boolean isSet(long flag) {
            return (readWord() & flag) != 0;
        }

        /**
         * Atomically set a flag, retrying while other updates to the word
         * interfere.  Does nothing if the flag is already set.
         */
        private void setFlag(long flag) {
            for (;;) {
                final long current = readWord();
                if ((current & flag) != 0) {
                    return;
                }
                if (replaceWord(current, current | flag)) {
                    return;
                }
            }
        }

        /**
         * Atomically clear a flag, retrying while other updates to the word
         * interfere.  Does nothing if the flag is already clear.
         */
        private void clearFlag(long flag) {
            for (;;) {
                final long current = readWord();
                if ((current & flag) == 0) {
                    return;
                }
                if (replaceWord(current, current & ~flag)) {
                    return;
                }
            }
        }

        /**
         * @return True if the valid flag is set.
         */
        public boolean isValid() {
            return isSet(VALID_FLAG);
        }

        /**
         * Set the valid flag.
         */
        public void makeValid() {
            setFlag(VALID_FLAG);
        }

        /**
         * Clear the valid flag.
         */
        public void makeInvalid() {
            clearFlag(VALID_FLAG);
        }

        /**
         * @return True if the anchorable flag is set.
         */
        public boolean isAnchorable() {
            return isSet(ANCHORABLE_FLAG);
        }

        /**
         * Set the anchorable flag.
         */
        public void makeAnchorable() {
            setFlag(ANCHORABLE_FLAG);
        }

        /**
         * Clear the anchorable flag.
         */
        public void makeUnanchorable() {
            clearFlag(ANCHORABLE_FLAG);
        }

        /**
         * @return True if the slot is valid and its anchor count is non-zero.
         */
        public boolean isAnchored() {
            final long current = readWord();
            final boolean valid = (current & VALID_FLAG) != 0;
            // A slot that is no longer valid is never reported as anchored.
            return valid && (current & ANCHOR_COUNT_MASK) != 0;
        }

        /**
         * Try to add an anchor for a given slot.
         *
         * When a slot is anchored, we know that the block it refers to is resident
         * in memory.
         *
         * The attempt fails if the slot is not valid, is not anchorable, or
         * already holds the maximum anchor count.
         *
         * @return          True if the slot is anchored.
         */
        public boolean addAnchor() {
            for (;;) {
                final long current = readWord();
                final boolean valid = (current & VALID_FLAG) != 0;
                final boolean anchorable = (current & ANCHORABLE_FLAG) != 0;
                // Too many other threads have anchored the slot (2 billion?)
                final boolean saturated = (current & ANCHOR_COUNT_MASK) == ANCHOR_COUNT_MASK;
                if (!valid || !anchorable || saturated) {
                    return false;
                }
                // The count lives in the low bits, so adding one to the whole
                // word increments it without disturbing the flags.
                if (replaceWord(current, current + 1)) {
                    return true;
                }
            }
        }

        /**
         * Remove an anchor for a given slot.
         *
         * @throws IllegalStateException If the anchor count is already zero.
         */
        public void removeAnchor() {
            for (;;) {
                final long current = readWord();
                Preconditions.checkState((current & ANCHOR_COUNT_MASK) != 0,
                        "Tried to remove anchor for slot " + slotAddress + ", which was not anchored.");
                if (replaceWord(current, current - 1)) {
                    return;
                }
            }
        }

        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder("Slot(slotIdx=");
            text.append(getSlotIdx());
            text.append(", shm=").append(getShm());
            text.append(")");
            return text.toString();
        }
    }

    /**
     * ID for this SharedMemorySegment.
     */
    private final ShmId shmId;

    /**
     * The base address of the memory-mapped file.
     */
    private final long baseAddress;

    /**
     * The mmapped length of the shared memory segment
     */
    private final int mmappedLength;

    /**
     * The slots associated with this shared memory segment.
     * slot[i] contains the slot at offset i * BYTES_PER_SLOT,
     * or null if that slot is not allocated.
     */
    private final Slot slots[];

    /**
     * A bitset where each bit represents a slot which is in use.
     */
    private final BitSet allocatedSlots;

    /**
     * Create the ShortCircuitShm.
     * 
     * @param shmId       The ID to use.
     * @param stream      The stream that we're going to use to create this 
     *                    shared memory segment.
     *                    
     *                    Although this is a FileInputStream, we are going to
     *                    assume that the underlying file descriptor is writable
     *                    as well as readable. It would be more appropriate to use
     *                    a RandomAccessFile here, but that class does not have
     *                    any public accessor which returns a FileDescriptor,
     *                    unlike FileInputStream.
     */
    public ShortCircuitShm(ShmId shmId, FileInputStream stream) throws IOException {
        if (!NativeIO.isAvailable()) {
            throw new UnsupportedOperationException("NativeIO is not available.");
        }
        if (Shell.WINDOWS) {
            throw new UnsupportedOperationException("DfsClientShm is not yet implemented for Windows.");
        }
        if (unsafe == null) {
            throw new UnsupportedOperationException(
                    "can't use DfsClientShm because we failed to " + "load misc.Unsafe.");
        }
        this.shmId = shmId;
        this.mmappedLength = getUsableLength(stream);
        this.baseAddress = POSIX.mmap(stream.getFD(), POSIX.MMAP_PROT_READ | POSIX.MMAP_PROT_WRITE, true,
                mmappedLength);
        this.slots = new Slot[mmappedLength / BYTES_PER_SLOT];
        this.allocatedSlots = new BitSet(slots.length);
        if (LOG.isTraceEnabled()) {
            LOG.trace("creating " + this.getClass().getSimpleName() + "(shmId=" + shmId + ", mmappedLength="
                    + mmappedLength + ", baseAddress=" + String.format("%x", baseAddress) + ", slots.length="
                    + slots.length + ")");
        }
    }

    /**
     * Get the identifier of this shared memory segment.
     *
     * @return    The ShmId passed to the constructor.
     */
    public final ShmId getShmId() {
        return this.shmId;
    }

    /**
     * Tell whether no slot of this shared memory object is in use.
     *
     * @return    True if no slot is currently allocated.
     */
    public final synchronized boolean isEmpty() {
        return allocatedSlots.isEmpty();
    }

    /**
     * Tell whether every slot of this shared memory object is in use.
     *
     * @return    True if no free slot is left.
     */
    public final synchronized boolean isFull() {
        // The segment is full when the first free index lies past the last slot.
        final int firstFreeIdx = allocatedSlots.nextClearBit(0);
        return firstFreeIdx >= slots.length;
    }

    /**
     * Compute the address of the first byte of a slot.
     *
     * @param slotIdx   Index of the slot.
     * @return          The base address of the slot.
     */
    private final long calculateSlotAddress(int slotIdx) {
        // Widen before multiplying so the byte offset is computed in 64 bits.
        return this.baseAddress + ((long) slotIdx) * BYTES_PER_SLOT;
    }

    /**
     * Allocate a new slot and register it.
     *
     * The lowest-numbered free slot is picked, its first word is zeroed and
     * then marked valid, and the resulting Slot object is recorded.
     *
     * @param blockId   The block the new slot will be used for.
     * @return          The new slot.
     * @throws RuntimeException If every slot is already in use.
     */
    public final synchronized Slot allocAndRegisterSlot(ExtendedBlockId blockId) {
        final int freeIdx = allocatedSlots.nextClearBit(0);
        if (freeIdx >= slots.length) {
            throw new RuntimeException(this + ": no more slots are available.");
        }
        allocatedSlots.set(freeIdx);
        final Slot created = new Slot(calculateSlotAddress(freeIdx), blockId);
        // Wipe whatever the slot held before, and only then mark it valid.
        created.clear();
        created.makeValid();
        slots[freeIdx] = created;
        if (LOG.isTraceEnabled()) {
            final String stackTrace = StringUtils.getStackTrace(Thread.currentThread());
            LOG.trace(this + ": allocAndRegisterSlot " + freeIdx + ": allocatedSlots=" + allocatedSlots
                    + stackTrace);
        }
        return created;
    }

    /**
     * Look up a slot that has already been allocated or registered.
     *
     * @param slotIdx   Index of the slot to fetch.
     * @return          The Slot object registered at that index.
     *
     * @throws InvalidRequestException
     *            If the index is negative, or no slot is registered at it
     *            (which includes indices past the end of the segment).
     */
    synchronized public final Slot getSlot(int slotIdx) throws InvalidRequestException {
        // BitSet.get rejects negative indices with an unchecked
        // IndexOutOfBoundsException; report them the same way registerSlot does.
        if (slotIdx < 0) {
            throw new InvalidRequestException(this + ": invalid negative slot " + "index " + slotIdx);
        }
        if (!allocatedSlots.get(slotIdx)) {
            throw new InvalidRequestException(this + ": slot " + slotIdx + " does not exist.");
        }
        return slots[slotIdx];
    }

    /**
     * Register a slot.
     *
     * This function looks at a slot which has already been initialized (by
     * another process), and registers it with us.  Then, it returns the 
     * relevant Slot object.
     *
     * @param slotIdx   Index of the slot to register.
     * @param blockId   The block the slot is used for.
     * @return          The slot.
     *
     * @throws InvalidRequestException
     *            If the slot index is out of range, is already in use, or
     *            refers to a slot that is not marked as valid.
     */
    public final synchronized Slot registerSlot(int slotIdx, ExtendedBlockId blockId)
            throws InvalidRequestException {
        if (slotIdx < 0) {
            throw new InvalidRequestException(this + ": invalid negative slot index " + slotIdx);
        }
        if (slotIdx >= slots.length) {
            throw new InvalidRequestException(this + ": invalid slot index " + slotIdx);
        }
        if (allocatedSlots.get(slotIdx)) {
            throw new InvalidRequestException(this + ": slot " + slotIdx + " is already in use.");
        }
        final Slot candidate = new Slot(calculateSlotAddress(slotIdx), blockId);
        // The slot must already carry the valid flag before we accept it.
        if (!candidate.isValid()) {
            throw new InvalidRequestException(this + ": slot " + slotIdx + " is not marked as valid.");
        }
        slots[slotIdx] = candidate;
        allocatedSlots.set(slotIdx);
        if (LOG.isTraceEnabled()) {
            final String stackTrace = StringUtils.getStackTrace(Thread.currentThread());
            LOG.trace(this + ": registerSlot " + slotIdx + ": allocatedSlots=" + allocatedSlots
                    + stackTrace);
        }
        return candidate;
    }

    /**
     * Unregisters a slot.
     * 
     * This doesn't alter the contents of the slot.  It just means that this
     * object stops tracking the slot, so its index becomes available to
     * allocAndRegisterSlot and registerSlot again.
     *
     * @param slotIdx  Index of the slot to unregister.
     * @throws IllegalStateException If no slot is registered at that index.
     */
    public final synchronized void unregisterSlot(int slotIdx) {
        final boolean registered = allocatedSlots.get(slotIdx);
        Preconditions.checkState(registered,
                "tried to unregister slot " + slotIdx + ", which was not registered.");
        allocatedSlots.clear(slotIdx);
        slots[slotIdx] = null;
        if (LOG.isTraceEnabled()) {
            LOG.trace(this + ": unregisterSlot " + slotIdx);
        }
    }

    /**
     * Iterate over all allocated slots.
     * 
     * Note that this method isn't safe if slots are registered or unregistered
     * while the iteration is in progress: the iterator takes this object's
     * lock separately for each hasNext() and next() call, so a slot seen by
     * hasNext() may be gone by the time next() runs.
     *
     * @return        The slot iterator.
     */
    public SlotIterator slotIterator() {
        return this.new SlotIterator();
    }

    /**
     * Unmap the shared memory segment.
     *
     * A failure to unmap is logged as a warning and otherwise ignored.  The
     * "freed" trace message is only written when the unmap succeeded.
     *
     * This method keeps no record of having been called, so calling it again
     * would unmap the same address range a second time.
     */
    public void free() {
        try {
            POSIX.munmap(baseAddress, mmappedLength);
        } catch (IOException e) {
            LOG.warn(this + ": failed to munmap", e);
            // Don't claim the segment was freed when it was not.
            return;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace(this + ": freed");
        }
    }

    /**
     * @return The simple name of the runtime class followed by the segment id
     *         in parentheses.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(this.getClass().getSimpleName());
        text.append("(").append(shmId).append(")");
        return text.toString();
    }
}