org.apache.hadoop.hdfs.client.ClientMmapManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.client.ClientMmapManager.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.client;

import java.io.Closeable;

import org.apache.hadoop.classification.InterfaceAudience;

import java.io.FileInputStream;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.io.IOUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ComparisonChain;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Tracks mmap instances used on an HDFS client.
 *
 * mmaps can be used concurrently by multiple threads at once.
 * mmaps cannot be closed while they are in use.
 *
 * The cache is important for performance, because the first time an mmap is
 * created, the page table entries (PTEs) are not yet set up.
 * Even when reading data that is entirely resident in memory, reading an
 * mmap the second time is faster.
 */
@InterfaceAudience.Private
public class ClientMmapManager implements Closeable {
    public static final Log LOG = LogFactory.getLog(ClientMmapManager.class);

    private boolean closed = false;

    private final int cacheSize;

    private final long timeoutNs;

    private final int runsPerTimeout;

    private final Lock lock = new ReentrantLock();

    /**
     * Maps block, datanode_id to the client mmap object.
     * If the ClientMmap is in the process of being loaded,
     * {@link Waitable<ClientMmap>#await()} will block.
     *
     * Protected by the ClientMmapManager lock.
     */
    private final TreeMap<Key, Waitable<ClientMmap>> mmaps = new TreeMap<Key, Waitable<ClientMmap>>();

    /**
     * Maps the last use time to the client mmap object.
     * We ensure that each last use time is unique by inserting a jitter of a
     * nanosecond or two if necessary.
     * 
     * Protected by the ClientMmapManager lock.
     * ClientMmap objects that are in use are never evictable.
     */
    private final TreeMap<Long, ClientMmap> evictable = new TreeMap<Long, ClientMmap>();

    private final ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(1,
            new ThreadFactoryBuilder().setDaemon(true).setNameFormat("ClientMmapManager").build());

    /**
     * The CacheCleaner for this ClientMmapManager.  We don't create this
     * and schedule it until it becomes necessary.
     */
    private CacheCleaner cacheCleaner;

    /**
     * Factory method to create a ClientMmapManager from a Hadoop
     * configuration.
     */
    public static ClientMmapManager fromConf(Configuration conf) {
        return new ClientMmapManager(conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE, DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT),
                conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS, DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT),
                conf.getInt(DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
                        DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT));
    }

    public ClientMmapManager(int cacheSize, long timeoutMs, int runsPerTimeout) {
        this.cacheSize = cacheSize;
        this.timeoutNs = timeoutMs * 1000000;
        this.runsPerTimeout = runsPerTimeout;
    }

    long getTimeoutMs() {
        return this.timeoutNs / 1000000;
    }

    int getRunsPerTimeout() {
        return this.runsPerTimeout;
    }

    public String verifyConfigurationMatches(Configuration conf) {
        StringBuilder bld = new StringBuilder();
        int cacheSize = conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE, DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
        if (this.cacheSize != cacheSize) {
            bld.append("You specified a cache size of ").append(cacheSize)
                    .append(", but the existing cache size is ").append(this.cacheSize).append(".  ");
        }
        long timeoutMs = conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS, DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
        if (getTimeoutMs() != timeoutMs) {
            bld.append("You specified a cache timeout of ").append(timeoutMs)
                    .append(" ms, but the existing cache timeout is ").append(getTimeoutMs()).append("ms")
                    .append(".  ");
        }
        int runsPerTimeout = conf.getInt(DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
                DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT);
        if (getRunsPerTimeout() != runsPerTimeout) {
            bld.append("You specified ").append(runsPerTimeout)
                    .append(" runs per timeout, but the existing runs per timeout is ").append(getTimeoutMs())
                    .append(".  ");
        }
        return bld.toString();
    }

    private static class Waitable<T> {
        private T val;
        private final Condition cond;

        public Waitable(Condition cond) {
            this.val = null;
            this.cond = cond;
        }

        public T await() throws InterruptedException {
            while (this.val == null) {
                this.cond.await();
            }
            return this.val;
        }

        public void provide(T val) {
            this.val = val;
            this.cond.signalAll();
        }
    }

    private static class Key implements Comparable<Key> {
        private final ExtendedBlock block;
        private final DatanodeID datanode;

        Key(ExtendedBlock block, DatanodeID datanode) {
            this.block = block;
            this.datanode = datanode;
        }

        /**
         * Compare two ClientMmap regions that we're storing.
         *
         * When we append to a block, we bump the genstamp.  It is important to 
         * compare the genStamp here.  That way, we will not return a shorter 
         * mmap than required.
         */
        @Override
        public int compareTo(Key o) {
            return ComparisonChain.start().compare(block.getBlockId(), o.block.getBlockId())
                    .compare(block.getGenerationStamp(), o.block.getGenerationStamp())
                    .compare(block.getBlockPoolId(), o.block.getBlockPoolId()).compare(datanode, o.datanode)
                    .result();
        }

        @Override
        public boolean equals(Object rhs) {
            if (rhs == null) {
                return false;
            }
            try {
                Key o = (Key) rhs;
                return (compareTo(o) == 0);
            } catch (ClassCastException e) {
                return false;
            }
        }

        @Override
        public int hashCode() {
            return block.hashCode() ^ datanode.hashCode();
        }
    }

    /**
     * Thread which handles expiring mmaps from the cache.
     */
    private static class CacheCleaner implements Runnable, Closeable {
        private WeakReference<ClientMmapManager> managerRef;
        private ScheduledFuture<?> future;

        CacheCleaner(ClientMmapManager manager) {
            this.managerRef = new WeakReference<ClientMmapManager>(manager);
        }

        @Override
        public void run() {
            ClientMmapManager manager = managerRef.get();
            if (manager == null)
                return;
            long curTime = System.nanoTime();
            try {
                manager.lock.lock();
                manager.evictStaleEntries(curTime);
            } finally {
                manager.lock.unlock();
            }
        }

        void setFuture(ScheduledFuture<?> future) {
            this.future = future;
        }

        @Override
        public void close() throws IOException {
            future.cancel(false);
        }
    }

    /**
     * Evict entries which are older than curTime + timeoutNs from the cache.
     *
     * NOTE: you must call this function with the lock held.
     */
    private void evictStaleEntries(long curTime) {
        if (closed) {
            return;
        }
        Iterator<Entry<Long, ClientMmap>> iter = evictable.entrySet().iterator();
        while (iter.hasNext()) {
            Entry<Long, ClientMmap> entry = iter.next();
            if (entry.getKey() + timeoutNs >= curTime) {
                return;
            }
            ClientMmap mmap = entry.getValue();
            Key key = new Key(mmap.getBlock(), mmap.getDatanodeID());
            mmaps.remove(key);
            iter.remove();
            mmap.unmap();
        }
    }

    /**
     * Evict one mmap object from the cache.
     *
     * NOTE: you must call this function with the lock held.
     *
     * @return                  True if an object was evicted; false if none
     *                          could be evicted.
     */
    private boolean evictOne() {
        Entry<Long, ClientMmap> entry = evictable.pollFirstEntry();
        if (entry == null) {
            // We don't want to try creating another mmap region, because the
            // cache is full.
            return false;
        }
        ClientMmap evictedMmap = entry.getValue();
        Key evictedKey = new Key(evictedMmap.getBlock(), evictedMmap.getDatanodeID());
        mmaps.remove(evictedKey);
        evictedMmap.unmap();
        return true;
    }

    /**
     * Create a new mmap object.
     * 
     * NOTE: you must call this function with the lock held.
     *
     * @param key              The key which describes this mmap.
     * @param in               The input stream to use to create the mmap.
     * @return                 The new mmap object, or null if there were
     *                         insufficient resources.
     * @throws IOException     If there was an I/O error creating the mmap.
     */
    private ClientMmap create(Key key, FileInputStream in) throws IOException {
        if (mmaps.size() + 1 > cacheSize) {
            if (!evictOne()) {
                LOG.warn("mmap cache is full (with " + cacheSize + " elements) and "
                        + "nothing is evictable.  Ignoring request for mmap with " + "datanodeID=" + key.datanode
                        + ", " + "block=" + key.block);
                return null;
            }
        }
        // Create the condition variable that other threads may wait on.
        Waitable<ClientMmap> waitable = new Waitable<ClientMmap>(lock.newCondition());
        mmaps.put(key, waitable);
        // Load the entry
        boolean success = false;
        ClientMmap mmap = null;
        try {
            try {
                lock.unlock();
                mmap = ClientMmap.load(this, in, key.block, key.datanode);
            } finally {
                lock.lock();
            }
            if (cacheCleaner == null) {
                cacheCleaner = new CacheCleaner(this);
                ScheduledFuture<?> future = executor.scheduleAtFixedRate(cacheCleaner, timeoutNs,
                        timeoutNs / runsPerTimeout, TimeUnit.NANOSECONDS);
                cacheCleaner.setFuture(future);
            }
            success = true;
        } finally {
            if (!success) {
                LOG.warn("failed to create mmap for datanodeID=" + key.datanode + ", " + "block=" + key.block);
                mmaps.remove(key);
            }
            waitable.provide(mmap);
        }
        if (LOG.isDebugEnabled()) {
            LOG.info("created a new ClientMmap for block " + key.block + " on datanode " + key.datanode);
        }
        return mmap;
    }

    /**
     * Get or create an mmap region.
     * 
     * @param node       The DataNode that owns the block for this mmap region.
     * @param block      The block ID, block pool ID, and generation stamp of 
     *                     the block we want to read.
     * @param in         An open file for this block.  This stream is only used
     *                     if we have to create a new mmap; if we use an
     *                     existing one, it is ignored.
     *
     * @return           The client mmap region.
     */
    public ClientMmap fetch(DatanodeID datanodeID, ExtendedBlock block, FileInputStream in)
            throws IOException, InterruptedException {
        LOG.debug("fetching mmap with datanodeID=" + datanodeID + ", " + "block=" + block);
        Key key = new Key(block, datanodeID);
        ClientMmap mmap = null;
        try {
            lock.lock();
            if (closed) {
                throw new IOException("ClientMmapManager is closed.");
            }
            while (mmap == null) {
                Waitable<ClientMmap> entry = mmaps.get(key);
                if (entry == null) {
                    return create(key, in);
                }
                mmap = entry.await();
            }
            if (mmap.ref() == 1) {
                // When going from nobody using the mmap (ref = 0) to somebody
                // using the mmap (ref = 1), we must make the mmap un-evictable.
                evictable.remove(mmap.getLastEvictableTimeNs());
            }
        } finally {
            lock.unlock();
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("reusing existing mmap with datanodeID=" + datanodeID + ", " + "block=" + block);
        }
        return mmap;
    }

    /**
     * Make an mmap evictable.
     * 
     * When an mmap is evictable, it may be removed from the cache if necessary.
     * mmaps can only be evictable if nobody is using them.
     *
     * @param mmap             The mmap to make evictable.
     */
    void makeEvictable(ClientMmap mmap) {
        try {
            lock.lock();
            if (closed) {
                // If this ClientMmapManager is closed, then don't bother with the
                // cache; just close the mmap.
                mmap.unmap();
                return;
            }
            long now = System.nanoTime();
            while (evictable.containsKey(now)) {
                now++;
            }
            mmap.setLastEvictableTimeNs(now);
            evictable.put(now, mmap);
        } finally {
            lock.unlock();
        }
    }

    @Override
    public void close() throws IOException {
        try {
            lock.lock();
            closed = true;
            IOUtils.cleanup(LOG, cacheCleaner);

            // Unmap all the mmaps that nobody is using.
            // The ones which are in use will be unmapped just as soon as people stop
            // using them.
            evictStaleEntries(Long.MAX_VALUE);

            executor.shutdown();
        } finally {
            lock.unlock();
        }
    }

    @VisibleForTesting
    public interface ClientMmapVisitor {
        void accept(ClientMmap mmap);
    }

    @VisibleForTesting
    public synchronized void visitMmaps(ClientMmapVisitor visitor) throws InterruptedException {
        for (Waitable<ClientMmap> entry : mmaps.values()) {
            visitor.accept(entry.await());
        }
    }

    public void visitEvictable(ClientMmapVisitor visitor) throws InterruptedException {
        for (ClientMmap mmap : evictable.values()) {
            visitor.accept(mmap);
        }
    }
}