org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyDefault;
import org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicy.NotEnoughReplicasException;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.raid.DirectoryStripeReader.BlockInfo;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.Codec;
import org.apache.hadoop.util.HostsFileReader;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.StringUtils;

/**
 * This BlockPlacementPolicy uses a simple heuristic: random placement of
 * the replicas of a newly-created block, for the purpose of spreading out
 * the group of blocks that are used by RAID to recover each other.
 * This is important for the availability of the blocks.
 *
 * Replication of an existing block continues to use the default placement
 * policy.
 *
 * This simple block placement policy does not guarantee that blocks on a
 * RAID stripe land on different nodes. However, the BlockMonitor
 * periodically scans the raided files and fixes the placement
 * if it detects a violation.
 *
 * This class can be used by multiple threads, so it has to be thread-safe.
 */
public class BlockPlacementPolicyRaid extends BlockPlacementPolicyDefault {
    public static final Log LOG = LogFactory.getLog(BlockPlacementPolicyRaid.class);
    Configuration conf;
    private FSNamesystem namesystem = null;

    private CachedLocatedBlocks cachedLocatedBlocks;
    private CachedFullPathNames cachedFullPathNames;
    private long minFileSize = RaidNode.MINIMUM_RAIDABLE_FILESIZE;

    /** {@inheritDoc} */
    @Override
    public void initialize(Configuration conf, FSClusterStats stats, NetworkTopology clusterMap,
            HostsFileReader hostsReader, DNSToSwitchMapping dnsToSwitchMapping, FSNamesystem namesystem) {
        super.initialize(conf, stats, clusterMap, hostsReader, dnsToSwitchMapping, namesystem);
        this.conf = conf;
        this.minFileSize = conf.getLong(RaidNode.MINIMUM_RAIDABLE_FILESIZE_KEY, RaidNode.MINIMUM_RAIDABLE_FILESIZE);
        this.namesystem = namesystem;
        this.cachedLocatedBlocks = new CachedLocatedBlocks(conf);
        this.cachedFullPathNames = new CachedFullPathNames(conf);
    }
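
    // Deployment sketch (an assumption, not part of this file): the namenode
    // instantiates the placement policy named in its configuration, so enabling
    // this class would look roughly like the following in hdfs-site.xml; the
    // exact key ("dfs.block.replicator.classname" here) varies across Hadoop
    // versions.
    //
    //   <property>
    //     <name>dfs.block.replicator.classname</name>
    //     <value>org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid</value>
    //   </property>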

    @Override
    public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas, DatanodeDescriptor writer,
            List<DatanodeDescriptor> chosenNodes, long blocksize) {
        return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, null, blocksize);
    }

    @Override
    protected void place3rdReplicaForInClusterWriter(HashMap<Node, Node> excludedNodes, long blocksize,
            int maxNodesPerRack, List<DatanodeDescriptor> results) throws NotEnoughReplicasException {
        if (results.size() > 2) {
            return;
        }
        HashSet<String> excludedRacks = new HashSet<String>();
        for (DatanodeDescriptor node : results) {
            String rack = node.getNetworkLocation();
            excludedRacks.add(rack);
        }

        do {
            String remoteRack = clusterMap.chooseRack(excludedRacks);
            if (remoteRack == null) { // no more remote rack available
                // choose a node on the rack where the first replica is located
                chooseLocalRack(results.get(0), excludedNodes, blocksize, maxNodesPerRack, results);
                return;
            }
            // a remote rack is chosen
            try {
                excludedRacks.add(remoteRack);
                chooseRandom(1, remoteRack, excludedNodes, blocksize, maxNodesPerRack, results);
                return;
            } catch (NotEnoughReplicasException ne) {
                // try again until all remote racks are exhausted
            }
        } while (true);
    }
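
    // In short: the third replica goes to a rack not already holding one of
    // the first two; only when no such rack remains does it fall back to the
    // rack of the first replica.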

    @Override
    public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas, DatanodeDescriptor writer,
            List<DatanodeDescriptor> chosenNodes, List<Node> exclNodes, long blocksize) {
        try {
            FileInfo info = getFileInfo(null, srcPath);
            if (LOG.isDebugEnabled()) {
                LOG.debug("FileType:" + srcPath + " " + info.type.name());
            }
            if (info.type == FileType.NOT_RAID) {
                return super.chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, exclNodes, blocksize);
            }
            ArrayList<DatanodeDescriptor> results = new ArrayList<DatanodeDescriptor>();
            HashMap<Node, Node> excludedNodes = new HashMap<Node, Node>();
            if (exclNodes != null) {
                for (Node node : exclNodes) {
                    excludedNodes.put(node, node);
                }
            }
            for (Node node : chosenNodes) {
                excludedNodes.put(node, node);
            }
            chooseRandom(numOfReplicas, Path.SEPARATOR, excludedNodes, blocksize, 1, results);
            return results.toArray(new DatanodeDescriptor[results.size()]);
        } catch (Exception e) {
            FSNamesystem.LOG
                    .debug("Error happened when choosing a datanode to write: " + StringUtils.stringifyException(e));
            return super.chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, blocksize);
        }
    }
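
    // Note: for raided files the targets above are drawn uniformly at random
    // from the whole cluster (scope Path.SEPARATOR) with at most one node per
    // rack, which is what spreads a stripe's blocks for RAID recovery.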

    /** {@inheritDoc} */
    @Override
    public DatanodeDescriptor chooseReplicaToDelete(FSInodeInfo inode, Block block, short replicationFactor,
            Collection<DatanodeDescriptor> first, Collection<DatanodeDescriptor> second) {

        DatanodeDescriptor chosenNode = null;
        try {
            String path = getFullPathName(inode);
            FileInfo info = getFileInfo(inode, path);
            if (info.type == FileType.NOT_RAID) {
                return super.chooseReplicaToDelete(inode, block, replicationFactor, first, second);
            }
            List<LocatedBlock> companionBlocks = getCompanionBlocks(path, info, block, inode);
            if (companionBlocks == null || companionBlocks.size() == 0) {
                // Use the default method if it is not a valid raided or parity file
                return super.chooseReplicaToDelete(inode, block, replicationFactor, first, second);
            }
            // Consider replicas from both collections together and pick the
            // one whose deletion best spreads out the companion blocks
            Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>();
            all.addAll(first);
            all.addAll(second);
            chosenNode = chooseReplicaToDelete(companionBlocks, all);
            if (chosenNode != null) {
                return chosenNode;
            }
            return super.chooseReplicaToDelete(inode, block, replicationFactor, first, second);
        } catch (Exception e) {
            LOG.debug("Failed to choose the correct replica to delete", e);
            return super.chooseReplicaToDelete(inode, block, replicationFactor, first, second);
        }
    }

    private DatanodeDescriptor chooseReplicaToDelete(Collection<LocatedBlock> companionBlocks,
            Collection<DatanodeDescriptor> dataNodes) throws IOException {

        if (dataNodes.isEmpty()) {
            return null;
        }
        // Count the number of replicas on each node and rack
        final Map<String, Integer>[] companionBlockCounts = countCompanionBlocks(companionBlocks);
        final Map<String, Integer> nodeCompanionBlockCount = companionBlockCounts[0];
        final Map<String, Integer> rackCompanionBlockCount = companionBlockCounts[1];

        NodeComparator comparator = new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount);
        return Collections.max(dataNodes, comparator);
    }

    /**
     * Count how many companion blocks are on each datanode and on each rack.
     * @param companionBlocks a collection of all the companion blocks
     * @return maps from node name to the number of companion blocks:
     *         [0] for datanodes, [1] for racks
     */
    @SuppressWarnings("unchecked")
    static Map<String, Integer>[] countCompanionBlocks(Collection<LocatedBlock> companionBlocks) {
        Map<String, Integer>[] result = new HashMap[2];
        result[0] = new HashMap<String, Integer>();
        result[1] = new HashMap<String, Integer>();

        for (LocatedBlock block : companionBlocks) {
            for (DatanodeInfo d : block.getLocations()) {
                // count the companion blocks on the datanodes
                String name = d.getName();
                Integer currentCount = result[0].get(name);
                result[0].put(name, currentCount == null ? 1 : currentCount + 1);

                // count the companion blocks on the racks of datanodes
                name = d.getParent().getName();
                currentCount = result[1].get(name);
                result[1].put(name, currentCount == null ? 1 : currentCount + 1);
            }
        }
        return result;
    }
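
    // Illustrative example (values assumed): if the companion blocks have
    // replicas on datanode d1 (rack /r1) twice and on d2 (rack /r2) once, then
    //   result[0] = { "d1" -> 2, "d2" -> 1 }   // per-datanode counts
    //   result[1] = { "/r1" -> 2, "/r2" -> 1 } // per-rack counts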

    /**
     * Compares datanodes based on the number of companion blocks on the same
     * node and rack. If those are equal, compares the remaining space on the
     * datanodes.
     */
    class NodeComparator implements Comparator<DatanodeDescriptor> {
        private Map<String, Integer> nodeBlockCount;
        private Map<String, Integer> rackBlockCount;

        private NodeComparator(Map<String, Integer> nodeBlockCount, Map<String, Integer> rackBlockCount) {
            this.nodeBlockCount = nodeBlockCount;
            this.rackBlockCount = rackBlockCount;
        }

        @Override
        public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) {
            int res = compareBlockCount(d1, d2, nodeBlockCount);
            if (res != 0) {
                return res;
            }
            res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount);
            if (res != 0) {
                return res;
            }
            if (d1.getRemaining() > d2.getRemaining()) {
                return -1;
            }
            if (d1.getRemaining() < d2.getRemaining()) {
                return 1;
            }
            return 0;
        }

        private int compareBlockCount(Node node1, Node node2, Map<String, Integer> blockCount) {
            Integer count1 = blockCount.get(node1.getName());
            Integer count2 = blockCount.get(node2.getName());
            count1 = count1 == null ? 0 : count1;
            count2 = count2 == null ? 0 : count2;
            if (count1 > count2) {
                return 1;
            }
            if (count1 < count2) {
                return -1;
            }
            return 0;
        }
    }
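
    // Example ordering (hypothetical counts): with nodeBlockCount holding
    // { "d1" -> 3, "d2" -> 1 }, compare(d1, d2) > 0, so Collections.max(...)
    // in chooseReplicaToDelete picks d1, the node with the most clustered
    // companion replicas. Ties fall to the node with less remaining space.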

    /**
     * Obtain the companion blocks of the given block.
     * Companion blocks are defined as the blocks that can help recover each
     * other by using the RAID decoder.
     * @param path the path of the file that contains the block
     * @param info the info of this file
     * @param block the given block, or null for the block currently being
     *              written
     * @param inode the inode of the file at path
     * @return the block locations of companion blocks
     */
    List<LocatedBlock> getCompanionBlocks(String path, FileInfo info, Block block, FSInodeInfo inode)
            throws IOException {
        Codec codec = info.codec;
        switch (info.type) {
        case NOT_RAID:
            return Collections.emptyList();
        case HAR_TEMP_PARITY:
            return getCompanionBlocksForHarParityBlock(path, codec.parityLength, block, inode);
        case TEMP_PARITY:
            NameWithINode ni = getSourceFile(path, codec.tmpParityDirectory);
            return getCompanionBlocksForParityBlock(ni.name, path, codec.parityLength, codec.stripeLength, block,
                    codec.isDirRaid, ni.inode, inode);
        case PARITY:
            ni = getSourceFile(path, codec.parityDirectory);
            return getCompanionBlocksForParityBlock(ni.name, path, codec.parityLength, codec.stripeLength, block,
                    codec.isDirRaid, ni.inode, inode);
        case SOURCE:
            return getCompanionBlocksForSourceBlock(path, info.parityName, codec.parityLength, codec.stripeLength,
                    block, codec.isDirRaid, inode, info.parityInode);
        }
        return Collections.emptyList();
    }

    private List<LocatedBlock> getCompanionBlocksForHarParityBlock(String parity, int parityLength, Block block,
            FSInodeInfo inode) throws IOException {
        int blockIndex = getBlockIndex(parity, block, inode, true);
        List<LocatedBlock> parityBlocks = getLocatedBlocks(parity, inode);
        // consider only the parity file in this case because the source file's
        // block locations are not easy to obtain
        List<LocatedBlock> result = new ArrayList<LocatedBlock>();
        int start = Math.max(0, blockIndex - parityLength + 1);
        int end = Math.min(parityBlocks.size(), blockIndex + parityLength);
        result.addAll(parityBlocks.subList(start, end));
        return result;
    }

    private void addCompanionParityBlocks(String parity, INodeFile pinode, int stripeIndex, int parityLength,
            List<LocatedBlock> blocks) throws IOException {
        if (pinode == null)
            return;
        long parityStartOffset = stripeIndex * parityLength * pinode.getPreferredBlockSize();
        long parityFileSize = namesystem.dir.getFileSize(pinode);
        // for parity, always consider the neighbor blocks as companion blocks
        if (parityStartOffset < parityFileSize) {
            blocks.addAll(
                    getLocatedBlocks(pinode, parityStartOffset, parityLength * pinode.getPreferredBlockSize()));
        }
    }
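
    // Worked example (assumed values): with stripeIndex = 2, parityLength = 3
    // and a preferred block size of 256 MB, the stripe's parity blocks start
    // at offset 2 * 3 * 256 MB = 1536 MB and span 3 * 256 MB = 768 MB.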

    String getFullPathName(FSInodeInfo inode) throws IOException {
        String path = cachedFullPathNames.get(inode);
        if (path != null) {
            InjectionHandler.processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_PATH);
            return path;
        }
        byte[][] names = null;
        namesystem.readLock();
        try {
            names = FSDirectory.getINodeByteArray((INode) inode);
        } finally {
            namesystem.readUnlock();
        }
        path = FSDirectory.getFullPathName(names);
        cachedFullPathNames.put(inode, path);
        return path;
    }

    List<LocatedBlock> getLocatedBlocks(String file, FSInodeInfo f) throws IOException {
        List<LocatedBlock> blocks = cachedLocatedBlocks.get(file);
        if (blocks != null) {
            InjectionHandler.processEvent(InjectionEvent.BLOCKPLACEMENTPOLICYRAID_CACHED_BLOCKS);
            return blocks;
        }
        // otherwise populate cache
        INodeFile inode = (INodeFile) f;
        // Note that the list is generated. It is not the internal data of inode.
        List<LocatedBlock> result = inode == null ? new ArrayList<LocatedBlock>()
                : namesystem.getBlockLocationsInternal(inode, 0, Long.MAX_VALUE, Integer.MAX_VALUE)
                        .getLocatedBlocks();
        if (result == null) {
            result = Collections.emptyList();
        } else {
            result = Collections.unmodifiableList(result);
        }
        cachedLocatedBlocks.put(file, result);
        return result;
    }

    public List<LocatedBlock> getLocatedBlocks(INodeFile inode, long offset, long length) throws IOException {
        // Note that the list is generated. It is not the internal data of inode.
        List<LocatedBlock> result = inode == null ? new ArrayList<LocatedBlock>()
                : namesystem.getBlockLocationsInternal(inode, offset, length, Integer.MAX_VALUE).getLocatedBlocks();
        if (result == null) {
            return Collections.emptyList();
        }
        return Collections.unmodifiableList(result);
    }

    private List<LocatedBlock> getCompanionBlocksForParityBlock(String src, String parity, int parityLength,
            int stripeLength, Block block, boolean isDirRaid, FSInodeInfo srcinode, FSInodeInfo pinode)
            throws IOException {
        int blockIndex = getBlockIndex(parity, block, pinode, false);
        int stripeIndex = blockIndex / parityLength;

        List<LocatedBlock> result = new ArrayList<LocatedBlock>();
        addCompanionParityBlocks(parity, (INodeFile) pinode, stripeIndex, parityLength, result);
        if (src == null) {
            return result;
        }

        // get the source blocks.
        List<LocatedBlock> sourceBlocks;
        int sourceStart = stripeIndex * stripeLength;
        int sourceEnd = sourceStart + stripeLength;

        if (!isDirRaid) {
            sourceBlocks = getLocatedBlocks(src, srcinode);
        } else {
            sourceBlocks = new ArrayList<LocatedBlock>();
            INode inode = (INode) srcinode;
            INodeDirectory srcNode;
            if (inode.isDirectory()) {
                srcNode = (INodeDirectory) inode;
            } else {
                throw new IOException("The source should be a directory in Dir-Raiding: " + src);
            }

            boolean found = false;
            String srcPath = src + Path.SEPARATOR;
            // look for the stripe 
            namesystem.readLock();
            namesystem.dir.readLock();
            try {
                for (INode child : srcNode.getChildren()) {
                    if (child.isDirectory()) {
                        throw new IOException("The source is not a leaf directory: " + src
                                + ", contains a subdirectory: " + child.getLocalName());
                    }
                    INodeFile childInode = (INodeFile) child;
                    long fileSize = namesystem.dir.getFileSize(childInode);
                    // check if we will do dir-raid on this file
                    if (fileSize < minFileSize) {
                        continue;
                    }
                    int numBlocks = childInode.getBlocks().length;

                    if (numBlocks < sourceStart && !found) {
                        sourceStart -= numBlocks;
                        sourceEnd -= numBlocks;
                        continue;
                    } else {
                        String childName = srcPath + child.getLocalName();
                        List<LocatedBlock> childBlocks = getLocatedBlocks(childName, child);
                        found = true;
                        sourceBlocks.addAll(childBlocks);
                        if (sourceEnd <= sourceBlocks.size()) {
                            break;
                        }
                    }
                }
            } finally {
                namesystem.dir.readUnlock();
                namesystem.readUnlock();
            }
        }

        sourceEnd = Math.min(sourceEnd, sourceBlocks.size());
        if (sourceStart < sourceBlocks.size()) {
            result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
        }
        return result;
    }
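
    // Worked example (assumed layout): stripeLength = 3 and a raided directory
    // holding f1 (2 blocks) and f2 (4 blocks). For stripe 1, sourceStart = 3
    // and sourceEnd = 6; the loop above skips f1 (adjusting them to 1 and 4),
    // appends f2's blocks, and the subList yields f2's blocks 1..3, which are
    // exactly blocks 3..5 of the concatenated directory block list.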

    private List<LocatedBlock> getCompanionBlocksForSourceBlock(String src, String parity, int parityLength,
            int stripeLength, Block block, boolean isDirRaid, FSInodeInfo inode, FSInodeInfo parityInode)
            throws IOException {
        List<LocatedBlock> result = new ArrayList<LocatedBlock>();
        List<LocatedBlock> sourceBlocks = null;
        int blockIndex = getBlockIndex(src, block, inode, true);
        int stripeIndex = 0;
        int sourceStart = 0;
        int sourceEnd = 0;

        if (!isDirRaid) {
            sourceBlocks = getLocatedBlocks(src, inode);
            stripeIndex = blockIndex / stripeLength;
            sourceStart = stripeIndex * stripeLength;
            sourceEnd = Math.min(sourceStart + stripeLength, sourceBlocks.size());
        } else {
            // cache the candidate blocks.
            BlockInfo[] tmpStripe = new BlockInfo[stripeLength];
            for (int i = 0; i < stripeLength; i++) {
                tmpStripe[i] = new BlockInfo(0, 0);
            }
            int curIdx = 0;
            boolean found = false;

            sourceBlocks = new ArrayList<LocatedBlock>();
            byte[][] components = INodeDirectory.getPathComponents(src);
            INodeDirectory srcNode = namesystem.dir.getINode(components).getParent();
            String parentPath = getParentPath(src);
            if (!parentPath.endsWith(Path.SEPARATOR)) {
                parentPath += Path.SEPARATOR;
            }

            namesystem.readLock();
            namesystem.dir.readLock();
            try {
                List<INode> children = srcNode.getChildren();
                // look for the stripe
                for (int fid = 0; fid < children.size(); fid++) {
                    INode child = children.get(fid);
                    if (child.isDirectory()) {
                        throw new IOException("The raided-directory is not a leaf directory: " + parentPath
                                + ", contains a subdirectory: " + child.getLocalName());
                    }
                    INodeFile childInode = (INodeFile) child;

                    long fileSize = namesystem.dir.getFileSize(childInode);
                    // check if we will do dir-raid on this file
                    if (fileSize < minFileSize) {
                        continue;
                    }

                    String childName = parentPath + child.getLocalName();
                    if (found) {
                        if (sourceEnd <= sourceBlocks.size()) {
                            break;
                        }
                        List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode);
                        sourceBlocks.addAll(childBlocks);
                    } else {
                        int childBlockSize = childInode.getBlocks().length;

                        /*
                         * If we find the target file, add the cached blocks
                         * and then the child's blocks, and update stripeIndex,
                         * sourceStart and sourceEnd accordingly.
                         */
                        if (childName.equals(src)) {
                            found = true;
                            List<LocatedBlock> prevChildBlocks = null;
                            for (int i = 0; i < curIdx; i++) {
                                if (i == 0 || tmpStripe[i].fileIdx != tmpStripe[i - 1].fileIdx) {
                                    INode prevChildInode = children.get(tmpStripe[i].fileIdx);
                                    String prevChildName = parentPath + prevChildInode.getLocalName();
                                    prevChildBlocks = getLocatedBlocks(prevChildName, prevChildInode);
                                }
                                sourceBlocks.add(prevChildBlocks.get(tmpStripe[i].blockId));
                            }
                            List<LocatedBlock> childBlocks = getLocatedBlocks(childName, childInode);
                            sourceBlocks.addAll(childBlocks);
                            blockIndex += curIdx;

                            stripeIndex += blockIndex / stripeLength;
                            sourceStart = (blockIndex / stripeLength) * stripeLength;
                            sourceEnd = sourceStart + stripeLength;
                        } else {
                            /*
                             * The target file has not been found yet, so keep
                             * the current partial stripe in the temporary
                             * stripe cache.
                             */
                            /*
                             * childBlockSize is small enough that all of this
                             * file's blocks fit into the current partial
                             * stripe cache.
                             */
                            if (curIdx + childBlockSize < stripeLength) {
                                for (int i = 0; i < childBlockSize; i++, curIdx++) {
                                    tmpStripe[curIdx].fileIdx = fid;
                                    tmpStripe[curIdx].blockId = i;
                                }
                            } else {
                                /*
                                 * This file's blocks cross a stripe boundary:
                                 * advance stripeIndex, find where the current
                                 * (last, partial) stripe starts within this
                                 * file, and copy that tail into the temporary
                                 * stripe cache.
                                 */
                                stripeIndex += (curIdx + childBlockSize) / stripeLength;
                                int childStart = ((curIdx + childBlockSize) / stripeLength) * stripeLength - curIdx;
                                curIdx = 0;
                                for (; childStart < childBlockSize; childStart++, curIdx++) {
                                    tmpStripe[curIdx].fileIdx = fid;
                                    tmpStripe[curIdx].blockId = childStart;
                                }
                                curIdx %= stripeLength;
                            }
                        }
                    }
                }
            } finally {
                namesystem.dir.readUnlock();
                namesystem.readUnlock();
            }
            sourceEnd = Math.min(sourceEnd, sourceBlocks.size());
        }

        if (sourceStart < sourceBlocks.size()) {
            for (int i = sourceStart; i < sourceEnd; i++) {
                result.add(sourceBlocks.get(i));
            }
        }
        if (parity == null) {
            return result;
        }
        // add the parity blocks.
        addCompanionParityBlocks(parity, (INodeFile) parityInode, stripeIndex, parityLength, result);
        return result;
    }

    private int getBlockIndex(String file, Block block, FSInodeInfo inode, boolean cacheResult) throws IOException {
        if (cacheResult) {
            List<LocatedBlock> blocks = getLocatedBlocks(file, inode);
            // a null block indicates the block currently being appended;
            // return size() as its index in that case
            if (block == null) {
                return blocks.size();
            }
            for (int i = 0; i < blocks.size(); i++) {
                if (blocks.get(i).getBlock().equals(block)) {
                    return i;
                }
            }
            throw new IOException("Cannot locate " + block + " in file " + file);
        } else {
            return namesystem.dir.getBlockIndex((INodeFile) inode, block, file);
        }
    }

    /**
     * Cache results for FSInodeInfo.getFullPathName()
     */
    static class CachedFullPathNames {
        private Cache<INodeWithHashCode, String> cacheInternal;

        CachedFullPathNames(final Configuration conf) {
            this.cacheInternal = new Cache<INodeWithHashCode, String>(conf);
        }

        private static class INodeWithHashCode {
            FSInodeInfo inode;

            INodeWithHashCode(FSInodeInfo inode) {
                this.inode = inode;
            }

            @Override
            public boolean equals(Object obj) {
                if (!(obj instanceof INodeWithHashCode))
                    return false;
                return inode == ((INodeWithHashCode) obj).inode;
            }

            @Override
            public int hashCode() {
                return System.identityHashCode(inode);
            }
        }

        public String get(FSInodeInfo inode) throws IOException {
            return cacheInternal.get(new INodeWithHashCode(inode));
        }

        public void put(FSInodeInfo inode, String path) {
            cacheInternal.put(new INodeWithHashCode(inode), path);
        }
    }

    /**
     * Cache results for FSNamesystem.getBlockLocations()
     */
    static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> {
        CachedLocatedBlocks(Configuration conf) {
            super(conf);
        }
    }

    /**
     * Generic caching class
     */
    private static class Cache<K, V> {
        private Map<K, ValueWithTime> cache;
        final private long cacheTimeout;
        final private int maxEntries;

        // The timeout is long, but the consequences of a stale value are not serious
        Cache(Configuration conf) {
            this.cacheTimeout = conf.getLong("raid.blockplacement.cache.timeout", 5000L); // 5 seconds
            this.maxEntries = conf.getInt("raid.blockplacement.cache.size", 1000); // 1000 entries
            Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>(2 * maxEntries, 0.75f, true) {
                private static final long serialVersionUID = 1L;

                @Override
                protected boolean removeEldestEntry(Map.Entry<K, ValueWithTime> eldest) {
                    return size() > maxEntries;
                }
            };
            this.cache = Collections.synchronizedMap(map);
        }

        public V get(K key) throws IOException {
            // This method is not synchronized, so it may return a stale value;
            // that is acceptable here.
            ValueWithTime result = cache.get(key);
            long now = System.currentTimeMillis();
            if (result != null && now - result.cachedTime < cacheTimeout) {
                return result.value;
            }
            return null;
        }

        public void put(K key, V value) {
            ValueWithTime v = new ValueWithTime();
            v.value = value;
            v.cachedTime = System.currentTimeMillis();
            cache.put(key, v);
        }

        private class ValueWithTime {
            V value = null;
            long cachedTime = 0L;
        }
    }

    /**
     * Get the path of the corresponding source file for a valid parity
     * file. Returns null if it does not exist.
     * @param parity the toUri path of the parity file
     * @param prefix the parity-directory prefix to strip from the path
     * @return the toUri path of the source file, with its inode
     */
    NameWithINode getSourceFile(String parity, String prefix) throws IOException {
        if (isHarFile(parity)) {
            return null;
        }
        // remove the prefix
        String src = parity.substring(prefix.length());
        byte[][] components = INodeDirectory.getPathComponents(src);
        INode inode = namesystem.dir.getINode(components);
        return new NameWithINode(src, inode);
    }

    class NameWithINode {
        String name;
        INode inode;

        public NameWithINode(String name, INode inode) {
            this.name = name;
            this.inode = inode;
        }
    }

    /**
     * Get the path of the parity file. Returns null if it does not exist.
     * @param codec the codec of the parity file
     * @return the toUri path of the parity file, with its inode
     */
    private NameWithINode getParityFile(Codec codec, String src) throws IOException {
        String parity;
        if (codec.isDirRaid) {
            String parent = getParentPath(src);
            parity = codec.parityDirectory + parent;
        } else {
            parity = codec.parityDirectory + src;
        }
        byte[][] components = INodeDirectory.getPathComponents(parity);
        INode parityInode = namesystem.dir.getINode(components);
        if (parityInode == null)
            return null;
        return new NameWithINode(parity, parityInode);
    }

    static String getParentPath(String src) {
        int precision = 1;
        if (src.length() > 1 && src.endsWith(Path.SEPARATOR)) {
            precision = 2;
        }
        src = src.substring(0, src.lastIndexOf(Path.SEPARATOR, src.length() - precision));
        if (src.isEmpty())
            src = Path.SEPARATOR;
        return src;
    }
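
    // Examples: getParentPath("/a/b/c") returns "/a/b",
    // getParentPath("/a/b/") returns "/a", and getParentPath("/a") returns "/".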

    private boolean isHarFile(String path) {
        return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1;
    }

    class FileInfo {
        FileInfo(FileType type, Codec codec) {
            this.type = type;
            this.codec = codec;
        }

        FileInfo(FileType type, Codec codec, String parityName, INode parityInode) throws IOException {
            if (type != FileType.SOURCE) {
                throw new IOException("FileType must be source");
            }
            this.type = type;
            this.codec = codec;
            this.parityInode = parityInode;
            this.parityName = parityName;
        }

        final FileType type;
        final Codec codec;
        INode parityInode = null;
        String parityName = null;
    }

    enum FileType {
        NOT_RAID, HAR_TEMP_PARITY, TEMP_PARITY, PARITY, SOURCE,
    }

    /**
     * Return RAID information about a file, i.e. whether this file is
     * a source file, a parity file, or not raided.
     *
     * @param path file name
     * @return raid information
     * @throws IOException
     */
    protected FileInfo getFileInfo(FSInodeInfo srcINode, String path) throws IOException {
        for (Codec c : Codec.getCodecs()) {
            if (path.startsWith(c.tmpHarDirectoryPS)) {
                return new FileInfo(FileType.HAR_TEMP_PARITY, c);
            }
            if (path.startsWith(c.tmpParityDirectoryPS)) {
                return new FileInfo(FileType.TEMP_PARITY, c);
            }
            if (path.startsWith(c.parityDirectoryPS)) {
                return new FileInfo(FileType.PARITY, c);
            }
            NameWithINode ni = getParityFile(c, path);
            if (ni != null) {
                if (c.isDirRaid && srcINode != null && srcINode instanceof INodeFile) {
                    INodeFile inf = (INodeFile) srcINode;
                    if (inf.getFileSize() < this.minFileSize) {
                        // It's too small to be raided
                        return new FileInfo(FileType.NOT_RAID, null);
                    }
                }
                return new FileInfo(FileType.SOURCE, c, ni.name, ni.inode);
            }
        }
        return new FileInfo(FileType.NOT_RAID, null);
    }
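
    // Classification sketch (directory names below are assumed, per-codec
    // configuration): with a codec whose parityDirectory is "/raid" and
    // tmpParityDirectory is "/tmp/raid",
    //   getFileInfo(inode, "/raid/user/foo")     -> PARITY
    //   getFileInfo(inode, "/tmp/raid/user/foo") -> TEMP_PARITY
    //   getFileInfo(inode, "/user/foo")          -> SOURCE if a parity file
    //                                               exists, else NOT_RAID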
}
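
Example

The inner Cache class above combines an access-ordered LinkedHashMap (for LRU
eviction) with a per-entry timestamp (for expiry). Below is a minimal,
self-contained sketch of that same pattern using only the JDK; the class and
method names are illustrative and not part of Hadoop.

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

public class TimedLruCacheDemo {

    static class TimedLruCache<K, V> {
        // a value plus the time it was cached
        private static class Timed<T> {
            final T value;
            final long cachedAt = System.currentTimeMillis();
            Timed(T value) { this.value = value; }
        }

        private final long timeoutMs;
        private final Map<K, Timed<V>> map;

        TimedLruCache(final int maxEntries, long timeoutMs) {
            this.timeoutMs = timeoutMs;
            // accessOrder = true makes iteration order the LRU order;
            // removeEldestEntry evicts once the size bound is exceeded
            this.map = Collections.synchronizedMap(new LinkedHashMap<K, Timed<V>>(2 * maxEntries, 0.75f, true) {
                @Override
                protected boolean removeEldestEntry(Map.Entry<K, Timed<V>> eldest) {
                    return size() > maxEntries;
                }
            });
        }

        V get(K key) {
            Timed<V> t = map.get(key);
            if (t != null && System.currentTimeMillis() - t.cachedAt < timeoutMs) {
                return t.value;
            }
            return null; // missing or expired; the caller repopulates via put()
        }

        void put(K key, V value) {
            map.put(key, new Timed<V>(value));
        }
    }

    public static void main(String[] args) throws InterruptedException {
        TimedLruCache<String, String> cache = new TimedLruCache<String, String>(2, 50L);
        cache.put("a", "1");
        cache.put("b", "2");
        cache.put("c", "3");                // evicts "a": the size bound is 2
        System.out.println(cache.get("a")); // null (evicted)
        System.out.println(cache.get("c")); // 3
        Thread.sleep(60L);
        System.out.println(cache.get("c")); // null (expired after 50 ms)
    }
}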