org.apache.hadoop.raid.StripeReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.raid.StripeReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithFileName;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.StripeStore.StripeInfo;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public abstract class StripeReader {
    public static final Log LOG = LogFactory.getLog(StripeReader.class);
    // Codec whose stripeLength and parityLength size the input arrays built by this class.
    Codec codec;
    // Configuration handed to the constructor; also consulted for "io.file.buffer.size".
    Configuration conf;
    // File system handed to the constructor.
    FileSystem fs;
    // Stripe index this reader was created with; also used to locate parity blocks.
    long stripeStartIdx;
    // Exclusive upper bound checked by hasNext(). Not assigned anywhere in this class --
    // presumably set by subclasses; verify against the concrete readers.
    long stripeEndIdx;
    // Value of "io.file.buffer.size" (default 64 * 1024) captured in the constructor.
    int bufferSize;

    // Index of the stripe that getNextStripeInputs() will read next.
    protected long currentStripeIdx;

    /**
     * Immutable holder describing where a block sits: the index of its stripe,
     * its index inside that stripe, and the list of file statuses that was
     * available when the location was computed (may be null).
     */
    public static class LocationPair {
        private final int stripeIdx;
        private final int blockIdxInStripe;
        private final List<FileStatus> lfs;

        /**
         * @param stripeIdx index of the stripe containing the block
         * @param blockIdxInStripe index of the block inside that stripe
         * @param lfs file status list associated with the lookup; stored as-is
         *            (not copied) and may be null
         */
        public LocationPair(int stripeIdx, int blockIdxInStripe, List<FileStatus> lfs) {
            this.stripeIdx = stripeIdx;
            this.blockIdxInStripe = blockIdxInStripe;
            this.lfs = lfs;
        }

        /** @return index of the stripe containing the block */
        int getStripeIdx() {
            return stripeIdx;
        }

        /** @return index of the block inside its stripe */
        int getBlockIdxInStripe() {
            return blockIdxInStripe;
        }

        /** @return the file status list given at construction, possibly null */
        List<FileStatus> getListFileStatus() {
            return lfs;
        }
    }

    /**
     * Bundles the three parallel arrays describing the inputs of one stripe:
     * the streams themselves, the paths they come from and the block offsets.
     * The arrays are stored and returned by reference, without copying.
     */
    public static class StripeInputInfo {
        private final long[] offSets;
        private final Path[] srcPaths;
        private final InputStream[] inputs;

        /**
         * @param inputs input streams of the stripe
         * @param srcPaths source paths of the stripe
         * @param offSets block offsets of the stripe
         */
        public StripeInputInfo(InputStream[] inputs, Path[] srcPaths, long[] offSets) {
            this.offSets = offSets;
            this.srcPaths = srcPaths;
            this.inputs = inputs;
        }

        /** @return the stream array passed to the constructor */
        InputStream[] getInputs() {
            return this.inputs;
        }

        /** @return the path array passed to the constructor */
        Path[] getSrcPaths() {
            return this.srcPaths;
        }

        /** @return the offset array passed to the constructor */
        long[] getBlockOffsets() {
            return this.offSets;
        }
    }

    /**
     * Stores the collaborators and positions the reader at {@code stripeStartIdx}.
     * The buffer size is read from "io.file.buffer.size" (default 64 * 1024).
     * Note that {@code stripeEndIdx} is not assigned here.
     *
     * @param conf configuration to read the buffer size from
     * @param codec codec describing stripe and parity lengths
     * @param fs file system used by the reader
     * @param stripeStartIdx index of the first stripe to read
     */
    public StripeReader(Configuration conf, Codec codec, FileSystem fs, long stripeStartIdx) {
        this.conf = conf;
        this.fs = fs;
        this.codec = codec;
        this.bufferSize = conf.getInt("io.file.buffer.size", 64 * 1024);
        // Reading starts at the first stripe.
        this.currentStripeIdx = this.stripeStartIdx = stripeStartIdx;
    }

    /**
     * Returns the index of the stripe that the next call to
     * {@link #getNextStripeInputs()} will read.
     */
    public long getCurrentStripeIdx() {
        return currentStripeIdx;
    }

    /**
     * Tells whether another stripe remains, i.e. whether the current stripe
     * index is still below {@code stripeEndIdx}.
     */
    public boolean hasNext() {
        return stripeEndIdx > currentStripeIdx;
    }

    /**
     * Fetches the inputs of the current stripe and then moves on to the next
     * one. The index only advances when {@link #getStripeInputs(long)}
     * returns normally.
     *
     * @throws IOException if the inputs of the current stripe cannot be built
     */
    public StripeInputInfo getNextStripeInputs() throws IOException {
        final StripeInputInfo nextInputs = getStripeInputs(currentStripeIdx);
        currentStripeIdx += 1;
        return nextInputs;
    }

    /**
     * Builds the inputs for the stripe with the given index. Called by
     * {@link #getNextStripeInputs()} with the current stripe index.
     *
     * @param StripeIdx index of the stripe to build inputs for
     * @throws IOException if the inputs cannot be built
     */
    public abstract StripeInputInfo getStripeInputs(long StripeIdx) throws IOException;

    /**
     * Finds the position of a block inside a file by scanning the file's
     * located blocks for a matching block id.
     *
     * @param srcFs file system used to fetch the file status and block list
     * @param srcPath file to search
     * @param blockId id of the block to look for
     * @return zero-based position of the block in the file, or -1 when no
     *         located block carries that id
     * @throws IOException if the file status or the block list cannot be fetched
     */
    private static int getBlockIdInFile(DistributedFileSystem srcFs, Path srcPath, long blockId)
            throws IOException {
        final long fileLength = srcFs.getFileStatus(srcPath).getLen();
        final LocatedBlocks located = srcFs.getClient().getLocatedBlocks(srcPath.toUri().getPath(), 0,
                fileLength);

        LOG.info("Look for block " + blockId + " in file " + srcPath);
        int position = -1;
        int cursor = 0;
        for (LocatedBlock candidate : located.getLocatedBlocks()) {
            if (candidate.getBlock().getBlockId() == blockId) {
                position = cursor;
                break;
            }
            cursor++;
        }
        return position;
    }

    /**
     * Builds the InputStream array for one stripe from its StripeInfo.
     *
     * Locations below {@code codec.parityLength} map to {@code si.parityBlocks};
     * the remaining locations map to {@code si.srcBlocks}. Erased locations,
     * locations the erasure code does not need, and source locations beyond
     * the end of {@code si.srcBlocks} are served by zero streams of length
     * {@code limit}. When opening a location raises a BlockMissingException or
     * ChecksumException, the location is appended to {@code erasedLocations},
     * all streams opened so far are closed and the whole array is rebuilt.
     *
     * @param srcFs file system used to resolve block ids and open files
     * @param srcStat not used by this method
     * @param codec codec providing stripeLength and parityLength
     * @param si parity and source blocks of the stripe
     * @param offsetInBlock offset inside each block at which reading starts
     * @param limit length of the zero streams substituted for unread locations
     * @param erasedLocations known erased locations; extended when a location
     *        turns out to be missing or corrupt
     * @param locationsToNotRead output set, cleared and refilled on every attempt
     * @param code erasure code deciding which locations must be read
     * @return one stream per location, stripeLength + parityLength in total
     * @throws IOException on any failure other than a missing or corrupt block
     */
    public static InputStream[] buildInputsFromStripeInfo(DistributedFileSystem srcFs, FileStatus srcStat,
            Codec codec, StripeInfo si, long offsetInBlock, long limit, List<Integer> erasedLocations,
            Set<Integer> locationsToNotRead, ErasureCode code) throws IOException {

        InputStream[] inputs = new InputStream[codec.stripeLength + codec.parityLength];

        boolean redo = false;
        do {
            redo = false;
            locationsToNotRead.clear();
            List<Integer> locationsToRead = code.locationsToReadForDecode(erasedLocations);

            for (int i = 0; i < inputs.length; i++) {
                boolean isErased = (erasedLocations.indexOf(i) != -1);
                boolean shouldRead = (locationsToRead.indexOf(i) != -1);
                try {
                    InputStream stm = null;
                    if (isErased || !shouldRead) {
                        if (isErased) {
                            LOG.info("Location " + i + " is erased, using zeros");
                        } else {
                            LOG.info("Location " + i + " need not be read, using zeros");
                        }
                        locationsToNotRead.add(i);
                        stm = new RaidUtils.ZeroInputStream(limit);
                    } else {
                        long blockId;
                        if (i < codec.parityLength) {
                            blockId = si.parityBlocks.get(i).getBlockId();
                        } else if ((i - codec.parityLength) < si.srcBlocks.size()) {
                            blockId = si.srcBlocks.get(i - codec.parityLength).getBlockId();
                        } else {
                            // Stripe has fewer source blocks than stripeLength.
                            LOG.info("Using zeros for location " + i);
                            inputs[i] = new RaidUtils.ZeroInputStream(limit);
                            continue;
                        }
                        LocatedBlockWithFileName lb = srcFs.getClient().getBlockInfo(blockId);
                        if (lb == null) {
                            throw new BlockMissingException(String.valueOf(blockId),
                                    "Location " + i + " can not be found. Block id: " + blockId, 0);
                        } else {
                            Path filePath = new Path(lb.getFileName());
                            // NOTE(review): the status is fetched before the exists() check
                            // below; if getFileStatus throws for a missing file the
                            // "does not exist" branch is never reached -- confirm intent.
                            FileStatus stat = srcFs.getFileStatus(filePath);
                            long blockSize = stat.getBlockSize();
                            if (offsetInBlock > blockSize) {
                                stm = new RaidUtils.ZeroInputStream(limit);
                            } else {
                                if (srcFs.exists(filePath)) {
                                    int blockIdxInFile = getBlockIdInFile(srcFs, filePath, blockId);
                                    if (blockIdxInFile < 0) {
                                        // The block is no longer listed in the file. Computing an
                                        // offset from -1 would read the wrong bytes, so treat the
                                        // location as missing and let the redo logic handle it.
                                        throw new BlockMissingException(lb.getFileName(), "Location " + i
                                                + ": block " + blockId + " is not listed in file "
                                                + lb.getFileName(), 0);
                                    }
                                    long startOffset = blockIdxInFile * blockSize;
                                    long offset = startOffset + offsetInBlock;
                                    LOG.info("Opening " + lb.getFileName() + ":" + offset + " for location " + i);
                                    FSDataInputStream is = srcFs.open(filePath);
                                    try {
                                        is.seek(offset);
                                    } catch (IOException seekFailure) {
                                        // The stream is not yet stored in inputs, so nobody else
                                        // would close it.
                                        try {
                                            is.close();
                                        } catch (IOException closeFailure) {
                                            LOG.warn("Failed to close " + filePath + " after seek failure",
                                                    closeFailure);
                                        }
                                        throw seekFailure;
                                    }
                                    stm = is;
                                } else {
                                    LOG.info("Location " + i + ", File " + lb.getFileName()
                                            + " does not exist, using zeros");
                                    locationsToNotRead.add(i);
                                    stm = new RaidUtils.ZeroInputStream(limit);
                                }
                            }
                        }
                    }
                    inputs[i] = stm;
                } catch (IOException e) {
                    if (e instanceof BlockMissingException || e instanceof ChecksumException) {
                        // Mark the location as erased and rebuild every input from scratch.
                        erasedLocations.add(i);
                        redo = true;
                        RaidUtils.closeStreams(inputs);
                        break;
                    } else {
                        throw e;
                    }
                }
            }
        } while (redo);
        assert (locationsToNotRead.size() == codec.parityLength);
        return inputs;
    }

    /**
     * Builds (codec.stripeLength + codec.parityLength) inputs given some erased locations.
     *
     * Erased locations and locations the erasure code does not need are served
     * by zero streams; every other location is opened through
     * {@link #buildOneInput}. When opening a location raises a
     * BlockMissingException or ChecksumException, the location is appended to
     * {@code erasedLocations}, all streams opened so far are closed and the
     * whole array is rebuilt.
     *
     * @param erasedLocations known erased locations; extended when a location
     *        turns out to be missing or corrupt
     * @param locationsToNotRead output set, cleared and refilled on every attempt
     * @param code erasure code deciding which locations must be read
     * @return one stream per location
     * @throws IOException on any failure other than a missing or corrupt block
     */
    public InputStream[] buildInputs(FileSystem srcFs, Path srcFile, FileStatus srcStat, FileSystem parityFs,
            Path parityFile, FileStatus parityStat, int stripeIdx, long offsetInBlock,
            List<Integer> erasedLocations, Set<Integer> locationsToNotRead, ErasureCode code) throws IOException {
        InputStream[] inputs = new InputStream[codec.stripeLength + codec.parityLength];
        boolean redo;
        do {
            // Reset on every attempt; otherwise a single failure would keep the
            // loop running forever even after a fully successful rebuild.
            redo = false;
            locationsToNotRead.clear();
            List<Integer> locationsToRead = code.locationsToReadForDecode(erasedLocations);
            for (int i = 0; i < inputs.length; i++) {
                boolean isErased = (erasedLocations.indexOf(i) != -1);
                boolean shouldRead = (locationsToRead.indexOf(i) != -1);
                try {
                    InputStream stm = null;
                    if (isErased || !shouldRead) {
                        if (isErased) {
                            LOG.info("Location " + i + " is erased, using zeros");
                        } else {
                            LOG.info("Location " + i + " need not be read, using zeros");
                        }
                        locationsToNotRead.add(i);
                        // NOTE(review): the zero stream is sized as block size times the
                        // location's block index, unlike buildInputsFromStripeInfo which
                        // uses an explicit limit -- confirm this is intended.
                        stm = new RaidUtils.ZeroInputStream(srcStat.getBlockSize()
                                * ((i < codec.parityLength) ? stripeIdx * codec.parityLength + i
                                        : stripeIdx * codec.stripeLength + i - codec.parityLength));
                    } else {
                        stm = buildOneInput(i, offsetInBlock, srcFs, srcFile, srcStat, parityFs, parityFile,
                                parityStat);
                    }
                    inputs[i] = stm;
                } catch (IOException e) {
                    if (e instanceof BlockMissingException || e instanceof ChecksumException) {
                        // Mark the location as erased and rebuild every input from scratch.
                        erasedLocations.add(i);
                        redo = true;
                        RaidUtils.closeStreams(inputs);
                        break;
                    } else {
                        throw e;
                    }
                }
            }
        } while (redo);
        assert (locationsToNotRead.size() == codec.parityLength);
        return inputs;
    }

    /**
     * Convenience overload of
     * {@link #getBlockLocation(Codec, FileSystem, Path, int, Configuration, List)}
     * for callers that have no file status list at hand; a null list is
     * passed through.
     */
    public static LocationPair getBlockLocation(Codec codec, FileSystem srcFs, Path srcFile, int blockIdxInFile,
            Configuration conf) throws IOException {
        final List<FileStatus> noListing = null;
        return getBlockLocation(codec, srcFs, srcFile, blockIdxInFile, conf, noListing);
    }

    /**
     * Maps a block of a file to its stripe and to its position inside that
     * stripe for the given codec.
     *
     * For a directory-raid codec the block index is first offset by the block
     * counts of the files that precede {@code srcFile} in the listing of its
     * parent directory; the listing is fetched when {@code lfs} is null.
     *
     * @param lfs listing of the parent directory, or null to have it fetched
     * @return the stripe index, the index within the stripe and the listing
     *         that was used (null for non-directory codecs called without one)
     * @throws IOException if the parent directory cannot be listed
     */
    public static LocationPair getBlockLocation(Codec codec, FileSystem srcFs, Path srcFile, int blockIdxInFile,
            Configuration conf, List<FileStatus> lfs) throws IOException {
        int globalBlockIdx = blockIdxInFile;
        if (codec.isDirRaid) {
            final Path directory = srcFile.getParent();
            if (lfs == null) {
                lfs = RaidNode.listDirectoryRaidFileStatus(conf, srcFs, directory);
                if (lfs == null) {
                    throw new IOException("Couldn't list files under " + directory);
                }
            }
            final Path qualifiedSrc = srcFs.makeQualified(srcFile);
            // Count the blocks of every file listed ahead of the source file.
            int precedingBlocks = 0;
            for (FileStatus entry : lfs) {
                if (entry.getPath().equals(qualifiedSrc)) {
                    precedingBlocks += blockIdxInFile;
                    break;
                }
                precedingBlocks += RaidNode.getNumBlocks(entry);
            }
            globalBlockIdx = precedingBlocks;
        }
        return new LocationPair(globalBlockIdx / codec.stripeLength, globalBlockIdx % codec.stripeLength, lfs);
    }

    /**
     * Maps a block of a parity file to its stripe and to its position among
     * the parity blocks of that stripe. The returned pair carries no file
     * status list.
     */
    public static LocationPair getParityBlockLocation(Codec codec, final int blockIdxInFile) {
        final int parityPerStripe = codec.parityLength;
        return new LocationPair(blockIdxInFile / parityPerStripe, blockIdxInFile % parityPerStripe, null);
    }

    /**
     * Creates the reader matching the codec: a FileStripeReader for plain
     * codecs, a DirectoryStripeReader for directory-raid codecs. For the
     * latter the directory is {@code srcStat}'s own path when it is a
     * directory and its parent otherwise.
     *
     * @throws IOException if the reader cannot be created
     */
    public static StripeReader getStripeReader(Codec codec, Configuration conf, long blockSize, FileSystem fs,
            long stripeIdx, FileStatus srcStat) throws IOException {
        if (!codec.isDirRaid) {
            return new FileStripeReader(conf, blockSize, codec, fs, stripeIdx, -1L, srcStat.getPath(),
                    srcStat.getLen());
        }
        final Path raidDir;
        if (srcStat.isDir()) {
            raidDir = srcStat.getPath();
        } else {
            raidDir = srcStat.getPath().getParent();
        }
        return new DirectoryStripeReader(conf, codec, fs, stripeIdx, -1L, raidDir,
                RaidNode.listDirectoryRaidFileStatus(conf, fs, raidDir));
    }

    /**
     * Opens the input stream for a single location of the stripe. Called by
     * buildInputs for every location that is neither erased nor skipped; a
     * BlockMissingException or ChecksumException thrown from here makes
     * buildInputs mark the location as erased and rebuild all inputs.
     *
     * @param locationIndex index of the location within the stripe
     * @param offsetInBlock offset inside the block, as passed to buildInputs
     * @throws IOException if the stream cannot be opened
     */
    protected abstract InputStream buildOneInput(int locationIndex, long offsetInBlock, FileSystem srcFs,
            Path srcFile, FileStatus srcStat, FileSystem parityFs, Path parityFile, FileStatus parityStat)
            throws IOException;

    /**
     * Opens the parity file and positions the stream at the parity block
     * belonging to the given location.
     *
     * The parity block index is {@code codec.parityLength * stripeStartIdx +
     * locationIndex}; the stream is positioned at that block's start plus
     * {@code offsetInBlock}.
     *
     * @param locationIndex index of the parity location within the stripe
     * @param parityFile parity file to open
     * @param parityFs file system holding the parity file
     * @param parityStat status of the parity file, used only for a sanity assert
     * @param offsetInBlock offset inside the parity block
     * @param parityBlockSize block size of the parity file
     * @return the opened stream, already positioned
     * @throws IOException if the file cannot be opened or the seek fails; the
     *         stream is closed before a seek failure is rethrown
     */
    protected InputStream getParityFileInput(int locationIndex, Path parityFile, FileSystem parityFs,
            FileStatus parityStat, long offsetInBlock, long parityBlockSize) throws IOException {
        // Dealing with a parity file here.
        // Kept as long: narrowing to int would truncate large stripe indices
        // before the multiplication by the block size.
        long parityBlockIdx = codec.parityLength * stripeStartIdx + locationIndex;
        long offset = parityBlockSize * parityBlockIdx + offsetInBlock;
        assert (offset < parityStat.getLen());
        LOG.info("Opening " + parityFile + ":" + offset + " for location " + locationIndex);
        FSDataInputStream s = parityFs.open(parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
        try {
            s.seek(offset);
        } catch (IOException seekFailure) {
            // The caller never receives the stream, so release it here.
            try {
                s.close();
            } catch (IOException closeFailure) {
                LOG.warn("Failed to close " + parityFile + " after seek failure", closeFailure);
            }
            throw seekFailure;
        }
        return s;
    }
}