org.apache.hadoop.raid.Encoder.java Source code

Introduction

Here is the source code for org.apache.hadoop.raid.Encoder.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import java.util.zip.CRC32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.raid.DistRaid.EncodingCandidate;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.LogUtils.LOGTYPES;
import org.apache.hadoop.raid.BlockReconstructor.CorruptBlockReconstructor;
import org.apache.hadoop.raid.StripeReader.StripeInputInfo;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.io.MD5Hash;

/**
 * Represents a generic encoder that can generate a parity file for a source
 * file.
 */
public class Encoder {
    public static final Log LOG = LogFactory.getLog("org.apache.hadoop.raid.Encoder");
    public static final int DEFAULT_PARALLELISM = 4;
    public static final int DEFAULT_MAX_BUFFER_SIZE = 1024 * 1024;
    public static final String ENCODING_MAX_BUFFER_SIZE_KEY = "raid.encoder.max.buffer.size";
    public static final String FINAL_TMP_PARITY_NAME = "0";
    public static final int DEFAULT_RETRY_COUNT_PARTIAL_ENCODING = 3;
    public static final String RETRY_COUNT_PARTIAL_ENCODING_KEY = "raid.encoder.retry.count.partial.encoding";
    protected Configuration conf;
    protected int parallelism;
    protected Codec codec;
    protected ErasureCode code;
    protected Random rand;
    protected int bufSize;
    protected int maxBufSize;
    protected int retryCountPartialEncoding;
    protected byte[][] readBufs;
    protected byte[][] writeBufs;
    protected ChecksumStore checksumStore;
    protected StripeStore stripeStore;
    protected boolean requiredChecksumStore = false;

    /**
     * A class that acts as a sink for data, similar to /dev/null.
     */
    static class NullOutputStream extends OutputStream {
        public void write(byte[] b) throws IOException {
        }

        public void write(int b) throws IOException {
        }

        public void write(byte[] b, int off, int len) throws IOException {
        }
    }

    Encoder(Configuration conf, Codec codec) {
        this.conf = conf;
        this.parallelism = conf.getInt("raid.encoder.parallelism", DEFAULT_PARALLELISM);
        this.codec = codec;
        this.code = codec.createErasureCode(conf);
        this.rand = new Random();
        this.maxBufSize = conf.getInt(ENCODING_MAX_BUFFER_SIZE_KEY, DEFAULT_MAX_BUFFER_SIZE);
        this.bufSize = conf.getInt("raid.encoder.bufsize", maxBufSize);
        this.writeBufs = new byte[codec.parityLength][];
        this.checksumStore = RaidNode.createChecksumStore(conf, false);
        this.requiredChecksumStore = conf.getBoolean(RaidNode.RAID_CHECKSUM_STORE_REQUIRED_KEY, false);
        if (codec.isDirRaid) {
            // only needed by directory raid
            this.stripeStore = RaidNode.createStripeStore(conf, false, null);
        }
        this.retryCountPartialEncoding = conf.getInt(RETRY_COUNT_PARTIAL_ENCODING_KEY,
                DEFAULT_RETRY_COUNT_PARTIAL_ENCODING);
        allocateBuffers();
    }

    public void verifyStore() throws IOException {
        if (this.requiredChecksumStore && checksumStore == null) {
            throw new IOException("Checksum store is required but is null");
        }
        if (codec.isDirRaid && stripeStore == null) {
            throw new IOException("Stripe store is required but is null");
        }
    }

    private void allocateBuffers() {
        for (int i = 0; i < codec.parityLength; i++) {
            writeBufs[i] = new byte[bufSize];
        }
    }

    private void configureBuffers(long blockSize) {
        if ((long) bufSize > blockSize) {
            bufSize = (int) blockSize;
            allocateBuffers();
        } else if (blockSize % bufSize != 0) {
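            // Shrink the buffer so that, ideally, it evenly divides the block
            // size; reads should not straddle block boundaries (see encodeStripe).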
            bufSize = (int) (blockSize / 256L); // heuristic.
            if (bufSize == 0) {
                bufSize = 1024;
            }
            bufSize = Math.min(bufSize, maxBufSize);
            allocateBuffers();
        }
    }

    private void writeToChecksumStore(DistributedFileSystem dfs, CRC32[] crcOuts, Path parityTmp,
            long expectedParityFileSize, Progressable reporter) throws IOException {
        LocatedBlocks lbks = dfs.getLocatedBlocks(parityTmp, 0L, expectedParityFileSize);
        for (int i = 0; i < crcOuts.length; i++) {
            this.checksumStore.putIfAbsentChecksum(lbks.get(i).getBlock(), crcOuts[i].getValue());
            reporter.progress();
        }
        LOG.info("Wrote checksums of parity file into checksum store");
    }

    private void writeToStripeStore(List<List<Block>> srcStripes, DistributedFileSystem dfs, FileSystem srcFs,
            Path srcFile, FileSystem parityFs, Path parityFile, long expectedParityFileSize, Progressable reporter,
            Path finalTmpParity) throws IOException {
        try {
            if (srcStripes == null) {
                throw new IOException("source blocks are null");
            }
            LocatedBlocks lbks = dfs.getLocatedBlocks(parityFile, 0L, expectedParityFileSize);
            if (srcStripes.size() * codec.parityLength != lbks.locatedBlockCount()) {
                throw new IOException(
                        "The number of stripes " + srcStripes.size() + " doesn't match the number of parity blocks "
                                + lbks.locatedBlockCount() + " and parity length is " + codec.parityLength);
            }
            InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_PUT_STRIPE, parityFile,
                    finalTmpParity);
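            // Each source stripe maps to codec.parityLength consecutive blocks
            // of the parity file; j tracks the first parity block of stripe i.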
            for (int i = 0, j = 0; i < srcStripes.size(); i++, j += codec.parityLength) {
                ArrayList<Block> parityBlocks = new ArrayList<Block>(codec.parityLength);
                for (int k = 0; k < codec.parityLength; k++) {
                    parityBlocks.add(lbks.get(j + k).getBlock());
                }
                stripeStore.putStripe(codec, parityBlocks, srcStripes.get(i));
                reporter.progress();
            }
            LOG.info("Wrote stripe information into stripe store");
        } catch (Exception ex) {
            LogUtils.logRaidEncodingMetrics(LOGRESULTS.FAILURE, codec, -1L, -1L, -1L, -1L, -1L, -1L, srcFile,
                    LOGTYPES.ENCODING, srcFs, ex, reporter);
            if (!dfs.rename(parityFile, finalTmpParity)) {
                LOG.warn("Failed to rename " + parityFile + " back to " + finalTmpParity);
            } else {
                LOG.info("Renamed parity file " + parityFile + " back to " + finalTmpParity
                        + " so that we can retry putStripe in the next round");
            }
            throw new IOException(ex);
        }
    }

    private Vector<Path> getPartialPaths(int encodingUnit, int expectedNum, FileStatus[] stats, Codec codec,
            long numStripes) throws IOException {
        Vector<Path> partialPaths = new Vector<Path>(expectedNum);
        partialPaths.setSize(expectedNum);
        for (FileStatus stat : stats) {
            int startStripeIdx;
            try {
                startStripeIdx = Integer.parseInt(stat.getPath().getName());
            } catch (NumberFormatException nfe) {
                throw new IOException("partial file " + stat.getPath() + " is not a number");
            }
            if (startStripeIdx % encodingUnit != 0) {
                throw new IOException("partial file " + stat.getPath() + " couldn't " + "match " + encodingUnit);
            }
            long numBlocks = RaidNode.numBlocks(stat);
            long expectedNumBlocks = Math.min(encodingUnit, numStripes - startStripeIdx) * codec.parityLength;
            if (numBlocks != expectedNumBlocks) {
                throw new IOException("partial file " + stat.getPath() + " has " + numBlocks
                        + " blocks, but it should be " + expectedNumBlocks);
            }
            partialPaths.set(startStripeIdx / encodingUnit, stat.getPath());
        }
        return partialPaths;
    }

    private List<List<Block>> getSrcStripes(Configuration jobConf, DistributedFileSystem dfs, Path srcFile,
            Codec codec, long numStripes, StripeReader sReader, Progressable reporter)
            throws IOException, InterruptedException {
        List<List<Block>> srcStripes = new ArrayList<List<Block>>();
        List<FileStatus> lfs = RaidNode.listDirectoryRaidFileStatus(jobConf, dfs, srcFile);
        if (lfs == null) {
            return null;
        }
        ArrayList<Block> currentBlocks = new ArrayList<Block>();
        long totalBlocks = 0L;
        int index = 0;
        for (FileStatus stat : lfs) {
            LocatedBlocks lbs = dfs.getLocatedBlocks(stat.getPath(), 0L, stat.getLen());
            for (LocatedBlock lb : lbs.getLocatedBlocks()) {
                currentBlocks.add(lb.getBlock());
                if (currentBlocks.size() == codec.stripeLength) {
                    srcStripes.add(currentBlocks);
                    totalBlocks += currentBlocks.size();
                    currentBlocks = new ArrayList<Block>();
                }
            }
            index++;
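            // Pause briefly every 10 files, presumably to throttle the
            // getLocatedBlocks load on the NameNode.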
            if (index % 10 == 0) {
                Thread.sleep(1000);
            }
            reporter.progress();
        }
        if (currentBlocks.size() > 0) {
            srcStripes.add(currentBlocks);
            totalBlocks += currentBlocks.size();
        }
        if (srcStripes.size() != numStripes || totalBlocks != ((DirectoryStripeReader) sReader).numBlocks) {
            StringBuilder sb = new StringBuilder();
            for (List<Block> lb : srcStripes) {
                for (Block blk : lb) {
                    sb.append(blk.toString());
                    sb.append(" ");
                }
                sb.append(";");
            }
            throw new IOException("srcStripes has " + srcStripes.size() + " stripes and " + totalBlocks
                    + " blocks : " + sb + " Doesn't match " + srcFile);
        }
        return srcStripes;
    }

    /*
     * Create the temp parity file and rename to the partial parity directory
     */
    public boolean encodeTmpParityFile(Configuration jobConf, StripeReader sReader, FileSystem parityFs,
            Path partialTmpParity, Path parityFile, short tmpRepl, long blockSize, long expectedPartialParityBlocks,
            long expectedPartialParityFileSize, Progressable reporter) throws IOException, InterruptedException {
        // Create a tmp file to which we will write first.
        String jobID = RaidNode.getJobID(jobConf);
        Path tmpDir = new Path(codec.tmpParityDirectory, jobID);
        if (!parityFs.mkdirs(tmpDir)) {
            throw new IOException("Could not create tmp dir " + tmpDir);
        }
        Path parityTmp = new Path(tmpDir, parityFile.getName() + rand.nextLong());
        FSDataOutputStream out = parityFs.create(parityTmp, true, conf.getInt("io.file.buffer.size", 64 * 1024),
                tmpRepl, blockSize);
        try {
            CRC32[] crcOuts = null;
            if (checksumStore != null) {
                crcOuts = new CRC32[(int) expectedPartialParityBlocks];
            }
            encodeFileToStream(sReader, blockSize, out, crcOuts, reporter);
            out.close();
            out = null;
            LOG.info("Wrote temp parity file " + parityTmp);
            FileStatus tmpStat = parityFs.getFileStatus(parityTmp);
            if (tmpStat.getLen() != expectedPartialParityFileSize) {
                InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_PARTIAL_PARITY_SIZE_MISMATCH);
                throw new IOException("Expected partial parity size " + expectedPartialParityFileSize
                        + " does not match actual " + tmpStat.getLen() + " in path " + tmpStat.getPath());
            }
            InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_PUT_CHECKSUM);
            if (checksumStore != null) {
                this.writeToChecksumStore((DistributedFileSystem) parityFs, crcOuts, parityTmp,
                        expectedPartialParityFileSize, reporter);
            }
            if (!parityFs.rename(parityTmp, partialTmpParity)) {
                LOG.warn("Failed to rename file " + parityTmp + " to " + partialTmpParity);
                return false;
            }
            LOG.info("Renamed " + parityTmp + " to " + partialTmpParity);
            return true;
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                parityFs.delete(parityTmp, false);
            }
        }
    }

    public boolean finishAllPartialEncoding(FileSystem parityFs, Path tmpPartialParityDir, long expectedNum)
            throws IOException, InterruptedException {
        // Verify that all partial encodings have finished
        try {
            FileStatus[] stats = null;
            long len = 0;
            for (int i = 0; i < this.retryCountPartialEncoding; i++) {
                stats = parityFs.listStatus(tmpPartialParityDir);
                len = stats != null ? stats.length : 0;
                if (len == expectedNum) {
                    return true;
                }
                if (i + 1 < this.retryCountPartialEncoding) {
                    // Sleep between attempts to give other tasks time to finish
                    // their partial parities before we re-check.
                    Thread.sleep(rand.nextInt(2000));
                }
            }
            LOG.info("Number of partial files in the directory " + tmpPartialParityDir + " is " + len
                    + ". It's different from expected number " + expectedNum);
            return false;
        } catch (FileNotFoundException fnfe) {
            LOG.info("The temp directory is already moved to final partial" + " directory");
            return false;
        }
    }

    /**
     * The interface to use to generate a parity file.
     * This method can be called multiple times with the same Encoder object,
     * thus allowing reuse of the buffers allocated by the Encoder object.
     *
     * @param fs The filesystem containing the source file.
     * @param srcFile The source file.
     * @param parityFile The parity file to be generated.
     * @throws InterruptedException 
     */
    public boolean encodeFile(Configuration jobConf, FileSystem fs, FileSystem parityFs, Path parityFile,
            short parityRepl, long numStripes, long blockSize, Progressable reporter, StripeReader sReader,
            EncodingCandidate ec) throws IOException, InterruptedException {
        DistributedFileSystem dfs = DFSUtil.convertToDFS(parityFs);
        Path srcFile = ec.srcStat.getPath();
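        // Each stripe yields codec.parityLength parity blocks of blockSize bytes;
        // this task encodes stripes [stripeStartIdx, stripeEndIdx) of the source.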
        long expectedParityFileSize = numStripes * blockSize * codec.parityLength;
        long expectedPartialParityBlocks = (sReader.stripeEndIdx - sReader.stripeStartIdx) * codec.parityLength;
        long expectedPartialParityFileSize = expectedPartialParityBlocks * blockSize;

        // Create a tmp file to which we will write first.
        String jobID = RaidNode.getJobID(jobConf);
        Path tmpDir = new Path(codec.tmpParityDirectory, jobID);
        if (!parityFs.mkdirs(tmpDir)) {
            throw new IOException("Could not create tmp dir " + tmpDir);
        }

        String partialParityName = "partial_" + MD5Hash.digest(srcFile.toUri().getPath()) + "_"
                + ec.srcStat.getModificationTime() + "_" + ec.encodingUnit + "_" + ec.encodingId;
        Path partialParityDir = new Path(tmpDir, partialParityName);
        Path tmpPartialParityDir = new Path(partialParityDir, "tmp");
        Path finalPartialParityDir = new Path(partialParityDir, "final");
        if (!parityFs.mkdirs(partialParityDir)) {
            throw new IOException("Could not create partial parity directory " + partialParityDir);
        }
        // If we write a parity for a large directory, 
        // Use 3 replicas to guarantee the durability by default
        short tmpRepl = (short) conf.getInt(RaidNode.RAID_PARITY_INITIAL_REPL_KEY,
                RaidNode.DEFAULT_RAID_PARITY_INITIAL_REPL);

        Path finalTmpParity = null;
        /**
         * To support retriable encoding, we use three checkpoints to represent
         * the last successful state:
         * 1. isEncoded: set to true when the partial parity is generated.
         * 2. isRenamed: set to true when all partial parities are generated and
         *               tmpPartialParityDir is moved to finalPartialParityDir.
         * 3. isConcated: set to true when the partial parities are concatenated
         *                into the final parity.
         */
        if (!ec.isConcated) {
            if (!ec.isEncoded) {
                if (!parityFs.mkdirs(tmpPartialParityDir)) {
                    throw new IOException("Could not create " + tmpPartialParityDir);
                }
                Path partialTmpParity = new Path(tmpPartialParityDir, Long.toString(sReader.getCurrentStripeIdx()));
                LOG.info("Encoding partial parity " + partialTmpParity);
                if (!encodeTmpParityFile(jobConf, sReader, dfs, partialTmpParity, parityFile, tmpRepl, blockSize,
                        expectedPartialParityBlocks, expectedPartialParityFileSize, reporter)) {
                    return false;
                }
                LOG.info("Encoded partial parity " + partialTmpParity);
            }
            ec.isEncoded = true;
            long expectedNum = (long) Math.ceil(numStripes * 1.0 / ec.encodingUnit);
            if (!ec.isRenamed) {
                if (!finishAllPartialEncoding(parityFs, tmpPartialParityDir, expectedNum)) {
                    return false;
                }
                InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_RENAME_FILE);
                // Move the directory to final
                if (!dfs.rename(tmpPartialParityDir, finalPartialParityDir)) {
                    LOG.info("Fail to rename " + tmpPartialParityDir + " to " + finalPartialParityDir);
                    return false;
                }
                LOG.info("Renamed " + tmpPartialParityDir + " to " + finalPartialParityDir);
                ec.isRenamed = true;
            }
            FileStatus[] stats = parityFs.listStatus(finalPartialParityDir);
            // Verify partial parities are correct
            Vector<Path> partialPaths = getPartialPaths((int) ec.encodingUnit, (int) expectedNum, stats, codec,
                    numStripes);
            finalTmpParity = partialPaths.get(0);
            InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_CONCAT_FILE);
            if (partialPaths.size() > 1) {
                Path[] restPaths = partialPaths.subList(1, partialPaths.size())
                        .toArray(new Path[partialPaths.size() - 1]);
                try {
                    // Concat requires source and target files are in the same directory
                    dfs.concat(finalTmpParity, restPaths, true);
                    LOG.info("Concated " + partialPaths.size() + " files into " + finalTmpParity);

                } catch (IOException ioe) {
                    // Maybe other tasks already finish concating. 
                    LOG.info("Fail to concat " + partialPaths.size() + " files into " + finalTmpParity, ioe);
                    throw ioe;
                }
            }
            ec.isConcated = true;
        } else {
            FileStatus[] stats = parityFs.listStatus(finalPartialParityDir);
            if (stats == null || stats.length == 0) {
                return false;
            }
            if (stats.length > 1) {
                throw new IOException("We shouldn't have more than 1 files under" + finalPartialParityDir);
            }
            finalTmpParity = stats[0].getPath();
        }
        FileStatus tmpStat = parityFs.getFileStatus(finalTmpParity);
        if (tmpStat.getBlockSize() != blockSize) {
            throw new IOException("Expected parity block size " + blockSize + " does not match actual "
                    + tmpStat.getBlockSize() + " in path " + finalTmpParity);
        }
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize + " does not match actual "
                    + tmpStat.getLen() + " in path " + finalTmpParity);
        }
        if (ec.srcStripes == null && stripeStore != null) {
            InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_FAILURE_GET_SRC_STRIPES);
            ec.srcStripes = getSrcStripes(jobConf, dfs, srcFile, codec, numStripes, sReader, reporter);
            if (ec.srcStripes == null) {
                LOG.error("Cannot get srcStripes for " + srcFile);
                return false;
            }
        }

        // delete destination if exists
        if (dfs.exists(parityFile)) {
            dfs.delete(parityFile, false);
        }
        dfs.mkdirs(parityFile.getParent());
        if (!dfs.rename(finalTmpParity, parityFile)) {
            String msg = "Unable to rename file " + finalTmpParity + " to " + parityFile;
            throw new IOException(msg);
        }
        LOG.info("Wrote parity file " + parityFile);

        if (stripeStore != null) {
            this.writeToStripeStore(ec.srcStripes, dfs, fs, srcFile, parityFs, parityFile, expectedParityFileSize,
                    reporter, finalTmpParity);
        }
        if (tmpRepl != parityRepl) {
            dfs.setReplication(parityFile, parityRepl);
            LOG.info("Reduce replication of " + parityFile + " to " + parityRepl);
        }
        dfs.delete(partialParityDir, true);
        return true;
    }

    /**
     * Recovers a corrupt block in a parity file to a local file.
     *
     * The encoder generates codec.parityLength parity blocks for a source file stripe.
     * Since we want only one of the parity blocks, this function creates
     * null outputs for the blocks to be discarded.
     *
     * @param fs The filesystem in which both srcFile and parityFile reside.
     * @param srcStat FileStatus of the source file.
     * @param blockSize The block size for the parity files.
     * @param corruptOffset The location of corruption in the parity file.
     * @param out The destination for the recovered block.
     * @param progress A reporter for progress.
     */
    public CRC32 recoverParityBlockToStream(FileSystem fs, FileStatus srcStat, long blockSize, Path parityFile,
            long corruptOffset, OutputStream out, Progressable progress) throws IOException {
        LOG.info("Recovering parity block" + parityFile + ":" + corruptOffset);
        Path srcFile = srcStat.getPath();
        // Get the start offset of the corrupt block.
        corruptOffset = (corruptOffset / blockSize) * blockSize;
        // Output streams to each block in the parity file stripe.
        OutputStream[] outs = new OutputStream[codec.parityLength];
        long indexOfCorruptBlockInParityStripe = (corruptOffset / blockSize) % codec.parityLength;
        LOG.info("Index of corrupt block in parity stripe: " + indexOfCorruptBlockInParityStripe);
        CRC32[] crcOuts = null;
        if (checksumStore != null) {
            crcOuts = new CRC32[codec.parityLength];
        }
        // Create a real output stream for the block we want to recover,
        // and create null streams for the rest.
        for (int i = 0; i < codec.parityLength; i++) {
            if (indexOfCorruptBlockInParityStripe == i) {
                outs[i] = out;
                if (checksumStore != null) {
                    crcOuts[i] = new CRC32();
                }
            } else {
                outs[i] = new NullOutputStream();
            }
        }
        // Get the stripe index and start offset of stripe.
        long stripeIdx = corruptOffset / (codec.parityLength * blockSize);
        StripeReader sReader = StripeReader.getStripeReader(codec, conf, blockSize, fs, stripeIdx, srcStat);
        // Get input streams to each block in the source file stripe.
        assert sReader.hasNext();
        InputStream[] blocks = sReader.getNextStripeInputs().getInputs();
        LOG.info("Starting recovery by using source stripe " + srcFile + ": stripe " + stripeIdx);

        try {
            // Read the data from the blocks and write to the parity file.
            encodeStripe(blocks, blockSize, outs, crcOuts, progress, false, null);
            if (checksumStore != null) {
                return crcOuts[(int) indexOfCorruptBlockInParityStripe];
            } else {
                return null;
            }
        } finally {
            RaidUtils.closeStreams(blocks);
        }
    }

    /**
     * Encodes the data read by the given StripeReader and writes the parity
     * to an output stream.
     *
     * The encoder generates codec.parityLength parity blocks for a source file
     * stripe. Since only one output stream is provided, the remaining parity
     * blocks are staged in local temp files before being copied to the output.
     *
     * @param blockSize The block size for the source/parity files.
     * @param out The destination for the encoded parity data.
     * @throws InterruptedException
     */
    private void encodeFileToStream(StripeReader sReader, long blockSize, FSDataOutputStream out, CRC32[] crcOuts,
            Progressable reporter) throws IOException, InterruptedException {
        OutputStream[] tmpOuts = new OutputStream[codec.parityLength];
        // One parity block can be written directly to out, rest to local files.
        tmpOuts[0] = out;
        File[] tmpFiles = new File[codec.parityLength - 1];
        for (int i = 0; i < codec.parityLength - 1; i++) {
            tmpFiles[i] = File.createTempFile("parity", "_" + i);
            LOG.info("Created tmp file " + tmpFiles[i]);
            tmpFiles[i].deleteOnExit();
        }
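        // finishedParityBlockIdx indexes into crcOuts; each completed stripe
        // contributes codec.parityLength parity-block checksums.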
        int finishedParityBlockIdx = 0;
        List<Integer> errorLocations = new ArrayList<Integer>();
        try {
            // Loop over stripe
            boolean redo;
            while (sReader.hasNext()) {
                reporter.progress();
                StripeInputInfo stripeInputInfo = null;
                InputStream[] blocks = null;
                // Create input streams for blocks in the stripe.
                long currentStripeIdx = sReader.getCurrentStripeIdx();
                stripeInputInfo = sReader.getNextStripeInputs();
                // Remember the output position: if a failure occurs after data
                // has been written past it, the stripe cannot simply be retried.
                long encodeStartOffset = out.getPos();
                int retry = 3;
                do {
                    redo = false;
                    retry--;
                    try {
                        blocks = stripeInputInfo.getInputs();
                        CRC32[] curCRCOuts = new CRC32[codec.parityLength];

                        if (crcOuts != null) {
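                            // Allocate fresh CRCs for this attempt and alias them
                            // into the caller's crcOuts slots, so a retry overwrites
                            // checksums left by a failed attempt.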
                            for (int i = 0; i < codec.parityLength; i++) {
                                crcOuts[finishedParityBlockIdx + i] = curCRCOuts[i] = new CRC32();
                            }
                        }
                        // Create output streams to the temp files.
                        for (int i = 0; i < codec.parityLength - 1; i++) {
                            tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
                        }
                        // Call the implementation of encoding.
                        encodeStripe(blocks, blockSize, tmpOuts, curCRCOuts, reporter, true, errorLocations);
                    } catch (IOException e) {
                        if (out.getPos() > encodeStartOffset) {
                            // Partial data is already written, throw the exception
                            InjectionHandler.processEventIO(InjectionEvent.RAID_ENCODING_PARTIAL_STRIPE_ENCODED);
                            throw e;
                        }
                        // try to fix the missing block in the stripe using stripe store.
                        if ((e instanceof BlockMissingException || e instanceof ChecksumException)
                                && codec.isDirRaid) {
                            if (retry <= 0) {
                                throw e;
                            }
                            redo = true;
                            CorruptBlockReconstructor constructor = new CorruptBlockReconstructor(conf);

                            Set<Path> srcPaths = new HashSet<Path>();
                            for (int idx : errorLocations) {
                                Path srcPath = stripeInputInfo.getSrcPaths()[idx];
                                if (srcPath != null) {
                                    srcPaths.add(srcPath);
                                }
                            }

                            for (Path srcPath : srcPaths) {
                                Decoder decoder = new Decoder(conf, codec);
                                decoder.connectToStore(srcPath);
                                LOG.info("In Encoding: try to reconstruct the file: " + srcPath);
                                // will throw exception if it fails to reconstruct the lost
                                // blocks.
                                constructor.processFile(srcPath, null, decoder, true, null);
                                LOG.info("In Encoding: finished to reconstruct the file: " + srcPath);
                            }
                        } else {
                            throw e;
                        }
                    } finally {
                        if (blocks != null) {
                            RaidUtils.closeStreams(blocks);
                        }
                    }
                    if (redo) {
                        // rebuild the inputs.
                        stripeInputInfo = sReader.getStripeInputs(currentStripeIdx);
                    }
                } while (redo);

                // Close output streams to the temp files and write the temp files
                // to the output provided.
                for (int i = 0; i < codec.parityLength - 1; i++) {
                    tmpOuts[i + 1].close();
                    tmpOuts[i + 1] = null;
                    InputStream in = new FileInputStream(tmpFiles[i]);
                    try {
                        RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
                    } finally {
                        in.close();
                    }
                    reporter.progress();
                }
                finishedParityBlockIdx += codec.parityLength;
            }
        } finally {
            for (int i = 0; i < codec.parityLength - 1; i++) {
                if (tmpOuts[i + 1] != null) {
                    tmpOuts[i + 1].close();
                }
                tmpFiles[i].delete();
                LOG.info("Deleted tmp file " + tmpFiles[i]);
            }
        }
    }

    /**
     * Wraps around encodeStripeImpl in order to configure buffers.
     * Having buffers of the right size is extremely important. If the
     * buffer size is not a divisor of the block size, we may end up reading
     * across block boundaries.
     */
    void encodeStripe(InputStream[] blocks, long blockSize, OutputStream[] outs, CRC32[] crcOuts,
            Progressable reporter, boolean computeSrcChecksum, List<Integer> errorLocations) throws IOException {
        configureBuffers(blockSize);
        int boundedBufferCapacity = 1;
        ParallelStreamReader parallelReader = new ParallelStreamReader(reporter, blocks, bufSize, parallelism,
                boundedBufferCapacity, blockSize, computeSrcChecksum, outs);
        parallelReader.start();
        try {
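            // Encode the blocks in bufSize chunks: read one chunk from every
            // source block in parallel, compute the parity chunks, then append
            // them to each parity output stream.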
            for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
                ParallelStreamReader.ReadResult readResult = null;
                try {
                    readResult = parallelReader.getReadResult();
                } catch (InterruptedException e) {
                    throw new IOException("Interrupted while waiting for read result");
                }
                // Cannot tolerate any IO errors.
                IOException readEx = readResult.getException();
                if (readEx != null) {
                    if (errorLocations != null) {
                        errorLocations.clear();
                        for (int idx : readResult.getErrorIdx()) {
                            errorLocations.add(idx);
                        }
                    }
                    throw readEx;
                }

                code.encodeBulk(readResult.readBufs, writeBufs);
                reporter.progress();
                // Assume each byte is independently encoded
                int toWrite = (int) Math.min(blockSize - encoded, bufSize);

                // Now that we have some data to write, send it to the temp files.
                for (int i = 0; i < codec.parityLength; i++) {
                    outs[i].write(writeBufs[i], 0, toWrite);
                    if (crcOuts != null && crcOuts[i] != null) {
                        crcOuts[i].update(writeBufs[i], 0, toWrite);
                    }
                    reporter.progress();
                }
            }
            if (computeSrcChecksum) {
                parallelReader.collectSrcBlocksChecksum(checksumStore);
            }
        } finally {
            parallelReader.shutdown();
        }
    }
}
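
Usage

Encoder is normally driven by RaidNode and DistRaid rather than used directly, and its constructor is package-private. The sketch below is a minimal illustration, written as if from within the org.apache.hadoop.raid package, of recovering a single corrupt parity block via recoverParityBlockToStream. The codec id "rs" and the file paths are assumptions for illustration, not part of this file.

// Minimal, hypothetical same-package sketch; codec id and paths are assumed.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Codec codec = Codec.getCodec("rs"); // assumes an "rs" codec is configured
Encoder encoder = new Encoder(conf, codec);

FileStatus srcStat = fs.getFileStatus(new Path("/user/raidtest/file"));
Path parityFile = new Path("/raidrs/user/raidtest/file");
long blockSize = srcStat.getBlockSize();
long corruptOffset = 2 * blockSize; // offset somewhere inside the corrupt parity block

OutputStream out = new FileOutputStream("/tmp/recovered_block");
try {
    // Returns the block's CRC32, or null when no checksum store is configured.
    CRC32 crc = encoder.recoverParityBlockToStream(fs, srcStat, blockSize, parityFile, corruptOffset, out,
            new Progressable() {
                public void progress() {
                }
            });
} finally {
    out.close();
}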