org.apache.hadoop.raid.DistBlockIntegrityMonitor.java Source code

Introduction

Here is the source code for org.apache.hadoop.raid.DistBlockIntegrityMonitor.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.net.InetSocketAddress;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.TreeMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.security.auth.login.LoginException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobInProgress;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.raid.BlockReconstructor.CorruptBlockReconstructor;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.Worker.LostFileInfo;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.LogUtils.LOGTYPES;
import org.apache.hadoop.raid.RaidUtils.RaidInfo;
import org.apache.hadoop.raid.protocol.RaidProtocol;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;

/**
 * Distributed block integrity monitor. Uses parity information to reconstruct
 * lost files via MapReduce jobs.
 *
 * Configuration options:
 *
 * raid.blockfix.filespertask       - number of files to reconstruct in a single
 *                                    map reduce task (i.e., at one mapper node)
 *
 * raid.blockfix.fairscheduler.pool - the pool to use for MR jobs
 *
 * raid.blockfix.maxpendingjobs     - maximum number of MR jobs
 *                                    running simultaneously
 */
public class DistBlockIntegrityMonitor extends BlockIntegrityMonitor {
    public final static String[] BLOCKFIXER_MAPREDUCE_KEYS = { "mapred.job.tracker", "cm.server.address",
            "cm.server.http.address", "mapred.job.tracker.corona.proxyaddr", "corona.proxy.job.tracker.rpcaddr",
            "corona.system.dir", "mapred.temp.dir" };
    public final static String BLOCKFIXER = "blockfixer";

    private static final String IN_FILE_SUFFIX = ".in";
    private static final String PART_PREFIX = "part-";
    static final Pattern LIST_CORRUPT_FILE_PATTERN = Pattern.compile("blk_-*\\d+\\s+(.*)");
    static final Pattern LIST_DECOMMISSION_FILE_PATTERN = Pattern.compile("blk_-*\\d+\\s+(.*)"); // For now this is the same because of how dfsck generates output 
    private static final String FILES_PER_TASK = "raid.blockfix.filespertask";
    public static final String MAX_PENDING_JOBS = "raid.blockfix.maxpendingjobs";
    private static final String HIGH_PRI_SCHEDULER_OPTION = "raid.blockfix.highpri.scheduleroption";
    private static final String LOW_PRI_SCHEDULER_OPTION = "raid.blockfix.lowpri.scheduleroption";
    private static final String LOWEST_PRI_SCHEDULER_OPTION = "raid.blockfix.lowestpri.scheduleroption";
    private static final String MAX_FIX_TIME_FOR_FILE = "raid.blockfix.max.fix.time.for.file";
    private static final String LOST_FILES_LIMIT = "raid.blockfix.corruptfiles.limit";
    private static final String RAIDNODE_BLOCK_FIXER_SCAN_NUM_THREADS_KEY = "raid.block.fixer.scan.threads";
    private static final int DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS = 5;
    private int blockFixerScanThreads = DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS;
    // The directories checked by the corrupt file monitor, separated by commas
    public static final String RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY = "raid.block.fix.submission.interval";
    private static final long DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL = 5 * 1000;
    public static final String RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY = "raid.block.fix.scan.submission.interval";
    private static final long DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL = 5 * 1000;
    public static final String RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED_KEY = "raid.max.num.detection.time.collected";
    public static final int DEFAULT_RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED = 100;
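
    // A minimal sketch (hypothetical values, matching the defaults below) of how
    // a few of these keys might be set in the RaidNode configuration:
    //   raid.blockfix.filespertask         = 10
    //   raid.blockfix.maxpendingjobs       = 100
    //   raid.block.fixer.scan.threads      = 5
    //   raid.block.fix.submission.interval = 5000   (milliseconds)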

    public enum UpdateNumFilesDropped {
        SET, ADD
    };

    // default number of files to reconstruct in a task
    private static final long DEFAULT_FILES_PER_TASK = 10L;

    private static final int TASKS_PER_JOB = 50;

    // default maximum number of reconstruction jobs pending simultaneously
    private static final long DEFAULT_MAX_PENDING_JOBS = 100L;

    private static final long DEFAULT_MAX_FIX_TIME_FOR_FILE = 4 * 60 * 60 * 1000; // 4 hrs.

    private static final int DEFAULT_LOST_FILES_LIMIT = 200000;
    public static final String FAILED_FILE = "failed";
    public static final String SIMULATION_FAILED_FILE = "simulation_failed";

    protected static final Log LOG = LogFactory.getLog(DistBlockIntegrityMonitor.class);

    private static final String CORRUPT_FILE_DETECT_TIME = "corrupt_detect_time";

    // number of files to reconstruct in a task
    private long filesPerTask;

    // maximum number of reconstruction jobs pending simultaneously
    final private long maxPendingJobs;

    final private long maxFixTimeForFile;

    final private int lostFilesLimit;

    private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");

    private Worker corruptionWorker = new CorruptionWorker();
    private Worker decommissioningWorker = new DecommissioningWorker();
    private Runnable corruptFileCounterWorker = new CorruptFileCounter();

    static enum RaidCounter {
        FILES_SUCCEEDED, FILES_FAILED, FILES_NOACTION, BLOCK_FIX_SIMULATION_FAILED, BLOCK_FIX_SIMULATION_SUCCEEDED, FILE_FIX_NUM_READBYTES_REMOTERACK
    }

    static enum CorruptFileStatus {
        POTENTIALLY_CORRUPT, RAID_UNRECOVERABLE, NOT_RAIDED_UNRECOVERABLE, NOT_EXIST, RECOVERABLE
    }

    static enum Priority {
        HIGH(HIGH_PRI_SCHEDULER_OPTION, 2), LOW(LOW_PRI_SCHEDULER_OPTION, 1), LOWEST(LOWEST_PRI_SCHEDULER_OPTION,
                0);

        public final String configOption;
        private final int underlyingValue;

        private Priority(String s, int value) {
            configOption = s;
            underlyingValue = value;
        }

        public boolean higherThan(Priority other) {
            return (underlyingValue > other.underlyingValue);
        }
    }
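
    // For illustration: Priority.HIGH.higherThan(Priority.LOW) is true, so files
    // flagged HIGH are batched and submitted before LOW and LOWEST files in
    // Worker.startJobs() below.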

    static public class TrackingUrlInfo {
        String trackingUrl;
        long insertTime;

        public TrackingUrlInfo(String newUrl, long newTime) {
            trackingUrl = newUrl;
            insertTime = newTime;
        }
    }

    /**
     * Holds information about a failed file together with the id of the task that failed it
     */
    static public class FailedFileInfo {
        String taskId;
        LostFileInfo fileInfo;

        public FailedFileInfo(String newTaskId, LostFileInfo newFileInfo) {
            this.taskId = newTaskId;
            this.fileInfo = newFileInfo;
        }
    }

    public DistBlockIntegrityMonitor(Configuration conf) throws Exception {
        super(conf);
        filesPerTask = DistBlockIntegrityMonitor.getFilesPerTask(getConf());
        maxPendingJobs = DistBlockIntegrityMonitor.getMaxPendingJobs(getConf());
        maxFixTimeForFile = DistBlockIntegrityMonitor.getMaxFixTimeForFile(getConf());
        lostFilesLimit = DistBlockIntegrityMonitor.getLostFilesLimit(getConf());
    }

    public static void updateBlockFixerMapreduceConfigs(Configuration conf, String suffix) {
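        // For example, with suffix "blockfixer" a value configured under
        // "mapred.job.tracker.blockfixer" (if present) is copied over
        // "mapred.job.tracker", letting block-fixer MR jobs target their own
        // cluster without disturbing the global configuration.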
        for (String configKey : BLOCKFIXER_MAPREDUCE_KEYS) {
            String newKey = configKey + "." + suffix;
            String value = conf.get(newKey);
            if (value != null) {
                conf.set(configKey, value);
            }
        }
    }

    /**
     * determines how many files to reconstruct in a single task
     */
    protected static long getFilesPerTask(Configuration conf) {
        return conf.getLong(FILES_PER_TASK, DEFAULT_FILES_PER_TASK);

    }

    /**
     * determines the maximum number of reconstruction jobs pending simultaneously
     */
    protected static long getMaxPendingJobs(Configuration conf) {
        return conf.getLong(MAX_PENDING_JOBS, DEFAULT_MAX_PENDING_JOBS);
    }

    protected static long getMaxFixTimeForFile(Configuration conf) {
        return conf.getLong(MAX_FIX_TIME_FOR_FILE, DEFAULT_MAX_FIX_TIME_FOR_FILE);
    }

    protected static int getLostFilesLimit(Configuration conf) {
        return conf.getInt(LOST_FILES_LIMIT, DEFAULT_LOST_FILES_LIMIT);
    }

    // Returns the started Job on success, or null if there are no files to
    // submit or the pending-job limit has been reached.
    public static Job startOneJob(Worker newWorker, Priority pri, Set<String> jobFiles, long detectTime,
            AtomicLong numFilesSubmitted, AtomicLong lastCheckingTime, long maxPendingJobs)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (lastCheckingTime != null) {
            lastCheckingTime.set(System.currentTimeMillis());
        }
        String startTimeStr = dateFormat.format(new Date());
        String jobName = newWorker.JOB_NAME_PREFIX + "." + newWorker.jobCounter + "." + pri + "-pri" + "."
                + startTimeStr;
        Job job = null;
        synchronized (jobFiles) {
            if (jobFiles.size() == 0) {
                return null;
            }
            newWorker.jobCounter++;

            synchronized (newWorker.jobIndex) {
                if (newWorker.jobIndex.size() >= maxPendingJobs) {
                    // pending-job limit reached
                    return null;
                }
                job = newWorker.startJob(jobName, jobFiles, pri, detectTime);
            }
            numFilesSubmitted.addAndGet(jobFiles.size());
            jobFiles.clear();

        }
        return job;
    }

    public abstract class Worker implements Runnable {

        protected Map<String, LostFileInfo> fileIndex = Collections
                .synchronizedMap(new HashMap<String, LostFileInfo>());
        protected Map<JobID, TrackingUrlInfo> idToTrakcingUrlMap = Collections
                .synchronizedMap(new HashMap<JobID, TrackingUrlInfo>());
        protected Map<Job, List<LostFileInfo>> jobIndex = Collections
                .synchronizedMap(new HashMap<Job, List<LostFileInfo>>());
        protected Map<Job, List<FailedFileInfo>> failJobIndex = new HashMap<Job, List<FailedFileInfo>>();
        protected Map<Job, List<FailedFileInfo>> simFailJobIndex = new HashMap<Job, List<FailedFileInfo>>();

        private long jobCounter = 0;
        private AtomicInteger numJobsRunning = new AtomicInteger(0);

        protected AtomicLong numFilesDropped = new AtomicLong(0);

        volatile BlockIntegrityMonitor.Status lastStatus = null;
        AtomicLong recentNumFilesSucceeded = new AtomicLong();
        AtomicLong recentNumFilesFailed = new AtomicLong();
        AtomicLong recentSlotSeconds = new AtomicLong();
        AtomicLong recentNumBlockFixSimulationSucceeded = new AtomicLong();
        AtomicLong recentNumBlockFixSimulationFailed = new AtomicLong();
        AtomicLong recentNumReadBytesRemoteRack = new AtomicLong();
        Map<String, Long> recentLogMetrics = Collections.synchronizedMap(new HashMap<String, Long>());

        private static final int POOL_SIZE = 2;
        private final ExecutorService executor = Executors.newFixedThreadPool(POOL_SIZE);
        private static final int DEFAULT_CHECK_JOB_TIMEOUT_SEC = 600; //10 mins

        protected final Log LOG;
        protected final Class<? extends BlockReconstructor> RECONSTRUCTOR_CLASS;
        protected final String JOB_NAME_PREFIX;

        protected Worker(Log log, Class<? extends BlockReconstructor> rClass, String prefix) {

            this.LOG = log;
            this.RECONSTRUCTOR_CLASS = rClass;
            this.JOB_NAME_PREFIX = prefix;
            Path workingDir = new Path(prefix);
            try {
                FileSystem fs = workingDir.getFileSystem(getConf());
                // Clean existing working dir
                fs.delete(workingDir, true);
            } catch (IOException ioe) {
                LOG.warn("Get exception when cleaning " + workingDir, ioe);
            }
        }

        public void shutdown() {
        }

        /**
         * runs the worker periodically
         */
        public void run() {
            try {
                while (running) {
                    try {
                        updateStatus();
                        checkAndReconstructBlocks();
                    } catch (InterruptedException ignore) {
                        LOG.info("interrupted");
                    } catch (Exception e) {
                        // log exceptions and keep running
                        LOG.error(StringUtils.stringifyException(e));
                    } catch (Error e) {
                        LOG.error(StringUtils.stringifyException(e));
                        throw e;
                    }

                    try {
                        Thread.sleep(blockCheckInterval);
                    } catch (InterruptedException ignore) {
                        LOG.info("interrupted");
                    }
                }
            } finally {
                shutdown();
            }
        }

        /**
         * checks for lost blocks and reconstructs them (if any)
         */
        void checkAndReconstructBlocks() throws Exception {
            checkJobsWithTimeOut(DEFAULT_CHECK_JOB_TIMEOUT_SEC);
            int size = jobIndex.size();
            if (size >= maxPendingJobs) {
                LOG.info("Waiting for " + size + " pending jobs");
                return;
            }

            FileSystem fs = new Path("/").getFileSystem(getConf());
            Map<String, Integer> lostFiles = getLostFiles(fs);
            long detectTime = System.currentTimeMillis();
            computePrioritiesAndStartJobs(fs, lostFiles, detectTime);
        }

        /**
         * Handle a failed job.
         */
        private void failJob(Job job) {
            // assume no files have been reconstructed
            LOG.error("Job " + job.getID() + "(" + job.getJobName() + ") finished (failed)");
            // We do not update metrics here since we do not know for sure whether
            // file reconstruction failed.
            for (LostFileInfo fileInfo : jobIndex.get(job)) {
                boolean failed = true;
                addToMap(job, job.getID().toString(), fileInfo, failJobIndex);
                fileInfo.finishJob(job.getJobName(), failed);
            }
            numJobsRunning.decrementAndGet();
        }

        private void addToMap(Job job, String taskId, LostFileInfo fileInfo, Map<Job, List<FailedFileInfo>> index) {
            List<FailedFileInfo> failFiles = null;
            if (!index.containsKey(job)) {
                failFiles = new ArrayList<FailedFileInfo>();
                index.put(job, failFiles);
            } else {
                failFiles = index.get(job);
            }
            failFiles.add(new FailedFileInfo(taskId, fileInfo));
        }

        /**
         * Handle a successful job.
         */
        private void succeedJob(Job job, long filesSucceeded, long filesFailed) throws IOException {
            String jobName = job.getJobName();
            LOG.info("Job " + job.getID() + "(" + jobName + ") finished (succeeded)");
            // we have to look at the output to check which files have failed
            HashMap<String, String> failedFiles = getFailedFiles(job);
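            // Keys in the job output are expected to be of the form
            // "failed,<path>" or "simulation_failed,<path>", with the failing
            // task id as the value; both forms are checked for each file below.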
            for (LostFileInfo fileInfo : jobIndex.get(job)) {
                String filePath = fileInfo.getFile().toString();
                String failedFilePath = DistBlockIntegrityMonitor.FAILED_FILE + "," + filePath;
                String simulatedFailedFilePath = DistBlockIntegrityMonitor.SIMULATION_FAILED_FILE + "," + filePath;
                if (failedFiles.containsKey(simulatedFailedFilePath)) {
                    String taskId = failedFiles.get(simulatedFailedFilePath);
                    addToMap(job, taskId, fileInfo, simFailJobIndex);
                    LOG.error("Simulation failed file: " + fileInfo.getFile());
                }
                if (failedFiles.containsKey(failedFilePath)) {
                    String taskId = failedFiles.get(failedFilePath);
                    addToMap(job, taskId, fileInfo, failJobIndex);
                    boolean failed = true;
                    fileInfo.finishJob(jobName, failed);
                } else {
                    // call succeed for files that have succeeded or for which no action
                    // was taken
                    boolean failed = false;
                    fileInfo.finishJob(jobName, failed);
                }
            }
            // report succeeded files to metrics
            this.recentNumFilesSucceeded.addAndGet(filesSucceeded);
            this.recentNumFilesFailed.addAndGet(filesFailed);
            if (filesSucceeded > 0) {
                lastSuccessfulFixTime = System.currentTimeMillis();
            }
            numJobsRunning.decrementAndGet();
        }

        /**
         * Checks the running jobs, abandoning the check after the given timeout
         */
        void checkJobsWithTimeOut(int timeoutSec) throws ExecutionException {
            Future<Boolean> future = executor.submit(new Callable<Boolean>() {
                @Override
                public Boolean call() throws Exception {
                    checkJobs();
                    return true;
                }
            });
            try {
                future.get(timeoutSec, TimeUnit.SECONDS);
            } catch (TimeoutException e) {
                // ignore this.
                LOG.warn("Timeout when checking jobs' status.");
            } catch (InterruptedException e) {
                // ignore this.
                LOG.warn("checkJobs function is interrupted.");
            }
            if (!future.isDone()) {
                future.cancel(true);
            }
        }

        /**
         * checks whether jobs have completed and updates the job and file
         * indices; finished jobs are removed from the job index and their
         * working directories are cleaned up
         */
        void checkJobs() throws IOException {
            List<Job> nonRunningJobs = new ArrayList<Job>();
            synchronized (jobIndex) {
                Iterator<Job> jobIter = jobIndex.keySet().iterator();
                while (jobIter.hasNext()) {
                    Job job = jobIter.next();

                    try {
                        if (job.isComplete()) {
                            Counters ctrs = job.getCounters();
                            if (ctrs != null) {
                                // If we got counters, accumulate job metrics and validate file counts.
                                this.recentSlotSeconds.addAndGet(
                                        ctrs.findCounter(JobInProgress.Counter.SLOTS_MILLIS_MAPS).getValue()
                                                / 1000);

                                long filesSucceeded = ctrs.findCounter(RaidCounter.FILES_SUCCEEDED) != null
                                        ? ctrs.findCounter(RaidCounter.FILES_SUCCEEDED).getValue()
                                        : 0;
                                long filesFailed = ctrs.findCounter(RaidCounter.FILES_FAILED) != null
                                        ? ctrs.findCounter(RaidCounter.FILES_FAILED).getValue()
                                        : 0;
                                long filesNoAction = ctrs.findCounter(RaidCounter.FILES_NOACTION) != null
                                        ? ctrs.findCounter(RaidCounter.FILES_NOACTION).getValue()
                                        : 0;
                                long blockFixSimulationFailed = ctrs
                                        .findCounter(RaidCounter.BLOCK_FIX_SIMULATION_FAILED) != null
                                                ? ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_FAILED)
                                                        .getValue()
                                                : 0;
                                long blockFixSimulationSucceeded = ctrs
                                        .findCounter(RaidCounter.BLOCK_FIX_SIMULATION_SUCCEEDED) != null
                                                ? ctrs.findCounter(RaidCounter.BLOCK_FIX_SIMULATION_SUCCEEDED)
                                                        .getValue()
                                                : 0;
                                this.recentNumBlockFixSimulationFailed.addAndGet(blockFixSimulationFailed);
                                this.recentNumBlockFixSimulationSucceeded.addAndGet(blockFixSimulationSucceeded);
                                long fileFixNumReadBytesRemoteRack = ctrs
                                        .findCounter(RaidCounter.FILE_FIX_NUM_READBYTES_REMOTERACK) != null
                                                ? ctrs.findCounter(RaidCounter.FILE_FIX_NUM_READBYTES_REMOTERACK)
                                                        .getValue()
                                                : 0;
                                this.recentNumReadBytesRemoteRack.addAndGet(fileFixNumReadBytesRemoteRack);
                                CounterGroup counterGroup = ctrs.getGroup(LogUtils.LOG_COUNTER_GROUP_NAME);
                                for (Counter ctr : counterGroup) {
                                    Long curVal = ctr.getValue();
                                    if (this.recentLogMetrics.containsKey(ctr.getName())) {
                                        curVal += this.recentLogMetrics.get(ctr.getName());
                                    }
                                    this.recentLogMetrics.put(ctr.getName(), curVal);
                                }

                                int files = jobIndex.get(job).size();

                                if (job.isSuccessful()
                                        && (filesSucceeded + filesFailed + filesNoAction == ((long) files))) {
                                    // job has processed all files
                                    succeedJob(job, filesSucceeded, filesFailed);
                                } else {
                                    failJob(job);
                                }
                            } else {
                                long filesSucceeded = jobIndex.get(job).size();
                                long filesFailed = 0;
                                if (job.isSuccessful()) {
                                    succeedJob(job, filesSucceeded, filesFailed);
                                } else {
                                    failJob(job);
                                }
                            }
                            jobIter.remove();
                            nonRunningJobs.add(job);
                        } else {
                            LOG.info("Job " + job.getID() + "(" + job.getJobName() + " still running");
                        }
                    } catch (Exception e) {
                        LOG.error(StringUtils.stringifyException(e));
                        failJob(job);
                        jobIter.remove();
                        nonRunningJobs.add(job);
                        try {
                            job.killJob();
                        } catch (Exception ee) {
                            LOG.error(StringUtils.stringifyException(ee));
                        }
                    }
                }
            }
            purgeFileIndex();
            cleanupNonRunningJobs(nonRunningJobs);
        }

        /**
         * Delete (best-effort) the input and output directories of jobs.
         * @param nonRunningJobs jobs that have finished running (successfully or not)
         */
        private void cleanupNonRunningJobs(List<Job> nonRunningJobs) {
            for (Job job : nonRunningJobs) {
                Path outDir = null;
                try {
                    outDir = SequenceFileOutputFormat.getOutputPath(job);
                    outDir.getFileSystem(getConf()).delete(outDir, true);
                } catch (IOException e) {
                    LOG.warn("Could not delete output dir " + outDir, e);
                }
                Path[] inDir = null;
                try {
                    // We only create one input directory.
                    inDir = ReconstructionInputFormat.getInputPaths(job);
                    inDir[0].getFileSystem(getConf()).delete(inDir[0], true);
                } catch (IOException e) {
                    LOG.warn("Could not delete input dir " + inDir[0], e);
                }
            }
        }

        /**
         * determines which files have failed for a given job
         */
        private HashMap<String, String> getFailedFiles(Job job) throws IOException {
            HashMap<String, String> failedFiles = new HashMap<String, String>();

            Path outDir = SequenceFileOutputFormat.getOutputPath(job);
            FileSystem fs = outDir.getFileSystem(getConf());
            if (!fs.getFileStatus(outDir).isDir()) {
                throw new IOException(outDir.toString() + " is not a directory");
            }

            FileStatus[] files = fs.listStatus(outDir);

            for (FileStatus f : files) {
                Path fPath = f.getPath();
                if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
                    LOG.info("opening " + fPath.toString());
                    SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());

                    Text key = new Text();
                    Text value = new Text();
                    while (reader.next(key, value)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("key: " + key.toString() + " , value: " + value.toString());
                        }
                        failedFiles.put(key.toString(), value.toString());
                    }
                    reader.close();
                }
            }
            return failedFiles;
        }

        /**
         * purge expired files from the file index and stale entries from the
         * tracking-URL map
         */
        private void purgeFileIndex() {
            Iterator<String> fileIter = fileIndex.keySet().iterator();
            long now = System.currentTimeMillis();
            while (fileIter.hasNext()) {
                String file = fileIter.next();
                if (fileIndex.get(file).isTooOld(now)) {
                    fileIter.remove();
                }
            }
            Iterator<TrackingUrlInfo> tuiIter = this.idToTrakcingUrlMap.values().iterator();
            while (tuiIter.hasNext()) {
                TrackingUrlInfo tui = tuiIter.next();
                if (System.currentTimeMillis() - tui.insertTime > maxWindowTime) {
                    tuiIter.remove();
                }
            }
        }

        // Start jobs for all the lost files.
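        // With the defaults (filesPerTask = 10, TASKS_PER_JOB = 50) a job is
        // submitted as soon as a priority bucket accumulates 500 files; any
        // remainder for that priority is submitted as a final, smaller job.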
        public void startJobs(Map<String, Priority> filePriorities, long detectTime)
                throws IOException, InterruptedException, ClassNotFoundException {
            AtomicLong numFilesSubmitted = new AtomicLong(0);
            for (Priority pri : Priority.values()) {
                Set<String> jobFiles = new HashSet<String>();
                for (Map.Entry<String, Priority> entry : filePriorities.entrySet()) {
                    // Check if file priority matches the current round.
                    if (entry.getValue() != pri) {
                        continue;
                    }
                    jobFiles.add(entry.getKey());
                    // Check if we have hit the threshold for number of files in a job.
                    if (jobFiles.size() == filesPerTask * TASKS_PER_JOB) {
                        boolean succeed = startOneJob(this, pri, jobFiles, detectTime, numFilesSubmitted, null,
                                maxPendingJobs) != null;
                        if (!succeed) {
                            this.numFilesDropped.set(filePriorities.size() - numFilesSubmitted.get());
                            LOG.debug("Submitted a job with max number of files allowed. " + "Num files dropped is "
                                    + this.numFilesDropped.get());
                            return;
                        }
                    }
                }
                if (jobFiles.size() > 0) {
                    boolean succeed = startOneJob(this, pri, jobFiles, detectTime, numFilesSubmitted, null,
                            maxPendingJobs) != null;
                    if (!succeed) {
                        this.numFilesDropped.set(filePriorities.size() - numFilesSubmitted.get());
                        LOG.debug("Submitted a job with max number of files allowed. " + "Num files dropped is "
                                + this.numFilesDropped.get());
                        return;
                    }
                }
            }
            this.numFilesDropped.set(filePriorities.size() - numFilesSubmitted.get());
        }

        /**
         * creates and submits a job, updates file index and job index
         */
        private Job startJob(String jobName, Set<String> lostFiles, Priority priority, long detectTime)
                throws IOException, InterruptedException, ClassNotFoundException {
            Path inDir = new Path(JOB_NAME_PREFIX + "/in/" + jobName);
            Path outDir = new Path(JOB_NAME_PREFIX + "/out/" + jobName);
            List<String> filesInJob = createInputFile(jobName, inDir, lostFiles);
            if (filesInJob.isEmpty())
                return null;

            Configuration jobConf = new Configuration(getConf());
            DistBlockIntegrityMonitor.updateBlockFixerMapreduceConfigs(jobConf, BLOCKFIXER);
            RaidUtils.parseAndSetOptions(jobConf, priority.configOption);
            Job job = new Job(jobConf, jobName);
            job.getConfiguration().set(CORRUPT_FILE_DETECT_TIME, Long.toString(detectTime));
            configureJob(job, this.RECONSTRUCTOR_CLASS);
            job.setJarByClass(getClass());
            job.setMapperClass(ReconstructionMapper.class);
            job.setNumReduceTasks(0);
            job.setInputFormatClass(ReconstructionInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            ReconstructionInputFormat.setInputPaths(job, inDir);
            SequenceFileOutputFormat.setOutputPath(job, outDir);

            submitJob(job, filesInJob, priority);
            List<LostFileInfo> fileInfos = updateFileIndex(jobName, filesInJob, priority);
            // The implementation of submitJob() may not have added the job to
            // jobIndex, so check whether the job is present before recording its
            // file list.
            if (jobIndex.containsKey(job)) {
                jobIndex.put(job, fileInfos);
            }
            numJobsRunning.incrementAndGet();
            return job;
        }

        void submitJob(Job job, List<String> filesInJob, Priority priority)
                throws IOException, InterruptedException, ClassNotFoundException {
            LOG.info("Submitting job");
            DistBlockIntegrityMonitor.this.submitJob(job, filesInJob, priority, this.jobIndex,
                    this.idToTrakcingUrlMap);
        }

        /**
         * records the new job against each lost file in the file index and
         * returns the corresponding list of LostFileInfo entries
         */
        private List<LostFileInfo> updateFileIndex(String jobName, List<String> lostFiles, Priority priority) {
            List<LostFileInfo> fileInfos = new ArrayList<LostFileInfo>();

            for (String file : lostFiles) {
                LostFileInfo fileInfo = fileIndex.get(file);
                if (fileInfo != null) {
                    fileInfo.addJob(jobName, priority);
                } else {
                    fileInfo = new LostFileInfo(file, jobName, priority);
                    fileIndex.put(file, fileInfo);
                }
                fileInfos.add(fileInfo);
            }
            return fileInfos;
        }

        /**
         * creates the input file (containing the names of the files to be 
         * reconstructed)
         */
        private List<String> createInputFile(String jobName, Path inDir, Set<String> lostFiles) throws IOException {
            Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
            FileSystem fs = file.getFileSystem(getConf());
            SequenceFile.Writer fileOut = SequenceFile.createWriter(fs, getConf(), file, LongWritable.class,
                    Text.class);
            long index = 0L;

            List<String> filesAdded = new ArrayList<String>();
            for (String lostFileName : lostFiles) {
                fileOut.append(new LongWritable(index++), new Text(lostFileName));
                filesAdded.add(lostFileName);

                if (index % filesPerTask == 0) {
                    // create a sync point so the SequenceFile can be split here,
                    // giving each map task roughly filesPerTask files
                    fileOut.sync();
                }
            }

            fileOut.close();
            return filesAdded;
        }

        /**
         * Update {@link #lastStatus} so that it can be viewed from outside
         */
        protected void updateStatus() {
            int highPriorityFiles = 0;
            int lowPriorityFiles = 0;
            int lowestPriorityFiles = 0;
            List<JobStatus> jobs = new ArrayList<JobStatus>();
            List<JobStatus> failJobs = new ArrayList<JobStatus>();
            List<JobStatus> simFailJobs = new ArrayList<JobStatus>();
            List<String> highPriorityFileNames = new ArrayList<String>();
            for (Map.Entry<String, LostFileInfo> e : fileIndex.entrySet()) {
                String fileName = e.getKey();
                LostFileInfo fileInfo = e.getValue();
                Priority pri = fileInfo.getHighestPriority();
                if (pri == Priority.HIGH) {
                    highPriorityFileNames.add(fileName);
                    highPriorityFiles++;
                } else if (pri == Priority.LOW) {
                    lowPriorityFiles++;
                } else if (pri == Priority.LOWEST) {
                    lowestPriorityFiles++;
                }
            }
            synchronized (jobIndex) {
                for (Job job : jobIndex.keySet()) {
                    String url = job.getTrackingURL();
                    String name = job.getJobName();
                    JobID jobId = job.getID();
                    jobs.add(new BlockIntegrityMonitor.JobStatus(jobId, name, url, jobIndex.get(job), null));
                }
                for (Job job : failJobIndex.keySet()) {
                    String url = job.getTrackingURL();
                    String name = job.getJobName();
                    JobID jobId = job.getID();
                    failJobs.add(
                            new BlockIntegrityMonitor.JobStatus(jobId, name, url, null, failJobIndex.get(job)));
                }
                for (Job simJob : simFailJobIndex.keySet()) {
                    String url = simJob.getTrackingURL();
                    String name = simJob.getJobName();
                    JobID jobId = simJob.getID();
                    simFailJobs.add(new BlockIntegrityMonitor.JobStatus(jobId, name, url, null,
                            simFailJobIndex.get(simJob)));
                }
            }
            lastStatus = new BlockIntegrityMonitor.Status(highPriorityFiles, lowPriorityFiles, lowestPriorityFiles,
                    jobs, highPriorityFileNames, failJobs, simFailJobs);
            updateRaidNodeMetrics();
        }

        public Status getStatus() {
            return lastStatus;
        }

        abstract void computePrioritiesAndStartJobs(FileSystem fs, Map<String, Integer> lostFiles, long detectTime)
                throws IOException, InterruptedException, ClassNotFoundException;

        protected abstract Map<String, Integer> getLostFiles(FileSystem fs) throws IOException;

        protected abstract void updateRaidNodeMetrics();

        /**
         * hold information about a lost file that is being reconstructed
         */
        class LostFileInfo {

            private String file;
            private List<String> jobNames; // Jobs reconstructing this file.
            private boolean done;
            private List<Priority> priorities;
            private long insertTime;

            public LostFileInfo(String file, String jobName, Priority priority) {
                this.file = file;
                this.jobNames = new ArrayList<String>();
                this.priorities = new ArrayList<Priority>();
                this.done = false;
                this.insertTime = System.currentTimeMillis();
                addJob(jobName, priority);
            }

            public boolean isTooOld(long now) {
                return now - insertTime > maxFixTimeForFile;
            }

            public boolean isDone() {
                return done;
            }

            public void addJob(String jobName, Priority priority) {
                this.jobNames.add(jobName);
                this.priorities.add(priority);
            }

            public Priority getHighestPriority() {
                Priority max = Priority.LOWEST;
                for (Priority p : priorities) {
                    if (p.higherThan(max))
                        max = p;
                }
                return max;
            }

            public String getFile() {
                return file;
            }

            /**
             * Updates state with the completion of a job. If all jobs for this file
             * are done, the file index is updated.
             */
            public void finishJob(String jobName, boolean failed) {
                int idx = jobNames.indexOf(jobName);
                if (idx == -1)
                    return;
                jobNames.remove(idx);
                priorities.remove(idx);
                LOG.info("reconstructing " + file + (failed ? " failed in " : " succeeded in ") + jobName);
                if (jobNames.isEmpty()) {
                    // All jobs dealing with this file are done,
                    // remove this file from the index
                    LostFileInfo removed = fileIndex.remove(file);
                    if (removed == null) {
                        LOG.error("trying to remove file not in file index: " + file);
                    }
                    done = true;
                }
            }
        }

        public String getTrackingUrl(JobID jobId) {
            TrackingUrlInfo tui = this.idToTrakcingUrlMap.get(jobId);
            if (tui == null) {
                return "";
            } else {
                return tui.trackingUrl;
            }
        }
    }

    /**
     * CorruptFileCounter is a periodically running daemon that repeatedly runs
     * raidfsck to count the corrupt files under the directories defined by
     * RAIDNODE_CORRUPT_FILE_COUNTER_DIRECTORIES_KEY
     * @author weiyan
     *
     */
    public class CorruptFileCounter implements Runnable {
        private long filesWithMissingBlksCnt = 0;
        private Map<String, long[]> numStrpWithMissingBlksMap = new HashMap<String, long[]>();
        private Object counterMapLock = new Object();
        private long numNonRaidedMissingBlocks = 0;

        public CorruptFileCounter() {
            for (Codec codec : Codec.getCodecs()) {
                this.numStrpWithMissingBlksMap.put(codec.id, new long[codec.stripeLength + codec.parityLength]);
            }
        }

        public void run() {
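            // raidfsck with "-fsck <dir> -count -retNumStrpsMissingBlks" is assumed
            // to print, in order: (1) the count of unrecoverable (corrupt) files,
            // (2) the count of files with missing blocks, (3) the number of
            // missing blocks in non-raided files, and then, for each codec, the
            // codec id followed by stripeLength + parityLength counts of stripes
            // with that many missing blocks. The loop below parses this layout.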
            RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID).initCorruptFilesMetrics(getConf());
            while (running) {
                TreeMap<String, Long> newUnRecoverableCounterMap = new TreeMap<String, Long>();
                Map<String, Long> newRecoverableCounterMap = new HashMap<String, Long>();
                long newfilesWithMissingBlksCnt = 0;
                String srcDir = "/";
                try {
                    ByteArrayOutputStream bout = new ByteArrayOutputStream();
                    PrintStream ps = new PrintStream(bout, true);
                    RaidShell shell = new RaidShell(getConf(), ps);
                    int res = ToolRunner.run(shell,
                            new String[] { "-fsck", srcDir, "-count", "-retNumStrpsMissingBlks" });
                    shell.close();
                    ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
                    BufferedReader reader = new BufferedReader(new InputStreamReader(bin));
                    String line = reader.readLine();
                    if (line == null) {
                        throw new IOException("Raidfsck fails without output");
                    }
                    Long corruptCount = Long.parseLong(line);
                    LOG.info("The number of corrupt files under " + srcDir + " is " + corruptCount);
                    newUnRecoverableCounterMap.put(srcDir, corruptCount);
                    line = reader.readLine();
                    if (line == null) {
                        throw new IOException("Raidfsck did not print number " + "of files with missing blocks");
                    }

                    // get the number of files with missing blocks;
                    // fsck with '-count' prints this number on line 2
                    long incfilesWithMissingBlks = Long.parseLong(line);
                    LOG.info("The number of files with missing blocks under " + srcDir + " is "
                            + incfilesWithMissingBlks);

                    long numRecoverableFiles = incfilesWithMissingBlks - corruptCount;
                    newRecoverableCounterMap.put(srcDir, numRecoverableFiles);
                    approximateNumRecoverableFiles = numRecoverableFiles;

                    // Add filesWithMissingBlks and numStrpWithMissingBlks only for "/"
                    // dir to avoid duplicates
                    Map<String, long[]> newNumStrpWithMissingBlksMap = new HashMap<String, long[]>();
                    newfilesWithMissingBlksCnt += incfilesWithMissingBlks;
                    // read the array for num stripes with missing blocks

                    line = reader.readLine();
                    if (line == null) {
                        throw new IOException(
                                "Raidfsck did not print the number of " + "missing blocks in non raided files");
                    }
                    long numNonRaided = Long.parseLong(line);

                    for (int i = 0; i < Codec.getCodecs().size(); i++) {
                        line = reader.readLine();
                        if (line == null) {
                            throw new IOException(
                                    "Raidfsck did not print the missing " + "block info for codec at index " + i);
                        }

                        Codec codec = Codec.getCodec(line);
                        long[] incNumStrpWithMissingBlks = new long[codec.stripeLength + codec.parityLength];
                        for (int j = 0; j < incNumStrpWithMissingBlks.length; j++) {
                            line = reader.readLine();
                            if (line == null) {
                                throw new IOException("Raidfsck did not print the array "
                                        + "for number stripes with missing blocks for index " + j);
                            }
                            incNumStrpWithMissingBlks[j] = Long.parseLong(line);
                            LOG.info("The number of stripes with missing blocks at index" + j + "under" + srcDir
                                    + " is " + incNumStrpWithMissingBlks[j]);
                        }
                        newNumStrpWithMissingBlksMap.put(codec.id, incNumStrpWithMissingBlks);
                    }
                    synchronized (counterMapLock) {
                        this.numNonRaidedMissingBlocks = numNonRaided;
                        for (String codeId : newNumStrpWithMissingBlksMap.keySet()) {
                            numStrpWithMissingBlksMap.put(codeId, newNumStrpWithMissingBlksMap.get(codeId));
                        }
                    }
                    reader.close();
                    bin.close();
                } catch (Exception e) {
                    LOG.error("Fail to count the corrupt files under " + srcDir, e);
                }
                synchronized (counterMapLock) {
                    this.filesWithMissingBlksCnt = newfilesWithMissingBlksCnt;
                }
                updateRaidNodeMetrics();
                if (!running) {
                    break;
                }
                try {
                    Thread.sleep(corruptFileCountInterval);
                } catch (InterruptedException ignore) {
                    LOG.info("interrupted");
                }
            }
        }

        public long getNumNonRaidedMissingBlks() {
            synchronized (counterMapLock) {
                return this.numNonRaidedMissingBlocks;
            }
        }

        public long getFilesWithMissingBlksCnt() {
            synchronized (counterMapLock) {
                return filesWithMissingBlksCnt;
            }
        }

        public long[] getNumStrpWithMissingBlksRS() {
            synchronized (counterMapLock) {
                return numStrpWithMissingBlksMap.get("rs");
            }
        }

        protected void updateRaidNodeMetrics() {
            RaidNodeMetrics rnm = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);

            synchronized (counterMapLock) {
                rnm.numFilesWithMissingBlks.set(this.filesWithMissingBlksCnt);
                long[] numStrpWithMissingBlksRS = this.numStrpWithMissingBlksMap.get("rs");

                if (numStrpWithMissingBlksRS != null) {
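                    // Indexes 0-3 correspond to stripes with one to four missing
                    // blocks; everything from index 4 upward is summed into the
                    // five-or-more bucket.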
                    rnm.numStrpsOneMissingBlk.set(numStrpWithMissingBlksRS[0]);
                    rnm.numStrpsTwoMissingBlk.set(numStrpWithMissingBlksRS[1]);
                    rnm.numStrpsThreeMissingBlk.set(numStrpWithMissingBlksRS[2]);
                    rnm.numStrpsFourMissingBlk.set(numStrpWithMissingBlksRS[3]);

                    long tmp_sum = 0;
                    for (int idx = 4; idx < numStrpWithMissingBlksRS.length; idx++) {
                        tmp_sum += numStrpWithMissingBlksRS[idx];
                    }
                    rnm.numStrpsFiveMoreMissingBlk.set(tmp_sum);
                }
            }
        }

        public String getMissingBlksHtmlTable() {
            synchronized (counterMapLock) {
                return RaidUtils.getMissingBlksHtmlTable(this.numNonRaidedMissingBlocks,
                        this.numStrpWithMissingBlksMap);
            }
        }
    }

    /**
     * Get the number of lost blocks per stripe in the source file.
     */
    private Map<Integer, Integer> getLostStripes(Configuration conf, FileStatus stat, FileSystem fs)
            throws IOException {
        Map<Integer, Integer> lostStripes = new HashMap<Integer, Integer>();
        RaidInfo raidInfo = RaidUtils.getFileRaidInfo(stat, conf);
        if (raidInfo.codec == null) {
            // Can not find the parity file, the file is not raided.
            return lostStripes;
        }
        Codec codec = raidInfo.codec;

        if (codec.isDirRaid) {
            RaidUtils.collectDirectoryCorruptBlocksInStripe(conf, (DistributedFileSystem) fs, raidInfo, stat,
                    lostStripes);
        } else {
            RaidUtils.collectFileCorruptBlocksInStripe((DistributedFileSystem) fs, raidInfo, stat, lostStripes);
        }
        return lostStripes;
    }

    public class CorruptFile {
        public String path;
        public long detectTime;
        public volatile int numCorrupt;
        public volatile CorruptFileStatus fileStatus;
        public volatile long lastSubmitTime;

        public CorruptFile(String newPath, int newNumCorrupt, long newDetectTime) {
            this.path = newPath;
            this.numCorrupt = newNumCorrupt;
            this.fileStatus = CorruptFileStatus.POTENTIALLY_CORRUPT;
            this.lastSubmitTime = System.currentTimeMillis();
            this.detectTime = newDetectTime;
        }

        public String toString() {
            return fileStatus.name();
        }
    }

    public class MonitorSet {
        public ConcurrentHashMap<String, CorruptFile> toScanFiles;
        public ExecutorService executor;
        public BlockingQueue<Runnable> scanningQueue;

        public MonitorSet(final String monitorDir) {
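            // Each monitored directory gets its own fixed-size scanner pool
            // (blockFixerScanThreads threads) fed by an unbounded queue, so a
            // backlog in one directory's scans does not starve the others.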
            this.scanningQueue = new LinkedBlockingQueue<Runnable>();
            ThreadFactory factory = new ThreadFactory() {
                final AtomicInteger numThreads = new AtomicInteger();

                public Thread newThread(Runnable r) {
                    Thread t = new Thread(r);
                    t.setName("BlockFix-Scanner-" + monitorDir + "-" + numThreads.getAndIncrement());
                    return t;
                }
            };
            this.executor = new ThreadPoolExecutor(blockFixerScanThreads, blockFixerScanThreads, 0L,
                    TimeUnit.MILLISECONDS, scanningQueue, factory);
            this.toScanFiles = new ConcurrentHashMap<String, CorruptFile>();
        }
    }

    public class CorruptionWorker extends Worker {
        public static final String RAIDNODE_JOB_SUBMIT_NUM_THREADS_KEY = "raid.job.submit.num.threads";
        public static final int RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT = 5;
        public String[] corruptMonitorDirs = null;
        public HashMap<String, MonitorSet> monitorSets;
        public final String OTHERS = "others";
        public HashMap<Priority, HashSet<String>> jobFilesMap;
        public HashMap<Priority, AtomicLong> lastCheckingTimes;
        public AtomicLong numFilesSubmitted = new AtomicLong(0);
        public AtomicLong totalFilesToSubmit = new AtomicLong(0);
        private long blockFixSubmissionInterval = DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL;
        private long blockFixScanSubmissionInterval = DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL;
        private int maxNumDetectionTime;
        // Rolling window of the most recent maxNumDetectionTime samples
        private long[] detectionTimeCollection;
        private int currPos;
        private long totalDetectionTime;
        private long totalCollecitonSize;
        private ExecutorService jobSubmitExecutor;
        private BlockingQueue<Runnable> jobSubmitQueue;
        private int jobSubmitThreads = RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT;

        public CorruptionWorker() {
            super(LogFactory.getLog(CorruptionWorker.class), CorruptBlockReconstructor.class, "blockfixer");
            blockFixerScanThreads = getConf().getInt(RAIDNODE_BLOCK_FIXER_SCAN_NUM_THREADS_KEY,
                    DEFAULT_BLOCK_FIXER_SCAN_NUM_THREADS);
            this.blockFixSubmissionInterval = getConf().getLong(RAIDNODE_BLOCK_FIX_SUBMISSION_INTERVAL_KEY,
                    DEFAULT_BLOCK_FIX_SUBMISSION_INTERVAL);
            this.blockFixScanSubmissionInterval = getConf().getLong(RAIDNODE_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL_KEY,
                    DEFAULT_BLOCK_FIX_SCAN_SUBMISSION_INTERVAL);
            this.corruptMonitorDirs = DistBlockIntegrityMonitor.getCorruptMonitorDirs(getConf());
            this.monitorSets = new HashMap<String, MonitorSet>();
            for (String monitorDir : this.corruptMonitorDirs) {
                this.monitorSets.put(monitorDir, new MonitorSet(monitorDir));
            }
            this.monitorSets.put(OTHERS, new MonitorSet(OTHERS));
            this.jobFilesMap = new HashMap<Priority, HashSet<String>>();
            lastCheckingTimes = new HashMap<Priority, AtomicLong>();
            for (Priority priority : Priority.values()) {
                this.jobFilesMap.put(priority, new HashSet<String>());
                this.lastCheckingTimes.put(priority, new AtomicLong(System.currentTimeMillis()));
            }
            this.maxNumDetectionTime = getConf().getInt(RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED_KEY,
                    DEFAULT_RAIDNODE_MAX_NUM_DETECTION_TIME_COLLECTED);
            detectionTimeCollection = new long[maxNumDetectionTime];
            this.totalCollecitonSize = 0;
            this.totalDetectionTime = 0;
            this.currPos = 0;
            this.jobSubmitThreads = getConf().getInt(RAIDNODE_JOB_SUBMIT_NUM_THREADS_KEY,
                    RAIDNODE_JOB_SUBMIT_NUM_THREADS_DEFAULT);
            this.jobSubmitQueue = new LinkedBlockingQueue<Runnable>();
            ThreadFactory factory = new ThreadFactory() {
                final AtomicInteger numThreads = new AtomicInteger();

                public Thread newThread(Runnable r) {
                    Thread t = new Thread(r);
                    t.setName("BlockFix-Job-Submit-" + numThreads.getAndIncrement());
                    return t;
                }
            };
            this.jobSubmitExecutor = new ThreadPoolExecutor(this.jobSubmitThreads, this.jobSubmitThreads, 0L,
                    TimeUnit.MILLISECONDS, this.jobSubmitQueue, factory);
        }

        public void putDetectionTime(long detectionTime) {
            synchronized (detectionTimeCollection) {
                long oldVal = detectionTimeCollection[currPos];
                detectionTimeCollection[currPos] = detectionTime;
                totalDetectionTime += detectionTime - oldVal;
                currPos++;
                if (currPos == maxNumDetectionTime) {
                    currPos = 0;
                }
                if (totalCollecitonSize < maxNumDetectionTime) {
                    totalCollecitonSize++;
                }
            }
        }

        public double getNumDetectionsPerSec() {
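            // Worked example: 100 samples totalling 50,000 ms of single-thread
            // scan time give 100 * 1000 / 50000 = 2 checks/sec per thread, times
            // blockFixerScanThreads for the pool-wide estimate below.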
            synchronized (detectionTimeCollection) {
                if (totalCollectionSize == 0) {
                    return 0;
                } else {
                    return ((double) totalCollectionSize) * 1000 / totalDetectionTime * blockFixerScanThreads;
                }
                }
            }
        }
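
        // Worked example (illustrative numbers): with blockFixerScanThreads = 8 and
        // a full window of 100 detections totaling 50,000 ms, the estimate is
        // 100 * 1000.0 / 50000 * 8 = 16 detections per second, i.e. the per-thread
        // rate scaled by the number of scan threads.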

        @Override
        protected Map<String, Integer> getLostFiles(FileSystem fs) throws IOException {
            Map<String, Integer> lostFiles = new HashMap<String, Integer>();
            RemoteIterator<Path> cfb = fs.listCorruptFileBlocks(new Path("/"));
            while (cfb.hasNext()) {
                String lostFile = cfb.next().toString();
                Integer count = lostFiles.get(lostFile);
                if (count == null) {
                    lostFiles.put(lostFile, 1);
                } else {
                    lostFiles.put(lostFile, count + 1);
                }
            }
            LOG.info("ListCorruptFileBlocks returned " + lostFiles.size() + " files");
            RaidUtils.filterTrash(getConf(), lostFiles.keySet().iterator());
            LOG.info("getLostFiles returning " + lostFiles.size() + " files");

            return lostFiles;
        }
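
        // Shape of the returned map (hypothetical paths): each iterator element
        // names the file owning one corrupt block, so repeated paths accumulate,
        // e.g. { "/user/a/part-00000" -> 2, "/user/b/part-00003" -> 1 }.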

        public void addToScanSet(String p, int numCorrupt, String monitorDir,
                ConcurrentHashMap<String, CorruptFile> newScanSet, FileSystem fs, long detectTime)
                throws IOException {
            CorruptFile cf = new CorruptFile(p, numCorrupt, detectTime);
            MonitorSet monitorSet = monitorSets.get(monitorDir);
            CorruptFile oldCf = monitorSet.toScanFiles.get(p);
            FileCheckRunnable fcr = new FileCheckRunnable(cf, monitorSet, fs, detectTime, this);
            if (oldCf == null) {
                newScanSet.put(p, cf);
                monitorSet.toScanFiles.put(p, cf);
                // Check the file
                cf.lastSubmitTime = System.currentTimeMillis();
                monitorSet.executor.submit(fcr);
            } else {
                if (oldCf.numCorrupt == numCorrupt) {
                    newScanSet.put(p, oldCf);
                    if (System.currentTimeMillis() - oldCf.lastSubmitTime > this.blockFixScanSubmissionInterval) {
                        // if a file hasn't been checked for a while, check it again.
                        oldCf.lastSubmitTime = System.currentTimeMillis();
                        monitorSet.executor.submit(fcr);
                    }
                } else {
                    cf.detectTime = oldCf.detectTime;
                    newScanSet.put(p, cf);
                    cf.lastSubmitTime = System.currentTimeMillis();
                    monitorSet.executor.submit(fcr);
                }
            }
        }

        /**
         * In JobSubmitRunnable, a MapReduce job that fixes the files in tmpJobFiles
         * is created and submitted to the MapReduce cluster.
         * If submission fails, numFilesDropped is updated and the files in
         * tmpJobFiles are moved back to the original jobFiles set so that they can
         * be fixed by the next job.
         */
        public class JobSubmitRunnable implements Runnable {
            private final Priority priority;
            private final HashSet<String> tmpJobFiles;
            private final HashSet<String> jobFiles;
            private final long detectTime;
            private final AtomicLong lastCheckingTime;
            private final UpdateNumFilesDropped type;

            public JobSubmitRunnable(Priority newPriority, HashSet<String> tmpJobFiles,
                    HashSet<String> originalJobFiles, long newDetectTime, AtomicLong newLastCheckingTime,
                    UpdateNumFilesDropped newType) {
                this.priority = newPriority;
                this.tmpJobFiles = tmpJobFiles;
                this.jobFiles = originalJobFiles;
                this.detectTime = newDetectTime;
                this.lastCheckingTime = newLastCheckingTime;
                this.type = newType;
            }

            public void run() {
                boolean succeed = false;
                try {
                    succeed = startOneJob(CorruptionWorker.this, priority, tmpJobFiles, detectTime,
                            numFilesSubmitted, lastCheckingTime, maxPendingJobs) != null;
                } catch (Throwable ex) {
                    LOG.error("Get Error in blockSubmitRunnable", ex);
                } finally {
                    if (!succeed) {
                        if (type == UpdateNumFilesDropped.SET) {
                            numFilesDropped.set(tmpJobFiles.size());
                        } else if (type == UpdateNumFilesDropped.ADD) {
                            numFilesDropped.addAndGet(tmpJobFiles.size());
                        } else {
                            LOG.error("Hit an unexpected type:" + type.name());
                        }
                        // add back to original job files
                        synchronized (jobFiles) {
                            this.jobFiles.addAll(tmpJobFiles);
                        }
                    }
                }
            }
        }

        // Returns the elapsed time in milliseconds.
        public long addToJobFilesMap(HashMap<Priority, HashSet<String>> jobFilesMap, Priority priority, String path,
                long detectTime) throws IOException, InterruptedException, ClassNotFoundException {
            long startTime = System.currentTimeMillis();
            HashSet<String> jobFiles = jobFilesMap.get(priority);

            synchronized (jobFiles) {
                if (!jobFiles.add(path)) {
                    return System.currentTimeMillis() - startTime;
                }
                totalFilesToSubmit.incrementAndGet();
                // Check if we have hit the threshold for number of files in a job.

                AtomicLong lastCheckingTime = lastCheckingTimes.get(priority);
                if (jobFiles.size() >= filesPerTask * TASKS_PER_JOB) {
                    // Collected enough files to fill a job.
                    this.asyncSubmitJob(jobFiles, priority, detectTime, UpdateNumFilesDropped.ADD);
                } else if (System.currentTimeMillis() - lastCheckingTime.get() > this.blockFixSubmissionInterval
                        && jobFiles.size() > 0) {
                    // Waited long enough; submit whatever has accumulated.
                    this.asyncSubmitJob(jobFiles, priority, detectTime, UpdateNumFilesDropped.SET);
                }
            }
            return System.currentTimeMillis() - startTime;
        }
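
        // The two submission triggers above: a priority bucket is flushed eagerly
        // once it holds filesPerTask * TASKS_PER_JOB paths, and lazily once more
        // than blockFixSubmissionInterval ms have passed since the last check for
        // that priority, so small batches are not starved indefinitely.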

        @Override
        public void shutdown() {
            for (MonitorSet ms : monitorSets.values()) {
                ms.executor.shutdownNow();
            }
            this.jobSubmitExecutor.shutdownNow();
        }

        public Map<String, Map<CorruptFileStatus, Long>> getCounterMap() {
            TreeMap<String, Map<CorruptFileStatus, Long>> results = new TreeMap<String, Map<CorruptFileStatus, Long>>();
            for (String monitorDir : monitorSets.keySet()) {
                MonitorSet ms = monitorSets.get(monitorDir);
                HashMap<CorruptFileStatus, Long> counters = new HashMap<CorruptFileStatus, Long>();
                for (CorruptFileStatus cfs : CorruptFileStatus.values()) {
                    counters.put(cfs, 0L);
                }
                for (CorruptFile cf : ms.toScanFiles.values()) {
                    Long counter = counters.get(cf.fileStatus);
                    if (counter == null) {
                        counter = 0L;
                    }
                    counters.put(cf.fileStatus, counter + 1);
                }
                results.put(monitorDir, counters);
            }
            return results;
        }
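
        // Shape of the returned map (hypothetical values): one entry per monitored
        // directory (including OTHERS), each mapping every CorruptFileStatus to a
        // count, e.g. { "/a" -> { RECOVERABLE -> 12, RAID_UNRECOVERABLE -> 1, ... } }.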

        public ArrayList<CorruptFile> getCorruptFileList(String monitorDir, CorruptFileStatus cfs) {
            ArrayList<CorruptFile> corruptFiles = new ArrayList<CorruptFile>();
            MonitorSet ms = monitorSets.get(monitorDir);
            if (ms == null) {
                return corruptFiles;
            }
            for (CorruptFile cf : ms.toScanFiles.values()) {
                if (cf.fileStatus == cfs) {
                    corruptFiles.add(cf);
                }
            }
            return corruptFiles;
        }

        public Map<String, Map<CorruptFileStatus, Long>> getCorruptFilesCounterMap() {
            return this.getCounterMap();
        }

        public class FileCheckRunnable implements Runnable {
            CorruptFile corruptFile;
            MonitorSet monitorSet;
            FileSystem fs;
            CorruptionWorker worker;
            long detectTime;

            public FileCheckRunnable(CorruptFile newCorruptFile, MonitorSet newMonitorSet, FileSystem newFs,
                    long newDetectTime, CorruptionWorker newWorker) {
                corruptFile = newCorruptFile;
                monitorSet = newMonitorSet;
                fs = newFs;
                detectTime = newDetectTime;
                worker = newWorker;
            }

            public void run() {
                long startTime = System.currentTimeMillis();
                try {
                    if (corruptFile.numCorrupt <= 0) {
                        // Not corrupt
                        return;
                    }
                    ConcurrentHashMap<String, CorruptFile> toScanFiles = monitorSet.toScanFiles;
                    // toScanFiles could be switched before the task gets executed
                    CorruptFile cf = toScanFiles.get(corruptFile.path);
                    if (cf == null || cf.numCorrupt != corruptFile.numCorrupt) {
                        // File is no longer tracked, or its corrupt-block count changed
                        return;
                    }
                    FileStatus stat = null;
                    try {
                        stat = fs.getFileStatus(new Path(corruptFile.path));
                    } catch (FileNotFoundException fnfe) {
                        cf.fileStatus = CorruptFileStatus.NOT_EXIST;
                        return;
                    }
                    // isParityFile returns the Codec of a parity file, or null when the path is not a parity file.
                    Codec codec = BlockIntegrityMonitor.isParityFile(corruptFile.path);
                    long addJobTime = 0;
                    if (codec == null) {
                        if (stat.getReplication() >= notRaidedReplication) {
                            cf.fileStatus = CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE;
                            return;
                        }
                        if (BlockIntegrityMonitor.doesParityDirExist(fs, corruptFile.path)) {
                            Priority priority = Priority.LOW;
                            if (stat.getReplication() > 1) {
                                // If we have a missing block when replication > 1, it is high pri.
                                priority = Priority.HIGH;
                            } else {
                                // Replication == 1. Assume Reed Solomon parity exists.
                                // If we have more than one missing block when replication == 1, then
                                // high pri.
                                priority = (corruptFile.numCorrupt > 1) ? Priority.HIGH : Priority.LOW;
                            }
                            LostFileInfo fileInfo = fileIndex.get(corruptFile.path);
                            if (fileInfo == null || priority.higherThan(fileInfo.getHighestPriority())) {
                                addJobTime = addToJobFilesMap(jobFilesMap, priority, corruptFile.path, detectTime);
                            }
                        }
                    } else {
                        // For parity files, use the total number of corrupt blocks for now.
                        Priority priority = (corruptFile.numCorrupt > 1 || codec.parityLength == 1) ? Priority.HIGH
                                : Priority.LOW;
                        LostFileInfo fileInfo = fileIndex.get(corruptFile.path);
                        if (fileInfo == null || priority.higherThan(fileInfo.getHighestPriority())) {
                            addJobTime = addToJobFilesMap(jobFilesMap, priority, corruptFile.path, detectTime);
                        }
                    }
                    boolean isFileCorrupt = RaidShell.isFileCorrupt((DistributedFileSystem) fs, stat, false,
                            getConf(), null, null);
                    if (isFileCorrupt) {
                        cf.fileStatus = CorruptFileStatus.RAID_UNRECOVERABLE;
                    } else {
                        cf.fileStatus = CorruptFileStatus.RECOVERABLE;
                    }
                    long elapsedTime = System.currentTimeMillis() - startTime - addJobTime;
                    worker.putDetectionTime(elapsedTime);
                } catch (Exception e) {
                    LOG.error("Get Exception ", e);
                }
            }
        }
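
        // Priority rules applied by FileCheckRunnable above, summarized:
        //   - source file, replication > 1, any missing block       -> HIGH
        //   - source file, replication == 1, more than one missing  -> HIGH, else LOW
        //   - parity file, more than one missing or parityLength 1  -> HIGH, else LOW
        // A file already tracked at an equal or higher priority is not resubmitted.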

        /**
         * Acquires the lock on jobFiles, copies its contents into a temporary set,
         * then clears jobFiles and submits a JobSubmitRunnable to the thread pool,
         * which submits a MapReduce job in the background.
         * There is no need to wait for the job submission to finish.
         */
        void asyncSubmitJob(HashSet<String> jobFiles, Priority pri, long detectTime, UpdateNumFilesDropped type)
                throws IOException {
            synchronized (jobFiles) {
                if (jobFiles.isEmpty()) {
                    return;
                }
                HashSet<String> tmpJobFiles = new HashSet<String>();
                tmpJobFiles.addAll(jobFiles);
                jobFiles.clear();
                JobSubmitRunnable jsr = new JobSubmitRunnable(pri, tmpJobFiles, jobFiles, detectTime,
                        lastCheckingTimes.get(pri), type);
                this.jobSubmitExecutor.submit(jsr);
            }
        }

        @Override
        // Compute priorities and start jobs. Higher priorities indicate greater urgency.
        void computePrioritiesAndStartJobs(FileSystem fs, Map<String, Integer> corruptFiles, long detectTime)
                throws IOException, InterruptedException, ClassNotFoundException {

            HashMap<String, ConcurrentHashMap<String, CorruptFile>> newToScanSet = new HashMap<String, ConcurrentHashMap<String, CorruptFile>>();
            // Include "others"
            for (String monitorDir : this.monitorSets.keySet()) {
                newToScanSet.put(monitorDir, new ConcurrentHashMap<String, CorruptFile>());
            }
            numFilesSubmitted.set(0);
            totalFilesToSubmit.set(0);
            for (Map.Entry<String, Integer> entry : corruptFiles.entrySet()) {
                String p = entry.getKey();
                int numCorrupt = entry.getValue();
                // Filter through monitor dirs
                boolean match = false;
                for (String monitorDir : this.corruptMonitorDirs) {
                    if (p.startsWith(monitorDir)) {
                        match = true;
                        addToScanSet(p, numCorrupt, monitorDir, newToScanSet.get(monitorDir), fs, detectTime);
                    }
                }
                if (!match) {
                    addToScanSet(p, numCorrupt, OTHERS, newToScanSet.get(OTHERS), fs, detectTime);
                }
            }
            // switch to new toScanSet
            for (String monitorDir : this.monitorSets.keySet()) {
                MonitorSet ms = this.monitorSets.get(monitorDir);
                ms.toScanFiles = newToScanSet.get(monitorDir);
            }
            for (Priority pri : Priority.values()) {
                HashSet<String> jobFiles = jobFilesMap.get(pri);
                if (System.currentTimeMillis() - lastCheckingTimes.get(pri).get() > this.blockFixSubmissionInterval
                        && jobFiles.size() > 0) {
                    this.asyncSubmitJob(jobFiles, pri, detectTime, UpdateNumFilesDropped.SET);
                }
            }
        }
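
        // Design note: each scan round builds fresh toScanFiles maps and swaps them
        // in afterwards; a queued FileCheckRunnable re-reads monitorSet.toScanFiles
        // when it runs, so checks against files dropped by the latest scan (or whose
        // corrupt count changed) return early without acting.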

        @Override
        protected void updateRaidNodeMetrics() {
            RaidNodeMetrics rnm = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);

            rnm.corruptFilesHighPri.set(lastStatus.highPriorityFiles);
            rnm.corruptFilesLowPri.set(lastStatus.lowPriorityFiles);
            rnm.numFilesToFix.set(this.fileIndex.size());
            rnm.numFilesToFixDropped.set(this.numFilesDropped.get());

            // Flush statistics out to the RaidNode
            incrFilesFixed(this.recentNumFilesSucceeded.get());
            incrFileFixFailures(this.recentNumFilesFailed.get());
            incrNumBlockFixSimulationFailures(this.recentNumBlockFixSimulationFailed.get());
            incrNumBlockFixSimulationSuccess(this.recentNumBlockFixSimulationSucceeded.get());
            incrFileFixReadBytesRemoteRack(this.recentNumReadBytesRemoteRack.get());
            LogUtils.incrLogMetrics(this.recentLogMetrics);

            rnm.blockFixSlotSeconds.inc(this.recentSlotSeconds.get());
            this.recentNumFilesSucceeded.set(0);
            this.recentNumFilesFailed.set(0);
            this.recentSlotSeconds.set(0);
            this.recentNumBlockFixSimulationFailed.set(0);
            this.recentNumBlockFixSimulationSucceeded.set(0);
            this.recentNumReadBytesRemoteRack.set(0);
            this.recentLogMetrics.clear();

            if (rnm.corruptFiles == null) {
                return;
            }
            Map<String, Map<CorruptFileStatus, Long>> corruptFilesCounterMap = this.getCounterMap();
            for (String dir : this.corruptMonitorDirs) {
                if (corruptFilesCounterMap.containsKey(dir) && rnm.corruptFiles.containsKey(dir)) {
                    Map<CorruptFileStatus, Long> maps = corruptFilesCounterMap.get(dir);
                    Long raidUnrecoverable = maps.get(CorruptFileStatus.RAID_UNRECOVERABLE);
                    Long notRaidUnrecoverable = maps.get(CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE);
                    if (raidUnrecoverable == null) {
                        raidUnrecoverable = 0L;
                    }
                    if (notRaidUnrecoverable == null) {
                        notRaidUnrecoverable = 0L;
                    }
                    rnm.corruptFiles.get(dir).set(raidUnrecoverable + notRaidUnrecoverable);
                } else {
                    rnm.corruptFiles.get(dir).set(-1L);
                }
            }
        }
    }

    public class DecommissioningWorker extends Worker {

        DecommissioningWorker() {
            super(LogFactory.getLog(DecommissioningWorker.class),
                    BlockReconstructor.DecommissioningBlockReconstructor.class, "blockcopier");
        }

        /**
         * Gets the list of decommissioning files from the namenode and filters out
         * files that are currently being regenerated or were recently regenerated.
         */
        @Override
        protected Map<String, Integer> getLostFiles(FileSystem fs) throws IOException {
            return DistBlockIntegrityMonitor.this.getLostFiles(LIST_DECOMMISSION_FILE_PATTERN,
                    new String[] { "-list-corruptfileblocks", "-list-decommissioningblocks", "-limit",
                            Integer.toString(lostFilesLimit) });
        }

        @Override
        void computePrioritiesAndStartJobs(FileSystem fs, Map<String, Integer> decommissioningFiles,
                long detectTime) throws IOException, InterruptedException, ClassNotFoundException {

            Map<String, Priority> fileToPriority = new HashMap<String, Priority>(decommissioningFiles.size());

            for (String file : decommissioningFiles.keySet()) {

                // Assume Reed-Solomon parity exists. Files losing more blocks than
                // the RS parity length get a bump to LOW priority; otherwise,
                // copying jobs get the lowest priority.
                Priority priority = ((decommissioningFiles.get(file) > Codec.getCodec("rs").parityLength)
                        ? Priority.LOW
                        : Priority.LOWEST);

                LostFileInfo fileInfo = fileIndex.get(file);
                if (fileInfo == null || priority.higherThan(fileInfo.getHighestPriority())) {
                    fileToPriority.put(file, priority);
                }
            }
            LOG.info("Found " + fileToPriority.size() + " new lost files");

            startJobs(fileToPriority, detectTime);
        }
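
        // Illustrative example (the parityLength of the "rs" codec is configuration
        // dependent, not fixed here): if it were 4, a file with 5 blocks on
        // decommissioning nodes would be queued at Priority.LOW, and a file with 3
        // such blocks at Priority.LOWEST.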

        @Override
        protected void updateRaidNodeMetrics() {
            RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID).decomFilesLowPri
                    .set(lastStatus.highPriorityFiles);
            RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID).decomFilesLowestPri
                    .set(lastStatus.lowPriorityFiles);
            RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID).numFilesToCopy.set(fileIndex.size());

            incrFilesCopied(recentNumFilesSucceeded.get());
            incrFileCopyFailures(recentNumFilesFailed.get());
            incrNumBlockFixSimulationFailures(this.recentNumBlockFixSimulationFailed.get());
            incrNumBlockFixSimulationSuccess(this.recentNumBlockFixSimulationSucceeded.get());
            LogUtils.incrLogMetrics(this.recentLogMetrics);

            RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID).blockCopySlotSeconds
                    .inc(recentSlotSeconds.get());

            // Reset temporary values now that they've been flushed
            recentNumFilesSucceeded.set(0);
            recentNumFilesFailed.set(0);
            recentSlotSeconds.set(0);
            recentNumBlockFixSimulationFailed.set(0);
            recentNumBlockFixSimulationSucceeded.set(0);
            recentLogMetrics.clear();
        }

    }

    // ---- Methods which can be overridden by tests ----

    /**
     * Gets a list of lost files from the name node via DFSck
     * 
     * @param pattern A pattern matching a single file in DFSck's output
     * @param dfsckArgs Arguments to pass to DFSck
     * @return A map of lost files' filenames to num lost blocks for that file 
     */
    protected Map<String, Integer> getLostFiles(Pattern pattern, String[] dfsckArgs) throws IOException {

        Map<String, Integer> lostFiles = new HashMap<String, Integer>();
        BufferedReader reader = getLostFileReader(dfsckArgs);
        reader.readLine(); // skip the header line
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = pattern.matcher(line);
            if (!m.find()) {
                continue;
            }

            String fileName = m.group(1).trim();
            Integer numLost = lostFiles.get(fileName);
            numLost = (numLost == null) ? 1 : numLost + 1;
            lostFiles.put(fileName, numLost);
        }
        LOG.info("FSCK returned " + lostFiles.size() + " files with args " + Arrays.toString(dfsckArgs));
        RaidUtils.filterTrash(getConf(), lostFiles.keySet().iterator());
        LOG.info("getLostFiles returning " + lostFiles.size() + " files with args " + Arrays.toString(dfsckArgs));
        return lostFiles;
    }
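
    // Illustrative parse (hypothetical output line; the real patterns are defined
    // elsewhere in this class): if group(1) of the pattern captures the file path in
    // a line like "/user/a/part-00000: CORRUPT block blk_123", each matching line
    // adds one lost block to that file's count.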

    private BufferedReader getLostFileReader(String[] dfsckArgs) throws IOException {

        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(bout, true);
        DFSck dfsck = new DFSck(getConf(), ps);
        try {
            dfsck.run(dfsckArgs);
        } catch (Exception e) {
            throw new IOException(e);
        }
        ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        return new BufferedReader(new InputStreamReader(bin));
    }
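
    // Design note: DFSck's report is captured into an in-memory buffer and re-read
    // line by line, which keeps parsing simple but holds the whole fsck output in
    // memory; callers bound that output via the -limit argument they pass to DFSck.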

    public void configureJob(Job job, Class<? extends BlockReconstructor> reconstructorClass) {

        ((JobConf) job.getConfiguration()).setUser(RaidNode.JOBUSER);
        ((JobConf) job.getConfiguration()).setClass(ReconstructionMapper.RECONSTRUCTOR_CLASS_TAG,
                reconstructorClass, BlockReconstructor.class);
    }

    void submitJob(Job job, List<String> filesInJob, Priority priority, Map<Job, List<LostFileInfo>> jobIndex,
            Map<JobID, TrackingUrlInfo> idToTrackingUrlMap)
            throws IOException, InterruptedException, ClassNotFoundException {
        job.submit();
        LOG.info("Job " + job.getID() + "(" + job.getJobName() + ") started");
        jobIndex.put(job, null);
        idToTrackingUrlMap.put(job.getID(), new TrackingUrlInfo(job.getTrackingURL(), System.currentTimeMillis()));
    }

    /**
     * Returns the number of MapReduce jobs currently running.
     */
    public int jobsRunning() {
        return (corruptionWorker.numJobsRunning.get() + decommissioningWorker.numJobsRunning.get());
    }

    static class ReconstructionInputFormat extends SequenceFileInputFormat<LongWritable, Text> {

        protected static final Log LOG = LogFactory.getLog(ReconstructionInputFormat.class);

        /**
         * Splits the input files into tasks, each handled by a single node.
         * To do this we have to read the input files and cut a split every
         * filesPerTask items in each sequence file.
         */
        @Override
        public List<InputSplit> getSplits(JobContext job) throws IOException {
            long filesPerTask = DistBlockIntegrityMonitor.getFilesPerTask(job.getConfiguration());

            Path[] inPaths = getInputPaths(job);

            List<InputSplit> splits = new ArrayList<InputSplit>();

            long fileCounter = 0;

            for (Path inPath : inPaths) {

                FileSystem fs = inPath.getFileSystem(job.getConfiguration());

                if (!fs.getFileStatus(inPath).isDir()) {
                    throw new IOException(inPath.toString() + " is not a directory");
                }

                FileStatus[] inFiles = fs.listStatus(inPath);

                for (FileStatus inFileStatus : inFiles) {
                    Path inFile = inFileStatus.getPath();

                    if (!inFileStatus.isDir() && (inFile.getName().equals(job.getJobName() + IN_FILE_SUFFIX))) {

                        fileCounter++;
                        SequenceFile.Reader inFileReader = new SequenceFile.Reader(fs, inFile,
                                job.getConfiguration());

                        long startPos = inFileReader.getPosition();
                        long counter = 0;

                        // create an input split every filesPerTask items in the sequence
                        LongWritable key = new LongWritable();
                        Text value = new Text();
                        try {
                            while (inFileReader.next(key, value)) {
                                if (counter % filesPerTask == filesPerTask - 1L) {
                                    splits.add(new FileSplit(inFile, startPos,
                                            inFileReader.getPosition() - startPos, null));
                                    startPos = inFileReader.getPosition();
                                }
                                counter++;
                            }

                            // create input split for remaining items if necessary
                            // this includes the case where no splits were created by the loop
                            if (startPos != inFileReader.getPosition()) {
                                splits.add(new FileSplit(inFile, startPos, inFileReader.getPosition() - startPos,
                                        null));
                            }
                        } finally {
                            inFileReader.close();
                        }
                    }
                }
            }
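
            // Worked example (illustrative): with filesPerTask = 10 and a sequence
            // file of 25 entries, the loop above cuts splits after items 10 and 20,
            // and the trailing check adds a final split for the remaining 5 entries.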

            LOG.info("created " + splits.size() + " input splits from " + fileCounter + " files");

            return splits;
        }

        /**
         * Indicates that the input file can be split.
         */
        @Override
        public boolean isSplitable(JobContext job, Path file) {
            return true;
        }
    }

    /**
     * Mapper for reconstructing stripes with lost blocks
     */
    static class ReconstructionMapper extends Mapper<LongWritable, Text, Text, Text> {

        protected static final Log LOG = LogFactory.getLog(ReconstructionMapper.class);

        public static final String RECONSTRUCTOR_CLASS_TAG = "hdfs.blockintegrity.reconstructor";
        private BlockReconstructor reconstructor;
        public RaidProtocol raidnode;
        private UnixUserGroupInformation ugi;
        RaidProtocol rpcRaidnode;
        private long detectTimeInput;
        private String taskId;

        void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
            try {
                this.ugi = UnixUserGroupInformation.login(conf, true);
            } catch (LoginException e) {
                throw new IOException(e);
            }

            this.rpcRaidnode = RaidShell.createRPCRaidnode(address, conf, ugi);
            this.raidnode = RaidShell.createRaidnode(rpcRaidnode);
        }

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {

            super.setup(context);

            Configuration conf = context.getConfiguration();
            taskId = conf.get("mapred.task.id");
            Codec.initializeCodecs(conf);
            initializeRpc(conf, RaidNode.getAddress(conf));

            Class<? extends BlockReconstructor> reconstructorClass = context.getConfiguration()
                    .getClass(RECONSTRUCTOR_CLASS_TAG, null, BlockReconstructor.class);

            if (reconstructorClass == null) {
                LOG.error("No class supplied for reconstructor " + "(prop " + RECONSTRUCTOR_CLASS_TAG + ")");
                context.progress();
                return;
            }

            // Dynamically instantiate the reconstructor from the configured class.
            try {
                Constructor<? extends BlockReconstructor> ctor = reconstructorClass
                        .getConstructor(Configuration.class);

                reconstructor = ctor.newInstance(conf);

            } catch (Exception ex) {
                throw new IOException(
                        "Could not instantiate a block reconstructor based on class " + reconstructorClass, ex);
            }

            detectTimeInput = Long.parseLong(conf.get("corrupt_detect_time"));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            RPC.stopProxy(rpcRaidnode);
        }

        /**
         * Reconstructs one file with lost blocks.
         */
        @Override
        public void map(LongWritable key, Text fileText, Context context) throws IOException, InterruptedException {
            long sTime = System.currentTimeMillis();
            String fileStr = fileText.toString();
            Path file = new Path(fileStr);
            String prefix = "[" + fileStr + "]      ";
            LOG.info("");
            LOG.info(prefix + "============================= BEGIN =============================");
            LOG.info(prefix + "Reconstruct File: " + fileStr);
            LOG.info(prefix + "Block Missing Detection Time: " + dateFormat.format(detectTimeInput));
            long waitTime = sTime - detectTimeInput;
            LOG.info(prefix + "Scheduling Time: " + (waitTime / 1000) + " seconds");
            FileSystem fs = file.getFileSystem(context.getConfiguration());
            LogUtils.logWaitTimeMetrics(waitTime, getMaxPendingJobs(context.getConfiguration()),
                    getFilesPerTask(context.getConfiguration()), LOGTYPES.FILE_FIX_WAITTIME, fs, context);
            long recoveryTime = -1;

            try {
                boolean reconstructed = reconstructor.reconstructFile(file, context);
                if (reconstructed) {
                    recoveryTime = System.currentTimeMillis() - detectTimeInput;
                    context.getCounter(RaidCounter.FILES_SUCCEEDED).increment(1L);
                    LogUtils.logRaidReconstructionMetrics(LOGRESULTS.SUCCESS, 0, null, file, -1,
                            LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, fs, null, context, recoveryTime);
                    LOG.info(prefix + "File Reconstruction Time: " + ((System.currentTimeMillis() - sTime) / 1000)
                            + " seconds");
                    LOG.info(prefix + "Total Recovery Time: " + (recoveryTime / 1000) + " seconds");
                } else {
                    LOG.info(prefix + "File has already been fixed, No action");
                    context.getCounter(RaidCounter.FILES_NOACTION).increment(1L);
                }
            } catch (Throwable e) {
                LOG.error(prefix + "Reconstructing file " + file + " failed", e);
                LogUtils.logRaidReconstructionMetrics(LOGRESULTS.FAILURE, 0, null, file, -1,
                        LOGTYPES.OFFLINE_RECONSTRUCTION_FILE, fs, e, context, -1);
                recoveryTime = Integer.MAX_VALUE; // presumably a sentinel: stays > 0 so the failure is still reported in finally
                // report file as failed
                context.getCounter(RaidCounter.FILES_FAILED).increment(1L);
                String outkey = DistBlockIntegrityMonitor.FAILED_FILE + "," + fileStr;
                context.write(new Text(outkey), new Text(taskId));
            } finally {
                if (recoveryTime > 0) {
                    // Send recoveryTime to raidnode
                    try {
                        raidnode.sendRecoveryTime(fileStr, recoveryTime, taskId);
                    } catch (Exception e) {
                        LOG.error(prefix + "Failed to send recovery time ", e);
                    }
                }
                LOG.info(prefix + "============================= END =============================");
                LOG.info("");
            }
            context.progress();
        }
    }

    /**
     * Get the status of the entire block integrity monitor.
     * The status returned represents the aggregation of the statuses of all the 
     * integrity monitor's components.
     * 
     * @return The status of the block integrity monitor 
     */
    @Override
    public BlockIntegrityMonitor.Status getAggregateStatus() {
        Status fixer = corruptionWorker.getStatus();
        Status copier = decommissioningWorker.getStatus();

        List<JobStatus> jobs = new ArrayList<JobStatus>();
        List<JobStatus> simFailedJobs = new ArrayList<JobStatus>();
        List<JobStatus> failedJobs = new ArrayList<JobStatus>();
        List<String> highPriFileNames = new ArrayList<String>();
        int numHighPriFiles = 0;
        int numLowPriFiles = 0;
        int numLowestPriFiles = 0;
        if (fixer != null) {
            jobs.addAll(fixer.jobs);
            simFailedJobs.addAll(fixer.simFailJobs);
            failedJobs.addAll(fixer.failJobs);
            if (fixer.highPriorityFileNames != null) {
                highPriFileNames.addAll(fixer.highPriorityFileNames);
            }
            numHighPriFiles += fixer.highPriorityFiles;
            numLowPriFiles += fixer.lowPriorityFiles;
            numLowestPriFiles += fixer.lowestPriorityFiles;
        }
        if (copier != null) {
            jobs.addAll(copier.jobs);
            simFailedJobs.addAll(copier.simFailJobs);
            failedJobs.addAll(copier.failJobs);
            if (copier.highPriorityFileNames != null) {
                highPriFileNames.addAll(copier.highPriorityFileNames);
            }
            numHighPriFiles += copier.highPriorityFiles;
            numLowPriFiles += copier.lowPriorityFiles;
            numLowestPriFiles += copier.lowestPriorityFiles;
        }

        return new Status(numHighPriFiles, numLowPriFiles, numLowestPriFiles, jobs, highPriFileNames, failedJobs,
                simFailedJobs);
    }

    @Override
    public Worker getCorruptionMonitor() {
        return this.corruptionWorker;
    }

    @Override
    public Worker getDecommissioningMonitor() {
        return this.decommissioningWorker;
    }

    @Override
    public Runnable getCorruptFileCounter() {
        return this.corruptFileCounterWorker;
    }
}