org.apache.hadoop.raid.SmokeTestThread.java — source code

Java tutorial

Introduction

Below is the full source code for org.apache.hadoop.raid.SmokeTestThread.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.CRC32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.Priority;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import java.util.HashSet;
import java.util.Set;

/*
 * In this smoke-test Callable, the raidnode tries to raid a small file
 * and then submits a small job to fix that file, verifying that block fixing works.
 * If the test fails, the caller is expected to shut down the raidnode.
 */
public class SmokeTestThread implements Callable<Boolean> {
    public static final Log LOG = LogFactory.getLog(SmokeTestThread.class);
    /** Codec used for the test raid (Reed-Solomon). */
    public static final String TEST_CODEC = "rs";
    /** Number of source blocks written to the test file. */
    public static final long NUM_SOURCE_BLOCKS = 3;
    /** Block size of the test file, in bytes. */
    public static final long BLOCK_SIZE = 512L;
    /** Poll interval while waiting for jobs/block reports, in milliseconds. */
    public static final long SLEEP_TIME = 3000L;
    public static final String SMOKE_TEST_TIMEOUT_KEY = "raid.smoke.test.timeout";
    public static final long DEFAULT_SMOKE_TEST_TIME_OUT = 120000L;
    public long timeOut = DEFAULT_SMOKE_TEST_TIME_OUT;
    /** Last IOException seen by call(); null if the test succeeded. */
    public IOException ioe = null;
    public FileSystem fileSys = null;
    public String testFileDirectory = "/test/";
    public String testFileBase = testFileDirectory + "smoketest";
    public Random rand = new Random();
    public DistRaidNode distRaidNode = null;
    /** CRC32 of the original test-file contents, used to verify the fixed file. */
    public CRC32 checksum = new CRC32();

    /**
     * @param rn the raidnode under test; must be a DistRaidNode. Its
     *           configuration supplies the smoke-test timeout.
     */
    public SmokeTestThread(RaidNode rn) {
        distRaidNode = (DistRaidNode) rn;
        timeOut = distRaidNode.getConf().getLong(SMOKE_TEST_TIMEOUT_KEY, DEFAULT_SMOKE_TEST_TIME_OUT);
    }

    /**
     * Runs the end-to-end smoke test:
     * <ol>
     *   <li>creates a 3-block test file and records its CRC32,</li>
     *   <li>raids it with the "rs" codec and verifies the parity file,</li>
     *   <li>reports the first block as bad and submits a block-fixing job,</li>
     *   <li>re-reads the file and compares checksums.</li>
     * </ol>
     *
     * @return true if every step succeeds within the configured timeout;
     *         false otherwise, with the failure recorded in {@link #ioe}.
     */
    @Override
    public Boolean call() throws Exception {
        Path testPath = null;
        try {
            fileSys = FileSystem.get(distRaidNode.getConf());
            // Create a small file with 3 blocks; random suffix avoids collisions
            // between concurrent/repeated smoke tests.
            String testFile = testFileBase + rand.nextLong();
            testPath = new Path(testFile);
            if (fileSys.exists(testPath)) {
                fileSys.delete(testPath, true);
            }
            long blockSize = BLOCK_SIZE;
            FSDataOutputStream stm = fileSys.create(testPath, true,
                    fileSys.getConf().getInt("io.file.buffer.size", 4096), (short) 3, blockSize);
            // Write 3 blocks of random data, accumulating the CRC for later
            // verification of the fixed file.
            byte[] b = new byte[(int) blockSize];
            for (int i = 0; i < NUM_SOURCE_BLOCKS; i++) {
                rand.nextBytes(b);
                stm.write(b);
                checksum.update(b);
            }
            stm.close();
            LOG.info(
                    "[SMOKETEST] Created a test file: " + testFile + " with CRC32 checksum " + checksum.getValue());
            PolicyInfo info = new PolicyInfo(testFile, distRaidNode.getConf());
            info.setCodecId(TEST_CODEC);
            info.setSrcPath(testFileDirectory);
            info.setShouldRaid(true);
            // modTimePeriod=0 so the freshly-written file is immediately eligible.
            info.setProperty("modTimePeriod", "0");
            info.setProperty("targetReplication", "1");
            info.setProperty("metaReplication", "1");
            FileStatus stat = fileSys.getFileStatus(testPath);
            ArrayList<FileStatus> fstats = new ArrayList<FileStatus>();
            fstats.add(stat);
            // Raid it using rs
            DistRaid dr = DistRaidNode.raidFiles(distRaidNode.getConf(), distRaidNode.jobMonitor, fstats, info);
            LOG.info("[SMOKETEST] RS Raid test file: " + testFile);
            if (dr == null) {
                throw new IOException("Failed to start a raiding job");
            }
            long startTime = System.currentTimeMillis();
            while (!dr.checkComplete() && System.currentTimeMillis() - startTime < timeOut) {
                Thread.sleep(SLEEP_TIME);
            }
            if (!dr.checkComplete()) {
                throw new IOException("Failed to finish the raiding job in " + (timeOut / 1000) + " seconds");
            }
            if (!dr.successful()) {
                throw new IOException("Failed to raid the file " + testFile);
            }
            LOG.info("[SMOKETEST] Finish raiding test file: " + testFile);
            // Verify the parity file exists and has the expected block count/length.
            Codec codec = Codec.getCodec(TEST_CODEC);
            Path parityPath = new Path(codec.getParityPrefix(), RaidNode.makeRelative(testPath));
            FileStatus parityStat = fileSys.getFileStatus(parityPath);
            long numParityBlocks = RaidNode.numBlocks(parityStat);
            long expectedNumParityBlocks = RaidNode.numStripes(NUM_SOURCE_BLOCKS, codec.stripeLength)
                    * codec.parityLength;
            if (numParityBlocks != expectedNumParityBlocks
                    || parityStat.getLen() != expectedNumParityBlocks * BLOCK_SIZE) {
                throw new IOException("[SMOKETEST] Parity file " + parityPath + " has " + numParityBlocks
                        + " blocks and " + parityStat.getLen() + " bytes, but we expect " + expectedNumParityBlocks
                        + " blocks and " + (expectedNumParityBlocks * BLOCK_SIZE) + " bytes");
            }
            LOG.info("[SMOKETEST] Verification of parity file " + parityPath + " succeeded");
            LocatedBlock[] blocks = new LocatedBlock[1];
            LocatedBlocks lbs = ((DistributedFileSystem) fileSys).getLocatedBlocks(testPath, 0, Integer.MAX_VALUE);
            // Corrupt the first block by reporting it bad to the namenode.
            blocks[0] = lbs.get(0);
            ((DistributedFileSystem) fileSys).getClient().reportBadBlocks(blocks);
            LOG.info("[SMOKETEST] Finish corrupting the first block " + lbs.get(0).getBlock());
            // Submit a job to "fix" it.
            Set<String> jobFiles = new HashSet<String>();
            jobFiles.add(testFile);
            Job job = DistBlockIntegrityMonitor.startOneJob(
                    (DistBlockIntegrityMonitor.Worker) distRaidNode.blockIntegrityMonitor.getCorruptionMonitor(),
                    Priority.HIGH, jobFiles, System.currentTimeMillis(), new AtomicLong(0),
                    new AtomicLong(System.currentTimeMillis()), Integer.MAX_VALUE);
            startTime = System.currentTimeMillis();
            while (!job.isComplete() && System.currentTimeMillis() - startTime < timeOut) {
                Thread.sleep(SLEEP_TIME);
            }
            if (!job.isComplete()) {
                throw new IOException("Failed to finish the blockfixing job in " + (timeOut / 1000) + " seconds");
            }
            if (!job.isSuccessful()) {
                throw new IOException("Failed to fix the file " + testFile);
            }
            LOG.info("[SMOKETEST] Finish blockfixing test file: " + testFile);
            // Wait until the namenode no longer reports the first block as corrupt.
            startTime = System.currentTimeMillis();
            while (((DistributedFileSystem) fileSys).getLocatedBlocks(testPath, 0, Integer.MAX_VALUE).get(0)
                    .isCorrupt() && System.currentTimeMillis() - startTime < timeOut) {
                Thread.sleep(SLEEP_TIME);
            }
            // Re-read the whole file and compare its CRC32 against the original.
            CRC32 newChk = new CRC32();
            FSDataInputStream readStm = fileSys.open(testPath);
            try {
                int num;
                while ((num = readStm.read(b)) >= 0) {
                    newChk.update(b, 0, num);
                }
            } finally {
                // Close the INPUT stream; the original code mistakenly
                // re-closed the already-closed output stream and leaked this one.
                readStm.close();
            }
            if (newChk.getValue() != checksum.getValue()) {
                throw new IOException(
                        "Fixed file's checksum " + newChk.getValue() + " != original one " + checksum.getValue());
            }
            LOG.info("[SMOKETEST] Verification of fixed test file: " + testFile);
            return true;
        } catch (IOException ex) {
            LOG.error("Get IOException in SmokeTestThread", ex);
            ioe = ex;
            return false;
        } catch (Throwable ex) {
            LOG.error("Get Error in SmokeTestThread", ex);
            ioe = new IOException(ex);
            return false;
        } finally {
            try {
                // testPath may still be null if FileSystem.get() threw;
                // guard against passing null into delete().
                if (fileSys != null && testPath != null) {
                    fileSys.delete(testPath, true);
                }
            } catch (IOException ioe) {
                LOG.error("Get error during deletion", ioe);
            }
        }
    }
}