alluxio.cli.JournalCrashTest.java Source code

Java tutorial

Introduction

Here is the source code for alluxio.cli.JournalCrashTest.java

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.cli;

import alluxio.AlluxioURI;
import alluxio.Configuration;
import alluxio.PropertyKey;
import alluxio.RuntimeConstants;
import alluxio.client.WriteType;
import alluxio.client.file.FileSystem;
import alluxio.client.file.options.CreateFileOptions;
import alluxio.client.file.options.DeleteOptions;
import alluxio.exception.AlluxioException;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.util.CommonUtils;
import alluxio.util.io.PathUtils;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Class to perform Journal crash test. The clients issue commands to the master, and the master
 * generates journal events. Check if the master can generate and reproduce the journal correctly.
 */
public final class JournalCrashTest {

    /**
     * Private constructor: every member of this class is static and the test is driven through
     * {@link #main(String[])}, so no instance is ever needed.
     */
    private JournalCrashTest() {
    } // prevent instantiation

    /**
     * The kinds of workload a client thread can run against the master.
     */
    enum ClientOpType {
        /** Repeatedly create an empty file. */
        CREATE_FILE,
        /** Repeatedly create a file and then delete it. */
        CREATE_DELETE_FILE,
        /** Repeatedly create a file and then rename it by appending "-rename" to its path. */
        CREATE_RENAME_FILE
    }

    /**
     * A worker thread that repeatedly issues one kind of file operation to the master through the
     * shared file system client until it is told to stop.
     */
    static class ClientThread extends Thread {
        /** The operation this thread repeats. */
        private final ClientOpType mOpType;
        /** Path prefix under which this thread creates its files; the operation index is appended. */
        private final String mWorkDir;

        /** Stop flag set by the supervisor; accessed only while holding this object's monitor. */
        private boolean mIsStopped = false;
        /** Count of operations that completed without an exception; doubles as the next file index. */
        private int mSuccessNum = 0;

        /**
         * @param workDir the working directory for this thread on Alluxio
         * @param opType the type of operation this thread should do
         */
        public ClientThread(String workDir, ClientOpType opType) {
            mWorkDir = workDir;
            mOpType = opType;
        }

        /**
         * @return the type of operation this thread should do
         */
        public ClientOpType getOpType() {
            return mOpType;
        }

        /**
         * @return the number of operations that completed successfully
         */
        public int getSuccessNum() {
            return mSuccessNum;
        }

        /**
         * @return the working directory of this thread on Alluxio
         */
        public String getWorkDir() {
            return mWorkDir;
        }

        /**
         * Issues operations in a loop until the stop flag is set. Every operation that completes
         * without an exception increments the success counter and is followed by a 100 ms pause;
         * a failed operation is retried immediately with the same file index.
         */
        @Override
        public void run() {
            while (!isStopped()) {
                try {
                    performOperation(new AlluxioURI(mWorkDir + mSuccessNum));
                } catch (Exception e) {
                    // Failures are expected while the master is down: retry the same index.
                    continue;
                }
                mSuccessNum++;
                CommonUtils.sleepMs(100);
            }
        }

        /**
         * @param isStopped signal from supervisor to stop this thread
         */
        public synchronized void setIsStopped(boolean isStopped) {
            mIsStopped = isStopped;
        }

        /**
         * @return the current value of the stop flag, read under this object's monitor
         */
        private synchronized boolean isStopped() {
            return mIsStopped;
        }

        /**
         * Runs a single operation of this thread's type against the given path.
         *
         * @param target the file to operate on
         * @throws Exception if any file system call fails
         */
        private void performOperation(AlluxioURI target) throws Exception {
            if (mOpType == ClientOpType.CREATE_FILE) {
                sFileSystem.createFile(target, sCreateFileOptions).close();
                return;
            }
            if (mOpType == ClientOpType.CREATE_DELETE_FILE) {
                createIgnoringExisting(target);
                sFileSystem.delete(target);
                return;
            }
            if (mOpType == ClientOpType.CREATE_RENAME_FILE) {
                createIgnoringExisting(target);
                sFileSystem.rename(target, new AlluxioURI(target + "-rename"));
            }
        }

        /**
         * Creates an empty file, treating "file already exists" as success. Any other failure
         * propagates to the caller.
         *
         * @param target the file to create
         * @throws Exception if creation fails for a reason other than the file already existing
         */
        private void createIgnoringExisting(AlluxioURI target) throws Exception {
            try {
                sFileSystem.createFile(target, sCreateFileOptions).close();
            } catch (FileAlreadyExistsException e) {
                // The file is already there, which is all the follow-up operation needs.
            }
        }
    }

    /** Logger used by the test driver. */
    private static final Logger LOG = LoggerFactory.getLogger(JournalCrashTest.class);

    // The two Exit Codes are used to tell script if the test runs well.
    private static final int EXIT_FAILED = 1;
    private static final int EXIT_SUCCESS = 0;

    /** Options passed to every createFile call issued by the client threads; set in main. */
    private static CreateFileOptions sCreateFileOptions = null;
    /** The client threads of the current round; rebuilt by setupClientThreads. */
    private static List<ClientThread> sClientThreadList = null;
    /** Number of client threads running create/delete operations ("deletes" option). */
    private static int sCreateDeleteClientNum;
    /** Number of client threads running create operations ("creates" option). */
    private static int sCreateFileClientNum;
    /** Number of client threads running create/rename operations ("renames" option). */
    private static int sCreateRenameClientNum;
    /** The maximum time a master should ever be alive. */
    private static long sMaxAliveTimeMs;
    /** Root directory on Alluxio under which the client threads work ("testDir" option). */
    private static String sTestDir;
    /** The Alluxio Client. This can be shared by all the threads. */
    private static FileSystem sFileSystem = null;
    /** The total time to run this test. */
    private static long sTotalTimeMs;

    /**
     * Queries the master to verify that every operation a client thread counted as successful is
     * reflected in the file system. Stops at the first mismatch.
     *
     * @return true if all counted operations are visible as expected, false otherwise
     * @throws Exception if querying the file system fails
     */
    private static boolean checkStatus() throws Exception {
        for (ClientThread client : sClientThreadList) {
            ClientOpType expectedOp = client.getOpType();
            String baseDir = client.getWorkDir();
            int acknowledged = client.getSuccessNum();
            LOG.info("Expected Status: OpType[{}] WorkDir[{}] SuccessNum[{}].", expectedOp, baseDir,
                    acknowledged);
            int index = 0;
            while (index < acknowledged) {
                if (!isReproduced(expectedOp, new AlluxioURI(baseDir + index))) {
                    return false;
                }
                index++;
            }
        }
        return true;
    }

    /**
     * Checks the state expected after one successful operation of the given type: the file exists
     * (create), the file does not exist (create/delete), or the "-rename" file exists
     * (create/rename). Logs an error describing the mismatch when the check fails.
     *
     * @param opType the operation that was performed
     * @param target the path the operation was performed on
     * @return true if the file system matches the expectation, false otherwise
     * @throws Exception if querying the file system fails
     */
    private static boolean isReproduced(ClientOpType opType, AlluxioURI target) throws Exception {
        if (opType == ClientOpType.CREATE_FILE) {
            if (sFileSystem.exists(target)) {
                return true;
            }
            LOG.error("File not exist for create test. Check failed! File: {}", target);
            return false;
        }
        if (opType == ClientOpType.CREATE_DELETE_FILE) {
            if (!sFileSystem.exists(target)) {
                return true;
            }
            LOG.error("File exists for create/delete test. Check failed! File: {}", target);
            return false;
        }
        if (opType == ClientOpType.CREATE_RENAME_FILE) {
            if (sFileSystem.exists(new AlluxioURI(target + "-rename"))) {
                return true;
            }
            LOG.error("File not exist for create/rename test. Check failed! File: {}-rename", target);
            return false;
        }
        return true;
    }

    /**
     * Crashes the Alluxio Master: runs a bash one-liner that sends 'kill -9' to every process whose
     * command line matches both "java" and "alluxio.master.AlluxioMaster", waits for the shell to
     * finish and then sleeps for one second. Any failure is logged and otherwise ignored.
     */
    private static void killMaster() {
        String killScript = "for pid in `ps -Aww -o pid,command | grep -i \"[j]ava\" | grep "
                + "\"alluxio.master.AlluxioMaster\" | awk '{print $1}'`; do kill -9 \"$pid\"; done";
        String[] shellInvocation = { "/usr/bin/env", "bash", "-c", killScript };
        try {
            Process shell = Runtime.getRuntime().exec(shellInvocation);
            shell.waitFor();
            CommonUtils.sleepMs(LOG, 1000);
        } catch (Exception e) {
            LOG.error("Error when killing Master", e);
        }
    }

    /**
     * Runs the crash test.
     *
     * <p>After stopping any running cluster and starting a fresh master, the test repeats rounds
     * until the configured total time has elapsed. Each round cleans the test directory, launches
     * the client threads, lets the master live for a random time, kills it, stops the clients,
     * restarts the master and checks that every operation the clients counted as successful is
     * still visible. The process exits with {@code EXIT_SUCCESS} only if every round passed.
     *
     * @param args the command line options; see {@link #parseInputArgs(String[])}
     */
    public static void main(String[] args) {
        // Parse the input args.
        if (!parseInputArgs(args)) {
            System.exit(EXIT_FAILED);
        }

        System.out.println("Stop the current Alluxio cluster...");
        stopCluster();

        // Use WriteType.NONE; the intent is that this test can work without AlluxioWorker.
        sCreateFileOptions = CreateFileOptions.defaults().setWriteType(WriteType.NONE);
        // Set the max retry to avoid long pending for client disconnect.
        if (System.getProperty(PropertyKey.USER_RPC_RETRY_MAX_NUM_RETRY.toString()) == null) {
            System.setProperty(PropertyKey.USER_RPC_RETRY_MAX_NUM_RETRY.toString(), "10");
        }

        System.out.println("Start Journal Crash Test...");
        long startTimeMs = System.currentTimeMillis();
        boolean ret = true;
        startMaster();

        int rounds = 0;
        while (System.currentTimeMillis() - startTimeMs < sTotalTimeMs) {
            rounds++;
            long aliveTimeMs = (long) (Math.random() * sMaxAliveTimeMs) + 100;
            LOG.info("Round {}: Planning Master Alive Time {}ms.", rounds, aliveTimeMs);

            System.out.println("Round " + rounds + " : Launch Clients...");
            sFileSystem = FileSystem.Factory.get();
            try {
                // The delete must be recursive: from the second round on the directory still holds
                // the files of the previous round, and a non-recursive delete of a non-empty
                // directory is rejected. Leftover files would make the create and rename clients
                // fail on index 0 forever, turning the status check into a no-op.
                sFileSystem.delete(new AlluxioURI(sTestDir), DeleteOptions.defaults().setRecursive(true));
            } catch (Exception e) {
                // Expected when the test directory does not exist yet (e.g. in the first round).
                LOG.debug("Could not delete test directory {}", sTestDir, e);
            }

            // Launch all the client threads.
            setupClientThreads();
            for (Thread thread : sClientThreadList) {
                thread.start();
            }

            CommonUtils.sleepMs(LOG, aliveTimeMs);
            System.out.println("Round " + rounds + " : Crash Master...");
            killMaster();
            for (ClientThread clientThread : sClientThreadList) {
                clientThread.setIsStopped(true);
            }
            for (Thread thread : sClientThreadList) {
                try {
                    thread.join();
                } catch (InterruptedException e) {
                    LOG.error("Error when waiting thread", e);
                }
            }

            System.out.println("Round " + rounds + " : Check Status...");
            startMaster();
            boolean checkSuccess = false;
            try {
                checkSuccess = checkStatus();
            } catch (Exception e) {
                LOG.error("Failed to check status", e);
            }
            CliUtils.printPassInfo(checkSuccess);
            ret &= checkSuccess;
        }

        stopCluster();
        System.exit(ret ? EXIT_SUCCESS : EXIT_FAILED);
    }

    /**
     * Parses the input args with a command line format, using
     * {@link org.apache.commons.cli.CommandLineParser}, and stores the results in the static
     * configuration fields. This method prints the help information if parsing fails, if a numeric
     * option is not a valid number, or if --help is specified. The static fields are only assigned
     * when every option was parsed successfully.
     *
     * @param args the input args
     * @return true if parsing succeeded and --help wasn't specified, false otherwise
     */
    private static boolean parseInputArgs(String[] args) {
        Options options = new Options();
        options.addOption("help", false, "Show help for this test");
        options.addOption("maxAlive", true,
                "The maximum time a master should ever be alive during the test, in seconds");
        options.addOption("totalTime", true,
                "The total time to run this test, in seconds." + " This value should be greater than [maxAlive]");
        options.addOption("creates", true, "Number of Client Threads to request create operations");
        options.addOption("deletes", true, "Number of Client Threads to request create/delete operations");
        options.addOption("renames", true, "Number of Client Threads to request create/rename operations");
        options.addOption("testDir", true, "Test Directory on Alluxio");

        CommandLineParser parser = new DefaultParser();
        CommandLine cmd;
        try {
            cmd = parser.parse(options, args);
        } catch (ParseException e) {
            LOG.error("Failed to parse input args", e);
            printHelp(options);
            return false;
        }
        if (cmd.hasOption("help")) {
            printHelp(options);
            return false;
        }

        long maxAliveTimeMs;
        long totalTimeMs;
        int createFileClientNum;
        int createDeleteClientNum;
        int createRenameClientNum;
        try {
            // The time options are given in seconds and stored in milliseconds.
            maxAliveTimeMs = 1000 * Long.parseLong(cmd.getOptionValue("maxAlive", "5"));
            totalTimeMs = 1000 * Long.parseLong(cmd.getOptionValue("totalTime", "20"));
            createFileClientNum = Integer.parseInt(cmd.getOptionValue("creates", "2"));
            createDeleteClientNum = Integer.parseInt(cmd.getOptionValue("deletes", "2"));
            createRenameClientNum = Integer.parseInt(cmd.getOptionValue("renames", "2"));
        } catch (NumberFormatException e) {
            // A malformed number is a usage error, not a crash: report it like any other bad input.
            LOG.error("Failed to parse numeric option value", e);
            printHelp(options);
            return false;
        }

        sMaxAliveTimeMs = maxAliveTimeMs;
        sTotalTimeMs = totalTimeMs;
        sCreateFileClientNum = createFileClientNum;
        sCreateDeleteClientNum = createDeleteClientNum;
        sCreateRenameClientNum = createRenameClientNum;
        sTestDir = cmd.getOptionValue("testDir", "/default_tests_files");
        return true;
    }

    /**
     * Prints the usage message for this test, including an example invocation.
     *
     * @param options the supported command line options
     */
    private static void printHelp(Options options) {
        new HelpFormatter().printHelp(
                String.format("java -cp %s %s", RuntimeConstants.ALLUXIO_JAR,
                        JournalCrashTest.class.getCanonicalName()),
                "Test the Master Journal System in a crash scenario", options,
                "e.g. options '-maxAlive 5 -totalTime 20 -creates 2 -deletes 2 -renames 2' "
                        + "will launch total 6 clients connecting to the Master and the Master "
                        + "will crash randomly with the max alive time 5 seconds.",
                true);
    }

    /**
     * Replaces the client thread list with freshly constructed (not yet started) threads: first
     * the create clients, then the create/delete clients, then the create/rename clients. Each
     * thread gets its own working directory under the test directory.
     */
    private static void setupClientThreads() {
        sClientThreadList = new ArrayList<>();
        addClientThreads("/createFile", sCreateFileClientNum, ClientOpType.CREATE_FILE);
        addClientThreads("/createDelete", sCreateDeleteClientNum, ClientOpType.CREATE_DELETE_FILE);
        addClientThreads("/createRename", sCreateRenameClientNum, ClientOpType.CREATE_RENAME_FILE);
    }

    /**
     * Appends {@code count} client threads of one operation type to the client thread list.
     *
     * @param dirPrefix the directory name prefix, placed under the test directory and followed by
     *        the thread index and a trailing slash
     * @param count the number of threads to add
     * @param opType the operation the added threads perform
     */
    private static void addClientThreads(String dirPrefix, int count, ClientOpType opType) {
        for (int index = 0; index < count; index++) {
            sClientThreadList.add(new ClientThread(sTestDir + dirPrefix + index + "/", opType));
        }
    }

    /**
     * Starts Alluxio Master by executing {@code bin/alluxio-start.sh master} from the configured
     * Alluxio home directory, waits for the script to exit and then sleeps for one second. A
     * non-zero exit code is logged as a warning; any other failure is logged and otherwise ignored.
     */
    private static void startMaster() {
        String alluxioStartPath = PathUtils.concatPath(Configuration.get(PropertyKey.HOME), "bin",
                "alluxio-start.sh");
        try {
            // Pass the script path and its argument as separate tokens. A single command string
            // would be split on whitespace, breaking an Alluxio home that contains spaces.
            int exitCode = new ProcessBuilder(alluxioStartPath, "master").start().waitFor();
            if (exitCode != 0) {
                LOG.warn("'{} master' exited with code {}", alluxioStartPath, exitCode);
            }
            CommonUtils.sleepMs(LOG, 1000);
        } catch (Exception e) {
            LOG.error("Error when starting Master", e);
        }
    }

    /**
     * Stops the current Alluxio cluster by executing {@code bin/alluxio-stop.sh all} from the
     * configured Alluxio home directory, waits for the script to exit and then sleeps for one
     * second. This is used for preparation and clean up. A non-zero exit code is logged as a
     * warning; any other failure is logged and otherwise ignored.
     * To crash the Master, use {@link #killMaster()}.
     */
    private static void stopCluster() {
        String alluxioStopPath = PathUtils.concatPath(Configuration.get(PropertyKey.HOME), "bin",
                "alluxio-stop.sh");
        try {
            // Pass the script path and its argument as separate tokens. A single command string
            // would be split on whitespace, breaking an Alluxio home that contains spaces.
            int exitCode = new ProcessBuilder(alluxioStopPath, "all").start().waitFor();
            if (exitCode != 0) {
                LOG.warn("'{} all' exited with code {}", alluxioStopPath, exitCode);
            }
            CommonUtils.sleepMs(LOG, 1000);
        } catch (Exception e) {
            LOG.error("Error when stop Alluxio cluster", e);
        }
    }
}