org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java Source code

Introduction

Here is the source code for org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java.
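
The class implements the Hadoop Tool interface, so one way to launch it is through ToolRunner, passing the properties file with the -c/--confFile option that run() parses. A minimal sketch, assuming a hypothetical loadtest.properties file available on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator;

public class LoadTestDriver {
    public static void main(String[] args) throws Exception {
        // -c/--confFile names the load test properties file (hypothetical name here),
        // which HCatMixUtils.getInputStream() resolves from the classpath
        ToolRunner.run(new Configuration(), new HadoopLoadGenerator(),
                new String[]{"-c", "loadtest.properties"});
    }
}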

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hcatalog.hcatmix.load;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hcatalog.hcatmix.HCatMixUtils;
import org.apache.hcatalog.hcatmix.load.hadoop.HadoopUtils;
import org.apache.hcatalog.hcatmix.load.hadoop.IntervalResult;
import org.apache.hcatalog.hcatmix.load.hadoop.ReduceResult;
import org.apache.pig.tools.cmdline.CmdLineParser;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.text.ParseException;
import java.util.Properties;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * This class launches a Hadoop job to load test a system. Each map task keeps incrementing
 * the number of threads over time and repeatedly executes the task {@link org.apache.hcatalog.hcatmix.load.tasks.Task}.
 * It measures the time taken to execute the task and returns the statistics collected over time.
 */
public class HadoopLoadGenerator extends Configured implements Tool {
    public final String JOB_NAME = "hcat-load-generator";

    public static final String METASTORE_TOKEN_KEY = "metaStoreToken";
    public static final String METASTORE_TOKEN_SIGNATURE = "metaStoreTokenSig";

    private FileSystem fs;

    private static final Logger LOG = LoggerFactory.getLogger(HadoopLoadGenerator.class);

    /**
     * Configuration keys that can be set in the properties file, along with their default values
     */
    public enum Conf {
        NUM_MAPPERS("num.mappers", 30),
        THREAD_INCREMENT_COUNT("thread.increment.count", 5),
        THREAD_INCREMENT_INTERVAL_MINUTES("thread.increment.interval.minutes", 1),
        THREAD_COMPLETION_BUFFER_MINUTES("thread.completion.buffer.minutes", 1),
        MAP_RUN_TIME_MINUTES("map.runtime.minutes", 3),
        STAT_COLLECTION_INTERVAL_MINUTE("stat.collection.interval.minutes", 2),
        INPUT_DIR("input.dir", "/tmp/hcatmix/loadtest/input"),
        OUTPUT_DIR("output.dir", "/tmp/hcatmix/loadtest/output"),
        TASK_CLASS_NAMES("task.class.names", null);

        public final String propName;
        public final int defaultValue;
        public final String defaultValueStr;

        Conf(final String propName, final int defaultValue) {
            this.propName = propName;
            this.defaultValue = defaultValue;
            this.defaultValueStr = null;
        }

        Conf(final String propName, final String defaultValue) {
            this.propName = propName;
            this.defaultValue = -1;
            this.defaultValueStr = defaultValue;
        }

        public String getJobConfKey() {
            return "hcatmix." + propName;
        }
    }
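
    /*
     * A minimal sketch of the properties file read through this enum; the values shown are
     * simply the defaults defined above, and the task class name is hypothetical (any
     * implementation of org.apache.hcatalog.hcatmix.load.tasks.Task would be named here):
     *
     *   num.mappers=30
     *   thread.increment.count=5
     *   thread.increment.interval.minutes=1
     *   thread.completion.buffer.minutes=1
     *   map.runtime.minutes=3
     *   stat.collection.interval.minutes=2
     *   input.dir=/tmp/hcatmix/loadtest/input
     *   output.dir=/tmp/hcatmix/loadtest/output
     *   task.class.names=org.apache.hcatalog.hcatmix.load.tasks.SomeLoadTask
     */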

    public HadoopLoadGenerator() {
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new HadoopLoadGenerator(), args);
    }

    @Override
    public int run(String[] args) throws Exception {
        CmdLineParser opts = new CmdLineParser(args);
        String confFileName = null;

        opts.registerOpt('c', "confFile", CmdLineParser.ValueExpected.REQUIRED);

        char opt;
        try {
            while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) {
                switch (opt) {
                case 'c':
                    confFileName = opts.getValStr();
                    break;
                default:
                    throw new IllegalArgumentException("Unrecognized option");
                }
            }
        } catch (ParseException pe) {
            System.err.println("Couldn't parse the command line arguments, " + pe.getMessage());
            usage();
        }
        runLoadTest(confFileName, getConf());
        return 1;
    }

    private void usage() {
        System.out.println("TODO");
        System.exit(1);
    }

    /**
     * Prepare the input directory/jobConf and launch the hadoop job for load testing
     *
     * @param confFileName The properties file for the task, should be available in the classpath
     * @param conf the Hadoop configuration to use, may be null
     * @return results of the load test, keyed by timestamp
     * @throws IOException
     * @throws MetaException
     * @throws TException
     */
    public SortedMap<Long, ReduceResult> runLoadTest(String confFileName, Configuration conf)
            throws Exception, MetaException, TException {
        JobConf jobConf;
        if (conf != null) {
            jobConf = new JobConf(conf);
        } else {
            jobConf = new JobConf(new Configuration());
        }
        InputStream confFileIS;
        try {
            confFileIS = HCatMixUtils.getInputStream(confFileName);
        } catch (Exception e) {
            LOG.error("Couldn't load configuration file " + confFileName);
            throw e;
        }
        Properties props = new Properties();
        try {
            props.load(confFileIS);
        } catch (IOException e) {
            LOG.error("Couldn't load properties file: " + confFileName, e);
            throw e;
        }

        LOG.info("Loading configuration file: " + confFileName);
        addToJobConf(jobConf, props, Conf.MAP_RUN_TIME_MINUTES);
        addToJobConf(jobConf, props, Conf.STAT_COLLECTION_INTERVAL_MINUTE);
        addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_COUNT);
        addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_INTERVAL_MINUTES);
        addToJobConf(jobConf, props, Conf.THREAD_COMPLETION_BUFFER_MINUTES);

        int numMappers = Integer
                .parseInt(props.getProperty(Conf.NUM_MAPPERS.propName, "" + Conf.NUM_MAPPERS.defaultValue));
        Path inputDir = new Path(props.getProperty(Conf.INPUT_DIR.propName, Conf.INPUT_DIR.defaultValueStr));
        Path outputDir = new Path(props.getProperty(Conf.OUTPUT_DIR.propName, Conf.OUTPUT_DIR.defaultValueStr));

        jobConf.setJobName(JOB_NAME);
        jobConf.setNumMapTasks(numMappers);
        jobConf.setMapperClass(HCatMapper.class);
        jobConf.setJarByClass(HCatMapper.class);
        jobConf.setReducerClass(HCatReducer.class);
        jobConf.setMapOutputKeyClass(LongWritable.class);
        jobConf.setMapOutputValueClass(IntervalResult.class);
        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(ReduceResult.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);
        jobConf.set(Conf.TASK_CLASS_NAMES.getJobConfKey(),
                props.getProperty(Conf.TASK_CLASS_NAMES.propName, Conf.TASK_CLASS_NAMES.defaultValueStr));

        fs = FileSystem.get(jobConf);
        Path jarRoot = new Path("/tmp/hcatmix_jar_" + new Random().nextInt());
        HadoopUtils.uploadClasspathAndAddToJobConf(jobConf, jarRoot);
        fs.deleteOnExit(jarRoot);

        FileInputFormat.setInputPaths(jobConf, createInputFiles(inputDir, numMappers));
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(jobConf, outputDir);

        // Set up delegation token required for hiveMetaStoreClient in map task
        HiveConf hiveConf = new HiveConf(HadoopLoadGenerator.class);
        HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveConf);
        String tokenStr = hiveClient.getDelegationToken(UserGroupInformation.getCurrentUser().getUserName(),
                "mapred");
        Token<? extends AbstractDelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
        token.decodeFromUrlString(tokenStr);
        token.setService(new Text(METASTORE_TOKEN_SIGNATURE));
        jobConf.getCredentials().addToken(new Text(METASTORE_TOKEN_KEY), token);

        // Submit the job and wait for it to complete, then read the output
        LOG.info("Submitting hadoop job");
        RunningJob j = JobClient.runJob(jobConf);
        LOG.info("JobID is: " + j.getID());
        if (!j.isSuccessful()) {
            throw new IOException("Job failed");
        }
        return readResult(outputDir, jobConf);
    }

    /**
     * Read the reduce output from the HDFS output directory and return the results
     * @param outputDir where to read the data from. Expects the files to be {@link SequenceFile}
     * @param jobConf the job configuration used to read the sequence files
     * @return results keyed by timestamp
     * @throws IOException
     */
    private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException {
        SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>();
        FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });
        for (FileStatus status : files) {
            Path path = status.getPath();
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf);
            LongWritable timeStamp = new LongWritable();
            ReduceResult result = new ReduceResult();
            while (reader.next(timeStamp, result)) {
                LOG.info("Timestamp: " + timeStamp);
                LOG.info("ThreadCount: " + result.getThreadCount());
                LOG.info("Stats:\n" + result.getStatistics());
                LOG.info("Errors: " + result.getNumErrors());
                timeseriesResults.put(timeStamp.get(), result);
                timeStamp = new LongWritable(); // use fresh objects so the stored result isn't overwritten by the next read
                result = new ReduceResult();
            }
            reader.close();
        }
        return timeseriesResults;
    }

    private static void addToJobConf(JobConf jobConf, Properties props, Conf conf) {
        jobConf.set(conf.getJobConfKey(), props.getProperty(conf.propName, "" + conf.defaultValue));
    }

    /**
     * Create the input directory with one dummy input file per mapper
     * @param inputDir the parent directory under which per-mapper input directories are created
     * @param numMappers the number of mappers, and hence the number of input paths to create
     * @return the input paths for the job, one per mapper
     * @throws IOException
     */
    private Path[] createInputFiles(final Path inputDir, final int numMappers) throws IOException {
        Path[] paths = new Path[numMappers];
        if (!fs.exists(inputDir)) {
            LOG.info("Input Directory doesn't exist will create input dir: " + inputDir);
            if (!fs.mkdirs(inputDir)) {
                HCatMixUtils.logAndThrow(new RuntimeException("Couldn't create input directory: " + inputDir));
            }
        } else {
            LOG.info("Input directory already exists, skipping creation : " + inputDir);
        }

        for (int i = 0; i < numMappers; i++) {
            Path childDir = new Path(inputDir, "input_" + i);
            if (!fs.exists(childDir)) {
                if (!fs.mkdirs(childDir)) {
                    HCatMixUtils.logAndThrow(
                            new RuntimeException("Couldn't create input child directory: " + childDir));
                }
            }
            Path childFile = new Path(childDir, "input");
            if (!fs.exists(childFile)) {
                OutputStream out = fs.create(childFile);
                PrintWriter pw = new PrintWriter(out);
                pw.println("Dummy Input");
                pw.close();
            }
            paths[i] = childDir;
        }

        return paths;
    }
}
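
As a usage note, the SortedMap<Long, ReduceResult> returned by runLoadTest() can also be consumed directly instead of relying on the logs written while reading the reduce output. A minimal sketch, again assuming a hypothetical loadtest.properties file on the classpath; the accessors used are the same ones readResult() logs above:

import java.util.Map;
import java.util.SortedMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator;
import org.apache.hcatalog.hcatmix.load.hadoop.ReduceResult;

public class LoadTestReport {
    public static void main(String[] args) throws Exception {
        // "loadtest.properties" is a hypothetical classpath resource using the keys from Conf
        SortedMap<Long, ReduceResult> results =
                new HadoopLoadGenerator().runLoadTest("loadtest.properties", new Configuration());

        // Results are keyed by timestamp, so iteration order is chronological
        for (Map.Entry<Long, ReduceResult> entry : results.entrySet()) {
            ReduceResult result = entry.getValue();
            System.out.println("Timestamp: " + entry.getKey()
                    + ", threads: " + result.getThreadCount()
                    + ", errors: " + result.getNumErrors());
            System.out.println(result.getStatistics());
        }
    }
}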