org.apache.tez.mapreduce.examples.RPCLoadGen.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.mapreduce.examples.RPCLoadGen.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.mapreduce.examples;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.examples.TezExampleBase;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.processor.SimpleProcessor;
import org.apache.tez.util.StopWatch;

public class RPCLoadGen extends TezExampleBase {

    private static final Logger LOG = LoggerFactory.getLogger(RPCLoadGen.class);

    private static final String VIA_RPC = "viaRpc";
    private static final byte VIA_RPC_BYTE = (byte) 0x00;
    private static final String VIA_HDFS_DIST_CACHE = "viaHdfsDistCache";
    private static final byte VIA_HDFS_DIST_CACHE_BYTE = (byte) 0x01;
    private static final String VIA_HDFS_DIRECT_READ = "viaHdfsDirectRead";
    private static final byte VIA_HDFS_DIRECT_READ_BYTE = (byte) 0x02;
    private static final Random random = new Random();

    private static final String DISK_PAYLOAD_NAME = RPCLoadGen.class.getSimpleName() + "_payload";

    private FileSystem fs;
    private Path resourcePath;

    @Override
    protected final int runJob(String[] args, TezConfiguration tezConf, TezClient tezClient)
            throws TezException, InterruptedException, IOException {
        LOG.info("Running: " + this.getClass().getSimpleName());
        String mode = VIA_RPC;
        if (args.length == 4) {
            if (args[3].equals(VIA_RPC) || args[3].equals(VIA_HDFS_DIRECT_READ)
                    || args[3].equals(VIA_HDFS_DIST_CACHE)) {
                mode = args[3];
            } else {
                printUsage();
                return 2;
            }
        }

        int numTasks = Integer.parseInt(args[0]);
        int maxSleepTimeMillis = Integer.parseInt(args[1]);
        int payloadSizeBytes = Integer.parseInt(args[2]);
        LOG.info("Parameters: numTasks=" + numTasks + ", maxSleepTime(ms)=" + maxSleepTimeMillis
                + ", payloadSize(bytes)=" + payloadSizeBytes + ", mode=" + mode);

        DAG dag = createDAG(tezConf, numTasks, maxSleepTimeMillis, payloadSizeBytes, mode);
        try {
            return runDag(dag, false, LOG);
        } finally {
            if (fs != null) {
                if (resourcePath != null) {
                    fs.delete(resourcePath, false);
                }
            }
        }
    }

    @Override
    protected void printUsage() {
        System.err.println("Usage: " + "RPCLoadGen <numTasks> <max_sleep_time_millis> <get_task_payload_size> ["
                + "<" + VIA_RPC + ">|" + VIA_HDFS_DIST_CACHE + "|" + VIA_HDFS_DIRECT_READ + "]");
        ToolRunner.printGenericCommandUsage(System.err);
    }

    @Override
    protected final int validateArgs(String[] otherArgs) {
        return (otherArgs.length >= 3 && otherArgs.length <= 4) ? 0 : 2;
    }

    private DAG createDAG(TezConfiguration conf, int numTasks, int maxSleepTimeMillis, int payloadSize, String mode)
            throws IOException {

        Map<String, LocalResource> localResourceMap = new HashMap<String, LocalResource>();
        UserPayload payload = createUserPayload(conf, maxSleepTimeMillis, payloadSize, mode, localResourceMap);

        Vertex vertex = Vertex.create("RPCLoadVertex",
                ProcessorDescriptor.create(RPCSleepProcessor.class.getName()).setUserPayload(payload), numTasks)
                .addTaskLocalFiles(localResourceMap);

        return DAG.create("RPCLoadGen").addVertex(vertex);
    }

    private UserPayload createUserPayload(TezConfiguration conf, int maxSleepTimeMillis, int payloadSize,
            String mode, Map<String, LocalResource> localResources) throws IOException {
        ByteBuffer payload;
        if (mode.equals(VIA_RPC)) {
            if (payloadSize < 5) {
                payloadSize = 5; // To Configure the processor
            }
            byte[] payloadBytes = new byte[payloadSize];
            random.nextBytes(payloadBytes);
            payload = ByteBuffer.wrap(payloadBytes);
            payload.put(4, VIA_RPC_BYTE); // ViaRPC
        } else {
            // Actual payload
            byte[] payloadBytes = new byte[5];
            payload = ByteBuffer.wrap(payloadBytes);

            // Disk payload
            byte[] diskPayload = new byte[payloadSize];
            random.nextBytes(diskPayload);
            fs = FileSystem.get(conf);
            resourcePath = new Path(Path.SEPARATOR + "tmp", DISK_PAYLOAD_NAME);
            resourcePath = fs.makeQualified(resourcePath);
            FSDataOutputStream dataOut = fs.create(resourcePath, true);
            dataOut.write(diskPayload);
            dataOut.close();
            fs.setReplication(resourcePath, (short) 10);
            FileStatus fileStatus = fs.getFileStatus(resourcePath);

            if (mode.equals(VIA_HDFS_DIST_CACHE)) {
                LocalResource lr = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(resourcePath),
                        LocalResourceType.ARCHIVE.FILE, LocalResourceVisibility.PRIVATE, fileStatus.getLen(),
                        fileStatus.getModificationTime());
                localResources.put(DISK_PAYLOAD_NAME, lr);
                payload.put(4, VIA_HDFS_DIST_CACHE_BYTE); // ViaRPC
            } else if (mode.equals(VIA_HDFS_DIRECT_READ)) {
                payload.put(4, VIA_HDFS_DIRECT_READ_BYTE); // ViaRPC
            }
        }

        payload.putInt(0, maxSleepTimeMillis);
        return UserPayload.create(payload);
    }

    public static class RPCSleepProcessor extends SimpleProcessor {

        private final int sleepTimeMax;
        private final byte modeByte;

        public RPCSleepProcessor(ProcessorContext context) {
            super(context);
            sleepTimeMax = getContext().getUserPayload().getPayload().getInt(0);
            modeByte = getContext().getUserPayload().getPayload().get(4);
        }

        @Override
        public void run() throws Exception {
            StopWatch sw = new StopWatch().start();
            long sleepTime = random.nextInt(sleepTimeMax);
            if (modeByte == VIA_RPC_BYTE) {
                LOG.info("Received via RPC.");
            } else if (modeByte == VIA_HDFS_DIST_CACHE_BYTE) {
                LOG.info("Reading from local filesystem");
                FileSystem localFs = FileSystem.getLocal(new Configuration());
                FSDataInputStream is = localFs.open(new Path(DISK_PAYLOAD_NAME));
                IOUtils.toByteArray(is);
            } else if (modeByte == VIA_HDFS_DIRECT_READ_BYTE) {
                LOG.info("Reading from HDFS");
                FileSystem fs = FileSystem.get(new Configuration());
                FSDataInputStream is = fs.open(new Path(Path.SEPARATOR + "tmp", DISK_PAYLOAD_NAME));
                IOUtils.toByteArray(is);
            } else {
                throw new IllegalArgumentException("Unknown execution mode: [" + modeByte + "]");
            }
            LOG.info("TimeTakenToAccessPayload=" + sw.stop().now(TimeUnit.MILLISECONDS));
            LOG.info("Sleeping for: " + sleepTime);
            Thread.sleep(sleepTime);
        }
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new RPCLoadGen(), args);
        System.exit(res);
    }
}