org.apache.tez.mapreduce.examples.BroadcastAndOneToOneExample.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.mapreduce.examples.BroadcastAndOneToOneExample.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.mapreduce.examples;

import java.io.IOException;

import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.dag.library.vertexmanager.InputReadyVertexManager;
import org.apache.tez.runtime.api.ObjectRegistry;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;
import org.apache.tez.runtime.library.output.UnorderedKVOutput;
import org.apache.tez.runtime.library.processor.SimpleProcessor;

import com.google.common.base.Preconditions;

public class BroadcastAndOneToOneExample extends Configured implements Tool {
    public static class InputProcessor extends SimpleProcessor {
        Text word = new Text();

        public InputProcessor(ProcessorContext context) {
            super(context);
        }

        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(getOutputs().size() == 1);
            UnorderedKVOutput output = (UnorderedKVOutput) getOutputs().values().iterator().next();
            KeyValueWriter kvWriter = (KeyValueWriter) output.getWriter();
            kvWriter.write(word, new IntWritable(getContext().getTaskIndex()));
            ByteBuffer userPayload = getContext().getUserPayload() == null ? null
                    : getContext().getUserPayload().getPayload();
            if (userPayload != null) {
                boolean doLocalityCheck = getContext().getUserPayload().getPayload().get(0) > 0 ? true : false;
                if (doLocalityCheck) {
                    ObjectRegistry objectRegistry = getContext().getObjectRegistry();
                    String entry = String.valueOf(getContext().getTaskIndex());
                    objectRegistry.cacheForDAG(entry, entry);
                }
            }
        }
    }

    public static class OneToOneProcessor extends SimpleProcessor {
        Text word = new Text();

        public OneToOneProcessor(ProcessorContext context) {
            super(context);
        }

        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(inputs.size() == 2);

            KeyValueReader inputKvReader = (KeyValueReader) getInputs().get("Input").getReader();
            KeyValueReader broadcastKvReader = (KeyValueReader) getInputs().get("Broadcast").getReader();
            int sum = 0;
            while (broadcastKvReader.next()) {
                sum += ((IntWritable) broadcastKvReader.getCurrentValue()).get();
            }
            while (inputKvReader.next()) {
                sum += ((IntWritable) inputKvReader.getCurrentValue()).get();
            }
            boolean doLocalityCheck = getContext().getUserPayload().getPayload().get(0) > 0 ? true : false;
            int broadcastSum = getContext().getUserPayload().getPayload().get(1);
            int expectedSum = broadcastSum + getContext().getTaskIndex();
            System.out.println("Index: " + getContext().getTaskIndex() + " sum: " + sum + " expectedSum: "
                    + expectedSum + " broadcastSum: " + broadcastSum);
            Preconditions.checkState((sum == expectedSum), "Sum = " + sum);

            if (doLocalityCheck) {
                ObjectRegistry objectRegistry = getContext().getObjectRegistry();
                String index = (String) objectRegistry.get(String.valueOf(getContext().getTaskIndex()));
                if (index == null || Integer.valueOf(index).intValue() != getContext().getTaskIndex()) {
                    String msg = "Did not find expected local producer " + getContext().getTaskIndex()
                            + " in the same JVM";
                    System.out.println(msg);
                    throw new TezUncheckedException(msg);
                }
            }
        }

    }

    private DAG createDAG(FileSystem fs, TezConfiguration tezConf, Path stagingDir, boolean doLocalityCheck)
            throws IOException, YarnException {

        int numBroadcastTasks = 2;
        int numOneToOneTasks = 3;
        if (doLocalityCheck) {
            YarnClient yarnClient = YarnClient.createYarnClient();
            yarnClient.init(tezConf);
            yarnClient.start();
            int numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
            yarnClient.stop();
            // create enough 1-1 tasks to run in parallel
            numOneToOneTasks = numNMs - numBroadcastTasks - 1;// 1 AM
            if (numOneToOneTasks < 1) {
                numOneToOneTasks = 1;
            }
        }
        byte[] procByte = { (byte) (doLocalityCheck ? 1 : 0), 1 };
        UserPayload procPayload = UserPayload.create(ByteBuffer.wrap(procByte));

        System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");

        Vertex broadcastVertex = Vertex.create("Broadcast",
                ProcessorDescriptor.create(InputProcessor.class.getName()), numBroadcastTasks);

        Vertex inputVertex = Vertex.create("Input",
                ProcessorDescriptor.create(InputProcessor.class.getName()).setUserPayload(procPayload),
                numOneToOneTasks);

        Vertex oneToOneVertex = Vertex.create("OneToOne",
                ProcessorDescriptor.create(OneToOneProcessor.class.getName()).setUserPayload(procPayload));
        oneToOneVertex.setVertexManagerPlugin(
                VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()));

        UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
                .newBuilder(Text.class.getName(), IntWritable.class.getName()).setFromConfiguration(tezConf)
                .build();

        DAG dag = DAG.create("BroadcastAndOneToOneExample");
        dag.addVertex(inputVertex).addVertex(broadcastVertex).addVertex(oneToOneVertex)
                .addEdge(Edge.create(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty()))
                .addEdge(Edge.create(broadcastVertex, oneToOneVertex,
                        edgeConf.createDefaultBroadcastEdgeProperty()));
        return dag;
    }

    public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
        System.out.println("Running BroadcastAndOneToOneExample");
        // conf and UGI
        TezConfiguration tezConf;
        if (conf != null) {
            tezConf = new TezConfiguration(conf);
        } else {
            tezConf = new TezConfiguration();
        }
        tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
        UserGroupInformation.setConfiguration(tezConf);

        // staging dir
        FileSystem fs = FileSystem.get(tezConf);
        String stagingDirStr = tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
                TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + "BroadcastAndOneToOneExample"
                + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
        Path stagingDir = new Path(stagingDirStr);
        tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
        stagingDir = fs.makeQualified(stagingDir);

        // No need to add jar containing this class as assumed to be part of
        // the tez jars.

        // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
        // is the same filesystem as the one used for Input/Output.
        TezClient tezSession = null;
        // needs session or else TaskScheduler does not hold onto containers
        tezSession = TezClient.create("broadcastAndOneToOneExample", tezConf);
        tezSession.start();

        DAGClient dagClient = null;

        try {
            DAG dag = createDAG(fs, tezConf, stagingDir, doLocalityCheck);

            tezSession.waitTillReady();
            dagClient = tezSession.submitDAG(dag);

            // monitoring
            DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
                return false;
            }
            return true;
        } finally {
            fs.delete(stagingDir, true);
            tezSession.stop();
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        boolean doLocalityCheck = true;
        if (args.length == 1) {
            if (args[0].equals(skipLocalityCheck)) {
                doLocalityCheck = false;
            } else {
                printUsage();
                throw new TezException("Invalid command line");
            }
        } else if (args.length > 1) {
            printUsage();
            throw new TezException("Invalid command line");
        }

        if (doLocalityCheck
                && getConf().getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT)) {
            System.out.println("locality check is not valid in local mode. skipping");
            doLocalityCheck = false;
        }

        boolean status = run(getConf(), doLocalityCheck);
        return status ? 0 : 1;
    }

    private static void printUsage() {
        System.err.println("broadcastAndOneToOneExample " + skipLocalityCheck);
        ToolRunner.printGenericCommandUsage(System.err);
    }

    static String skipLocalityCheck = "-skipLocalityCheck";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        BroadcastAndOneToOneExample job = new BroadcastAndOneToOneExample();
        int status = ToolRunner.run(conf, job, args);
        System.exit(status);
    }
}