org.notmysock.tez.BroadcastTest.java Source code

Java tutorial

Introduction

Here is the source code for org.notmysock.tez.BroadcastTest.java

Source

package org.notmysock.tez;
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import java.net.URL;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.ProcessorDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;
import org.apache.tez.runtime.library.output.UnorderedKVOutput;
import org.apache.tez.runtime.library.processor.SimpleProcessor;

import com.google.common.base.Preconditions;

public class BroadcastTest extends Configured implements Tool {

    /**
     * Processor for the broadcasting vertex: writes a fixed number of
     * key/value records to its single output.
     *
     * <p>Every record uses {@link #word} as the key (never modified in this
     * class, so it stays an empty {@code Text}) and the index of the running
     * task as the value.
     */
    public static class InputProcessor extends SimpleProcessor {
        /** Number of records each task writes to its output. */
        private static final int RECORD_COUNT = 1024 * 1024;

        Text word = new Text();

        public InputProcessor(ProcessorContext context) {
            super(context);
            // TODO: do something with the user payload.
            // The payload used to be read here into a local that was never
            // used. createDAG attaches no payload to the vertex running this
            // processor, so dereferencing getUserPayload() here could fail
            // if it returns null in that case (TODO confirm) and gained nothing.
        }

        /**
         * Writes {@code RECORD_COUNT} records to the output when it is an
         * {@link UnorderedKVOutput}; any other output type is silently skipped.
         *
         * @throws IllegalArgumentException if the processor does not have
         *         exactly one output
         * @throws Exception if obtaining the writer or writing a record fails
         */
        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(getOutputs().size() == 1);
            for (LogicalOutput out : getOutputs().values()) {
                if (!(out instanceof UnorderedKVOutput)) {
                    continue;
                }
                KeyValueWriter kvWriter = (KeyValueWriter) ((UnorderedKVOutput) out).getWriter();
                // The value is identical for every record, so build it once
                // instead of allocating a new IntWritable per write. This
                // assumes the writer serializes the value during write(), the
                // usual Hadoop Writable-reuse contract.
                IntWritable taskIndex = new IntWritable(getContext().getTaskIndex());
                for (int i = 0; i < RECORD_COUNT; i++) {
                    kvWriter.write(word, taskIndex);
                }
            }
        }
    }

    /**
     * Processor for the consuming vertex: drains the input named
     * {@code "Broadcast"} and reports how many records it saw.
     */
    public static class MapOneProcessor extends SimpleProcessor {
        Text word = new Text();

        public MapOneProcessor(ProcessorContext context) {
            super(context);
        }

        /**
         * Reads every record from the "Broadcast" input, counting the records
         * and adding up their {@code IntWritable} values, then prints this
         * task's index and the record count to standard error. The running
         * total of the values is computed but not reported.
         *
         * @throws IllegalArgumentException if the processor does not have
         *         exactly one input
         * @throws Exception if reading from the input fails
         */
        @Override
        public void run() throws Exception {
            Preconditions.checkArgument(inputs.size() == 1);
            final KeyValueReader reader = (KeyValueReader) getInputs().get("Broadcast").getReader();
            int records = 0;
            int total = 0;
            for (; reader.next(); records++) {
                IntWritable value = (IntWritable) reader.getCurrentValue();
                total += value.get();
            }
            System.err.println("Count = " + getContext().getTaskIndex() + " * " + records);
        }
    }

    /**
     * Builds the two-vertex DAG run by this test: a "Broadcast" vertex running
     * {@link InputProcessor} whose output is broadcast to every task of a
     * "Map 1" vertex running {@link MapOneProcessor}. The edge carries
     * {@code Text} keys and {@code IntWritable} values.
     *
     * @param fs         currently unused
     * @param tezConf    currently unused
     * @param stagingDir currently unused
     * @param localFiles currently unused
     * @return the assembled DAG, named "BroadcastExample"
     * @throws IOException   declared for callers; not thrown by the visible code
     * @throws YarnException declared for callers; not thrown by the visible code
     */
    private DAG createDAG(FileSystem fs, TezConfiguration tezConf, Path stagingDir,
            Map<String, LocalResource> localFiles) throws IOException, YarnException {

        final int numBroadcastTasks = 1;
        final int numMapTasks = 2000;

        // Single zero byte used as a placeholder processor payload.
        // NOTE(review): the payload is attached to the "Map 1" vertex, yet
        // MapOneProcessor never reads it - confirm which vertex should carry it.
        byte[] procByte = { 0 };
        UserPayload procPayload = UserPayload.create(ByteBuffer.wrap(procByte));

        Vertex broadcastVertex = Vertex.create("Broadcast",
                ProcessorDescriptor.create(InputProcessor.class.getName()), numBroadcastTasks);

        Vertex mapVertex = Vertex.create("Map 1",
                ProcessorDescriptor.create(MapOneProcessor.class.getName()).setUserPayload(procPayload),
                numMapTasks);

        UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
                .newBuilder(Text.class.getName(), IntWritable.class.getName()).build();

        // Use the static factory, like every other Tez object created in this
        // file (Vertex, Edge, ProcessorDescriptor, UserPayload, TezClient).
        DAG dag = DAG.create("BroadcastExample");

        dag.addVertex(broadcastVertex).addVertex(mapVertex)
                .addEdge(Edge.create(broadcastVertex, mapVertex, edgeConf.createDefaultBroadcastEdgeProperty()));
        return dag;
    }

    /**
     * Resolves where the {@code BroadcastTest} class was loaded from.
     *
     * @return the code-source location of this class as a Hadoop {@code Path}
     * @throws URISyntaxException if the location URL cannot be converted to a URI
     */
    public static Path getCurrentJarURL() throws URISyntaxException {
        final URL codeLocation = BroadcastTest.class.getProtectionDomain().getCodeSource().getLocation();
        return new Path(codeLocation.toURI());
    }

    /**
     * Describes an existing file as a YARN {@code LocalResource} of type
     * {@code FILE} with {@code APPLICATION} visibility, using the file's
     * current length and modification time.
     *
     * @param fs   file system used to look up the file's status
     * @param file path of the file to describe
     * @return the populated local-resource record
     * @throws IOException if the file status cannot be obtained
     */
    private LocalResource createLocalResource(FileSystem fs, Path file) throws IOException {
        final FileStatus status = fs.getFileStatus(file);

        final LocalResource resource = Records.newRecord(LocalResource.class);
        resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
        resource.setType(LocalResourceType.FILE);
        resource.setVisibility(LocalResourceVisibility.APPLICATION);
        resource.setSize(status.getLen());
        resource.setTimestamp(status.getModificationTime());
        return resource;
    }

    /**
     * Uploads this class's jar to a fresh staging directory, starts a Tez
     * session, submits the broadcast DAG and waits for it to finish. The
     * staging directory is deleted and the session stopped before returning,
     * whether or not the run succeeded.
     *
     * @param conf            base configuration, or {@code null} to use defaults
     * @param doLocalityCheck currently unused by this method
     * @return {@code true} if the DAG finished in state {@code SUCCEEDED}
     * @throws Exception if staging, session start-up, DAG submission or
     *         monitoring fails
     */
    public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
        System.out.println("Running BroadcastTest");
        // conf and UGI
        TezConfiguration tezConf = (conf != null) ? new TezConfiguration(conf) : new TezConfiguration();
        tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
        UserGroupInformation.setConfiguration(tezConf);
        String user = UserGroupInformation.getCurrentUser().getShortUserName();

        // staging dir: /user/<user>/.staging/<timestamp>
        // (the original concatenated two separators before the timestamp)
        FileSystem fs = FileSystem.get(tezConf);
        String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
                + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
        tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
        Path stagingDir = fs.makeQualified(new Path(stagingDirStr));

        // Stays null until the session has started, so cleanup only stops a
        // session that is actually running.
        TezClient tezSession = null;
        try {
            // The upload is inside the try so a failure here still removes
            // whatever was created under the staging directory.
            Path jobJar = new Path(stagingDir, "job.jar");
            fs.copyFromLocalFile(getCurrentJarURL(), jobJar);

            Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
            localResources.put("job.jar", createLocalResource(fs, jobJar));

            // needs session or else TaskScheduler does not hold onto containers
            TezClient client = TezClient.create("BroadcastTest", tezConf);
            client.addAppMasterLocalFiles(localResources);
            client.start();
            tezSession = client;

            DAG dag = createDAG(fs, tezConf, stagingDir, localResources);
            dag.addTaskLocalFiles(localResources);

            tezSession.waitTillReady();
            DAGClient dagClient = tezSession.submitDAG(dag);

            // monitoring
            DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
                return false;
            }
            return true;
        } finally {
            // Nested finally: a failure while deleting the staging directory
            // must not prevent the session from being stopped.
            try {
                fs.delete(stagingDir, true);
            } finally {
                if (tezSession != null) {
                    tezSession.stop();
                }
            }
        }
    }

    /**
     * Command-line entry point used by {@code ToolRunner}. Accepts either no
     * arguments or the single skip-locality-check flag; anything else prints
     * the usage text and fails.
     *
     * @param args command-line arguments remaining after generic option parsing
     * @return {@code 0} if the run succeeded, {@code 1} otherwise
     * @throws TezException if the arguments are not recognised
     * @throws Exception if the run itself fails
     */
    @Override
    public int run(String[] args) throws Exception {
        final boolean skipRequested = args.length == 1 && args[0].equals(skipLocalityCheck);
        if (args.length >= 1 && !skipRequested) {
            printUsage();
            throw new TezException("Invalid command line");
        }
        return run(getConf(), !skipRequested) ? 0 : 1;
    }

    /**
     * Prints the command-line usage of this tool, followed by Hadoop's
     * generic option help, to standard error.
     */
    private static void printUsage() {
        // The previous text named a different program
        // ("broadcastAndOneToOneExample"); report this tool's own name and
        // show the flag as optional.
        System.err.println("Usage: BroadcastTest [" + skipLocalityCheck + "]");
        ToolRunner.printGenericCommandUsage(System.err);
    }

    /**
     * Command-line flag that turns the locality check off. Declared
     * {@code final} because it is only ever read as a constant; the name is
     * kept unchanged for existing references.
     */
    static final String skipLocalityCheck = "-skipLocalityCheck";

    /**
     * Launches the test through {@code ToolRunner} with a fresh
     * {@code Configuration} and exits the JVM with the tool's return code.
     *
     * @param args command-line arguments
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        final int exitCode = ToolRunner.run(new Configuration(), new BroadcastTest(), args);
        System.exit(exitCode);
    }
}