org.apache.hadoop.mapred.TestStreamingJobProcessTree.java Source code

Introduction

Here is the source code for org.apache.hadoop.mapred.TestStreamingJobProcessTree.java.
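This system test submits Hadoop Streaming jobs whose map and reduce tasks run a ProcessTree.sh helper script that drives memory usage beyond the configured task limits; the test then inspects the task diagnostics for the over-limit message that is logged before the task is killed.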

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.mapreduce.test.system.JTClient;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.mapreduce.test.system.TaskInfo;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.streaming.StreamJob;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Increase memory usage beyond the memory limits of a streaming job and
 * verify whether the task memory manager logs the process tree status
 * before killing the task.
 */
public class TestStreamingJobProcessTree {
    private static final Log LOG = LogFactory.getLog(TestStreamingJobProcessTree.class);
    private static MRCluster cluster;
    private static Configuration conf = new Configuration();
    private static Path inputDir = new Path("input");
    private static Path outputDir = new Path("output");

    @BeforeClass
    public static void before() throws Exception {
        String[] excludeExpList = { "java.net.ConnectException", "java.io.IOException" };
        cluster = MRCluster.createCluster(conf);
        cluster.setExcludeExpList(excludeExpList);
        cluster.setUp();
        conf = cluster.getJTClient().getProxy().getDaemonConf();
        createInput(inputDir, conf);
    }

    @AfterClass
    public static void after() throws Exception {
        cleanup(inputDir, conf);
        cleanup(outputDir, conf);
        cluster.tearDown();
    }

    /**
     * Let the streaming map task exceed its configured memory limit and
     * verify whether the task memory manager logs the process tree status
     * before killing the task.
     * @throws IOException If an I/O error occurs.
     */
    @Test
    public void testStreamingJobProcTreeCleanOfMapTask() throws IOException {
        String runtimeArgs[] = { "-D", "mapred.job.name=ProcTreeStreamJob", "-D", "mapred.map.tasks=1", "-D",
                "mapred.reduce.tasks=0", "-D", "mapred.map.max.attempts=1", "-D",
                "mapred.cluster.max.map.memory.mb=2048", "-D", "mapred.cluster.reduce.memory.mb=1024", "-D",
                "mapred.cluster.max.reduce.memory.mb=2048", "-D", "mapred.cluster.map.memory.mb=1024", "-D",
                "mapred.job.map.memory.mb=512" };

        String[] otherArgs = new String[] { "-input", inputDir.toString(), "-output", outputDir.toString(),
                "-mapper", "ProcessTree.sh" };
        JobID jobId = getJobId(runtimeArgs, otherArgs);
        LOG.info("Job ID:" + jobId);
        if (jobId == null) {
            jobId = getJobId(runtimeArgs, otherArgs);
        }
        Assert.assertNotNull("Job ID not found for 1 min", jobId);
        Assert.assertTrue("Job has not been started for 1 min.", cluster.getJTClient().isJobStarted(jobId));
        TaskInfo taskInfo = getTaskInfo(jobId, true);
        Assert.assertNotNull("TaskInfo is null", taskInfo);
        Assert.assertTrue("Task has not been started for 1 min.", cluster.getJTClient().isTaskStarted(taskInfo));
        JTProtocol wovenClient = cluster.getJTClient().getProxy();
        int counter = 0;
        TaskInfo tempTaskInfo;
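        // Poll for up to ~60 seconds until the task leaves the RUNNING state,
        // keeping the last non-null TaskInfo so its diagnostics can still be
        // verified if the attempt disappears from the JobTracker.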
        while (counter++ < 60) {
            if (taskInfo.getTaskStatus().length == 0) {
                UtilsForTests.waitFor(1000);
                tempTaskInfo = taskInfo;
                taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
            } else if (taskInfo.getTaskStatus()[0].getRunState() == TaskStatus.State.RUNNING) {
                UtilsForTests.waitFor(1000);
                tempTaskInfo = taskInfo;
                taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
            } else {
                break;
            }

            if (taskInfo == null) {
                taskInfo = tempTaskInfo;
                break;
            }
        }

        verifyProcessTreeOverLimit(taskInfo, jobId);
        JobInfo jInfo = wovenClient.getJobInfo(jobId);
        LOG.info("Waiting till the job is completed...");
        counter = 0;
        while (counter++ < 60) {
            if (jInfo == null) {
                break;
            } else if (jInfo.getStatus().isJobComplete()) {
                break;
            }
            UtilsForTests.waitFor(1000);
            jInfo = wovenClient.getJobInfo(jobId);
        }
        UtilsForTests.waitFor(1000);
    }

    /**
     * Let the streaming reduce task exceed its configured memory limit and
     * verify whether the task memory manager logs the process tree status
     * before killing the task.
     * @throws IOException If an I/O error occurs.
     */
    @Test
    public void testStreamingJobProcTreeCleanOfReduceTask() throws IOException {
        String runtimeArgs[] = { "-D", "mapred.job.name=ProcTreeStreamJob", "-D", "mapred.reduce.tasks=1", "-D",
                "mapred.map.tasks=1", "-D", "mapred.reduce.max.attempts=1", "-D",
                "mapred.cluster.max.map.memory.mb=2048", "-D", "mapred.cluster.map.memory.mb=1024", "-D",
                "mapred.cluster.max.reduce.memory.mb=20248", "-D", "mapred.cluster.reduce.memory.mb=1024", "-D",
                "mapred.job.reduce.memory.mb=512" };

        String[] otherArgs = new String[] { "-input", inputDir.toString(), "-output", outputDir.toString(),
                "-mapper", "/bin/cat", "-reducer", "ProcessTree.sh" };

        cleanup(outputDir, conf);
        JobID jobId = getJobId(runtimeArgs, otherArgs);
        if (jobId == null) {
            jobId = getJobId(runtimeArgs, otherArgs);
        }
        Assert.assertNotNull("Job ID not found for 1 min", jobId);
        Assert.assertTrue("Job has not been started for 1 min.", cluster.getJTClient().isJobStarted(jobId));
        TaskInfo taskInfo = getTaskInfo(jobId, false);
        Assert.assertNotNull("TaskInfo is null", taskInfo);
        Assert.assertTrue("Task has not been started for 1 min.", cluster.getJTClient().isTaskStarted(taskInfo));
        JTProtocol wovenClient = cluster.getJTClient().getProxy();
        int counter = 0;
        TaskInfo tempTaskInfo;
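        // Same polling as in the map-task test: wait until the task leaves RUNNING.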
        while (counter++ < 60) {
            if (taskInfo.getTaskStatus().length == 0) {
                UtilsForTests.waitFor(1000);
                tempTaskInfo = taskInfo;
                taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
            } else if (taskInfo.getTaskStatus()[0].getRunState() == TaskStatus.State.RUNNING) {
                UtilsForTests.waitFor(1000);
                tempTaskInfo = taskInfo;
                taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
            } else {
                break;
            }
            if (taskInfo == null) {
                taskInfo = tempTaskInfo;
                break;
            }
        }
        verifyProcessTreeOverLimit(taskInfo, jobId);
        JobInfo jInfo = wovenClient.getJobInfo(jobId);
        LOG.info("Waiting till the job is completed...");
        counter = 0;
        while (counter++ < 60) {
            if (jInfo == null) {
                break;
            } else if (jInfo.getStatus().isJobComplete()) {
                break;
            }
            UtilsForTests.waitFor(1000);
            jInfo = wovenClient.getJobInfo(jobId);
        }
    }

    private void verifyProcessTreeOverLimit(TaskInfo taskInfo, JobID jobId) throws IOException {
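        // Expected over-limit diagnostic; the 512 MB limit substituted below
        // matches the mapred.job.{map,reduce}.memory.mb=512 settings set by
        // the tests above.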
        String taskOverLimitPatternString = "TaskTree \\[pid=[0-9]*,tipID=.*\\] is "
                + "running beyond memory-limits. " + "Current usage : [0-9]*bytes. Limit : %sbytes. Killing task.";
        Pattern taskOverLimitPattern = Pattern
                .compile(String.format(taskOverLimitPatternString, String.valueOf(512 * 1024 * 1024L)));
        LOG.info("Task OverLimit Pattern:" + taskOverLimitPattern);
        TaskID tID = TaskID.downgrade(taskInfo.getTaskID());
        TaskAttemptID taskAttID = new TaskAttemptID(tID, 0);
        JobClient jobClient = cluster.getJTClient().getClient();
        RunningJob runJob = jobClient.getJob(jobId);
        String[] taskDiagnostics = runJob.getTaskDiagnostics(taskAttID);
        Assert.assertNotNull("Task diagnostics is null.", taskDiagnostics);
        for (String strVal : taskDiagnostics) {
            Matcher mat = taskOverLimitPattern.matcher(strVal);
            Assert.assertTrue("Taskover limit error message is not matched.", mat.find());
        }
    }

    private String[] buildArgs(String[] runtimeArgs, String[] otherArgs) {
        String shellFile = System.getProperty("user.dir") + "/src/test/system/scripts/ProcessTree.sh";
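        // ProcessTree.sh (shipped to the tasks via -files below) is assumed to
        // drive memory usage beyond the configured task limit.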

        String fileArgs[] = new String[] { "-files", shellFile };
        int size = fileArgs.length + runtimeArgs.length + otherArgs.length;
        String args[] = new String[size];
        int index = 0;
        for (String fileArg : fileArgs) {
            args[index++] = fileArg;
        }
        for (String runtimeArg : runtimeArgs) {
            args[index++] = runtimeArg;
        }
        for (String otherArg : otherArgs) {
            args[index++] = otherArg;
        }
        return args;
    }

    private JobID getJobId(String[] runtimeArgs, String[] otherArgs) throws IOException {
        JobID jobId = null;
        final RunStreamJob runSJ;
        StreamJob streamJob = new StreamJob();
        int counter = 0;
        JTClient jtClient = cluster.getJTClient();
        JobClient jobClient = jtClient.getClient();
        int totalJobs = jobClient.getAllJobs().length;
        String[] args = buildArgs(runtimeArgs, otherArgs);
        cleanup(outputDir, conf);
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
        runSJ = new RunStreamJob(conf, streamJob, args);
        runSJ.start();
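        // Poll for up to ~60 seconds until the submitted job appears and reaches
        // the RUNNING state; getAllJobs()[0] is taken to be the newest job.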
        while (counter++ < 60) {
            if (jobClient.getAllJobs().length - totalJobs == 0) {
                UtilsForTests.waitFor(1000);
            } else if (jobClient.getAllJobs()[0].getRunState() == JobStatus.RUNNING) {
                jobId = jobClient.getAllJobs()[0].getJobID();
                break;
            } else {
                UtilsForTests.waitFor(1000);
            }
        }
        return jobId;
    }

    private TaskInfo getTaskInfo(JobID jobId, boolean isMap) throws IOException {
        JTProtocol wovenClient = cluster.getJTClient().getProxy();
        TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
        // Return the first regular (non-setup, non-cleanup) task of the requested kind.
        for (TaskInfo taskinfo : taskInfos) {
            if (!taskinfo.isSetupOrCleanup() && taskinfo.getTaskID().isMap() == isMap) {
                return taskinfo;
            }
        }
        return null;
    }

    private static void createInput(Path inDir, Configuration conf) throws IOException {
        FileSystem fs = inDir.getFileSystem(conf);
        if (!fs.mkdirs(inDir)) {
            throw new IOException("Failed to create the input directory:" + inDir.toString());
        }
        fs.setPermission(inDir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
        DataOutputStream file = fs.create(new Path(inDir, "data.txt"));
        String input = "Process tree cleanup of Streaming job tasks.";
        file.writeBytes(input + "\n");
        file.close();
    }

    private static void cleanup(Path dir, Configuration conf) throws IOException {
        FileSystem fs = dir.getFileSystem(conf);
        fs.delete(dir, true);
    }

    class RunStreamJob extends Thread {
        Configuration jobConf;
        Tool tool;
        String[] args;

        public RunStreamJob(Configuration jobConf, Tool tool, String[] args) {
            this.jobConf = jobConf;
            this.tool = tool;
            this.args = args;
        }

        @Override
        public void run() {
            try {
                ToolRunner.run(jobConf, tool, args);
            } catch (InterruptedException iexp) {
                LOG.warn("Thread is interrupted:" + iexp.getMessage());
            } catch (Exception exp) {
                LOG.warn("Exception:" + exp.getMessage());
            }
        }
    }
}
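
Example

For illustration, here is a minimal, self-contained sketch of how the over-limit pattern used in verifyProcessTreeOverLimit matches a task diagnostic line. The class name OverLimitPatternDemo and the sample diagnostic message are made up for this sketch; the message is constructed only to fit the shape the pattern expects and is not output captured from a real cluster.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class OverLimitPatternDemo {
    public static void main(String[] args) {
        // Same pattern string as verifyProcessTreeOverLimit, with the 512 MB
        // job-level limit (512 * 1024 * 1024 bytes) substituted for %s.
        String patternString = "TaskTree \\[pid=[0-9]*,tipID=.*\\] is "
                + "running beyond memory-limits. "
                + "Current usage : [0-9]*bytes. Limit : %sbytes. Killing task.";
        Pattern overLimit = Pattern.compile(
                String.format(patternString, String.valueOf(512 * 1024 * 1024L)));

        // A synthetic diagnostic line shaped like the message the pattern expects.
        String diagnostic = "TaskTree [pid=12345,tipID=task_201001010000_0001_m_000000]"
                + " is running beyond memory-limits."
                + " Current usage : 640000000bytes. Limit : 536870912bytes. Killing task.";

        Matcher mat = overLimit.matcher(diagnostic);
        System.out.println("Over-limit message matched: " + mat.find()); // prints: true
    }
}

The test applies the same find() check to every diagnostic string returned by RunningJob.getTaskDiagnostics for the first attempt of the over-limit task.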