org.apache.hadoop.mapred.TestCluster.java Source code

Java tutorial

Introduction

Below is the complete source code for org.apache.hadoop.mapred.TestCluster.java, a system test that exercises basic MapReduce cluster operations (process info, job submission, task details, and cluster restart).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.security.PrivilegedExceptionAction;
import java.util.Collection;

import junit.framework.Assert;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.SleepJob;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapred.TaskStatus.State;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.test.system.FinishTaskControlAction;
import org.apache.hadoop.mapreduce.test.system.JTClient;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.mapreduce.test.system.TTClient;
import org.apache.hadoop.mapreduce.test.system.TTInfo;
import org.apache.hadoop.mapreduce.test.system.TTTaskInfo;
import org.apache.hadoop.mapreduce.test.system.TaskInfo;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.system.AbstractDaemonClient;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestCluster {

    private static final Log LOG = LogFactory.getLog(TestCluster.class);

    private static MRCluster cluster;

    public TestCluster() throws Exception {

    }

    @BeforeClass
    public static void before() throws Exception {
        String[] expExcludeList = new String[2];
        expExcludeList[0] = "java.net.ConnectException";
        expExcludeList[1] = "java.io.IOException";
        cluster = MRCluster.createCluster(new Configuration());
        cluster.setExcludeExpList(expExcludeList);
        cluster.setUp();
    }

    @AfterClass
    public static void after() throws Exception {
        cluster.tearDown();
    }

    @Test
    public void testProcessInfo() throws Exception {
        LOG.info("Process info of JobTracker is : " + cluster.getJTClient().getProcessInfo());
        Assert.assertNotNull(cluster.getJTClient().getProcessInfo());
        Collection<TTClient> tts = cluster.getTTClients();
        for (TTClient tt : tts) {
            LOG.info("Process info of TaskTracker is : " + tt.getProcessInfo());
            Assert.assertNotNull(tt.getProcessInfo());
        }
    }

    @Test
    public void testJobSubmission() throws Exception {
        Configuration conf = new Configuration(cluster.getConf());
        SleepJob job = new SleepJob();
        job.setConf(conf);
        conf = job.setupJobConf(1, 1, 100, 100, 100, 100);
        RunningJob rJob = cluster.getJTClient().submitAndVerifyJob(conf);
        cluster.getJTClient().verifyJobHistory(rJob.getID());
    }

    //@Test
    public void testFileStatus() throws Exception {
        UserGroupInformation ugi = UserGroupInformation
                .createRemoteUser(cluster.getJTClient().getProxy().getDaemonUser());
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                MRCluster myCluster = null;
                try {
                    myCluster = MRCluster.createCluster(cluster.getConf());
                    myCluster.connect();
                    JTClient jt = myCluster.getJTClient();
                    String dir = ".";
                    checkFileStatus(jt.getFileStatus(dir, true));
                    checkFileStatus(jt.listStatus(dir, false, true), dir);
                    for (TTClient tt : myCluster.getTTClients()) {
                        String[] localDirs = tt.getMapredLocalDirs();
                        for (String localDir : localDirs) {
                            checkFileStatus(tt.listStatus(localDir, true, false), localDir);
                            checkFileStatus(tt.listStatus(localDir, true, true), localDir);
                        }
                    }
                    String systemDir = jt.getClient().getSystemDir().toString();
                    checkFileStatus(jt.listStatus(systemDir, false, true), systemDir);
                    checkFileStatus(jt.listStatus(jt.getLogDir(), true, true), jt.getLogDir());
                } finally {
                    if (myCluster != null) {
                        myCluster.disconnect();
                    }
                }
                return null;
            }
        });
    }

    private void checkFileStatus(FileStatus[] fs, String path) {
        Assert.assertNotNull(fs);
        LOG.info("-----Listing for " + path + "  " + fs.length);
        for (FileStatus fz : fs) {
            checkFileStatus(fz);
        }
    }

    private void checkFileStatus(FileStatus fz) {
        Assert.assertNotNull(fz);
        LOG.info("FileStatus is " + fz.getPath() + "  " + fz.getPermission() + "  " + fz.getOwner() + "  "
                + fz.getGroup() + "  " + fz.getClass());
    }

    /**
     * Test to verify the common properties of tasks.
     * @throws Exception
     */
    @Test
    public void testTaskDetails() throws Exception {
        Configuration conf = new Configuration(cluster.getConf());
        JTProtocol wovenClient = cluster.getJTClient().getProxy();
        FinishTaskControlAction.configureControlActionForJob(conf);
        SleepJob job = new SleepJob();
        job.setConf(conf);

        conf = job.setupJobConf(1, 1, 100, 100, 100, 100);
        JobClient client = cluster.getJTClient().getClient();

        RunningJob rJob = client.submitJob(new JobConf(conf));
        JobID id = rJob.getID();

        JobInfo jInfo = wovenClient.getJobInfo(id);

        while (jInfo.getStatus().getRunState() != JobStatus.RUNNING) {
            Thread.sleep(1000);
            jInfo = wovenClient.getJobInfo(id);
        }

        LOG.info("Waiting till job starts running one map");

        TaskInfo[] myTaskInfos = wovenClient.getTaskInfo(id);
        boolean isOneTaskStored = false;
        String sometaskpid = null;
        org.apache.hadoop.mapreduce.TaskAttemptID sometaskId = null;
        TTClient myCli = null;
        for (TaskInfo info : myTaskInfos) {
            if (!info.isSetupOrCleanup()) {
                String[] taskTrackers = info.getTaskTrackers();
                for (String taskTracker : taskTrackers) {
                    TTInfo ttInfo = wovenClient.getTTInfo(taskTracker);
                    TTClient ttCli = cluster.getTTClient(ttInfo.getStatus().getHost());
                    TaskID taskId = info.getTaskID();
                    TTTaskInfo ttTaskInfo = ttCli.getProxy().getTask(taskId);
                    Assert.assertNotNull(ttTaskInfo);
                    Assert.assertNotNull(ttTaskInfo.getConf());
                    Assert.assertNotNull(ttTaskInfo.getUser());
                    Assert.assertTrue(ttTaskInfo.getTaskStatus().getProgress() >= 0.0);
                    Assert.assertTrue(ttTaskInfo.getTaskStatus().getProgress() <= 1.0);
                    //Get the pid of the task attempt. The task need not have 
                    //reported the pid of the task by the time we are checking
                    //the pid. So perform null check.
                    String pid = ttTaskInfo.getPid();
                    int i = 1;
                    while (pid.isEmpty()) {
                        Thread.sleep(1000);
                        LOG.info("Waiting for task to report its pid back");
                        ttTaskInfo = ttCli.getProxy().getTask(taskId);
                        pid = ttTaskInfo.getPid();
                        if (i == 40) {
                            Assert.fail("The task pid not reported for 40 seconds.");
                        }
                        i++;
                    }
                    if (!isOneTaskStored) {
                        sometaskpid = pid;
                        sometaskId = ttTaskInfo.getTaskStatus().getTaskID();
                        myCli = ttCli;
                        isOneTaskStored = true;
                    }
                    LOG.info("verified task progress to be between 0 and 1");
                    State state = ttTaskInfo.getTaskStatus().getRunState();
                    if (ttTaskInfo.getTaskStatus().getProgress() < 1.0
                            && ttTaskInfo.getTaskStatus().getProgress() > 0.0) {
                        Assert.assertEquals(TaskStatus.State.RUNNING, state);
                        LOG.info("verified run state as " + state);
                    }
                    FinishTaskControlAction action = new FinishTaskControlAction(
                            org.apache.hadoop.mapred.TaskID.downgrade(info.getTaskID()));
                    ttCli.getProxy().sendAction(action);
                }
            }
        }
        rJob.killJob();
        int i = 1;
        while (!rJob.isComplete()) {
            Thread.sleep(1000);
            if (i == 40) {
                Assert.fail("The job not completed within 40 seconds after killing it.");
            }
            i++;
        }
        TTTaskInfo myTaskInfo = myCli.getProxy().getTask(sometaskId.getTaskID());
        i = 0;
        while (myTaskInfo != null && !myTaskInfo.getPid().isEmpty()) {
            LOG.info("sleeping till task is retired from TT memory");
            Thread.sleep(1000);
            myTaskInfo = myCli.getProxy().getTask(sometaskId.getTaskID());
            if (i == 40) {
                Assert.fail("Task not retired from TT memory within 40 seconds of job completeing");
            }
            i++;
        }
        Assert.assertFalse(myCli.getProxy().isProcessTreeAlive(sometaskpid));
    }

    @Test
    public void testClusterRestart() throws Exception {
        cluster.stop();
        // Give the cluster time to stop the whole cluster.
        AbstractDaemonClient cli = cluster.getJTClient();
        int i = 1;
        while (i < 40) {
            try {
                cli.ping();
                Thread.sleep(1000);
                i++;
            } catch (Exception e) {
                break;
            }
        }
        if (i >= 40) {
            Assert.fail("JT on " + cli.getHostName() + " Should have been down.");
        }
        i = 1;
        for (AbstractDaemonClient tcli : cluster.getTTClients()) {
            i = 1;
            while (i < 40) {
                try {
                    tcli.ping();
                    Thread.sleep(1000);
                    i++;
                } catch (Exception e) {
                    break;
                }
            }
            if (i >= 40) {
                Assert.fail("TT on " + tcli.getHostName() + " Should have been down.");
            }
        }
        cluster.start();
        cli = cluster.getJTClient();
        i = 1;
        while (i < 40) {
            try {
                cli.ping();
                break;
            } catch (Exception e) {
                i++;
                Thread.sleep(1000);
                LOG.info("Waiting for Jobtracker on host : " + cli.getHostName() + " to come up.");
            }
        }
        if (i >= 40) {
            Assert.fail("JT on " + cli.getHostName() + " Should have been up.");
        }
        for (AbstractDaemonClient tcli : cluster.getTTClients()) {
            i = 1;
            while (i < 40) {
                try {
                    tcli.ping();
                    break;
                } catch (Exception e) {
                    i++;
                    Thread.sleep(1000);
                    LOG.info("Waiting for Tasktracker on host : " + tcli.getHostName() + " to come up.");
                }
            }
            if (i >= 40) {
                Assert.fail("TT on " + tcli.getHostName() + " Should have been Up.");
            }
        }
    }
}