org.apache.hadoop.mapred.TestMapProgress.java Source code

Introduction

Here is the source code for org.apache.hadoop.mapred.TestMapProgress.java
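
TestMapProgress runs a MapTask in-process against a three-record input file and,
through a custom TaskReporter, asserts that the reported map phase progress after
each record is recordsRead / totalRecords (within a 0.001 tolerance). The sketch
below is not part of the Hadoop source; it only restates that invariant, and its
class and variable names are purely illustrative:

// Minimal sketch of the invariant the test checks: after the i-th of N input
// records has been read, map phase progress should be i / N.
public class MapProgressInvariant {
    public static void main(String[] args) {
        int totalRecords = 3; // the test input file contains 3 records
        for (int recordNum = 1; recordNum <= totalRecords; recordNum++) {
            float expected = (float) recordNum / totalRecords;
            // TestTaskReporter.setProgress() compares the task's reported
            // progress against this value with a 0.001 tolerance.
            System.out.println("after record " + recordNum
                    + ", expected progress = " + expected);
        }
    }
}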

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
import static org.junit.Assert.assertTrue;

/**
 *  Validates map phase progress.
 *  This test case uses the new MapReduce API.
 *  We extend the Task.TaskReporter class and override setProgress()
 *  to validate the map phase progress being set.
 *  We extend MapTask and override its startReporter() method so that it
 *  creates a TestTaskReporter instead of a TaskReporter, and then we call
 *  mapTask.run(). Similar to LocalJobRunner, we set up the splits and call
 *  mapTask.run() directly; no job is submitted, only the map task is run.
 *  Because the reporter's setProgress() validates progress after every
 *  record is read, the map phase progress is fully validated once
 *  mapTask.run() finishes. Sort phase progress within the map task is not
 *  validated here.
 */
public class TestMapProgress {
    public static final Log LOG = LogFactory.getLog(TestMapProgress.class);
    private static String TEST_ROOT_DIR;
    static {
        String root = new File(System.getProperty("test.build.data", "/tmp")).getAbsolutePath();
        TEST_ROOT_DIR = new Path(root, "mapPhaseprogress").toString();
    }

    /**
     * Stub TaskUmbilicalProtocol implementation that logs task status updates
     * locally instead of reporting them to a parent TaskTracker.
     */
    static class FakeUmbilical implements TaskUmbilicalProtocol {

        public long getProtocolVersion(String protocol, long clientVersion) {
            return TaskUmbilicalProtocol.versionID;
        }

        @Override
        public ProtocolSignature getProtocolSignature(String protocol, long clientVersion, int clientMethodsHash)
                throws IOException {
            return ProtocolSignature.getProtocolSignature(this, protocol, clientVersion, clientMethodsHash);
        }

        public void done(TaskAttemptID taskid) throws IOException {
            LOG.info("Task " + taskid + " reporting done.");
        }

        public void fsError(TaskAttemptID taskId, String message) throws IOException {
            LOG.info("Task " + taskId + " reporting file system error: " + message);
        }

        public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
            LOG.info("Task " + taskId + " reporting shuffle error: " + message);
        }

        public void fatalError(TaskAttemptID taskId, String msg) throws IOException {
            LOG.info("Task " + taskId + " reporting fatal error: " + msg);
        }

        public JvmTask getTask(JvmContext context) throws IOException {
            return null;
        }

        public boolean ping(TaskAttemptID taskid) throws IOException {
            return true;
        }

        public void commitPending(TaskAttemptID taskId, TaskStatus taskStatus)
                throws IOException, InterruptedException {
            statusUpdate(taskId, taskStatus);
        }

        public boolean canCommit(TaskAttemptID taskid) throws IOException {
            return true;
        }

        public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
                throws IOException, InterruptedException {
            StringBuffer buf = new StringBuffer("Task ");
            buf.append(taskId);
            if (taskStatus != null) {
                buf.append(" making progress to ");
                buf.append(taskStatus.getProgress());
                String state = taskStatus.getStateString();
                if (state != null) {
                    buf.append(" and state of ");
                    buf.append(state);
                }
            }
            LOG.info(buf.toString());
            // ignore phase
            // ignore counters
            return true;
        }

        public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
            LOG.info("Task " + taskid + " has problem " + trace);
        }

        public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId, int fromEventId, int maxLocs,
                TaskAttemptID id) throws IOException {
            return new MapTaskCompletionEventsUpdate(TaskCompletionEvent.EMPTY_ARRAY, false);
        }

        public void reportNextRecordRange(TaskAttemptID taskid, SortedRanges.Range range) throws IOException {
            LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
        }
    }

    private FileSystem fs = null;
    private TestMapTask map = null;
    private JobID jobId = null;
    private FakeUmbilical fakeUmbilical = new FakeUmbilical();

    /**
     *  Task reporter that validates map phase progress after each record is
     *  processed by the map task.
     */
    public class TestTaskReporter extends Task.TaskReporter {
        private int recordNum = 0; // number of records processed

        TestTaskReporter(Task task) {
            task.super(task.getProgress(), fakeUmbilical);
        }

        @Override
        public void setProgress(float progress) {
            super.setProgress(progress);
            float mapTaskProgress = map.getProgress().getProgress();
            LOG.info("Map task progress is " + mapTaskProgress);
            if (recordNum < 3) {
                // only 3 records exist; stop validating progress after the third record
                recordNum++;
            } else {
                return;
            }
            // validate map task progress when the map task is in map phase
            assertTrue("Map progress is not the expected value.",
                    Math.abs(mapTaskProgress - ((float) recordNum / 3)) < 0.001);
        }
    }

    /**
     * Map task that overrides the startReporter() method so that a
     * TestTaskReporter (backed by FakeUmbilical) is used instead of the
     * default TaskReporter.
     */
    class TestMapTask extends MapTask {
        public TestMapTask(String jobFile, TaskAttemptID taskId, int partition, TaskSplitIndex splitIndex,
                int numSlotsRequired) {
            super(jobFile, taskId, partition, splitIndex, numSlotsRequired);
        }

        /**
         * Creates a TestTaskReporter and uses it to validate map phase progress.
         */
        @Override
        TaskReporter startReporter(final TaskUmbilicalProtocol umbilical) {
            // create the TestTaskReporter used to validate progress (no communication thread is started)
            TaskReporter reporter = new TestTaskReporter(map);
            return reporter;
        }
    }

    // In the given directory, creates a part-0 file with three records of the same size
    private void createInputFile(Path rootDir) throws IOException {
        if (fs.exists(rootDir)) {
            fs.delete(rootDir, true);
        }

        String str = "The quick brown fox\n" + "The brown quick fox\n" + "The fox brown quick\n";
        DataOutputStream inpFile = fs.create(new Path(rootDir, "part-0"));
        inpFile.writeBytes(str);
        inpFile.close();
    }

    /**
     *  Validates map phase progress after each record is processed by map task
     *  using custom task reporter.
     */
    @Test
    public void testMapProgress() throws Exception {
        JobConf job = new JobConf();
        fs = FileSystem.getLocal(job);
        Path rootDir = new Path(TEST_ROOT_DIR);
        createInputFile(rootDir);

        job.setNumReduceTasks(0);
        TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
        job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
        job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
        jobId = taskId.getJobID();

        JobContext jContext = new JobContextImpl(job, jobId);
        InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

        List<InputSplit> splits = input.getSplits(jContext);
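        // Write the job's split files to TEST_ROOT_DIR and read the split meta
        // info back, mimicking what the job client and JobTracker do at job
        // submission time.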
        JobSplitWriter.createSplitFiles(new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job),
                splits);
        TaskSplitMetaInfo[] splitMetaInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job,
                new Path(TEST_ROOT_DIR));
        job.setUseNewMapper(true); // use the new MapReduce API
        for (int i = 0; i < splitMetaInfo.length; i++) { // splitMetaInfo.length is 1
            map = new TestMapTask(job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
                    taskId, i, splitMetaInfo[i].getSplitIndex(), 1);

            JobConf localConf = new JobConf(job);
            map.localizeConfiguration(localConf);
            map.setConf(localConf);
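            // Run the map task in the current process; TestTaskReporter
            // validates progress as each record is read.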
            map.run(localConf, fakeUmbilical);
        }
        // clean up
        fs.delete(rootDir, true);
    }
}
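
For reference, here is a small standalone sketch (illustrative only, not part of the
test) showing how the hard-coded attempt ID string used above decomposes into task
and job IDs via the same TaskAttemptID.forName() and getJobID() calls:

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;

public class AttemptIdDemo {
    public static void main(String[] args) {
        // the same string the test passes to TaskAttemptID.forName()
        TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
        JobID jobId = taskId.getJobID(); // job_200907082313_0424
        System.out.println("task attempt: " + taskId);
        System.out.println("job id      : " + jobId);
    }
}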