org.apache.hadoop.mapred.TestJobCleanup.java Source code

Introduction

Here is the source code for org.apache.hadoop.mapred.TestJobCleanup.java.
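
The test brings up a single-node MiniMRCluster and runs small identity Map-Reduce jobs to three outcomes: successful completion, failure (every map attempt fails), and a kill issued while a map task is running. After each run it checks the job's output directory for the marker files the configured OutputCommitter is expected to create (for example _SUCCESS for the default FileOutputCommitter, or the custom _custom_abort_failed and _custom_abort_killed markers) and verifies that markers which should not be present are absent.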

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * A JUnit test for Map-Reduce job cleanup.
 */
@SuppressWarnings("deprecation")
public class TestJobCleanup {
    private static final String TEST_ROOT_DIR = new File(
            System.getProperty("test.build.data", "/tmp"), "test-job-cleanup").toString();
    private static final String CUSTOM_CLEANUP_FILE_NAME = "_custom_cleanup";
    private static final String ABORT_KILLED_FILE_NAME = "_custom_abort_killed";
    private static final String ABORT_FAILED_FILE_NAME = "_custom_abort_failed";
    private static FileSystem fileSys = null;
    private static MiniMRCluster mr = null;
    private static Path inDir = null;
    private static Path emptyInDir = null;
    private static int outDirs = 0;

    private static final Log LOG = LogFactory.getLog(TestJobCleanup.class);

    @BeforeClass
    public static void setUp() throws IOException {
        JobConf conf = new JobConf();
        fileSys = FileSystem.get(conf);
        fileSys.delete(new Path(TEST_ROOT_DIR), true);
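        // Minimal single-node cluster settings: one JobTracker handler thread, loopback
        // JT/TT addresses on ephemeral ports, job history under TEST_ROOT_DIR, and the
        // _SUCCESS output marker enabled.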
        conf.set("mapred.job.tracker.handler.count", "1");
        conf.set("mapred.job.tracker", "127.0.0.1:0");
        conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
        conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
        conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR + "/intermediate");
        conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER,
                "true");

        mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
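        // one TaskTracker, the local file system as the default FS, one local directory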
        inDir = new Path(TEST_ROOT_DIR, "test-input");
        String input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
        DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
        file.writeBytes(input);
        file.close();
        emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
        fileSys.mkdirs(emptyInDir);
    }

    @AfterClass
    public static void tearDown() throws Exception {
        if (fileSys != null) {
            // fileSys.delete(new Path(TEST_ROOT_DIR), true);
            fileSys.close();
        }
        if (mr != null) {
            mr.shutdown();
        }
    }

    /**
     * Committer overriding the deprecated
     * {@link FileOutputCommitter#cleanupJob(JobContext)} to create a
     * _custom_cleanup marker file in the output folder.
     */
    static class CommitterWithCustomDeprecatedCleanup extends FileOutputCommitter {
        @Override
        public void cleanupJob(JobContext context) throws IOException {
            System.err.println("---- HERE ----");
            JobConf conf = context.getJobConf();
            Path outputPath = FileOutputFormat.getOutputPath(conf);
            FileSystem fs = outputPath.getFileSystem(conf);
            fs.create(new Path(outputPath, CUSTOM_CLEANUP_FILE_NAME)).close();
        }

        @Override
        public void commitJob(JobContext context) throws IOException {
            cleanupJob(context);
        }

        @Override
        public void abortJob(JobContext context, int runState) throws IOException {
            cleanupJob(context);
        }
    }

    /**
     * Committer whose abortJob() creates a _custom_abort_failed or
     * _custom_abort_killed marker file in the output folder, depending on the
     * final job state.
     */
    static class CommitterWithCustomAbort extends FileOutputCommitter {
        @Override
        public void abortJob(JobContext context, int state) throws IOException {
            JobConf conf = context.getJobConf();
            Path outputPath = FileOutputFormat.getOutputPath(conf);
            FileSystem fs = outputPath.getFileSystem(conf);
            String fileName = (state == JobStatus.FAILED) ? TestJobCleanup.ABORT_FAILED_FILE_NAME
                    : TestJobCleanup.ABORT_KILLED_FILE_NAME;
            fs.create(new Path(outputPath, fileName)).close();
        }
    }

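    /** Returns a fresh, numbered output directory under TEST_ROOT_DIR for each job. */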
    private Path getNewOutputDir() {
        return new Path(TEST_ROOT_DIR, "output-" + outDirs++);
    }

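    /** Common setup: an identity map/reduce job over the shared text input, writing to outDir. */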
    private void configureJob(JobConf jc, String jobName, int maps, int reds, Path outDir) {
        jc.setJobName(jobName);
        jc.setInputFormat(TextInputFormat.class);
        jc.setOutputKeyClass(LongWritable.class);
        jc.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(jc, inDir);
        FileOutputFormat.setOutputPath(jc, outDir);
        jc.setMapperClass(IdentityMapper.class);
        jc.setReducerClass(IdentityReducer.class);
        jc.setNumMapTasks(maps);
        jc.setNumReduceTasks(reds);
    }

    // run a job with 1 map and let it run to completion
    private void testSuccessfulJob(String filename, Class<? extends OutputCommitter> committer, String[] exclude)
            throws IOException {
        JobConf jc = mr.createJobConf();
        Path outDir = getNewOutputDir();
        configureJob(jc, "job with cleanup()", 1, 0, outDir);
        jc.setOutputCommitter(committer);

        JobClient jobClient = new JobClient(jc);
        RunningJob job = jobClient.submitJob(jc);
        JobID id = job.getID();
        job.waitForCompletion();

        LOG.info("Job finished : " + job.isComplete());
        Path testFile = new Path(outDir, filename);
        assertTrue("Done file \"" + testFile + "\" missing for job " + id, fileSys.exists(testFile));

        // check that none of the excluded files exist
        for (String ex : exclude) {
            Path file = new Path(outDir, ex);
            assertFalse("File " + file + " should not be present for successful job " + id, fileSys.exists(file));
        }
    }

    // run a job for which all the attempts simply fail.
    private void testFailedJob(String fileName, Class<? extends OutputCommitter> committer, String[] exclude)
            throws IOException {
        JobConf jc = mr.createJobConf();
        Path outDir = getNewOutputDir();
        configureJob(jc, "fail job with abort()", 1, 0, outDir);
        jc.setMaxMapAttempts(1);
        // set the job to fail
        jc.setMapperClass(UtilsForTests.FailMapper.class);
        jc.setOutputCommitter(committer);

        JobClient jobClient = new JobClient(jc);
        RunningJob job = jobClient.submitJob(jc);
        JobID id = job.getID();
        job.waitForCompletion();
        assertEquals("Job did not fail", JobStatus.FAILED, job.getJobState());

        if (fileName != null) {
            Path testFile = new Path(outDir, fileName);
            assertTrue("File " + testFile + " missing for failed job " + id, fileSys.exists(testFile));
        }

        // check that none of the excluded files exist
        for (String ex : exclude) {
            Path file = new Path(outDir, ex);
            assertFalse("File " + file + " should not be present for failed job " + id, fileSys.exists(file));
        }
    }

    // run a job which gets stuck in the mapper, then kill it.
    private void testKilledJob(String fileName, Class<? extends OutputCommitter> committer, String[] exclude)
            throws IOException {
        JobConf jc = mr.createJobConf();
        Path outDir = getNewOutputDir();
        configureJob(jc, "kill job with abort()", 1, 0, outDir);
        // use a mapper that blocks so the job can be killed while it is running
        jc.setMapperClass(UtilsForTests.KillMapper.class);
        jc.setOutputCommitter(committer);

        JobClient jobClient = new JobClient(jc);
        RunningJob job = jobClient.submitJob(jc);
        JobID id = job.getID();

        Counters counters = job.getCounters();

        // wait for the map to be launched
        while (true) {
            if (counters.getCounter(JobCounter.TOTAL_LAUNCHED_MAPS) == 1) {
                break;
            }
            LOG.info("Waiting for a map task to be launched");
            UtilsForTests.waitFor(100);
            counters = job.getCounters();
        }

        job.killJob(); // kill the job

        job.waitForCompletion(); // wait for the job to complete
        assertEquals("Job was not killed", JobStatus.KILLED, job.getJobState());

        if (fileName != null) {
            Path testFile = new Path(outDir, fileName);
            assertTrue("File " + testFile + " missing for job " + id, fileSys.exists(testFile));
        }

        // check that none of the excluded files exist
        for (String ex : exclude) {
            Path file = new Path(outDir, ex);
            assertFalse("File " + file + " should not be present for killed job " + id, fileSys.exists(file));
        }
    }

    /**
     * Test default cleanup/abort behavior
     *
     * @throws IOException
     */
    @Test
    public void testDefaultCleanupAndAbort() throws IOException {
        // check with a successful job
        testSuccessfulJob(FileOutputCommitter.SUCCEEDED_FILE_NAME, FileOutputCommitter.class, new String[] {});

        // check with a failed job
        testFailedJob(null, FileOutputCommitter.class, new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME });

        // check default abort behavior for a killed job
        testKilledJob(null, FileOutputCommitter.class, new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME });
    }

    /**
     * Test that a custom committer's abort code runs for failed and killed
     * jobs, and that no abort markers appear for a successful job.
     *
     * @throws IOException
     */
    @Test
    public void testCustomAbort() throws IOException {
        // check with a successful job
        testSuccessfulJob(FileOutputCommitter.SUCCEEDED_FILE_NAME, CommitterWithCustomAbort.class,
                new String[] { ABORT_FAILED_FILE_NAME, ABORT_KILLED_FILE_NAME });

        // check with a failed job
        testFailedJob(ABORT_FAILED_FILE_NAME, CommitterWithCustomAbort.class,
                new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME, ABORT_KILLED_FILE_NAME });

        // check with a killed job
        testKilledJob(ABORT_KILLED_FILE_NAME, CommitterWithCustomAbort.class,
                new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME, ABORT_FAILED_FILE_NAME });
    }

    /**
     * Test that a custom committer overriding the deprecated
     * {@link FileOutputCommitter#cleanupJob(JobContext)} still has that code
     * run, for API compatibility.
     */
    @Test
    public void testCustomCleanup() throws IOException {
        // check with a successful job
        testSuccessfulJob(CUSTOM_CLEANUP_FILE_NAME, CommitterWithCustomDeprecatedCleanup.class, new String[] {});

        // check with a failed job
        testFailedJob(CUSTOM_CLEANUP_FILE_NAME, CommitterWithCustomDeprecatedCleanup.class,
                new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME });

        // check with a killed job
        testKilledJob(TestJobCleanup.CUSTOM_CLEANUP_FILE_NAME, CommitterWithCustomDeprecatedCleanup.class,
                new String[] { FileOutputCommitter.SUCCEEDED_FILE_NAME });
    }
}
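
For context, below is a minimal driver sketch (not part of the Hadoop source above) showing how a custom OutputCommitter is wired into a job through the old mapred API, the same way the test does with JobConf.setOutputCommitter(). The class name and file-system paths are hypothetical placeholders, and the committer simply mirrors CommitterWithCustomAbort from the listing; treat it as an illustration rather than production code.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class CustomCommitterDriver {

    // Mirrors CommitterWithCustomAbort from the test: drop a marker file when the job aborts.
    public static class MarkerOnAbortCommitter extends FileOutputCommitter {
        @Override
        public void abortJob(JobContext context, int runState) throws IOException {
            JobConf conf = context.getJobConf();
            Path outputPath = FileOutputFormat.getOutputPath(conf);
            FileSystem fs = outputPath.getFileSystem(conf);
            String marker = (runState == JobStatus.FAILED) ? "_custom_abort_failed" : "_custom_abort_killed";
            fs.create(new Path(outputPath, marker)).close();
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf jc = new JobConf(CustomCommitterDriver.class);
        jc.setJobName("identity job with custom abort()");
        jc.setInputFormat(TextInputFormat.class);
        jc.setOutputKeyClass(LongWritable.class);
        jc.setOutputValueClass(Text.class);
        jc.setMapperClass(IdentityMapper.class);
        jc.setReducerClass(IdentityReducer.class);
        // Hypothetical paths; replace with real input/output locations.
        FileInputFormat.setInputPaths(jc, new Path("/tmp/test-input"));
        FileOutputFormat.setOutputPath(jc, new Path("/tmp/test-output"));
        // Wire in the custom committer, exactly as the test does via setOutputCommitter().
        jc.setOutputCommitter(MarkerOnAbortCommitter.class);

        RunningJob job = new JobClient(jc).submitJob(jc);
        job.waitForCompletion();
        System.out.println("Job successful: " + job.isSuccessful());
    }
}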