org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java Source code

Introduction

Here is the source code for org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java. It is a JUnit 4 integration test for Pentaho's Hadoop MapReduce wrappers (PentahoMapRunnable, GenericTransCombiner, and GenericTransReduce): a helper builds a JobConf that embeds Kettle transformation (.ktr) files, and the tests drive the mapper, combiner, and reducer with mock record readers, output collectors, and reporters.
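
To run these tests outside of a build tool, a minimal JUnit 4 launcher along the following lines should work. This is only a sketch: the RunIntegrationTest class name is made up for illustration, the Kettle and Hadoop dependencies plus the test class must be on the classpath, and the JVM should be started from the project root so the relative ./src/test/resources and ./dist paths used by the tests resolve.

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;

// Hypothetical launcher class; any name will do.
public class RunIntegrationTest {
    public static void main(String[] args) {
        // Run every @Test method in the integration test with the JUnit 4 runner.
        Result result = JUnitCore.runClasses(
                org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.class);

        for (Failure failure : result.getFailures()) {
            System.out.println(failure);
        }
        System.out.println("Tests run: " + result.getRunCount()
                + ", failures: " + result.getFailureCount());
    }
}

In a normal build the same tests would typically be picked up by the build tool's JUnit runner instead.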

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.mapreduce.test;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.core.plugins.Plugin;
import org.pentaho.di.core.plugins.PluginInterface;
import org.pentaho.di.core.plugins.PluginMainClassType;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.core.plugins.StepPluginType;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.trans.TransConfiguration;
import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.steps.hadoopenter.HadoopEnterMeta;
import org.pentaho.di.trans.steps.hadoopexit.HadoopExitMeta;
import org.pentaho.hadoop.mapreduce.GenericTransCombiner;
import org.pentaho.hadoop.mapreduce.GenericTransReduce;
import org.pentaho.hadoop.mapreduce.PentahoMapRunnable;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.junit.Assert.*;

@SuppressWarnings({ "nls", "unchecked", "deprecation", "rawtypes" })
public class PentahoMapReduceIntegrationTest {

    private boolean debug = false;

    @Before
    public void setup() throws KettleException {
        KettleEnvironment.init();
    }

    @After
    public void shutdown() {

    }

    @Test
    public void testMapperBadInjectorFields() throws IOException, KettleException {
        try {
            PentahoMapRunnable mapper = new PentahoMapRunnable();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();
            MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));
            mapper.configure(createJobConf("./src/test/resources/bad-injector-fields.ktr",
                    "./src/test/resources/wordcount-reducer.ktr"));

            mapper.run(reader, outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("key or value is not defined in transformation injector step"));
        }
    }

    @Test
    public void testMapperBadOutputFields() throws IOException, KettleException {
        try {
            PentahoMapRunnable mapper = new PentahoMapRunnable();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();
            MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));

            mapper.configure(createJobConf("./src/test/resources/bad-output-fields.ktr",
                    "./src/test/resources/bad-output-fields.ktr"));

            mapper.run(reader, outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("outKey or outValue is not defined in transformation output stream"));
        }
    }

    @Test
    public void testMapperNoInjectorStep() throws IOException, KettleException {
        try {
            PentahoMapRunnable mapper = new PentahoMapRunnable();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();
            MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));

            mapper.configure(createJobConf("./src/test/resources/no-injector-step.ktr",
                    "./src/test/resources/no-injector-step.ktr"));

            mapper.run(reader, outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("Unable to find thread with name Injector and copy number 0"));
        }
    }

    @Test
    public void testMapperNoOutputStep() throws IOException, KettleException {
        try {
            PentahoMapRunnable mapper = new PentahoMapRunnable();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();
            MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));
            mapper.configure(createJobConf("./src/test/resources/no-output-step.ktr",
                    "./src/test/resources/no-output-step.ktr"));

            mapper.run(reader, outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("Output step not defined in transformation"));
        }
    }

    @Test
    public void testReducerBadInjectorFields() throws IOException, KettleException {
        try {
            GenericTransReduce reducer = new GenericTransReduce();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();

            reducer.configure(createJobConf("./src/test/resources/bad-injector-fields.ktr",
                    "./src/test/resources/bad-injector-fields.ktr"));

            reducer.reduce(new Text("key"), Arrays.asList(new Text("value")).iterator(), outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("key or value is not defined in transformation injector step"));
        }
    }

    @Test
    public void testReducerNoInjectorStep() throws IOException, KettleException {
        try {
            GenericTransReduce reducer = new GenericTransReduce();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();

            reducer.configure(createJobConf("./src/test/resources/no-injector-step.ktr",
                    "./src/test/resources/no-injector-step.ktr"));

            reducer.reduce(new Text("key"), Arrays.asList(new Text("value")).iterator(), outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("Unable to find thread with name Injector and copy number 0"));
        }
    }

    @Test
    public void testReducerNoOutputStep() throws IOException, KettleException {
        try {
            GenericTransReduce reducer = new GenericTransReduce();
            MockOutputCollector outputCollector = new MockOutputCollector();
            MockReporter reporter = new MockReporter();

            reducer.configure(createJobConf("./src/test/resources/no-output-step.ktr",
                    "./src/test/resources/no-output-step.ktr"));

            reducer.reduce(new Text("key"), Arrays.asList(new Text("value")).iterator(), outputCollector, reporter);
            fail("Should have thrown an exception");
        } catch (IOException e) {
            assertTrue("Test for KettleException",
                    e.getMessage().contains("Output step not defined in transformation"));
        }
    }

    public static JobConf createJobConf(String mapperTransformationFile, String reducerTransformationFile)
            throws IOException, KettleException {
        return createJobConf(mapperTransformationFile, null, reducerTransformationFile, "localhost", "9000",
                "9001");
    }

    public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
            String reducerTransformationFile) throws IOException, KettleException {
        return createJobConf(mapperTransformationFile, combinerTransformationFile, reducerTransformationFile,
                "localhost", "9000", "9001");
    }

    public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
            String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
            throws IOException, KettleException {

        JobConf conf = new JobConf();
        conf.setJobName("wordcount");

        KettleEnvironment.init();

        // Register Map/Reduce Input and Map/Reduce Output plugin steps
        PluginMainClassType mainClassTypesAnnotation = StepPluginType.class
                .getAnnotation(PluginMainClassType.class);

        Map<Class<?>, String> inputClassMap = new HashMap<Class<?>, String>();
        inputClassMap.put(mainClassTypesAnnotation.value(), HadoopEnterMeta.class.getName());
        PluginInterface inputStepPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
                mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Input",
                "Enter a Hadoop Mapper or Reducer transformation", "MRI.png", false, false, inputClassMap,
                new ArrayList<String>(), null, null);
        PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputStepPlugin);

        Map<Class<?>, String> outputClassMap = new HashMap<Class<?>, String>();
        outputClassMap.put(mainClassTypesAnnotation.value(), HadoopExitMeta.class.getName());
        PluginInterface outputStepPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
                mainClassTypesAnnotation.value(), "Hadoop", "MapReduce Output",
                "Exit a Hadoop Mapper or Reducer transformation", "MRO.png", false, false, outputClassMap,
                new ArrayList<String>(), null, null);
        PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputStepPlugin);

        TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();

        TransMeta transMeta = null;
        TransConfiguration transConfig = null;

        if (mapperTransformationFile != null) {
            conf.setMapRunnerClass(PentahoMapRunnable.class);
            transMeta = new TransMeta(mapperTransformationFile);
            transConfig = new TransConfiguration(transMeta, transExecConfig);
            conf.set("transformation-map-xml", transConfig.getXML());
            conf.set("transformation-map-input-stepname", "Injector");
            conf.set("transformation-map-output-stepname", "Output");
        }

        if (combinerTransformationFile != null) {
            conf.setCombinerClass(GenericTransCombiner.class);
            transMeta = new TransMeta(combinerTransformationFile);
            transConfig = new TransConfiguration(transMeta, transExecConfig);
            conf.set("transformation-combiner-xml", transConfig.getXML());
            conf.set("transformation-combiner-input-stepname", "Injector");
            conf.set("transformation-combiner-output-stepname", "Output");
        }

        if (reducerTransformationFile != null) {
            conf.setReducerClass(GenericTransReduce.class);
            transMeta = new TransMeta(reducerTransformationFile);
            transConfig = new TransConfiguration(transMeta, transExecConfig);
            conf.set("transformation-reduce-xml", transConfig.getXML());
            conf.set("transformation-reduce-input-stepname", "Injector");
            conf.set("transformation-reduce-output-stepname", "Output");
        }

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path("/"));
        FileOutputFormat.setOutputPath(conf, new Path("/"));

        conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
        conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

        conf.setJar(jar.toURI().toURL().toExternalForm());
        conf.setWorkingDirectory(new Path("/tmp/wordcount"));

        return conf;
    }

    public static String getTransformationXml(String transFilename) throws IOException {
        StringBuilder sb = new StringBuilder();

        BufferedReader reader = new BufferedReader(new FileReader(transFilename));
        try {
            String line = null;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append(Const.CR);
            }
        } finally {
            // Always release the file handle, even if reading fails.
            reader.close();
        }

        return sb.toString();
    }

    public static class MockOutputCollector implements OutputCollector {

        private Map<Object, ArrayList<Object>> collection = new HashMap<Object, ArrayList<Object>>();
        private AtomicBoolean closed = new AtomicBoolean(false);

        public void close() {
            closed.set(true);
        }

        @Override
        public void collect(Object arg0, Object arg1) throws IOException {
            if (closed.get()) {
                return;
            }
            if (!collection.containsKey(arg0)) {
                collection.put(arg0, new ArrayList<Object>());
            }

            collection.get(arg0).add(arg1);
        }

        public Map<Object, ArrayList<Object>> getCollection() {
            return collection;
        }

    }

    public static class MockReporter implements Reporter {

        @Override
        public void progress() {
            // no-op: progress reporting is not needed by these tests
        }

        @Override
        public Counter getCounter(Enum<?> arg0) {
            // counters are not tracked by this mock
            return null;
        }

        @Override
        public Counter getCounter(String arg0, String arg1) {
            // counters are not tracked by this mock
            return null;
        }

        @Override
        public InputSplit getInputSplit() throws UnsupportedOperationException {
            // no input split is associated with this mock
            return null;
        }

        @Override
        public void incrCounter(Enum<?> arg0, long arg1) {
            // no-op: counter increments are ignored
        }

        @Override
        public void incrCounter(String arg0, String arg1, long arg2) {
            // no-op: counter increments are ignored
        }

        @Override
        public void setStatus(String arg0) {
            // no-op: status updates are ignored
        }

    }

    class MockRecordReader implements RecordReader<Text, Text> {

        private Iterator<String> rowIter;

        int rowNum = -1;

        int totalRows;

        // Make them provide a pre-filled list so we don't confuse the overhead of generating strings
        // with the time it takes to run the mapper
        public MockRecordReader(List<String> rows) {
            totalRows = rows.size();
            rowIter = rows.iterator();
        }

        @Override
        public boolean next(Text key, Text value) throws IOException {
            if (!rowIter.hasNext()) {
                return false;
            }
            rowNum++;
            key.set(String.valueOf(rowNum));
            value.set(rowIter.next());
            return true;
        }

        @Override
        public Text createKey() {
            return new Text();
        }

        @Override
        public Text createValue() {
            return new Text();
        }

        @Override
        public long getPos() throws IOException {
            return rowNum;
        }

        @Override
        public void close() throws IOException {
        }

        @Override
        public float getProgress() throws IOException {
            return (rowNum + 1) / (float) totalRows;
        }

    }

    @Test
    public void testMapRunnable_wordCount() throws IOException, KettleException {
        PentahoMapRunnable mapRunnable = new PentahoMapRunnable();
        MockOutputCollector outputCollector = new MockOutputCollector();
        MockReporter reporter = new MockReporter();

        mapRunnable.configure(createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr"));

        final int ROWS = 10000;

        List<String> strings = new ArrayList<String>();
        for (int i = 0; i < ROWS; i++) {
            strings.add("zebra giraffe hippo elephant tiger");
        }

        MockRecordReader reader = new MockRecordReader(strings);

        long start = System.currentTimeMillis();
        mapRunnable.run(reader, outputCollector, reporter);
        outputCollector.close();
        long stop = System.currentTimeMillis();
        System.out.println("Executed " + ROWS + " in " + (stop - start) + "ms");
        System.out.println("Average: " + ((stop - start) / (float) ROWS) + "ms");
        System.out.println("Rows/Second: " + (ROWS / ((stop - start) / 1000f)));

        class CountValues {
            private Object workingKey;

            public CountValues setWorkingKey(Object k) {
                workingKey = k;
                return this;
            }

            public void countValues(String k, Object v, MockOutputCollector oc) {
                if (workingKey.equals(new Text(k))) {
                    assertEquals(k.toString(), v, oc.getCollection().get(new Text(k)).size());
                }
            }
        }

        assertNotNull(outputCollector);
        assertNotNull(outputCollector.getCollection());
        assertNotNull(outputCollector.getCollection().keySet());
        assertEquals(5, outputCollector.getCollection().keySet().size());

        CountValues cv = new CountValues();
        for (Object key : outputCollector.getCollection().keySet()) {
            cv.setWorkingKey(key).countValues("zebra", ROWS, outputCollector);
            cv.setWorkingKey(key).countValues("giraffe", ROWS, outputCollector);
            cv.setWorkingKey(key).countValues("hippo", ROWS, outputCollector);
            cv.setWorkingKey(key).countValues("elephant", ROWS, outputCollector);
            cv.setWorkingKey(key).countValues("tiger", ROWS, outputCollector);
            // TODO: Add words that will not exist: unique words, same words - diff case

            if (debug) {
                for (Object value : outputCollector.getCollection().get(key)) {
                    System.out.println(key + ": " + value);
                }
            }
        }

        GenericTransReduce reducer = new GenericTransReduce();
        MockOutputCollector inputCollector = outputCollector;
        outputCollector = new MockOutputCollector();

        reducer.configure(createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr"));

        start = System.currentTimeMillis();
        for (Object key : inputCollector.getCollection().keySet()) {
            System.out.println("reducing: " + key);
            reducer.reduce((Text) key, new ArrayList(inputCollector.getCollection().get(key)).iterator(),
                    outputCollector, reporter);
        }
        reducer.close();
        outputCollector.close();
        stop = System.currentTimeMillis();
        System.out.println("Executed " + ROWS + " in " + (stop - start) + "ms");
        System.out.println("Average: " + ((stop - start) / (float) ROWS) + "ms");
        System.out.println("Rows/Second: " + (ROWS / ((stop - start) / 1000f)));

        assertNotNull(outputCollector);
        assertNotNull(outputCollector.getCollection());
        assertNotNull(outputCollector.getCollection().keySet());
        assertEquals(5, outputCollector.getCollection().keySet().size());

        class CheckValues {
            private Object workingKey;

            public CheckValues setWorkingKey(Object k) {
                workingKey = k;
                return this;
            }

            public void checkValues(String k, Object v, MockOutputCollector oc) {
                if (workingKey.equals(new Text(k))) {
                    assertEquals(k.toString(), v, ((IntWritable) oc.getCollection().get(new Text(k)).get(0)).get());
                }
            }
        }

        CheckValues cv2 = new CheckValues();
        for (Object key : outputCollector.getCollection().keySet()) {
            cv2.setWorkingKey(key).checkValues("zebra", ROWS, outputCollector);
            cv2.setWorkingKey(key).checkValues("giraffe", ROWS, outputCollector);
            cv2.setWorkingKey(key).checkValues("hippo", ROWS, outputCollector);
            cv2.setWorkingKey(key).checkValues("elephant", ROWS, outputCollector);
            cv2.setWorkingKey(key).checkValues("tiger", ROWS, outputCollector);

            if (debug) {
                for (Object value : outputCollector.getCollection().get(key)) {
                    System.out.println(key + ": " + value);
                }
            }
        }
    }

    @Test
    public void testMapper_null_output_value() throws Exception {
        PentahoMapRunnable mapper = new PentahoMapRunnable();
        MockOutputCollector outputCollector = new MockOutputCollector();
        MockReporter reporter = new MockReporter();

        mapper.configure(createJobConf("./src/test/resources/null-test.ktr", "./src/test/resources/null-test.ktr"));

        MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));

        mapper.run(reader, outputCollector, reporter);
        outputCollector.close();

        Exception ex = mapper.getException();
        if (ex != null) {
            ex.printStackTrace();
        }
        assertNull("Exception thrown", ex);
        assertEquals("Received output when we didn't expect any.  <null>s aren't passed through.", 0,
                outputCollector.getCollection().size());
    }

    @Test
    public void testCombiner_null_output_value() throws Exception {
        GenericTransCombiner combiner = new GenericTransCombiner();
        MockOutputCollector outputCollector = new MockOutputCollector();
        MockReporter reporter = new MockReporter();

        combiner.configure(createJobConf(null, "./src/test/resources/null-test.ktr", null));

        combiner.reduce(new Text("0"), Arrays.asList(new Text("test")).iterator(), outputCollector, reporter);
        outputCollector.close();

        Exception ex = combiner.getException();
        if (ex != null) {
            ex.printStackTrace();
        }
        assertNull("Exception thrown", ex);
        assertEquals("Received output when we didn't expect any.  <null>s aren't passed through.", 0,
                outputCollector.getCollection().size());
    }

    @Test
    public void testReducer_null_output_value() throws Exception {
        GenericTransReduce reducer = new GenericTransReduce();
        MockOutputCollector outputCollector = new MockOutputCollector();
        MockReporter reporter = new MockReporter();

        reducer.configure(
                createJobConf("./src/test/resources/null-test.ktr", "./src/test/resources/null-test.ktr"));

        reducer.reduce(new Text("0"), Arrays.asList(new Text("test")).iterator(), outputCollector, reporter);
        outputCollector.close();

        Exception ex = reducer.getException();
        if (ex != null) {
            ex.printStackTrace();
        }
        assertNull("Exception thrown", ex);
        assertEquals("Received output when we didn't expect any.  <null>s aren't passed through.", 0,
                outputCollector.getCollection().size());
    }

    @Test
    public void testLogChannelLeaking_mapper() throws Exception {
        JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");
        PentahoMapRunnable mapper = new PentahoMapRunnable();
        mapper.configure(jobConf);
        MockReporter reporter = new MockReporter();

        // We expect 5 log channels per run. The total should never grow past logChannels + 5.
        final int EXPECTED_CHANNELS_PER_RUN = 5;
        final int logChannels = LoggingRegistry.getInstance().getMap().size();
        // Run the mapper this many times
        final int RUNS = 10;

        for (int i = 0; i < RUNS; i++) {
            MockRecordReader reader = new MockRecordReader(Arrays.asList("test"));
            MockOutputCollector outputCollector = new MockOutputCollector();
            mapper.run(reader, outputCollector, reporter);
            outputCollector.close();
            Exception ex = mapper.getException();
            if (ex != null) {
                ex.printStackTrace();
            }
            assertNull("Exception thrown", ex);
            assertEquals("Incorrect output", 1, outputCollector.getCollection().size());

            assertEquals("LogChannels are not being cleaned up. On Run #" + i + " we have too many.",
                    logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
        }
        assertEquals(logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
    }

    @Test
    public void testLogChannelLeaking_combiner() throws Exception {
        JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");
        List<IntWritable> input = Arrays.asList(new IntWritable(1));
        GenericTransCombiner combiner = new GenericTransCombiner();
        combiner.configure(jobConf);
        MockReporter reporter = new MockReporter();

        // We expect 4 log channels per run. The total should never grow past logChannels + 4.
        final int EXPECTED_CHANNELS_PER_RUN = 4;
        final int logChannels = LoggingRegistry.getInstance().getMap().size();
        // Run the combiner this many times
        final int RUNS = 10;

        for (int i = 0; i < RUNS; i++) {
            MockOutputCollector outputCollector = new MockOutputCollector();
            combiner.reduce(new Text(String.valueOf(i)), input.iterator(), outputCollector, reporter);
            combiner.close();
            outputCollector.close();
            Exception ex = combiner.getException();
            if (ex != null) {
                ex.printStackTrace();
            }
            assertNull("Exception thrown", ex);
            assertEquals("Incorrect output", 1, outputCollector.getCollection().size());

            assertEquals("LogChannels are not being cleaned up. On Run #" + i + " we have too many.",
                    logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
        }
        assertEquals(logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
    }

    @Test
    public void testLogChannelLeaking_reducer() throws Exception {
        JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");
        List<IntWritable> input = Arrays.asList(new IntWritable(1));
        GenericTransReduce reducer = new GenericTransReduce();
        reducer.configure(jobConf);
        MockReporter reporter = new MockReporter();

        // We expect 4 log channels per run. The total should never grow past logChannels + 4.
        final int EXPECTED_CHANNELS_PER_RUN = 4;
        final int logChannels = LoggingRegistry.getInstance().getMap().size();
        // Run the reducer this many times
        final int RUNS = 10;

        for (int i = 0; i < RUNS; i++) {
            MockOutputCollector outputCollector = new MockOutputCollector();
            reducer.reduce(new Text(String.valueOf(i)), input.iterator(), outputCollector, reporter);
            reducer.close();
            outputCollector.close();
            Exception ex = reducer.getException();
            if (ex != null) {
                ex.printStackTrace();
            }
            assertNull("Exception thrown", ex);
            assertEquals("Incorrect output", 1, outputCollector.getCollection().size());

            assertEquals("LogChannels are not being cleaned up. On Run #" + i + " we have too many.",
                    logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
        }
        assertEquals(logChannels + EXPECTED_CHANNELS_PER_RUN, LoggingRegistry.getInstance().getMap().size());
    }

    // TODO Create tests for exception propagation from RowListeners in Mapper, Combiner, and Reducer

    @Test
    public void testMapReduce_InputOutput() throws Exception {
        JobConf jobConf = createJobConf("./src/test/resources/mr-input-output.ktr",
                "./src/test/resources/mr-passthrough.ktr", "./src/test/resources/mr-passthrough.ktr");

        PentahoMapRunnable mapper = new PentahoMapRunnable();
        mapper.configure(jobConf);

        MockReporter reporter = new MockReporter();
        MockOutputCollector outputCollector = new MockOutputCollector();
        MockRecordReader reader = new MockRecordReader(Arrays.asList("1", "2", "3"));

        mapper.run(reader, outputCollector, reporter);

        outputCollector.close();

        Exception ex = mapper.getException();
        if (ex != null) {
            ex.printStackTrace();
        }
        assertNull("Exception thrown", ex);
        assertEquals("Incorrect output", 3, outputCollector.getCollection().size());

        assertEquals("Validating output collector", new IntWritable(0),
                outputCollector.getCollection().get(new Text("1")).get(0));
        assertEquals("Validating output collector", new IntWritable(1),
                outputCollector.getCollection().get(new Text("2")).get(0));
        assertEquals("Validating output collector", new IntWritable(2),
                outputCollector.getCollection().get(new Text("3")).get(0));

    }

    @Test
    public void testCombinerOutputClasses() throws IOException, KettleException {
        JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        // The job's final output classes are deliberately different from the map output classes.
        jobConf.setOutputKeyClass(NullWritable.class);
        jobConf.setOutputValueClass(LongWritable.class);

        GenericTransCombiner combiner = new GenericTransCombiner();

        combiner.configure(jobConf);

        assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
        assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
    }

    @Test
    public void testReducerOutputClasses() throws IOException, KettleException {
        JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(IntWritable.class);
        // The job's final output classes are deliberately different from the map output classes.
        jobConf.setOutputKeyClass(NullWritable.class);
        jobConf.setOutputValueClass(LongWritable.class);

        GenericTransReduce reducer = new GenericTransReduce();

        reducer.configure(jobConf);

        assertEquals(jobConf.getOutputKeyClass(), reducer.getOutClassK());
        assertEquals(jobConf.getOutputValueClass(), reducer.getOutClassV());
    }

    @Test
    public void testTaskIdExtraction() throws Exception {
        JobConf conf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");
        conf.set("mapred.task.id", "job_201208090841_0133");
        PentahoMapRunnable mapRunnable = new PentahoMapRunnable();

        mapRunnable.configure(conf);

        Field variableSpaceField = PentahoMapRunnable.class.getDeclaredField("variableSpace");
        variableSpaceField.setAccessible(true);
        VariableSpace variableSpace = (VariableSpace) variableSpaceField.get(mapRunnable);
        String s = variableSpace.getVariable("Internal.Hadoop.NodeNumber");
        assertEquals("133", s);
    }

    @Test
    public void testTaskIdExtraction_over_10000() throws Exception {
        JobConf conf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");
        conf.set("mapred.task.id", "job_201208090841_013302");
        PentahoMapRunnable mapRunnable = new PentahoMapRunnable();

        mapRunnable.configure(conf);

        Field variableSpaceField = PentahoMapRunnable.class.getDeclaredField("variableSpace");
        variableSpaceField.setAccessible(true);
        VariableSpace variableSpace = (VariableSpace) variableSpaceField.get(mapRunnable);
        String s = variableSpace.getVariable("Internal.Hadoop.NodeNumber");
        assertEquals("13302", s);
    }
}
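
As a closing usage note, the pattern shared by the tests above can be distilled into a short driver. This is only a sketch: the WordCountMapperSketch class is hypothetical, it must live in the same org.pentaho.hadoop.mapreduce.test package because MockRecordReader is a package-private inner class, and it assumes the wordcount .ktr resources exist at the paths used in the tests.

package org.pentaho.hadoop.mapreduce.test;

import java.util.Arrays;

import org.apache.hadoop.mapred.JobConf;
import org.pentaho.hadoop.mapreduce.PentahoMapRunnable;

// Hypothetical driver that mirrors testMapRunnable_wordCount on a tiny input.
public class WordCountMapperSketch {
    public static void main(String[] args) throws Exception {
        // Build a JobConf that embeds the mapper and reducer transformations as XML,
        // exactly as the tests above do.
        JobConf conf = PentahoMapReduceIntegrationTest.createJobConf(
                "./src/test/resources/wordcount-mapper.ktr",
                "./src/test/resources/wordcount-reducer.ktr");

        PentahoMapRunnable mapper = new PentahoMapRunnable();
        mapper.configure(conf);

        // MockRecordReader is a non-static inner class, so it needs an enclosing instance.
        PentahoMapReduceIntegrationTest harness = new PentahoMapReduceIntegrationTest();
        PentahoMapReduceIntegrationTest.MockRecordReader reader =
                harness.new MockRecordReader(Arrays.asList("zebra giraffe hippo"));
        PentahoMapReduceIntegrationTest.MockOutputCollector collector =
                new PentahoMapReduceIntegrationTest.MockOutputCollector();

        mapper.run(reader, collector, new PentahoMapReduceIntegrationTest.MockReporter());
        collector.close();

        // One entry per distinct word emitted by the wordcount mapper transformation.
        System.out.println(collector.getCollection());
    }
}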