it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java Source code

Introduction

Here is the source code for it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java.
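
This class is Pydoop's port of Hadoop's TestPipeApplication. It exercises the pipes framework end to end: PipesMapper and PipesReducer data transfer, the Application uplink/downlink protocol (counters, status, progress, output), Submitter configuration and command-line handling, and PipesPartitioner. Each test launches a small shell-script stub as the pipes client and asserts on the text the stub writes to the task's stdout log.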

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Ported from org.apache.hadoop.mapred.pipes (Hadoop's TestPipeApplication).

package it.crs4.pydoop.mapreduce.pipes;

import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.IFile.Writer;
// assumed: TaskLog, used by initStdOut()/readStdOut() below, is the stock
// Hadoop class rather than a same-package copy
import org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.ReduceContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.junit.Assert;
import org.junit.Test;

import static org.junit.Assert.*;

public class TestPipeApplication {
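    // scratch directory for stub scripts and test output files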
    private static File workSpace = new File("target", TestPipeApplication.class.getName() + "-workSpace");

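    // example attempt id; not referenced by the tests below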
    private static String taskName = "attempt_001_02_r03_04_05";

    /**
     * Test PipesMapper: check that data from the record reader is
     * transferred to the pipes client.
     *
     * @throws Exception
     */
    @Test
    public void testRunner() throws Exception {
        // clean old password files
        File[] psw = cleanTokenPasswordFile();
        try {
            JobID jobId = new JobID("201408272347", 0);
            TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
            TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

            Job job = new Job(new Configuration());
            job.setJobID(jobId);
            Configuration conf = job.getConfiguration();
            conf.set(Submitter.IS_JAVA_RR, "true");
            conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
            job.setInputFormatClass(DummyInputFormat.class);
            FileSystem fs = new RawLocalFileSystem();
            fs.setConf(conf);

            DummyInputFormat input_format = new DummyInputFormat();
            List<InputSplit> isplits = input_format.getSplits(job);

            InputSplit isplit = isplits.get(0);

            TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

            RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);

            TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);
            // shell script that stands in for the pipes client executable
            File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
            conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
            // token for authorization
            Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                    "password".getBytes(), new Text("kind"), new Text("service"));
            TokenCache.setJobToken(token, job.getCredentials());
            conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
            PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(
                    context);

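            // run the mapper: everything the stub client prints goes to the
            // task's stdout log, which is captured and inspected below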
            initStdOut(conf);
            mapper.run(context);
            String stdOut = readStdOut(conf);

            // inspect part of the transferred data; the stub client writes to
            // the same stdout log that the test reads back
            // check the protocol version
            assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
            // check key and value classes
            assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
            assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
            // check that all records from the reader were sent
            assertTrue(stdOut.contains("value:0.0"));
            assertTrue(stdOut.contains("value:9.0"));

        } finally {
            if (psw != null) {
                // remove password files
                for (File file : psw) {
                    file.deleteOnExit();
                }
            }
        }
    }

    /**
     * Test it.crs4.pydoop.mapreduce.pipes.Application.
     * Exercises the internal message types: MessageType.REGISTER_COUNTER,
     * INCREMENT_COUNTER, STATUS, PROGRESS, OUTPUT, ...
     *
     * @throws Throwable
     */
    @Test
    public void testApplication() throws Throwable {

        System.err.println("testApplication");

        File[] psw = cleanTokenPasswordFile();
        try {
            JobID jobId = new JobID("201408272347", 0);
            TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
            TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

            Job job = new Job(new Configuration());
            job.setJobID(jobId);
            Configuration conf = job.getConfiguration();
            conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
            FileSystem fs = new RawLocalFileSystem();
            fs.setConf(conf);

            File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
            conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
            System.err.println("fCommand" + fCommand.getAbsolutePath());

            Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                    "password".getBytes(), new Text("kind"), new Text("service"));
            TokenCache.setJobToken(token, job.getCredentials());
            conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

            TestReporter reporter = new TestReporter();
            DummyInputFormat input_format = new DummyInputFormat();
            List<InputSplit> isplits = input_format.getSplits(job);
            InputSplit isplit = isplits.get(0);
            TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

            DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);

            RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                    new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

            MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                    conf, taskAttemptid, null, writer, null, reporter, null);

            System.err.println("ready to launch application");
            Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>(
                    context, reader);
            System.err.println("done");

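            // push one key/value pair down the pipe to the stub client and
            // wait for the child process to finish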
            application.getDownlink().flush();
            application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
            application.getDownlink().flush();
            application.waitForFinish();

            // test getDownlink().mapItem();
            String stdOut = readStdOut(conf);
            assertTrue(stdOut.contains("key:3"));
            assertTrue(stdOut.contains("value:txt"));

            assertEquals(0.0, context.getProgress(), 0.01);
            assertNotNull(context.getCounter("group", "name"));

            // test status: MessageType.STATUS
            assertEquals("PROGRESS", context.getStatus());
            // check MessageType.PROGRESS
            assertEquals(0.55f, reader.getProgress(), 0.001);
            application.getDownlink().close();
            // test MessageType.OUTPUT
            stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
            assertTrue(stdOut.contains("key:123"));
            assertTrue(stdOut.contains("value:value"));
            try {
                // abort after the child has finished; an IOException is expected
                application.abort(new Throwable());
                fail();
            } catch (IOException e) {
                // abort surfaces the pipe child failure
                assertEquals("pipe child exception", e.getMessage());
            }
        } finally {
            if (psw != null) {
                // remove password files
                for (File file : psw) {
                    file.deleteOnExit();
                }
            }
        }
    }

    /**
     * Test it.crs4.pydoop.mapreduce.pipes.Submitter: configuration helpers,
     * usage output, and command-line invocation.
     *
     * @throws Exception
     */
    @Test
    public void testSubmitter() throws Exception {

        Configuration conf = new Configuration();

        File[] psw = cleanTokenPasswordFile();

        System.setProperty("test.build.data", "target/tmp/build/TEST_SUBMITTER_MAPPER/data");
        conf.set("hadoop.log.dir", "target/tmp");

        // prepare configuration
        Submitter.setIsJavaMapper(conf, false);
        Submitter.setIsJavaReducer(conf, false);
        Submitter.setKeepCommandFile(conf, false);
        Submitter.setIsJavaRecordReader(conf, false);
        Submitter.setIsJavaRecordWriter(conf, false);
        PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();
        partitioner.configure(conf);

        Submitter.setJavaPartitioner(conf, partitioner.getClass());

        assertEquals(PipesPartitioner.class, (Submitter.getJavaPartitioner(conf)));
        // Submitter.main() ends with System.exit(); disable it so the call can
        // be tested, and capture System.out to check the usage message
        SecurityManager securityManager = System.getSecurityManager();
        // store System.out so it can be restored afterwards
        PrintStream oldps = System.out;
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ExitUtil.disableSystemExit();
        // first call without parameters: the usage message is printed
        try {
            System.setOut(new PrintStream(out));
            Submitter.main(new String[0]);
            fail();
        } catch (ExitUtil.ExitException e) {
            // System.exit was intercepted; verify the usage message
            assertTrue(out.toString().contains("bin/hadoop pipes"));
            assertTrue(out.toString().contains("[-input <path>] // Input directory"));
            assertTrue(out.toString().contains("[-output <path>] // Output directory"));
            assertTrue(out.toString().contains("[-jar <jar file> // jar filename"));
            assertTrue(out.toString().contains("[-inputformat <class>] // InputFormat class"));
            assertTrue(out.toString().contains("[-map <class>] // Java Map class"));
            assertTrue(out.toString().contains("[-partitioner <class>] // Java Partitioner"));
            assertTrue(out.toString().contains("[-reduce <class>] // Java Reduce class"));
            assertTrue(out.toString().contains("[-writer <class>] // Java RecordWriter"));
            assertTrue(out.toString().contains("[-program <executable>] // executable URI"));
            assertTrue(out.toString().contains("[-reduces <num>] // number of reduces"));
            assertTrue(out.toString().contains("[-lazyOutput <true/false>] // createOutputLazily"));

            assertTrue(out.toString()
                    .contains("-conf <configuration file>     specify an application configuration file"));
            assertTrue(out.toString().contains("-D <property=value>            use value for given property"));
            assertTrue(out.toString().contains("-fs <local|namenode:port>      specify a namenode"));
            assertTrue(out.toString().contains("-jt <local|jobtracker:port>    specify a job tracker"));
            assertTrue(out.toString().contains(
                    "-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster"));
            assertTrue(out.toString().contains(
                    "-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath."));
            assertTrue(out.toString().contains(
                    "-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines."));
        } finally {
            System.setOut(oldps);
            // restore
            System.setSecurityManager(securityManager);
            if (psw != null) {
                // remove password files
                for (File file : psw) {
                    file.deleteOnExit();
                }
            }
        }
        // call Submitter from the command line
        try {
            File fCommand = getFileCommand(null);
            String[] args = new String[20];
            File input = new File(workSpace + File.separator + "input");
            if (!input.exists()) {
                Assert.assertTrue(input.createNewFile());
            }
            File outPut = new File(workSpace + File.separator + "output");
            FileUtil.fullyDelete(outPut);

            args[0] = "-input";
            args[1] = input.getAbsolutePath();// "input";
            args[2] = "-output";
            args[3] = outPut.getAbsolutePath();// "output";
            args[4] = "-inputformat";
            args[5] = "org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
            args[6] = "-map";
            args[7] = "org.apache.hadoop.mapreduce.lib.map.InverseMapper";
            args[8] = "-partitioner";
            args[9] = "it.crs4.pydoop.mapreduce.pipes.PipesPartitioner";
            args[10] = "-reduce";
            args[11] = "org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer";
            args[12] = "-writer";
            args[13] = "org.apache.hadoop.mapreduce.lib.output.TextOutputFormat";
            args[14] = "-program";
            args[15] = fCommand.getAbsolutePath();// "program";
            args[16] = "-reduces";
            args[17] = "2";
            args[18] = "-lazyOutput";
            args[19] = "lazyOutput";
            Submitter.main(args);
            fail();
        } catch (ExitUtil.ExitException e) {
            // status should be 0
            assertEquals(0, e.status);

        } finally {
            System.setOut(oldps);
            System.setSecurityManager(securityManager);
        }

    }

    /**
     * Test it.crs4.pydoop.mapreduce.pipes.PipesReducer: check the transfer
     * of keys and values to the pipes client.
     *
     * @throws Exception
     */
    @Test
    public void testPipesReducer() throws Exception {
        System.err.println("testPipesReducer");

        File[] psw = cleanTokenPasswordFile();
        try {
            JobID jobId = new JobID("201408272347", 0);
            TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
            TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

            Job job = new Job(new Configuration());
            job.setJobID(jobId);
            Configuration conf = job.getConfiguration();
            conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
            FileSystem fs = new RawLocalFileSystem();
            fs.setConf(conf);

            File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
            conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
            System.err.println("fCommand" + fCommand.getAbsolutePath());

            Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                    "password".getBytes(), new Text("kind"), new Text("service"));
            TokenCache.setJobToken(token, job.getCredentials());
            conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

            TestReporter reporter = new TestReporter();
            DummyInputFormat input_format = new DummyInputFormat();
            List<InputSplit> isplits = input_format.getSplits(job);
            InputSplit isplit = isplits.get(0);
            TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

            RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                    new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

            BooleanWritable bw = new BooleanWritable(true);
            List<Text> texts = new ArrayList<Text>();
            texts.add(new Text("first"));
            texts.add(new Text("second"));
            texts.add(new Text("third"));

            DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

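            // build a reduce context around the dummy iterator; collaborators
            // this test does not need (committer, reporter, comparator, ...)
            // are passed as null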
            ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                    conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                    Text.class);

            PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
            reducer.setup(context);

            initStdOut(conf);
            reducer.reduce(bw, texts, context);
            reducer.cleanup(context);
            String stdOut = readStdOut(conf);

            // test data: key
            assertTrue(stdOut.contains("reducer key :true"));
            // and values
            assertTrue(stdOut.contains("reduce value  :first"));
            assertTrue(stdOut.contains("reduce value  :second"));
            assertTrue(stdOut.contains("reduce value  :third"));

        } finally {
            if (psw != null) {
                // remove password files
                for (File file : psw) {
                    file.deleteOnExit();
                }
            }
        }

    }

    /**
     * Test PipesPartitioner: set a partition value and read it back.
     */
    @Test
    public void testPipesPartitioner() {

        PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();
        Configuration configuration = new Configuration();
        Submitter.getJavaPartitioner(configuration);
        partitioner.configure(new Configuration());
        IntWritable iw = new IntWritable(4);
        // nothing has been cached yet: the default partition is returned
        assertEquals(0, partitioner.getPartition(iw, new Text("test"), 2));
        // cache a partition value
        PipesPartitioner.setNextPartition(3);
        // the cached value is returned
        assertEquals(3, partitioner.getPartition(iw, new Text("test"), 2));
    }

    /**
     * Prepare the task log directory and clean out stdout/stderr logs from
     * previous runs.
     */
    private void initStdOut(Configuration configuration) {
        TaskAttemptID taskId = TaskAttemptID.forName(configuration.get(MRJobConfig.TASK_ATTEMPT_ID));
        File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
        File stdErr = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDERR);
        // prepare folder
        if (!stdOut.getParentFile().exists()) {
            stdOut.getParentFile().mkdirs();
        } else { // schedule stale logs for deletion on JVM exit
            stdOut.deleteOnExit();
            stdErr.deleteOnExit();
        }
    }

    private String readStdOut(Configuration conf) throws Exception {
        TaskAttemptID taskId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
        File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);

        return readFile(stdOut);

    }

    private String readFile(File file) throws Exception {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        InputStream is = new FileInputStream(file);
        byte[] buffer = new byte[1024];
        int counter = 0;
        while ((counter = is.read(buffer)) >= 0) {
            out.write(buffer, 0, counter);
        }

        is.close();

        return out.toString();

    }

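    // Unused by the tests; note that DummyRawKeyValueIterator below names
    // org.apache.hadoop.util.Progress fully qualified to avoid a clash with
    // this nested class.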
    private class Progress implements Progressable {

        @Override
        public void progress() {

        }

    }

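    // Remove leftover job-token password files from previous runs so the
    // pipes security handshake starts from a clean state.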
    private File[] cleanTokenPasswordFile() throws Exception {
        File[] result = new File[2];
        result[0] = new File("./jobTokenPassword");
        if (result[0].exists()) {
            FileUtil.chmod(result[0].getAbsolutePath(), "700");
            assertTrue(result[0].delete());
        }
        result[1] = new File("./.jobTokenPassword.crc");
        if (result[1].exists()) {
            FileUtil.chmod(result[1].getAbsolutePath(), "700");
            result[1].delete();
        }
        return result;
    }

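    // Write a small shell script into the workspace: it runs the given stub
    // class on the current classpath, or a harmless "ls" when clazz is null.
    // The pipes runtime launches this script as its client executable.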
    private File getFileCommand(String clazz) throws Exception {
        String classpath = System.getProperty("java.class.path");
        File fCommand = new File(workSpace + File.separator + "cache.sh");
        fCommand.deleteOnExit();
        if (!fCommand.getParentFile().exists()) {
            fCommand.getParentFile().mkdirs();
        }
        fCommand.createNewFile();
        OutputStream os = new FileOutputStream(fCommand);
        os.write("#!/bin/sh \n".getBytes());
        if (clazz == null) {
            os.write(("ls ").getBytes());
        } else {
            os.write(("java -cp " + classpath + " " + clazz).getBytes());
        }
        os.flush();
        os.close();
        FileUtil.chmod(fCommand.getAbsolutePath(), "700");
        return fCommand;
    }

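    // Minimal RecordWriter that serializes each pair as "key:..." and
    // "value:..." text lines.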
    private class TestRecordWriter extends RecordWriter<IntWritable, Text> {
        private OutputStream os;

        public TestRecordWriter(OutputStream os) {
            this.os = os;
        }

        @Override
        public void write(IntWritable key, Text value) throws IOException, InterruptedException {
            os.write(("key:" + key + "\n").getBytes());
            os.write(("value:" + value + "\n").getBytes());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            os.close();
        }
    }

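    // StatusReporter stub: hands out fresh Counters and reports zero progress.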
    private class TestReporter extends StatusReporter {
        private String status;
        private float progress;

        public Counter getCounter(Enum<?> name) {
            return new Counters().findCounter(name);
        }

        public Counter getCounter(String group, String name) {
            return new Counters().findCounter(group, name);
        }

        public void progress() {
        }

        public float getProgress() {
            progress = 0.0f;
            return progress;
        }

        public void setStatus(String status) {
        }
    }

    private class TestMapContext extends MapContextImpl<FloatWritable, NullWritable, IntWritable, Text> {

        TestMapContext(Configuration conf, TaskAttemptID tid, RecordReader<FloatWritable, NullWritable> reader,
                RecordWriter<IntWritable, Text> writer, OutputCommitter committer, StatusReporter reporter,
                InputSplit split) {
            super(conf, tid, reader, writer, committer, reporter, split);
        }
    }

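    // RawKeyValueIterator stub: next() always reports more data, but keys,
    // values and progress are all null.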
    private class DummyRawKeyValueIterator implements RawKeyValueIterator {
        public DataInputBuffer getKey() throws IOException {
            return null;
        }

        public DataInputBuffer getValue() throws IOException {
            return null;
        }

        public boolean next() throws IOException {
            return true;
        }

        public void close() throws IOException {
        }

        public org.apache.hadoop.util.Progress getProgress() {
            return null;
        }
    }
}