org.apache.avro.mapreduce.TestAvroKeyOutputFormat.java Source code

Introduction

Here is the source code for org.apache.avro.mapreduce.TestAvroKeyOutputFormat.java, a unit test that verifies AvroKeyOutputFormat builds its record writer with the compression codec implied by the job configuration, whether the codec is requested through Avro's own properties or through the standard Hadoop compression settings.
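
For orientation, this is roughly how a job opts into the behavior under test. A minimal sketch (not part of the listing below), assuming a Hadoop 2-style Job.getInstance and the same Avro mapreduce classes the test imports; the helper name newSnappyAvroJob is illustrative only:

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical helper: builds a job whose Avro output is Snappy-compressed,
// mirroring what testWithSnappyCodec exercises below.
public static Job newSnappyAvroJob() throws IOException {
    Job job = Job.getInstance(new Configuration());
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.INT));
    // Enable compression via the Hadoop switch, then pick Avro's codec.
    job.getConfiguration().setBoolean("mapred.output.compress", true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
    return job;
}

When getRecordWriter later runs on such a job, it should hand the underlying writer a Snappy CodecFactory; that hand-off is exactly what the helper testGetRecordWriter in the listing asserts.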

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.apache.avro.mapreduce;

import static org.easymock.EasyMock.*;
import static org.junit.Assert.*;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.generic.GenericData;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.easymock.Capture;
import org.easymock.EasyMock;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

public class TestAvroKeyOutputFormat {
    @Rule
    public TemporaryFolder mTempDir = new TemporaryFolder();

    @Test
    public void testWithNullCodec() throws IOException {
        Configuration conf = new Configuration();
        testGetRecordWriter(conf, CodecFactory.nullCodec());
    }

    @Test
    public void testWithDeflateCodec() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 3);
        testGetRecordWriter(conf, CodecFactory.deflateCodec(3));
    }

    @Test
    public void testWithSnappyCodec() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
        testGetRecordWriter(conf, CodecFactory.snappyCodec());
    }

    @Test
    public void testWithBZip2Codec() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.BZIP2_CODEC);
        testGetRecordWriter(conf, CodecFactory.bzip2Codec());
    }

    @Test
    public void testWithDeflateCodecWithHadoopConfig() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DeflateCodec");
        conf.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, -1); // -1 selects the default deflate level
        testGetRecordWriter(conf, CodecFactory.deflateCodec(-1));
    }

    @Test
    public void testWithSnappyCodecWithHadoopConfig() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        testGetRecordWriter(conf, CodecFactory.snappyCodec());
    }

    @Test
    public void testWithBZip2CodecWithHadoopConfig() throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("mapred.output.compress", true);
        conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        testGetRecordWriter(conf, CodecFactory.bzip2Codec());
    }

    /**
     * Tests that the record writer is constructed and returned correctly from the output format.
     */
    private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
        // Configure a mock task attempt context.
        Job job = new Job(conf);
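        // Point the file output format at this test's temporary directory.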
        job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
        Schema writerSchema = Schema.create(Schema.Type.INT);
        AvroJob.setOutputKeySchema(job, writerSchema);
        TaskAttemptContext context = createMock(TaskAttemptContext.class);
        expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
        expect(context.getTaskAttemptID()).andReturn(TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0"))
                .anyTimes();
        expect(context.getNumReduceTasks()).andReturn(1);

        // Create a mock record writer.
        @SuppressWarnings("unchecked")
        RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
        AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
                AvroKeyOutputFormat.RecordWriterFactory.class);

        // Expect the record writer factory to be called with appropriate parameters.
        Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
        expect(recordWriterFactory.create(eq(writerSchema), anyObject(GenericData.class),
                capture(capturedCodecFactory), // Capture for comparison later.
                anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

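        // Switch the mocks into replay mode so the expectations above are enforced.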
        replay(context);
        replay(expectedRecordWriter);
        replay(recordWriterFactory);

        AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
        RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);
        // Make sure the expected codec was used.
        assertTrue(capturedCodecFactory.hasCaptured());
        assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

        verify(context);
        verify(expectedRecordWriter);
        verify(recordWriterFactory);

        assertNotNull(recordWriter);
        assertSame(expectedRecordWriter, recordWriter);
    }
}
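
The design point the test leans on is the RecordWriterFactory seam: AvroKeyOutputFormat accepts the factory through its constructor, so a test can substitute a mock and observe exactly which CodecFactory getRecordWriter resolved from the configuration. The same seam would admit a hand-written stub in place of EasyMock; a minimal sketch, assuming the four-argument create signature implied by the mock expectation above and that the factory is open to subclassing (the class name and the capturedCodec field are illustrative, not part of Avro):

// Hypothetical stand-in for the EasyMock factory mock used in the test.
class CapturingRecordWriterFactory extends AvroKeyOutputFormat.RecordWriterFactory<Integer> {
    CodecFactory capturedCodec; // recorded here for later assertions

    @Override
    protected RecordWriter<AvroKey<Integer>, NullWritable> create(Schema writerSchema,
            GenericData dataModel, CodecFactory codec, OutputStream out) throws IOException {
        capturedCodec = codec; // remember the codec the output format chose
        return super.create(writerSchema, dataModel, codec, out); // delegate to the real writer
    }
}

Either way, the assertion compares the captured codec to the expected one by its toString() form, since CodecFactory does not define a value-based equals.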