gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.List;
import java.util.Set;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemTestUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import static org.testng.Assert.*;

public class GobblinWorkUnitsInputFormatTest {
    @Test
    public void testGetSplits() throws Exception {

        URI baseUri = new URI(GobblinWorkUnitsInputFormatTest.class.getSimpleName() + "://testGetSplits");
        Configuration configuration = new Configuration();

        Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");

        FileSystem fs = Mockito.mock(FileSystem.class);
        FileStatus[] statuses = createFileStatuses(20, workUnitsDir);
        Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
        Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(new Answer<Path>() {
            @Override
            public Path answer(InvocationOnMock invocation) throws Throwable {
                return (Path) invocation.getArguments()[0];
            }
        });

        FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

        GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
        Job job = Job.getInstance(configuration);
        FileInputFormat.addInputPath(job, workUnitsDir);

        List<InputSplit> splits = inputFormat.getSplits(job);

        Assert.assertEquals(splits.size(), 20);
        verifyPaths(splits, statuses);
    }

    @Test
    public void testGetSplitsMaxSize() throws Exception {

        URI baseUri = new URI(GobblinWorkUnitsInputFormatTest.class.getSimpleName() + "://testGetSplitsMaxSize");
        Configuration configuration = new Configuration();

        Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");

        FileSystem fs = Mockito.mock(FileSystem.class);
        FileStatus[] statuses = createFileStatuses(20, workUnitsDir);
        Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
        Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(new Answer<Path>() {
            @Override
            public Path answer(InvocationOnMock invocation) throws Throwable {
                return (Path) invocation.getArguments()[0];
            }
        });

        FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

        GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
        Job job = Job.getInstance(configuration);
        FileInputFormat.addInputPath(job, workUnitsDir);
        GobblinWorkUnitsInputFormat.setMaxMappers(job, 6);

        List<InputSplit> splits = inputFormat.getSplits(job);

        Assert.assertTrue(splits.size() < 6);
        verifyPaths(splits, statuses);
    }

    @Test
    public void testSplit() throws Exception {
        List<String> paths = Lists.newArrayList("/path1", "/path2");
        GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

        ByteArrayOutputStream os = new ByteArrayOutputStream();
        split.write(new DataOutputStream(os));

        ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
        GobblinWorkUnitsInputFormat.GobblinSplit deserSplit = new GobblinWorkUnitsInputFormat.GobblinSplit();
        deserSplit.readFields(new DataInputStream(is));

        Assert.assertEquals(split, deserSplit);
    }

    @Test
    public void testRecordReader() throws Exception {

        List<String> paths = Lists.newArrayList("/path1", "/path2");
        GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

        GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
        RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
                new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

        recordReader.nextKeyValue();
        Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
        Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");

        recordReader.nextKeyValue();
        Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
        Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");

        Assert.assertFalse(recordReader.nextKeyValue());

    }

    private void verifyPaths(List<InputSplit> splits, FileStatus[] statuses) {
        Set<String> splitPaths = Sets.newHashSet();
        for (InputSplit split : splits) {
            splitPaths.addAll(((GobblinWorkUnitsInputFormat.GobblinSplit) split).getPaths());
        }

        Set<String> statusPaths = Sets.newHashSet();
        for (FileStatus status : statuses) {
            statusPaths.add(status.getPath().toString());
        }

        Assert.assertEquals(splitPaths, statusPaths);
    }

    private FileStatus[] createFileStatuses(int howMany, Path basePath) {
        FileStatus[] statuses = new FileStatus[howMany];
        for (int i = 0; i < howMany; i++) {
            statuses[i] = new FileStatus(0, false, 0, 0, 0, new Path(basePath, "file" + i));
        }
        return statuses;
    }
}