com.datasalt.utils.mapred.joiner.TestJoinOneToMany.java Source code

Introduction

Here is the source code for com.datasalt.utils.mapred.joiner.TestJoinOneToMany.java, a JUnit test for the JoinOneToMany utility: a one-side mapper and a many-side mapper feed a OneToManyReducer whose callbacks handle matched pairs as well as keys that are missing on either side.

Source

/**
 * Copyright [2011] [Datasalt Systems S.L.]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.datasalt.utils.mapred.joiner;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.AfterClass;
import org.junit.Test;

import com.datasalt.utils.commons.HadoopUtils;
import com.datasalt.utils.commons.test.BaseTest;
import com.datasalt.utils.mapred.joiner.JoinOneToMany;
import com.datasalt.utils.mapred.joiner.JoinOneToMany.ManySideMapper;
import com.datasalt.utils.mapred.joiner.JoinOneToMany.OneSideMapper;
import com.datasalt.utils.mapred.joiner.JoinOneToMany.OneToManyReducer;
import com.google.common.io.Files;

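/**
 * Exercises {@link JoinOneToMany}: OneMap emits a single value per key on the "one" side
 * (KeyA, KeyB, KeyC), ManyMap emits several values per key on the "many" side
 * (KeyA, KeyB, KeyD), and the Reducer callbacks cover matched pairs, a key with
 * no one-side record (KeyD) and a key with no many-side records (KeyC).
 */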
public class TestJoinOneToMany extends BaseTest {

    final static String OUTPUT = "output-" + TestJoinOneToMany.class;
    final static String INPUT1 = "input1-" + TestJoinOneToMany.class;
    final static String INPUT2 = "input2-" + TestJoinOneToMany.class;

    // Incremented when a reduce group has no one-side record (see onNoOneSideItem).
    static int firstWasSecondClass = 0;
    // Set to true when a reduce group has no many-side records (see onNoManySideItems).
    static boolean noSecondClass = false;

    public static class OneMap extends OneSideMapper<LongWritable, Text> {

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            emit("KeyA", new IntWritable(1));
            emit("KeyB", new IntWritable(2));
            emit("KeyC", new IntWritable(3));
        }
    }

    public static class ManyMap extends ManySideMapper<LongWritable, Text> {

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            emit("KeyA", new Text("foo"));
            emit("KeyA", new Text("bar"));
            emit("KeyB", new Text("oh la la"));
            emit("KeyB", new Text("blah blah"));
            emit("KeyD", new Text("bluu"));
        }
    }

    public static class Reducer extends OneToManyReducer<IntWritable, Text, Text, NullWritable> {

        @Override
        protected void onNoOneSideItem(Context ctx) throws IOException, InterruptedException {
            firstWasSecondClass++;
        }

        @Override
        protected void onNoManySideItems(Context ctx) throws IOException, InterruptedException {
            noSecondClass = true;
        }

        @Override
        protected void onPair(IntWritable firstItem, Text secondItem, Context ctx)
                throws IOException, InterruptedException {
            secondItem = (secondItem == null) ? new Text("snull") : secondItem;
            firstItem = (firstItem == null) ? new IntWritable(-1) : firstItem;
            ctx.write(new Text(firstItem + " " + secondItem), NullWritable.get());
        }
    }

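    /**
     * Wires up the join job: Text/NullWritable output through TextOutputFormat,
     * with INPUT1 read as the many side (ManyMap) and INPUT2 as the one side (OneMap).
     */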
    protected Job getMultiJoiner(Configuration conf)
            throws IOException, InterruptedException, ClassNotFoundException {
        JoinOneToMany multiJoiner = new JoinOneToMany("MultiJoiner Test", conf);
        multiJoiner.setReducer(Reducer.class);
        multiJoiner.setOutputKeyClass(Text.class);
        multiJoiner.setOutputValueClass(NullWritable.class);
        multiJoiner.setOutputFormat(TextOutputFormat.class);
        multiJoiner.setOutputPath(new Path(OUTPUT));

        Job job = multiJoiner.setManySideClass(Text.class).setOneSideClass(IntWritable.class)
                .addManySideInput(new Path(INPUT1), TextInputFormat.class, ManyMap.class)
                .addOneSideInput(new Path(INPUT2), TextInputFormat.class, OneMap.class).getJob();

        return job;
    }

    @Test
    public void test() throws IOException, InterruptedException, ClassNotFoundException {
        // (Re)create two dummy one-line input files; the mappers ignore their contents.
        File input = new File(INPUT1);
        if (input.exists()) {
            while (!input.delete());
        }
        Files.write("line", input, Charset.defaultCharset());
        input = new File(INPUT2);
        if (input.exists()) {
            while (!input.delete());
        }
        Files.write("line", input, Charset.defaultCharset());

        Configuration conf = getConf();
        Job job = getMultiJoiner(conf);
        job.waitForCompletion(true);
        assertTrue(job.isSuccessful());

        File out = new File(OUTPUT, "part-r-00000");
        List<String> lines = Files.readLines(out, Charset.defaultCharset());
        System.out.println(lines);

        assertEquals(6, lines.size());
        assertTrue(lines.contains("1 foo"));
        assertTrue(lines.contains("1 bar"));
        assertTrue(lines.contains("2 oh la la"));
        assertTrue(lines.contains("2 blah blah"));
        assertTrue(lines.contains("-1 bluu"));
        assertTrue(lines.contains("3 snull"));

        assertEquals(1, firstWasSecondClass);
        assertTrue(noSecondClass);

        cleanUp();
    }

    @AfterClass
    public static void cleanUp() throws IOException {
        Configuration conf = new Configuration();
        HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(OUTPUT));
        HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(INPUT1));
        HadoopUtils.deleteIfExists(FileSystem.get(conf), new Path(INPUT2));
    }
}
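
For reference, the six output lines asserted above can be reproduced outside Hadoop with a small in-memory sketch of the one-to-many join semantics this test relies on: each one-side value is paired with every many-side value for its key, a key that only appears on the many side gets -1 as its one-side value, and a key that only appears on the one side gets "snull" as its many-side value. The class below (OneToManyJoinSketch) is a hypothetical, standalone illustration and is not part of the Datasalt API.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

public class OneToManyJoinSketch {

    public static void main(String[] args) {
        // One-side records, one value per key (what OneMap emits).
        Map<String, Integer> oneSide = new LinkedHashMap<String, Integer>();
        oneSide.put("KeyA", 1);
        oneSide.put("KeyB", 2);
        oneSide.put("KeyC", 3);

        // Many-side records, several values per key (what ManyMap emits).
        Map<String, List<String>> manySide = new LinkedHashMap<String, List<String>>();
        manySide.put("KeyA", Arrays.asList("foo", "bar"));
        manySide.put("KeyB", Arrays.asList("oh la la", "blah blah"));
        manySide.put("KeyD", Arrays.asList("bluu"));

        // Visit every key seen on either side, as the grouping reducer would.
        TreeSet<String> allKeys = new TreeSet<String>();
        allKeys.addAll(oneSide.keySet());
        allKeys.addAll(manySide.keySet());

        for (String key : allKeys) {
            Integer one = oneSide.get(key);
            List<String> many = manySide.get(key);
            if (many == null) {
                // No many-side records: mirrors the test reducer, which prints "snull".
                System.out.println(one + " snull");
            } else if (one == null) {
                // No one-side record: mirrors the test reducer, which substitutes -1.
                for (String m : many) {
                    System.out.println("-1 " + m);
                }
            } else {
                // Matched key: one pair per many-side value.
                for (String m : many) {
                    System.out.println(one + " " + m);
                }
            }
        }
    }
}

Running it prints the same six pairs the test expects in part-r-00000 (1 foo, 1 bar, 2 oh la la, 2 blah blah, 3 snull, -1 bluu), only in key order rather than job output order.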