org.apache.flink.test.hadoop.mapred.HadoopIOFormatsITCase.java Source code

Introduction

Here is the source code for org.apache.flink.test.hadoop.mapred.HadoopIOFormatsITCase.java, an integration test that writes Hadoop SequenceFiles and reads them back through Flink's HadoopInputFormat wrapper for the classic mapred API.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.test.hadoop.mapred;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.test.util.JavaProgramTestBase;
import org.apache.flink.test.util.TestBaseUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.LinkedList;

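/**
 * Integration test for reading Hadoop SequenceFiles through Flink's
 * {@link HadoopInputFormat} wrapper for the mapred API. Each parameterized
 * run executes one of the programs defined in {@link HadoopIOFormatPrograms}.
 */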
@RunWith(Parameterized.class)
public class HadoopIOFormatsITCase extends JavaProgramTestBase {

    private static final int NUM_PROGRAMS = 2;

    private int curProgId = config.getInteger("ProgramId", -1);
    private String[] resultPath;
    private String[] expectedResult;
    private String sequenceFileInPath;
    private String sequenceFileInPathNull;

    public HadoopIOFormatsITCase(Configuration config) {
        super(config);
    }

    @Override
    protected void preSubmit() throws Exception {
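        // each program writes two result files; postSubmit() compares both against the expected output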
        resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };

        File sequenceFile = createAndRegisterTempFile("seqFile");
        sequenceFileInPath = sequenceFile.toURI().toString();

        // Create a sequence file
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
        Path path = new Path(sequenceFile.getAbsolutePath());

        //  ------------------ Long / Text Key Value pair: ------------
        int kvCount = 4;

        LongWritable key = new LongWritable();
        Text value = new Text();
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
            for (int i = 0; i < kvCount; i++) {
                if (i == 1) {
                    // write key = 1 a bit more often.
                    for (int a = 0; a < 15; a++) {
                        key.set(i);
                        value.set(i + " - somestring");
                        writer.append(key, value);
                    }
                }
                key.set(i);
                value.set(i + " - somestring");
                writer.append(key, value);
            }
        } finally {
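            // closeStream ignores a null stream, so this is safe even if createWriter threw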
            IOUtils.closeStream(writer);
        }

        //  ------------------ NullWritable Key / Long Value pair: ------------

        File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
        sequenceFileInPathNull = sequenceFileNull.toURI().toString();
        path = new Path(sequenceFileInPathNull);

        LongWritable value1 = new LongWritable();
        SequenceFile.Writer writer1 = null;
        try {
            writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
            for (int i = 0; i < kvCount; i++) {
                value1.set(i);
                writer1.append(NullWritable.get(), value1);
            }
        } finally {
            IOUtils.closeStream(writer1);
        }
    }

    @Override
    protected void testProgram() throws Exception {
        expectedResult = HadoopIOFormatPrograms.runProgram(curProgId, resultPath, sequenceFileInPath,
                sequenceFileInPathNull);
    }

    @Override
    protected void postSubmit() throws Exception {
        for (int i = 0; i < resultPath.length; i++) {
            compareResultsByLinesInMemory(expectedResult[i], resultPath[i]);
        }
    }

    @Parameters
    public static Collection<Object[]> getConfigurations() throws FileNotFoundException, IOException {

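        // build one Configuration per program id; the parameterized runner instantiates the test once per entry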
        LinkedList<Configuration> tConfigs = new LinkedList<Configuration>();

        for (int i = 1; i <= NUM_PROGRAMS; i++) {
            Configuration config = new Configuration();
            config.setInteger("ProgramId", i);
            tConfigs.add(config);
        }

        return TestBaseUtils.toParameterList(tConfigs);
    }

    public static class HadoopIOFormatPrograms {

        public static String[] runProgram(int progId, String[] resultPath, String sequenceFileInPath,
                String sequenceFileInPathNull) throws Exception {

            switch (progId) {
            case 1: {
                /**
                 * Test sequence file, including a key access.
                 */
                final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

                SequenceFileInputFormat<LongWritable, Text> sfif = new SequenceFileInputFormat<LongWritable, Text>();
                JobConf hdconf = new JobConf();
                SequenceFileInputFormat.addInputPath(hdconf, new Path(sequenceFileInPath));
                HadoopInputFormat<LongWritable, Text> hif = new HadoopInputFormat<LongWritable, Text>(sfif,
                        LongWritable.class, Text.class, hdconf);
                DataSet<Tuple2<LongWritable, Text>> ds = env.createInput(hif);
                DataSet<Tuple2<Long, Text>> summed = ds
                        .map(new MapFunction<Tuple2<LongWritable, Text>, Tuple2<Long, Text>>() {
                            @Override
                            public Tuple2<Long, Text> map(Tuple2<LongWritable, Text> value) throws Exception {
                                return new Tuple2<Long, Text>(value.f0.get(), value.f1);
                            }
                        }).sum(0);
                summed.writeAsText(resultPath[0]);
                DataSet<String> res = ds.distinct(0).map(new MapFunction<Tuple2<LongWritable, Text>, String>() {
                    @Override
                    public String map(Tuple2<LongWritable, Text> value) throws Exception {
                        return value.f1 + " - " + value.f0.get();
                    }
                });
                res.writeAsText(resultPath[1]);
                env.execute();

                // return expected result
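                // key 1 was written 16 times (15 extra + 1), so sum(0) yields 0 + 16 + 2 + 3 = 21;
                // the Text field of the summed tuple keeps the value of one of the input records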
                return new String[] { "(21,3 - somestring)", "0 - somestring - 0\n" + "1 - somestring - 1\n"
                        + "2 - somestring - 2\n" + "3 - somestring - 3\n" };

            }
            case 2: {
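                /**
                 * Test sequence file with NullWritable keys, mapping each record
                 * to a (null, value) tuple.
                 */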
                final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

                SequenceFileInputFormat<NullWritable, LongWritable> sfif = new SequenceFileInputFormat<NullWritable, LongWritable>();
                JobConf hdconf = new JobConf();
                SequenceFileInputFormat.addInputPath(hdconf, new Path(sequenceFileInPathNull));
                HadoopInputFormat<NullWritable, LongWritable> hif = new HadoopInputFormat<NullWritable, LongWritable>(
                        sfif, NullWritable.class, LongWritable.class, hdconf);
                DataSet<Tuple2<NullWritable, LongWritable>> ds = env.createInput(hif);
                DataSet<Tuple2<Void, Long>> res = ds
                        .map(new MapFunction<Tuple2<NullWritable, LongWritable>, Tuple2<Void, Long>>() {
                            @Override
                            public Tuple2<Void, Long> map(Tuple2<NullWritable, LongWritable> value)
                                    throws Exception {
                                return new Tuple2<Void, Long>(null, value.f1.get());
                            }
                        });
                DataSet<Tuple2<Void, Long>> res1 = res.groupBy(1).sum(1);
                res1.writeAsText(resultPath[1]);
                res.writeAsText(resultPath[0]);
                env.execute();

                // return expected result
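                // each value 0..3 occurs exactly once, so groupBy(1).sum(1) leaves the values unchanged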
                return new String[] { "(null,2)\n" + "(null,0)\n" + "(null,1)\n" + "(null,3)",
                        "(null,0)\n" + "(null,1)\n" + "(null,2)\n" + "(null,3)" };
            }
            default:
                throw new IllegalArgumentException("Invalid program id");
            }

        }

    }
}
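
Example

The core pattern this test exercises, wrapping a Hadoop mapred SequenceFileInputFormat in Flink's HadoopInputFormat and consuming it as a DataSet, can be reduced to a short standalone job. The sketch below is a minimal illustration under the same dependencies as the test; the class name SequenceFileReadExample and the input path /tmp/input.seq are placeholders, not part of the original test.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class SequenceFileReadExample {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Register the input path on a JobConf, exactly as the test does.
        JobConf jobConf = new JobConf();
        SequenceFileInputFormat.addInputPath(jobConf, new Path("/tmp/input.seq")); // placeholder path

        // Wrap the mapred input format; the key and value classes must match the file's Writable types.
        HadoopInputFormat<LongWritable, Text> input = new HadoopInputFormat<LongWritable, Text>(
                new SequenceFileInputFormat<LongWritable, Text>(),
                LongWritable.class, Text.class, jobConf);

        // Each record arrives as a (key, value) tuple of Hadoop Writable types.
        DataSet<Tuple2<LongWritable, Text>> records = env.createInput(input);
        records.print();
    }
}

Note that the test writes its outputs with writeAsText(...) and then calls env.execute(); in recent Flink versions, print() on a DataSet triggers execution by itself, so the sketch needs no explicit execute() call.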