eu.stratosphere.test.operators.UnionITCase.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.test.operators.UnionITCase.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.test.operators;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.io.DelimitedInputFormat;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.test.operators.io.ContractITCaseIOFormats.ContractITCaseInputFormat;
import eu.stratosphere.test.operators.io.ContractITCaseIOFormats.ContractITCaseOutputFormat;
import eu.stratosphere.test.util.RecordAPITestBase;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.util.Collector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.LinkedList;

/**
 * Parameterized integration test that feeds two file sources into a single map operator
 * (both sources are added as inputs of the same mapper) and compares the sink output with
 * the expected result stored in the test configuration.
 *
 * <p>The configurations returned by {@link #getConfigurations()} cover the four combinations
 * of an empty / non-empty first and second input.
 */
@RunWith(Parameterized.class)
public class UnionITCase extends RecordAPITestBase {
    private static final Log LOG = LogFactory.getLog(UnionITCase.class);

    /** Configuration key holding the degree of parallelism of the sources and the mapper. */
    private static final String SUBTASKS_KEY = "UnionTest#NoSubtasks";

    /** Configuration key holding the expected content of the result. */
    private static final String EXPECTED_RESULT_KEY = "UnionTest#ExpectedResult";

    /** Configuration key selecting the file used for the first input. */
    private static final String INPUT1_PATH_KEY = "UnionTest#Input1Path";

    /** Configuration key selecting the file used for the second input. */
    private static final String INPUT2_PATH_KEY = "UnionTest#Input2Path";

    /** Marker value that makes an input read the empty file. */
    private static final String EMPTY_INPUT = "empty";

    /** Marker value used for an input that reads the populated file; any value other than "empty" has this effect. */
    private static final String NON_EMPTY_INPUT = "non-empty";

    /** Content of the populated input file: one "key value" pair per line. */
    private static final String IN = "1 1\n2 2\n2 8\n4 4\n4 4\n6 6\n7 7\n8 8\n1 1\n"
            + "2 2\n2 2\n4 4\n4 4\n6 3\n5 9\n8 8\n1 1\n2 2\n2 2\n3 0\n4 4\n5 9\n7 7\n8 8\n"
            + "1 1\n9 1\n5 9\n4 4\n4 4\n6 6\n7 7\n8 8\n";

    /** Expected result when exactly one of the two inputs reads {@link #IN}. */
    private static final String RESULT = "1 11\n2 12\n4 14\n4 14\n1 11\n2 12\n2 12\n"
            + "4 14\n4 14\n3 16\n1 11\n2 12\n2 12\n0 13\n4 14\n1 11\n4 14\n4 14\n";

    /** Expected result when both inputs read the empty file. */
    private static final String EMPTY_RESULT = "";

    /** Expected result when both inputs read {@link #IN}: the lines of {@link #RESULT}, twice. */
    private static final String MAP_RESULT_TWICE = "1 11\n2 12\n4 14\n4 14\n1 11\n2 12\n2 12\n4 14\n4 14\n3 16\n1 11\n2 12\n2 12\n0 13\n4 14\n1 11\n4 14\n4 14\n"
            + "1 11\n2 12\n4 14\n4 14\n1 11\n2 12\n2 12\n4 14\n4 14\n3 16\n1 11\n2 12\n2 12\n0 13\n4 14\n1 11\n4 14\n4 14\n";

    /** Path of the populated input file; assigned in {@link #preSubmit()}. */
    String inPath;

    /** Path of the empty input file; assigned in {@link #preSubmit()}. */
    String emptyInPath;

    /** Path the sink writes to; assigned in {@link #preSubmit()}. */
    String resultPath;

    /**
     * Creates the test case for one parameter set.
     *
     * @param testConfig configuration of this run, passed on to the base class
     */
    public UnionITCase(Configuration testConfig) {
        super(testConfig);
    }

    /**
     * Sets up the populated input file, the empty input file and the result path
     * before the job is submitted.
     */
    @Override
    protected void preSubmit() throws Exception {
        this.inPath = createTempFile("in.txt", IN);
        this.emptyInPath = createTempFile("empty_in.txt", "");
        this.resultPath = getTempDirPath("result");
    }

    /**
     * Map function under test. A record is emitted only if the integer values of its first
     * two fields sum to less than 10; the emitted record carries the original second field in
     * position 0 and the original first field plus 10 in position 1.
     */
    public static class TestMapper extends MapFunction implements Serializable {
        private static final long serialVersionUID = 1L;

        // Reusable target objects handed to Record.getField.
        private StringValue keyString = new StringValue();
        private StringValue valueString = new StringValue();

        /**
         * Filters and rewrites a single record.
         *
         * @param record the incoming record; fields 0 and 1 are read as strings and parsed as integers
         * @param out collector receiving the rewritten record when the filter condition holds
         * @throws Exception if processing fails, e.g. a field cannot be parsed as an integer
         */
        @Override
        public void map(Record record, Collector<Record> out) throws Exception {
            keyString = record.getField(0, keyString);
            valueString = record.getField(1, valueString);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Processed: [" + keyString.toString() + "," + valueString.getValue() + "]");
            }

            final int key = Integer.parseInt(keyString.toString());
            final int value = Integer.parseInt(valueString.toString());

            // Only pairs whose sum stays below 10 are forwarded.
            if (key + value >= 10) {
                return;
            }

            record.setField(0, valueString);
            record.setField(1, new IntValue(key + 10));
            out.collect(record);
        }
    }

    /**
     * Assembles the plan: two file sources feeding one map operator whose output goes to a
     * single file sink with a degree of parallelism of 1.
     *
     * @return the plan rooted at the file sink
     */
    @SuppressWarnings({ "deprecation", "unchecked" })
    @Override
    protected Plan getTestJob() {
        final String firstPath = resolveInputPath(INPUT1_PATH_KEY);
        final String secondPath = resolveInputPath(INPUT2_PATH_KEY);

        final FileDataSource firstSource = createSource(firstPath);
        final FileDataSource secondSource = createSource(secondPath);

        final MapOperator unionMapper = MapOperator.builder(new TestMapper()).build();
        unionMapper.setDegreeOfParallelism(config.getInteger(SUBTASKS_KEY, 1));

        final FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
        sink.setDegreeOfParallelism(1);

        sink.setInput(unionMapper);

        // Both sources become inputs of the same mapper.
        unionMapper.addInput(firstSource);
        unionMapper.addInput(secondSource);

        return new Plan(sink);
    }

    /**
     * Picks the input file for the given configuration key.
     *
     * @param pathKey configuration key of the input to resolve
     * @return the empty file's path if the configured value is "empty", the populated file's path otherwise
     */
    private String resolveInputPath(String pathKey) {
        if (config.getString(pathKey, "").equals(EMPTY_INPUT)) {
            return emptyInPath;
        }
        return inPath;
    }

    /**
     * Creates a newline-delimited file source whose degree of parallelism is taken from the
     * test configuration (default 1).
     *
     * @param path path of the file to read
     * @return the configured source
     */
    @SuppressWarnings({ "deprecation", "unchecked" })
    private FileDataSource createSource(String path) {
        final FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), path);
        DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
        source.setDegreeOfParallelism(config.getInteger(SUBTASKS_KEY, 1));
        return source;
    }

    /**
     * Hands the expected result from the configuration and the result path to
     * {@code compareResultsByLinesInMemory}.
     */
    @Override
    protected void postSubmit() throws Exception {
        final String expected = config.getString(EXPECTED_RESULT_KEY, "");
        compareResultsByLinesInMemory(expected, resultPath);
    }

    /**
     * Supplies the parameter sets for the runner: every combination of an empty / non-empty
     * first and second input, each run with 4 subtasks.
     *
     * @return the configurations converted by {@code toParameterList}
     */
    @Parameters
    public static Collection<Object[]> getConfigurations() throws FileNotFoundException, IOException {
        LinkedList<Configuration> configurations = new LinkedList<Configuration>();

        // second input empty
        configurations.add(createConfiguration(RESULT, NON_EMPTY_INPUT, EMPTY_INPUT));

        // first input empty
        configurations.add(createConfiguration(RESULT, EMPTY_INPUT, NON_EMPTY_INPUT));

        // both inputs full
        configurations.add(createConfiguration(MAP_RESULT_TWICE, NON_EMPTY_INPUT, NON_EMPTY_INPUT));

        // both inputs empty
        configurations.add(createConfiguration(EMPTY_RESULT, EMPTY_INPUT, EMPTY_INPUT));

        return toParameterList(configurations);
    }

    /**
     * Builds one test configuration with 4 subtasks.
     *
     * @param expectedResult expected content of the result
     * @param firstInput marker for the first input ("empty" selects the empty file)
     * @param secondInput marker for the second input ("empty" selects the empty file)
     * @return the populated configuration
     */
    private static Configuration createConfiguration(String expectedResult, String firstInput,
            String secondInput) {
        Configuration configuration = new Configuration();
        configuration.setInteger(SUBTASKS_KEY, 4);
        configuration.setString(EXPECTED_RESULT_KEY, expectedResult);
        configuration.setString(INPUT1_PATH_KEY, firstInput);
        configuration.setString(INPUT2_PATH_KEY, secondInput);
        return configuration;
    }
}