// Java tutorial
/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.test.operators; import eu.stratosphere.api.common.Plan; import eu.stratosphere.api.java.record.operators.FileDataSink; import eu.stratosphere.api.java.record.operators.FileDataSource; import eu.stratosphere.api.java.record.functions.ReduceFunction; import eu.stratosphere.api.java.record.io.DelimitedInputFormat; import eu.stratosphere.api.java.record.operators.ReduceOperator; import eu.stratosphere.compiler.DataStatistics; import eu.stratosphere.compiler.PactCompiler; import eu.stratosphere.compiler.plan.OptimizedPlan; import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.nephele.jobgraph.JobGraph; import eu.stratosphere.test.operators.io.ContractITCaseIOFormats.ContractITCaseInputFormat; import eu.stratosphere.test.operators.io.ContractITCaseIOFormats.ContractITCaseOutputFormat; import eu.stratosphere.test.util.RecordAPITestBase; import eu.stratosphere.types.IntValue; import eu.stratosphere.types.Record; import eu.stratosphere.types.StringValue; import eu.stratosphere.util.Collector; import 
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; @RunWith(Parameterized.class) public class ReduceITCase extends RecordAPITestBase { private static final Log LOG = LogFactory.getLog(ReduceITCase.class); String inPath = null; String resultPath = null; public ReduceITCase(Configuration testConfig) { super(testConfig); } private static final String IN = "1 1\n2 2\n2 8\n4 4\n4 4\n6 6\n7 7\n8 8\n1 1\n" + "2 2\n2 2\n4 4\n4 4\n6 3\n5 9\n8 8\n1 1\n2 2\n2 2\n3 0\n4 4\n5 9\n7 7\n8 8\n" + "1 1\n9 1\n5 9\n4 4\n4 4\n6 6\n7 7\n8 8\n"; private static final String RESULT = "1 4\n2 18\n3 0\n4 28\n5 27\n6 15\n7 21\n8 32\n9 1\n"; @Override protected void preSubmit() throws Exception { inPath = createTempFile("in.txt", IN); resultPath = getTempDirPath("result"); } @ReduceOperator.Combinable public static class TestReducer extends ReduceFunction implements Serializable { private static final long serialVersionUID = 1L; private StringValue reduceValue = new StringValue(); private StringValue combineValue = new StringValue(); @Override public void combine(Iterator<Record> records, Collector<Record> out) throws Exception { int sum = 0; Record record = new Record(); while (records.hasNext()) { record = records.next(); combineValue = record.getField(1, combineValue); sum += Integer.parseInt(combineValue.toString()); LOG.debug("Processed: [" + record.getField(0, StringValue.class).toString() + "," + combineValue.toString() + "]"); } combineValue.setValue(sum + ""); record.setField(1, combineValue); out.collect(record); } @Override public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception { int sum = 0; Record record = new Record(); while 
(records.hasNext()) { record = records.next(); reduceValue = record.getField(1, reduceValue); sum += Integer.parseInt(reduceValue.toString()); LOG.debug("Processed: [" + record.getField(0, StringValue.class).toString() + "," + reduceValue.toString() + "]"); } record.setField(1, new IntValue(sum)); out.collect(record); } } @Override protected JobGraph getJobGraph() throws Exception { FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inPath); DelimitedInputFormat.configureDelimitedFormat(input).recordDelimiter('\n'); input.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1)); ReduceOperator testReducer = ReduceOperator.builder(new TestReducer(), StringValue.class, 0).build(); testReducer.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1)); testReducer.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY, config.getString("ReduceTest#LocalStrategy", "")); testReducer.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY, config.getString("ReduceTest#ShipStrategy", "")); FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath); output.setDegreeOfParallelism(1); output.setInput(testReducer); testReducer.setInput(input); Plan plan = new Plan(output); PactCompiler pc = new PactCompiler(new DataStatistics()); OptimizedPlan op = pc.compile(plan); NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator(); return jgg.compileJobGraph(op); } @Override protected void postSubmit() throws Exception { compareResultsByLinesInMemory(RESULT, resultPath); } @Parameters public static Collection<Object[]> getConfigurations() throws FileNotFoundException, IOException { LinkedList<Configuration> tConfigs = new LinkedList<Configuration>(); String[] localStrategies = { PactCompiler.HINT_LOCAL_STRATEGY_SORT }; String[] shipStrategies = { PactCompiler.HINT_SHIP_STRATEGY_REPARTITION_HASH }; for (String localStrategy : localStrategies) { for (String shipStrategy : shipStrategies) { Configuration 
config = new Configuration(); config.setString("ReduceTest#LocalStrategy", localStrategy); config.setString("ReduceTest#ShipStrategy", shipStrategy); config.setInteger("ReduceTest#NoSubtasks", 4); tConfigs.add(config); } } return toParameterList(tConfigs); } }