Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.mahout.df.mapred.partial;

import java.util.Random;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;

/**
 * Exercises {@link Step1Mapper} partition by partition: feeds each mapper its
 * own split of generated data, then checks the first tree id it computed, the
 * number of outputs it collected and the keys attached to those outputs.
 */
public class Step1MapperTest extends MahoutTestCase {

  /**
   * Tree builder stand-in that verifies every instance it is asked to build
   * from belongs to the data registered through {@link #setExpected(Data)},
   * i.e. the data loaded from the mapper's own partition.
   */
  private static class MockTreeBuilder implements TreeBuilder {

    /** Data the next call to {@link #build(Random, Data)} is checked against. */
    private Data expected;

    /**
     * Registers the data that subsequent builds are allowed to draw from.
     *
     * @param data reference data for the containment check
     */
    public void setExpected(Data data) {
      this.expected = data;
    }

    /**
     * Asserts that each instance of {@code data} is contained in the expected
     * data, in index order, then returns a {@link Leaf} constructed with -1.
     *
     * @param rng  unused by this mock
     * @param data instances handed over by the mapper
     * @return a new {@code Leaf(-1)}
     */
    @Override
    public Node build(Random rng, Data data) {
      int position = 0;
      while (position < data.size()) {
        assertTrue(expected.contains(data.get(position)));
        position++;
      }
      return new Leaf(-1);
    }
  }

  /**
   * {@link Step1Mapper} variant whose constructor performs both
   * {@code configure} calls directly from its arguments, so that no
   * Configuration object is needed to set the mapper up.
   */
  static class MockStep1Mapper extends Step1Mapper {

    /**
     * @param treeBuilder builder passed to the first {@code configure} call
     * @param dataset     dataset passed to the first {@code configure} call
     * @param seed        seed passed to the second {@code configure} call (may be null)
     * @param partition   partition handled by this mapper
     * @param numMapTasks total number of map tasks
     * @param numTrees    total number of trees to build
     */
    MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed,
        int partition, int numMapTasks, int numTrees) {
      configure(false, true, treeBuilder, dataset);
      configure(seed, partition, numMapTasks, numTrees);
    }
  }

  /** Number of attributes per generated data instance. */
  private static final int NB_ATTRIBUTES = 4;

  /** Number of generated data instances. */
  private static final int NB_INSTANCES = 100;

  /** Number of trees to build. */
  private static final int NB_TREES = 10;

  /** Number of mappers to use. */
  private static final int NB_MAPPERS = 2;

  /**
   * Runs one mock mapper per partition over its split and verifies the first
   * tree id, the output count and the (partition, treeId) of every output key.
   * Tree ids are expected to keep increasing by one across partitions.
   *
   * @throws Exception if data generation, mapping or closing the mapper fails
   */
  public void testMapper() throws Exception {
    final Long noSeed = null;
    final Random random = RandomUtils.getRandom();

    // Generate the data set and cut it into one split per mapper.
    final String descriptor = Utils.randomDescriptor(random, NB_ATTRIBUTES);
    final double[][] rawValues = Utils.randomDoubles(random, descriptor, NB_INSTANCES);
    final String[] textInstances = Utils.double2String(rawValues);
    final Dataset dataset = DataLoader.generateDataset(descriptor, textInstances);
    final String[][] splits = Utils.splitData(textInstances, NB_MAPPERS);

    final MockTreeBuilder checkingBuilder = new MockTreeBuilder();

    // Writables reused for every record handed to the mappers.
    final LongWritable recordKey = new LongWritable();
    final Text recordValue = new Text();

    // Id expected for the next tree; carried over from one partition to the next.
    int nextTreeId = 0;

    for (int part = 0; part < NB_MAPPERS; part++) {
      final String[] partitionLines = splits[part];
      checkingBuilder.setExpected(DataLoader.loadData(dataset, partitionLines));

      // Number of trees this mapper is expected to build.
      final int expectedTrees = Step1Mapper.nbTrees(NB_MAPPERS, NB_TREES, part);

      final PartialOutputCollector collector = new PartialOutputCollector(expectedTrees);
      final MockStep1Mapper mapper =
          new MockStep1Mapper(checkingBuilder, dataset, noSeed, part, NB_MAPPERS, NB_TREES);

      // The mapper must have computed its first tree id correctly.
      assertEquals(nextTreeId, mapper.getFirstTreeId());

      // Feed the split to the mapper, one line per record, keyed by line index.
      for (int row = 0; row < partitionLines.length; row++) {
        recordKey.set(row);
        recordValue.set(partitionLines[row]);
        mapper.map(recordKey, recordValue, collector, Reporter.NULL);
      }
      mapper.close();

      // The mapper must have built all of its trees.
      assertEquals(expectedTrees, collector.nbOutputs());

      // Every returned key must carry this partition and the next tree id in sequence.
      for (TreeID treeKey : collector.getKeys()) {
        assertEquals(part, treeKey.partition());
        assertEquals(nextTreeId, treeKey.treeId());
        nextTreeId += 1;
      }
    }
  }
}