org.apache.mahout.df.mapred.partial.Step1MapperTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.df.mapred.partial.Step1MapperTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.df.mapred.partial;

import java.util.Random;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataLoader;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.Utils;
import org.apache.mahout.df.mapreduce.partial.TreeID;
import org.apache.mahout.df.node.Leaf;
import org.apache.mahout.df.node.Node;

public class Step1MapperTest extends MahoutTestCase {

    /**
     * Tree builder stub: instead of growing a real tree, it asserts that every
     * instance it is asked to build from belongs to the data registered through
     * {@link #setExpected(Data)}, i.e. the partition of the mapper under test.
     */
    private static class MockTreeBuilder implements TreeBuilder {

        /** data of the partition currently being checked */
        private Data partitionData;

        /**
         * Registers the data that subsequent {@code build} calls are allowed
         * to receive.
         *
         * @param data the instances of the partition under test
         */
        public void setExpected(Data data) {
            this.partitionData = data;
        }

        /**
         * Asserts that each instance of {@code data} is contained in the
         * registered partition data.
         *
         * @param rng  not used by this stub
         * @param data the instances handed over by the mapper
         * @return a placeholder {@code Leaf} built with the value -1
         */
        @Override
        public Node build(Random rng, Data data) {
            int position = 0;
            while (position < data.size()) {
                assertTrue(partitionData.contains(data.get(position)));
                position++;
            }

            return new Leaf(-1);
        }
    }

    /**
     * Step1Mapper variant that is set up directly from constructor arguments,
     * so the test does not need a Configuration.
     */
    static class MockStep1Mapper extends Step1Mapper {

        /**
         * Forwards the arguments to the two {@code configure} overloads of the
         * parent mapper.
         */
        MockStep1Mapper(TreeBuilder treeBuilder, Dataset dataset, Long seed, int partition, int numMapTasks,
                int numTrees) {
            // NOTE(review): the meaning of the two boolean flags is defined by
            // Step1Mapper.configure and is not visible from this test.
            configure(false, true, treeBuilder, dataset);
            configure(seed, partition, numMapTasks, numTrees);
        }

    }

    /** number of attributes per generated data instance */
    private static final int NB_ATTRIBUTES = 4;

    /** number of generated data instances */
    private static final int NB_INSTANCES = 100;

    /** number of trees to build */
    private static final int NB_TREES = 10;

    /** number of mappers to use */
    private static final int NB_MAPPERS = 2;

    /**
     * Feeds every line of a split to the mapper, using the line's position in
     * the split as the record key. The key and value writables are reused for
     * each record.
     */
    private static void feedSplit(MockStep1Mapper mapper, String[] lines, LongWritable key, Text value,
            PartialOutputCollector output) throws Exception {
        for (int line = 0; line < lines.length; line++) {
            key.set(line);
            value.set(lines[line]);
            mapper.map(key, value, output, Reporter.NULL);
        }
    }

    /**
     * Runs one mock mapper per partition over randomly generated data and
     * checks, for each of them, the first tree id it computed, the number of
     * outputs it produced, and the partition and tree id of every returned key.
     */
    public void testMapper() throws Exception {
        Long seed = null;
        Random rng = RandomUtils.getRandom();

        // generate a random dataset and cut it into one split per mapper
        String descriptor = Utils.randomDescriptor(rng, NB_ATTRIBUTES);
        double[][] rawValues = Utils.randomDoubles(rng, descriptor, NB_INSTANCES);
        String[] textData = Utils.double2String(rawValues);
        Dataset dataset = DataLoader.generateDataset(descriptor, textData);
        String[][] partitions = Utils.splitData(textData, NB_MAPPERS);

        MockTreeBuilder treeBuilder = new MockTreeBuilder();

        // writables shared by all the records of all the partitions
        LongWritable key = new LongWritable();
        Text value = new Text();

        // tree ids are expected to keep increasing from one partition to the next
        int nextTreeId = 0;

        for (int partition = 0; partition < NB_MAPPERS; partition++) {
            String[] lines = partitions[partition];
            treeBuilder.setExpected(DataLoader.loadData(dataset, lines));

            // number of trees this mapper is expected to build
            int expectedTrees = Step1Mapper.nbTrees(NB_MAPPERS, NB_TREES, partition);

            PartialOutputCollector output = new PartialOutputCollector(expectedTrees);

            MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed, partition, NB_MAPPERS,
                    NB_TREES);

            // the mapper must have computed its first tree id correctly
            assertEquals(nextTreeId, mapper.getFirstTreeId());

            feedSplit(mapper, lines, key, value, output);

            mapper.close();

            // the mapper must have built all of its trees
            assertEquals(expectedTrees, output.nbOutputs());

            // every returned key must carry this partition and the next tree id
            for (TreeID treeKey : output.getKeys()) {
                assertEquals(partition, treeKey.partition());
                assertEquals(nextTreeId, treeKey.treeId());

                nextTreeId++;
            }
        }
    }
}