com.linkedin.cubert.plan.physical.CubertMapper.java Source code

Introduction

Here is the source code for com.linkedin.cubert.plan.physical.CubertMapper.java.
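The class runs the map-side portion of a Cubert physical plan: it reads the operator list for this mapper from the job configuration, wraps the input split in a block, executes the operator chain, and writes the resulting blocks to the shuffle or to the job output.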

Source

/* (c) 2014 LinkedIn Corp. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.plan.physical;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;

import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockWriter;
import com.linkedin.cubert.block.CommonContext;
import com.linkedin.cubert.block.ContextBlock;
import com.linkedin.cubert.io.MultiMapperSplit;
import com.linkedin.cubert.io.SerializerUtils;
import com.linkedin.cubert.io.StorageFactory;
import com.linkedin.cubert.operator.PhaseContext;
import com.linkedin.cubert.operator.TeeOperator;
import com.linkedin.cubert.utils.FileCache;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.MemoryStats;
import com.linkedin.cubert.utils.print;

/**
 * Executes the physical plan of a job at the Mapper.
 * 
 * @author Maneesh Varshney
 * 
 */
public class CubertMapper extends Mapper<Object, Object, Object, Object> {

    @Override
    public void run(Context context) throws IOException, InterruptedException {
        print.f("Mapper init  ----------------------------------");
        Configuration conf = context.getConfiguration();

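        // Initialize the file cache and the per-phase execution context
        // from the job configuration before any operator runs.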
        FileCache.initialize(conf);
        PhaseContext.create(context, conf);

        ObjectMapper mapper = new ObjectMapper();
        // Read the serialized list of map programs (the operator plan)
        // from the job configuration.
        ArrayNode mapCommands = mapper.readValue(conf.get(CubertStrings.JSON_MAP_OPERATOR_LIST), ArrayNode.class);

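        // A MultiMapperSplit carries the index of the map program this task
        // runs; plain splits default to the first program.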
        int multiMapperIndex = 0;

        if (context.getInputSplit() instanceof MultiMapperSplit) {
            // identify the input, output and operators for this mapper
            MultiMapperSplit mmSplit = (MultiMapperSplit) context.getInputSplit();
            multiMapperIndex = mmSplit.getMultiMapperIndex();
        }
        JsonNode inputJson = mapCommands.get(multiMapperIndex).get("input");
        ArrayNode operatorsJson = (ArrayNode) mapCommands.get(multiMapperIndex).get("operators");

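        // The map output spec is the shuffle spec when the job has a shuffle,
        // and the job output spec otherwise.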
        JsonNode outputJson = null;
        if (conf.get(CubertStrings.JSON_SHUFFLE) != null) {
            outputJson = mapper.readValue(conf.get(CubertStrings.JSON_SHUFFLE), JsonNode.class);
        } else {
            outputJson = mapper.readValue(conf.get(CubertStrings.JSON_OUTPUT), JsonNode.class);
        }

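        // The block id and record count for this input are passed through the
        // job configuration.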
        long blockId = conf.getLong("MY_BLOCK_ID", -1);
        long numRecords = conf.getLong("MY_NUM_RECORDS", -1);

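        // The partition key, when present, is a Pig tuple serialized into the
        // configuration as a string; reconstruct it before building the block.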
        Tuple partitionKey = null;
        if (conf.get("MY_PARTITION_KEY") != null) {
            try {
                byte[] bytes = (byte[]) SerializerUtils.deserializeFromString(conf.get("MY_PARTITION_KEY"));
                ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
                partitionKey = TupleFactory.getInstance().newTuple();
                partitionKey.readFields(new DataInputStream(bis));
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }

        // Create input block
        CommonContext commonContext = new MapContext(context);
        Block input = new ContextBlock(commonContext, partitionKey, blockId, numRecords);
        input.configure(inputJson);

        // Create phase executor
        PhaseExecutor exec = new PhaseExecutor(inputJson.get("name").getTextValue(), input,
                outputJson.get("name").getTextValue(), operatorsJson, conf);

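        // Obtain and configure a block writer for the output storage type.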
        BlockWriter writer = StorageFactory.get(JsonUtils.getText(outputJson, "type")).getBlockWriter();
        writer.configure(outputJson);

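        // Execute the operator chain block by block, logging progress at most
        // once every MIN_DELAY milliseconds.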
        final int MIN_DELAY = 15000;
        int nBlocks = 0;
        long start = System.currentTimeMillis();
        long curr = start;
        Block outputBlock;
        while ((outputBlock = exec.next()) != null) {
            writer.write(outputBlock, commonContext);
            ++nBlocks;
            curr = System.currentTimeMillis();
            if (curr > start + MIN_DELAY) {
                print.f("Executed operator chain for %d block(s) in %d ms", nBlocks, curr - start);
                start = System.currentTimeMillis();
                nBlocks = 0;
            }
        }
        if (nBlocks > 0) {
            print.f("Executed operator chain for %d block(s) in %d ms", nBlocks, curr - start);
        }

        // HACK!! Asking the TeeOperator to close the files that were opened
        TeeOperator.closeFiles();

        print.f("Mapper complete ----------------------------------");
        MemoryStats.printGCStats();
    }

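    /**
     * Adapts the Hadoop Mapper.Context to Cubert's CommonContext interface so
     * that blocks and block writers can read input and emit records without
     * depending on the mapper API directly.
     */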
    public static final class MapContext implements CommonContext {
        private final Context context;

        public MapContext(Context context) {
            this.context = context;
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return context.nextKeyValue();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            return context.getCurrentKey();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            return context.getCurrentValue();
        }

        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            context.write(key, value);
        }

    }
}
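
Notes

The JSON that run() parses can be read off the field lookups above: CubertStrings.JSON_MAP_OPERATOR_LIST holds an array with one entry per map program, each carrying an "input" node and an "operators" array, while the shuffle or output spec is an object with "name" and "type" fields. The following self-contained sketch parses a made-up plan the same way the mapper does. The field names mirror the lookups in the source; the class name and all values ("clicks", "dailyCounts", "RUBIX") are invented for illustration.

import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;

public class MapPlanShapeSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical plan for a job with a single map program
        // (multiMapperIndex = 0); values are made up for illustration.
        String mapPlan = "[{\"input\": {\"name\": \"clicks\"}, \"operators\": []}]";
        String output = "{\"name\": \"dailyCounts\", \"type\": \"RUBIX\"}";

        ObjectMapper mapper = new ObjectMapper();
        ArrayNode mapCommands = mapper.readValue(mapPlan, ArrayNode.class);

        // The same lookups CubertMapper.run() performs.
        JsonNode inputJson = mapCommands.get(0).get("input");
        ArrayNode operatorsJson = (ArrayNode) mapCommands.get(0).get("operators");
        JsonNode outputJson = mapper.readValue(output, JsonNode.class);

        System.out.println("input name:  " + inputJson.get("name").getTextValue());
        System.out.println("output type: " + outputJson.get("type").getTextValue());
        System.out.println("operators:   " + operatorsJson.size());
    }
}

In a real job these strings are placed into the Hadoop Configuration by the code that sets up the job, under CubertStrings.JSON_MAP_OPERATOR_LIST and either CubertStrings.JSON_SHUFFLE or CubertStrings.JSON_OUTPUT, before the mapper task starts.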