com.aliyun.odps.mapred.bridge.streaming.PipeMapper.java Source code

Here is the source code for com.aliyun.odps.mapred.bridge.streaming.PipeMapper.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.aliyun.odps.mapred.bridge.streaming;

import static com.aliyun.odps.mapred.utils.UTF8ByteArrayUtils.unescapeSeparator;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.Map;

import org.apache.commons.lang.StringUtils;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.Mapper;
import com.aliyun.odps.mapred.bridge.streaming.io.InputWriter;
import com.aliyun.odps.mapred.bridge.streaming.io.OutputReader;
import com.aliyun.odps.mapred.conf.JobConf;

/**
 * A generic Mapper bridge.
 * It delegates operations to an external program via stdin and stdout.
 */
public class PipeMapper extends PipeMapRed implements Mapper {

    // ODPS table input always ignores the key
    private boolean ignoreKey = true;
    private boolean skipping = false;

    private byte[] mapOutputFieldSeparator;
    private byte[] mapInputFieldSeparator;
    private int numOfMapOutputKeyFields = 1;

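    /**
     * Returns the external command to execute, read from the
     * "stream.map.streamprocessor" job setting and URL-decoded as UTF-8,
     * or null if the setting is absent.
     */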
    @Override
    String getPipeCommand(JobConf job) {
        String str = job.get("stream.map.streamprocessor");
        if (str == null) {
            return null;
        }
        try {
            return URLDecoder.decode(str, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            System.err.println("failed to URL-decode stream.map.streamprocessor as UTF-8");
            return null;
        }
    }

    boolean getDoPipe() {
        return true;
    }

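    /**
     * Records the current input table as "map.input.file" and its columns as
     * "map.input.columns", applies the per-table settings found under
     * "stream.map.input.configs", and then calls configure(JobConf).
     */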
    @Override
    public void setup(TaskContext context) throws IOException {
        com.aliyun.odps.data.TableInfo ti = context.getInputTableInfo();
        // XXX not available for exstore case
        String input = "/" + ti.getProjectName() + "/" + ti.getTableName() + "/" + ti.getPartPath();
        context.getJobConf().set("map.input.file", input);
        context.getJobConf().set("map.input.columns", StringUtils.join(ti.getCols(), ","));

        try {
            Map<String, Object> inputConfigs = JSON.parseObject(
                    context.getJobConf().get("stream.map.input.configs", null),
                    new TypeReference<Map<String, Object>>() {
                    }.getType());
            if (inputConfigs == null) {
                throw new RuntimeException("input configs is null");
            }

            String inputKey = ti.getProjectName() + "." + ti.getTableName();
            Map<String, String> inputConfig = (Map<String, String>) inputConfigs.get(inputKey);
            for (Map.Entry<String, String> e : inputConfig.entrySet()) {
                context.getJobConf().set(e.getKey(), e.getValue());
            }
        } catch (Exception e) {
            throw new RuntimeException("fail to apply input configs");
        }

        configure(context.getJobConf());
    }

    @Override
    public void cleanup(TaskContext context) throws IOException {
        close();
    }

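    /**
     * Resolves the streaming separators and key-field count:
     * "stream.map.input.field.separator" and
     * "stream.map.output.field.separator" both default to "\t", and
     * "stream.num.map.output.key.fields" defaults to 1.
     */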
    public void configure(JobConf job) {
        super.configure(job);
        //disable the auto increment of the counter. For streaming, the number
        //of processed records could differ from (be equal to or less than)
        //the number of input records.
        //SkipBadRecords.setAutoIncrMapperProcCount(job, false);
        //skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
        //if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
        //  String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
        //  ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
        //    inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
        //}

        try {
            mapOutputFieldSeparator = unescapeSeparator(job.get("stream.map.output.field.separator", "\t"))
                    .getBytes("UTF-8");
            mapInputFieldSeparator = unescapeSeparator(job.get("stream.map.input.field.separator", "\t"))
                    .getBytes("UTF-8");
            numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
        }
    }

    // Do NOT declare any constructor: MapRed instantiates this class
    // reflectively, which relies on the implicit no-arg constructor.

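    /**
     * Forwards one record to the external process: lazily starts the
     * stdout/stderr reader threads, writes the record value to the child's
     * stdin through the InputWriter, and applies the skip/exception policy
     * when the write fails.
     */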
    @Override
    public void map(long keyRaw, Record valueRaw, TaskContext context) throws IOException {
        if (outThread_ == null) {
            startOutputThreads(context);
        }
        if (outerrThreadsThrowable != null) {
            mapRedFinished();
            throw new IOException("MROutput/MRErrThread failed", outerrThreadsThrowable);
        }
        try {
            // 1/4 Hadoop in
            numRecRead_++;
            maybeLogRecord();

            // 2/4 Hadoop to Tool
            if (numExceptions_ == 0) {
                //if (!this.ignoreKey) {
                //  inWriter_.writeKey(key);
                //}
                inWriter_.writeValue(valueRaw);
                if (skipping) {
                    //flush the streams on every record input if running in skip mode
                    //so that we don't buffer other records surrounding a bad record. 
                    clientOut_.flush();
                }
            } else {
                numRecSkipped_++;
            }
        } catch (IOException io) {
            numExceptions_++;
            if (numExceptions_ > 1 || numRecWritten_ < minRecWrittenToEnableSkip_) {
                // terminate with failure
                LOG.info(getContext(), io);
                mapRedFinished();
                throw io;
            } else {
                // terminate with success:
                // swallow input records although the stream processor failed/closed
            }
        }
    }

    public void close() {
        mapRedFinished();
    }

    @Override
    public byte[] getInputSeparator() {
        return mapInputFieldSeparator;
    }

    @Override
    public byte[] getFieldSeparator() {
        return mapOutputFieldSeparator;
    }

    @Override
    public int getNumOfKeyFields() {
        return numOfMapOutputKeyFields;
    }

    @Override
    InputWriter createInputWriter() throws IOException {
        return super.createInputWriter(mapInputWriterClass_);
    }

    @Override
    OutputReader createOutputReader() throws IOException {
        return super.createOutputReader(mapOutputReaderClass_);
    }

}
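
Usage note

For reference, here is a minimal sketch of how a job might populate the settings this class reads. The key names are taken from the source above; the command, project, and table names are hypothetical, and a real job would also declare its input and output tables and submit through the usual ODPS MapReduce entry points.

import java.net.URLEncoder;

import com.aliyun.odps.mapred.conf.JobConf;

public class PipeMapperConfigSketch {

    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();

        // getPipeCommand() URL-decodes this value, so store it encoded.
        // "/bin/cat" is a hypothetical identity command for illustration.
        job.set("stream.map.streamprocessor",
                URLEncoder.encode("/bin/cat", "UTF-8"));

        // configure() falls back to "\t" for both separators when unset.
        job.set("stream.map.input.field.separator", "\t");
        job.set("stream.map.output.field.separator", "\t");

        // Leading output fields treated as the key (default is 1).
        job.set("stream.num.map.output.key.fields", "2");

        // Per-table overrides, keyed by "project.table"; setup() copies the
        // matching entry into the JobConf. Names here are hypothetical.
        job.set("stream.map.input.configs",
                "{\"my_project.my_table\":"
                + "{\"stream.map.input.field.separator\":\",\"}}");
    }
}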