com.aliyun.odps.mapred.conf.JobConf.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.mapred.conf.JobConf.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.conf;

import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;

import com.aliyun.odps.utils.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.Survey;
import com.aliyun.odps.conf.Configuration;
import com.aliyun.odps.data.RecordComparator;
import com.aliyun.odps.io.WritableComparable;
import com.aliyun.odps.io.WritableComparator;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.Mapper;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.Partitioner;
import com.aliyun.odps.mapred.Reducer;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.utils.SchemaUtils;
import com.aliyun.odps.utils.ReflectionUtils;

/**
 * Configuration of a single ODPS MapReduce job.
 *
 * <p>
 * A JobConf is a {@link Configuration} with typed accessors for the settings
 * that describe how an M/R job is to be run.
 * </p>
 *
 * <p>
 * It names the {@link com.aliyun.odps.mapred.Mapper}, combiner and
 * {@link com.aliyun.odps.mapred.Reducer} implementation classes.
 * </p>
 *
 * <p>
 * It describes the intermediate Key/Value records emitted by the
 * {@link com.aliyun.odps.mapred.Mapper} and how keys are sorted and grouped:
 * </p>
 * <ul>
 * <li>{@link #setMapOutputKeySchema(Column[])}</li>
 * <li>{@link #setMapOutputValueSchema(Column[])}</li>
 * <li>{@link #setOutputKeySortColumns(String[])}</li>
 * <li>{@link #setOutputGroupingColumns(String[])}</li>
 * </ul>
 *
 * <p>
 * Additional settings can be loaded with {@link #addResource(String)}.
 * </p>
 *
 * <p>
 * It also exposes tuning options, for example:
 * </p>
 * <ul>
 * <li>{@link #setSplitSize(long)}: input split size handed to a {@link Mapper}</li>
 * <li>{@link #setNumReduceTasks(int)}: number of Reducer tasks</li>
 * </ul>
 *
 * <p>
 * {@code JobConf job = new JobConf();}<br/>
 * creates a JobConf initialised from the default job held by
 * {@link SessionState}.
 * </p>
 *
 * <p>
 * WordCount example:
 * </p>
 *
 * <pre>
 * JobConf job = new JobConf();
 * job.setMapperClass(TokenizerMapper.class);
 * job.setCombinerClass(SumCombiner.class);
 * job.setReducerClass(SumReducer.class);
 *
 * job.setMapOutputKeySchema(SchemaUtils.fromString(&quot;word:string&quot;));
 * job.setMapOutputValueSchema(SchemaUtils.fromString(&quot;count:bigint&quot;));
 *
 * InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), job);
 * OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), job);
 *
 * RunningJob rj = JobClient.runJob(job);
 * </pre>
 *
 * @see JobClient
 */
public class JobConf extends Configuration {

    private static final Log LOG = LogFactory.getLog(JobConf.class);

    /**
     * Sort direction of one key sort column.
     */
    public enum SortOrder {
        ASC, DESC
    }

    /**
     * Creates a job configuration initialised from the default job of the
     * current {@link SessionState}.
     */
    public JobConf() {
        super(SessionState.get().getDefaultJob());
    }

    /**
     * Creates a job configuration initialised from the default job of the
     * current {@link SessionState} and then overlaid with every entry of
     * {@code conf}.
     *
     * @param conf
     *     configuration whose entries are copied on top of the session defaults
     */
    public JobConf(Configuration conf) {
        super(SessionState.get().getDefaultJob());
        mergeConfiguration(conf);
    }

    /**
     * Creates a job configuration from the session defaults and then adds the
     * given configuration resource.
     *
     * <p>
     * The resource is expected to be a Configuration-format XML document:
     * </p>
     *
     * <pre>
     * &lt;configuration&gt;
     *  &lt;property&gt;
     *   &lt;name&gt;com.mycomp.xxx&lt;/name&gt;
     *   &lt;value&gt;xxx&lt;/value&gt;
     *  &lt;/property&gt;
     *  ... ...
     * &lt;/configuration&gt;
     * </pre>
     *
     * @param config
     *     Configuration-format XML resource, passed to {@link #addResource(String)}
     */
    @Survey
    public JobConf(String config) {
        this();
        addResource(config);
    }

    /**
     * Creates a job configuration, optionally loading the session defaults.
     *
     * @param loadSessionContext
     *     {@code true} to copy the default job of the current
     *     {@link SessionState} into this configuration; {@code false} to start
     *     from the plain {@link Configuration} defaults
     */
    public JobConf(boolean loadSessionContext) {
        super();
        if (loadSessionContext) {
            mergeConfiguration(SessionState.get().getDefaultJob());
        }
    }

    /**
     * Copies every entry of {@code conf} into this configuration; entries that
     * already exist under the same key are overwritten.
     *
     * @param conf
     *     configuration to copy from
     */
    private void mergeConfiguration(Configuration conf) {
        Iterator<Map.Entry<String, String>> iter = conf.iterator();
        while (iter.hasNext()) {
            Map.Entry<String, String> entry = iter.next();
            this.set(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Returns the column list stored under {@code confKey}, or the names of the
     * map output key columns when nothing (or an empty string) is stored.
     *
     * <p>
     * An empty value must be handled like a missing one: the setters store
     * {@code ""} for an empty array, and {@code "".split(",")} would otherwise
     * produce a single empty column name.
     * </p>
     *
     * @param confKey
     *     configuration key holding a comma-joined column list
     * @return the configured column names, or the map output key column names
     */
    private String[] getColumnsOrKeyColumns(String confKey) {
        String joined = get(confKey, null);
        if (joined == null || joined.isEmpty()) {
            return SchemaUtils.getNames(getMapOutputKeySchema());
        }
        return joined.split(",");
    }

    /**
     * Declares the resources used by this job.
     *
     * @param resourceNames
     *     resource names, stored verbatim; presumably comma-separated so that
     *     {@link #getResources()} can split them (confirm against
     *     {@link Configuration})
     * @see com.aliyun.odps.mapred.TaskContext
     */
    public void setResources(String resourceNames) {
        set(CONF.RESOURCES, resourceNames);
    }

    /**
     * Returns the resources declared for this job.
     *
     * @return the declared resource names as returned by {@code getStrings}
     */
    public String[] getResources() {
        return getStrings(CONF.RESOURCES);
    }

    /**
     * Returns the schema of the Key records passed from the
     * {@link com.aliyun.odps.mapred.Mapper} to the
     * {@link com.aliyun.odps.mapred.Reducer}.
     *
     * @return the key columns parsed from the stored schema string
     */
    public Column[] getMapOutputKeySchema() {
        String rs = get(CONF.MAPOUTPUT_KEY_SCHEMA);
        return SchemaUtils.fromString(rs);
    }

    /**
     * Sets the schema of the Key records passed from the
     * {@link com.aliyun.odps.mapred.Mapper} to the
     * {@link com.aliyun.odps.mapred.Reducer}.
     *
     * @param schema
     *     key columns; stored in their {@link SchemaUtils} string form
     */
    public void setMapOutputKeySchema(Column[] schema) {
        set(CONF.MAPOUTPUT_KEY_SCHEMA, SchemaUtils.toString(schema));
    }

    /**
     * Returns the schema of the Value records passed from the
     * {@link com.aliyun.odps.mapred.Mapper} to the
     * {@link com.aliyun.odps.mapred.Reducer}.
     *
     * @return the value columns parsed from the stored schema string
     */
    public Column[] getMapOutputValueSchema() {
        String rs = get(CONF.MAPOUTPUT_VALUE_SCHEMA);
        return SchemaUtils.fromString(rs);
    }

    /**
     * Sets the schema of the Value records passed from the
     * {@link com.aliyun.odps.mapred.Mapper} to the
     * {@link com.aliyun.odps.mapred.Reducer}.
     *
     * @param schema
     *     value columns; stored in their {@link SchemaUtils} string form
     */
    public void setMapOutputValueSchema(Column[] schema) {
        set(CONF.MAPOUTPUT_VALUE_SCHEMA, SchemaUtils.toString(schema));
    }

    /**
     * Returns the columns by which the map output Key records are sorted.
     * When no sort columns are configured, all map output key columns are
     * used.
     *
     * @return the sort column names
     * @see #setMapOutputKeySchema(Column[])
     */
    public String[] getOutputKeySortColumns() {
        return getColumnsOrKeyColumns(CONF.OUTPUT_KEY_SORT_COLUMNS);
    }

    /**
     * Sets the columns by which the map output Key records are sorted.
     *
     * @param cols
     *     sort column names; stored joined with {@code ","}
     */
    public void setOutputKeySortColumns(String[] cols) {
        set(CONF.OUTPUT_KEY_SORT_COLUMNS, StringUtils.join(cols, ","));
    }

    /**
     * Returns the sort direction of each key sort column. When no order is
     * configured, every column returned by {@link #getOutputKeySortColumns()}
     * is sorted ascending.
     *
     * @return one {@link SortOrder} per configured entry, or all
     * {@link SortOrder#ASC} when unset
     * @throws IllegalArgumentException
     *     if a stored entry is not the name of a {@link SortOrder} constant
     * @see #setMapOutputKeySchema(Column[])
     */
    public SortOrder[] getOutputKeySortOrder() {
        String joined = get(CONF.OUTPUT_KEY_SORT_ORDER, null);
        if (joined == null || joined.isEmpty()) {
            SortOrder[] ascending = new SortOrder[getOutputKeySortColumns().length];
            Arrays.fill(ascending, SortOrder.ASC);
            return ascending;
        }
        String[] names = joined.split(",");
        SortOrder[] order = new SortOrder[names.length];
        for (int i = 0; i < names.length; i++) {
            order[i] = SortOrder.valueOf(names[i]);
        }
        return order;
    }

    /**
     * Sets the sort direction of the key sort columns.
     *
     * @param order
     *     sort directions; stored joined with {@code ","}
     */
    public void setOutputKeySortOrder(SortOrder[] order) {
        set(CONF.OUTPUT_KEY_SORT_ORDER, StringUtils.join(order, ","));
    }

    /**
     * Returns the Key grouping columns. When none are configured, all map
     * output key columns are used.
     *
     * <p>
     * The grouping columns control how the MapReduce framework groups the
     * records handed to a {@link com.aliyun.odps.mapred.Reducer}.
     * </p>
     *
     * @return the grouping column names
     * @see #setOutputGroupingColumns(String[])
     */
    public String[] getOutputGroupingColumns() {
        return getColumnsOrKeyColumns(CONF.OUTPUT_GROUP_COLUMNS);
    }

    /**
     * Sets the Key grouping columns.
     *
     * <p>
     * The grouping columns control how the MapReduce framework groups the
     * records handed to a {@link com.aliyun.odps.mapred.Reducer}.
     * </p>
     *
     * @param cols
     *     grouping column names; stored joined with {@code ","}
     * @see #setMapOutputKeySchema(Column[])
     */
    public void setOutputGroupingColumns(String[] cols) {
        set(CONF.OUTPUT_GROUP_COLUMNS, StringUtils.join(cols, ","));
    }

    /**
     * Returns the comparator class configured for the {@link Mapper} output
     * Key, with {@code null} passed as the fallback when none is configured.
     *
     * <p>
     * NOTE(review): the original documentation mentions
     * {@link WritableComparator#get(Class)} as the comparator used when none
     * is set; that fallback is not applied by this method, so confirm it
     * against the runtime.
     * </p>
     *
     * @see RecordComparator
     * @see Mapper
     * @return the configured key comparator class
     */
    @SuppressWarnings("rawtypes")
    public Class<? extends RecordComparator> getOutputKeyComparatorClass() {
        return getClass(CONF.OUTPUT_KEY_COMPARATOR_CLASS, null, RecordComparator.class);
    }

    /**
     * Sets the comparator class for the {@link Mapper} output Key.
     *
     * <p>
     * The key comparator controls how records are ordered on their way from
     * the {@link Mapper} to the {@link Reducer} (the shuffle).
     * </p>
     *
     * @param theClass
     *     key comparator, an implementation of {@link RecordComparator}
     * @see #setOutputKeyGroupingComparatorClass(Class)
     */
    @SuppressWarnings("rawtypes")
    public void setOutputKeyComparatorClass(Class<? extends RecordComparator> theClass) {
        setClass(CONF.OUTPUT_KEY_COMPARATOR_CLASS, theClass, RecordComparator.class);
    }

    /**
     * Returns the comparator class configured for Key grouping, with
     * {@code null} passed as the fallback when none is configured.
     *
     * <p>
     * The grouping comparator controls how the MapReduce framework groups the
     * records handed to a {@link Reducer}. NOTE(review): the original
     * documentation relates the unset case to
     * {@link #getOutputKeyComparatorClass()}; this method does not apply that
     * fallback itself.
     * </p>
     *
     * @see #setOutputKeyGroupingComparatorClass(Class)
     * @return the configured key grouping comparator class
     */
    @SuppressWarnings("rawtypes")
    public Class<? extends RecordComparator> getOutputKeyGroupingComparatorClass() {
        return getClass(CONF.OUTPUT_KEY_GROUPING_COMPARATOR_CLASS, null, RecordComparator.class);
    }

    /**
     * Sets the comparator class used for Key grouping.
     *
     * <p>
     * The grouping comparator controls how the MapReduce framework groups the
     * records handed to a {@link Reducer}.
     * </p>
     *
     * @param theClass
     *     key grouping comparator, an implementation of {@link RecordComparator}
     * @see #setOutputKeyComparatorClass(Class)
     */
    @SuppressWarnings("rawtypes")
    public void setOutputKeyGroupingComparatorClass(Class<? extends RecordComparator> theClass) {
        setClass(CONF.OUTPUT_KEY_GROUPING_COMPARATOR_CLASS, theClass, RecordComparator.class);
    }

    /**
     * Returns the {@link com.aliyun.odps.mapred.Mapper} class of this job,
     * falling back to {@link com.aliyun.odps.mapred.MapperBase}.
     *
     * @return the {@link com.aliyun.odps.mapred.Mapper} class
     */
    public Class<? extends Mapper> getMapperClass() {
        return getClass(CONF.MAP_CLASS, MapperBase.class, Mapper.class);
    }

    /**
     * Sets the {@link com.aliyun.odps.mapred.Mapper} class of this job.
     *
     * @param theClass
     *     the {@link com.aliyun.odps.mapred.Mapper} implementation
     */
    public void setMapperClass(Class<? extends Mapper> theClass) {
        setClass(CONF.MAP_CLASS, theClass, Mapper.class);
    }

    /**
     * Returns the partition columns. When none are configured, all
     * {@link com.aliyun.odps.mapred.Mapper} output Key columns are used.
     *
     * @return the partition column names
     */
    public String[] getPartitionColumns() {
        return getColumnsOrKeyColumns(CONF.PARTITION_COLUMNS);
    }

    /**
     * Sets the partition columns.
     *
     * <p>
     * See also {@link JobConf#setPartitionerClass(Class)}; NOTE(review): the
     * original documentation describes how the two settings interact, but the
     * precedence is not visible in this class.
     * </p>
     *
     * @param cols
     *     partition column names; stored joined with {@code ","}
     * @see com.aliyun.odps.mapred.Mapper
     */
    public void setPartitionColumns(String[] cols) {
        set(CONF.PARTITION_COLUMNS, StringUtils.join(cols, ","));
    }

    /**
     * Returns the {@link com.aliyun.odps.mapred.Reducer} class of this job,
     * falling back to {@link com.aliyun.odps.mapred.ReducerBase}.
     *
     * @return the {@link com.aliyun.odps.mapred.Reducer} class
     */
    public Class<? extends Reducer> getReducerClass() {
        return getClass(CONF.REDUCE_CLASS, ReducerBase.class, Reducer.class);
    }

    /**
     * Sets the {@link com.aliyun.odps.mapred.Reducer} class of this job.
     *
     * @param theClass
     *     the {@link com.aliyun.odps.mapred.Reducer} implementation
     */
    public void setReducerClass(Class<? extends Reducer> theClass) {
        setClass(CONF.REDUCE_CLASS, theClass, Reducer.class);
    }

    /**
     * Returns the combiner class of this job, with {@code null} passed as the
     * fallback when none is configured.
     *
     * @return the combiner class
     */
    public Class<? extends Reducer> getCombinerClass() {
        return getClass(CONF.COMBINE_CLASS, null, Reducer.class);
    }

    /**
     * Sets the combiner class of this job.
     *
     * @param theClass
     *     the combiner implementation, a {@link com.aliyun.odps.mapred.Reducer}
     */
    public void setCombinerClass(Class<? extends Reducer> theClass) {
        setClass(CONF.COMBINE_CLASS, theClass, Reducer.class);
    }

    /**
     * Returns the partitioner class of this job, with {@code null} passed as
     * the fallback when none is configured.
     *
     * @return the partitioner class
     */
    public Class<? extends Partitioner> getPartitionerClass() {
        return getClass(CONF.PARTITION_CLASS, null, Partitioner.class);
    }

    /**
     * Sets the partitioner class of this job.
     *
     * <p>
     * See also {@link JobConf#setPartitionColumns(String[])}; NOTE(review):
     * the original documentation describes how the two settings interact, but
     * the precedence is not visible in this class.
     * </p>
     *
     * @param theClass
     *     the {@link com.aliyun.odps.mapred.Partitioner} implementation
     */
    public void setPartitionerClass(Class<? extends Partitioner> theClass) {
        setClass(CONF.PARTITION_CLASS, theClass, Partitioner.class);
    }

    /**
     * Returns the input split size in MB; 640 when unset.
     *
     * <p>
     * The split size determines how much input each
     * {@link com.aliyun.odps.mapred.Mapper} receives and therefore influences
     * the number of {@link com.aliyun.odps.mapred.Mapper} tasks.
     * </p>
     *
     * @return the split size in MB
     */
    public long getSplitSize() {
        return getLong(CONF.MAP_SPLIT_SIZE, 640);
    }

    /**
     * Sets the input split size in MB (the getter falls back to 640).
     *
     * <p>
     * The split size determines how much input each
     * {@link com.aliyun.odps.mapred.Mapper} receives and therefore influences
     * the number of {@link com.aliyun.odps.mapred.Mapper} tasks.
     * </p>
     *
     * @param size
     *     the split size in MB
     */
    public void setSplitSize(long size) {
        setLong(CONF.MAP_SPLIT_SIZE, size);
    }

    /**
     * Returns the configured number of {@link com.aliyun.odps.mapred.Mapper}
     * tasks; 1 when unset. NOTE(review): the original documentation relates
     * this value to the job input; how the runtime honours it is not visible
     * here.
     *
     * @return the number of {@link com.aliyun.odps.mapred.Mapper} tasks
     */
    public int getNumMapTasks() {
        return getInt(CONF.MAP_TASKS, 1);
    }

    /**
     * Sets the number of {@link com.aliyun.odps.mapred.Mapper} tasks.
     * NOTE(review): the original documentation relates this value to the job
     * input; how the runtime honours it is not visible here.
     *
     * @param n
     *     the number of {@link com.aliyun.odps.mapred.Mapper} tasks
     */
    public void setNumMapTasks(int n) {
        setInt(CONF.MAP_TASKS, n);
    }

    /**
     * Returns the configured number of {@link com.aliyun.odps.mapred.Reducer}
     * tasks; 1 when unset.
     *
     * @return the number of {@link com.aliyun.odps.mapred.Reducer} tasks
     */
    public int getNumReduceTasks() {
        return getInt(CONF.REDUCE_TASKS, 1);
    }

    /**
     * Sets the number of {@link com.aliyun.odps.mapred.Reducer} tasks.
     * NOTE(review): the original documentation states that the framework
     * default is 1/4 of the {@link com.aliyun.odps.mapred.Mapper} count,
     * whereas {@link #getNumReduceTasks()} itself falls back to 1.
     *
     * <p>
     * For a job without a {@link com.aliyun.odps.mapred.Reducer}, set the
     * count to 0: {@code setNumReduceTasks(0);}
     * </p>
     *
     * @param n
     *     the number of {@link com.aliyun.odps.mapred.Reducer} tasks
     */
    public void setNumReduceTasks(int n) {
        setInt(CONF.REDUCE_TASKS, n);
    }

    /**
     * Returns the memory for a {@link com.aliyun.odps.mapred.Mapper} task in
     * MB; 2048 when unset.
     *
     * @return the {@link com.aliyun.odps.mapred.Mapper} task memory in MB
     */
    public int getMemoryForMapTask() {
        return getInt(CONF.MAP_MEMORY, 2048);
    }

    /**
     * Sets the memory for a {@link com.aliyun.odps.mapred.Mapper} task in MB
     * (the getter falls back to 2048).
     *
     * @param mem
     *     the {@link com.aliyun.odps.mapred.Mapper} task memory in MB
     */
    public void setMemoryForMapTask(int mem) {
        setInt(CONF.MAP_MEMORY, mem);
    }

    /**
     * Returns the memory for a {@link com.aliyun.odps.mapred.Reducer} task in
     * MB; 2048 when unset.
     *
     * @return the {@link com.aliyun.odps.mapred.Reducer} task memory in MB
     */
    public int getMemoryForReduceTask() {
        return getInt(CONF.REDUCE_MEMORY, 2048);
    }

    /**
     * Sets the memory for a {@link com.aliyun.odps.mapred.Reducer} task in MB
     * (the getter falls back to 2048).
     *
     * @param mem
     *     the {@link com.aliyun.odps.mapred.Reducer} task memory in MB
     */
    public void setMemoryForReduceTask(int mem) {
        setInt(CONF.REDUCE_MEMORY, mem);
    }

    /**
     * Returns the JVM memory of a Map task in MB; 1024 when unset.
     *
     * @return the Map task JVM memory in MB
     */
    public int getMemoryForMapperJVM() {
        return getInt(CONF.MAP_JVM_MEMORY, 1024);
    }

    /**
     * Sets the JVM memory of a Map task in MB (the getter falls back to 1024).
     *
     * @param mem
     *     the Map task JVM memory in MB
     */
    public void setMemoryForMapperJVM(int mem) {
        setInt(CONF.MAP_JVM_MEMORY, mem);
    }

    /**
     * Returns the JVM memory of a Reduce task in MB; 1024 when unset.
     *
     * @return the Reduce task JVM memory in MB
     */
    public int getMemoryForReducerJVM() {
        return getInt(CONF.REDUCE_JVM_MEMORY, 1024);
    }

    /**
     * Sets the JVM memory of a Reduce task in MB (the getter falls back to
     * 1024).
     *
     * @param mem
     *     the Reduce task JVM memory in MB
     */
    public void setMemoryForReducerJVM(int mem) {
        setInt(CONF.REDUCE_JVM_MEMORY, mem);
    }

    /**
     * Returns the task JVM memory in MB; 1024 when unset.
     *
     * @return the JVM memory in MB
     */
    public int getMemoryForJVM() {
        return getInt(CONF.JVM_MEMORY, 1024);
    }

    /**
     * Sets the task JVM memory in MB (the getter falls back to 1024).
     *
     * @param mem
     *     the JVM memory in MB
     */
    public void setMemoryForJVM(int mem) {
        setInt(CONF.JVM_MEMORY, mem);
    }

    /**
     * Returns the number of items the combiner cache holds; 1024 when unset.
     *
     * @return the combiner cache size in items
     */
    public int getCombinerCacheItems() {
        return getInt(CONF.COMBINER_CACHE_ITEMS, 1024);
    }

    /**
     * Sets the number of items the combiner cache holds.
     *
     * @param size
     *     the combiner cache size in items
     */
    public void setCombinerCacheItems(int size) {
        setInt(CONF.COMBINER_CACHE_ITEMS, size);
    }

    /**
     * Returns the combiner cache spill threshold; 0.5 when unset.
     * NOTE(review): per the original documentation this setting is only
     * relevant when {@link #getCombinerOptimizeEnable()} is {@code true};
     * confirm against the runtime.
     *
     * @return the combiner cache spill threshold
     */
    public float getCombinerCacheSpillPercent() {
        return getFloat(CONF.COMBINER_CACHE_SPILL_PERCENT, (float) 0.5);
    }

    /**
     * Sets the combiner cache spill threshold.
     *
     * @param percent
     *     the combiner cache spill threshold
     */
    public void setCombinerCacheSpillPercent(float percent) {
        setFloat(CONF.COMBINER_CACHE_SPILL_PERCENT, percent);
    }

    /**
     * Returns whether the combiner optimisation is enabled; {@code false}
     * when unset.
     *
     * @return {@code true} if the combiner optimisation is enabled
     */
    public boolean getCombinerOptimizeEnable() {
        return getBoolean(CONF.COMBINER_OPTIMIZE_ENABLE, false);
    }

    /**
     * Enables or disables the combiner optimisation.
     *
     * @param isCombineOpt
     *     {@code true} to enable the combiner optimisation
     */
    public void setCombinerOptimizeEnable(boolean isCombineOpt) {
        setBoolean(CONF.COMBINER_OPTIMIZE_ENABLE, isCombineOpt);
    }

    /**
     * Returns the function timeout; 600 when unset. NOTE(review): the unit is
     * presumably seconds; confirm against the runtime.
     *
     * @return the function timeout
     */
    public int getFunctionTimeout() {
        return getInt(CONF.FUNCTION_TIMEOUT, 600);
    }

    /**
     * Sets the function timeout. The value is stored as given; this method
     * performs no range check.
     *
     * @param timeout
     *     the function timeout; the documented valid range is [1, 3600]
     */
    public void setFunctionTimeout(int timeout) {
        setInt(CONF.FUNCTION_TIMEOUT, timeout);
    }

    /**
     * Returns the instance priority of this job; 9 when unset.
     *
     * @return the instance priority
     */
    public int getInstancePriority() {
        return getInt(CONF.INSTANCE_PRIORITY, 9);
    }

    /**
     * Returns whether the job output overwrites existing data; {@code true}
     * when unset.
     *
     * @return {@code true} if output is written in overwrite mode
     */
    public boolean getOutputOverwrite() {
        return getBoolean(CONF.OUTPUT_OVERWRITE, true);
    }

    /**
     * Sets whether the job output overwrites existing data.
     *
     * @param isOverwrite
     *     {@code true} to write output in overwrite mode
     */
    public void setOutputOverwrite(boolean isOverwrite) {
        setBoolean(CONF.OUTPUT_OVERWRITE, isOverwrite);
    }

    /**
     * Returns whether inner output is enabled; {@code false} when unset.
     *
     * @return {@code true} if inner output is enabled
     */
    public boolean getInnerOutputEnable() {
        return getBoolean(CONF.INNER_OUTPUT_ENABLE, false);
    }

    /**
     * Enables or disables inner output.
     *
     * @param isInnerOutput
     *     {@code true} to enable inner output
     */
    public void setInnerOutputEnable(boolean isInnerOutput) {
        setBoolean(CONF.INNER_OUTPUT_ENABLE, isInnerOutput);
    }

    /**
     * Sets the instance priority of this job. The value is stored as given;
     * this method performs no range check.
     *
     * @param priority
     *     the instance priority; the documented valid range is [0, 9]
     */
    public void setInstancePriority(int priority) {
        setInt(CONF.INSTANCE_PRIORITY, priority);
    }

    /**
     * Formerly returned the output schema; now only records that a deprecated
     * method was called.
     *
     * @return always {@code null}
     * @deprecated no longer supported; always returns {@code null}
     */
    @Deprecated
    public Column[] getOutputSchema() {
        recordDeprecatedCall("getOutputSchema");
        return null;
    }

    /**
     * Formerly returned the output schema for a label; now only records that
     * a deprecated method was called.
     *
     * @param label
     *     output label (ignored)
     * @return always {@code null}
     * @deprecated no longer supported; always returns {@code null}
     */
    @Deprecated
    public Column[] getOutputSchema(String label) {
        recordDeprecatedCall("getOutputSchema", String.class);
        return null;
    }

    /**
     * Formerly set the output schema for a label; now only records that a
     * deprecated method was called. Neither argument is stored.
     *
     * @param schema
     *     output schema (ignored)
     * @param label
     *     output label (ignored)
     * @deprecated no longer supported; the arguments are ignored
     */
    @Deprecated
    public void setOutputSchema(Column[] schema, String label) {
        recordDeprecatedCall("setOutputSchema", Column[].class, String.class);
    }

    /**
     * Looks up the named public method of JobConf and records its use via
     * {@link #onDeprecated(Method)}. A failed lookup is logged instead of
     * being printed to standard error, and never propagates to the caller.
     *
     * @param name
     *     name of the deprecated method
     * @param parameterTypes
     *     parameter types of the deprecated method
     */
    private void recordDeprecatedCall(String name, Class<?>... parameterTypes) {
        try {
            onDeprecated(JobConf.class.getMethod(name, parameterTypes));
        } catch (NoSuchMethodException e) {
            LOG.warn("Cannot resolve deprecated method JobConf." + name, e);
        }
    }

    /**
     * Flags the use of a deprecated method in this configuration (under
     * {@code odps.deprecated.<declaring class>.<method name>}) and logs a
     * warning.
     *
     * @param method
     *     the deprecated method that was called
     */
    private void onDeprecated(Method method) {
        set("odps.deprecated." + method.getDeclaringClass().getCanonicalName() + "." + method.getName(), "true");
        LOG.warn("Calling deprecated method:" + method);
    }
}