// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.mapred.conf; import java.lang.reflect.Method; import java.util.Arrays; import java.util.Iterator; import java.util.Map; import com.aliyun.odps.utils.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.aliyun.odps.Column; import com.aliyun.odps.Survey; import com.aliyun.odps.conf.Configuration; import com.aliyun.odps.data.RecordComparator; import com.aliyun.odps.io.WritableComparable; import com.aliyun.odps.io.WritableComparator; import com.aliyun.odps.mapred.JobClient; import com.aliyun.odps.mapred.Mapper; import com.aliyun.odps.mapred.MapperBase; import com.aliyun.odps.mapred.Partitioner; import com.aliyun.odps.mapred.Reducer; import com.aliyun.odps.mapred.ReducerBase; import com.aliyun.odps.mapred.utils.SchemaUtils; import com.aliyun.odps.utils.ReflectionUtils; /** * JobConf ??ODPS MapReduce ?. * * <p> * JobConf ? ODPS ?? M/R ??MapReduce JobConf ???? * </p> * * <p> * JobConf ? {@link com.aliyun.odps.mapred.Mapper}?Combiner? * {@link com.aliyun.odps.mapred.Reducer} * </p> * * <p> * JobConf ? {@link com.aliyun.odps.mapred.Mapper} * Key/Value??? 
* <ul> * <li>{@link #setMapOutputKeySchema(Column[])}</li> * <li>{@link #setMapOutputValueSchema(Column[])}</li> * <li>{@link #setOutputKeySortColumns(String[])}</li> * <li>{@link #setOutputGroupingColumns(String[])}</li> * </ul> * </p> * * <p> * JobConf ?{@link #addResource(String)} ODPS ? * </p> * * <p> * JobConf ? MapReduce * <ul> * <li>{@link #setSplitSize(long)}?Mapper?? {@link Mapper}</li> * <li>{@link #setNumReduceTasks(int)}Reducer</li> * </ul> * </p> * * <p> * JobConf job = new JobConf();<br/> * JobConf {@link SessionState} ? * * <p> * ?WordCount * * <pre> * JobConf job = new JobConf(); * job.setMapperClass(TokenizerMapper.class); * job.setCombinerClass(SumCombiner.class); * job.setReducerClass(SumReducer.class); * * job.setMapOutputKeySchema(SchemaUtils.fromString("word:string")); * job.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint")); * * InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), job); * OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), job); * * RunningJob rj = JobClient.runJob(job); * * </pre> * * </blockquote> * </p> * * @see JobClient */ public class JobConf extends Configuration { private static final Log LOG = LogFactory.getLog(JobConf.class); public enum SortOrder { ASC, DESC } /** * MapReduce ?. */ public JobConf() { super(SessionState.get().getDefaultJob()); } /** * MapReduce ?. * * @param conf * ?? */ public JobConf(Configuration conf) { super(SessionState.get().getDefaultJob()); mergeConfiguration(conf); } /** * MapReduce ?. * * Configuration-format XML ?? * * <pre> * <configuration> * <property> * <name>com.mycomp.xxx</name> * <value>xxx</value> * </property> * ... ... * </configuration> * </pre> * * @param config * Configuration-format XML ? */ @Survey public JobConf(String config) { this(); addResource(config); } /** * MapReduce ???? * * @param loadSessionContext * true? 
*/ public JobConf(boolean loadSessionContext) { super(); if (loadSessionContext) { mergeConfiguration(SessionState.get().getDefaultJob()); } } /** * ??, ? * * @param conf * ??? */ private void mergeConfiguration(Configuration conf) { Iterator<Map.Entry<String, String>> iter = conf.iterator(); while (iter.hasNext()) { Map.Entry<String, String> entry = iter.next(); this.set(entry.getKey(), entry.getValue()); } } /** * ?. * * <p> * ???? Mapper/Reducer ? * * @param resourceNames * ????? * @see com.aliyun.odps.mapred.TaskContext */ public void setResources(String resourceNames) { set(CONF.RESOURCES, resourceNames); } /** * ?? * * @return ? */ public String[] getResources() { return getStrings(CONF.RESOURCES); } /** * ? {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key * * @return {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key */ public Column[] getMapOutputKeySchema() { String rs = get(CONF.MAPOUTPUT_KEY_SCHEMA); return SchemaUtils.fromString(rs); } /** * {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key * * @param schema * {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key */ public void setMapOutputKeySchema(Column[] schema) { set(CONF.MAPOUTPUT_KEY_SCHEMA, SchemaUtils.toString(schema)); } /** * ? {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Value * * @return {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Value */ public Column[] getMapOutputValueSchema() { String rs = get(CONF.MAPOUTPUT_VALUE_SCHEMA); return SchemaUtils.fromString(rs); } /** * {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Value * * @param schema * {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Value */ public void setMapOutputValueSchema(Column[] schema) { set(CONF.MAPOUTPUT_VALUE_SCHEMA, SchemaUtils.toString(schema)); } /** * ? 
{@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key ?. Key. * ?????. 1, Key??, , ?, . * ??. * * @return Key? * @see #setMapOutputKeySchema(Column[]) */ public String[] getOutputKeySortColumns() { String joined = get(CONF.OUTPUT_KEY_SORT_COLUMNS, null); if (joined == null || joined.isEmpty()) { return SchemaUtils.getNames(getMapOutputKeySchema()); } return joined.split(","); } /** * {@link com.aliyun.odps.mapred.Mapper} * {@link com.aliyun.odps.mapred.Reducer} Key ? * * @param cols * Mapper? */ public void setOutputKeySortColumns(String[] cols) { set(CONF.OUTPUT_KEY_SORT_COLUMNS, StringUtils.join(cols, ",")); } /** * ? Key ??. Key??. * * @return Key?. * @see #setMapOutputKeySchema(Column[]) */ public SortOrder[] getOutputKeySortOrder() { SortOrder[] order; String joined = get(CONF.OUTPUT_KEY_SORT_ORDER, null); if (joined == null || joined.isEmpty()) { order = new SortOrder[getOutputKeySortColumns().length]; Arrays.fill(order, SortOrder.ASC); } else { String[] orders = joined.split(","); order = new SortOrder[orders.length]; for (int i = 0; i < order.length; i++) { order[i] = SortOrder.valueOf(orders[i]); } } return order; } /** * Key ?? * * @param order * ?? */ public void setOutputKeySortOrder(SortOrder[] order) { set(CONF.OUTPUT_KEY_SORT_ORDER, StringUtils.join(order, ",")); } /** * ? Key Key. * <p> * Key MapReduce ?? {@link com.aliyun.odps.mapred.Reducer} * </p> * * @return Key * @see #setOutputGroupingColumns(String[]) */ public String[] getOutputGroupingColumns() { String joined = get(CONF.OUTPUT_GROUP_COLUMNS, null); if (joined != null) { return joined.split(","); } return SchemaUtils.getNames(getMapOutputKeySchema()); } /** * Key . * * <p> * Key MapReduce ?? {@link com.aliyun.odps.mapred.Reducer} * </p> * * @param cols * Key * @see #setMapOutputKeySchema(Column[]) */ public void setOutputGroupingColumns(String[] cols) { set(CONF.OUTPUT_GROUP_COLUMNS, StringUtils.join(cols, ",")); } /** * ? {@link Mapper} Key ? 
* {@link WritableComparator#get(Class)} . * * @see RecordComparator * @see Mapper * @return {@link Mapper} Key ?. * */ @SuppressWarnings("rawtypes") public Class<? extends RecordComparator> getOutputKeyComparatorClass() { return getClass(CONF.OUTPUT_KEY_COMPARATOR_CLASS, null, RecordComparator.class); } /** * {@link Mapper} Key ?. * * <p> * Key ? MapReduce ?? {@link Mapper} {@link Reducer} * Shuffle * </p> * * @param theClass * {@link Mapper} Key ?{@link RecordComparator} ? * @see #setOutputKeyGroupingComparatorClass(Class) * */ @SuppressWarnings("rawtypes") public void setOutputKeyComparatorClass(Class<? extends RecordComparator> theClass) { setClass(CONF.OUTPUT_KEY_COMPARATOR_CLASS, theClass, RecordComparator.class); } /** * ? Key {@link #getOutputKeyComparatorClass()}. * <p> * Key MapReduce ?? {@link Reducer} * </p> * * @see #setOutputKeyGroupingComparatorClass(Class) * @return Key * */ @SuppressWarnings("rawtypes") public Class<? extends RecordComparator> getOutputKeyGroupingComparatorClass() { return getClass(CONF.OUTPUT_KEY_GROUPING_COMPARATOR_CLASS, null, RecordComparator.class); } /** * Key ? {@link #getOutputKeyComparatorClass()} . * * <p> * Key MapReduce ?? {@link Reducer} * </p> * * @param theClass * Key {@link RecordComparator}? * @see #setOutputKeyComparatorClass(Class) * */ @SuppressWarnings("rawtypes") public void setOutputKeyGroupingComparatorClass(Class<? extends RecordComparator> theClass) { setClass(CONF.OUTPUT_KEY_GROUPING_COMPARATOR_CLASS, theClass, RecordComparator.class); } /** * ? {@link com.aliyun.odps.mapred.Mapper}? * {@link com.aliyun.odps.mapred.MapperBase}. * * @return {@link com.aliyun.odps.mapred.Mapper} . */ public Class<? extends Mapper> getMapperClass() { return getClass(CONF.MAP_CLASS, MapperBase.class, Mapper.class); } /** * {@link com.aliyun.odps.mapred.Mapper}. * * @param theClass * {@link com.aliyun.odps.mapred.Mapper} ? */ public void setMapperClass(Class<? 
extends Mapper> theClass) { setClass(CONF.MAP_CLASS, theClass, Mapper.class); } /** * ?. {@link com.aliyun.odps.mapred.Mapper}Key. * * @return */ public String[] getPartitionColumns() { String joined = get(CONF.PARTITION_COLUMNS, null); if (joined != null) { return joined.split(","); } return SchemaUtils.getNames(getMapOutputKeySchema()); } /** * . ??{@link JobConf#setPartitionerClass(Class)}{@link * JobConf#setPartitionColumns(String[])} * * * @param cols * ?? * @see com.aliyun.odps.mapred.Mapper */ public void setPartitionColumns(String[] cols) { set(CONF.PARTITION_COLUMNS, StringUtils.join(cols, ",")); } /** * ? {@link com.aliyun.odps.mapred.Reducer} * {@link com.aliyun.odps.mapred.ReducerBase} * * @return {@link com.aliyun.odps.mapred.Reducer} */ public Class<? extends Reducer> getReducerClass() { return getClass(CONF.REDUCE_CLASS, ReducerBase.class, Reducer.class); } /** * {@link com.aliyun.odps.mapred.Reducer}. * * @param theClass * {@link com.aliyun.odps.mapred.Reducer} */ public void setReducerClass(Class<? extends Reducer> theClass) { setClass(CONF.REDUCE_CLASS, theClass, Reducer.class); } /** * ? combiner * * @return combiner */ public Class<? extends Reducer> getCombinerClass() { return getClass(CONF.COMBINE_CLASS, null, Reducer.class); } /** * combiner. * * @param theClass * combiner */ public void setCombinerClass(Class<? extends Reducer> theClass) { setClass(CONF.COMBINE_CLASS, theClass, Reducer.class); } /** * ? partitioner. * * @return combiner */ public Class<? extends Partitioner> getPartitionerClass() { return getClass(CONF.PARTITION_CLASS, null, Partitioner.class); } /** * partitioner. ??{@link JobConf#setPartitionerClass(Class)}{@link * JobConf#setPartitionColumns(String[])} * * @param theClass * {@link com.aliyun.odps.mapred.Partitioner} */ public void setPartitionerClass(Class<? extends Partitioner> theClass) { setClass(CONF.PARTITION_CLASS, theClass, Partitioner.class); } /** * ???? MB 640. * * <p> * ?? 
{@link com.aliyun.odps.mapred.Mapper} ?? * {@link com.aliyun.odps.mapred.Mapper} * </p> * * @return ??? MB */ public long getSplitSize() { return getLong(CONF.MAP_SPLIT_SIZE, 640); } /** * ??? MB 640. * * <p> * ?? {@link com.aliyun.odps.mapred.Mapper} ?? * {@link com.aliyun.odps.mapred.Mapper} * </p> * * @param size * ??? MB */ public void setSplitSize(long size) { setLong(CONF.MAP_SPLIT_SIZE, size); } /** * ? {@link com.aliyun.odps.mapred.Mapper} . input. * * @return {@link com.aliyun.odps.mapred.Mapper} */ public int getNumMapTasks() { return getInt(CONF.MAP_TASKS, 1); } /** * {@link com.aliyun.odps.mapred.Mapper} . input. * * @param n * {@link com.aliyun.odps.mapred.Mapper} */ public void setNumMapTasks(int n) { setInt(CONF.MAP_TASKS, n); } /** * ? {@link com.aliyun.odps.mapred.Reducer} . * * @return {@link com.aliyun.odps.mapred.Reducer} */ public int getNumReduceTasks() { return getInt(CONF.REDUCE_TASKS, 1); } /** * {@link com.aliyun.odps.mapred.Reducer} * {@link com.aliyun.odps.mapred.Mapper} 1/4. * * <p> * ? {@link com.aliyun.odps.mapred.Reducer}?0setNumReduceTasks(0); * </p> * * @param n * {@link com.aliyun.odps.mapred.Reducer} */ public void setNumReduceTasks(int n) { setInt(CONF.REDUCE_TASKS, n); } /** * ? {@link com.aliyun.odps.mapred.Mapper} ???MB 2048. * * @return {@link com.aliyun.odps.mapred.Mapper} ? */ public int getMemoryForMapTask() { return getInt(CONF.MAP_MEMORY, 2048); } /** * {@link com.aliyun.odps.mapred.Mapper} ???MB 2048. * * @param mem * {@link com.aliyun.odps.mapred.Mapper} ? */ public void setMemoryForMapTask(int mem) { setInt(CONF.MAP_MEMORY, mem); } /** * ? {@link com.aliyun.odps.mapred.Reducer} ???MB 2048. * * @return {@link com.aliyun.odps.mapred.Reducer} ? */ public int getMemoryForReduceTask() { return getInt(CONF.REDUCE_MEMORY, 2048); } /** * {@link com.aliyun.odps.mapred.Reducer} ???MB 2048. * * @param mem * {@link com.aliyun.odps.mapred.Reducer} ? 
*/ public void setMemoryForReduceTask(int mem) { setInt(CONF.REDUCE_MEMORY, mem); } /** * ? Map JVM ???MB 1024. * * @return JVM? */ public int getMemoryForMapperJVM() { return getInt(CONF.MAP_JVM_MEMORY, 1024); } /** * Map JVM ???MB 1024. * * @param mem * JVM? */ public void setMemoryForMapperJVM(int mem) { setInt(CONF.MAP_JVM_MEMORY, mem); } /** * ? Reduce JVM ???MB 1024. * * @return JVM? */ public int getMemoryForReducerJVM() { return getInt(CONF.REDUCE_JVM_MEMORY, 1024); } /** * Reduce JVM ???MB 1024. * * @param mem * JVM? */ public void setMemoryForReducerJVM(int mem) { setInt(CONF.REDUCE_JVM_MEMORY, mem); } /** * ? JVM???MB 1024. * * @return JVM? */ public int getMemoryForJVM() { return getInt(CONF.JVM_MEMORY, 1024); } /** * JVM???MB 1024. * * @param mem * JVM? */ public void setMemoryForJVM(int mem) { setInt(CONF.JVM_MEMORY, mem); } /** * ?Combiner??1024? * * @return Combiner? */ public int getCombinerCacheItems() { return getInt(CONF.COMBINER_CACHE_ITEMS, 1024); } /** * Combiner?? */ public void setCombinerCacheItems(int size) { setInt(CONF.COMBINER_CACHE_ITEMS, size); } /** * ?Combinerspill0.5, combine? * spill?, CombinerOptimizeEnabletrue? * * @return Combinerspill */ public float getCombinerCacheSpillPercent() { return getFloat(CONF.COMBINER_CACHE_SPILL_PERCENT, (float) 0.5); } /** * Combinerspill */ public void setCombinerCacheSpillPercent(float percent) { setFloat(CONF.COMBINER_CACHE_SPILL_PERCENT, percent); } /** * ??Combinerfalse * * @return true or false */ public boolean getCombinerOptimizeEnable() { return getBoolean(CONF.COMBINER_OPTIMIZE_ENABLE, false); } /** * ?Combiner */ public void setCombinerOptimizeEnable(boolean isCombineOpt) { setBoolean(CONF.COMBINER_OPTIMIZE_ENABLE, isCombineOpt); } /** * ??600 * * @return */ public int getFunctionTimeout() { return getInt(CONF.FUNCTION_TIMEOUT, 600); } /** * ? 
* * @param timeout * ?[1, 3600] */ public void setFunctionTimeout(int timeout) { setInt(CONF.FUNCTION_TIMEOUT, timeout); } /** * ?Instance * * @return */ public int getInstancePriority() { return getInt(CONF.INSTANCE_PRIORITY, 9); } /** * ?? * * @return ? * */ public boolean getOutputOverwrite() { return getBoolean(CONF.OUTPUT_OVERWRITE, true); } /** * ? * * @param isOverwrite * ? */ public void setOutputOverwrite(boolean isOverwrite) { setBoolean(CONF.OUTPUT_OVERWRITE, isOverwrite); } /** * ?? * * @return ? * */ public boolean getInnerOutputEnable() { return getBoolean(CONF.INNER_OUTPUT_ENABLE, false); } /** * ? * * @param isInnerOutput * ? */ public void setInnerOutputEnable(boolean isInnerOutput) { setBoolean(CONF.INNER_OUTPUT_ENABLE, isInnerOutput); } /** * Instance??[0, 9] * * @param priority * */ public void setInstancePriority(int priority) { setInt(CONF.INSTANCE_PRIORITY, priority); } /** * ?. * * @return */ @Deprecated public Column[] getOutputSchema() { try { onDeprecated(JobConf.class.getMethod("getOutputSchema")); } catch (NoSuchMethodException e) { e.printStackTrace(); } return null; } /** * ?label * * @param label * * @return */ @Deprecated public Column[] getOutputSchema(String label) { try { onDeprecated(JobConf.class.getMethod("getOutputSchema", String.class)); } catch (NoSuchMethodException e) { e.printStackTrace(); } return null; } /** * label * * @param schema * * @param label * */ @Deprecated public void setOutputSchema(Column[] schema, String label) { try { onDeprecated(JobConf.class.getMethod("setOutputSchema", Column[].class, String.class)); } catch (NoSuchMethodException e) { e.printStackTrace(); } } private void onDeprecated(Method method) { set("odps.deprecated." + method.getDeclaringClass().getCanonicalName() + "." + method.getName(), "true"); LOG.warn("Calling deprecated method:" + method); } }