Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred;

import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.conf.Configuration;
import com.aliyun.odps.counter.Counter;
import com.aliyun.odps.counter.CounterGroup;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.local.common.Constants;
import com.aliyun.odps.local.common.DownloadMode;
import com.aliyun.odps.local.common.FileSplit;
import com.aliyun.odps.local.common.JobDirecotry;
import com.aliyun.odps.local.common.TableMeta;
import com.aliyun.odps.local.common.WareHouse;
import com.aliyun.odps.local.common.security.ApplicatitionType;
import com.aliyun.odps.local.common.security.SecurityClient;
import com.aliyun.odps.local.common.utils.DownloadUtils;
import com.aliyun.odps.local.common.utils.LocalRunUtils;
import com.aliyun.odps.local.common.utils.PartitionUtils;
import com.aliyun.odps.local.common.utils.SchemaUtils;
import com.aliyun.odps.mapred.bridge.utils.Validator;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.conf.SessionState;
import com.aliyun.odps.mapred.local.JobCounter;
import com.aliyun.odps.mapred.local.LocalRunningJob;
import com.aliyun.odps.mapred.local.LocalTaskId;
import com.aliyun.odps.mapred.local.MRExceptionCode;
import com.aliyun.odps.mapred.local.MapDriver;
import com.aliyun.odps.mapred.local.MapOutputBuffer;
import com.aliyun.odps.mapred.local.ReduceDriver;
import com.aliyun.odps.mapred.local.StageStatic;
import com.aliyun.odps.mapred.local.utils.LocalMRUtils;
import com.aliyun.odps.mapred.local.utils.LocalValidatorFactory;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.pipeline.Pipeline;
import com.aliyun.odps.pipeline.Pipeline.TransformNode;

/**
 * {@link JobRunner} that executes a MapReduce job (or a multi-stage pipeline
 * job) in local mode.
 *
 * <p>
 * A run stages input tables and resources from the {@link WareHouse} into a
 * job directory, runs the map and reduce drivers one after another in the
 * calling thread, moves the produced output back into the warehouse and
 * finally prints a summary to {@code System.err}.
 */
public class LocalJobRunner implements JobRunner {

  private List<FileSplit> inputs;
  private WareHouse wareHouse;
  private JobDirecotry jobDirecotry;
  private Counters counters;
  private Odps odps;
  private BridgeJobConf conf;
  private Map<FileSplit, TableInfo> splitToTableInfo;
  private List<StageStatic> stageStaticList;

  private static final Log LOG = LogFactory.getLog(LocalJobRunner.class);

  /** Assigned by {@link #initialize()} from the counters of the current run. */
  public static Counter EMPTY_COUNTER;

  // pipe mode
  private Pipeline pipeline;

  /**
   * Resets all per-run state: looks up the {@link Odps} instance from the
   * session, initializes the warehouse with the current configuration,
   * creates fresh collections, counters and job directory, and sets up the
   * security client.
   */
  public void initialize() {
    odps = SessionState.get().getOdps();

    wareHouse = WareHouse.getInstance();
    wareHouse.init(odps, conf);
    LocalMRUtils.generateLocalMrTaskName();

    inputs = new ArrayList<FileSplit>();
    counters = new Counters();
    splitToTableInfo = new HashMap<FileSplit, TableInfo>();
    stageStaticList = new LinkedList<StageStatic>();
    jobDirecotry = new JobDirecotry();

    EMPTY_COUNTER = counters.findCounter(JobCounter.__EMPTY_WILL_NOT_SHOW);

    initSecurity();
  }

  /**
   * Initializes {@link SecurityClient} with the code-source locations of the
   * framework classes, the placeholder replacements for the temporary and
   * warehouse directories, and the security-related configuration flags.
   */
  private void initSecurity() {
    List<String> codeBase = new LinkedList<String>();
    // add odps-mapred-local
    codeBase.add(codeSourcePath(LocalJobRunner.class));
    // add odps-sdk-mapred
    codeBase.add(codeSourcePath(MapperBase.class));
    // add odps-mapred-bridge
    codeBase.add(codeSourcePath(BridgeJobConf.class));

    Map<String, String> replacement = new HashMap<String, String>();
    replacement.put("\\$TEMP_HOME\\$",
        WareHouse.getInstance().getJobDirStr() + File.separator + "-");
    replacement.put("\\$WAREHOUSE_HOME\\$",
        WareHouse.getInstance().getWarehouseDir().getAbsolutePath() + File.separator + "-");

    boolean isSecurityEnabled = conf.getBoolean(Constants.LOCAL_SECURITY_ENABLE, false);
    boolean isJNIEnabled = conf.getBoolean(Constants.LOCAL_SECURITY_JNI_ENABLE, false);
    String userDefinePolicy = conf.get(Constants.LOCAL_USER_DEFINE_POLICY, "");

    SecurityClient.init(ApplicatitionType.MR, codeBase, replacement, isSecurityEnabled,
        isJNIEnabled, userDefinePolicy);
  }

  /**
   * Returns the path of the code source {@code clazz} was loaded from, with
   * everything up to and including the first {@code ':'} removed.
   */
  private static String codeSourcePath(Class<?> clazz) {
    String path = clazz.getProtectionDomain().getCodeSource().getLocation().getPath();
    return path.substring(path.indexOf(":") + 1);
  }

  /**
   * Initializes the runner and runs the job synchronously.
   *
   * @return a {@link LocalRunningJob} in state {@link JobStatus#SUCCEEDED}
   *         carrying the counters of the run
   * @throws RuntimeException
   *           wrapping any exception raised while initializing or running
   */
  @Override
  public RunningJob submit() {
    try {
      initialize();
      runJob();
      return new LocalRunningJob(wareHouse.getJobName(), JobStatus.SUCCEEDED, counters);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Runs the whole job: writes the job configuration, prepares inputs,
   * outputs and resources, validates the configuration, executes the stages,
   * moves the outputs into the warehouse, cleans up and prints the summary.
   *
   * @throws IOException
   *           if staging files or moving outputs fails
   * @throws OdpsException
   *           if preparation fails or any stage throws (the stage exception
   *           is wrapped)
   */
  private void runJob() throws IOException, OdpsException {
    pipeline = Pipeline.fromJobConf(conf);
    LOG.info("Run mapreduce job in local mode, Type: " + (pipeline == null ? "MR" : "MRR")
        + ", Job ID: " + wareHouse.getJobName());

    // write job configuration to temporary file job.xml; close the stream
    // even when writing fails so the file handle is not leaked
    FileOutputStream fos = new FileOutputStream(jobDirecotry.getJobFile());
    try {
      conf.writeXml(fos);
    } finally {
      fos.close();
    }

    LOG.info("Start to process input tables");
    processInputs();
    LOG.info("Finished process input tables");

    LOG.info("Start to process output tables");
    processOutputs();
    LOG.info("Finished process output tables");

    LOG.info("Start to process resources");
    processResources();
    LOG.info("Finished process resources");

    LOG.info("Start to fill tableInfo");
    fillTableInfo();
    LOG.info("Finished fill tableInfo");

    LOG.info("Start to validate configuration");
    Validator validator = LocalValidatorFactory.getValidator(conf);
    validator.validate();
    LOG.info("Finished validate configuration");

    try {
      SecurityClient.open();
      if (pipeline != null) {
        handlePipeMode();
      } else {
        handleNonPipeMode();
      }
    } catch (Exception ex) {
      throw new OdpsException(ex);
    } finally {
      SecurityClient.close();
    }

    // copy output files from temporary directory to warehouse
    moveOutputs();

    // delete temporary directory if user not set odps.mapred.temp.retain=true
    try {
      if (!wareHouse.isRetainTempData()) {
        FileUtils.deleteDirectory(jobDirecotry.getJobDir());
      }
    } catch (Exception exception) {
      // best effort: a failed cleanup must not fail the job, but keep the
      // stack trace in the log
      LOG.warn(exception.getMessage(), exception);
    }

    // print summary content
    System.err.println();
    System.err.println("Summary:");
    printInputOutput();
    printStageStatic();
    printCounters();
    System.err.println("\nOK");
  }

  /**
   * Returns the number of mapper instances to run: one per input split, or
   * the value of {@code odps.stage.mapper.num} (default 1) when there is no
   * input split.
   */
  private int computeMapNum() {
    if (inputs.size() > 0) {
      return inputs.size();
    }
    // allow no input
    return conf.getInt("odps.stage.mapper.num", 1);
  }

  /**
   * Runs a pipeline job: all mappers first, then every following pipeline
   * node as a reduce stage, feeding the output buffer of one stage into the
   * next.
   */
  private void handlePipeMode() throws IOException {
    int mapCopyNum = computeMapNum();

    TransformNode pipeNode = pipeline.getFirstNode();
    int reduceCopyNum = computeReduceNum(mapCopyNum, pipeNode);

    LOG.info("Start to run mappers, num: " + mapCopyNum);
    TaskId taskId = new LocalTaskId("M1", 0, odps.getDefaultProject());
    StageStatic stageStatic = createStageStatic(taskId);
    stageStatic.setWorkerCount(mapCopyNum);

    MapOutputBuffer inputBuffer =
        new MapOutputBuffer(conf, pipeline, taskId.getTaskId(), reduceCopyNum);
    for (int mapId = 0; mapId < mapCopyNum; mapId++) {
      FileSplit split = inputs.size() > 0 ? inputs.get(mapId) : FileSplit.NullSplit;
      TableInfo splitTable = splitToTableInfo.get(split);
      // NOTE(review): every mapper is created with instance id 0, whereas the
      // reduce loop below passes its loop index - confirm this is intended.
      taskId = new LocalTaskId("M1", 0, odps.getDefaultProject());
      LOG.info("Start to run mapper, TaskId: " + taskId + ", Input: " + splitTable);

      MapDriver mapDriver = new MapDriver(conf, split, taskId, inputBuffer, counters, splitTable);
      mapDriver.run();
      setInputOutputRecordCount(stageStatic);

      LOG.info("Fininshed run mapper, TaskId: " + taskId + ", Input: " + splitTable);
    }
    LOG.info("Fininshed run all mappers, num: " + mapCopyNum);

    int reduceNodeCount = pipeline.getNodeNum() - 1;
    if (reduceNodeCount > 0) {
      LOG.info("Start to run reduces, num: " + reduceNodeCount);
      stageStatic.setNextTaskId("R2_1");

      int i = 0;
      for (; i < reduceNodeCount; ++i) {
        // reduce stage i is named R<i+2>_<i+1>
        String stageName = "R" + (i + 2) + "_" + (i + 1);
        taskId = new LocalTaskId(stageName, 0, odps.getDefaultProject());
        LOG.info("Start to run reduce, taskId: " + taskId);

        // point the previous stage at this one before switching to the
        // statistics record of the new stage
        stageStatic.setNextTaskId(stageName);
        stageStatic = createStageStatic(taskId);
        stageStatic.setWorkerCount(reduceCopyNum);

        int nextReduceCopyNum = computeReduceNum(reduceCopyNum, pipeline.getNode(i + 1));
        MapOutputBuffer outputBuffer =
            new MapOutputBuffer(conf, pipeline, taskId.getTaskId(), nextReduceCopyNum);
        for (int j = 0; j < reduceCopyNum; ++j) {
          taskId = new LocalTaskId(stageName, j, odps.getDefaultProject());
          ReduceDriver reduceDriver =
              new ReduceDriver(conf, inputBuffer, outputBuffer, taskId, counters, j);
          reduceDriver.run();
          setInputOutputRecordCount(stageStatic);
        }

        // the output of this stage is the input of the next one
        inputBuffer = outputBuffer;
        reduceCopyNum = nextReduceCopyNum;
        LOG.info("Finished run reduce, taskId: " + taskId);
      }

      // next task id of the last reduce stage
      stageStatic.setNextTaskId("R" + (i + 1) + "_" + i + "FS_9");
      LOG.info("Fininshed run all reduces, num: " + reduceNodeCount);
    } else {
      stageStatic.setNextTaskId("M1");
      LOG.info("This is a MapOnly job");
    }
  }

  /**
   * Runs a plain (non-pipeline) job: all mappers first, then, when the reduce
   * count is positive, all reducers reading from the shared map output
   * buffer.
   */
  private void handleNonPipeMode() throws IOException {
    int mapCopyNum = computeMapNum();
    int reduceCopyNum = computeReduceNum(mapCopyNum, null);

    MapOutputBuffer buffer = new MapOutputBuffer(conf, reduceCopyNum);

    LOG.info("Start to run mappers, num: " + mapCopyNum);
    TaskId taskId = new LocalTaskId("M1", 0, odps.getDefaultProject());
    StageStatic stageStatic = createStageStatic(taskId);
    stageStatic.setWorkerCount(mapCopyNum);

    for (int mapId = 0; mapId < mapCopyNum; mapId++) {
      FileSplit split = inputs.size() > 0 ? inputs.get(mapId) : FileSplit.NullSplit;
      TableInfo splitTable = splitToTableInfo.get(split);
      taskId = new TaskId("M", mapId + 1);
      LOG.info("Start to run mapper, TaskId: " + taskId + ", Input: " + splitTable);

      MapDriver mapDriver = new MapDriver(conf, split, taskId, buffer, counters, splitTable);
      mapDriver.run();
      setInputOutputRecordCount(stageStatic);

      LOG.info("Fininshed run mapper, TaskId: " + taskId + ", Input: " + splitTable);
    }
    LOG.info("Fininshed run all mappers, num: " + mapCopyNum);

    if (reduceCopyNum > 0) {
      LOG.info("Start to run reduces, num: " + reduceCopyNum);
      taskId = new LocalTaskId("R2_1", 0, odps.getDefaultProject());

      // point the map stage at the reduce stage before switching to the
      // statistics record of the reduce stage
      stageStatic.setNextTaskId("R2_1");
      stageStatic = createStageStatic(taskId);
      stageStatic.setWorkerCount(reduceCopyNum);

      for (int reduceId = 0; reduceId < reduceCopyNum; ++reduceId) {
        taskId = new TaskId("R", reduceId);
        LOG.info("Start to run reduce, taskId: " + taskId);

        ReduceDriver reduceDriver =
            new ReduceDriver(conf, buffer, null, taskId, counters, reduceId);
        reduceDriver.run();
        setInputOutputRecordCount(stageStatic);

        LOG.info("Finished run reduce, taskId: " + taskId);
      }

      stageStatic.setNextTaskId("R2_1FS_9");
      LOG.info("Fininshed run all reduces, num: " + reduceCopyNum);
    } else {
      stageStatic.setNextTaskId("M1");
      LOG.info("This is a MapOnly job");
    }
  }

  /**
   * Creates a statistics record for the stage identified by {@code taskId}
   * and registers it in {@link #stageStaticList}.
   */
  private StageStatic createStageStatic(TaskId taskId) {
    StageStatic stageStatic = new StageStatic();
    stageStaticList.add(stageStatic);
    stageStatic.setTaskId(taskId.toString());
    return stageStatic;
  }

  /**
   * Passes the current values of the internal input/output record counters to
   * {@code stageStatic} and resets both counters to zero for the next worker.
   */
  private void setInputOutputRecordCount(StageStatic stageStatic) {
    Counter inputRecords = counters.findCounter(JobCounter.__EMPTY_INPUT_RECORD_COUNT);
    Counter outputRecords = counters.findCounter(JobCounter.__EMPTY_OUTPUT_RECORD_COUNT);

    stageStatic.setInputRecordCount(inputRecords.getValue());
    stageStatic.setOutputRecordCount(outputRecords.getValue());

    inputRecords.setValue(0);
    outputRecords.setValue(0);
  }

  /**
   * Stages one input table: downloads it into the warehouse when required,
   * copies the matching data into the job input directory and registers one
   * {@link FileSplit} per copied data file.
   *
   * @param tableInfo
   *          the input table; its project name is defaulted to the warehouse's
   *          default project when empty
   * @throws RuntimeException
   *           if {@code tableInfo} is null or has a blank table name, or if
   *           the partition is still missing after downloading
   * @throws IOException
   *           if a partition spec is given for a table without partitions
   */
  private void processInput(TableInfo tableInfo) throws IOException, OdpsException {
    if (tableInfo == null || StringUtils.isBlank(tableInfo.getTableName())) {
      throw new RuntimeException("Invalid TableInfo: " + tableInfo);
    }

    if (StringUtils.isEmpty(tableInfo.getProjectName())) {
      tableInfo.setProjectName(wareHouse.getOdps().getDefaultProject());
    }

    // columns and partition spec requested on the TableInfo
    String[] readCols = tableInfo.getCols();
    PartitionSpec expectParts = tableInfo.getPartitionSpec();

    // download when the partition is not in the warehouse yet, or when the
    // download mode is ALWAYS
    if (!wareHouse.existsPartition(tableInfo.getProjectName(), tableInfo.getTableName(),
        expectParts) || wareHouse.getDownloadMode() == DownloadMode.ALWAYS) {

      DownloadUtils.downloadTableSchemeAndData(odps, tableInfo,
          wareHouse.getLimitDownloadRecordCount(), wareHouse.getInputColumnSeperator());

      if (!wareHouse.existsPartition(tableInfo.getProjectName(), tableInfo.getTableName(),
          expectParts)) {
        throw new RuntimeException(LocalRunUtils.getDownloadErrorMsg(tableInfo.toString()));
      }
    }

    // table meta, read fields and partitions as stored in the warehouse
    TableMeta whTblMeta =
        wareHouse.getTableMeta(tableInfo.getProjectName(), tableInfo.getTableName());
    Column[] whReadFields = LocalRunUtils.getInputTableFields(whTblMeta, readCols);
    List<PartitionSpec> whParts =
        wareHouse.getPartitions(tableInfo.getProjectName(), tableInfo.getTableName());

    if (whParts.size() > 0) {
      // partitioned table
      for (PartitionSpec partSpec : whParts) {
        // skip partitions that do not match the requested spec
        if (!PartitionUtils.match(expectParts, partSpec)) {
          continue;
        }

        File whSrcDir = wareHouse.getPartitionDir(whTblMeta.getProjName(),
            whTblMeta.getTableName(), partSpec);

        // add input split only when src dir has data file
        if (LocalRunUtils.listDataFiles(whSrcDir).size() > 0) {
          // copy from the warehouse into the job input directory
          File tempDataDir = jobDirecotry.getInputDir(wareHouse.getRelativePath(
              whTblMeta.getProjName(), whTblMeta.getTableName(), partSpec));
          File tempSchemeDir = jobDirecotry.getInputDir(wareHouse.getRelativePath(
              whTblMeta.getProjName(), whTblMeta.getTableName(), null));

          wareHouse.copyTable(whTblMeta.getProjName(), whTblMeta.getTableName(), partSpec,
              readCols, tempSchemeDir, wareHouse.getLimitDownloadRecordCount(),
              wareHouse.getInputColumnSeperator());

          addInputSplits(tempDataDir, whReadFields, tableInfo);
        }
      }
    } else {
      // not partitioned table
      if (tableInfo.getPartSpec() != null && tableInfo.getPartSpec().size() > 0) {
        throw new IOException(MRExceptionCode.ODPS_0720121 + "table "
            + tableInfo.getProjectName() + "." + tableInfo.getTableName()
            + " is not partitioned table");
      }

      File whSrcDir = wareHouse.getTableDir(whTblMeta.getProjName(), whTblMeta.getTableName());
      if (LocalRunUtils.listDataFiles(whSrcDir).size() > 0) {
        // copy from the warehouse into the job input directory; data and
        // schema share the same directory here
        File tempDataDir = jobDirecotry.getInputDir(wareHouse.getRelativePath(
            whTblMeta.getProjName(), whTblMeta.getTableName(), null));
        File tempSchemeDir = tempDataDir;

        wareHouse.copyTable(whTblMeta.getProjName(), whTblMeta.getTableName(), null, readCols,
            tempSchemeDir, wareHouse.getLimitDownloadRecordCount(),
            wareHouse.getInputColumnSeperator());

        addInputSplits(tempDataDir, whReadFields, tableInfo);
      }
    }
  }

  /**
   * Registers one whole-file {@link FileSplit} for every data file found in
   * {@code dataDir} and remembers which table each split belongs to.
   */
  private void addInputSplits(File dataDir, Column[] fields, TableInfo tableInfo)
      throws IOException, OdpsException {
    for (File file : LocalRunUtils.listDataFiles(dataDir)) {
      FileSplit split = new FileSplit(file, fields, 0L, file.length());
      splitToTableInfo.put(split, tableInfo);
      inputs.add(split);
    }
  }

  /**
   * Stages every input table configured on the job. When input tables are
   * configured but none of them yields a split, a single
   * {@link FileSplit#NullSplit} is registered.
   */
  private void processInputs() throws IOException, OdpsException {
    TableInfo[] inputTableInfos = InputUtils.getTables(conf);

    if (inputTableInfos == null) {
      LOG.debug("No input tables to process");
      return;
    }

    for (TableInfo tableInfo : inputTableInfos) {
      LOG.debug("Start to process input table: " + tableInfo);
      processInput(tableInfo);
      LOG.debug("Finished process input table: " + tableInfo);
    }

    if (inputs.isEmpty()) {
      inputs.add(FileSplit.NullSplit);
    }
  }

  /**
   * Stages the job's resources into the job resource directory (downloading
   * them into the warehouse when required) and installs a new context class
   * loader whose search path additionally contains every staged resource.
   *
   * <p>
   * The current context class loader is cast to {@link URLClassLoader}; the
   * method therefore fails with a {@link ClassCastException} when the context
   * class loader is of a different type.
   */
  @SuppressWarnings("deprecation")
  private void processResources() throws IOException, OdpsException {
    String[] resources = conf.getResources();

    if (resources == null || resources.length == 0) {
      LOG.debug("No resources to process");
      return;
    }

    // de-duplicate resource names
    Set<String> names = new HashSet<String>(Arrays.asList(resources));

    LOG.info("Start to process resources: " + StringUtils.join(resources, ','));

    URLClassLoader loader = (URLClassLoader) Thread.currentThread().getContextClassLoader();
    ArrayList<URL> cp = new ArrayList<URL>(Arrays.asList(loader.getURLs()));
    String curProjName = wareHouse.getOdps().getDefaultProject();
    File resDir = jobDirecotry.getResourceDir();

    for (String name : names) {
      List<String> res = LocalRunUtils.parseResourceName(name, curProjName);
      String projName = res.get(0);
      String resName = res.get(1);

      if (!wareHouse.existsResource(projName, resName)
          || wareHouse.getDownloadMode() == DownloadMode.ALWAYS) {
        DownloadUtils.downloadResource(odps, projName, resName,
            wareHouse.getLimitDownloadRecordCount(), wareHouse.getInputColumnSeperator());
      }

      wareHouse.copyResource(projName, resName, resDir,
          wareHouse.getLimitDownloadRecordCount(), wareHouse.getInputColumnSeperator());
      cp.add(new File(resDir, resName).toURI().toURL());
    }

    URLClassLoader newLoader = new URLClassLoader(cp.toArray(new URL[0]), loader);
    Thread.currentThread().setContextClassLoader(newLoader);
    conf.setClassLoader(newLoader);
  }

  /**
   * Prepares every output table: creates its directory inside the job
   * directory, obtains the table meta (from the warehouse, or by downloading
   * it and generating the schema file in the warehouse), writes the schema
   * file into the job directory and registers the output schema on the
   * configuration under the table's label.
   */
  private void processOutputs() throws IOException {
    TableInfo[] outputs = OutputUtils.getTables(conf);

    if (outputs == null || outputs.length == 0) {
      LOG.debug("No output tables to process");
      return;
    }

    for (TableInfo tableInfo : outputs) {
      if (StringUtils.isBlank(tableInfo.getProjectName())) {
        tableInfo.setProjectName(wareHouse.getOdps().getDefaultProject());
      }

      // FIXME: Support partition
      File tableDirInJobDir = jobDirecotry.getOutputDir(tableInfo.getLabel());
      tableDirInJobDir.mkdirs();

      TableMeta tblMeta = null;
      if (wareHouse.existsTable(tableInfo.getProjectName(), tableInfo.getTableName())
          && wareHouse.getDownloadMode() != DownloadMode.ALWAYS) {
        tblMeta = wareHouse.getTableMeta(tableInfo.getProjectName(), tableInfo.getTableName());
      } else {
        tblMeta = DownloadUtils.downloadTableInfo(odps, tableInfo);

        // generate output table schema in warehouse
        File tableDirInWarehouse =
            wareHouse.getTableDir(tableInfo.getProjectName(), tableInfo.getTableName());
        tableDirInWarehouse.mkdirs();
        SchemaUtils.generateSchemaFile(tblMeta, null, tableDirInWarehouse);
      }

      SchemaUtils.generateSchemaFile(tblMeta, null, tableDirInJobDir);
      conf.setOutputSchema(tblMeta.getCols(), tableInfo.getLabel());
    }
  }

  /**
   * Completes the table information of the job: fills in missing project
   * names and column lists, and registers input and output schemas on the
   * configuration. When no output table is configured, a single STRING column
   * named {@code nil} is registered under the default label.
   */
  private void fillTableInfo() throws IOException {
    String project = wareHouse.getOdps().getDefaultProject();

    // tables referenced by the registered input splits
    for (TableInfo info : splitToTableInfo.values()) {
      fillInputTableInfo(info, project);
    }

    // fill input table
    TableInfo[] infos = InputUtils.getTables(conf);
    if (infos != null) {
      boolean changed = false;
      for (TableInfo info : infos) {
        if (fillInputTableInfo(info, project)) {
          changed = true;
        }
      }
      if (changed) {
        InputUtils.setTables(infos, conf);
      }
    }

    // Expand output columns.
    infos = OutputUtils.getTables(conf);
    if (infos == null) {
      conf.setOutputSchema(new Column[] { new Column("nil", OdpsType.STRING) },
          TableInfo.DEFAULT_LABEL);
    } else {
      for (TableInfo info : infos) {
        if (info.getProjectName() == null) {
          info.setProjectName(project);
        }
        Column[] schema =
            wareHouse.getTableMeta(info.getProjectName(), info.getTableName()).getCols();
        info.setCols(SchemaUtils.getColumnNames(schema));
        conf.setOutputSchema(schema, info.getLabel());
      }
      OutputUtils.setTables(infos, conf);
    }
  }

  /**
   * Defaults the project name of {@code info} to {@code project} and
   * registers its input schema on the configuration. Without an explicit
   * column list the full warehouse schema is used and written back to
   * {@code info}; otherwise the listed columns are looked up in the warehouse
   * schema by case-insensitive name (a name without a match leaves a
   * {@code null} entry).
   *
   * @return {@code true} if the project name or the column list of
   *         {@code info} was modified
   */
  private boolean fillInputTableInfo(TableInfo info, String project) throws IOException {
    boolean changed = false;
    if (info.getProjectName() == null) {
      changed = true;
      info.setProjectName(project);
    }

    Column[] schema =
        wareHouse.getTableMeta(info.getProjectName(), info.getTableName()).getCols();
    String[] colNames = info.getCols();
    if (colNames == null) {
      changed = true;
      conf.setInputSchema(info, schema);
      info.setCols(SchemaUtils.getColumnNames(schema));
      return changed;
    }

    Column[] columns = new Column[colNames.length];
    for (int k = 0; k < colNames.length; k++) {
      for (Column c : schema) {
        if (c.getName().equalsIgnoreCase(colNames[k])) {
          columns[k] = c;
          break;
        }
      }
    }
    conf.setInputSchema(info, columns);
    return changed;
  }

  /**
   * Moves the output of every output table from the job directory into the
   * warehouse. For a table that already exists in the warehouse the old data
   * files of the target directory are removed and replaced; otherwise the
   * schema file is copied into the table directory and all other files into
   * the target directory.
   */
  private void moveOutputs() throws IOException {
    TableInfo[] output = OutputUtils.getTables(conf);
    if (output == null) {
      return;
    }

    for (TableInfo table : output) {
      String label = table.getLabel();

      String projName = table.getProjectName();
      if (projName == null) {
        projName = wareHouse.getOdps().getDefaultProject();
      }
      String tblName = table.getTableName();
      Map<String, String> partSpec = table.getPartSpec();

      File tempTblDir = jobDirecotry.getOutputDir(table.getLabel());
      File whOutputDir =
          wareHouse.createPartitionDir(projName, tblName, PartitionUtils.convert(partSpec));

      if (wareHouse.existsTable(projName, tblName)) {
        LOG.info("Reload warehouse table:" + tblName);
        LocalRunUtils.removeDataFiles(whOutputDir);
        wareHouse.copyDataFiles(tempTblDir, null, whOutputDir,
            wareHouse.getInputColumnSeperator());
      } else {
        LOG.info("Copy output to warehouse: label=" + label + " -> "
            + whOutputDir.getAbsolutePath());

        File whOutputTableDir = wareHouse.getTableDir(projName, tblName);

        // copy schema file
        FileUtils.copyDirectory(tempTblDir, whOutputTableDir, new FileFilter() {
          @Override
          public boolean accept(File pathname) {
            return pathname.getName().equals("__schema__");
          }
        });

        // copy data files
        FileUtils.copyDirectory(tempTblDir, whOutputDir, new FileFilter() {
          @Override
          public boolean accept(File pathname) {
            return !pathname.getName().equals("__schema__");
          }
        });
      }
    }
  }

  /**
   * Determines the number of reducers for the next stage and stores it on the
   * configuration.
   *
   * <p>
   * With a pipeline node the task count of its next node (or of the node
   * itself when it has none) is used. Otherwise the configured reduce count is
   * used when the warehouse reports the key {@code odps.stage.reducer.num},
   * else {@code max(1, mapNum / 4)}.
   *
   * @param mapNum
   *          number of workers of the preceding stage
   * @param pipeNode
   *          current pipeline node, or {@code null} outside pipeline mode
   * @return the reduce count
   * @throws IOException
   *           if the resulting reduce count is negative
   */
  private int computeReduceNum(int mapNum, TransformNode pipeNode) throws IOException {
    int reduceNum = 1;
    if (pipeNode != null) {
      if (pipeNode.getNextNode() != null) {
        reduceNum = pipeNode.getNextNode().getNumTasks();
      } else {
        reduceNum = pipeNode.getNumTasks();
      }
    } else if (wareHouse.caintainsKey("odps.stage.reducer.num")) {
      reduceNum = conf.getNumReduceTasks();
    } else {
      reduceNum = Math.max(1, mapNum / 4);
    }

    if (reduceNum < 0) {
      throw new IOException(MRExceptionCode.ODPS_0720251
          + " - reduce num cann't be less than 0");
    } else if (reduceNum != conf.getNumReduceTasks()) {
      LOG.info("change reduce num from " + conf.getNumReduceTasks() + " to " + reduceNum);
    }

    conf.setNumReduceTasks(reduceNum);
    return reduceNum;
  }

  /**
   * Prints the configured input and output tables to {@code System.err}, each
   * list on one tab-indented line below its heading.
   */
  private void printInputOutput() {
    System.err.println("Inputs:");
    String inputLine = describeTables(InputUtils.getTables(conf));
    if (inputLine.length() > 0) {
      System.err.println("\t" + inputLine);
    }

    System.err.println("Outputs:");
    String outputLine = describeTables(OutputUtils.getTables(conf));
    if (outputLine.length() > 0) {
      System.err.println("\t" + outputLine);
    }
  }

  /**
   * Formats tables as a comma-separated list of
   * {@code project.table[/partPath]} entries, using the default project when a
   * table has none and dropping one trailing {@code '/'} from the partition
   * path. Returns an empty string for {@code null}.
   */
  private String describeTables(TableInfo[] tableInfos) {
    StringBuilder sb = new StringBuilder();
    if (tableInfos == null) {
      return sb.toString();
    }

    for (TableInfo tableInfo : tableInfos) {
      if (sb.length() > 0) {
        sb.append(",");
      }

      String projectName = tableInfo.getProjectName();
      sb.append(projectName != null ? projectName : odps.getDefaultProject());
      sb.append(".");
      sb.append(tableInfo.getTableName());

      String parts = tableInfo.getPartPath();
      if (parts != null && !parts.trim().isEmpty()) {
        sb.append("/");
        if (parts.endsWith("/")) {
          parts = parts.substring(0, parts.length() - 1);
        }
        sb.append(parts);
      }
    }
    return sb.toString();
  }

  /**
   * Prints worker count and input/output record statistics of every executed
   * stage to {@code System.err}.
   */
  private void printStageStatic() {
    StringBuilder sb = new StringBuilder();
    for (StageStatic item : stageStaticList) {
      sb.append("\n" + item.getTaskId());
      sb.append("\n\tWorker Count: " + item.getWorkerCount());

      sb.append("\n\tInput Records: ");
      sb.append("\n\t\tinput: ").append(item.getTotalInputRecords());
      sb.append(" (min: ").append(item.getMinInputRecords());
      sb.append(", max: ").append(item.getMaxInputRecords());
      sb.append(", avg: ").append(item.getAvgInputRecords());
      sb.append(")");

      sb.append("\n\tOutput Records: ");
      sb.append("\n\t\t").append(item.getNextTaskId());
      sb.append(": ").append(item.getTotalOutputRecords());
      sb.append(" (min: ").append(item.getMinOutputRecords());
      sb.append(", max: ").append(item.getMaxOutputRecords());
      sb.append(", avg: ").append(item.getAvgOutputRecords());
      sb.append(")");
    }
    System.err.println(sb.toString());
  }

  /**
   * Prints the counters of the run to {@code System.err}: the total, the
   * framework counters (group named after {@link JobCounter}) and the user
   * defined counters grouped by counter group. Counters of the group
   * {@code com.aliyun.odps.mapred.local.Counter.JobCounter} are included in
   * the total but not listed. The whole report is lower-cased before
   * printing.
   */
  private void printCounters() {
    final String frameworkGroup = JobCounter.class.getName();
    final String jobCounterGroup = "com.aliyun.odps.mapred.local.Counter.JobCounter";

    int totalCount = 0;
    int frameWorkCounterCount = 0;
    int userCounterCount = 0;
    for (CounterGroup group : counters) {
      String groupName = group.getDisplayName();
      boolean isFramework = groupName.equals(frameworkGroup);
      boolean isJobCounter = groupName.equals(jobCounterGroup);
      for (Counter counter : group) {
        // internal bookkeeping counters are never reported
        if (counter.getDisplayName().startsWith("__EMPTY_")) {
          continue;
        }
        if (isFramework) {
          ++frameWorkCounterCount;
        } else if (!isJobCounter) {
          ++userCounterCount;
        }
        ++totalCount;
      }
    }

    StringBuilder sb = new StringBuilder("Counters: " + totalCount);

    sb.append("\n\tMap-Reduce Framework: " + frameWorkCounterCount);
    for (CounterGroup group : counters) {
      if (!group.getDisplayName().equals(frameworkGroup)) {
        continue;
      }
      for (Counter counter : group) {
        if (counter.getDisplayName().startsWith("__EMPTY_")) {
          continue;
        }
        sb.append("\n\t\t" + counter.getDisplayName() + "=" + counter.getValue());
      }
    }

    sb.append("\n\tUser Defined Counters: " + userCounterCount);
    for (CounterGroup group : counters) {
      String groupName = group.getDisplayName();
      if (groupName.equals(frameworkGroup) || groupName.equals(jobCounterGroup)) {
        continue;
      }
      sb.append("\n\t\t" + groupName);
      for (Counter counter : group) {
        if (counter.getDisplayName().equals(JobCounter.__EMPTY_WILL_NOT_SHOW.toString())) {
          continue;
        }
        sb.append("\n\t\t\t" + counter.getDisplayName() + "=" + counter.getValue());
      }
    }

    System.err.println(sb.toString().toLowerCase());
  }

  /** Returns the job configuration used by this runner. */
  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Sets the job configuration; the given configuration is wrapped in a new
   * {@link BridgeJobConf}.
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = new BridgeJobConf(conf);
  }
}