com.aliyun.odps.mapred.LocalJobRunner.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.mapred.LocalJobRunner.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred;

import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.conf.Configuration;
import com.aliyun.odps.counter.Counter;
import com.aliyun.odps.counter.CounterGroup;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.local.common.Constants;
import com.aliyun.odps.local.common.DownloadMode;
import com.aliyun.odps.local.common.FileSplit;
import com.aliyun.odps.local.common.JobDirecotry;
import com.aliyun.odps.local.common.TableMeta;
import com.aliyun.odps.local.common.WareHouse;
import com.aliyun.odps.local.common.security.ApplicatitionType;
import com.aliyun.odps.local.common.security.SecurityClient;
import com.aliyun.odps.local.common.utils.DownloadUtils;
import com.aliyun.odps.local.common.utils.LocalRunUtils;
import com.aliyun.odps.local.common.utils.PartitionUtils;
import com.aliyun.odps.local.common.utils.SchemaUtils;
import com.aliyun.odps.mapred.bridge.utils.Validator;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.conf.SessionState;
import com.aliyun.odps.mapred.local.JobCounter;
import com.aliyun.odps.mapred.local.LocalRunningJob;
import com.aliyun.odps.mapred.local.LocalTaskId;
import com.aliyun.odps.mapred.local.MRExceptionCode;
import com.aliyun.odps.mapred.local.MapDriver;
import com.aliyun.odps.mapred.local.MapOutputBuffer;
import com.aliyun.odps.mapred.local.ReduceDriver;
import com.aliyun.odps.mapred.local.StageStatic;
import com.aliyun.odps.mapred.local.utils.LocalMRUtils;
import com.aliyun.odps.mapred.local.utils.LocalValidatorFactory;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.pipeline.Pipeline;
import com.aliyun.odps.pipeline.Pipeline.TransformNode;

public class LocalJobRunner implements JobRunner {

    // Input splits handed to the mappers; filled by processInput()/processInputs().
    private List<FileSplit> inputs;
    // Local warehouse singleton, fetched and initialized in initialize().
    private WareHouse wareHouse;
    // Temporary job directory; provides the job file and the input/output/resource dirs.
    private JobDirecotry jobDirecotry;
    // Counters passed to every MapDriver and ReduceDriver of this job.
    private Counters counters;
    // ODPS client taken from the current SessionState in initialize().
    private Odps odps;
    // Job configuration. NOTE(review): not assigned in the visible part of this
    // class - presumably injected through a setter further down; confirm.
    private BridgeJobConf conf;
    // Maps each input split back to the TableInfo it was created for.
    private Map<FileSplit, TableInfo> splitToTableInfo;
    // One entry per stage created via createStageStatic(); printed by printStageStatic().
    private List<StageStatic> stageStaticList;

    private static final Log LOG = LogFactory.getLog(LocalJobRunner.class);
    // Re-assigned on every initialize() call to the __EMPTY_WILL_NOT_SHOW counter
    // of the job being initialized.
    public static Counter EMPTY_COUNTER;

    // pipe mode: pipeline parsed from conf in runJob(); null selects the plain MR path
    private Pipeline pipeline;

    /**
     * Prepares the per-job state: ODPS client, local warehouse, bookkeeping
     * collections, job directory, the shared empty counter and finally the
     * security sandbox.
     */
    public void initialize() {
        this.odps = SessionState.get().getOdps();

        final WareHouse localWareHouse = WareHouse.getInstance();
        this.wareHouse = localWareHouse;
        localWareHouse.init(this.odps, this.conf);

        LocalMRUtils.generateLocalMrTaskName();

        this.inputs = new ArrayList<FileSplit>();
        final Counters jobCounters = new Counters();
        this.counters = jobCounters;
        this.splitToTableInfo = new HashMap<FileSplit, TableInfo>();
        this.stageStaticList = new LinkedList<StageStatic>();
        this.jobDirecotry = new JobDirecotry();
        EMPTY_COUNTER = jobCounters.findCounter(JobCounter.__EMPTY_WILL_NOT_SHOW);

        initSecurity();
    }

    /**
     * Collects the code-source locations of the local-run jars and the path
     * placeholders for the policy template, then initializes the security
     * client with the security-related settings from the job configuration.
     */
    private void initSecurity() {
        // One anchor class per jar, in order:
        // odps-mapred-local, odps-sdk-mapred, odps-mapred-bridge.
        final Class<?>[] anchors = { LocalJobRunner.class, MapperBase.class, BridgeJobConf.class };

        final List<String> codeBase = new LinkedList<String>();
        for (Class<?> anchor : anchors) {
            final String location = anchor.getProtectionDomain().getCodeSource().getLocation().getPath();
            // keep only what follows the first ':' (whole string when there is none)
            codeBase.add(location.substring(location.indexOf(":") + 1));
        }

        final Map<String, String> replacement = new HashMap<String, String>();
        final String tempHome = WareHouse.getInstance().getJobDirStr() + File.separator + "-";
        replacement.put("\\$TEMP_HOME\\$", tempHome);
        final String warehouseHome = WareHouse.getInstance().getWarehouseDir().getAbsolutePath() + File.separator
                + "-";
        replacement.put("\\$WAREHOUSE_HOME\\$", warehouseHome);

        final boolean securityOn = conf.getBoolean(Constants.LOCAL_SECURITY_ENABLE, false);
        final boolean jniOn = conf.getBoolean(Constants.LOCAL_SECURITY_JNI_ENABLE, false);
        final String customPolicy = conf.get(Constants.LOCAL_USER_DEFINE_POLICY, "");

        SecurityClient.init(ApplicatitionType.MR, codeBase, replacement, securityOn, jniOn, customPolicy);
    }

    /**
     * Initializes the runner, executes the job synchronously and returns a
     * handle reporting success together with the collected counters.
     *
     * @return the finished job, always in state {@code SUCCEEDED}
     * @throws RuntimeException wrapping any exception raised while
     *         initializing, running or building the result
     */
    @Override
    public RunningJob submit() {
        RunningJob finished;
        try {
            initialize();
            runJob();
            finished = new LocalRunningJob(wareHouse.getJobName(), JobStatus.SUCCEEDED, counters);
        } catch (Exception failure) {
            throw new RuntimeException(failure);
        }
        return finished;
    }

    /**
     * Runs the whole local job: dumps the configuration to the job file,
     * prepares inputs, outputs and resources, validates the configuration,
     * executes the map/reduce stages inside the security sandbox, moves the
     * results into the warehouse and prints a summary to stderr.
     *
     * @throws IOException   if writing the job file or preparing/moving data fails
     * @throws OdpsException if preparation fails or any exception is raised
     *                       while the stages are running (wrapped)
     */
    private void runJob() throws IOException, OdpsException {
        pipeline = Pipeline.fromJobConf(conf);
        LOG.info("Run mapreduce job in local mode, Type: " + (pipeline == null ? "MR" : "MRR") + ", Job ID: "
                + wareHouse.getJobName());

        // write job configuration to temporary file job.xml;
        // the stream is closed even when serialization fails
        FileOutputStream fos = new FileOutputStream(jobDirecotry.getJobFile());
        try {
            conf.writeXml(fos);
        } finally {
            fos.close();
        }

        LOG.info("Start to process input tables");
        processInputs();
        LOG.info("Finished process input tables");

        LOG.info("Start to process output tables");
        processOutputs();
        LOG.info("Finished process output tables");

        LOG.info("Start to process resources");
        processResources();
        LOG.info("Finished process resources");

        LOG.info("Start to fill tableInfo");
        fillTableInfo();
        LOG.info("Finished fill tableInfo");

        LOG.info("Start to validate configuration");
        Validator validator = LocalValidatorFactory.getValidator(conf);
        validator.validate();
        LOG.info("Finished validate configuration");

        try {
            SecurityClient.open();
            if (pipeline != null) {
                handlePipeMode();
            } else {
                handleNonPipeMode();
            }
        } catch (Exception ex) {
            throw new OdpsException(ex);
        } finally {
            // always leave the sandbox, whether the stages succeeded or not
            SecurityClient.close();
        }

        // copy output files from temporary directory to warehouse
        moveOutputs();

        // delete temporary directory if user not set odps.mapred.temp.retain=true
        try {
            if (!wareHouse.isRetainTempData()) {
                FileUtils.deleteDirectory(jobDirecotry.getJobDir());
            }
        } catch (Exception exception) {
            // best effort: a failed cleanup must not fail the job, but keep the cause in the log
            LOG.warn(exception.getMessage(), exception);
        }

        // print summary content
        System.err.println();
        System.err.println("Summary:");
        printInputOutput();
        printStageStatic();
        printCounters();
        System.err.println("\nOK");
    }

    /**
     * Runs a pipeline (MRR) job: first all mappers, one after another, then
     * every further pipeline node as a reduce stage. The output buffer of a
     * stage becomes the input buffer of the next one. A StageStatic entry is
     * created per stage and linked to its successor through nextTaskId.
     *
     * @throws IOException if computing a reduce count fails or a driver
     *                     propagates an I/O error
     */
    private void handlePipeMode() throws IOException {

        int mapCopyNum = 0, reduceCopyNum = 0;
        if (inputs.size() > 0) {
            // one mapper per input split
            mapCopyNum = inputs.size();
        } else {
            // allow no input
            mapCopyNum = conf.getInt("odps.stage.mapper.num", 1);
        }
        TransformNode pipeNode = pipeline.getFirstNode();
        reduceCopyNum = computeReduceNum(mapCopyNum, pipeNode);

        LOG.info("Start to run mappers, num: " + mapCopyNum);

        TaskId taskId = new LocalTaskId("M1", 0, odps.getDefaultProject());

        StageStatic stageStatic = createStageStatic(taskId);
        stageStatic.setWorkerCount(mapCopyNum);

        // buffer receiving the mapper output, sized for the first reduce stage
        MapOutputBuffer inputBuffer = new MapOutputBuffer(conf, pipeline, taskId.getTaskId(), reduceCopyNum);

        for (int mapId = 0; mapId < mapCopyNum; mapId++) {
            FileSplit split = inputs.size() > 0 ? inputs.get(mapId) : FileSplit.NullSplit;
            // NOTE(review): every mapper gets instance id 0 here, whereas the reducers
            // below use j and non-pipe mode uses mapId + 1 - confirm this is intended.
            taskId = new LocalTaskId("M1", 0, odps.getDefaultProject());
            LOG.info("Start to run mapper, TaskId: " + taskId + ", Input: " + splitToTableInfo.get(split));

            MapDriver mapDriver = new MapDriver(conf, split, taskId, inputBuffer, counters,
                    splitToTableInfo.get(split));
            mapDriver.run();

            // record this task's input/output record counters on the stage and reset them
            setInputOutputRecordCount(stageStatic);
            LOG.info("Fininshed run mapper, TaskId: " + taskId + ", Input: " + splitToTableInfo.get(split));
        }
        LOG.info("Fininshed run all mappers, num: " + mapCopyNum);

        // every pipeline node after the first one is run as a reduce stage
        int reduceNodeCount = pipeline.getNodeNum() - 1;
        if (reduceNodeCount > 0) {
            LOG.info("Start to run reduces, num: " + reduceNodeCount);

            stageStatic.setNextTaskId("R2_1");

            int i = 0;
            for (; i < reduceNodeCount; ++i) {

                // stage name is "R<i+2>_<i+1>"
                // NOTE(review): the original comment was garbled; it apparently described
                // i+2 as the stage number and i+1 as the upstream stage number - confirm.
                taskId = new LocalTaskId("R" + (i + 2) + "_" + (i + 1), 0, odps.getDefaultProject());
                LOG.info("Start to run reduce, taskId: " + taskId);

                // point the previous stage's nextTaskId at this stage
                stageStatic.setNextTaskId("R" + (i + 2) + "_" + (i + 1));

                stageStatic = createStageStatic(taskId);
                stageStatic.setWorkerCount(reduceCopyNum);
                int nextReduceCopyNum = computeReduceNum(reduceCopyNum, pipeline.getNode(i + 1));

                MapOutputBuffer outputBuffer = new MapOutputBuffer(conf, pipeline, taskId.getTaskId(),
                        nextReduceCopyNum);

                for (int j = 0; j < reduceCopyNum; ++j) {
                    taskId = new LocalTaskId("R" + (i + 2) + "_" + (i + 1), j, odps.getDefaultProject());
                    ReduceDriver reduceDriver = new ReduceDriver(conf, inputBuffer, outputBuffer, taskId, counters,
                            j);
                    reduceDriver.run();

                    setInputOutputRecordCount(stageStatic);
                }

                // this stage's output feeds the next stage
                inputBuffer = outputBuffer;
                reduceCopyNum = nextReduceCopyNum;

                LOG.info("Finished run reduce, taskId: " + taskId);
            }

            // the last stage gets its own name plus the "FS_9" suffix as nextTaskId
            // (i equals reduceNodeCount here, so this is "R<n+1>_<n>FS_9")
            stageStatic.setNextTaskId("R" + (i + 1) + "_" + i + "FS_9");

            LOG.info("Fininshed run all reduces, num: " + reduceNodeCount);

        } else {
            stageStatic.setNextTaskId("M1");
            LOG.info("This is a MapOnly job");
        }

    }

    /**
     * Runs a plain map/reduce job: all mappers sequentially into one shared
     * buffer, then (when the reduce count is positive) all reducers reading
     * from that buffer. Stage statistics are recorded per stage.
     *
     * @throws IOException if computing the reduce count fails or a driver
     *                     propagates an I/O error
     */
    private void handleNonPipeMode() throws IOException {
        final boolean hasInputs = inputs.size() > 0;
        // allow no input: fall back to the configured mapper count
        final int mapperTotal = hasInputs ? inputs.size() : conf.getInt("odps.stage.mapper.num", 1);
        final int reducerTotal = computeReduceNum(mapperTotal, null);
        final MapOutputBuffer shuffle = new MapOutputBuffer(conf, reducerTotal);

        LOG.info("Start to run mappers, num: " + mapperTotal);

        final StageStatic mapStage = createStageStatic(new LocalTaskId("M1", 0, odps.getDefaultProject()));
        mapStage.setWorkerCount(mapperTotal);

        for (int index = 0; index < mapperTotal; index++) {
            final FileSplit split = hasInputs ? inputs.get(index) : FileSplit.NullSplit;
            final TaskId mapperId = new TaskId("M", index + 1);
            final TableInfo source = splitToTableInfo.get(split);
            LOG.info("Start to run mapper, TaskId: " + mapperId + ", Input: " + source);

            new MapDriver(conf, split, mapperId, shuffle, counters, source).run();

            setInputOutputRecordCount(mapStage);
            LOG.info("Fininshed run mapper, TaskId: " + mapperId + ", Input: " + source);
        }

        LOG.info("Fininshed run all mappers, num: " + mapperTotal);

        if (reducerTotal <= 0) {
            mapStage.setNextTaskId("M1");
            LOG.info("This is a MapOnly job");
            return;
        }

        LOG.info("Start to run reduces, num: " + reducerTotal);

        final TaskId reduceStageId = new LocalTaskId("R2_1", 0, odps.getDefaultProject());
        // link the map stage to the reduce stage
        mapStage.setNextTaskId("R2_1");

        final StageStatic reduceStage = createStageStatic(reduceStageId);
        reduceStage.setWorkerCount(reducerTotal);

        for (int partition = 0; partition < reducerTotal; ++partition) {
            final TaskId reducerId = new TaskId("R", partition);
            LOG.info("Start to run reduce, taskId: " + reducerId);

            new ReduceDriver(conf, shuffle, null, reducerId, counters, partition).run();

            setInputOutputRecordCount(reduceStage);
            LOG.info("Finished run reduce, taskId: " + reducerId);
        }

        reduceStage.setNextTaskId("R2_1FS_9");

        LOG.info("Fininshed run all reduces, num: " + reducerTotal);
    }

    /**
     * Creates a statistics entry for a new stage, registers it in
     * {@code stageStaticList} and labels it with the task id's string form.
     *
     * @param taskId id of the stage being started
     * @return the newly registered entry
     */
    private StageStatic createStageStatic(TaskId taskId) {
        final StageStatic created = new StageStatic();
        stageStaticList.add(created);
        final String stageName = taskId.toString();
        created.setTaskId(stageName);
        return created;
    }

    /**
     * Hands the current values of the internal input/output record counters
     * to the given stage entry and then resets both counters to zero.
     *
     * @param stageStatic stage entry receiving the counts
     */
    private void setInputOutputRecordCount(StageStatic stageStatic) {
        final Counter consumed = counters.findCounter(JobCounter.__EMPTY_INPUT_RECORD_COUNT);
        stageStatic.setInputRecordCount(consumed.getValue());

        final Counter produced = counters.findCounter(JobCounter.__EMPTY_OUTPUT_RECORD_COUNT);
        stageStatic.setOutputRecordCount(produced.getValue());

        // start from zero for the next task
        consumed.setValue(0);
        produced.setValue(0);
    }

    /**
     * Prepares one input table: downloads it into the warehouse when it is
     * missing (or when the download mode is ALWAYS), copies the requested
     * columns into the job's input directory and registers one split per
     * copied data file.
     *
     * @param tableInfo the input table; its project name is defaulted when empty
     * @throws IOException      if a partition spec is given for a table that has
     *                          no partitions in the warehouse, or on I/O errors
     * @throws OdpsException    if downloading fails
     * @throws RuntimeException if the table info is invalid or the table is
     *                          still missing after the download
     */
    private void processInput(TableInfo tableInfo) throws IOException, OdpsException {
        if (tableInfo == null || StringUtils.isBlank(tableInfo.getTableName())) {
            throw new RuntimeException("Invalid TableInfo: " + tableInfo);
        }
        if (StringUtils.isEmpty(tableInfo.getProjectName())) {
            tableInfo.setProjectName(wareHouse.getOdps().getDefaultProject());
        }

        final String[] requestedCols = tableInfo.getCols();
        // partition spec requested by the job
        final PartitionSpec requestedParts = tableInfo.getPartitionSpec();

        // fetch schema and data when the warehouse lacks them or a refresh is forced
        final boolean needsDownload = !wareHouse.existsPartition(tableInfo.getProjectName(),
                tableInfo.getTableName(), requestedParts) || wareHouse.getDownloadMode() == DownloadMode.ALWAYS;
        if (needsDownload) {
            DownloadUtils.downloadTableSchemeAndData(odps, tableInfo, wareHouse.getLimitDownloadRecordCount(),
                    wareHouse.getInputColumnSeperator());

            final boolean nowPresent = wareHouse.existsPartition(tableInfo.getProjectName(),
                    tableInfo.getTableName(), requestedParts);
            if (!nowPresent) {
                throw new RuntimeException(LocalRunUtils.getDownloadErrorMsg(tableInfo.toString()));
            }
        }

        // from here on work with the metadata stored in the warehouse
        final TableMeta meta = wareHouse.getTableMeta(tableInfo.getProjectName(), tableInfo.getTableName());
        final Column[] readFields = LocalRunUtils.getInputTableFields(meta, requestedCols);
        final List<PartitionSpec> knownParts = wareHouse.getPartitions(tableInfo.getProjectName(),
                tableInfo.getTableName());

        if (knownParts.isEmpty()) {
            // not partitioned table
            if (tableInfo.getPartSpec() != null && tableInfo.getPartSpec().size() > 0) {
                throw new IOException(MRExceptionCode.ODPS_0720121 + "table " + tableInfo.getProjectName() + "."
                        + tableInfo.getTableName() + " is not partitioned table");
            }

            final File tableSrc = wareHouse.getTableDir(meta.getProjName(), meta.getTableName());
            if (LocalRunUtils.listDataFiles(tableSrc).size() > 0) {
                // stage a copy of the warehouse data inside the job directory;
                // schema and data share the same directory here
                final File stagedDir = jobDirecotry.getInputDir(
                        wareHouse.getRelativePath(meta.getProjName(), meta.getTableName(), null));
                wareHouse.copyTable(meta.getProjName(), meta.getTableName(), null, requestedCols, stagedDir,
                        wareHouse.getLimitDownloadRecordCount(), wareHouse.getInputColumnSeperator());
                for (File dataFile : LocalRunUtils.listDataFiles(stagedDir)) {
                    final FileSplit split = new FileSplit(dataFile, readFields, 0L, dataFile.length());
                    splitToTableInfo.put(split, tableInfo);
                    inputs.add(split);
                }
            }
            return;
        }

        // partitioned table
        for (PartitionSpec candidate : knownParts) {
            // skip partitions the job did not ask for
            if (!PartitionUtils.match(requestedParts, candidate)) {
                continue;
            }
            final File partitionSrc = wareHouse.getPartitionDir(meta.getProjName(), meta.getTableName(),
                    candidate);
            // add input split only when src dir has data file
            if (LocalRunUtils.listDataFiles(partitionSrc).size() == 0) {
                continue;
            }

            // stage a copy of the warehouse data inside the job directory
            final File stagedDataDir = jobDirecotry.getInputDir(
                    wareHouse.getRelativePath(meta.getProjName(), meta.getTableName(), candidate));
            final File stagedSchemaDir = jobDirecotry.getInputDir(
                    wareHouse.getRelativePath(meta.getProjName(), meta.getTableName(), null));
            wareHouse.copyTable(meta.getProjName(), meta.getTableName(), candidate, requestedCols,
                    stagedSchemaDir, wareHouse.getLimitDownloadRecordCount(),
                    wareHouse.getInputColumnSeperator());
            for (File dataFile : LocalRunUtils.listDataFiles(stagedDataDir)) {
                final FileSplit split = new FileSplit(dataFile, readFields, 0L, dataFile.length());
                splitToTableInfo.put(split, tableInfo);
                inputs.add(split);
            }
        }
    }

    /**
     * Prepares every input table declared in the configuration. When tables
     * are declared but none of them yields a split, a single null split is
     * registered so that one mapper still runs.
     *
     * @throws IOException   on I/O errors while preparing a table
     * @throws OdpsException if downloading a table fails
     */
    private void processInputs() throws IOException, OdpsException {

        // UserConfChecker.checkMapTasks(conf);

        final TableInfo[] declared = InputUtils.getTables(conf);
        if (declared == null) {
            LOG.debug("No input tables to process");
            return;
        }

        for (int idx = 0; idx < declared.length; idx++) {
            final TableInfo current = declared[idx];
            LOG.debug("Start to process input table: " + current);
            processInput(current);
            LOG.debug("Finished process input table: " + current);
        }

        if (inputs.size() == 0) {
            inputs.add(FileSplit.NullSplit);
        }
    }

    /**
     * Makes the job's resources available locally: downloads each resource
     * into the warehouse when it is missing (or when the download mode is
     * ALWAYS), copies it into the job's resource directory and installs a new
     * context class loader that can load classes from those copies.
     *
     * <p>The previous context class loader becomes the parent of the new one.
     * It is no longer required to be a {@link URLClassLoader}: on Java 9 and
     * later the application class loader is not one, and the former
     * unconditional cast failed with a {@code ClassCastException}.
     *
     * @throws IOException   on I/O errors while copying a resource
     * @throws OdpsException if downloading a resource fails
     */
    @SuppressWarnings("deprecation")
    private void processResources() throws IOException, OdpsException {
        String[] resources = conf.getResources();

        if (resources == null || resources.length == 0) {
            LOG.debug("No resources to process");
            return;
        }

        // de-duplicate resource names
        Set<String> names = new HashSet<String>(Arrays.asList(resources));
        LOG.info("Start to process resources: " + StringUtils.join(resources, ','));

        ClassLoader parent = Thread.currentThread().getContextClassLoader();
        if (parent == null) {
            // no context loader set on this thread: fall back to the loader of this class
            parent = LocalJobRunner.class.getClassLoader();
        }

        List<URL> cp = new ArrayList<URL>();
        if (parent instanceof URLClassLoader) {
            // keep the previous behavior of repeating the parent's URLs in the child
            cp.addAll(Arrays.asList(((URLClassLoader) parent).getURLs()));
        }

        String curProjName = wareHouse.getOdps().getDefaultProject();
        File resDir = jobDirecotry.getResourceDir();
        for (String name : names) {

            // res = [project name, resource name]
            List<String> res = LocalRunUtils.parseResourceName(name, curProjName);

            String projName = res.get(0);
            String resName = res.get(1);

            if (!wareHouse.existsResource(projName, resName)
                    || wareHouse.getDownloadMode() == DownloadMode.ALWAYS) {
                DownloadUtils.downloadResource(odps, projName, resName, wareHouse.getLimitDownloadRecordCount(),
                        wareHouse.getInputColumnSeperator());
            }
            wareHouse.copyResource(projName, resName, resDir, wareHouse.getLimitDownloadRecordCount(),
                    wareHouse.getInputColumnSeperator());
            cp.add(new File(resDir, resName).toURI().toURL());
        }
        URLClassLoader newLoader = new URLClassLoader(cp.toArray(new URL[0]), parent);
        Thread.currentThread().setContextClassLoader(newLoader);
        conf.setClassLoader(newLoader);
    }

    /**
     * Prepares every output table declared in the configuration: creates its
     * directory inside the job directory, obtains the table metadata (from the
     * warehouse, or by download when missing or when the download mode is
     * ALWAYS), writes the schema file and registers the output schema under
     * the table's label.
     *
     * @throws IOException on I/O errors while fetching metadata or writing schema files
     */
    private void processOutputs() throws IOException {
        final TableInfo[] targets = OutputUtils.getTables(conf);
        if (targets == null || targets.length == 0) {
            LOG.debug("No output tables to process");
            return;
        }

        for (TableInfo target : targets) {
            if (StringUtils.isBlank(target.getProjectName())) {
                target.setProjectName(wareHouse.getOdps().getDefaultProject());
            }

            // FIXME: Support partition
            final File jobSideDir = jobDirecotry.getOutputDir(target.getLabel());
            jobSideDir.mkdirs();

            final boolean needsDownload = !wareHouse.existsTable(target.getProjectName(), target.getTableName())
                    || wareHouse.getDownloadMode() == DownloadMode.ALWAYS;

            final TableMeta meta;
            if (needsDownload) {
                meta = DownloadUtils.downloadTableInfo(odps, target);

                // generate output table schema in warehouse
                final File warehouseSideDir = wareHouse.getTableDir(target.getProjectName(),
                        target.getTableName());
                warehouseSideDir.mkdirs();
                SchemaUtils.generateSchemaFile(meta, null, warehouseSideDir);
            } else {
                meta = wareHouse.getTableMeta(target.getProjectName(), target.getTableName());
            }

            SchemaUtils.generateSchemaFile(meta, null, jobSideDir);
            conf.setOutputSchema(meta.getCols(), target.getLabel());
        }
    }

    /**
     * Completes the table descriptions used by the job: defaults missing
     * project names, registers the input schema of every table (all columns,
     * or only the requested ones) and expands the output tables to their full
     * column lists.
     *
     * @throws IOException on I/O errors while reading table metadata
     */
    private void fillTableInfo() throws IOException {
        String project = wareHouse.getOdps().getDefaultProject();

        // tables that actually produced input splits
        for (TableInfo info : splitToTableInfo.values()) {
            if (info.getProjectName() == null) {
                info.setProjectName(project);
            }
            applyInputSchema(info);
        }

        //fill input table
        TableInfo[] infos = InputUtils.getTables(conf);
        if (infos != null) {
            boolean changed = false;
            for (TableInfo info : infos) {
                if (info.getProjectName() == null) {
                    changed = true;
                    info.setProjectName(project);
                }
                if (applyInputSchema(info)) {
                    changed = true;
                }
            }
            // write the table list back only when an entry was modified
            if (changed) {
                InputUtils.setTables(infos, conf);
            }
        }

        // Expand output columns.
        infos = OutputUtils.getTables(conf);
        if (infos == null) {
            conf.setOutputSchema(new Column[] { new Column("nil", OdpsType.STRING) }, TableInfo.DEFAULT_LABEL);
        } else {
            for (TableInfo info : infos) {
                if (info.getProjectName() == null) {
                    info.setProjectName(project);
                }
                Column[] schema = wareHouse.getTableMeta(info.getProjectName(), info.getTableName()).getCols();
                info.setCols(SchemaUtils.getColumnNames(schema));
                conf.setOutputSchema(schema, info.getLabel());
            }
            OutputUtils.setTables(infos, conf);
        }

    }

    /**
     * Registers the input schema of one table in the configuration. Without
     * an explicit column list the full warehouse schema is registered and the
     * table's column list is filled in from it; otherwise only the requested
     * columns are registered, matched by name ignoring case.
     *
     * @param info the input table, with its project name already set
     * @return {@code true} when the table's column list was filled in here
     * @throws IOException on I/O errors while reading table metadata
     */
    private boolean applyInputSchema(TableInfo info) throws IOException {
        Column[] schema = wareHouse.getTableMeta(info.getProjectName(), info.getTableName()).getCols();
        String[] requested = info.getCols();
        if (requested == null) {
            conf.setInputSchema(info, schema);
            info.setCols(SchemaUtils.getColumnNames(schema));
            return true;
        }

        // a requested name with no match in the schema leaves a null slot, as before
        Column[] selected = new Column[requested.length];
        for (int k = 0; k < requested.length; k++) {
            for (Column c : schema) {
                if (c.getName().equalsIgnoreCase(requested[k])) {
                    selected[k] = c;
                    break;
                }
            }
        }
        conf.setInputSchema(info, selected);
        return false;
    }

    /**
     * Publishes the job's output into the warehouse. For a table the
     * warehouse already knows, the existing data files of the target
     * directory are removed and replaced by the new ones; otherwise the
     * schema file is copied to the table directory and all other files to
     * the target directory.
     *
     * @throws IOException on I/O errors while copying or removing files
     */
    private void moveOutputs() throws IOException {
        final TableInfo[] targets = OutputUtils.getTables(conf);
        if (targets == null) {
            return;
        }

        // accepts the schema file only
        final FileFilter schemaOnly = new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                return candidate.getName().equals("__schema__");
            }
        };
        // accepts everything except the schema file
        final FileFilter dataOnly = new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                return !candidate.getName().equals("__schema__");
            }
        };

        for (TableInfo target : targets) {
            final String label = target.getLabel();

            String project = target.getProjectName();
            if (project == null) {
                project = wareHouse.getOdps().getDefaultProject();
            }
            final String tableName = target.getTableName();
            final Map<String, String> partSpec = target.getPartSpec();

            final File stagedDir = jobDirecotry.getOutputDir(label);
            final File destination = wareHouse.createPartitionDir(project, tableName,
                    PartitionUtils.convert(partSpec));

            if (wareHouse.existsTable(project, tableName)) {
                LOG.info("Reload warehouse table:" + tableName);
                LocalRunUtils.removeDataFiles(destination);
                wareHouse.copyDataFiles(stagedDir, null, destination, wareHouse.getInputColumnSeperator());
                continue;
            }

            LOG.info("Copy output to warehouse: label=" + label + " -> " + destination.getAbsolutePath());
            final File tableDir = wareHouse.getTableDir(project, tableName);
            // copy schema file
            FileUtils.copyDirectory(stagedDir, tableDir, schemaOnly);
            // copy data files
            FileUtils.copyDirectory(stagedDir, destination, dataOnly);
        }
    }

    /**
     * Determines the number of reduce tasks for the next stage and stores it
     * in the configuration.
     *
     * <p>With a pipeline node, the task count of its successor is used (or its
     * own when it has none). Without one, the configured reducer count is used
     * when the warehouse reports the key {@code odps.stage.reducer.num};
     * otherwise a quarter of the mapper count, but at least one.
     *
     * @param mapNum   number of tasks in the preceding stage
     * @param pipeNode current pipeline node, or {@code null} outside pipe mode
     * @return the reduce task count
     * @throws IOException if the resulting count is negative
     */
    private int computeReduceNum(int mapNum, TransformNode pipeNode) throws IOException {
        final int reduceNum;
        if (pipeNode == null) {
            if (wareHouse.caintainsKey("odps.stage.reducer.num")) {
                reduceNum = conf.getNumReduceTasks();
            } else {
                reduceNum = Math.max(1, mapNum / 4);
            }
        } else {
            final TransformNode successor = pipeNode.getNextNode();
            reduceNum = (successor != null ? successor : pipeNode).getNumTasks();
        }

        if (reduceNum < 0) {
            throw new IOException(MRExceptionCode.ODPS_0720251 + " - reduce num cann't be less than 0");
        }
        if (reduceNum != conf.getNumReduceTasks()) {
            LOG.info("change reduce num from " + conf.getNumReduceTasks() + " to " + reduceNum);
        }

        conf.setNumReduceTasks(reduceNum);
        return reduceNum;
    }

    /**
     * Prints the job's input and output tables to stderr: a heading for each
     * group followed, when the group is not empty, by one tab-indented,
     * comma-separated line of {@code project.table[/partition]} entries.
     */
    private void printInputOutput() {
        System.err.println("Inputs:");
        printTableLine(InputUtils.getTables(conf));

        System.err.println("Outputs:");
        printTableLine(OutputUtils.getTables(conf));
    }

    /**
     * Prints one tab-indented line naming the given tables; prints nothing
     * when {@code tables} is {@code null} or empty.
     *
     * @param tables tables to list, may be {@code null}
     */
    private void printTableLine(TableInfo[] tables) {
        if (tables == null) {
            return;
        }

        final StringBuilder line = new StringBuilder();
        for (TableInfo table : tables) {
            if (line.length() > 0) {
                line.append(",");
            }

            // fall back to the default project when the table names none
            final String project = table.getProjectName();
            line.append(project != null ? project : odps.getDefaultProject());
            line.append(".").append(table.getTableName());

            String partPath = table.getPartPath();
            if (partPath != null && !partPath.trim().isEmpty()) {
                // drop one trailing slash from the partition path
                if (partPath.endsWith("/")) {
                    partPath = partPath.substring(0, partPath.length() - 1);
                }
                line.append("/").append(partPath);
            }
        }

        if (line.length() > 0) {
            System.err.println("\t" + line.toString());
        }
    }

    /**
     * Prints per-stage statistics to standard error.
     *
     * <p>For every entry in {@code stageStaticList} the report contains the
     * task id, the worker count, and the total/min/max/avg input and output
     * record figures. Output records are labelled with the stage's next task
     * id. The whole report is emitted with a single {@code println} call.
     */
    private void printStageStatic() {
        StringBuilder report = new StringBuilder();

        for (StageStatic stage : stageStaticList) {
            String header = "\n" + stage.getTaskId()
                    + "\n\tWorker Count: " + stage.getWorkerCount();
            report.append(header);

            // Input side of the stage.
            report.append("\n\tInput Records: ")
                    .append("\n\t\tinput: ")
                    .append(stage.getTotalInputRecords())
                    .append(" (min: ")
                    .append(stage.getMinInputRecords())
                    .append(", max: ")
                    .append(stage.getMaxInputRecords())
                    .append(", avg: ")
                    .append(stage.getAvgInputRecords())
                    .append(")");

            // Output side of the stage, keyed by the task that consumes it.
            report.append("\n\tOutput Records: ")
                    .append("\n\t\t")
                    .append(stage.getNextTaskId())
                    .append(": ")
                    .append(stage.getTotalOutputRecords())
                    .append(" (min: ")
                    .append(stage.getMinOutputRecords())
                    .append(", max: ")
                    .append(stage.getMaxOutputRecords())
                    .append(", avg: ")
                    .append(stage.getAvgOutputRecords())
                    .append(")");
        }

        System.err.println(report);
    }

    /**
     * Prints a lower-cased summary of {@code counters} to standard error.
     *
     * <p>The summary has three parts: the total number of visible counters,
     * the counters of the group named {@code JobCounter.class.getName()}
     * under "Map-Reduce Framework", and every other group under
     * "User Defined Counters". Counters whose display name starts with
     * {@code __EMPTY_} are placeholders: they are neither counted nor listed
     * in any section, so the printed counts always match the listed lines.
     *
     * <p>Counters of the group
     * {@code com.aliyun.odps.mapred.local.Counter.JobCounter} contribute to
     * the total but are not listed.
     */
    private void printCounters() {
        final String frameworkGroupName = JobCounter.class.getName();
        final String localJobGroupName = "com.aliyun.odps.mapred.local.Counter.JobCounter";
        final String hiddenPrefix = "__EMPTY_";

        int totalCount = 0;
        int frameworkCount = 0;
        int userCount = 0;
        StringBuilder frameworkLines = new StringBuilder();
        StringBuilder userLines = new StringBuilder();

        for (CounterGroup group : counters) {
            String groupName = group.getDisplayName();
            boolean isFramework = groupName.equals(frameworkGroupName);
            boolean isLocalJob = !isFramework && groupName.equals(localJobGroupName);
            boolean isUser = !isFramework && !isLocalJob;

            // A user group's name is listed even when all of its counters are hidden.
            if (isUser) {
                userLines.append("\n\t\t").append(groupName);
            }

            for (Counter counter : group) {
                String counterName = counter.getDisplayName();
                if (counterName.startsWith(hiddenPrefix)) {
                    continue;
                }

                ++totalCount;
                if (isFramework) {
                    ++frameworkCount;
                    frameworkLines.append("\n\t\t").append(counterName)
                            .append("=").append(counter.getValue());
                } else if (isUser) {
                    ++userCount;
                    userLines.append("\n\t\t\t").append(counterName)
                            .append("=").append(counter.getValue());
                }
                // Local job counters are counted in the total only.
            }
        }

        StringBuilder sb = new StringBuilder("Counters: " + totalCount);
        sb.append("\n\tMap-Reduce Framework: ").append(frameworkCount);
        sb.append(frameworkLines);
        sb.append("\n\tUser Defined Counters: ").append(userCount);
        sb.append(userLines);

        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
        System.err.println(sb.toString().toLowerCase(java.util.Locale.ROOT));
    }

    /**
     * Returns the configuration currently held by this runner.
     *
     * @return the value of the {@code conf} field
     */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Replaces this runner's configuration with a new {@code BridgeJobConf}
     * constructed from the supplied configuration.
     *
     * @param conf configuration passed to the {@code BridgeJobConf} constructor
     */
    @Override
    public void setConf(Configuration conf) {
        BridgeJobConf bridged = new BridgeJobConf(conf);
        this.conf = bridged;
    }

}