com.aliyun.odps.mapred.local.LocalTaskContext.java Source code

Introduction

Here is the source code for com.aliyun.odps.mapred.local.LocalTaskContext.java.
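
LocalTaskContext is the local-runner implementation of the ODPS MapReduce TaskContext interface. It writes task output as CSV files under a per-job working directory, exposes job counters, resolves the task's position when pipeline mode is in use, and lets user code read job resources (plain files, archives, and tables) that are downloaded on demand from the local warehouse into the job's resource directory.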

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.local;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.counter.Counter;
import com.aliyun.odps.counter.Counters;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.data.RecordWriter;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.local.common.FileSplit;
import com.aliyun.odps.local.common.JobDirecotry;
import com.aliyun.odps.local.common.TableMeta;
import com.aliyun.odps.local.common.WareHouse;
import com.aliyun.odps.local.common.utils.ArchiveUtils;
import com.aliyun.odps.local.common.utils.LocalRunUtils;
import com.aliyun.odps.local.common.utils.SchemaUtils;
import com.aliyun.odps.mapred.LocalJobRunner;
import com.aliyun.odps.mapred.Mapper;
import com.aliyun.odps.mapred.Reducer;
import com.aliyun.odps.mapred.TaskContext;
import com.aliyun.odps.mapred.TaskId;
import com.aliyun.odps.mapred.bridge.WritableRecord;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.conf.SessionState;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.pipeline.Pipeline;
import com.aliyun.odps.pipeline.Pipeline.TransformNode;
import com.aliyun.odps.utils.ReflectionUtils;

public abstract class LocalTaskContext implements TaskContext {

    public static final Log LOG = LogFactory.getLog(LocalTaskContext.class);

    protected BridgeJobConf conf;
    private TaskId taskId;

    private final Counters counters;

    protected Map<String, RecordWriter> recordWriters;

    // pipeline mode
    protected Pipeline pipeline;
    protected boolean pipeMode; // whether pipeline MapReduce is in use
    protected int pipeIndex = -1;
    protected TransformNode pipeNode;
    private JobDirecotry jobDirecotry;

    protected int reducerNum = 0;

    public LocalTaskContext(BridgeJobConf conf, TaskId taskid, Counters counters) throws IOException {
        this.conf = conf;
        this.taskId = taskid;
        this.jobDirecotry = new JobDirecotry();

        this.pipeline = Pipeline.fromJobConf(conf);
        if (this.pipeline != null) {
            this.pipeMode = true;
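            // Task IDs have the form "<letter><nodeIndex>_<taskNumber>",
            // e.g. "M1_000000"; the digits after the leading letter are the
            // 1-based index of this task's pipeline node.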
            String taskIdStr = getTaskID().toString();
            LOG.info("Task ID: " + taskIdStr);
            this.pipeIndex = Integer.parseInt(taskIdStr.split("_")[0].substring(1)) - 1;
            this.pipeNode = pipeline.getNode(pipeIndex);
        }

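        // Derive the downstream reducer count: in pipeline mode it comes from
        // the next pipeline node (or from this node when it is the final
        // reduce stage); otherwise it comes from the job configuration.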
        if (pipeMode && pipeNode != null) {
            if (pipeNode.getNextNode() != null) {
                reducerNum = pipeNode.getNextNode().getNumTasks();
            } else if (pipeIndex > 0) {
                // the last node of the pipeline (but not the first) must be a reduce node
                reducerNum = pipeNode.getNumTasks();
            } else {
                reducerNum = 0;
            }
        } else {
            reducerNum = conf.getNumReduceTasks();
        }

        this.recordWriters = new HashMap<String, RecordWriter>();
        TableInfo[] output = OutputUtils.getTables(conf);
        if (output != null) {
            for (TableInfo info : output) {
                Counter recordCounter;
                Counter byteCounter;
                int reduceNum = conf.getNumReduceTasks();
                if (taskid.isMap() && reduceNum > 0) {
                    // Map output is buffered in memory and never written to a
                    // file here, so these counters never change; they still
                    // must not be null, hence the shared empty counter.
                    Counter emptyCounter = counters.findCounter(JobCounter.__EMPTY_WILL_NOT_SHOW);
                    recordCounter = emptyCounter;
                    byteCounter = emptyCounter;
                } else {
                    recordCounter = counters.findCounter(JobCounter.class.getName(),
                            String.format("%s_OUTPUT_[%s]_RECORDS", taskId.isMap() ? "MAP" : "REDUCE", info));
                    byteCounter = counters.findCounter(JobCounter.class.getName(),
                            String.format("%s_OUTPUT_[%s]_BYTES", taskId.isMap() ? "MAP" : "REDUCE", info));
                }
                RecordWriter writer = new CSVRecordWriter(
                        new File(jobDirecotry.getOutputDir(info.getLabel()), taskId.toString()), recordCounter,
                        byteCounter, WareHouse.getInstance().getInputColumnSeperator());
                recordWriters.put(info.getLabel(), writer);
            }
        }

        this.counters = counters;
    }

    public Mapper createMapper() {
        return ReflectionUtils.newInstance(getMapperClass(), conf);
    }

    public Reducer createReducer() {
        return ReflectionUtils.newInstance(getReducerClass(), conf);
    }

    public Reducer createCombiner() {
        if (getCombinerClass() != null) {
            return ReflectionUtils.newInstance(getCombinerClass(), conf);
        }
        return null;
    }

    public void closeWriters() throws IOException {
        for (RecordWriter writer : recordWriters.values()) {
            writer.close();
        }
    }

    @Override
    public String[] getGroupingColumns() {
        if (pipeMode && pipeNode != null) {
            return pipeNode.getOutputGroupingColumns();
        } else {
            return conf.getOutputGroupingColumns();
        }
    }

    @Override
    public Column[] getMapOutputKeySchema() {
        return conf.getMapOutputKeySchema();
    }

    @Override
    public Column[] getMapOutputValueSchema() {
        return conf.getMapOutputValueSchema();
    }

    @Override
    public Class<? extends Mapper> getMapperClass() {
        return conf.getMapperClass();
    }

    @Override
    public int getNumReduceTasks() {
        return reducerNum;
    }

    @Override
    public Class<? extends Reducer> getReducerClass() {
        return conf.getReducerClass();
    }

    @Override
    public Class<? extends Reducer> getCombinerClass() {
        return conf.getCombinerClass();
    }

    @Override
    public Record createOutputRecord() throws IOException {
        return createOutputRecord(TableInfo.DEFAULT_LABEL);
    }

    @Override
    public Record createOutputRecord(String label) throws IOException {
        return new WritableRecord(conf.getOutputSchema(label));
    }

    @Override
    public Counter getCounter(Enum<?> key) {
        return counters.findCounter(key);
    }

    @Override
    public Counter getCounter(String groupName, String counterName) {
        return counters.findCounter(groupName, counterName);
    }

    @Override
    public TaskId getTaskID() {
        return taskId;
    }

    @Override
    public void progress() {
        // do nothing
    }

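    // File resources are fetched from the local warehouse into the job's
    // resource directory on first use, then opened from local disk.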
    @Override
    public BufferedInputStream readResourceFileAsStream(String name) throws IOException {
        if (StringUtils.isEmpty(name)) {
            throw new IOException("Resource name is empty or null");
        }

        if (!jobDirecotry.hasResource(name)) {
            String project = SessionState.get().getOdps().getDefaultProject();
            try {
                WareHouse.getInstance().copyResource(project, name, jobDirecotry.getResourceDir(),
                        WareHouse.getInstance().getLimitDownloadRecordCount(),
                        WareHouse.getInstance().getInputColumnSeperator());
            } catch (OdpsException e) {
                throw new IOException(e);
            }
        }
        File file = new File(jobDirecotry.getResourceDir(), name);
        return new BufferedInputStream(new FileInputStream(file));
    }

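    // Wraps an Iterator<File>, opening each file as a BufferedInputStream
    // lazily, only when next() is called.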
    private static class InputStreamIterator implements Iterator<BufferedInputStream> {

        private Iterator<File> files;

        public InputStreamIterator(Iterator<File> files) {
            super();
            this.files = files;
        }

        @Override
        public boolean hasNext() {
            return files.hasNext();
        }

        @Override
        public BufferedInputStream next() {
            File file = files.next();
            try {
                return new BufferedInputStream(new FileInputStream(file));
            } catch (IOException ex) {
                throw new RuntimeException(ex);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }

    }

    @Override
    public Iterable<BufferedInputStream> readResourceArchiveAsStream(String resourceName) throws IOException {
        return readResourceArchiveAsStream(resourceName, "");
    }

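    // Archive resources are downloaded like plain file resources, unpacked
    // once into a "<name>_decompressed" directory, and the extracted files
    // are then streamed back lazily.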
    @Override
    public Iterable<BufferedInputStream> readResourceArchiveAsStream(String resourceName, String relativePath)
            throws IOException {
        if (StringUtils.isEmpty(resourceName)) {
            throw new IOException("Resource name is empty or null");
        }

        File resFile = new File(jobDirecotry.getResourceDir(), resourceName);
        if (!jobDirecotry.hasResource(resourceName)) {
            String project = SessionState.get().getOdps().getDefaultProject();
            try {
                WareHouse.getInstance().copyResource(project, resourceName, jobDirecotry.getResourceDir(),
                        WareHouse.getInstance().getLimitDownloadRecordCount(),
                        WareHouse.getInstance().getInputColumnSeperator());
            } catch (OdpsException e) {
                throw new IOException(e);
            }
        }

        File resDir = new File(jobDirecotry.getResourceDir(), resourceName + "_decompressed");
        if (!resDir.exists()) {
            ArchiveUtils.unArchive(resFile, resDir);
        }

        final Collection<File> files = LocalRunUtils.listFiles(resDir, relativePath.trim());
        return new Iterable<BufferedInputStream>() {
            @Override
            public Iterator<BufferedInputStream> iterator() {
                return new InputStreamIterator(files.iterator());
            }
        };
    }

    @Override
    public Iterator<Record> readResourceTable(String tbl) throws IOException {
        if (StringUtils.isEmpty(tbl)) {
            throw new IOException("Table resource name is empty or null");
        }

        if (!jobDirecotry.hasResource(tbl)) {
            String project = SessionState.get().getOdps().getDefaultProject();
            try {
                WareHouse.getInstance().copyResource(project, tbl, jobDirecotry.getResourceDir(),
                        WareHouse.getInstance().getLimitDownloadRecordCount(),
                        WareHouse.getInstance().getInputColumnSeperator());
            } catch (OdpsException e) {
                throw new IOException(e);
            }
        }

        File dir = new File(jobDirecotry.getResourceDir(), tbl);
        LOG.info("Reading resource table from " + dir);
        final List<File> datafiles = new ArrayList<File>();

        LocalRunUtils.listAllDataFiles(dir, datafiles);

        final TableMeta tableMeta = SchemaUtils.readSchema(dir);

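        // Iterate lazily over all records in all data files: fetch() opens
        // the next CSV file whenever the current reader is exhausted.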
        return new Iterator<Record>() {
            RecordReader reader;
            Record current;
            boolean fetched;

            @Override
            public boolean hasNext() {
                if (fetched) {
                    return current != null;
                }
                // Fetch new one
                try {
                    fetch();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
                return current != null;

            }

            private void fetch() throws IOException {

                // first time
                if (reader == null) {
                    if (datafiles.isEmpty()) {
                        current = null;
                        fetched = true;
                        return;
                    }

                    File f = datafiles.remove(0);
                    reader = new CSVRecordReader(new FileSplit(f, tableMeta.getCols(), 0, f.length()),
                            tableMeta, LocalJobRunner.EMPTY_COUNTER, LocalJobRunner.EMPTY_COUNTER, counters,
                            WareHouse.getInstance().getInputColumnSeperator());
                    current = reader.read();
                    fetched = true;
                    return;
                }

                current = reader.read();
                if (current == null && !datafiles.isEmpty()) {
                    File f = datafiles.remove(0);
                    reader = new CSVRecordReader(new FileSplit(f, tableMeta.getCols(), 0, f.length()),
                            tableMeta, LocalJobRunner.EMPTY_COUNTER, LocalJobRunner.EMPTY_COUNTER, counters,
                            WareHouse.getInstance().getInputColumnSeperator());
                    current = reader.read();
                    fetched = true;
                    return;
                }

                fetched = true;
            }

            @Override
            public Record next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                fetched = false;
                return current;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

        };
    }

    @Override
    public JobConf getJobConf() {
        return (JobConf) conf;
    }

    @Override
    public Record createMapOutputKeyRecord() {
        if (pipeMode && pipeNode != null && pipeNode.getType().equals("map")) {
            return new WritableRecord(pipeNode.getOutputKeySchema());
        } else {
            return new WritableRecord(conf.getMapOutputKeySchema());
        }
    }

    @Override
    public Record createMapOutputValueRecord() {
        if (pipeMode && pipeNode != null && pipeNode.getType().equals("map")) {
            return new WritableRecord(pipeNode.getOutputValueSchema());
        } else {
            return new WritableRecord(conf.getMapOutputValueSchema());
        }
    }

    @Override
    public TableInfo[] getOutputTableInfo() {
        return OutputUtils.getTables(conf);
    }

    @Override
    public Record createOutputKeyRecord() throws IOException {
        if (pipeMode && pipeNode != null) {
            return new WritableRecord(pipeNode.getOutputKeySchema());
        } else {
            return null;
        }
    }

    @Override
    public Record createOutputValueRecord() throws IOException {
        if (pipeMode && pipeNode != null) {
            return new WritableRecord(pipeNode.getOutputValueSchema());
        } else {
            return null;
        }
    }

    public boolean isPipelineMode() {
        return this.pipeMode;
    }

    public Pipeline getPipeline() {
        return this.pipeline;
    }

    public TransformNode getCurrentNode() {
        return this.pipeNode;
    }
}
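
Example

Below is a minimal sketch (not part of the original source) of how user code running inside a task might call the TaskContext methods implemented above. The resource name "dim_table" and the counter group/name are hypothetical placeholders, and the Mapper/Reducer wiring that supplies the context is assumed rather than shown.

import java.io.IOException;
import java.util.Iterator;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.TaskContext;

public class TaskContextUsageSketch {

    // Read a table resource attached to the job and bump a custom counter.
    public static void setup(TaskContext context) throws IOException {
        // Streams records lazily from the local copy of the resource table.
        Iterator<Record> lookup = context.readResourceTable("dim_table");
        while (lookup.hasNext()) {
            Record r = lookup.next();
            // ... cache r for use during map()/reduce() ...
        }

        // Counters are collected by the framework and merged across tasks.
        context.getCounter("MyApp", "SETUP_CALLS").increment(1);

        // Create a record matching the default output table's schema.
        Record out = context.createOutputRecord();
        out.set(0, "hello");
        // context.write(out) would emit it; write() is left to subclasses here.
    }
}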