org.apache.tez.runtime.library.common.shuffle.impl.Shuffle.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.runtime.library.common.shuffle.impl.Shuffle.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tez.runtime.library.common.shuffle.impl;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;

import javax.crypto.SecretKey;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.common.TezJobConfig;
import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.runtime.api.Event;
import org.apache.tez.runtime.api.TezInputContext;
import org.apache.tez.runtime.library.common.ConfigUtils;
import org.apache.tez.runtime.library.common.TezRuntimeUtils;
import org.apache.tez.runtime.library.common.combine.Combiner;
import org.apache.tez.runtime.library.common.shuffle.server.ShuffleHandler;
import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator;
import org.apache.tez.runtime.library.shuffle.common.ShuffleUtils;

import com.google.common.base.Preconditions;

@InterfaceAudience.Private
@InterfaceStability.Unstable
public class Shuffle implements ExceptionReporter {

    private static final Log LOG = LogFactory.getLog(Shuffle.class);
    private static final int PROGRESS_FREQUENCY = 2000;

    private final Configuration conf;
    private final TezInputContext inputContext;
    private final ShuffleClientMetrics metrics;

    private final ShuffleInputEventHandler eventHandler;
    private final ShuffleScheduler scheduler;
    private final MergeManager merger;
    private Throwable throwable = null;
    private String throwingThreadName = null;
    private final int numInputs;
    private final SecretKey jobTokenSecret;
    private final CompressionCodec codec;
    private final boolean ifileReadAhead;
    private final int ifileReadAheadLength;

    private FutureTask<TezRawKeyValueIterator> runShuffleFuture;

    public Shuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException {
        this.inputContext = inputContext;
        this.conf = conf;
        this.metrics = new ShuffleClientMetrics(inputContext.getDAGName(), inputContext.getTaskVertexName(),
                inputContext.getTaskIndex(), this.conf, UserGroupInformation.getCurrentUser().getShortUserName());

        this.numInputs = numInputs;

        this.jobTokenSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes(
                inputContext.getServiceConsumerMetaData(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID));

        if (ConfigUtils.isIntermediateInputCompressed(conf)) {
            Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf,
                    DefaultCodec.class);
            codec = ReflectionUtils.newInstance(codecClass, conf);
        } else {
            codec = null;
        }
        this.ifileReadAhead = conf.getBoolean(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
                TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
        if (this.ifileReadAhead) {
            this.ifileReadAheadLength = conf.getInt(TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
                    TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
        } else {
            this.ifileReadAheadLength = 0;
        }

        Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext);

        FileSystem localFS = FileSystem.getLocal(this.conf);
        LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

        // TODO TEZ Get rid of Map / Reduce references.
        TezCounter shuffledMapsCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS);
        TezCounter reduceShuffleBytes = inputContext.getCounters().findCounter(TaskCounter.REDUCE_SHUFFLE_BYTES);
        TezCounter failedShuffleCounter = inputContext.getCounters().findCounter(TaskCounter.FAILED_SHUFFLE);
        TezCounter spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
        TezCounter reduceCombineInputCounter = inputContext.getCounters()
                .findCounter(TaskCounter.COMBINE_INPUT_RECORDS);
        TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS);

        LOG.info("Shuffle assigned with " + numInputs + " inputs" + ", codec: "
                + (codec == null ? "None" : codec.getClass().getName()) + "ifileReadAhead: " + ifileReadAhead);

        scheduler = new ShuffleScheduler(this.inputContext, this.conf, this.numInputs, this, shuffledMapsCounter,
                reduceShuffleBytes, failedShuffleCounter);
        eventHandler = new ShuffleInputEventHandler(inputContext, scheduler);
        merger = new MergeManager(this.conf, localFS, localDirAllocator, inputContext, combiner,
                spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this);
    }

    public void handleEvents(List<Event> events) {
        eventHandler.handleEvents(events);
    }

    /**
     * Indicates whether the Shuffle and Merge processing is complete.
     * @return false if not complete, true if complete or if an error occurred.
     */
    public boolean isInputReady() {
        if (runShuffleFuture == null) {
            return false;
        }
        return runShuffleFuture.isDone();
        //return scheduler.isDone() && merger.isMergeComplete();
    }

    /**
     * Waits for the Shuffle and Merge to complete, and returns an iterator over the input.
     * @return an iterator over the fetched input.
     * @throws IOException
     * @throws InterruptedException
     */
    public TezRawKeyValueIterator waitForInput() throws IOException, InterruptedException {
        Preconditions.checkState(runShuffleFuture != null, "waitForInput can only be called after run");
        TezRawKeyValueIterator kvIter;
        try {
            kvIter = runShuffleFuture.get();
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw (IOException) cause;
            } else if (cause instanceof InterruptedException) {
                throw (InterruptedException) cause;
            } else {
                throw new TezUncheckedException("Unexpected exception type while running Shuffle and Merge", cause);
            }
        }
        return kvIter;
    }

    public void run() {
        RunShuffleCallable runShuffle = new RunShuffleCallable();
        runShuffleFuture = new FutureTask<TezRawKeyValueIterator>(runShuffle);
        new Thread(runShuffleFuture, "ShuffleMergeRunner").start();
    }

    private class RunShuffleCallable implements Callable<TezRawKeyValueIterator> {
        @Override
        public TezRawKeyValueIterator call() throws IOException, InterruptedException {
            // TODO NEWTEZ Limit # fetchers to number of inputs
            final int numFetchers = conf.getInt(TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
                    TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES);
            Fetcher[] fetchers = new Fetcher[numFetchers];
            for (int i = 0; i < numFetchers; ++i) {
                fetchers[i] = new Fetcher(conf, scheduler, merger, metrics, Shuffle.this, jobTokenSecret,
                        ifileReadAhead, ifileReadAheadLength, codec, inputContext);

                fetchers[i].start();
            }

            while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
                synchronized (this) {
                    if (throwable != null) {
                        throw new ShuffleError("error in shuffle in " + throwingThreadName, throwable);
                    }
                }
            }

            // Stop the map-output fetcher threads
            for (Fetcher fetcher : fetchers) {
                fetcher.shutDown();
            }
            fetchers = null;

            // stop the scheduler
            scheduler.close();

            // Finish the on-going merges...
            TezRawKeyValueIterator kvIter = null;
            try {
                kvIter = merger.close();
            } catch (Throwable e) {
                throw new ShuffleError("Error while doing final merge ", e);
            }

            // Sanity check
            synchronized (Shuffle.this) {
                if (throwable != null) {
                    throw new ShuffleError("error in shuffle in " + throwingThreadName, throwable);
                }
            }
            return kvIter;
        }
    }

    public synchronized void reportException(Throwable t) {
        if (throwable == null) {
            throwable = t;
            throwingThreadName = Thread.currentThread().getName();
            // Notify the scheduler so that the reporting thread finds the 
            // exception immediately.
            synchronized (scheduler) {
                scheduler.notifyAll();
            }
        }
    }

    public static class ShuffleError extends IOException {
        private static final long serialVersionUID = 5753909320586607881L;

        ShuffleError(String msg, Throwable t) {
            super(msg, t);
        }
    }

}