org.apache.tajo.storage.HashShuffleAppenderManager.java Source code

Introduction

Here is the source code for org.apache.tajo.storage.HashShuffleAppenderManager.java, the Tajo worker component that manages per-partition hash shuffle appenders and their intermediate shuffle files.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.storage;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.ExecutionBlockId;
import org.apache.tajo.TaskAttemptId;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.conf.TajoConf.ConfVars;
import org.apache.tajo.storage.rawfile.DirectRawFileWriter;
import org.apache.tajo.tuple.memory.MemoryRowBlock;
import org.apache.tajo.unit.StorageUnit;
import org.apache.tajo.util.Pair;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;

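/**
 * Manages hash shuffle output on a Tajo worker. For each execution block and partition it
 * lazily creates an intermediate shuffle file under one of the worker's temporal directories,
 * appends row blocks to it asynchronously (one writer thread per temporal directory), and
 * reports the resulting intermediate data when the execution block is closed.
 */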
public class HashShuffleAppenderManager {
    private static final Log LOG = LogFactory.getLog(HashShuffleAppenderManager.class);

    private ConcurrentMap<ExecutionBlockId, Map<Integer, PartitionAppenderMeta>> appenderMap = Maps
            .newConcurrentMap();
    private ConcurrentMap<Integer, ExecutorService> executors = Maps.newConcurrentMap(); // for parallel writing
    private List<String> temporalPaths = Lists.newArrayList();

    private TajoConf systemConf;
    private FileSystem defaultFS;
    private FileSystem localFS;
    private LocalDirAllocator lDirAllocator;
    private int pageSize;

    public HashShuffleAppenderManager(TajoConf systemConf) throws IOException {
        this.systemConf = systemConf;

        // initialize LocalDirAllocator
        lDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname);

        // initialize DFS and LocalFileSystems
        defaultFS = TajoConf.getTajoRootDir(systemConf).getFileSystem(systemConf);
        localFS = FileSystem.getLocal(systemConf);
        pageSize = systemConf.getIntVar(ConfVars.SHUFFLE_HASH_APPENDER_PAGE_VOLUME) * StorageUnit.MB;

        Iterable<Path> allLocalPath = lDirAllocator.getAllLocalPathsToRead(".", systemConf);

        // register each temporal path and a dedicated single-threaded writer for it
        for (Path path : allLocalPath) {
            temporalPaths.add(localFS.makeQualified(path).toString());
            executors.put(temporalPaths.size() - 1, Executors.newSingleThreadExecutor());
        }
    }

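    /**
     * Returns the index of the temporal directory that contains the given path;
     * fails if the path does not belong to any configured temporal directory.
     */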
    protected int getVolumeId(Path path) {
        int i = 0;
        for (String rootPath : temporalPaths) {
            if (path.toString().startsWith(rootPath)) {
                break;
            }
            i++;
        }
        Preconditions.checkPositionIndex(i, temporalPaths.size() - 1);
        return i;
    }

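    /**
     * Returns the appender for the given execution block and partition, creating the shuffle
     * file, its parent directory, and the underlying DirectRawFileWriter on first access.
     */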
    public synchronized HashShuffleAppenderWrapper getAppender(MemoryRowBlock memoryRowBlock, ExecutionBlockId ebId,
            int partId, TableMeta meta, Schema outSchema) throws IOException {

        Map<Integer, PartitionAppenderMeta> partitionAppenderMap = appenderMap.get(ebId);

        if (partitionAppenderMap == null) {
            partitionAppenderMap = new ConcurrentHashMap<>();
            appenderMap.put(ebId, partitionAppenderMap);
        }

        PartitionAppenderMeta partitionAppenderMeta = partitionAppenderMap.get(partId);
        if (partitionAppenderMeta == null) {
            Path dataFile = getDataFile(ebId, partId);
            FileSystem fs = dataFile.getFileSystem(systemConf);
            if (fs.exists(dataFile)) {
                FileStatus status = fs.getFileStatus(dataFile);
                LOG.info("File " + dataFile + " already exists, size=" + status.getLen());
            }

            if (!fs.exists(dataFile.getParent())) {
                fs.mkdirs(dataFile.getParent());
            }

            DirectRawFileWriter appender = new DirectRawFileWriter(systemConf, null, outSchema, meta, dataFile,
                    memoryRowBlock);
            appender.enableStats();
            appender.init();

            partitionAppenderMeta = new PartitionAppenderMeta();
            partitionAppenderMeta.partId = partId;
            partitionAppenderMeta.dataFile = dataFile;
            partitionAppenderMeta.appender = new HashShuffleAppenderWrapper(ebId, partId, pageSize, appender,
                    getVolumeId(dataFile));
            partitionAppenderMeta.appender.init();
            partitionAppenderMap.put(partId, partitionAppenderMeta);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Create Hash shuffle file(partId=" + partId + "): " + dataFile);
            }
        }

        return partitionAppenderMeta.appender;
    }

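    /**
     * Maps a partition id to one of the configured parent directories so that shuffle files
     * are spread over several subdirectories.
     */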
    public static int getPartParentId(int partId, TajoConf tajoConf) {
        return partId % tajoConf.getIntVar(ConfVars.SHUFFLE_HASH_PARENT_DIRS);
    }

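    /**
     * Builds the local path of the shuffle file for the given execution block and partition:
     * &lt;queryId&gt;/output/&lt;ebId&gt;/hash-shuffle/&lt;parentDirId&gt;/&lt;partId&gt;.
     */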
    private Path getDataFile(ExecutionBlockId ebId, int partId) throws IOException {
        try {
            // base directory for this execution block's hash shuffle output
            String executionBlockBaseDir = ebId.getQueryId().toString() + "/output" + "/" + ebId.getId()
                    + "/hash-shuffle";
            Path baseDirPath = lDirAllocator.getLocalPathForWrite(executionBlockBaseDir, systemConf);
            //LOG.info(ebId + "'s basedir is created (" + baseDirPath + ")");

            // If an EB has many partitions, spread the shuffle files over parent directories
            // so that a single directory does not hold too many files.
            return localFS.makeQualified(
                    StorageUtil.concatPath(baseDirPath, "" + getPartParentId(partId, systemConf), "" + partId));
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            throw new IOException(e);
        }
    }

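    /**
     * Closes all appenders of the given execution block and returns one HashShuffleIntermediate
     * per partition, or null if the execution block produced no hash shuffle output.
     */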
    public List<HashShuffleIntermediate> close(ExecutionBlockId ebId) throws IOException {
        Map<Integer, PartitionAppenderMeta> partitionAppenderMap = appenderMap.remove(ebId);

        if (partitionAppenderMap == null) {
            LOG.info("Close HashShuffleAppenderWrapper:" + ebId + ", not a hash shuffle");
            return null;
        }

        // Send Intermediate data to QueryMaster.
        List<HashShuffleIntermediate> intermediateEntries = new ArrayList<>();
        for (PartitionAppenderMeta eachMeta : partitionAppenderMap.values()) {
            try {
                eachMeta.appender.close();
                HashShuffleIntermediate intermediate = new HashShuffleIntermediate(eachMeta.partId,
                        eachMeta.appender.getOffset(), eachMeta.appender.getPages(),
                        eachMeta.appender.getMergedTupleIndexes());
                intermediateEntries.add(intermediate);
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
                throw e;
            }
        }

        LOG.info("Close HashShuffleAppenderWrapper:" + ebId + ", intermediates=" + intermediateEntries.size());

        return intermediateEntries;
    }

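    /**
     * Notifies every appender of the task's execution block that the given task attempt has finished.
     */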
    public void finalizeTask(TaskAttemptId taskId) {
        Map<Integer, PartitionAppenderMeta> partitionAppenderMap = appenderMap
                .get(taskId.getTaskId().getExecutionBlockId());
        if (partitionAppenderMap == null) {
            return;
        }

        for (PartitionAppenderMeta eachAppender : partitionAppenderMap.values()) {
            eachAppender.appender.taskFinished(taskId);
        }
    }

    /**
     * Asynchronously writes the given row block to the shuffle file of the given partition.
     * The returned future completes once the block has been appended and then released or
     * cleared, depending on the release flag.
     */
    public Future<MemoryRowBlock> writePartitions(TableMeta meta, Schema schema, final TaskAttemptId taskId,
            int partId, final MemoryRowBlock rowBlock, final boolean release) throws IOException {

        HashShuffleAppenderWrapper appender = getAppender(rowBlock, taskId.getTaskId().getExecutionBlockId(),
                partId, meta, schema);
        ExecutorService executor = executors.get(appender.getVolumeId());
        return executor.submit(new Callable<MemoryRowBlock>() {
            @Override
            public MemoryRowBlock call() throws Exception {
                appender.writeRowBlock(taskId, rowBlock);

                if (release)
                    rowBlock.release();
                else
                    rowBlock.clear();

                return rowBlock;
            }
        });
    }

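    /**
     * Stops the per-volume writer threads.
     */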
    public void shutdown() {
        for (ExecutorService service : executors.values()) {
            service.shutdownNow();
        }
    }

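    /**
     * Summary of one partition's shuffle output: its partition id, total volume, page offsets
     * and lengths, and the tuple index ranges of failed tasks. Instances are returned by
     * {@link #close(ExecutionBlockId)} and reported to the QueryMaster.
     */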
    public static class HashShuffleIntermediate {
        private int partId;

        private long volume;

        //[<page start offset,<task start, task end>>]
        private Collection<Pair<Long, Pair<Integer, Integer>>> failureTskTupleIndexes;

        //[<page start offset, length>]
        private List<Pair<Long, Integer>> pages = new ArrayList<>();

        public HashShuffleIntermediate(int partId, long volume, List<Pair<Long, Integer>> pages,
                Collection<Pair<Long, Pair<Integer, Integer>>> failureTskTupleIndexes) {
            this.partId = partId;
            this.volume = volume;
            this.failureTskTupleIndexes = failureTskTupleIndexes;
            this.pages = pages;
        }

        public int getPartId() {
            return partId;
        }

        public long getVolume() {
            return volume;
        }

        public Collection<Pair<Long, Pair<Integer, Integer>>> getFailureTskTupleIndexes() {
            return failureTskTupleIndexes;
        }

        public List<Pair<Long, Integer>> getPages() {
            return pages;
        }
    }

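    /**
     * Per-partition bookkeeping: the partition id, its appender, and its data file.
     */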
    static class PartitionAppenderMeta {
        int partId;
        HashShuffleAppenderWrapper appender;
        Path dataFile;

        public int getPartId() {
            return partId;
        }

        public HashShuffleAppenderWrapper getAppender() {
            return appender;
        }

        public Path getDataFile() {
            return dataFile;
        }
    }
}
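
Example usage

The following is a minimal, hypothetical sketch of how worker-side code might drive this class; it is not part of the original source. It assumes that a TableMeta, a Schema, a TaskAttemptId, and a populated MemoryRowBlock (named meta, schema, taskAttemptId, and rowBlock below) are already provided by the surrounding Tajo runtime, and it omits exception handling.

TajoConf conf = new TajoConf();
HashShuffleAppenderManager manager = new HashShuffleAppenderManager(conf);

// Asynchronously append a row block to the shuffle file of partition 0; the returned
// future completes once the block has been written and released.
Future<MemoryRowBlock> written =
        manager.writePartitions(meta, schema, taskAttemptId, 0, rowBlock, true);
written.get();

// When the execution block finishes, close its appenders and collect the per-partition
// intermediate descriptors that are reported to the QueryMaster.
List<HashShuffleAppenderManager.HashShuffleIntermediate> intermediates =
        manager.close(taskAttemptId.getTaskId().getExecutionBlockId());

manager.shutdown();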