alluxio.job.util.JobUtils.java Source code

Java tutorial

Introduction

Here is the source code for alluxio.job.util.JobUtils.java

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.job.util;

import alluxio.AlluxioURI;
import alluxio.client.Cancelable;
import alluxio.client.ReadType;
import alluxio.client.block.AlluxioBlockStore;
import alluxio.client.block.BlockWorkerInfo;
import alluxio.client.file.FileSystem;
import alluxio.client.file.FileSystemContext;
import alluxio.client.file.URIStatus;
import alluxio.client.file.options.InStreamOptions;
import alluxio.client.file.options.OutStreamOptions;
import alluxio.client.file.policy.LocalFirstPolicy;
import alluxio.collections.IndexDefinition;
import alluxio.collections.IndexedSet;
import alluxio.exception.AlluxioException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.status.NotFoundException;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.util.network.NetworkAddressUtils.ServiceType;
import alluxio.wire.BlockLocation;
import alluxio.wire.FileBlockInfo;
import alluxio.wire.WorkerNetAddress;

import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.concurrent.ConcurrentMap;

/**
 * Static helpers shared by job implementations.
 */
public final class JobUtils {
    /** Index definition that keys a {@link BlockWorkerInfo} by its network address. */
    private static final IndexDefinition<BlockWorkerInfo, WorkerNetAddress> WORKER_ADDRESS_INDEX =
            new IndexDefinition<BlockWorkerInfo, WorkerNetAddress>(true) {
                @Override
                public WorkerNetAddress getFieldValue(BlockWorkerInfo workerInfo) {
                    return workerInfo.getNetAddress();
                }
            };

    /** Not instantiable: this class only hosts static utilities. */
    private JobUtils() {
    }

    /**
     * Picks, among the given workers, the one that holds the largest number of the listed blocks.
     *
     * <p>Block locations that point at a worker outside of {@code workers} are ignored. When
     * several workers hold the same number of blocks, the one that reached that count first
     * (in iteration order of the block infos and their locations) is returned.
     *
     * @param workers a list of workers to consider
     * @param fileBlockInfos a list of file block information
     * @return the worker storing the most blocks from the list, or {@code null} if none of the
     *         given workers stores any of the blocks
     */
    public static BlockWorkerInfo getWorkerWithMostBlocks(List<BlockWorkerInfo> workers,
            List<FileBlockInfo> fileBlockInfos) {
        // Make the candidate workers searchable by network address.
        IndexedSet<BlockWorkerInfo> candidates = new IndexedSet<>(WORKER_ADDRESS_INDEX);
        candidates.addAll(workers);

        // Running count of matching block locations seen for each candidate worker.
        ConcurrentMap<BlockWorkerInfo, Integer> tally = Maps.newConcurrentMap();
        BlockWorkerInfo leader = null;
        int leaderCount = 0;
        for (FileBlockInfo blockInfo : fileBlockInfos) {
            for (BlockLocation where : blockInfo.getBlockInfo().getLocations()) {
                BlockWorkerInfo holder =
                        candidates.getFirstByField(WORKER_ADDRESS_INDEX, where.getWorkerAddress());
                if (holder != null) {
                    Integer seenSoFar = tally.get(holder);
                    int updated = (seenSoFar == null) ? 1 : seenSoFar + 1;
                    tally.put(holder, updated);
                    // Strict comparison: an earlier leader keeps its place on a tie.
                    if (updated > leaderCount) {
                        leaderCount = updated;
                        leader = holder;
                    }
                }
                // A null holder means the location is not on one of the candidate workers.
            }
        }
        return leader;
    }

    /**
     * Loads a block into the local worker. If the block doesn't exist in Alluxio, it will be read
     * from the UFS.
     *
     * <p>The local worker is the first worker reported by the block store whose host equals the
     * connect host resolved for {@link ServiceType#WORKER_RPC}. If copying the block fails, the
     * write to the local worker is cancelled before the failure is rethrown.
     *
     * @param fs the filesystem
     * @param context filesystem context
     * @param path the file path of the block to load
     * @param blockId the id of the block to load
     * @throws NotFoundException if no worker is running on the local host
     * @throws AlluxioException if an Alluxio operation fails
     * @throws IOException if reading or writing the block fails
     */
    public static void loadBlock(FileSystem fs, FileSystemContext context, String path, long blockId)
            throws AlluxioException, IOException {
        AlluxioBlockStore store = AlluxioBlockStore.create(context);

        String thisHost = NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC);
        WorkerNetAddress target = findWorkerAddressByHost(store.getAllWorkers(), thisHost);
        if (target == null) {
            throw new NotFoundException(
                    ExceptionMessage.NO_LOCAL_BLOCK_WORKER_REPLICATE_TASK.getMessage(blockId));
        }

        // TODO(jiri): Replace with internal client that uses file ID once the internal client is
        // factored out of the core server module. The reason to prefer using file ID for this job is
        // to avoid the race between "replicate" and "rename", so that even if a file to replicate
        // is renamed, the job is still working on the correct file.
        URIStatus fileStatus = fs.getStatus(new AlluxioURI(path));

        InStreamOptions readOptions = new InStreamOptions(fileStatus);
        readOptions.getOptions()
                .setReadType(ReadType.NO_CACHE)
                .setUfsReadLocationPolicy(new LocalFirstPolicy());
        OutStreamOptions writeOptions = OutStreamOptions.defaults();

        // A block size of -1 reuses the existing block size for this block.
        try (OutputStream sink = store.getOutStream(blockId, -1, target, writeOptions)) {
            try (InputStream source = store.getInStream(blockId, readOptions)) {
                ByteStreams.copy(source, sink);
            } catch (Throwable failure) {
                cancelKeepingCause(sink, failure);
                throw failure;
            }
        }
    }

    /**
     * Finds the address of the first worker whose host equals the given host name.
     *
     * @param workers the workers to search, in order
     * @param host the host name to match
     * @return the matching worker's address, or {@code null} if no worker matches
     */
    private static WorkerNetAddress findWorkerAddressByHost(List<BlockWorkerInfo> workers,
            String host) {
        for (BlockWorkerInfo candidate : workers) {
            WorkerNetAddress address = candidate.getNetAddress();
            if (address.getHost().equals(host)) {
                return address;
            }
        }
        return null;
    }

    /**
     * Cancels the given stream, recording any failure to cancel as a suppressed exception on
     * {@code cause} so that the original failure stays the primary one.
     *
     * @param stream the output stream to cancel; must implement {@link Cancelable}
     * @param cause the failure that triggered the cancellation
     */
    private static void cancelKeepingCause(OutputStream stream, Throwable cause) {
        try {
            ((Cancelable) stream).cancel();
        } catch (Throwable cancelFailure) {
            cause.addSuppressed(cancelFailure);
        }
    }
}