// Java tutorial
/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.job.util; import alluxio.AlluxioURI; import alluxio.client.Cancelable; import alluxio.client.ReadType; import alluxio.client.block.AlluxioBlockStore; import alluxio.client.block.BlockWorkerInfo; import alluxio.client.file.FileSystem; import alluxio.client.file.FileSystemContext; import alluxio.client.file.URIStatus; import alluxio.client.file.options.InStreamOptions; import alluxio.client.file.options.OutStreamOptions; import alluxio.client.file.policy.LocalFirstPolicy; import alluxio.collections.IndexDefinition; import alluxio.collections.IndexedSet; import alluxio.exception.AlluxioException; import alluxio.exception.ExceptionMessage; import alluxio.exception.status.NotFoundException; import alluxio.util.network.NetworkAddressUtils; import alluxio.util.network.NetworkAddressUtils.ServiceType; import alluxio.wire.BlockLocation; import alluxio.wire.FileBlockInfo; import alluxio.wire.WorkerNetAddress; import com.google.common.collect.Maps; import com.google.common.io.ByteStreams; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; import java.util.concurrent.ConcurrentMap; /** * Utility class to make it easier to write jobs. 
*/ public final class JobUtils { private static final IndexDefinition<BlockWorkerInfo, WorkerNetAddress> WORKER_ADDRESS_INDEX = new IndexDefinition<BlockWorkerInfo, WorkerNetAddress>( true) { @Override public WorkerNetAddress getFieldValue(BlockWorkerInfo o) { return o.getNetAddress(); } }; /** * Returns whichever specified worker stores the most blocks from the block info list. * * @param workers a list of workers to consider * @param fileBlockInfos a list of file block information * @return a worker address storing the most blocks from the list */ public static BlockWorkerInfo getWorkerWithMostBlocks(List<BlockWorkerInfo> workers, List<FileBlockInfo> fileBlockInfos) { // Index workers by their addresses. IndexedSet<BlockWorkerInfo> addressIndexedWorkers = new IndexedSet<>(WORKER_ADDRESS_INDEX); addressIndexedWorkers.addAll(workers); // Use ConcurrentMap for putIfAbsent. A regular Map works in Java 8. ConcurrentMap<BlockWorkerInfo, Integer> blocksPerWorker = Maps.newConcurrentMap(); int maxBlocks = 0; BlockWorkerInfo mostBlocksWorker = null; for (FileBlockInfo fileBlockInfo : fileBlockInfos) { for (BlockLocation location : fileBlockInfo.getBlockInfo().getLocations()) { BlockWorkerInfo worker = addressIndexedWorkers.getFirstByField(WORKER_ADDRESS_INDEX, location.getWorkerAddress()); if (worker == null) { // We can only choose workers in the workers list. continue; } blocksPerWorker.putIfAbsent(worker, 0); int newBlockCount = blocksPerWorker.get(worker) + 1; blocksPerWorker.put(worker, newBlockCount); if (newBlockCount > maxBlocks) { maxBlocks = newBlockCount; mostBlocksWorker = worker; } } } return mostBlocksWorker; } /** * Loads a block into the local worker. If the block doesn't exist in Alluxio, it will be read * from the UFS. 
* * @param fs the filesystem * @param context filesystem context * @param path the file path of the block to load * @param blockId the id of the block to load */ public static void loadBlock(FileSystem fs, FileSystemContext context, String path, long blockId) throws AlluxioException, IOException { AlluxioBlockStore blockStore = AlluxioBlockStore.create(context); String localHostName = NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC); List<BlockWorkerInfo> workerInfoList = blockStore.getAllWorkers(); WorkerNetAddress localNetAddress = null; for (BlockWorkerInfo workerInfo : workerInfoList) { if (workerInfo.getNetAddress().getHost().equals(localHostName)) { localNetAddress = workerInfo.getNetAddress(); break; } } if (localNetAddress == null) { throw new NotFoundException(ExceptionMessage.NO_LOCAL_BLOCK_WORKER_REPLICATE_TASK.getMessage(blockId)); } // TODO(jiri): Replace with internal client that uses file ID once the internal client is // factored out of the core server module. The reason to prefer using file ID for this job is // to avoid the the race between "replicate" and "rename", so that even a file to replicate is // renamed, the job is still working on the correct file. URIStatus status = fs.getStatus(new AlluxioURI(path)); InStreamOptions inOptions = new InStreamOptions(status); inOptions.getOptions().setReadType(ReadType.NO_CACHE).setUfsReadLocationPolicy(new LocalFirstPolicy()); OutStreamOptions outOptions = OutStreamOptions.defaults(); // use -1 to reuse the existing block size for this block try (OutputStream outputStream = blockStore.getOutStream(blockId, -1, localNetAddress, outOptions)) { try (InputStream inputStream = blockStore.getInStream(blockId, inOptions)) { ByteStreams.copy(inputStream, outputStream); } catch (Throwable t) { try { ((Cancelable) outputStream).cancel(); } catch (Throwable t2) { t.addSuppressed(t2); } throw t; } } } private JobUtils() { } // Utils class not intended for instantiation. }