package alluxio.job.load;

import alluxio.AlluxioURI;
import alluxio.Constants;
import alluxio.client.block.AlluxioBlockStore;
import alluxio.client.block.BlockWorkerInfo;
import alluxio.client.file.BaseFileSystem;
import alluxio.client.file.FileSystem;
import alluxio.client.file.FileSystemContext;
import alluxio.exception.status.FailedPreconditionException;
import alluxio.job.AbstractVoidJobDefinition;
import alluxio.job.JobMasterContext;
import alluxio.job.JobWorkerContext;
import alluxio.job.load.LoadDefinition.LoadTask;
import alluxio.job.util.JobUtils;
import alluxio.job.util.SerializableVoid;
import alluxio.job.util.SerializationUtils;
import alluxio.wire.FileBlockInfo;
import alluxio.wire.WorkerInfo;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import javax.annotation.concurrent.NotThreadSafe;

 * A simple loading job that loads the blocks of a file in a distributed and round-robin fashion.
public final class LoadDefinition extends AbstractVoidJobDefinition<LoadConfig, ArrayList<LoadTask>> {
    private static final Logger LOG = LoggerFactory.getLogger(LoadDefinition.class);
    private static final int MAX_BUFFER_SIZE = 500 * Constants.MB;

    private final FileSystem mFileSystem;

     * Constructs a new {@link LoadDefinition}.
    public LoadDefinition() {
        mFileSystem = BaseFileSystem.get(FileSystemContext.get());

     * Constructs a new {@link LoadDefinition} with FileSystem context and instance.
     * @param fileSystem file system client
    public LoadDefinition(FileSystem fileSystem) {
        mFileSystem = fileSystem;

    public Map<WorkerInfo, ArrayList<LoadTask>> selectExecutors(LoadConfig config,
            List<WorkerInfo> jobWorkerInfoList, JobMasterContext jobMasterContext) throws Exception {
        Map<String, WorkerInfo> jobWorkersByAddress =
                .collect(Collectors.toMap(info -> info.getAddress().getHost(), info -> info));
        // Filter out workers which have no local job worker available.
        List<String> missingJobWorkerHosts = new ArrayList<>();
        List<BlockWorkerInfo> workers = new ArrayList<>();
        for (BlockWorkerInfo worker : AlluxioBlockStore.create().getAllWorkers()) {
            if (jobWorkersByAddress.containsKey(worker.getNetAddress().getHost())) {
            } else {
                LOG.warn("Worker on host {} has no local job worker", worker.getNetAddress().getHost());
        // Mapping from worker to block ids which that worker is supposed to load.
        Multimap<WorkerInfo, LoadTask> assignments = LinkedListMultimap.create();
        AlluxioURI uri = new AlluxioURI(config.getFilePath());
        for (FileBlockInfo blockInfo : mFileSystem.getStatus(uri).getFileBlockInfos()) {
            List<String> workersWithoutBlock = getWorkersWithoutBlock(workers, blockInfo);
            int neededReplicas = config.getReplication() - blockInfo.getBlockInfo().getLocations().size();
            if (workersWithoutBlock.size() < neededReplicas) {
                String missingJobWorkersMessage = "";
                if (!missingJobWorkerHosts.isEmpty()) {
                    missingJobWorkersMessage = ". The following workers could not be used because they have "
                            + "no local job workers: " + missingJobWorkerHosts;
                throw new FailedPreconditionException(String.format(
                        "Failed to find enough block workers to replicate to. Needed %s but only found %s. "
                                + "Available workers without the block: %s" + missingJobWorkersMessage,
                        neededReplicas, workersWithoutBlock.size(), workersWithoutBlock));
            for (int i = 0; i < neededReplicas; i++) {
                String address = workersWithoutBlock.get(i);
                WorkerInfo jobWorker = jobWorkersByAddress.get(address);
                assignments.put(jobWorker, new LoadTask(blockInfo.getBlockInfo().getBlockId()));
        return SerializationUtils.makeValuesSerializable(assignments.asMap());

     * @param blockWorkers a list of block workers
     * @param blockInfo information about a block
     * @return the block worker hosts which are not storing the specified block
    private List<String> getWorkersWithoutBlock(List<BlockWorkerInfo> blockWorkers, FileBlockInfo blockInfo) {
        List<String> blockLocations = blockInfo.getBlockInfo().getLocations().stream()
                .map(location -> location.getWorkerAddress().getHost()).collect(Collectors.toList());
        return -> !blockLocations.contains(worker.getNetAddress().getHost()))
                .map(worker -> worker.getNetAddress().getHost()).collect(Collectors.toList());

    public SerializableVoid runTask(LoadConfig config, ArrayList<LoadTask> tasks, JobWorkerContext jobWorkerContext)
            throws Exception {
        for (LoadTask task : tasks) {
            JobUtils.loadBlock(mFileSystem, FileSystemContext.get(), config.getFilePath(), task.getBlockId());
  "Loaded block " + task.getBlockId());
        return null;

     * A task representing loading a block into the memory of a worker.
    public static class LoadTask implements Serializable {
        private static final long serialVersionUID = 2028545900913354425L;
        final long mBlockId;

         * @param blockId the id of the block to load
        public LoadTask(long blockId) {
            mBlockId = blockId;

         * @return the block id
        public long getBlockId() {
            return mBlockId;

        public String toString() {
            return Objects.toStringHelper(this).add("blockId", mBlockId).toString();

    public Class<LoadConfig> getJobConfigClass() {
        return LoadConfig.class;