org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.dag.app.rm.TezTestServiceTaskSchedulerService.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.dag.app.rm;

import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
import org.apache.tez.service.TezTestServiceConfConstants;
import org.apache.tez.serviceplugins.api.TaskScheduler;
import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link TaskScheduler} plugin that hands out synthetic containers bound to a fixed,
 * configured set of test-service hosts.
 *
 * <p>The scheduler does not register with the RM; it reads its sizing (memory, vcores,
 * executors per instance), the shared RPC port and the host list from the user payload
 * supplied through the {@link TaskSchedulerContext}. Every allocation request is satisfied
 * immediately with a freshly minted {@link Container}.
 */
public class TezTestServiceTaskSchedulerService extends TaskScheduler {

    private static final Logger LOG = LoggerFactory.getLogger(TezTestServiceTaskSchedulerService.class);

    /** Hosts on which a test service instance is assumed to be running. Never empty. */
    private final List<String> serviceHosts;
    /** Mints containers under an application id that is specific to this scheduler. */
    private final ContainerFactory containerFactory;
    private final Random random = new Random();
    // Currently all services must be running on the same port.
    private final int containerPort;

    /** Tasks that have been allocated and not yet deallocated, mapped to their container. */
    private final ConcurrentMap<Object, ContainerId> runningTasks = new ConcurrentHashMap<Object, ContainerId>();

    // Per instance
    private final int memoryPerInstance;
    private final int coresPerInstance;
    private final int executorsPerInstance;

    // Per Executor Thread
    private final Resource resourcePerContainer;

    /**
     * Builds the scheduler from the configuration carried in the context's initial user payload.
     *
     * <p>Not registering with the RM. Assuming the main TezScheduler will always run
     * (except local mode), and take care of YARN registration.
     *
     * @param taskSchedulerContext context supplying the application attempt id, the custom
     *                             cluster identifier and the serialized configuration
     * @throws TezUncheckedException    if the user payload cannot be turned into a configuration
     * @throws IllegalArgumentException if memory per instance, executors per instance or the
     *                                  RPC port is missing or not positive
     */
    public TezTestServiceTaskSchedulerService(TaskSchedulerContext taskSchedulerContext) {
        // Accepting configuration here to allow setting up fields as final
        super(taskSchedulerContext);
        this.serviceHosts = new LinkedList<String>();
        this.containerFactory = new ContainerFactory(taskSchedulerContext.getApplicationAttemptId(),
                taskSchedulerContext.getCustomClusterIdentifier());

        final Configuration conf;
        try {
            conf = TezUtils.createConfFromUserPayload(taskSchedulerContext.getInitialUserPayload());
        } catch (IOException e) {
            throw new TezUncheckedException(e);
        }

        this.memoryPerInstance = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB,
                -1);
        Preconditions.checkArgument(memoryPerInstance > 0,
                TezTestServiceConfConstants.TEZ_TEST_SERVICE_MEMORY_PER_INSTANCE_MB + " must be configured");

        this.executorsPerInstance = conf
                .getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE, -1);
        Preconditions.checkArgument(executorsPerInstance > 0,
                TezTestServiceConfConstants.TEZ_TEST_SERVICE_NUM_EXECUTORS_PER_INSTANCE + " must be configured");

        // vcores default to one per executor when not configured explicitly.
        this.coresPerInstance = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_VCPUS_PER_INSTANCE,
                executorsPerInstance);

        this.containerPort = conf.getInt(TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT, -1);
        // Validate the port itself; previously this re-checked executorsPerInstance, so an
        // unconfigured port (-1) slipped through and ended up in every NodeId.
        Preconditions.checkArgument(containerPort > 0,
                TezTestServiceConfConstants.TEZ_TEST_SERVICE_RPC_PORT + " must be configured");

        // Integer division truncates exactly like the former float-divide-and-cast, but without
        // the precision loss float arithmetic has for large int values.
        int memoryPerContainer = memoryPerInstance / executorsPerInstance;
        int coresPerContainer = coresPerInstance / executorsPerInstance;
        this.resourcePerContainer = Resource.newInstance(memoryPerContainer, coresPerContainer);

        String[] hosts = conf.getTrimmedStrings(TezTestServiceConfConstants.TEZ_TEST_SERVICE_HOSTS);
        if (hosts == null || hosts.length == 0) {
            // Fall back to a single local instance when no hosts are configured.
            hosts = new String[] { "localhost" };
        }
        serviceHosts.addAll(Arrays.asList(hosts));

        LOG.info("Running with configuration: " + "memoryPerInstance=" + memoryPerInstance + ", vcoresPerInstance="
                + coresPerInstance + ", executorsPerInstance=" + executorsPerInstance
                + ", resourcePerContainerInferred=" + resourcePerContainer + ", hosts=" + serviceHosts.toString());

    }

    /**
     * Reports the configured capacity of all hosts; running tasks are not subtracted.
     *
     * @return host count multiplied by the per-instance memory and vcores
     * @throws IllegalArgumentException if either product does not fit in an {@code int}
     */
    @Override
    public Resource getAvailableResources() {
        // TODO This needs information about all running executors, and the amount of memory etc available across the cluster.
        return clusterCapacity();
    }

    /** @return the number of configured service hosts */
    @Override
    public int getClusterNodeCount() {
        return serviceHosts.size();
    }

    /** No per-DAG state is kept, so there is nothing to do. */
    @Override
    public void dagComplete() {
    }

    /**
     * Reports the configured capacity of all hosts.
     *
     * @return host count multiplied by the per-instance memory and vcores
     * @throws IllegalArgumentException if either product does not fit in an {@code int}
     */
    @Override
    public Resource getTotalResources() {
        return clusterCapacity();
    }

    /** Blacklisting is not supported; the request is only logged. */
    @Override
    public void blacklistNode(NodeId nodeId) {
        LOG.info("BlacklistNode not supported");
    }

    /** Un-blacklisting is not supported; the request is only logged. */
    @Override
    public void unblacklistNode(NodeId nodeId) {
        LOG.info("unBlacklistNode not supported");
    }

    /**
     * Allocates a new container for {@code task} on one of the requested hosts (or a random
     * service host when none is requested) and reports the allocation to the context.
     * {@code capability}, {@code racks} and {@code containerSignature} are not consulted.
     */
    @Override
    public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks, Priority priority,
            Object containerSignature, Object clientCookie) {
        allocateOnHost(task, selectHost(hosts), priority, clientCookie);
    }

    /**
     * Allocates a new container for {@code task} on a random service host and reports the
     * allocation to the context. The supplied {@code containerId}, {@code capability} and
     * {@code containerSignature} are not consulted.
     */
    @Override
    public void allocateTask(Object task, Resource capability, ContainerId containerId, Priority priority,
            Object containerSignature, Object clientCookie) {
        allocateOnHost(task, selectHost(null), priority, clientCookie);
    }

    /**
     * Forgets {@code task} and tells the context that its container is being released.
     *
     * @return {@code true} if the task was known and its container was released,
     *         {@code false} if no container was recorded for the task
     */
    @Override
    public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason,
            String diagnostics) {
        ContainerId containerId = runningTasks.remove(task);
        if (containerId == null) {
            LOG.error("Could not determine ContainerId for task: {} . Could have hit a race condition. Ignoring."
                    + " The query may hang since this \"unknown\" container is now taking up a slot permanently",
                    task);
            return false;
        }
        getContext().containerBeingReleased(containerId);
        return true;
    }

    /**
     * Container-level deallocation is ignored; the request is only logged.
     *
     * @return always {@code null}
     */
    @Override
    public Object deallocateContainer(ContainerId containerId) {
        LOG.info("Ignoring deallocateContainer for containerId: {}", containerId);
        return null;
    }

    /** Nothing to do. No registration involved. */
    @Override
    public void setShouldUnregister() {

    }

    /** @return always {@code true}, since this scheduler never registers */
    @Override
    public boolean hasUnregistered() {
        // Nothing to do. No registration involved.
        return true;
    }

    /**
     * Computes the configured capacity across all service hosts.
     *
     * <p>The products are computed in {@code long} so that an overflow is detected by
     * {@link Ints#checkedCast(long)} instead of silently wrapping before the check.
     */
    private Resource clusterCapacity() {
        long hostCount = serviceHosts.size();
        return Resource.newInstance(Ints.checkedCast(hostCount * memoryPerInstance),
                Ints.checkedCast(hostCount * coresPerInstance));
    }

    /** Creates a container on {@code host}, records it for {@code task} and notifies the context. */
    private void allocateOnHost(Object task, String host, Priority priority, Object clientCookie) {
        Container container = containerFactory.createContainer(resourcePerContainer, priority, host, containerPort);
        runningTasks.put(task, container.getId());
        getContext().taskAllocated(task, clientCookie, container);
    }

    /**
     * Picks the host a task should run on.
     *
     * @param requestedHosts hosts named in the request; may be {@code null} or empty
     * @return the lexicographically smallest requested host, or a random configured service
     *         host when the request names none
     */
    private String selectHost(String[] requestedHosts) {
        if (requestedHosts != null && requestedHosts.length > 0) {
            // Sort a private copy so the caller's array is not reordered as a side effect.
            String[] sortedHosts = requestedHosts.clone();
            Arrays.sort(sortedHosts);
            String host = sortedHosts[0];
            LOG.info("Selected host: {} from requested hosts: {}", host, Arrays.toString(sortedHosts));
            return host;
        }
        String host = serviceHosts.get(random.nextInt(serviceHosts.size()));
        LOG.info("Selected random host: {} since the request contained no host information", host);
        return host;
    }

    /**
     * Creates containers with sequential ids under a custom application attempt id.
     *
     * <p>The application id is built from the supplied cluster identifier rather than the real
     * cluster timestamp, to avoid conflicts with other containers in the system.
     */
    static class ContainerFactory {
        final AtomicInteger nextId;
        final ApplicationAttemptId customAppAttemptId;

        /**
         * @param appAttemptId the real application attempt; its application sequence number and
         *                     attempt number are reused
         * @param appIdLong    value used in place of the cluster timestamp in the custom
         *                     application id
         */
        public ContainerFactory(ApplicationAttemptId appAttemptId, long appIdLong) {
            this.nextId = new AtomicInteger(1);
            ApplicationId appId = ApplicationId.newInstance(appIdLong, appAttemptId.getApplicationId().getId());
            this.customAppAttemptId = ApplicationAttemptId.newInstance(appId, appAttemptId.getAttemptId());
        }

        /**
         * Creates a container with the next sequential id on the given node.
         *
         * @param capability resources assigned to the container
         * @param priority   priority recorded on the container
         * @param hostname   host of the node the container is bound to
         * @param port       port of the node the container is bound to
         * @return a new container without a container token
         */
        @SuppressWarnings("deprecation")
        public Container createContainer(Resource capability, Priority priority, String hostname, int port) {
            ContainerId containerId = ContainerId.newInstance(customAppAttemptId, nextId.getAndIncrement());
            NodeId nodeId = NodeId.newInstance(hostname, port);
            // NOTE(review): this is the literal text "hostname:0", not the hostname argument.
            // It looks like a placeholder HTTP address - confirm before relying on it.
            String nodeHttpAddress = "hostname:0";

            return Container.newInstance(containerId, nodeId, nodeHttpAddress, capability, priority, null);
        }
    }
}