org.apache.hadoop.yarn.server.resourcemanager.quota.QuotaService.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.resourcemanager.quota.QuotaService.java

Source

/*
 * Copyright 2015 Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.server.resourcemanager.quota;

import io.hops.exception.StorageException;
import io.hops.metadata.yarn.dal.quota.ContainersCheckPointsDataAccess;
import io.hops.metadata.yarn.dal.quota.ContainersLogsDataAccess;
import io.hops.metadata.yarn.dal.quota.ProjectQuotaDataAccess;
import io.hops.metadata.yarn.dal.quota.ProjectsDailyCostDataAccess;
import io.hops.metadata.yarn.dal.rmstatestore.ApplicationStateDataAccess;
import io.hops.metadata.yarn.dal.util.YARNOperationType;
import io.hops.metadata.yarn.entity.quota.ContainerCheckPoint;
import io.hops.metadata.yarn.entity.quota.ContainerLog;
import io.hops.metadata.yarn.entity.quota.ProjectDailyCost;
import io.hops.metadata.yarn.entity.quota.ProjectDailyId;
import io.hops.metadata.yarn.entity.quota.ProjectQuota;
import io.hops.metadata.yarn.entity.rmstatestore.ApplicationState;
import io.hops.transaction.handler.LightWeightRequestHandler;
import io.hops.util.HopsWorksHelper;
import io.hops.util.RMStorageFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class QuotaService extends AbstractService {

    // Logger shared by the service and its worker thread.
    private static final Log LOG = LogFactory.getLog(QuotaService.class);

    // Background thread running WorkingThread; created in serviceStart()
    // and interrupted in serviceStop().
    private Thread quotaSchedulingThread;
    // Set to true by serviceStop(); the worker loop checks it on every iteration.
    private volatile boolean stopped = false;
    // Lower bound applied to the number of ticks in computeCharge().
    private long minNumberOfTicks = 1;
    // Maximum time, in milliseconds, the worker spends collecting one batch.
    private long batchTime;
    // Maximum number of container logs collected in one batch.
    private int batchSize;
    // Divisors used by computeCharge() to express a container's vcores,
    // memory and gpus relative to a minimum-sized container.
    private int minVcores;
    private int minMemory;
    private int minGpus;
    // Base price used by computeCharge() when the container uses no GPU.
    private float basePriceGeneral;
    // Base price used by computeCharge() when the container uses a GPU.
    private float basePriceGpu;

    // Data access used to look up an ApplicationState by application id.
    ApplicationStateDataAccess appStatDS = (ApplicationStateDataAccess) RMStorageFactory
            .getDataAccess(ApplicationStateDataAccess.class);
    // Application id (as string) -> application owner, as returned by
    // ApplicationState.getUser(). Replaced by an empty map once it holds
    // more than 100000 entries.
    Map<String, String> applicationOwnerCache = new HashMap<>();
    // Container id -> last checkpoint of that container; loaded from the
    // database in recover().
    Map<String, ContainerCheckPoint> containersCheckPoints;
    // Ids of the containers processed during recovery; the first non-recovery
    // log seen for such a container is skipped so it is not charged twice.
    Set<String> recovered = new HashSet<>();

    // Container logs handed over by insertEvents() and drained by the worker thread.
    BlockingQueue<ContainerLog> eventContainersLogs = new LinkedBlockingQueue<>();

    /**
     * Creates the service, registering it under the name
     * "quota scheduler service".
     */
    public QuotaService() {
        super("quota scheduler service");
    }

    /**
     * Starts the service: runs {@link #recover()} first, then launches the
     * background thread that processes the queued container logs.
     *
     * @throws Exception if the recovery or the parent start fails
     */
    @Override
    protected void serviceStart() throws Exception {
        assert !stopped : "starting when already stopped";
        LOG.info("Starting a new quota schedular service");

        // Recovery must be finished before the worker starts consuming events.
        recover();

        final Thread worker = new Thread(new WorkingThread(), "Quota scheduling service");
        quotaSchedulingThread = worker;
        worker.start();

        super.serviceStart();
    }

    /**
     * Stops the service: raises the stop flag and interrupts the worker
     * thread if one was started.
     *
     * @throws Exception if the parent stop fails
     */
    @Override
    protected void serviceStop() throws Exception {
        stopped = true;

        final Thread worker = quotaSchedulingThread;
        if (worker != null) {
            // Wakes the worker up if it is blocked waiting for events.
            worker.interrupt();
        }

        super.serviceStop();
        LOG.info("Stopped the quota schedular service.");
    }

    /**
     * Reads the quota settings (minimum charged ticks, batching limits,
     * minimum container dimensions and base prices) from the configuration.
     *
     * @param conf the configuration to read the settings from
     * @throws Exception if the parent initialization fails
     */
    @Override
    public void serviceInit(Configuration conf) throws Exception {
        minNumberOfTicks = conf.getInt(YarnConfiguration.QUOTA_MIN_TICKS_CHARGE,
                YarnConfiguration.DEFAULT_QUOTA_MIN_TICKS_CHARGE);
        batchTime = conf.getLong(YarnConfiguration.QUOTA_BATCH_TIME, YarnConfiguration.DEFAULT_QUOTA_BATCH_TIME);
        batchSize = conf.getInt(YarnConfiguration.QUOTA_BATCH_SIZE, YarnConfiguration.DEFAULT_QUOTA_BATCH_SIZE);
        minVcores = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
                YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
        // minGpus is used as a divisor in computeCharge(), keep it at least 1.
        minGpus = Math.max(1, conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_GPUS,
                YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_GPUS));
        minMemory = conf.getInt(YarnConfiguration.QUOTA_MINIMUM_CHARGED_MB,
                YarnConfiguration.DEFAULT_QUOTA_MINIMUM_CHARGED_MB);
        basePriceGeneral = conf.getFloat(YarnConfiguration.QUOTA_BASE_PRICE_GENERAL,
                YarnConfiguration.DEFAULT_QUOTA_BASE_PRICE_GENERAL);
        // The GPU price must go into its own field: it used to be written to
        // basePriceGeneral and then overwritten, leaving basePriceGpu at 0.
        basePriceGpu = conf.getFloat(YarnConfiguration.QUOTA_BASE_PRICE_GPU,
                YarnConfiguration.DEFAULT_QUOTA_BASE_PRICE_GPU);
        super.serviceInit(conf);
    }

    /**
     * Queues container logs for processing by the worker thread.
     *
     * @param containersLogs the logs to enqueue, in iteration order
     */
    public void insertEvents(Collection<ContainerLog> containersLogs) {
        eventContainersLogs.addAll(containersLogs);
    }

    /**
     * Worker loop of the service: repeatedly collects a batch of queued
     * container logs and charges the corresponding projects, until the
     * service is stopped or the thread is interrupted.
     */
    private class WorkingThread implements Runnable {

        @Override
        public void run() {
            LOG.info("Quota Scheduler started");

            while (!stopped && !Thread.currentThread().isInterrupted()) {
                try {
                    final List<ContainerLog> containersLogs = new ArrayList<>();
                    final long start = System.currentTimeMillis();
                    long duration = 0;
                    //batch logs to reduce the number of roundtrips to the database
                    //can probably be removed once we have the ndb asynchronous library
                    do {
                        // Wait at most for what is left of the batch window (at least 1 ms).
                        ContainerLog log = eventContainersLogs.poll(Math.max(1, batchTime - duration),
                                TimeUnit.MILLISECONDS);
                        if (log != null) {
                            containersLogs.add(log);
                        }
                        duration = System.currentTimeMillis() - start;
                    } while (duration < batchTime && containersLogs.size() < batchSize);

                    computeAndApplyCharge(containersLogs, false);
                } catch (InterruptedException ex) {
                    // Restore the interrupt flag so the loop condition sees it and
                    // the thread terminates; the batch being collected is not charged here.
                    LOG.info("Quota scheduler thread interrupted, stopping");
                    Thread.currentThread().interrupt();
                } catch (IOException ex) {
                    LOG.error(ex, ex);
                }
            }
            LOG.info("Quota scheduler thread is exiting gracefully");
        }
    }

    /**
     * Charges the projects owning the given containers, inside a single
     * write-locked database transaction.
     *
     * @param ContainersLogs the container logs to process
     * @param isRecover {@code true} when called from {@link #recover()}
     * @throws IOException if the transaction fails
     */
    protected void computeAndApplyCharge(final Collection<ContainerLog> ContainersLogs, final boolean isRecover)
            throws IOException {
        if (ContainersLogs.isEmpty()) {
            // Nothing to charge: skip the transaction, its write lock and the
            // full read of the project quotas. The worker thread gets here
            // every time a batch window elapses without any event.
            return;
        }
        LightWeightRequestHandler quotaSchedulerHandler = new LightWeightRequestHandler(YARNOperationType.TEST) {
            @Override
            public Object performTask() throws IOException {
                connector.beginTransaction();
                connector.writeLock();

                computeAndApplyChargeInt(ContainersLogs, isRecover);
                connector.commit();
                return null;
            }

        };
        quotaSchedulerHandler.handle();
    }

    /**
     * Computes the charge of every given container log, applies it to the
     * owning project's quota and daily cost, and persists the result:
     * logs of finished containers are removed, checkpoints are added or
     * removed, and the changed quotas and daily costs are written back.
     * Must be called from within a transaction.
     *
     * @param ContainersLogs the container logs to process
     * @param isRecover {@code true} when replaying the stored logs at startup;
     *     the processed container ids are then remembered so that the first
     *     regular log received for each of them is skipped
     * @throws StorageException if a database access fails
     */
    private void computeAndApplyChargeInt(final Collection<ContainerLog> ContainersLogs, final boolean isRecover)
            throws StorageException {
        //Get Data  ** ProjectQuota **
        ProjectQuotaDataAccess pqDA = (ProjectQuotaDataAccess) RMStorageFactory
                .getDataAccess(ProjectQuotaDataAccess.class);
        Map<String, ProjectQuota> projectsQuotaMap = pqDA.getAll();
        final long currentDay = TimeUnit.DAYS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS);
        Map<String, ProjectQuota> chargedProjects = new HashMap<>();
        Map<ProjectDailyId, ProjectDailyCost> chargedProjectsDailyCost = new HashMap<>();

        List<ContainerLog> toBeRemovedContainersLogs = new ArrayList<>();
        List<ContainerCheckPoint> toBePersistedContainerCheckPoint = new ArrayList<>();
        List<ContainerCheckPoint> toBeRemovedContainerCheckPoint = new ArrayList<>();

        // Calculate the quota
        for (ContainerLog containerLog : ContainersLogs) {
            final String containerIdStr = containerLog.getContainerid();
            if (!isRecover && recovered.remove(containerIdStr)) {
                //we have already charged this project when recovering we should
                //not charge it two times
                continue;
            }
            if (isRecover) {
                recovered.add(containerIdStr);
            }
            // Get ApplicationId from ContainerId
            ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
            ApplicationId appId = containerId.getApplicationAttemptId().getApplicationId();

            //Get the application owner from the cache, or from the
            //** ApplicationState Table ** on a cache miss
            String appOwner = applicationOwnerCache.get(appId.toString());
            if (appOwner == null) {
                ApplicationState appState = (ApplicationState) appStatDS.findByApplicationId(appId.toString());
                if (appState == null) {
                    LOG.error("Application not found: " + appId.toString() + " for container "
                            + containerIdStr);
                    continue;
                }
                if (applicationOwnerCache.size() > 100000) {
                    //if the cache is too big empty it and it will be refilled with
                    //the active applications
                    //TODO make a proper cache
                    applicationOwnerCache = new HashMap<>();
                }
                appOwner = appState.getUser();
                applicationOwnerCache.put(appId.toString(), appOwner);
            }

            String projectName = HopsWorksHelper.getProjectName(appOwner);
            String user = HopsWorksHelper.getUserName(appOwner);

            //compute used ticks: count from the last checkpoint if there is one,
            //from the container start otherwise. Primitives are used so that the
            //comparison with the start below is always a numeric one.
            long checkpoint = containerLog.getStart();
            float currentMultiplicator = containerLog.getMultiplicator();
            ContainerCheckPoint lastCheckPoint = containersCheckPoints.get(containerIdStr);
            if (lastCheckPoint != null) {
                checkpoint = lastCheckPoint.getCheckPoint();
                currentMultiplicator = lastCheckPoint.getMultiplicator();
            }
            long nbRunningTicks = containerLog.getStop() - checkpoint;
            final boolean running = containerLog.getExitstatus() == ContainerExitStatus.CONTAINER_RUNNING_STATE;

            // Decide what to do with the ticks
            if (nbRunningTicks > 0) {
                if (running) {
                    //The container is still running: move its checkpoint to the
                    //end of the charged period
                    recordCheckPoint(containerIdStr, containerLog.getStop(), currentMultiplicator,
                            toBePersistedContainerCheckPoint);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("charging project still running " + projectName + " for container "
                                + containerIdStr + " current ticks " + nbRunningTicks + "("
                                + containerLog.getStart() + ", " + containerLog.getStop() + ", " + checkpoint
                                + ") current multiplicator " + currentMultiplicator);
                    }
                } else {
                    //The container has finished running
                    discardContainer(containerLog, toBeRemovedContainersLogs, toBeRemovedContainerCheckPoint);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("charging project finished " + projectName + " for container "
                                + containerIdStr + " current ticks " + nbRunningTicks
                                + " current multiplicator " + currentMultiplicator);
                    }
                }

                //** ProjectQuota charging**
                float charge = computeCharge(nbRunningTicks, currentMultiplicator, containerLog.getNbVcores(),
                        containerLog.getMemoryUsed(), containerLog.getGpuUsed());
                chargeProjectQuota(chargedProjects, projectsQuotaMap, projectName, user, containerIdStr, charge);

                //** ProjectDailyCost charging**
                chargeProjectDailyCost(chargedProjectsDailyCost, projectName, user, currentDay, charge, appId);
            } else if (running) {
                if (checkpoint == containerLog.getStart()) {
                    //create a checkPoint at start to store multiplicator.
                    recordCheckPoint(containerIdStr, containerLog.getStart(), currentMultiplicator,
                            toBePersistedContainerCheckPoint);
                }
            } else {
                //the container is not running remove it from db
                discardContainer(containerLog, toBeRemovedContainersLogs, toBeRemovedContainerCheckPoint);
            }
        }
        // Delete the finished ContainersLogs
        ContainersLogsDataAccess csDA = (ContainersLogsDataAccess) RMStorageFactory
                .getDataAccess(ContainersLogsDataAccess.class);
        csDA.removeAll(toBeRemovedContainersLogs);

        //Add and remove Containers checkpoints
        ContainersCheckPointsDataAccess ccpDA = (ContainersCheckPointsDataAccess) RMStorageFactory
                .getDataAccess(ContainersCheckPointsDataAccess.class);
        ccpDA.addAll(toBePersistedContainerCheckPoint);
        ccpDA.removeAll(toBeRemovedContainerCheckPoint);

        if (LOG.isDebugEnabled()) {
            // Show all charged project
            for (ProjectQuota _cpq : chargedProjects.values()) {
                LOG.debug("RIZ:: Charged projects: " + _cpq.toString() + " charge amount:"
                        + _cpq.getTotalUsedQuota());
            }
        }

        // Add all the changed project quota to NDB
        pqDA.addAll(chargedProjects.values());
        ProjectsDailyCostDataAccess pdcDA = (ProjectsDailyCostDataAccess) RMStorageFactory
                .getDataAccess(ProjectsDailyCostDataAccess.class);
        pdcDA.addAll(chargedProjectsDailyCost.values());
    }

    /**
     * Registers a new checkpoint for a container in the in-memory map and
     * queues it for persistence.
     */
    private void recordCheckPoint(String containerId, long tick, float multiplicator,
            List<ContainerCheckPoint> toBePersisted) {
        ContainerCheckPoint newCheckpoint = new ContainerCheckPoint(containerId, tick, multiplicator);
        containersCheckPoints.put(containerId, newCheckpoint);
        toBePersisted.add(newCheckpoint);
    }

    /**
     * Queues the log of a container that is no longer running for removal,
     * together with its checkpoint if it has one.
     */
    private void discardContainer(ContainerLog containerLog, List<ContainerLog> logsToRemove,
            List<ContainerCheckPoint> checkPointsToRemove) {
        logsToRemove.add(containerLog);
        if (containersCheckPoints.remove(containerLog.getContainerid()) != null) {
            checkPointsToRemove.add(new ContainerCheckPoint(containerLog.getContainerid()));
        }
    }

    // Daily costs of the day stored in cashDay, keyed by (project, user, day).
    // Loaded from the database in recover() and replaced by an empty map in
    // chargeProjectDailyCost() when the day changes.
    Map<ProjectDailyId, ProjectDailyCost> projectsDailyCostCache;
    // Day (in days since the epoch) covered by projectsDailyCostCache;
    // -1 until recover() or chargeProjectDailyCost() sets it.
    long cashDay = -1;

    /**
     * Decrements the quota of a project by the given charge and records the
     * project as changed. An error is logged if the project is unknown.
     *
     * @param chargedProjectsQuota receives the quota once it has been charged
     * @param projectsQuotaMap the known quotas, by project
     * @param projectid the project to charge
     * @param user the user the charge is attributed to (only logged)
     * @param containerId the container being charged for (only logged)
     * @param charge the amount to take from the quota
     */
    private void chargeProjectQuota(Map<String, ProjectQuota> chargedProjectsQuota,
            Map<String, ProjectQuota> projectsQuotaMap, String projectid, String user, String containerId,
            float charge) {

        LOG.info("Quota: project " + projectid + " user " + user + " has been charged " + charge
                + " for container: " + containerId);

        final ProjectQuota quota = projectsQuotaMap.get(projectid);
        if (quota == null) {
            LOG.error("Project not found: " + projectid);
            return;
        }

        quota.decrementQuota(charge);
        chargedProjectsQuota.put(projectid, quota);
    }

    /**
     * Adds a charge to the daily cost of a (project, user, day) triple and
     * records that daily cost as changed. The daily cost cache is reset
     * whenever the day differs from the one it was built for.
     *
     * @param chargedProjectsDailyCost receives the updated daily cost
     * @param projectid the charged project
     * @param user the charged user
     * @param day the day the charge is accounted on
     * @param charge the amount to add
     * @param appId the application the charge originates from
     */
    private void chargeProjectDailyCost(Map<ProjectDailyId, ProjectDailyCost> chargedProjectsDailyCost,
            String projectid, String user, long day, float charge, ApplicationId appId) {

        LOG.debug("Quota: project " + projectid + " user " + user + " has used " + charge + " credits, on day: "
                + day);

        // A new day started: the cached costs belong to the previous one.
        if (day != cashDay) {
            projectsDailyCostCache = new HashMap<>();
            cashDay = day;
        }

        final String application = appId.toString();
        final ProjectDailyId dailyId = new ProjectDailyId(projectid, user, day);

        ProjectDailyCost dailyCost = projectsDailyCostCache.get(dailyId);
        if (dailyCost == null) {
            dailyCost = new ProjectDailyCost(projectid, user, day, 0, application);
            projectsDailyCostCache.put(dailyId, dailyCost);
        }
        dailyCost.incrementCharge(charge, application);

        chargedProjectsDailyCost.put(dailyId, dailyCost);
    }

    /**
     * Computes the credits to charge for a container.
     *
     * <p>The tick count is raised to {@code minNumberOfTicks} if lower. The
     * price is proportional to the container size on its most used resource,
     * each resource being expressed relative to a minimum-sized container.
     * The GPU base price applies as soon as the container uses a GPU.
     *
     * @param ticks the number of ticks to charge
     * @param multiplicator the price multiplicator to apply
     * @param nbVcores the number of vcores of the container
     * @param memoryUsed the memory of the container
     * @param nbGpus the number of gpus of the container
     * @return the credits to charge
     */
    private float computeCharge(long ticks, float multiplicator, int nbVcores, long memoryUsed, int nbGpus) {
        final long chargedTicks = Math.max(ticks, minNumberOfTicks);

        final float vcoresShare = (float) nbVcores / minVcores;
        final float memoryShare = (float) memoryUsed / minMemory;
        final float gpuShare = (float) nbGpus / minGpus;

        final float pricePerTick = (gpuShare != 0) ? basePriceGpu : basePriceGeneral;
        final float dominantShare = Math.max(gpuShare, Math.max(vcoresShare, memoryShare));

        return (float) chargedTicks * dominantShare * multiplicator * pricePerTick;
    }

    /**
     * Rebuilds the in-memory state from the database and charges the stored
     * container logs.
     *
     * <p>In one write-locked transaction, loads the daily costs of the
     * current day, all container checkpoints and all container logs. The
     * loaded logs are then run through the charging logic in recovery mode.
     *
     * @throws IOException if a database access fails
     */
    public void recover() throws IOException {

        final long day = TimeUnit.MILLISECONDS.toDays(System.currentTimeMillis());
        cashDay = day;

        LightWeightRequestHandler stateLoader = new LightWeightRequestHandler(YARNOperationType.TEST) {
            @Override
            public Object performTask() throws IOException {
                connector.beginTransaction();
                connector.writeLock();

                // Daily costs of the current day
                ProjectsDailyCostDataAccess dailyCostAccess = (ProjectsDailyCostDataAccess) RMStorageFactory
                        .getDataAccess(ProjectsDailyCostDataAccess.class);
                projectsDailyCostCache = dailyCostAccess.getByDay(day);

                // Checkpoints of the known containers
                ContainersCheckPointsDataAccess checkPointAccess = (ContainersCheckPointsDataAccess) RMStorageFactory
                        .getDataAccess(ContainersCheckPointsDataAccess.class);
                containersCheckPoints = checkPointAccess.getAll();

                // Stored container logs
                ContainersLogsDataAccess logsAccess = (ContainersLogsDataAccess) RMStorageFactory
                        .getDataAccess(ContainersLogsDataAccess.class);
                Map<String, ContainerLog> storedLogs = logsAccess.getAll();

                connector.commit();
                return storedLogs;
            }
        };

        @SuppressWarnings("unchecked")
        final Map<String, ContainerLog> recoveredLogs = (Map<String, ContainerLog>) stateLoader.handle();

        //run logic on all
        computeAndApplyCharge(recoveredLogs.values(), true);
    }

}