org.apache.hadoop.yarn.server.sharedcachemanager.CleanerService.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.sharedcachemanager.CleanerService.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.sharedcachemanager;

import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.sharedcachemanager.metrics.CleanerMetrics;
import org.apache.hadoop.yarn.server.sharedcachemanager.store.SCMStore;

import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * The cleaner service that maintains the shared cache area, and cleans up stale
 * entries on a regular basis.
 *
 * <p>On start the service claims a global pid file under the shared cache root
 * (so that only one cleaner runs per cluster) and schedules a
 * {@link CleanerTask} at a fixed rate. On stop it shuts the executor down and
 * removes the pid file, but only if this instance was the one that created it.
 */
@Private
@Evolving
public class CleanerService extends CompositeService {
    /**
     * The name of the global cleaner lock that the cleaner creates to indicate
     * that a cleaning process is in progress.
     */
    public static final String GLOBAL_CLEANER_PID = ".cleaner_pid";

    private static final Log LOG = LogFactory.getLog(CleanerService.class);

    private Configuration conf;
    private CleanerMetrics metrics;
    private ScheduledExecutorService scheduledExecutor;
    private final SCMStore store;
    private final Lock cleanerTaskLock;

    /**
     * Whether this instance created the global cleaner pid file. The file must
     * only be removed by its owner; otherwise a failed start (because another
     * cleaner already holds the file) would delete the other cleaner's file
     * when this service is stopped.
     */
    private boolean pidFileCreated;

    /**
     * Creates the cleaner service.
     *
     * @param store the shared cache manager store handed to every cleaner task
     */
    public CleanerService(SCMStore store) {
        super("CleanerService");
        this.store = store;
        this.cleanerTaskLock = new ReentrantLock();
    }

    /**
     * Remembers the configuration and creates the executor that runs the
     * cleaner tasks.
     *
     * @param conf the service configuration
     * @throws Exception if the parent initialization fails
     */
    @Override
    protected void serviceInit(Configuration conf) throws Exception {
        this.conf = conf;

        // create scheduler executor service that services the cleaner tasks
        // use 2 threads to accommodate the on-demand tasks and reduce the chance of
        // back-to-back runs
        ThreadFactory tf = new ThreadFactoryBuilder().setNameFormat("Shared cache cleaner").build();
        scheduledExecutor = Executors.newScheduledThreadPool(2, tf);
        super.serviceInit(conf);
    }

    /**
     * Claims the global cleaner pid file, starts the child services and
     * schedules the periodic cleaner task.
     *
     * @throws YarnException if the pid file already exists or cannot be written
     * @throws HadoopIllegalArgumentException if the configured initial delay or
     *         period is invalid
     * @throws Exception if starting the child services fails
     */
    @Override
    protected void serviceStart() throws Exception {
        if (!writeGlobalCleanerPidFile()) {
            throw new YarnException("The global cleaner pid file already exists! "
                    + "It appears there is another CleanerService running in the cluster");
        }

        this.metrics = CleanerMetrics.getInstance();

        // Start dependent services (i.e. AppChecker)
        super.serviceStart();

        Runnable task = CleanerTask.create(conf, store, metrics, cleanerTaskLock);
        long periodInMinutes = getPeriod(conf);
        scheduledExecutor.scheduleAtFixedRate(task, getInitialDelay(conf), periodInMinutes, TimeUnit.MINUTES);
        LOG.info("Scheduled the shared cache cleaner task to run every " + periodInMinutes + " minutes.");
    }

    /**
     * Shuts down the executor, removes the pid file if this instance owns it,
     * and stops the child services.
     *
     * <p>This method is written to be safe when the service was never
     * initialized or when start failed part-way: the executor may be
     * {@code null} and the pid file may belong to another cleaner.
     *
     * @throws Exception if stopping the child services fails
     */
    @Override
    protected void serviceStop() throws Exception {
        boolean interrupted = false;
        try {
            // the executor only exists once serviceInit has run
            if (scheduledExecutor != null) {
                LOG.info("Shutting down the background thread.");
                scheduledExecutor.shutdownNow();
                try {
                    if (scheduledExecutor.awaitTermination(10, TimeUnit.SECONDS)) {
                        LOG.info("The background thread stopped.");
                    } else {
                        LOG.warn("Gave up waiting for the cleaner task to shutdown.");
                    }
                } catch (InterruptedException e) {
                    LOG.warn("The cleaner service was interrupted while shutting down the task.", e);
                    // remember the interrupt; it is restored once cleanup is done
                    // so that the pid file removal below is not disturbed by it
                    interrupted = true;
                }
            }

            removeGlobalCleanerPidFile();
        } finally {
            try {
                super.serviceStop();
            } finally {
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Execute an on-demand cleaner task.
     */
    protected void runCleanerTask() {
        Runnable task = CleanerTask.create(conf, store, metrics, cleanerTaskLock);
        // this is a non-blocking call (it simply submits the task to the executor
        // queue and returns)
        this.scheduledExecutor.execute(task);
    }

    /**
     * To ensure there are not multiple instances of the SCM running on a given
     * cluster, a global pid file is used. This file contains the name of the
     * running JVM as reported by the runtime MX bean (typically of the form
     * {@code pid@hostname}).
     *
     * @return true if the pid file was written, false if it already existed
     * @throws YarnException if the file system cannot be accessed or the file
     *         cannot be created or written
     */
    private boolean writeGlobalCleanerPidFile() throws YarnException {
        String root = conf.get(YarnConfiguration.SHARED_CACHE_ROOT, YarnConfiguration.DEFAULT_SHARED_CACHE_ROOT);
        Path pidPath = new Path(root, GLOBAL_CLEANER_PID);
        try {
            FileSystem fs = FileSystem.get(this.conf);

            if (fs.exists(pidPath)) {
                return false;
            }

            // overwrite=false: creation fails rather than replacing a file that
            // appeared after the existence check above
            try (FSDataOutputStream os = fs.create(pidPath, false)) {
                // from here on the file is ours, even if the write below fails
                pidFileCreated = true;
                // write the JVM name in the global cleaner pid file
                final String id = ManagementFactory.getRuntimeMXBean().getName();
                os.writeUTF(id);
            }
            // add it to the delete-on-exit to ensure it gets deleted when the JVM
            // exits
            fs.deleteOnExit(pidPath);
        } catch (IOException e) {
            throw new YarnException(e);
        }
        LOG.info("Created the global cleaner pid file at " + pidPath.toString());
        return true;
    }

    /**
     * Removes the global cleaner pid file if, and only if, this instance
     * created it. Failures are logged rather than thrown so that shutdown can
     * continue.
     */
    private void removeGlobalCleanerPidFile() {
        if (!pidFileCreated) {
            // nothing to clean up: either start never got this far, or the file
            // belongs to another cleaner
            return;
        }
        try {
            FileSystem fs = FileSystem.get(this.conf);
            String root = conf.get(YarnConfiguration.SHARED_CACHE_ROOT,
                    YarnConfiguration.DEFAULT_SHARED_CACHE_ROOT);

            Path pidPath = new Path(root, GLOBAL_CLEANER_PID);

            fs.delete(pidPath, false);
            pidFileCreated = false;
            LOG.info("Removed the global cleaner pid file at " + pidPath.toString());
        } catch (IOException e) {
            LOG.error(
                    "Unable to remove the global cleaner pid file! The file may need " + "to be removed manually.",
                    e);
        }
    }

    /**
     * Reads the initial delay of the cleaner task from the configuration.
     *
     * @param conf the configuration to read from
     * @return the initial delay in minutes (zero or positive)
     * @throws HadoopIllegalArgumentException if the configured value is negative
     */
    private static int getInitialDelay(Configuration conf) {
        int initialDelayInMinutes = conf.getInt(YarnConfiguration.SCM_CLEANER_INITIAL_DELAY_MINS,
                YarnConfiguration.DEFAULT_SCM_CLEANER_INITIAL_DELAY_MINS);
        // negative value is invalid; reject it
        if (initialDelayInMinutes < 0) {
            throw new HadoopIllegalArgumentException("Negative initial delay value: " + initialDelayInMinutes
                    + ". The initial delay must be greater than or equal to zero.");
        }
        return initialDelayInMinutes;
    }

    /**
     * Reads the period of the cleaner task from the configuration.
     *
     * @param conf the configuration to read from
     * @return the period in minutes (strictly positive)
     * @throws HadoopIllegalArgumentException if the configured value is zero or
     *         negative
     */
    private static int getPeriod(Configuration conf) {
        int periodInMinutes = conf.getInt(YarnConfiguration.SCM_CLEANER_PERIOD_MINS,
                YarnConfiguration.DEFAULT_SCM_CLEANER_PERIOD_MINS);
        // non-positive value is invalid; reject it
        if (periodInMinutes <= 0) {
            throw new HadoopIllegalArgumentException("Non-positive period value: " + periodInMinutes
                    + ". The cleaner period must be greater than zero.");
        }
        return periodInMinutes;
    }
}