com.netflix.genie.web.services.impl.JobKillServiceV3.java Source code

Java tutorial

Introduction

Here is the source code for com.netflix.genie.web.services.impl.JobKillServiceV3.java

Source

/*
 *
 *  Copyright 2016 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.genie.web.services.impl;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.genie.common.dto.JobExecution;
import com.netflix.genie.common.dto.JobStatus;
import com.netflix.genie.common.dto.JobStatusMessages;
import com.netflix.genie.common.exceptions.GenieException;
import com.netflix.genie.common.exceptions.GeniePreconditionException;
import com.netflix.genie.common.exceptions.GenieServerException;
import com.netflix.genie.common.internal.jobs.JobConstants;
import com.netflix.genie.web.events.GenieEventBus;
import com.netflix.genie.web.events.JobFinishedEvent;
import com.netflix.genie.web.events.JobFinishedReason;
import com.netflix.genie.web.events.KillJobEvent;
import com.netflix.genie.web.jobs.JobKillReasonFile;
import com.netflix.genie.web.services.JobSearchService;
import com.netflix.genie.web.util.ProcessChecker;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.Executor;
import org.apache.commons.lang3.SystemUtils;
import org.springframework.context.event.EventListener;
import org.springframework.core.io.Resource;

import javax.validation.constraints.NotBlank;
import javax.validation.constraints.NotNull;
import java.io.File;
import java.io.IOException;
import java.time.Instant;
import java.time.temporal.ChronoUnit;

/**
 * A JobKillService that attempts to kill genie 3.0 jobs running on the local node.
 *
 * @author tgianos
 * @since 3.0.0
 */
@Slf4j
public class JobKillServiceV3 {

    private final String hostname;
    private final JobSearchService jobSearchService;
    private final Executor executor;
    private final boolean runAsUser;
    private final GenieEventBus genieEventBus;
    private final File baseWorkingDir;
    private final ObjectMapper objectMapper;
    private final ProcessChecker.Factory processCheckerFactory;

    /**
     * Constructor.
     *
     * @param hostname              The name of the host this Genie node is running on
     * @param jobSearchService      The job search service to use to locate job information
     * @param executor              The executor to use to run system processes
     * @param runAsUser             True if jobs are run as the user who submitted the job
     * @param genieEventBus         The system event bus to use
     * @param genieWorkingDir       The working directory where all job directories are created.
     * @param objectMapper          The Jackson ObjectMapper used to serialize from/to JSON
     * @param processCheckerFactory The process checker factory
     */
    public JobKillServiceV3(@NotBlank final String hostname, @NotNull final JobSearchService jobSearchService,
            @NotNull final Executor executor, final boolean runAsUser, @NotNull final GenieEventBus genieEventBus,
            @NotNull final Resource genieWorkingDir, @NotNull final ObjectMapper objectMapper,
            @NotNull final ProcessChecker.Factory processCheckerFactory) {
        this.hostname = hostname;
        this.jobSearchService = jobSearchService;
        this.executor = executor;
        this.runAsUser = runAsUser;
        this.genieEventBus = genieEventBus;
        this.objectMapper = objectMapper;
        this.processCheckerFactory = processCheckerFactory;

        try {
            this.baseWorkingDir = genieWorkingDir.getFile();
        } catch (IOException gse) {
            throw new RuntimeException("Could not load the base path from resource", gse);
        }
    }

    /**
     * Kill the job with the given id if possible. Should publish a JobFinishedEvent when done.
     *
     * @param id     id of job to kill
     * @param reason brief reason for requesting the job be killed
     * @throws GenieException if there is an error
     */
    public void killJob(@NotBlank(message = "No id entered. Unable to kill job.") final String id,
            @NotBlank(message = "No reason provided.") final String reason) throws GenieException {
        // Will throw exception if not found
        // TODO: Could instead check JobMonitorCoordinator eventually for in memory check
        final JobStatus jobStatus = this.jobSearchService.getJobStatus(id);
        if (jobStatus == JobStatus.INIT) {
            // Send a job finished event to force system to update the job to killed
            this.genieEventBus.publishSynchronousEvent(new JobFinishedEvent(id, JobFinishedReason.KILLED,
                    JobStatusMessages.USER_REQUESTED_JOB_BE_KILLED_DURING_INITIALIZATION, this));
        } else if (jobStatus == JobStatus.RUNNING) {
            final JobExecution jobExecution = this.jobSearchService.getJobExecution(id);
            if (jobExecution.getExitCode().isPresent()) {
                // Job is already finished one way or another
                return;
            }

            if (!this.hostname.equals(jobExecution.getHostName())) {
                throw new GeniePreconditionException("Job with id " + id + " is not running on this host ("
                        + this.hostname + "). It's actually on " + jobExecution.getHostName());
            }

            // Job is on this node and still running as of when query was made to database
            if (SystemUtils.IS_OS_UNIX) {
                this.killJobOnUnix(jobExecution.getProcessId().orElseThrow(() -> new GeniePreconditionException(
                        "No process id found. Unable to kill if no process id")));
            } else {
                // Windows, etc. May support later
                throw new UnsupportedOperationException("Genie isn't currently supported on this OS");
            }

            // Write additional file with kill reason
            try {
                this.objectMapper.writeValue(
                        new File(this.baseWorkingDir + "/" + id + "/" + JobConstants.GENIE_KILL_REASON_FILE_NAME),
                        new JobKillReasonFile(reason));
            } catch (IOException e) {
                throw new GenieServerException("Failed to write job kill reason file", e);
            }
        }
    }

    /**
     * Listen for job kill events from within the system as opposed to on calls from users directly to killJob.
     *
     * @param event The {@link KillJobEvent}
     * @throws GenieException On error
     */
    @EventListener
    public void onKillJobEvent(@NotNull final KillJobEvent event) throws GenieException {
        this.killJob(event.getId(), event.getReason());
    }

    private void killJobOnUnix(final int pid) throws GenieException {
        try {
            // Ensure this process check can't be timed out
            final Instant tomorrow = Instant.now().plus(1, ChronoUnit.DAYS);
            final ProcessChecker processChecker = this.processCheckerFactory.get(pid, tomorrow);
            processChecker.checkProcess();
        } catch (final ExecuteException ee) {
            // This means the job was done already
            log.debug("Process with pid {} is already done", pid);
            return;
        } catch (final IOException ioe) {
            throw new GenieServerException("Unable to check process status for pid " + pid, ioe);
        }

        // TODO: Do we need retries?
        // This means the job client process is still running
        try {
            final CommandLine killCommand;
            if (this.runAsUser) {
                killCommand = new CommandLine("sudo");
                killCommand.addArgument("kill");
            } else {
                killCommand = new CommandLine("kill");
            }
            killCommand.addArguments(Integer.toString(pid));
            this.executor.execute(killCommand);
        } catch (final IOException ioe) {
            throw new GenieServerException("Unable to kill process " + pid, ioe);
        }
    }
}