com.netflix.genie.web.services.impl.JobSpecificationServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.netflix.genie.web.services.impl.JobSpecificationServiceImpl.java

Source

/*
 *
 *  Copyright 2018 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.genie.web.services.impl;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.netflix.genie.common.dto.ClusterCriteria;
import com.netflix.genie.common.exceptions.GenieException;
import com.netflix.genie.common.exceptions.GeniePreconditionException;
import com.netflix.genie.common.exceptions.GenieServerException;
import com.netflix.genie.common.internal.dto.v4.Application;
import com.netflix.genie.common.internal.dto.v4.Cluster;
import com.netflix.genie.common.internal.dto.v4.Command;
import com.netflix.genie.common.internal.dto.v4.Criterion;
import com.netflix.genie.common.internal.dto.v4.ExecutionEnvironment;
import com.netflix.genie.common.internal.dto.v4.JobMetadata;
import com.netflix.genie.common.internal.dto.v4.JobRequest;
import com.netflix.genie.common.internal.dto.v4.JobSpecification;
import com.netflix.genie.common.internal.jobs.JobConstants;
import com.netflix.genie.web.properties.JobsProperties;
import com.netflix.genie.web.services.ApplicationPersistenceService;
import com.netflix.genie.web.services.ClusterLoadBalancer;
import com.netflix.genie.web.services.ClusterPersistenceService;
import com.netflix.genie.web.services.CommandPersistenceService;
import com.netflix.genie.web.services.JobSpecificationService;
import com.netflix.genie.web.util.MetricsConstants;
import com.netflix.genie.web.util.MetricsUtils;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tag;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.RegExUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.aop.TargetClassAware;
import org.springframework.validation.annotation.Validated;

import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
import javax.validation.Valid;
import javax.validation.constraints.NotEmpty;
import java.io.File;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Implementation of the JobSpecificationService APIs.
 * <p>
 * Resolving a specification is done in stages: query for the cluster/command combinations matching the request
 * criteria, select one cluster (delegating to the configured {@link ClusterLoadBalancer} instances when more than
 * one cluster matches), load the command paired with that cluster and finally resolve the applications. Every stage
 * is timed with its own metric.
 *
 * @author tgianos
 * @since 4.0.0
 */
@Slf4j
@Validated
@ParametersAreNonnullByDefault
public class JobSpecificationServiceImpl implements JobSpecificationService {

    /**
     * How long it takes to completely resolve a job specification given inputs.
     */
    private static final String RESOLVE_JOB_SPECIFICATION_TIMER = "genie.services.specification.resolve.timer";

    /**
     * How long it takes to query the database for cluster command combinations matching supplied criteria.
     */
    private static final String CLUSTER_COMMAND_QUERY_TIMER_NAME = "genie.services.specification.clusterCommandQuery.timer";

    /**
     * How long it takes to select a cluster from the set of clusters returned by database query.
     */
    private static final String SELECT_CLUSTER_TIMER_NAME = "genie.services.specification.selectCluster.timer";

    /**
     * How long it takes to select a command for a given cluster.
     */
    private static final String SELECT_COMMAND_TIMER_NAME = "genie.services.specification.selectCommand.timer";

    /**
     * How long it takes to select the applications for a given command.
     */
    private static final String SELECT_APPLICATIONS_TIMER_NAME = "genie.services.specification.selectApplications.timer";

    /**
     * How many times a cluster load balancer is invoked.
     */
    private static final String SELECT_LOAD_BALANCER_COUNTER_NAME = "genie.services.specification.loadBalancer.counter";

    /**
     * Job directory used when the request does not ask for a specific job directory location.
     */
    private static final File DEFAULT_JOB_DIRECTORY = new File("/tmp/genie/jobs");

    /**
     * Separator placed between the archive location prefix and the job id. The prefix is a URI, so this is always a
     * forward slash rather than the platform specific file separator.
     */
    private static final String ARCHIVE_LOCATION_SEPARATOR = "/";

    /**
     * Placeholder logged when no application ids are available to print.
     */
    private static final String NO_ID_FOUND = "No id found";

    // Values of the status tag attached to the load balancer invocation counter
    private static final String LOAD_BALANCER_STATUS_SUCCESS = "success";
    private static final String LOAD_BALANCER_STATUS_NO_PREFERENCE = "no preference";
    private static final String LOAD_BALANCER_STATUS_EXCEPTION = "exception";
    private static final String LOAD_BALANCER_STATUS_INVALID = "invalid";

    private final ApplicationPersistenceService applicationPersistenceService;
    private final ClusterPersistenceService clusterPersistenceService;
    private final CommandPersistenceService commandPersistenceService;
    private final List<ClusterLoadBalancer> clusterLoadBalancers;
    private final MeterRegistry registry;
    // Memory value exported to the job environment when the selected command doesn't define one
    private final int defaultMemory;

    // Incremented when none of the load balancers picked a cluster out of several candidates
    private final Counter noClusterSelectedCounter;
    // Incremented when the criteria query returned no cluster at all
    private final Counter noClusterFoundCounter;

    /**
     * Constructor.
     *
     * @param applicationPersistenceService The service to use to manipulate applications
     * @param clusterPersistenceService     The service to use to manipulate clusters
     * @param commandPersistenceService     The service to use to manipulate commands
     * @param clusterLoadBalancers          The load balancer implementations to use
     * @param registry                      The metrics repository to use
     * @param jobsProperties                The properties for running a job set by the user
     */
    public JobSpecificationServiceImpl(final ApplicationPersistenceService applicationPersistenceService,
            final ClusterPersistenceService clusterPersistenceService,
            final CommandPersistenceService commandPersistenceService,
            @NotEmpty final List<ClusterLoadBalancer> clusterLoadBalancers, final MeterRegistry registry,
            final JobsProperties jobsProperties) {
        this.applicationPersistenceService = applicationPersistenceService;
        this.clusterPersistenceService = clusterPersistenceService;
        this.commandPersistenceService = commandPersistenceService;
        this.clusterLoadBalancers = clusterLoadBalancers;
        this.defaultMemory = jobsProperties.getMemory().getDefaultJobMemory();

        // Metrics
        this.registry = registry;
        this.noClusterSelectedCounter = this.registry
                .counter("genie.services.specification.selectCluster.noneSelected.counter");
        this.noClusterFoundCounter = this.registry
                .counter("genie.services.specification.selectCluster.noneFound.counter");
    }

    /**
     * {@inheritDoc}
     * <p>
     * Any failure raised while resolving (checked or unchecked) is rethrown wrapped in a {@link RuntimeException}
     * whose cause is the original failure. The overall duration is recorded whether resolution succeeds or fails.
     */
    @Override
    public JobSpecification resolveJobSpecification(final String id, @Valid final JobRequest jobRequest) {
        final long start = System.nanoTime();
        final Set<Tag> tags = Sets.newHashSet();
        try {
            log.info("Received request to resolve a job specification for job id {} and parameters {}", id,
                    jobRequest);
            final Map<Cluster, String> clustersAndCommandsForJob = this.queryForClustersAndCommands(
                    jobRequest.getCriteria().getClusterCriteria(), jobRequest.getCriteria().getCommandCriterion());
            // Resolve the cluster for the job request based on the tags specified
            final Cluster cluster = this.selectCluster(id, jobRequest, clustersAndCommandsForJob.keySet());
            // Resolve the command for the job request based on command tags and cluster chosen
            final Command command = this.getCommand(clustersAndCommandsForJob.get(cluster), id);
            // Resolve the applications to use based on the command that was selected
            final List<JobSpecification.ExecutionResource> applicationResources = Lists.newArrayList();
            for (final Application application : this.getApplications(id, jobRequest, command)) {
                applicationResources.add(
                        new JobSpecification.ExecutionResource(application.getId(), application.getResources()));
            }

            // The final command line is the command executable followed by the arguments supplied with the request
            final List<String> commandArgs = Lists.newArrayList(command.getExecutable());
            commandArgs.addAll(jobRequest.getCommandArgs());

            //TODO: Set the default job location as a server property?
            final JobSpecification jobSpecification = new JobSpecification(commandArgs,
                    new JobSpecification.ExecutionResource(id, jobRequest.getResources()),
                    new JobSpecification.ExecutionResource(cluster.getId(), cluster.getResources()),
                    new JobSpecification.ExecutionResource(command.getId(), command.getResources()),
                    applicationResources, this.generateEnvironmentVariables(id, jobRequest, cluster, command),
                    jobRequest.getRequestedAgentConfig().isInteractive(),
                    jobRequest.getRequestedAgentConfig().getRequestedJobDirectoryLocation()
                            .orElse(DEFAULT_JOB_DIRECTORY),
                    toArchiveLocation(jobRequest.getRequestedJobArchivalData().getRequestedArchiveLocationPrefix()
                            .orElse(null), id));

            MetricsUtils.addSuccessTags(tags);
            return jobSpecification;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(tags, t);
            // The method signature declares no checked exceptions so failures are surfaced as unchecked ones
            throw new RuntimeException(t);
        } finally {
            this.registry.timer(RESOLVE_JOB_SPECIFICATION_TIMER, tags).record(System.nanoTime() - start,
                    TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Ask the cluster persistence service for the clusters matching the criteria along with the id of the command to
     * use on each of them, recording how long the query took.
     *
     * @param clusterCriteria  The cluster criteria of the job request
     * @param commandCriterion The command criterion of the job request
     * @return The matching clusters mapped to the id of the command paired with each of them
     * @throws GenieException If the persistence service fails
     */
    private Map<Cluster, String> queryForClustersAndCommands(final List<Criterion> clusterCriteria,
            final Criterion commandCriterion) throws GenieException {
        final long start = System.nanoTime();
        final Set<Tag> tags = Sets.newHashSet();
        try {
            final Map<Cluster, String> clustersAndCommands = this.clusterPersistenceService
                    .findClustersAndCommandsForCriteria(clusterCriteria, commandCriterion);
            MetricsUtils.addSuccessTags(tags);
            return clustersAndCommands;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(tags, t);
            throw t;
        } finally {
            this.registry.timer(CLUSTER_COMMAND_QUERY_TIMER_NAME, tags).record(System.nanoTime() - start,
                    TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Pick the cluster the job will run on out of the candidates returned by the criteria query.
     * <p>
     * A single candidate is used directly. Several candidates are handed to the load balancers. No candidate at all
     * is an error.
     *
     * @param id         The id of the job
     * @param jobRequest The job request
     * @param clusters   The candidate clusters
     * @return The selected cluster
     * @throws GenieException If there are no candidates or no load balancer is able to select one
     */
    private Cluster selectCluster(final String id, final JobRequest jobRequest, final Set<Cluster> clusters)
            throws GenieException {
        final long start = System.nanoTime();
        final Set<Tag> timerTags = Sets.newHashSet();
        final Set<Tag> counterTags = Sets.newHashSet();
        try {
            final Cluster cluster;
            if (clusters.isEmpty()) {
                this.noClusterFoundCounter.increment();
                throw new GeniePreconditionException(
                        "No cluster/command combination found for the given criteria. Unable to continue");
            } else if (clusters.size() == 1) {
                cluster = clusters.stream().findFirst()
                        .orElseThrow(() -> new GenieServerException("Couldn't get cluster when size was one"));
            } else {
                cluster = this.selectClusterWithLoadBalancer(counterTags, clusters, id, jobRequest);
            }

            log.info("Selected cluster {} for job {}", cluster.getId(), id);
            MetricsUtils.addSuccessTags(timerTags);
            return cluster;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(timerTags, t);
            throw t;
        } finally {
            this.registry.timer(SELECT_CLUSTER_TIMER_NAME, timerTags).record(System.nanoTime() - start,
                    TimeUnit.NANOSECONDS);
        }

    }

    /**
     * Load the command with the given id from the command persistence service, recording how long it took.
     *
     * @param commandId The id of the command paired with the selected cluster
     * @param jobId     The id of the job, used for logging only
     * @return The command
     * @throws GenieException If the persistence service fails
     */
    private Command getCommand(final String commandId, final String jobId) throws GenieException {
        final long start = System.nanoTime();
        final Set<Tag> tags = Sets.newHashSet();
        try {
            log.info("Selecting command for job {} ", jobId);
            final Command command = this.commandPersistenceService.getCommand(commandId);
            log.info("Selected command {} for job {} ", commandId, jobId);
            MetricsUtils.addSuccessTags(tags);
            return command;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(tags, t);
            throw t;
        } finally {
            this.registry.timer(SELECT_COMMAND_TIMER_NAME, tags).record(System.nanoTime() - start,
                    TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Resolve the applications for the job. Application ids listed in the job request take precedence; when the
     * request lists none the applications attached to the command are used.
     *
     * @param id         The id of the job
     * @param jobRequest The job request
     * @param command    The selected command
     * @return The applications, in the order they were requested or returned for the command
     * @throws GenieException If a persistence service fails
     */
    private List<Application> getApplications(final String id, final JobRequest jobRequest, final Command command)
            throws GenieException {
        final long start = System.nanoTime();
        final Set<Tag> tags = Sets.newHashSet();
        try {
            final String commandId = command.getId();
            log.info("Selecting applications for job {} and command {}", id, commandId);
            // TODO: What do we do about application status? Should probably check here
            final List<Application> applications = Lists.newArrayList();
            if (jobRequest.getCriteria().getApplicationIds().isEmpty()) {
                applications.addAll(this.commandPersistenceService.getApplicationsForCommand(commandId));
            } else {
                for (final String applicationId : jobRequest.getCriteria().getApplicationIds()) {
                    applications.add(this.applicationPersistenceService.getApplication(applicationId));
                }
            }
            log.info("Selected applications {} for job {}", applications.stream().map(Application::getId)
                    .reduce((one, two) -> one + "," + two).orElse(NO_ID_FOUND), id);
            MetricsUtils.addSuccessTags(tags);
            return applications;
        } catch (final Throwable t) {
            MetricsUtils.addFailureTagsWithException(tags, t);
            throw t;
        } finally {
            this.registry.timer(SELECT_APPLICATIONS_TIMER_NAME, tags).record(System.nanoTime() - start,
                    TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Offer the candidate clusters to each load balancer in turn and use the first valid selection.
     * <p>
     * A load balancer that returns {@code null}, returns a cluster which wasn't one of the candidates or throws an
     * exception is skipped. Every invocation increments the load balancer counter tagged with the load balancer
     * class and the outcome.
     *
     * @param counterTags Tags to attach to every load balancer counter increment. This set is not modified.
     * @param clusters    The candidate clusters
     * @param id          The id of the job
     * @param jobRequest  The job request
     * @return The selected cluster
     * @throws GeniePreconditionException If none of the load balancers selected a valid cluster
     */
    private Cluster selectClusterWithLoadBalancer(final Set<Tag> counterTags, final Set<Cluster> clusters,
            final String id, final JobRequest jobRequest) throws GeniePreconditionException {
        Cluster cluster = null;
        for (final ClusterLoadBalancer loadBalancer : this.clusterLoadBalancers) {
            final String loadBalancerClass = this.resolveLoadBalancerClassName(loadBalancer);
            try {
                final Cluster selectedCluster = loadBalancer.selectCluster(clusters,
                        this.toV3JobRequest(id, jobRequest));
                if (selectedCluster != null) {
                    // Make sure the cluster existed in the original list of clusters
                    if (clusters.contains(selectedCluster)) {
                        log.debug("Successfully selected cluster {} using load balancer {}",
                                selectedCluster.getId(), loadBalancerClass);
                        this.countLoadBalancerInvocation(counterTags, loadBalancerClass,
                                LOAD_BALANCER_STATUS_SUCCESS);
                        cluster = selectedCluster;
                        break;
                    } else {
                        log.error(
                                "Successfully selected cluster {} using load balancer {} but it wasn't in original cluster "
                                        + "list {}",
                                selectedCluster.getId(), loadBalancerClass, clusters);
                        this.countLoadBalancerInvocation(counterTags, loadBalancerClass,
                                LOAD_BALANCER_STATUS_INVALID);
                    }
                } else {
                    this.countLoadBalancerInvocation(counterTags, loadBalancerClass,
                            LOAD_BALANCER_STATUS_NO_PREFERENCE);
                }
            } catch (final Exception e) {
                log.error("Cluster load balancer {} threw exception:", loadBalancer, e);
                this.countLoadBalancerInvocation(counterTags, loadBalancerClass, LOAD_BALANCER_STATUS_EXCEPTION);
            }
        }

        // Make sure we selected a cluster
        if (cluster == null) {
            this.noClusterSelectedCounter.increment();
            throw new GeniePreconditionException(
                    "Unable to select a cluster from using any of the available load balancer's.");
        }

        return cluster;
    }

    /**
     * Determine the class name to report in metrics and logs for a load balancer. When the instance is a
     * {@link TargetClassAware} wrapper exposing a target class, that target class is reported instead of the
     * wrapper class.
     *
     * @param loadBalancer The load balancer instance
     * @return The name of the load balancer class
     */
    private String resolveLoadBalancerClassName(final ClusterLoadBalancer loadBalancer) {
        Class<?> loadBalancerClass = loadBalancer.getClass();
        if (loadBalancer instanceof TargetClassAware) {
            final Class<?> targetClass = ((TargetClassAware) loadBalancer).getTargetClass();
            if (targetClass != null) {
                loadBalancerClass = targetClass;
            }
        }
        // getCanonicalName() returns null for anonymous and local classes, fall back to the binary name so the
        // resulting tag value is never null
        final String canonicalName = loadBalancerClass.getCanonicalName();
        return canonicalName != null ? canonicalName : loadBalancerClass.getName();
    }

    /**
     * Increment the load balancer invocation counter for a single load balancer and outcome.
     * <p>
     * A new tag set is built for every increment. Sharing one mutable set across load balancers would accumulate
     * the class name and status tags of earlier invocations and attribute the count to the wrong tag values.
     *
     * @param baseTags          Tags common to all increments, left unmodified
     * @param loadBalancerClass The name of the load balancer class that was invoked
     * @param status            One of the {@code LOAD_BALANCER_STATUS_*} values
     */
    private void countLoadBalancerInvocation(final Set<Tag> baseTags, final String loadBalancerClass,
            final String status) {
        final Set<Tag> tags = Sets.newHashSet(baseTags);
        tags.add(Tag.of(MetricsConstants.TagKeys.CLASS_NAME, loadBalancerClass));
        tags.add(Tag.of(MetricsConstants.TagKeys.STATUS, status));
        this.registry.counter(SELECT_LOAD_BALANCER_COUNTER_NAME, tags).increment();
    }

    /**
     * Build the environment variables describing the job, the selected cluster and command and the criteria that
     * were requested.
     * <p>
     * The requested cluster criteria are exported twice: one variable per criterion, suffixed with the index of the
     * criterion, and one aggregate variable holding all of them as a bracketed, comma separated list.
     *
     * @param id         The id of the job
     * @param jobRequest The job request
     * @param cluster    The selected cluster
     * @param command    The selected command
     * @return An immutable map of environment variable names to values
     */
    private ImmutableMap<String, String> generateEnvironmentVariables(final String id, final JobRequest jobRequest,
            final Cluster cluster, final Command command) {
        final ImmutableMap.Builder<String, String> envVariables = ImmutableMap.builder();
        envVariables.put("GENIE_VERSION", "4");
        envVariables.put(JobConstants.GENIE_CLUSTER_ID_ENV_VAR, cluster.getId());
        envVariables.put(JobConstants.GENIE_CLUSTER_NAME_ENV_VAR, cluster.getMetadata().getName());
        envVariables.put(JobConstants.GENIE_CLUSTER_TAGS_ENV_VAR,
                this.tagsToString(cluster.getMetadata().getTags()));
        envVariables.put(JobConstants.GENIE_COMMAND_ID_ENV_VAR, command.getId());
        envVariables.put(JobConstants.GENIE_COMMAND_NAME_ENV_VAR, command.getMetadata().getName());
        envVariables.put(JobConstants.GENIE_COMMAND_TAGS_ENV_VAR,
                this.tagsToString(command.getMetadata().getTags()));
        envVariables.put(JobConstants.GENIE_JOB_ID_ENV_VAR, id);
        envVariables.put(JobConstants.GENIE_JOB_NAME_ENV_VAR, jobRequest.getMetadata().getName());
        // Fall back to the configured default when the command doesn't declare a memory value
        envVariables.put(JobConstants.GENIE_JOB_MEMORY_ENV_VAR,
                String.valueOf(command.getMemory().orElse(this.defaultMemory)));
        envVariables.put(JobConstants.GENIE_JOB_TAGS_ENV_VAR,
                this.tagsToString(jobRequest.getMetadata().getTags()));
        envVariables.put(JobConstants.GENIE_JOB_GROUPING_ENV_VAR,
                jobRequest.getMetadata().getGrouping().orElse(""));
        envVariables.put(JobConstants.GENIE_JOB_GROUPING_INSTANCE_ENV_VAR,
                jobRequest.getMetadata().getGroupingInstance().orElse(""));
        envVariables.put(JobConstants.GENIE_REQUESTED_COMMAND_TAGS_ENV_VAR,
                this.tagsToString(jobRequest.getCriteria().getCommandCriterion().getTags()));
        final List<Criterion> clusterCriteria = jobRequest.getCriteria().getClusterCriteria();
        final List<String> clusterCriteriaTags = Lists.newArrayListWithExpectedSize(clusterCriteria.size());
        for (int i = 0; i < clusterCriteria.size(); i++) {
            final Criterion criterion = clusterCriteria.get(i);
            final String criteriaTagsString = this.tagsToString(criterion.getTags());
            envVariables.put(JobConstants.GENIE_REQUESTED_CLUSTER_TAGS_ENV_VAR + "_" + i, criteriaTagsString);
            clusterCriteriaTags.add("[" + criteriaTagsString + "]");
        }
        envVariables.put(JobConstants.GENIE_REQUESTED_CLUSTER_TAGS_ENV_VAR,
                "[" + StringUtils.join(clusterCriteriaTags, ',') + "]");
        return envVariables.build();
    }

    /**
     * Helper method to convert a v4 JobRequest to a v3 job request.
     *
     * @param id         The id of the job, set as the id of the v3 request
     * @param jobRequest The v4 job request instance
     * @return The v3 job request instance
     */
    // TODO: This should be removed once we fully port rest of application to v4 and only have v3 interface with
    //       Adapters at API level
    private com.netflix.genie.common.dto.JobRequest toV3JobRequest(final String id, final JobRequest jobRequest) {
        final com.netflix.genie.common.dto.JobRequest.Builder v3Builder = new com.netflix.genie.common.dto.JobRequest.Builder(
                jobRequest.getMetadata().getName(), jobRequest.getMetadata().getUser(),
                jobRequest.getMetadata().getVersion(),
                jobRequest.getCriteria().getClusterCriteria().stream().map(this::toClusterCriteria).collect(
                        Collectors.toList()),
                this.toV3Tags(jobRequest.getCriteria().getCommandCriterion())).withId(id)
                        .withApplications(jobRequest.getCriteria().getApplicationIds())
                        .withCommandArgs(jobRequest.getCommandArgs())
                        .withDisableLogArchival(jobRequest.getRequestedAgentConfig().isArchivingDisabled())
                        .withTags(jobRequest.getMetadata().getTags());

        // Optional metadata is only copied over when present
        final JobMetadata metadata = jobRequest.getMetadata();
        metadata.getEmail().ifPresent(v3Builder::withEmail);
        metadata.getGroup().ifPresent(v3Builder::withGroup);
        metadata.getGrouping().ifPresent(v3Builder::withGrouping);
        metadata.getGroupingInstance().ifPresent(v3Builder::withGroupingInstance);
        metadata.getDescription().ifPresent(v3Builder::withDescription);
        metadata.getMetadata().ifPresent(v3Builder::withMetadata);

        final ExecutionEnvironment jobResources = jobRequest.getResources();
        v3Builder.withConfigs(jobResources.getConfigs());
        v3Builder.withDependencies(jobResources.getDependencies());
        jobResources.getSetupFile().ifPresent(v3Builder::withSetupFile);

        jobRequest.getRequestedAgentConfig().getTimeoutRequested().ifPresent(v3Builder::withTimeout);

        return v3Builder.build();
    }

    /**
     * Convert a v4 criterion into a v3 cluster criteria holding the equivalent set of tags.
     *
     * @param criterion The v4 criterion
     * @return The v3 cluster criteria
     */
    private ClusterCriteria toClusterCriteria(final Criterion criterion) {
        return new ClusterCriteria(this.toV3Tags(criterion));
    }

    /**
     * Flatten a v4 criterion into a set of v3 tags. The id and name of the criterion, when present, are encoded as
     * {@code genie.id:} and {@code genie.name:} prefixed tags and added to the tags of the criterion.
     *
     * @param criterion The v4 criterion
     * @return A new mutable set of tags
     */
    private Set<String> toV3Tags(final Criterion criterion) {
        final Set<String> tags = Sets.newHashSet();
        criterion.getId().ifPresent(id -> tags.add("genie.id:" + id));
        criterion.getName().ifPresent(name -> tags.add("genie.name:" + name));
        tags.addAll(criterion.getTags());
        return tags;
    }

    /**
     * Helper to convert a set of tags into a single comma separated string used as the value of an environment
     * variable. Single and double quotes inside the tags are escaped with a backslash. No surrounding quotes are
     * added. Input tags are sorted to produce a deterministic output value.
     *
     * @param tags a set of tags, must not be null
     * @return a CSV string
     */
    private String tagsToString(final Set<String> tags) {
        final List<String> sortedTags = Lists.newArrayList(tags);
        // Sort tags for the sake of determinism (e.g., tests)
        sortedTags.sort(Comparator.naturalOrder());
        final String joinedString = StringUtils.join(sortedTags, ',');
        // Escape quotes. The replacement argument follows regular expression replacement rules in which a backslash
        // escapes the next character, so the backslash itself has to be doubled for one to reach the output.
        final String singleQuotesEscaped = RegExUtils.replaceAll(joinedString, "'", "\\\\'");
        return RegExUtils.replaceAll(singleQuotesEscaped, "\"", "\\\\\"");
    }

    /**
     * Helper to convert archive location prefix to an archive location.
     *
     * @param requestedArchiveLocationPrefix archive location prefix uri
     * @param jobId                          job id
     * @return archive location for the job or {@code null} if the prefix is null or blank
     */
    @Nullable
    private String toArchiveLocation(@Nullable final String requestedArchiveLocationPrefix, final String jobId) {
        if (StringUtils.isBlank(requestedArchiveLocationPrefix)) {
            return null;
        } else if (requestedArchiveLocationPrefix.endsWith(ARCHIVE_LOCATION_SEPARATOR)) {
            // Avoid doubling the separator when the prefix already ends with one
            return requestedArchiveLocationPrefix + jobId;
        } else {
            return requestedArchiveLocationPrefix + ARCHIVE_LOCATION_SEPARATOR + jobId;
        }
    }
}