com.spotify.helios.testing.TemporaryJob.java Source code

Java tutorial

Introduction

Here is the source code for com.spotify.helios.testing.TemporaryJob.java

Source

/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.testing;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.Maps.newHashMap;
import static com.spotify.helios.testing.Jobs.TIMEOUT_MILLIS;
import static com.spotify.helios.testing.Jobs.get;
import static com.spotify.helios.testing.Jobs.getJobDescription;
import static java.lang.String.format;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.junit.Assert.fail;

import com.spotify.helios.client.HeliosClient;
import com.spotify.helios.common.descriptors.Deployment;
import com.spotify.helios.common.descriptors.Goal;
import com.spotify.helios.common.descriptors.HostStatus;
import com.spotify.helios.common.descriptors.Job;
import com.spotify.helios.common.descriptors.JobStatus;
import com.spotify.helios.common.descriptors.PortMapping;
import com.spotify.helios.common.descriptors.TaskStatus;
import com.spotify.helios.common.descriptors.ThrottleState;
import com.spotify.helios.common.protocol.CreateJobResponse;
import com.spotify.helios.common.protocol.JobDeployResponse;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.net.HostAndPort;
import com.google.common.util.concurrent.Futures;
import org.apache.commons.lang.text.StrSubstitutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

public class TemporaryJob {

    /** Logger shared by all instances of this class. */
    private static final Logger log = LoggerFactory.getLogger(TemporaryJob.class);

    /** Task status per host, recorded by awaitUp once the task on that host reports RUNNING. */
    private final Map<String, TaskStatus> statuses = newHashMap();
    /** Client used to create and deploy the job and to query job and host status. */
    private final HeliosClient client;
    /** Used by awaitPort to test whether a port mapping is reachable on an endpoint. */
    private final Prober prober;
    /** Produces the report steps ("create job", "deploy job", "start container", "probe"). */
    private final TemporaryJobReports.ReportWriter reportWriter;
    /** The job this instance creates, deploys and monitors. */
    private final Job job;
    /** Immutable copy of the hosts the job is deployed to. */
    private final List<String> hosts;
    /** Host name to HELIOS_HOST_ADDRESS value, filled in by deploy() for hosts that define it. */
    private final Map<String, String> hostToIp = newHashMap();
    /** Immutable copy of the port names that awaitUp probes after the task is running. */
    private final Set<String> waitPorts;
    /** Template logged once a container id is known; the empty string when none was supplied. */
    private final String jobDeployedMessageFormat;
    /** Timeout, in milliseconds, for waiting until the task is running on a host. */
    private final long deployTimeoutMillis;

    /**
     * Creates a handle for a job that is to be deployed temporarily.
     *
     * @param client the Helios client; must not be null
     * @param prober the prober used to check wait ports; must not be null
     * @param reportWriter the writer that records deployment steps; must not be null
     * @param job the job to deploy; must not be null
     * @param hosts the hosts to deploy to; must not be null, copied defensively
     * @param waitPorts the names of the ports to probe; must not be null, copied defensively
     * @param jobDeployedMessageFormat the message template to log on deployment; null is treated
     *     as the empty string
     * @param deployTimeoutMillis how long to wait for the task to be running, in milliseconds
     */
    TemporaryJob(final HeliosClient client, final Prober prober,
            final TemporaryJobReports.ReportWriter reportWriter, final Job job, final List<String> hosts,
            final Set<String> waitPorts, final String jobDeployedMessageFormat, final long deployTimeoutMillis) {
        // Arguments are validated in declaration order so the first null one is reported.
        this.client = checkNotNull(client, "client");
        this.prober = checkNotNull(prober, "prober");
        this.reportWriter = checkNotNull(reportWriter, "reportWriter");
        this.job = checkNotNull(job, "job");

        checkNotNull(hosts, "hosts");
        this.hosts = ImmutableList.copyOf(hosts);

        checkNotNull(waitPorts, "waitPorts");
        this.waitPorts = ImmutableSet.copyOf(waitPorts);

        this.jobDeployedMessageFormat = jobDeployedMessageFormat == null ? "" : jobDeployedMessageFormat;
        this.deployTimeoutMillis = deployTimeoutMillis;
    }

    /**
     * Returns the job managed by this instance.
     * @return the job passed to the constructor
     */
    public Job job() {
        return this.job;
    }

    /**
     * Returns the hosts this job is deployed to.
     * @return the immutable list of host names created in the constructor
     */
    public List<String> hosts() {
        return this.hosts;
    }

    /**
     * Returns a snapshot of the task statuses recorded so far, keyed by host.
     * @return an immutable copy of the recorded statuses
     */
    public Map<String, TaskStatus> statuses() {
        final Map<String, TaskStatus> snapshot = ImmutableMap.copyOf(statuses);
        return snapshot;
    }

    /**
     * Looks up the external port at which a named, registered port of this job can be reached on
     * a given host. This is useful to discover the value of a dynamically allocated port.
     * @param host the host where the job is deployed; must be one of {@link #hosts()}
     * @param port the name of the registered port; must be one of the job's port names
     * @return the external port, or null if no task status has been recorded for the host yet or
     * the recorded status has no mapping for the port
     * @throws java.lang.IllegalArgumentException if the host or the port name is not known
     */
    public Integer port(final String host, final String port) {
        checkArgument(hosts.contains(host), "host %s not found", host);
        checkArgument(job.getPorts().containsKey(port), "port %s not found", port);

        final TaskStatus taskStatus = statuses.get(host);
        final PortMapping mapping = taskStatus == null ? null : taskStatus.getPorts().get(port);
        return mapping == null ? null : mapping.getExternalPort();
    }

    /**
     * Returns a {@link com.google.common.net.HostAndPort} for a registered port. This is useful
     * for discovering the value of dynamically allocated ports. This method should only be called
     * when the job has been deployed to a single host. If the job has been deployed to multiple
     * hosts an AssertionError will be thrown indicating that the {@link #addresses(String)} method
     * should be called instead.
     * @param port the name of the registered port
     * @return a HostAndPort describing where the registered port can be reached. Null if
     * no address is available, i.e. no task status has been recorded for any host yet.
     * @throws java.lang.IllegalArgumentException if the port name is not registered on the job
     * @throws java.lang.AssertionError if the job has been deployed to more than one host
     */
    public HostAndPort address(final String port) {
        final List<HostAndPort> addresses = addresses(port);

        if (addresses.size() > 1) {
            throw new AssertionError("Job has been deployed to multiple hosts, use addresses method instead");
        }

        // Honor the documented null return instead of failing with IndexOutOfBoundsException
        // when there is nothing to address yet.
        if (addresses.isEmpty()) {
            return null;
        }

        return addresses.get(0);
    }

    /**
     * Returns a {@link com.google.common.net.HostAndPort} object for a registered port, for each
     * host a task status has been recorded for. This is useful for discovering the value of
     * dynamically allocated ports.
     * @param port the name of the registered port
     * @return a list of HostAndPort objects describing where the registered port can be reached;
     * empty if no task status has been recorded yet. Never null.
     * @throws java.lang.IllegalArgumentException if the port name is not registered on the job
     * @throws java.lang.AssertionError if a recorded task status has no external port for the
     * given port name
     */
    public List<HostAndPort> addresses(final String port) {
        checkArgument(job.getPorts().containsKey(port), "port %s not found", port);
        final List<HostAndPort> addresses = Lists.newArrayList();
        for (final Map.Entry<String, TaskStatus> entry : statuses.entrySet()) {
            final String host = entry.getKey();
            final PortMapping mapping = entry.getValue().getPorts().get(port);
            final Integer externalPort = mapping == null ? null : mapping.getExternalPort();
            // Checked explicitly: an assert statement is skipped unless the JVM runs with -ea,
            // which would turn a missing port into an anonymous NullPointerException.
            if (externalPort == null) {
                throw new AssertionError(
                        format("No external port available for port %s on host %s", port, host));
            }
            addresses.add(HostAndPort.fromParts(endpointFromHost(host), externalPort));
        }
        return addresses;
    }

    /**
     * Creates the job, deploys it to every configured host, and then waits for each host to
     * report the task as running and for all wait ports to respond. The create and deploy phases
     * are each recorded as a step on the report writer.
     *
     * @throws java.lang.AssertionError if the job cannot be created or deployed, or if waiting
     * for it to come up times out
     */
    void deploy() {
        final TemporaryJobReports.Step createJob = reportWriter.step("create job").tag("jobId", job.getId());
        try {
            // Create job
            log.info("Creating job {}", job.getId().toShortString());
            final CreateJobResponse createResponse = get(client.createJob(job));
            if (createResponse.getStatus() != CreateJobResponse.Status.OK) {
                fail(format("Failed to create job %s - %s", job.getId(), createResponse.toString()));
            }

            createJob.markSuccess();
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            throw failure(format("Failed to create job %s %s - %s", job.getId(), job.toString(), e), e);
        } finally {
            createJob.finish();
        }

        final TemporaryJobReports.Step deployJob = reportWriter.step("deploy job").tag("jobId", job.getId());
        try {
            // Deploy job
            final Deployment deployment = Deployment.of(job.getId(), Goal.START);
            for (final String host : hosts) {
                // HELIOS_HOST_ADDRESS is the IP address we should use to reach the host, instead of
                // the hostname. This is used when running a helios cluster inside a VM, and the containers
                // can be reached by IP address only, since DNS won't be able to resolve the host name of
                // the helios agent running in the VM.
                // Use the same bounded wait as the other client calls so an unresponsive
                // master cannot block the test forever.
                final HostStatus hostStatus = get(client.hostStatus(host));
                // NOTE(review): a null status presumably means the master does not know the host -
                // confirm. The lookup is skipped so the deploy response reports the problem.
                final String hostAddress = hostStatus == null
                        ? null : hostStatus.getEnvironment().get("HELIOS_HOST_ADDRESS");
                if (hostAddress != null) {
                    hostToIp.put(host, hostAddress);
                }

                log.info("Deploying {} to {}", getJobDescription(job), host);
                final JobDeployResponse deployResponse = get(client.deploy(deployment, host));
                if (deployResponse.getStatus() != JobDeployResponse.Status.OK) {
                    fail(format("Failed to deploy job %s %s - %s", job.getId(), job.toString(), deployResponse));
                }
            }

            deployJob.markSuccess();
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            throw failure(format("Failed to deploy job %s %s - %s", job.getId(), job.toString(), e), e);
        } finally {
            deployJob.finish();
        }

        try {
            // Wait for job to come up
            for (final String host : hosts) {
                awaitUp(host);
            }
        } catch (TimeoutException e) {
            throw failure(format("Failed while probing job %s %s - %s", job.getId(), job.toString(), e), e);
        }
    }

    /**
     * Builds the AssertionError for a failed deployment phase, keeping the original exception as
     * the cause and restoring the thread's interrupt flag when the cause was an interruption.
     */
    private static AssertionError failure(final String message, final Exception cause) {
        if (cause instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        return new AssertionError(message, cause);
    }

    /**
     * Undeploys this job from its hosts by delegating to {@code Jobs.undeploy}, which is handed
     * the given list for collecting errors.
     * @param errors the list passed on to {@code Jobs.undeploy}
     */
    void undeploy(final List<AssertionError> errors) {
        Jobs.undeploy(client, job, hosts, errors);
    }

    /**
     * Undeploys and removes this TemporaryJob from the Helios cluster. This is normally done
     * automatically by TemporaryJobs at the end of the test run. Use this method if you need to
     * manually undeploy a job prior to the end of the test run. If any error is collected during
     * undeployment, the test is failed with the first one.
     */
    public void undeploy() {
        final List<AssertionError> failures = Lists.newArrayList();
        undeploy(failures);

        if (failures.isEmpty()) {
            return;
        }
        final AssertionError first = failures.get(0);
        fail(format("Failed to undeploy job %s - %s", getJobDescription(job), first));
    }

    /**
     * Waits until the task on the given host is running, records its status, and then probes
     * every wait port on that host. Both phases are recorded as report steps.
     * @param host the host to wait for
     * @throws TimeoutException declared for the waiting and probing performed here
     */
    private void awaitUp(final String host) throws TimeoutException {
        final TemporaryJobReports.Step containerStep = reportWriter.step("start container")
                .tag("jobId", job.getId()).tag("host", host).tag("image", job.getImage());
        try {
            // Tracks whether the deployed message has been logged, so it is emitted only once.
            final AtomicBoolean deployedMessageShown = new AtomicBoolean(false);
            final Callable<TaskStatus> poller = new Callable<TaskStatus>() {
                @Override
                public TaskStatus call() throws Exception {
                    return pollRunningTask(host, deployedMessageShown);
                }
            };
            final TaskStatus runningTask = Polling.awaitUnchecked(deployTimeoutMillis, MILLISECONDS,
                    job.getId() + " was not up within %d %s", poller);

            statuses.put(host, runningTask);

            containerStep.markSuccess();
        } finally {
            containerStep.finish();
        }

        final TemporaryJobReports.Step probeStep = reportWriter.step("probe").tag("jobId", job.getId())
                .tag("host", host);
        try {
            for (final String waitPort : waitPorts) {
                awaitPort(waitPort, host);
            }

            probeStep.markSuccess();
        } finally {
            probeStep.finish();
        }
    }

    /**
     * Performs one polling attempt for the task on the given host.
     * @param host the host whose task status is inspected
     * @param deployedMessageShown flag set once the deployed message has been logged
     * @return the task status if the task is RUNNING, otherwise null
     */
    private TaskStatus pollRunningTask(final String host, final AtomicBoolean deployedMessageShown) {
        final JobStatus jobStatus = Futures.getUnchecked(client.jobStatus(job.getId()));
        if (jobStatus == null) {
            log.debug("Job status not available");
            return null;
        }
        final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(host);
        if (taskStatus == null) {
            log.debug("Task status not available on {}", host);
            return null;
        }

        // Log the deployed message once, as soon as a container id is known.
        if (!(deployedMessageShown.get() || isNullOrEmpty(jobDeployedMessageFormat)
                || isNullOrEmpty(taskStatus.getContainerId()))) {
            outputDeployedMessage(host, taskStatus.getContainerId());
            deployedMessageShown.set(true);
        }

        verifyHealthy(host, taskStatus);

        final TaskStatus.State state = taskStatus.getState();
        log.info("Job state of {}: {}", job.getImage(), state);

        return state == TaskStatus.State.RUNNING ? taskStatus : null;
    }

    /**
     * Fetches the current status of this job and verifies the task on every reported host.
     * Does nothing if no job status is available.
     * @throws AssertionError if any task is in the FAILED, EXITED or STOPPED state
     */
    void verifyHealthy() throws AssertionError {
        log.debug("Checking health of {}", job.getImage());
        final JobStatus status = Futures.getUnchecked(client.jobStatus(job.getId()));
        if (status == null) {
            return;
        }
        for (final Map.Entry<String, TaskStatus> entry : status.getTaskStatuses().entrySet()) {
            verifyHealthy(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Verifies that the task on the given host has not failed, exited or stopped.
     * @param host the host the task status belongs to; used in the error message
     * @param status the task status to inspect
     * @throws AssertionError if the task is in the FAILED, EXITED or STOPPED state
     */
    private void verifyHealthy(final String host, final TaskStatus status) {
        log.debug("Checking health of {} on {}", job.getImage(), host);
        final TaskStatus.State state = status.getState();
        if (state == TaskStatus.State.FAILED || state == TaskStatus.State.EXITED
                || state == TaskStatus.State.STOPPED) {
            // Throw exception which should stop the test dead in its tracks
            String stateString = state.toString();
            if (status.getThrottled() != ThrottleState.NO) {
                // Append the throttle state when the task is being throttled.
                stateString += format("(%s)", status.getThrottled());
            }
            throw new AssertionError(format(
                    "Unexpected job state %s for job %s with image %s on host %s. Check helios agent "
                            + "logs for details. If you're using HeliosSoloDeployment, set "
                            + "`HeliosSoloDeployment.fromEnv().removeHeliosSoloOnExit(false)` and check the "
                            + "logs of the helios-solo container with `docker logs <container ID>`.",
                    stateString, job.getId().toShortString(), job.getImage(), host));
        }
    }

    /**
     * Polls the prober until the named port of the task on the given host is reachable.
     * @param port the name of the port to probe
     * @param host the host the task runs on; a task status must already be recorded for it
     * @throws TimeoutException declared for the polling performed here
     * @throws AssertionError if no task status is recorded for the host, or the status has no
     * external port for the given port name
     */
    private void awaitPort(final String port, final String host) throws TimeoutException {
        final String endpoint = endpointFromHost(host);
        final TaskStatus taskStatus = statuses.get(host);
        // Checked explicitly rather than with assert statements, which are skipped unless the
        // JVM runs with -ea and would otherwise surface as an anonymous NullPointerException.
        if (taskStatus == null) {
            throw new AssertionError(format("No task status recorded for host %s", host));
        }
        final PortMapping portMapping = taskStatus.getPorts().get(port);
        final Integer externalPort = portMapping == null ? null : portMapping.getExternalPort();
        if (externalPort == null) {
            throw new AssertionError(
                    format("No external port available for port %s on host %s", port, host));
        }
        Polling.awaitUnchecked(TIMEOUT_MILLIS, MILLISECONDS,
                "Unable to connect to port " + port + " on host " + host + " within %d %s",
                new Callable<Boolean>() {
                    @Override
                    public Boolean call() throws Exception {
                        log.info("Probing: {} @ {}:{}", port, endpoint, portMapping);
                        final boolean up = prober.probe(endpoint, portMapping);
                        if (up) {
                            log.info("Up: {} @ {}:{}", port, endpoint, externalPort);
                            return true;
                        } else {
                            // Not reachable yet; returned as the "not up" result of this poll.
                            return null;
                        }
                    }
                });
    }

    /**
     * Resolves the endpoint to use for reaching a host: the ip address recorded for the hostname
     * if there is one, otherwise the hostname itself.
     * @param host the hostname to look up
     * @return the recorded ip address for the host, or the given hostname if none is recorded
     */
    private String endpointFromHost(String host) {
        final String mappedIp = hostToIp.get(host);
        if (mappedIp != null) {
            return mappedIp;
        }
        return host;
    }

    /**
     * Logs the job-deployed message, substituting the host, job name, version, hash, job
     * description, image and container id into the configured message format.
     * @param host the host the container runs on
     * @param containerId the id of the deployed container
     */
    private void outputDeployedMessage(final String host, final String containerId) {
        final ImmutableMap.Builder<String, Object> values = new ImmutableMap.Builder<String, Object>();
        values.put("host", host);
        values.put("name", job.getId().getName());
        values.put("version", job.getId().getVersion());
        values.put("hash", job.getId().getHash());
        values.put("job", job.toString());
        values.put("image", job.getImage());
        values.put("containerId", containerId);

        final StrSubstitutor substitutor = new StrSubstitutor(values.build());
        final String message = substitutor.replace(jobDeployedMessageFormat);
        log.info("{}", message);
    }
}