Java tutorial
// Copyright 2017 Twitter. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.twitter.heron.scheduler.kubernetes; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.IntStream; import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.squareup.okhttp.Response; import com.twitter.heron.api.utils.TopologyUtils; import com.twitter.heron.scheduler.TopologyRuntimeManagementException; import com.twitter.heron.scheduler.TopologySubmissionException; import com.twitter.heron.scheduler.utils.Runtime; import com.twitter.heron.scheduler.utils.SchedulerUtils; import com.twitter.heron.scheduler.utils.SchedulerUtils.ExecutorPort; import com.twitter.heron.spi.common.Config; import com.twitter.heron.spi.packing.PackingPlan; import com.twitter.heron.spi.packing.Resource; import io.kubernetes.client.ApiClient; import io.kubernetes.client.ApiException; import io.kubernetes.client.apis.AppsV1beta1Api; import io.kubernetes.client.models.V1Container; import io.kubernetes.client.models.V1ContainerPort; import io.kubernetes.client.models.V1DeleteOptions; import io.kubernetes.client.models.V1EnvVar; import io.kubernetes.client.models.V1EnvVarSource; import io.kubernetes.client.models.V1LabelSelector; import io.kubernetes.client.models.V1ObjectFieldSelector; import io.kubernetes.client.models.V1ObjectMeta; import io.kubernetes.client.models.V1PodSpec; import io.kubernetes.client.models.V1PodTemplateSpec; import io.kubernetes.client.models.V1ResourceRequirements; import io.kubernetes.client.models.V1Toleration; import io.kubernetes.client.models.V1Volume; import io.kubernetes.client.models.V1VolumeMount; import io.kubernetes.client.models.V1beta1StatefulSet; import io.kubernetes.client.models.V1beta1StatefulSetSpec; public class AppsV1beta1Controller extends KubernetesController { private static final Logger LOG = Logger.getLogger(AppsV1beta1Controller.class.getName()); private static final String ENV_SHARD_ID = "SHARD_ID"; private final AppsV1beta1Api client; AppsV1beta1Controller(Config configuration, Config runtimeConfiguration) { super(configuration, runtimeConfiguration); final ApiClient apiClient = new ApiClient().setBasePath(getKubernetesUri()); client = new AppsV1beta1Api(apiClient); } @Override boolean submit(PackingPlan packingPlan) { final String topologyName = getTopologyName(); if (!topologyName.equals(topologyName.toLowerCase())) { throw new TopologySubmissionException("K8S scheduler does not allow upper case topologies."); } final Resource containerResource = getContainerResource(packingPlan); // find the max number of instances in a container so we can open // enough ports if remote debugging is enabled. int numberOfInstances = 0; for (PackingPlan.ContainerPlan containerPlan : packingPlan.getContainers()) { numberOfInstances = Math.max(numberOfInstances, containerPlan.getInstances().size()); } final V1beta1StatefulSet statefulSet = createStatefulSet(containerResource, numberOfInstances); try { final Response response = client .createNamespacedStatefulSetCall(getNamespace(), statefulSet, null, null, null).execute(); if (!response.isSuccessful()) { LOG.log(Level.SEVERE, "Error creating topology message: " + response.message()); KubernetesUtils.logResponseBodyIfPresent(LOG, response); // construct a message based on the k8s api server response throw new TopologySubmissionException(KubernetesUtils.errorMessageFromResponse(response)); } } catch (IOException | ApiException e) { KubernetesUtils.logExceptionWithDetails(LOG, "Error creating topology", e); throw new TopologySubmissionException(e.getMessage()); } return true; } @Override boolean killTopology() { return isStatefulSet() ? deleteStatefulSet() : new KubernetesCompat().killTopology(getKubernetesUri(), getTopologyName(), getNamespace()); } @Override boolean restart(int shardId) { final String message = "Restarting the whole topology is not supported yet. " + "Please kill and resubmit the topology."; LOG.log(Level.SEVERE, message); return false; } @Override public Set<PackingPlan.ContainerPlan> addContainers(Set<PackingPlan.ContainerPlan> containersToAdd) { final V1beta1StatefulSet statefulSet; try { statefulSet = getStatefulSet(); } catch (ApiException ae) { final String message = ae.getMessage() + "\ndetails:" + ae.getResponseBody(); throw new TopologyRuntimeManagementException(message, ae); } final int currentContainerCount = statefulSet.getSpec().getReplicas(); final int newContainerCount = currentContainerCount + containersToAdd.size(); final V1beta1StatefulSetSpec newSpec = new V1beta1StatefulSetSpec(); newSpec.setReplicas(newContainerCount); try { doPatch(newSpec); } catch (ApiException ae) { throw new TopologyRuntimeManagementException(ae.getMessage() + "\ndetails\n" + ae.getResponseBody()); } return containersToAdd; } @Override public void removeContainers(Set<PackingPlan.ContainerPlan> containersToRemove) { final V1beta1StatefulSet statefulSet; try { statefulSet = getStatefulSet(); } catch (ApiException ae) { final String message = ae.getMessage() + "\ndetails:" + ae.getResponseBody(); throw new TopologyRuntimeManagementException(message, ae); } final int currentContainerCount = statefulSet.getSpec().getReplicas(); final int newContainerCount = currentContainerCount - containersToRemove.size(); final V1beta1StatefulSetSpec newSpec = new V1beta1StatefulSetSpec(); newSpec.setReplicas(newContainerCount); try { doPatch(newSpec); } catch (ApiException e) { throw new TopologyRuntimeManagementException(e.getMessage() + "\ndetails\n" + e.getResponseBody()); } } private void doPatch(V1beta1StatefulSetSpec patchedSpec) throws ApiException { final String body = String.format(JSON_PATCH_STATEFUL_SET_REPLICAS_FORMAT, patchedSpec.getReplicas().toString()); final ArrayList<JsonObject> arr = new ArrayList<>(); arr.add(((JsonElement) deserialize(body, JsonElement.class)).getAsJsonObject()); LOG.fine("Update body: " + arr); client.patchNamespacedStatefulSet(getTopologyName(), getNamespace(), arr, null); } private static final String JSON_PATCH_STATEFUL_SET_REPLICAS_FORMAT = "{\"op\":\"replace\",\"path\":\"/spec/replicas\",\"value\":%s}"; private Object deserialize(String jsonStr, Class<?> targetClass) { return (new Gson()).fromJson(jsonStr, targetClass); } V1beta1StatefulSet getStatefulSet() throws ApiException { return client.readNamespacedStatefulSet(getTopologyName(), getNamespace(), null, null, null); } boolean deleteStatefulSet() { try { final V1DeleteOptions options = new V1DeleteOptions(); options.setGracePeriodSeconds(0L); options.setPropagationPolicy(KubernetesConstants.DELETE_OPTIONS_PROPAGATION_POLICY); final Response response = client.deleteNamespacedStatefulSetCall(getTopologyName(), getNamespace(), options, null, null, null, null, null, null).execute(); if (!response.isSuccessful()) { LOG.log(Level.SEVERE, "Error killing topology message: " + response.message()); KubernetesUtils.logResponseBodyIfPresent(LOG, response); throw new TopologyRuntimeManagementException(KubernetesUtils.errorMessageFromResponse(response)); } } catch (IOException | ApiException e) { KubernetesUtils.logExceptionWithDetails(LOG, "Error deleting topology", e); return false; } return true; } boolean isStatefulSet() { try { final Response response = client .readNamespacedStatefulSetCall(getTopologyName(), getNamespace(), null, null, null, null, null) .execute(); return response.isSuccessful(); } catch (IOException | ApiException e) { LOG.warning("isStatefulSet check " + e.getMessage()); } return false; } protected List<String> getExecutorCommand(String containerId) { final Map<ExecutorPort, String> ports = KubernetesConstants.EXECUTOR_PORTS.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().toString())); final Config configuration = getConfiguration(); final Config runtimeConfiguration = getRuntimeConfiguration(); final String[] executorCommand = SchedulerUtils.getExecutorCommand(configuration, runtimeConfiguration, containerId, ports); return Arrays.asList("sh", "-c", KubernetesUtils.getFetchCommand(configuration, runtimeConfiguration) + " && " + setShardIdEnvironmentVariableCommand() + " && " + String.join(" ", executorCommand)); } private static String setShardIdEnvironmentVariableCommand() { return String.format("%s=${POD_NAME##*-} && echo shardId=${%s}", ENV_SHARD_ID, ENV_SHARD_ID); } private V1beta1StatefulSet createStatefulSet(Resource containerResource, int numberOfInstances) { final String topologyName = getTopologyName(); final Config runtimeConfiguration = getRuntimeConfiguration(); final V1beta1StatefulSet statefulSet = new V1beta1StatefulSet(); // setup stateful set metadata final V1ObjectMeta objectMeta = new V1ObjectMeta(); objectMeta.name(topologyName); statefulSet.metadata(objectMeta); // create the stateful set spec final V1beta1StatefulSetSpec statefulSetSpec = new V1beta1StatefulSetSpec(); statefulSetSpec.serviceName(topologyName); statefulSetSpec.setReplicas(Runtime.numContainers(runtimeConfiguration).intValue()); // Parallel pod management tells the StatefulSet controller to launch or terminate // all Pods in parallel, and not to wait for Pods to become Running and Ready or completely // terminated prior to launching or terminating another Pod. statefulSetSpec.setPodManagementPolicy("Parallel"); // add selector match labels "app=heron" and "topology=topology-name" // so the we know which pods to manage final V1LabelSelector selector = new V1LabelSelector(); selector.matchLabels(getMatchLabels(topologyName)); statefulSetSpec.selector(selector); // create a pod template final V1PodTemplateSpec podTemplateSpec = new V1PodTemplateSpec(); // set up pod meta final V1ObjectMeta templateMetaData = new V1ObjectMeta().labels(getLabels(topologyName)); templateMetaData.annotations(getPrometheusAnnotations()); podTemplateSpec.setMetadata(templateMetaData); final List<String> command = getExecutorCommand("$" + ENV_SHARD_ID); podTemplateSpec.spec(getPodSpec(command, containerResource, numberOfInstances)); statefulSetSpec.setTemplate(podTemplateSpec); statefulSet.spec(statefulSetSpec); return statefulSet; } private Map<String, String> getPrometheusAnnotations() { final Map<String, String> annotations = new HashMap<>(); annotations.put(KubernetesConstants.ANNOTATION_PROMETHEUS_SCRAPE, "true"); annotations.put(KubernetesConstants.ANNOTATION_PROMETHEUS_PORT, KubernetesConstants.PROMETHEUS_PORT); return annotations; } private Map<String, String> getMatchLabels(String topologyName) { final Map<String, String> labels = new HashMap<>(); labels.put(KubernetesConstants.LABEL_APP, KubernetesConstants.LABEL_APP_VALUE); labels.put(KubernetesConstants.LABEL_TOPOLOGY, topologyName); return labels; } private Map<String, String> getLabels(String topologyName) { final Map<String, String> labels = new HashMap<>(); labels.put(KubernetesConstants.LABEL_APP, KubernetesConstants.LABEL_APP_VALUE); labels.put(KubernetesConstants.LABEL_TOPOLOGY, topologyName); return labels; } private V1PodSpec getPodSpec(List<String> executorCommand, Resource resource, int numberOfInstances) { final V1PodSpec podSpec = new V1PodSpec(); // set the termination period to 0 so pods can be deleted quickly podSpec.setTerminationGracePeriodSeconds(0L); // set the pod tolerations so pods are rescheduled when nodes go down // https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/#taint-based-evictions podSpec.setTolerations(getTolerations()); podSpec.containers(Collections.singletonList(getContainer(executorCommand, resource, numberOfInstances))); addVolumesIfPresent(podSpec); return podSpec; } private List<V1Toleration> getTolerations() { final List<V1Toleration> tolerations = new ArrayList<>(); KubernetesConstants.TOLERATIONS.forEach(t -> { final V1Toleration toleration = new V1Toleration().key(t).operator("Exists").effect("NoExecute") .tolerationSeconds(10L); tolerations.add(toleration); }); return tolerations; } private void addVolumesIfPresent(V1PodSpec spec) { final Config config = getConfiguration(); if (KubernetesContext.hasVolume(config)) { final V1Volume volume = Volumes.get().create(config); if (volume != null) { LOG.fine("Adding volume: " + volume.toString()); spec.volumes(Collections.singletonList(volume)); } } } private V1Container getContainer(List<String> executorCommand, Resource resource, int numberOfInstances) { final Config configuration = getConfiguration(); final V1Container container = new V1Container().name("executor"); // set up the container images container.setImage(KubernetesContext.getExecutorDockerImage(configuration)); // set up the container command container.setCommand(executorCommand); if (KubernetesContext.hasImagePullPolicy(configuration)) { container.setImagePullPolicy(KubernetesContext.getKubernetesImagePullPolicy(configuration)); } // setup the environment variables for the container final V1EnvVar envVarHost = new V1EnvVar(); envVarHost.name(KubernetesConstants.ENV_HOST).valueFrom( new V1EnvVarSource().fieldRef(new V1ObjectFieldSelector().fieldPath(KubernetesConstants.POD_IP))); final V1EnvVar envVarPodName = new V1EnvVar(); envVarPodName.name(KubernetesConstants.ENV_POD_NAME).valueFrom( new V1EnvVarSource().fieldRef(new V1ObjectFieldSelector().fieldPath(KubernetesConstants.POD_NAME))); container.setEnv(Arrays.asList(envVarHost, envVarPodName)); // set container resources final V1ResourceRequirements resourceRequirements = new V1ResourceRequirements(); final Map<String, String> requests = new HashMap<>(); requests.put(KubernetesConstants.MEMORY, KubernetesUtils.Megabytes(resource.getRam())); requests.put(KubernetesConstants.CPU, Double.toString(resource.getCpu())); resourceRequirements.setRequests(requests); container.setResources(resourceRequirements); // set container ports final boolean debuggingEnabled = TopologyUtils .getTopologyRemoteDebuggingEnabled(Runtime.topology(getRuntimeConfiguration())); container.setPorts(getContainerPorts(debuggingEnabled, numberOfInstances)); // setup volume mounts mountVolumeIfPresent(container); return container; } private List<V1ContainerPort> getContainerPorts(boolean remoteDebugEnabled, int numberOfInstances) { List<V1ContainerPort> ports = new ArrayList<>(); KubernetesConstants.EXECUTOR_PORTS.forEach((p, v) -> { final V1ContainerPort port = new V1ContainerPort(); port.setName(p.getName()); port.setContainerPort(v); ports.add(port); }); if (remoteDebugEnabled) { IntStream.range(0, numberOfInstances).forEach(i -> { final String portName = KubernetesConstants.JVM_REMOTE_DEBUGGER_PORT_NAME + "-" + String.valueOf(i); final V1ContainerPort port = new V1ContainerPort(); port.setName(portName); port.setContainerPort(KubernetesConstants.JVM_REMOTE_DEBUGGER_PORT + i); ports.add(port); }); } return ports; } private void mountVolumeIfPresent(V1Container container) { final Config config = getConfiguration(); if (KubernetesContext.hasContainerVolume(config)) { final V1VolumeMount mount = new V1VolumeMount().name(KubernetesContext.getContainerVolumeName(config)) .mountPath(KubernetesContext.getContainerVolumeMountPath(config)); container.volumeMounts(Collections.singletonList(mount)); } } }