Java tutorial
/* * Copyright (c) 2015 Spotify AB. * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.spotify.heroic.cluster; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.spotify.heroic.HeroicConfiguration; import com.spotify.heroic.HeroicContext; import com.spotify.heroic.common.OptionalLimit; import com.spotify.heroic.lifecycle.LifeCycleRegistry; import com.spotify.heroic.lifecycle.LifeCycles; import com.spotify.heroic.metric.QueryTrace; import com.spotify.heroic.scheduler.Scheduler; import eu.toolchain.async.AsyncFramework; import eu.toolchain.async.AsyncFuture; import eu.toolchain.async.LazyTransform; import eu.toolchain.async.Transform; import lombok.Data; import lombok.ToString; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; import javax.inject.Inject; import javax.inject.Named; import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import 
java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/**
 * Handles management of cluster state.
 * <p>
 * The primary responsibility is to receive refresh requests through {@link #refresh()} that should
 * cause the cluster state to be updated.
 * <p>
 * State is kept in atomic references ({@link #staticNodes}, {@link #clients} and
 * {@link #registry}) that are replaced wholesale rather than mutated in place.
 *
 * @author udoprog
 */
@ClusterScope
@Slf4j
@ToString(of = { "useLocal" })
public class CoreClusterManager implements ClusterManager, LifeCycles {
    /**
     * Trace identifier used for the local node when it is added because no other node was
     * discovered.
     */
    public static final QueryTrace.Identifier LOCAL_IDENTIFIER =
        new QueryTrace.Identifier("[local]");

    private final AsyncFramework async;
    private final ClusterDiscovery discovery;
    private final NodeMetadata localMetadata;
    private final Map<String, RpcProtocol> protocols;
    private final Scheduler scheduler;
    private final Boolean useLocal;
    private final HeroicConfiguration options;
    private final LocalClusterNode local;
    private final HeroicContext context;

    /** Statically configured node URIs; every update installs a fresh copy of the set. */
    final AtomicReference<Set<URI>> staticNodes = new AtomicReference<>(new HashSet<>());

    /** Current node registry, {@code null} until the first refresh has been finalized. */
    final AtomicReference<NodeRegistry> registry = new AtomicReference<>();

    /** Currently connected clients by URI, set to {@code null} by {@link #stop()}. */
    final AtomicReference<Map<URI, ClusterNode>> clients =
        new AtomicReference<>(Collections.emptyMap());

    /** Counter used to derive the id that prefixes the log lines of a refresh. */
    final AtomicLong refreshId = new AtomicLong();

    @Inject
    public CoreClusterManager(
        AsyncFramework async, ClusterDiscovery discovery, NodeMetadata localMetadata,
        Map<String, RpcProtocol> protocols, Scheduler scheduler,
        @Named("useLocal") Boolean useLocal, HeroicConfiguration options, LocalClusterNode local,
        HeroicContext context
    ) {
        this.async = async;
        this.discovery = discovery;
        this.localMetadata = localMetadata;
        this.protocols = protocols;
        this.scheduler = scheduler;
        this.useLocal = useLocal;
        this.options = options;
        this.local = local;
        this.context = context;
    }

    /**
     * Hook {@link #start()} and {@link #stop()} into the given life cycle registry.
     *
     * @param registry registry to register the start and stop hooks with
     */
    @Override
    public void register(LifeCycleRegistry registry) {
        registry.start(this::start);
        registry.stop(this::stop);
    }

    /**
     * Get the currently configured static nodes.
     *
     * @return a resolved future containing the current set of static node URIs
     */
    @Override
    public AsyncFuture<Set<URI>> getStaticNodes() {
        return async.resolved(staticNodes.get());
    }

    /**
     * Remove a static node and refresh the cluster if the set of static nodes changed.
     *
     * @param node URI of the node to remove
     * @return a resolved future if the node was not registered, otherwise the future of the
     * triggered refresh
     */
    @Override
    public AsyncFuture<Void> removeStaticNode(URI node) {
        while (true) {
            final Set<URI> old = staticNodes.get();
            /* copy the very snapshot that the compare-and-set below is validated against */
            final Set<URI> update = new HashSet<>(old);

            /* node is not registered, nothing to remove */
            if (!update.remove(node)) {
                return async.resolved();
            }

            /* retry with a fresh snapshot if another thread replaced the set in the meantime */
            if (staticNodes.compareAndSet(old, update)) {
                break;
            }
        }

        return refresh();
    }

    /**
     * Add a static node and refresh the cluster if the set of static nodes changed.
     *
     * @param node URI of the node to add
     * @return a resolved future if the node was already registered, otherwise the future of the
     * triggered refresh
     */
    @Override
    public AsyncFuture<Void> addStaticNode(URI node) {
        while (true) {
            final Set<URI> old = staticNodes.get();
            /* copy the very snapshot that the compare-and-set below is validated against */
            final Set<URI> update = new HashSet<>(old);

            /* node already registered */
            if (!update.add(node)) {
                return async.resolved();
            }

            /* retry with a fresh snapshot if another thread replaced the set in the meantime */
            if (staticNodes.compareAndSet(old, update)) {
                break;
            }
        }

        return refresh();
    }

    /**
     * Get all entries of the current registry.
     *
     * @return the entries of the current registry
     * @throws IllegalStateException if no registry has been set yet
     */
    @Override
    public List<ClusterNode> getNodes() {
        final NodeRegistry registry = this.registry.get();

        if (registry == null) {
            throw new IllegalStateException("Registry not ready");
        }

        return registry.getEntries();
    }

    /**
     * Perform a refresh of the cluster information.
     * <p>
     * A refresh happens in four steps.
     * <ol>
     * <li>discovery</li>
     * <li>sweep</li>
     * <li>log and prepare</li>
     * <li>finalize</li>
     * </ol>
     * </p>
     * <p>
     * The discovery phase adds a collection of URIs provided statically (by {@link #staticNodes})
     * and dynamically (by {@link #discovery}) to be fed into the sweep step.
     * </p>
     * <p>
     * The sweep step takes the existing {@link #clients} map and compares it to the updated list of
     * URIs.
     * </p>
     * <p>
     * The log and prepare step logs information about which operations happened, closes any clients
     * that should be closed and sets up for the final step.
     * </p>
     * <p>
     * The finalize step takes the collection of new clients and node entries, replaces it
     * atomically with the old collection. Finally, if there is a race another refresh operation
     * will be issued.
     * </p>
     *
     * @return a future indicating the state of the refresh.
     */
    @Override
    public AsyncFuture<Void> refresh() {
        final String id = String.format("%08x", refreshId.getAndIncrement());
        log.info("new refresh with id ({})", id);
        return refreshDiscovery(id);
    }

    /**
     * Get statistics about the nodes in the current registry.
     *
     * @return statistics built from the online and offline nodes of the registry, or {@code null}
     * if no registry has been set yet
     */
    @Override
    public ClusterManager.Statistics getStatistics() {
        final NodeRegistry registry = this.registry.get();

        if (registry == null) {
            return null;
        }

        return new ClusterManager.Statistics(registry.getOnlineNodes(),
            registry.getOfflineNodes());
    }

    /**
     * Build one {@link ClusterShard} for every shard returned by {@link #findFromAllShards()},
     * applying the given optional group to each node of the shard.
     *
     * @param group optional group passed on to each node
     * @return an immutable list of shards
     * @throws IllegalStateException if no registry has been set yet
     */
    @Override
    public List<ClusterShard> useOptionalGroup(final Optional<String> group) {
        final ImmutableList.Builder<ClusterShard> shards = ImmutableList.builder();

        for (final Pair<Map<String, String>, List<ClusterNode>> e : findFromAllShards()) {
            shards.add(new ClusterShard(async, e.getKey(), ImmutableList.copyOf(
                e.getValue().stream().map(c -> c.useOptionalGroup(group)).iterator())));
        }

        return shards.build();
    }

    /**
     * Get the configured protocols.
     *
     * @return an immutable copy of the configured protocols
     */
    @Override
    public Set<RpcProtocol> protocols() {
        return ImmutableSet.copyOf(protocols.values());
    }

    /**
     * Start hook.
     * <p>
     * Unless running in oneshot mode, a refresh named "cluster-refresh" is scheduled for every
     * minute once the context has started. In both modes an initial refresh is triggered after
     * the context has started; a failure of that refresh is only logged.
     *
     * @return an already resolved future, startup does not wait for the initial refresh
     */
    AsyncFuture<Void> start() {
        final AsyncFuture<Void> startup;

        if (!options.isOneshot()) {
            startup = context.startedFuture().directTransform(result -> {
                scheduler.periodically("cluster-refresh", 1, TimeUnit.MINUTES,
                    () -> refresh().get());

                return null;
            });
        } else {
            startup = context.startedFuture();
        }

        startup.lazyTransform(result -> refresh().catchFailed((Throwable e) -> {
            log.error("initial metadata refresh failed", e);
            return null;
        }));

        return async.resolved();
    }

    /**
     * Stop hook.
     * <p>
     * Replaces the {@link #clients} map with {@code null}, which causes a refresh reaching the
     * sweep step to abort, and closes every client that was part of the map.
     *
     * @return a future that is resolved when all clients have been closed
     */
    AsyncFuture<Void> stop() {
        final Map<URI, ClusterNode> clients = this.clients.getAndSet(null);

        /* already stopped */
        if (clients == null) {
            return async.resolved();
        }

        return async.collectAndDiscard(
            clients.values().stream().map(ClusterNode::close).collect(Collectors.toList()));
    }

    /**
     * Find nodes from all shards in the current registry, without a limit.
     *
     * @return pairs of shard tags and the nodes found for that shard
     * @throws IllegalStateException if no registry has been set yet
     */
    List<Pair<Map<String, String>, List<ClusterNode>>> findFromAllShards() {
        final NodeRegistry registry = this.registry.get();

        if (registry == null) {
            throw new IllegalStateException("Registry not ready");
        }

        return registry.findFromAllShards(OptionalLimit.empty());
    }

    /**
     * Collect the distinct metadata tags of the given nodes.
     *
     * @param entries nodes to extract tags from
     * @return the set of tags found among the given nodes
     */
    Set<Map<String, String>> extractKnownShards(Set<ClusterNode> entries) {
        final Set<Map<String, String>> knownShards = new HashSet<>();

        for (final ClusterNode e : entries) {
            knownShards.add(e.metadata().getTags());
        }

        return knownShards;
    }

    /**
     * Connect to the given URI and wrap the result in an {@link Update}.
     * <p>
     * If the local node should be used and the connected node reports the same id as the local
     * metadata, the connection is closed and the local node is used in its place.
     *
     * @param id id of the refresh operation, used for logging
     * @param uri URI to connect to
     * @return a future resolving to a {@link SuccessfulUpdate}, or to a {@link FailedUpdate} if
     * the protocol is unsupported or the connection failed
     */
    AsyncFuture<Update> createClusterNode(final String id, final URI uri) {
        final RpcProtocol protocol = protocols.get(uri.getScheme());

        if (protocol == null) {
            return async.resolved(new FailedUpdate(uri,
                new IllegalArgumentException("Unsupported protocol (" + uri.getScheme() + ")")));
        }

        return protocol.connect(uri).<Update>lazyTransform(node -> {
            if (useLocal && localMetadata.getId().equals(node.metadata().getId())) {
                log.info("{} using local instead of {} (closing old node)", id, node);

                final TracingClusterNode tracingNode = new TracingClusterNode(local,
                    new QueryTrace.Identifier(uri.toString() + "[local]"));

                // close old node
                return node
                    .close()
                    .directTransform(v -> new SuccessfulUpdate(uri, true, tracingNode));
            }

            return async.resolved(new SuccessfulUpdate(uri, true,
                new TracingClusterNode(node, new QueryTrace.Identifier(uri.toString()))));
        }).catchFailed(Update.error(uri));
    }

    /**
     * The first step of the refresh operation.
     * <p>
     * Discover a new collection of heroic peers, and feed them into the sweep step.
     *
     * @param id id of the operation
     * @return a future indicating when the operation is finished
     */
    AsyncFuture<Void> refreshDiscovery(final String id) {
        final List<AsyncFuture<List<URI>>> dynamic = new ArrayList<>();

        final List<URI> staticNodes = new ArrayList<>(this.staticNodes.get());

        if (!staticNodes.isEmpty()) {
            dynamic.add(async.resolved(staticNodes));
        }

        dynamic.add(discovery.find());
        return async.collect(dynamic).lazyTransform(refreshSweep(id));
    }

    /**
     * Operation that takes the existing list of clients, compares it to a collection of resolved
     * URIs and determines which nodes should be updated, and which should be removed.
     *
     * @param id id of the operation
     * @return a lazy transform
     */
    LazyTransform<Collection<List<URI>>, Void> refreshSweep(final String id) {
        return uriLists -> {
            final List<URI> uris = ImmutableList.copyOf(Iterables.concat(uriLists));

            final List<AsyncFuture<Update>> updated = new ArrayList<>();
            /* NOTE(review): also appended to from the fetchMetadata callbacks below; presumably
             * those may run on other threads, in which case a plain ArrayList is not safe.
             * Confirm against the threading model of the async framework. */
            final List<RemovedNode> removedNodes = new ArrayList<>();

            final Map<URI, ClusterNode> oldClients = this.clients.get();

            /* clients is set to null by stop() */
            if (oldClients == null) {
                log.warn("{}: Aborting refresh, shutting down", id);
                return async.resolved();
            }

            final Set<URI> removedUris = new HashSet<>(oldClients.keySet());

            for (final URI uri : uris) {
                final ClusterNode node = oldClients.get(uri);

                removedUris.remove(uri);

                if (node == null) {
                    /* first time URI has been seen, resolve new node */
                    updated.add(createClusterNode(id, uri));
                    continue;
                }

                /* re-query metadata for nodes already known and make sure it matches.
                 * if it does not match, create a new cluster node and close the old one.
                 * otherwise, re-use the existing node */
                updated.add(node.fetchMetadata().lazyTransform(m -> {
                    if (!node.metadata().equals(m)) {
                        /* add to removedNodes list to make sure it is being closed */
                        removedNodes.add(new RemovedNode(uri, node));
                        return createClusterNode(id, uri);
                    }

                    return async.resolved(new SuccessfulUpdate(uri, false, node));
                }).catchFailed(Update.error(uri)));
            }

            /* all the nodes that have not been seen in the updates list of uris have been removed
             * and should be closed */
            for (final URI uri : removedUris) {
                final ClusterNode remove = oldClients.get(uri);

                if (remove != null) {
                    removedNodes.add(new RemovedNode(uri, remove));
                }
            }

            return async
                .collect(updated)
                .lazyTransform(refreshLogAndPrepare(id, removedNodes, oldClients));
        };
    }

    /**
     * Operation that logs all intended operations and prepares for the final step.
     *
     * @param id id of the refresh operation
     * @param removed clients which should be removed
     * @param oldClients map of clients that should be replaced by a new map of clients
     * @return a lazy transform
     */
    LazyTransform<Collection<Update>, Void> refreshLogAndPrepare(
        final String id, final List<RemovedNode> removed, final Map<URI, ClusterNode> oldClients
    ) {
        return updates -> {
            final Set<ClusterNode> entries = new HashSet<>();
            final List<SuccessfulUpdate> ok = new ArrayList<>();
            final List<AsyncFuture<Void>> removals = new ArrayList<>();
            final Map<URI, ClusterNode> newClients = new HashMap<>();

            updates.forEach(update -> {
                update.handle(s -> {
                    /* only report nodes that were added by this refresh, not re-used ones */
                    if (s.isAdded()) {
                        log.info("{} [new] {}", id, s.getUri());
                    }

                    newClients.put(s.getUri(), s.getNode());
                    entries.add(s.getNode());
                    ok.add(s);
                }, error -> {
                    log.error("{} [failed] {}", id, error.getUri(), error.getError());
                });
            });

            /* removing a node is a regular part of a refresh, so it is not logged as an error */
            removed.forEach(remove -> {
                log.info("{} [remove] {}", id, remove.getUri());
                removals.add(remove.getNode().close());
            });

            if (entries.isEmpty() && useLocal) {
                log.info("{} [refresh] no nodes discovered, including local node", id);
                entries.add(new TracingClusterNode(local, LOCAL_IDENTIFIER));
            }

            final Set<Map<String, String>> knownShards = extractKnownShards(entries);

            log.info("{} [update] {} {} result(s)", id, knownShards, entries.size());

            /* shutdown removed node */
            return async
                .collectAndDiscard(removals)
                .lazyTransform(refreshFinalize(id, oldClients, newClients, entries, ok));
        };
    }

    /**
     * Create a lazy transform that updates the local state of the registry, or attempts another
     * refresh if the local state has already been updated.
     *
     * @param id id of the operation
     * @param oldClients map of clients that should be updated from
     * @param newClients map of clients that should be updated to
     * @param entries entries to add to registry
     * @param ok list of successful updates
     * @return a lazy transform
     */
    LazyTransform<Void, Void> refreshFinalize(
        final String id, final Map<URI, ClusterNode> oldClients,
        final Map<URI, ClusterNode> newClients, final Set<ClusterNode> entries,
        final List<SuccessfulUpdate> ok
    ) {
        return v -> {
            /* succeeds only if no other refresh or stop() replaced the clients map since the
             * sweep step read it */
            if (this.clients.compareAndSet(oldClients, newClients)) {
                registry.set(new NodeRegistry(async, new ArrayList<>(entries), entries.size()));
                return async.resolved();
            }

            log.warn("{} another refresh in progress, trying again", id);

            /* shutdown ok updates which are not part of the old collection */
            final List<AsyncFuture<Void>> shutdown = ok
                .stream()
                .filter(SuccessfulUpdate::isAdded)
                .map(s -> s.getNode().close())
                .collect(Collectors.toList());

            return async.collectAndDiscard(shutdown).lazyTransform(v0 -> refreshDiscovery(id));
        };
    }

    /**
     * A container that contains information about a node update.
     */
    interface Update {
        /**
         * Create a transform that converts a failure into a {@link FailedUpdate} for the given
         * URI.
         *
         * @param uri URI that the failure applies to
         * @return a transform from a throwable to a failed update
         */
        static Transform<Throwable, Update> error(final URI uri) {
            return e -> new FailedUpdate(uri, e);
        }

        /**
         * Handle the current update.
         *
         * @param successful Consumer for a successful update, will be called if the update is
         * successful
         * @param error Consumer for a failed update, will be called if the update is failed.
         */
        void handle(
            final Consumer<SuccessfulUpdate> successful, final Consumer<FailedUpdate> error
        );
    }

    /**
     * A successful node update.
     */
    @Data
    static class SuccessfulUpdate implements Update {
        /**
         * The URI that was updated.
         */
        private final URI uri;

        /**
         * If the update is a new addition.
         */
        private final boolean added;

        /**
         * The cluster node part of the update.
         */
        private final ClusterNode node;

        @Override
        public void handle(
            final Consumer<SuccessfulUpdate> successful, final Consumer<FailedUpdate> error
        ) {
            successful.accept(this);
        }
    }

    /**
     * A failed node update.
     */
    @Data
    static class FailedUpdate implements Update {
        /**
         * URI of the node that failed to update.
         */
        private final URI uri;

        /**
         * The error that caused the failure.
         */
        private final Throwable error;

        @Override
        public void handle(
            final Consumer<SuccessfulUpdate> successful, final Consumer<FailedUpdate> error
        ) {
            error.accept(this);
        }
    }

    /**
     * A single removed node.
     */
    @Data
    static class RemovedNode {
        /**
         * The URI of the removed node.
         */
        private final URI uri;

        /**
         * The cluster node that was removed.
         */
        private final ClusterNode node;
    }
}