org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.cluster.coordination.node;

import org.apache.commons.collections4.queue.CircularFifoQueue;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.cluster.coordination.ClusterCoordinator;
import org.apache.nifi.cluster.coordination.flow.FlowElection;
import org.apache.nifi.cluster.coordination.http.HttpResponseMapper;
import org.apache.nifi.cluster.coordination.http.StandardHttpResponseMapper;
import org.apache.nifi.cluster.coordination.http.replication.RequestCompletionCallback;
import org.apache.nifi.cluster.event.Event;
import org.apache.nifi.cluster.event.NodeEvent;
import org.apache.nifi.cluster.exception.NoClusterCoordinatorException;
import org.apache.nifi.cluster.firewall.ClusterNodeFirewall;
import org.apache.nifi.cluster.manager.NodeResponse;
import org.apache.nifi.cluster.manager.exception.IllegalNodeDisconnectionException;
import org.apache.nifi.cluster.protocol.ComponentRevision;
import org.apache.nifi.cluster.protocol.ConnectionRequest;
import org.apache.nifi.cluster.protocol.ConnectionResponse;
import org.apache.nifi.cluster.protocol.DataFlow;
import org.apache.nifi.cluster.protocol.NodeIdentifier;
import org.apache.nifi.cluster.protocol.NodeProtocolSender;
import org.apache.nifi.cluster.protocol.ProtocolException;
import org.apache.nifi.cluster.protocol.ProtocolHandler;
import org.apache.nifi.cluster.protocol.StandardDataFlow;
import org.apache.nifi.cluster.protocol.impl.ClusterCoordinationProtocolSenderListener;
import org.apache.nifi.cluster.protocol.message.ClusterWorkloadRequestMessage;
import org.apache.nifi.cluster.protocol.message.ClusterWorkloadResponseMessage;
import org.apache.nifi.cluster.protocol.message.ConnectionRequestMessage;
import org.apache.nifi.cluster.protocol.message.ConnectionResponseMessage;
import org.apache.nifi.cluster.protocol.message.DisconnectMessage;
import org.apache.nifi.cluster.protocol.message.NodeConnectionStatusResponseMessage;
import org.apache.nifi.cluster.protocol.message.NodeStatusChangeMessage;
import org.apache.nifi.cluster.protocol.message.ProtocolMessage;
import org.apache.nifi.cluster.protocol.message.ProtocolMessage.MessageType;
import org.apache.nifi.cluster.protocol.message.ReconnectionRequestMessage;
import org.apache.nifi.controller.leader.election.LeaderElectionManager;
import org.apache.nifi.events.EventReporter;
import org.apache.nifi.reporting.Severity;
import org.apache.nifi.services.FlowService;
import org.apache.nifi.util.NiFiProperties;
import org.apache.nifi.web.revision.RevisionManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

public class NodeClusterCoordinator implements ClusterCoordinator, ProtocolHandler, RequestCompletionCallback {

    private static final Logger logger = LoggerFactory.getLogger(NodeClusterCoordinator.class);
    // Category passed to the EventReporter for every event reported by this class.
    private static final String EVENT_CATEGORY = "Clustering";

    // Pattern for "/nifi-api/counters/" followed by 36 characters drawn from a-f, 0-9 and '-'.
    // NOTE(review): not referenced in the portion of the class visible here -- confirm it is still used.
    private static final Pattern COUNTER_URI_PATTERN = Pattern.compile("/nifi-api/counters/[a-f0-9\\-]{36}");

    // Random identifier generated once per coordinator instance; set on outgoing reconnection requests.
    private final String instanceId = UUID.randomUUID().toString();
    // Identifier of the local node; null until setLocalNodeIdentifier is called.
    private volatile NodeIdentifier nodeId;

    // Collaborators supplied through the constructor.
    private final ClusterCoordinationProtocolSenderListener senderListener;
    private final EventReporter eventReporter;
    // May be null; isBlockedByFirewall treats a null firewall as "nothing is blocked".
    private final ClusterNodeFirewall firewall;
    private final RevisionManager revisionManager;
    private final NiFiProperties nifiProperties;
    private final LeaderElectionManager leaderElectionManager;
    // Largest update identifier seen so far by updateNodeStatus; starts at -1.
    private final AtomicLong latestUpdateId = new AtomicLong(-1);
    private final FlowElection flowElection;
    private final NodeProtocolSender nodeProtocolSender;

    // Null until setFlowService is called; reconnection threads poll this field until it is set.
    private volatile FlowService flowService;
    // NOTE(review): not read or written in the portion of the class visible here.
    private volatile boolean connected;
    // Set to true by shutdown(); makes waitForNodeIdentifier give up instead of polling forever.
    private volatile boolean closed = false;
    // While true, requestNodeConnect defers to the flow election and omits the local dataflow from requests.
    private volatile boolean requireElection = true;

    // Latest known connection status of each node, keyed by node identifier.
    private final ConcurrentMap<NodeIdentifier, NodeConnectionStatus> nodeStatuses = new ConcurrentHashMap<>();
    // Per-node event queues; each queue is guarded by synchronizing on the queue itself.
    private final ConcurrentMap<NodeIdentifier, CircularFifoQueue<NodeEvent>> nodeEvents = new ConcurrentHashMap<>();

    /**
     * Creates a coordinator from its collaborators and registers it as a handler on the
     * given sender/listener. The flow service is left unset until
     * {@link #setFlowService(FlowService)} is called.
     *
     * @param senderListener used to send cluster protocol messages; this instance is added to it as a handler
     * @param eventReporter receives the events reported through {@link #reportEvent}
     * @param leaderElectionManager consulted for the elected leader of each cluster role
     * @param flowElection consulted while a flow election is still required
     * @param firewall optional firewall; <code>null</code> means no host is blocked
     * @param revisionManager source of the component revisions sent with reconnection requests
     * @param nifiProperties the NiFi properties
     * @param nodeProtocolSender the node protocol sender
     */
    public NodeClusterCoordinator(final ClusterCoordinationProtocolSenderListener senderListener,
            final EventReporter eventReporter, final LeaderElectionManager leaderElectionManager,
            final FlowElection flowElection, final ClusterNodeFirewall firewall,
            final RevisionManager revisionManager, final NiFiProperties nifiProperties,
            final NodeProtocolSender nodeProtocolSender) {
        // Messaging collaborators
        this.senderListener = senderListener;
        this.nodeProtocolSender = nodeProtocolSender;

        // Election collaborators
        this.leaderElectionManager = leaderElectionManager;
        this.flowElection = flowElection;

        // Everything else
        this.eventReporter = eventReporter;
        this.firewall = firewall;
        this.revisionManager = revisionManager;
        this.nifiProperties = nifiProperties;
        this.flowService = null;

        this.senderListener.addHandler(this);
    }

    /**
     * Marks this coordinator as closed and, if the local node identifier is known, records a
     * NODE_SHUTDOWN status for the local node without waiting for a coordinator to be elected.
     * Calls made after the coordinator is already closed do nothing.
     */
    @Override
    public void shutdown() {
        if (!closed) {
            closed = true;

            final NodeIdentifier self = getLocalNodeIdentifier();
            if (self == null) {
                return;
            }

            updateNodeStatus(new NodeConnectionStatus(self, DisconnectionCode.NODE_SHUTDOWN), false);
            logger.info("Successfully notified other nodes that I am shutting down");
        }
    }

    /**
     * Records the identifier of the local node and, if no status is known for it yet,
     * registers a NOT_YET_CONNECTED status for it.
     *
     * @param nodeId the identifier of the local node
     */
    @Override
    public void setLocalNodeIdentifier(final NodeIdentifier nodeId) {
        this.nodeId = nodeId;

        // The status is only constructed when the map has no entry for this node.
        this.nodeStatuses.computeIfAbsent(nodeId,
                localId -> new NodeConnectionStatus(localId, DisconnectionCode.NOT_YET_CONNECTED));
    }

    /**
     * @return the identifier of the local node, or <code>null</code> if it has not been set
     */
    @Override
    public NodeIdentifier getLocalNodeIdentifier() {
        return this.nodeId;
    }

    /**
     * Polls until an elected active coordinator can be determined, without logging warnings
     * for failed lookups.
     *
     * @return the coordinator's identifier, or <code>null</code> if polling was abandoned
     */
    private NodeIdentifier waitForElectedClusterCoordinator() {
        final Supplier<NodeIdentifier> coordinatorLookup = () -> getElectedActiveCoordinatorNode(false);
        return waitForNodeIdentifier(coordinatorLookup);
    }

    /**
     * Repeatedly invokes the given supplier, pausing 100 milliseconds between attempts, until it
     * yields a non-null node identifier.
     *
     * @param fetchNodeId supplies the node identifier, or <code>null</code> if not yet available
     * @return the first non-null identifier supplied, or <code>null</code> if this coordinator
     * was closed or the thread was interrupted before one became available
     */
    private NodeIdentifier waitForNodeIdentifier(final Supplier<NodeIdentifier> fetchNodeId) {
        while (true) {
            final NodeIdentifier fetched = fetchNodeId.get();
            if (fetched != null) {
                return fetched;
            }

            // Nothing available yet: give up if we have been shut down, otherwise pause and retry.
            if (closed) {
                return null;
            }

            try {
                Thread.sleep(100L);
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
                return null;
            }
        }
    }

    /**
     * Asks the leader election manager which address currently holds the Cluster Coordinator role.
     *
     * @return the leader address reported by the election manager
     * @throws IOException declared for callers; see the election manager for failure conditions
     */
    private String getElectedActiveCoordinatorAddress() throws IOException {
        final String coordinatorAddress = leaderElectionManager.getLeader(ClusterRoles.CLUSTER_COORDINATOR);
        return coordinatorAddress;
    }

    /**
     * Applies every entry of the given map to the known node statuses. An entry whose state is
     * REMOVED deletes the node; any other entry overwrites the node's current status. No
     * comparison against the current status is made.
     *
     * @param statusMap the proposed statuses, keyed by node identifier
     */
    @Override
    public void resetNodeStatuses(final Map<NodeIdentifier, NodeConnectionStatus> statusMap) {
        logger.info("Resetting cluster node statuses from {} to {}", nodeStatuses, statusMap);

        for (final Map.Entry<NodeIdentifier, NodeConnectionStatus> proposal : statusMap.entrySet()) {
            final NodeConnectionStatus proposedStatus = proposal.getValue();
            final boolean removal = proposedStatus.getState() == NodeConnectionState.REMOVED;

            if (!removal) {
                nodeStatuses.put(proposal.getKey(), proposedStatus);
                continue;
            }

            nodeStatuses.remove(proposal.getKey());
        }
    }

    /**
     * Replaces the status of a node, but only if the node has no status yet or its current
     * status carries the given update identifier.
     *
     * @param connectionStatus the status to store
     * @param qualifyingUpdateId the update identifier the current status must have for the
     * replacement to take place
     * @return <code>true</code> if the status map was updated, <code>false</code> otherwise
     */
    @Override
    public boolean resetNodeStatus(final NodeConnectionStatus connectionStatus, final long qualifyingUpdateId) {
        final NodeIdentifier nodeId = connectionStatus.getNodeIdentifier();
        final NodeConnectionStatus existing = getConnectionStatus(nodeId);

        final boolean qualifies = existing == null || existing.getUpdateIdentifier() == qualifyingUpdateId;
        if (!qualifies) {
            // The current status has a different update identifier, so it is left untouched.
            return false;
        }

        return replaceNodeStatus(nodeId, existing, connectionStatus);
    }

    /**
     * Attempts to update the nodeStatuses map by changing the value for the
     * given node id from the current status to the new status, as in
     * ConcurrentMap.replace(nodeId, currentStatus, newStatus) but with the
     * difference that this method can handle a <code>null</code> value for
     * currentStatus. A new status whose state is REMOVED removes the entry
     * instead of replacing it.
     *
     * @param nodeId the node id
     * @param currentStatus the current status, or <code>null</code> if there is
     * no value currently
     * @param newStatus the new status to set; if <code>null</code>, the problem is
     * logged and the map is left unchanged
     * @return <code>true</code> if the map was updated, false otherwise
     */
    private boolean replaceNodeStatus(final NodeIdentifier nodeId, final NodeConnectionStatus currentStatus,
            final NodeConnectionStatus newStatus) {
        if (newStatus == null) {
            logger.error("Cannot change node status for {} from {} to {} because new status is null", nodeId,
                    currentStatus, newStatus);
            // The exception is created only so that the log captures the calling stack trace.
            logger.error("", new NullPointerException());

            // There is nothing to store. Report "not updated" instead of dereferencing null below.
            return false;
        }

        final boolean removal = newStatus.getState() == NodeConnectionState.REMOVED;

        if (currentStatus == null) {
            if (removal) {
                // NOTE(review): currentStatus is null here, so this asks the map to remove a null value.
                return nodeStatuses.remove(nodeId, currentStatus);
            }

            // Only succeeds if no other thread has stored a status for this node in the meantime.
            final NodeConnectionStatus existingValue = nodeStatuses.putIfAbsent(nodeId, newStatus);
            return existingValue == null;
        }

        if (removal) {
            return nodeStatuses.remove(nodeId, currentStatus);
        }

        return nodeStatuses.replace(nodeId, currentStatus, newStatus);
    }

    /**
     * Marks the given node as CONNECTING and asynchronously sends it a reconnection request.
     * Nothing is done while a flow election is still required and in progress and the node
     * has already cast its vote.
     *
     * @param nodeId the node that should connect
     * @param userDn the user on whose behalf the request is made, or <code>null</code>
     */
    @Override
    public void requestNodeConnect(final NodeIdentifier nodeId, final String userDn) {
        // A node that has already voted gains nothing from reconnecting before a flow is elected.
        // A node that has not voted is still asked to connect, because connecting is how it votes.
        final boolean electionInProgress = requireElection && !flowElection.isElectionComplete();
        if (electionInProgress && flowElection.isVoteCounted(nodeId)) {
            logger.debug(
                    "Received heartbeat for {} and node is not connected. Will not request node connect to cluster, "
                            + "though, because the Flow Election is still in progress",
                    nodeId);
            return;
        }

        final String eventMessage = userDn == null
                ? "Requesting that node connect to cluster"
                : "Requesting that node connect to cluster on behalf of " + userDn;
        reportEvent(nodeId, Severity.INFO, eventMessage);

        final NodeConnectionStatus connectingStatus = new NodeConnectionStatus(nodeId,
                NodeConnectionState.CONNECTING, null, null, System.currentTimeMillis());
        updateNodeStatus(connectingStatus);

        final ReconnectionRequestMessage reconnectionRequest = new ReconnectionRequestMessage();
        reconnectionRequest.setInstanceId(instanceId);
        reconnectionRequest.setNodeId(nodeId);

        // The local dataflow is only sent once an election is no longer required. Until then we do
        // not know what the cluster's dataflow looks like, so ours must not be imposed on the node.
        final boolean includeDataFlow = !requireElection;
        requestReconnectionAsynchronously(reconnectionRequest, 10, 5, includeDataFlow);
    }

    /**
     * Marks the given node as CONNECTED. A node that is unknown, disconnected or disconnecting
     * is asked to connect instead, and a node that is already connected is left alone.
     *
     * @param nodeId the node whose connection should be finished
     */
    @Override
    public void finishNodeConnection(final NodeIdentifier nodeId) {
        final NodeConnectionState state = getConnectionState(nodeId);
        if (state == null) {
            logger.debug(
                    "Attempted to finish node connection for {} but node is not known. Requesting that node connect",
                    nodeId);
            requestNodeConnect(nodeId, null);
            return;
        }

        switch (state) {
        case CONNECTED:
            // already connected. Nothing to do.
            return;
        case DISCONNECTED:
        case DISCONNECTING:
            logger.debug(
                    "Attempted to finish node connection for {} but node state was {}. Requesting that node connect",
                    nodeId, state);
            requestNodeConnect(nodeId, null);
            return;
        default:
            logger.info("{} is now connected", nodeId);
            updateNodeStatus(new NodeConnectionStatus(nodeId, NodeConnectionState.CONNECTED));
            return;
        }
    }

    /**
     * Records the given node as disconnected and, unless the node is shutting down,
     * asynchronously tells the node that it has been disconnected.
     *
     * @param nodeId the node to disconnect
     * @param disconnectionCode the reason for the disconnection
     * @param explanation a human-readable explanation, or <code>null</code> to describe the
     * disconnection by its code
     * @throws IllegalNodeDisconnectionException if the node is the only node currently connected
     */
    @Override
    public void requestNodeDisconnect(final NodeIdentifier nodeId, final DisconnectionCode disconnectionCode,
            final String explanation) {
        final Set<NodeIdentifier> connectedNodeIds = getNodeIdentifiers(NodeConnectionState.CONNECTED);
        if (connectedNodeIds.size() == 1 && connectedNodeIds.contains(nodeId)) {
            throw new IllegalNodeDisconnectionException(
                    "Cannot disconnect node " + nodeId + " because it is the only node currently connected");
        }

        // Describe the disconnection by its code when no explanation was supplied, so that
        // neither the log nor the node event reads "due to null".
        final String reason = explanation == null ? String.valueOf(disconnectionCode) : explanation;
        logger.info("Requesting that {} disconnect due to {}", nodeId, reason);

        updateNodeStatus(new NodeConnectionStatus(nodeId, disconnectionCode, explanation));

        // There is no need to tell the node that it's disconnected if it is due to being
        // shutdown, as we will not be able to connect to the node anyway.
        if (disconnectionCode == DisconnectionCode.NODE_SHUTDOWN) {
            return;
        }

        // The message sent to the node carries the explanation exactly as supplied by the caller.
        final DisconnectMessage request = new DisconnectMessage();
        request.setNodeId(nodeId);
        request.setExplanation(explanation);

        addNodeEvent(nodeId, "Disconnection requested due to " + reason);
        disconnectAsynchronously(request, 10, 5);
    }

    /**
     * Records that a node asked to be disconnected and reports an event whose severity
     * depends on the disconnection code.
     *
     * @param nodeId the node that requested disconnection
     * @param disconnectionCode the reason for the disconnection
     * @param explanation a human-readable explanation, or <code>null</code> to describe the
     * disconnection by its code
     */
    @Override
    public void disconnectionRequestedByNode(final NodeIdentifier nodeId, final DisconnectionCode disconnectionCode,
            final String explanation) {
        // Describe the disconnection by its code when no explanation was supplied, so that
        // neither the log nor the reported event reads "due to null".
        final String reason = explanation == null ? String.valueOf(disconnectionCode) : explanation;
        logger.info("{} requested disconnection from cluster due to {}", nodeId, reason);
        updateNodeStatus(new NodeConnectionStatus(nodeId, disconnectionCode, explanation));

        final Severity severity;
        switch (disconnectionCode) {
        case STARTUP_FAILURE:
        case MISMATCHED_FLOWS:
        case UNKNOWN:
            severity = Severity.ERROR;
            break;
        case LACK_OF_HEARTBEAT:
            severity = Severity.WARNING;
            break;
        default:
            severity = Severity.INFO;
            break;
        }

        reportEvent(nodeId, severity, "Node disconnected from cluster due to " + reason);
    }

    /**
     * Forgets the given node: reports the removal, drops the node's status and events, and
     * notifies other nodes that the node is now REMOVED.
     *
     * @param nodeId the node to remove
     * @param userDn the user that requested the removal
     */
    @Override
    public void removeNode(final NodeIdentifier nodeId, final String userDn) {
        final String removalEvent = "User " + userDn + " requested that node be removed from cluster";
        reportEvent(nodeId, Severity.INFO, removalEvent);

        // The events are dropped after reporting, so the removal event itself is discarded as well.
        nodeStatuses.remove(nodeId);
        nodeEvents.remove(nodeId);

        final NodeConnectionStatus removedStatus = new NodeConnectionStatus(nodeId, NodeConnectionState.REMOVED);
        notifyOthersOfNodeStatusChange(removedStatus);
    }

    /**
     * @param nodeId the node of interest
     * @return the status currently recorded for the node, or <code>null</code> if none is recorded
     */
    @Override
    public NodeConnectionStatus getConnectionStatus(final NodeIdentifier nodeId) {
        final NodeConnectionStatus recorded = this.nodeStatuses.get(nodeId);
        return recorded;
    }

    /**
     * @param nodeId the node of interest
     * @return the state of the node's recorded status, or <code>null</code> if the node has no status
     */
    private NodeConnectionState getConnectionState(final NodeIdentifier nodeId) {
        final NodeConnectionStatus recorded = getConnectionStatus(nodeId);
        if (recorded == null) {
            return null;
        }

        return recorded.getState();
    }

    /**
     * @return a new list holding the statuses currently recorded for all nodes
     */
    @Override
    public List<NodeConnectionStatus> getConnectionStatuses() {
        final List<NodeConnectionStatus> snapshot = new ArrayList<>(nodeStatuses.values());
        return snapshot;
    }

    /**
     * Groups the known nodes by the state of their recorded status.
     *
     * @return a new map from connection state to the identifiers of the nodes in that state;
     * states that no node is in are absent from the map
     */
    @Override
    public Map<NodeConnectionState, List<NodeIdentifier>> getConnectionStates() {
        final Map<NodeConnectionState, List<NodeIdentifier>> idsByState = new HashMap<>();

        nodeStatuses.forEach((id, recorded) -> idsByState
                .computeIfAbsent(recorded.getState(), state -> new ArrayList<NodeIdentifier>())
                .add(id));

        return idsByState;
    }

    /**
     * @param hostname the host to check
     * @return <code>true</code> if a firewall is configured and does not permit the host,
     * <code>false</code> if the host is permitted or no firewall is configured
     */
    @Override
    public boolean isBlockedByFirewall(final String hostname) {
        if (firewall == null) {
            return false;
        }

        return !firewall.isPermissible(hostname);
    }

    /**
     * Reports an event to the event reporter, records it against the node (when a node is
     * given), and writes it to the log at the level matching the severity.
     *
     * @param nodeId the node the event concerns, or <code>null</code> if it concerns no particular node
     * @param severity the severity of the event
     * @param event the event description
     */
    @Override
    public void reportEvent(final NodeIdentifier nodeId, final Severity severity, final String event) {
        // The same text goes to both the event reporter and the log.
        final String message;
        if (nodeId == null) {
            message = event;
        } else {
            message = "Event Reported for " + nodeId + " -- " + event;
        }

        eventReporter.reportEvent(severity, EVENT_CATEGORY, message);
        if (nodeId != null) {
            addNodeEvent(nodeId, severity, event);
        }

        switch (severity) {
        case ERROR:
            logger.error(message);
            break;
        case WARNING:
            logger.warn(message);
            break;
        case INFO:
            logger.info(message);
            break;
        }
    }

    /**
     * Looks up a known node by its id.
     *
     * @param uuid the id to look for
     * @return the identifier of a known node whose id equals the given value, or
     * <code>null</code> if there is none
     */
    @Override
    public NodeIdentifier getNodeIdentifier(final String uuid) {
        return nodeStatuses.keySet().stream()
                .filter(candidate -> candidate.getId().equals(uuid))
                .findFirst()
                .orElse(null);
    }

    /**
     * Returns the identifiers of the known nodes whose state is one of the given states.
     *
     * @param states the states of interest; when none are given, every state is of interest
     * @return the identifiers of the matching nodes
     */
    @Override
    public Set<NodeIdentifier> getNodeIdentifiers(final NodeConnectionState... states) {
        final NodeConnectionState[] wanted = states.length == 0 ? NodeConnectionState.values() : states;

        final Set<NodeConnectionState> statesOfInterest = new HashSet<>();
        Collections.addAll(statesOfInterest, wanted);

        return nodeStatuses.entrySet().stream()
                .filter(known -> statesOfInterest.contains(known.getValue().getState()))
                .map(Map.Entry::getKey)
                .collect(Collectors.toSet());
    }

    /**
     * Finds the known node whose "socketAddress:socketPort" equals the address that the leader
     * election manager reports for the Primary Node role.
     *
     * @return the matching node, or <code>null</code> if no leader is reported or no known node matches
     */
    @Override
    public NodeIdentifier getPrimaryNode() {
        final String primaryNodeAddress = leaderElectionManager.getLeader(ClusterRoles.PRIMARY_NODE);
        if (primaryNodeAddress == null) {
            return null;
        }

        for (final NodeIdentifier candidate : nodeStatuses.keySet()) {
            final String candidateAddress = candidate.getSocketAddress() + ":" + candidate.getSocketPort();
            if (primaryNodeAddress.equals(candidateAddress)) {
                return candidate;
            }
        }

        return null;
    }

    /**
     * Determines the elected active coordinator, logging warnings when the lookup fails.
     *
     * @return the coordinator's identifier, or <code>null</code> if it could not be determined
     */
    @Override
    public NodeIdentifier getElectedActiveCoordinatorNode() {
        final boolean warnOnError = true;
        return getElectedActiveCoordinatorNode(warnOnError);
    }

    /**
     * Resolves the address reported for the elected Cluster Coordinator to a node identifier.
     * The address is expected in "hostname:port" form and is matched against the socket address
     * and socket port of every known node. When no known node matches and
     * <code>warnOnError</code> is <code>true</code>, the node at that address is asked for its
     * connection status and, if it answers, is added to the known nodes.
     *
     * @param warnOnError whether lookup failures are logged as warnings; as written, it also
     * controls whether an unknown coordinator address is queried remotely
     * @return the coordinator's identifier, or <code>null</code> if there is no coordinator, the
     * address is malformed, or the node could not be identified
     */
    private NodeIdentifier getElectedActiveCoordinatorNode(final boolean warnOnError) {
        final String electedNodeAddress;
        try {
            electedNodeAddress = getElectedActiveCoordinatorAddress();
        } catch (final NoClusterCoordinatorException ncce) {
            logger.debug("There is currently no elected active Cluster Coordinator");
            return null;
        } catch (final IOException ioe) {
            if (warnOnError) {
                logger.warn(
                        "Failed to determine which node is elected active Cluster Coordinator. There may be no coordinator currently: "
                                + ioe);
                // The stack trace is only written when debug logging is enabled.
                if (logger.isDebugEnabled()) {
                    logger.warn("", ioe);
                }
            }

            return null;
        }

        if (electedNodeAddress == null) {
            logger.debug("There is currently no elected active Cluster Coordinator");
            return null;
        }

        // Split "hostname:port". A missing colon or an empty hostname makes the address invalid.
        final int colonLoc = electedNodeAddress.indexOf(':');
        if (colonLoc < 1) {
            if (warnOnError) {
                logger.warn(
                        "Failed to determine which node is elected active Cluster Coordinator: ZooKeeper reports the address as {}, but this is not a valid address",
                        electedNodeAddress);
            }

            return null;
        }

        final String electedNodeHostname = electedNodeAddress.substring(0, colonLoc);
        final String portString = electedNodeAddress.substring(colonLoc + 1);
        final int electedNodePort;
        try {
            electedNodePort = Integer.parseInt(portString);
        } catch (final NumberFormatException nfe) {
            if (warnOnError) {
                logger.warn(
                        "Failed to determine which node is elected active Cluster Coordinator: ZooKeeper reports the address as {}, but this is not a valid address",
                        electedNodeAddress);
            }

            return null;
        }

        // Called without states, getNodeIdentifiers() returns nodes in every state, so despite its
        // name this set is not limited to connected nodes.
        final Set<NodeIdentifier> connectedNodeIds = getNodeIdentifiers();
        final NodeIdentifier electedNodeId = connectedNodeIds.stream()
                .filter(nodeId -> nodeId.getSocketAddress().equals(electedNodeHostname)
                        && nodeId.getSocketPort() == electedNodePort)
                .findFirst().orElse(null);

        // NOTE(review): the remote lookup below is skipped when warnOnError is false -- confirm
        // that tying the lookup to the logging flag is intended.
        if (electedNodeId == null && warnOnError) {
            logger.debug(
                    "Failed to determine which node is elected active Cluster Coordinator: ZooKeeper reports the address as {},"
                            + "but there is no node with this address. Will attempt to communicate with node to determine its information",
                    electedNodeAddress);

            try {
                final NodeConnectionStatus connectionStatus = senderListener
                        .requestNodeConnectionStatus(electedNodeHostname, electedNodePort);
                logger.debug("Received NodeConnectionStatus {}", connectionStatus);

                if (connectionStatus == null) {
                    return null;
                }

                // Remember the node, unless another thread has registered a status for it first.
                // In that case the identifier from the existing status is returned.
                final NodeConnectionStatus existingStatus = this.nodeStatuses
                        .putIfAbsent(connectionStatus.getNodeIdentifier(), connectionStatus);
                if (existingStatus == null) {
                    return connectionStatus.getNodeIdentifier();
                } else {
                    return existingStatus.getNodeIdentifier();
                }
            } catch (final Exception e) {
                logger.warn(
                        "Failed to determine which node is elected active Cluster Coordinator: ZooKeeper reports the address as {}, but there is no node with this address. "
                                + "Attempted to determine the node's information but failed to retrieve its information due to {}",
                        electedNodeAddress, e.toString());

                // The stack trace is only written when debug logging is enabled.
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }
            }
        }

        // Either the matching known node, or null if none matched and the remote lookup did not return.
        return electedNodeId;
    }

    /**
     * @return <code>true</code> if the local node identifier is known and equals the elected
     * active coordinator, <code>false</code> otherwise
     */
    @Override
    public boolean isActiveClusterCoordinator() {
        final NodeIdentifier self = getLocalNodeIdentifier();
        if (self == null) {
            return false;
        }

        return self.equals(getElectedActiveCoordinatorNode());
    }

    /**
     * @param nodeId the node of interest
     * @return a copy of the events recorded for the node, or an empty list if none are recorded
     */
    @Override
    public List<NodeEvent> getNodeEvents(final NodeIdentifier nodeId) {
        final CircularFifoQueue<NodeEvent> recorded = nodeEvents.get(nodeId);
        if (recorded != null) {
            // Writers hold the same lock while adding, so the copy is taken under it.
            synchronized (recorded) {
                return new ArrayList<>(recorded);
            }
        }

        return Collections.emptyList();
    }

    /**
     * Supplies the flow service. It may be supplied only once.
     *
     * @param flowService the flow service to use
     * @throws IllegalStateException if a flow service has already been set
     */
    @Override
    public void setFlowService(final FlowService flowService) {
        final boolean alreadySet = this.flowService != null;
        if (alreadySet) {
            throw new IllegalStateException("Flow Service has already been set");
        }

        this.flowService = flowService;
    }

    /**
     * Records an INFO-severity event against the given node.
     *
     * @param nodeId the node the event concerns
     * @param event the event description
     */
    private void addNodeEvent(final NodeIdentifier nodeId, final String event) {
        final Severity defaultSeverity = Severity.INFO;
        addNodeEvent(nodeId, defaultSeverity, event);
    }

    /**
     * Records an event against the given node, creating the node's event queue on first use.
     *
     * @param nodeId the node the event concerns
     * @param severity the severity of the event
     * @param message the event description
     */
    private void addNodeEvent(final NodeIdentifier nodeId, final Severity severity, final String message) {
        final NodeEvent recordedEvent = new Event(nodeId.toString(), message, severity);

        final CircularFifoQueue<NodeEvent> queue = nodeEvents.computeIfAbsent(nodeId,
                key -> new CircularFifoQueue<>());

        // Readers copy the queue under the same lock.
        synchronized (queue) {
            queue.add(recordedEvent);
        }
    }

    /**
     * Stores the given status for the node it identifies and, when the node's state changed,
     * notifies other nodes, waiting for a cluster coordinator to be elected if necessary.
     *
     * @param status the new status of the node
     */
    // visible for testing.
    void updateNodeStatus(final NodeConnectionStatus status) {
        final boolean waitForCoordinator = true;
        updateNodeStatus(status, waitForCoordinator);
    }

    /**
     * Stores the given status for the node it identifies, raises the latest update id if needed,
     * and notifies other nodes when the node's state differs from its previous state.
     *
     * @param status the new status of the node
     * @param waitForCoordinator whether to wait for a coordinator to be elected when one must be notified
     */
    void updateNodeStatus(final NodeConnectionStatus status, final boolean waitForCoordinator) {
        final NodeIdentifier nodeId = status.getNodeIdentifier();

        // The status is stored unconditionally, without comparing update ids against the current
        // value. This method runs when this node itself decides to change a node's status. Update
        // ids are only compared for updates received from other nodes, which can arrive out of order.
        final NodeConnectionStatus previousStatus = nodeStatuses.put(nodeId, status);
        final NodeConnectionState previousState = previousStatus == null ? null : previousStatus.getState();
        logger.info("Status of {} changed from {} to {}", nodeId, previousStatus, status);
        logger.debug("State of cluster nodes is now {}", nodeStatuses);

        latestUpdateId.updateAndGet(knownMax -> Math.max(knownMax, status.getUpdateIdentifier()));

        final boolean stateUnchanged = previousState != null && previousState == status.getState();
        if (stateUnchanged) {
            logger.debug(
                    "Not notifying other nodes that status changed because previous state of {} is same as new state of {}",
                    previousState, status.getState());
            return;
        }

        final boolean notifyAllNodes = isActiveClusterCoordinator();
        if (!notifyAllNodes) {
            logger.debug("Notifying cluster coordinator that node status changed from {} to {}", previousStatus,
                    status);
        } else {
            logger.debug("Notifying all nodes that status changed from {} to {}", previousStatus, status);
        }

        notifyOthersOfNodeStatusChange(status, notifyAllNodes, waitForCoordinator);
    }

    /**
     * Notifies other nodes of a status change: all nodes when this node is the active cluster
     * coordinator, otherwise only the coordinator, waiting for one to be elected if necessary.
     *
     * @param updatedStatus the updated status for a node in the cluster
     */
    void notifyOthersOfNodeStatusChange(final NodeConnectionStatus updatedStatus) {
        final boolean notifyAllNodes = isActiveClusterCoordinator();
        notifyOthersOfNodeStatusChange(updatedStatus, notifyAllNodes, true);
    }

    /**
     * Notifies other nodes that the status of a node changed. Nothing is sent when only the
     * coordinator is to be notified and no coordinator can be determined.
     *
     * @param updatedStatus the updated status for a node in the cluster
     * @param notifyAllNodes if <code>true</code> will notify all nodes. If
     * <code>false</code>, will notify only the cluster coordinator
     * @param waitForCoordinator if <code>true</code> and only the coordinator is to be notified,
     * waits for a coordinator to be elected; if <code>false</code>, gives up immediately when no
     * coordinator is currently known
     */
    void notifyOthersOfNodeStatusChange(final NodeConnectionStatus updatedStatus, final boolean notifyAllNodes,
            final boolean waitForCoordinator) {
        // If this node is the active cluster coordinator, then we are going to replicate to all nodes.
        // Otherwise, get the active coordinator (or wait for one to become active) and then notify the coordinator.
        final Set<NodeIdentifier> nodesToNotify;
        if (notifyAllNodes) {
            nodesToNotify = getNodeIdentifiers(NodeConnectionState.CONNECTED, NodeConnectionState.CONNECTING);

            // Do not notify ourselves because we already know about the status update.
            nodesToNotify.remove(getLocalNodeIdentifier());
        } else {
            // Waiting can still come back empty-handed: it gives up when this coordinator has been
            // closed or the waiting thread is interrupted.
            final NodeIdentifier coordinatorId = waitForCoordinator
                    ? waitForElectedClusterCoordinator()
                    : getElectedActiveCoordinatorNode();

            if (coordinatorId == null) {
                if (waitForCoordinator) {
                    logger.debug("Not notifying cluster coordinator of status {} because no coordinator could be determined",
                            updatedStatus);
                }
                return;
            }

            nodesToNotify = Collections.singleton(coordinatorId);
        }

        final NodeStatusChangeMessage message = new NodeStatusChangeMessage();
        message.setNodeId(updatedStatus.getNodeIdentifier());
        message.setNodeConnectionStatus(updatedStatus);
        senderListener.notifyNodeStatusChange(nodesToNotify, message);
    }

    /**
     * Starts a thread that tells a node it has been disconnected, retrying on failure.
     *
     * @param request the disconnect message to send
     * @param attempts the maximum number of times to try sending the message
     * @param retrySeconds the number of seconds to pause between attempts
     */
    private void disconnectAsynchronously(final DisconnectMessage request, final int attempts,
            final int retrySeconds) {
        final Thread disconnectThread = new Thread(() -> {
            final NodeIdentifier nodeId = request.getNodeId();

            for (int i = 0; i < attempts; i++) {
                try {
                    senderListener.disconnect(request);
                    reportEvent(nodeId, Severity.INFO, "Node disconnected due to " + request.getExplanation());
                    return;
                } catch (final Exception e) {
                    // The exception is passed as the last argument so the cause is logged with the message.
                    logger.error("Failed to notify {} that it has been disconnected from the cluster due to {}",
                            request.getNodeId(), request.getExplanation(), e);

                    // No further attempt follows the last one, so there is nothing to wait for.
                    if (i + 1 >= attempts) {
                        return;
                    }

                    try {
                        Thread.sleep(retrySeconds * 1000L);
                    } catch (final InterruptedException ie) {
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            }
        }, "Disconnect " + request.getNodeId());

        disconnectThread.start();
    }

    /**
     * Starts a background thread that asks a node to reconnect to the cluster.
     * <p>
     * The thread first waits until the {@code flowService} field is non-null. It then makes up to
     * {@code reconnectionAttempts} attempts to send the request, sleeping {@code retrySeconds} seconds
     * after each failed attempt. Before every attempt it verifies that the node is still in the
     * CONNECTING state and stops if it is not. If no attempt succeeds (or the retry sleep is interrupted)
     * and the node is still CONNECTING, a disconnect is requested for the node.
     *
     * @param request the reconnection request to populate and send
     * @param reconnectionAttempts the maximum number of attempts to send the request
     * @param retrySeconds the number of seconds to sleep after a failed attempt
     * @param includeDataFlow whether the current dataflow should be attached to the request
     */
    private void requestReconnectionAsynchronously(final ReconnectionRequestMessage request,
            final int reconnectionAttempts, final int retrySeconds, final boolean includeDataFlow) {
        final Runnable reconnectTask = () -> {
            // The request cannot be populated until the FlowService is available, so poll for it.
            while (flowService == null) {
                try {
                    Thread.sleep(100L);
                } catch (final InterruptedException interrupted) {
                    logger.info("Could not send Reconnection request to {} because thread was "
                            + "interrupted before FlowService was made available", request.getNodeId());
                    Thread.currentThread().interrupt();
                    return;
                }
            }

            for (int attempt = 0; attempt < reconnectionAttempts; attempt++) {
                try {
                    final boolean stillConnecting = NodeConnectionState.CONNECTING == getConnectionState(
                            request.getNodeId());
                    if (!stillConnecting) {
                        // The node's status changed underneath us, so reconnection is no longer appropriate.
                        return;
                    }

                    if (includeDataFlow) {
                        request.setDataFlow(new StandardDataFlow(flowService.createDataFlow()));
                    }

                    request.setNodeConnectionStatuses(getConnectionStatuses());
                    request.setComponentRevisions(revisionManager.getAllRevisions().stream()
                            .map(revision -> ComponentRevision.fromRevision(revision))
                            .collect(Collectors.toList()));

                    // Send the reconnection request; reaching the log statement means the node was told.
                    senderListener.requestReconnection(request);
                    logger.info("Successfully requested that {} join the cluster", request.getNodeId());
                    return;
                } catch (final Exception failure) {
                    final String problem = "Problem encountered issuing reconnection request to node "
                            + request.getNodeId();
                    logger.warn(problem, failure);
                    eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, problem + " due to: " + failure);
                }

                // The attempt failed: pause before the next one, abandoning the retries if interrupted.
                try {
                    Thread.sleep(1000L * retrySeconds);
                } catch (final InterruptedException interrupted) {
                    break;
                }
            }

            // Every attempt failed (or we were interrupted). If the node is still waiting on us, disconnect it.
            if (NodeConnectionState.CONNECTING == getConnectionState(request.getNodeId())) {
                requestNodeDisconnect(request.getNodeId(), DisconnectionCode.UNABLE_TO_COMMUNICATE,
                        "Attempted to request that node reconnect to cluster but could not communicate with node");
            }
        };

        new Thread(reconnectTask, "Reconnect " + request.getNodeId()).start();
    }

    /**
     * Dispatches an incoming protocol message to the handler for its type.
     *
     * @param protocolMessage the message to handle
     * @return the response produced by the handler, or {@code null} for a node status change
     * @throws ProtocolException if the message type is not one of the types handled here
     */
    @Override
    public ProtocolMessage handle(final ProtocolMessage protocolMessage) throws ProtocolException {
        final ProtocolMessage reply;

        switch (protocolMessage.getType()) {
        case CONNECTION_REQUEST:
            reply = handleConnectionRequest((ConnectionRequestMessage) protocolMessage);
            break;
        case NODE_STATUS_CHANGE:
            // Status changes are applied locally and produce no response message.
            handleNodeStatusChange((NodeStatusChangeMessage) protocolMessage);
            reply = null;
            break;
        case NODE_CONNECTION_STATUS_REQUEST:
            reply = handleNodeConnectionStatusRequest();
            break;
        default:
            throw new ProtocolException(
                    "Cannot handle Protocol Message " + protocolMessage + " because it is not of the correct type");
        }

        return reply;
    }

    /**
     * Builds the response to a request for this node's connection status.
     *
     * @return a response message carrying the status stored in {@code nodeStatuses} for the local node
     *         identifier; the status is left unset when the local node identifier is not available
     */
    private NodeConnectionStatusResponseMessage handleNodeConnectionStatusRequest() {
        final NodeConnectionStatusResponseMessage response = new NodeConnectionStatusResponseMessage();

        final NodeIdentifier localNodeId = getLocalNodeIdentifier();
        if (localNodeId == null) {
            // Without a local identifier there is nothing to look up, so reply with an empty message.
            return response;
        }

        response.setNodeConnectionStatus(nodeStatuses.get(localNodeId));
        return response;
    }

    /**
     * Produces a human-readable description of a node's state transition.
     *
     * @param oldStatus the previously known status, or {@code null} if the node was unknown
     * @param status the new status
     * @return a description of the transition (including the disconnect reason or, failing that, the
     *         disconnect code when present), or an empty string if the state did not change
     */
    private String summarizeStatusChange(final NodeConnectionStatus oldStatus, final NodeConnectionStatus status) {
        // A known node whose state is unchanged has nothing worth summarizing.
        if (oldStatus != null && status.getState() == oldStatus.getState()) {
            return "";
        }

        final String previousState = oldStatus == null ? "[Unknown Node]" : oldStatus.getState().toString();

        final StringBuilder summary = new StringBuilder();
        summary.append("Node Status changed from ");
        summary.append(previousState);
        summary.append(" to ");
        summary.append(status.getState().toString());

        // Prefer the free-text reason; fall back to the disconnection code when no reason was given.
        if (status.getDisconnectReason() != null) {
            summary.append(" due to ").append(status.getDisconnectReason());
        } else if (status.getDisconnectCode() != null) {
            summary.append(" due to ").append(status.getDisconnectCode().toString());
        }

        return summary.toString();
    }

    /**
     * Applies a node status change message to the local view of the cluster.
     * <p>
     * The node's entry in {@code nodeStatuses} is removed when the new state is REMOVED and replaced
     * otherwise. A node event is recorded when the change produces a non-empty summary, the update
     * identifier generator is brought in line with the received status, and, if this node is the active
     * cluster coordinator, the other nodes are notified of the change.
     *
     * @param statusChangeMessage the message describing the node and its new connection status
     */
    private void handleNodeStatusChange(final NodeStatusChangeMessage statusChangeMessage) {
        final NodeConnectionStatus updatedStatus = statusChangeMessage.getNodeConnectionStatus();
        final NodeIdentifier nodeId = statusChangeMessage.getNodeId();
        logger.debug("Handling request {}", statusChangeMessage);

        final NodeConnectionStatus oldStatus = nodeStatuses.get(nodeId);

        // A REMOVED node is dropped from the map (only if its entry is still the one we read);
        // any other state simply replaces the stored status.
        final boolean nodeRemoved = updatedStatus.getState() == NodeConnectionState.REMOVED;
        if (nodeRemoved) {
            nodeStatuses.remove(nodeId, oldStatus);
        } else {
            nodeStatuses.put(nodeId, updatedStatus);
        }

        logger.info("Status of {} changed from {} to {}", nodeId, oldStatus, updatedStatus);
        logger.debug("State of cluster nodes is now {}", nodeStatuses);

        final String summary = summarizeStatusChange(oldStatus, updatedStatus);
        if (StringUtils.isNotEmpty(summary)) {
            addNodeEvent(nodeId, summary);
        }

        // Keep our Update Identifier counter in sync with the cluster so that status updates generated
        // locally can be compared accurately against those generated by other nodes.
        NodeConnectionStatus.updateIdGenerator(updatedStatus.getUpdateIdentifier());

        if (isActiveClusterCoordinator()) {
            notifyOthersOfNodeStatusChange(updatedStatus);
        }
    }

    /**
     * Describes the state of the flow election.
     *
     * @return the status description reported by the flow election, or {@code null} when an election
     *         is not required
     */
    @Override
    public String getFlowElectionStatus() {
        return requireElection ? flowElection.getStatusDescription() : null;
    }

    /**
     * Indicates whether the flow election is finished.
     *
     * @return {@code true} if no election is required or the flow election reports completion
     */
    @Override
    public boolean isFlowElectionComplete() {
        if (requireElection) {
            return flowElection.isElectionComplete();
        }

        // No election is required, so there is nothing left to wait for.
        return true;
    }

    /**
     * Resolves the identifier that a connecting node should use.
     * <p>
     * A NOT_YET_CONNECTED status is registered in {@code nodeStatuses} for the proposed identifier if no
     * entry exists for it yet. The proposed identifier is returned as-is when there was no existing entry,
     * or when the existing entry's identifier is logically equal to the proposed one. Otherwise a different
     * node already holds that ID, and a new identifier with a random UUID (and the proposed identifier's
     * addresses and ports) is returned.
     *
     * @param proposedIdentifier the identifier proposed by the connecting node
     * @return the identifier the node should use
     */
    private NodeIdentifier resolveNodeId(final NodeIdentifier proposedIdentifier) {
        final NodeConnectionStatus proposedConnectionStatus = new NodeConnectionStatus(proposedIdentifier,
                DisconnectionCode.NOT_YET_CONNECTED);
        final NodeConnectionStatus existingStatus = nodeStatuses.putIfAbsent(proposedIdentifier,
                proposedConnectionStatus);

        if (existingStatus == null) {
            // there is no node with that ID
            logger.debug("No existing node with ID {}; resolved node ID is as-proposed",
                    proposedIdentifier.getId());
            return proposedIdentifier;
        }

        if (existingStatus.getNodeIdentifier().logicallyEquals(proposedIdentifier)) {
            // there is a node with that ID but it's the same node.
            logger.debug("A node already exists with ID {} and it is the same node; resolved node ID is as-proposed",
                    proposedIdentifier.getId());
            return proposedIdentifier;
        }

        // there is a node with that ID and it's a different node, so issue a fresh ID to the newcomer
        final NodeIdentifier resolvedNodeId = new NodeIdentifier(UUID.randomUUID().toString(),
                proposedIdentifier.getApiAddress(), proposedIdentifier.getApiPort(),
                proposedIdentifier.getSocketAddress(), proposedIdentifier.getSocketPort(),
                proposedIdentifier.getSiteToSiteAddress(), proposedIdentifier.getSiteToSitePort(),
                proposedIdentifier.getSiteToSiteHttpApiPort(), proposedIdentifier.isSiteToSiteSecure());
        logger.debug(
                "A node already exists with ID {}. Proposed Node Identifier was {}; existing Node Identifier is {}; Resolved Node Identifier is {}",
                proposedIdentifier.getId(), proposedIdentifier, getNodeIdentifier(proposedIdentifier.getId()),
                resolvedNodeId);

        return resolvedNodeId;
    }

    /**
     * Handles a request from a node that wants to connect to the cluster.
     * <p>
     * The node's identifier is resolved first. When a flow election is required, the node's dataflow is
     * cast as a vote: if no flow has been elected yet the node is told that the election is in progress,
     * otherwise it receives the elected flow. When no election is required, the response is built from
     * this node's own dataflow.
     *
     * @param requestMessage the connection request message received from the node
     * @return the response message to send back to the node
     */
    private ConnectionResponseMessage handleConnectionRequest(final ConnectionRequestMessage requestMessage) {
        final ConnectionRequest originalRequest = requestMessage.getConnectionRequest();
        final NodeIdentifier proposedIdentifier = originalRequest.getProposedNodeIdentifier();
        final NodeIdentifier withRequestorDn = addRequestorDn(proposedIdentifier, requestMessage.getRequestorDN());
        final DataFlow dataFlow = originalRequest.getDataFlow();
        final ConnectionRequest requestWithDn = new ConnectionRequest(withRequestorDn, dataFlow);

        // Resolve Node identifier.
        final NodeIdentifier resolvedNodeId = resolveNodeId(proposedIdentifier);

        if (!requireElection) {
            logger.info("Received Connection Request from {}; responding with my DataFlow", withRequestorDn);
            return createConnectionResponse(requestWithDn, resolvedNodeId);
        }

        // An election is required: the node's flow counts as a vote, and a null result means no winner yet.
        final DataFlow electedDataFlow = flowElection.castVote(dataFlow, withRequestorDn);
        if (electedDataFlow != null) {
            logger.info("Received Connection Request from {}; responding with DataFlow that was elected",
                    withRequestorDn);
            return createConnectionResponse(requestWithDn, resolvedNodeId, electedDataFlow);
        }

        logger.info(
                "Received Connection Request from {}; responding with Flow Election In Progress message",
                withRequestorDn);
        return createFlowElectionInProgressResponse();
    }

    /**
     * Builds the response sent to a connecting node while the cluster is still electing its flow.
     *
     * @return a response message whose explanation includes the flow election's status description
     */
    private ConnectionResponseMessage createFlowElectionInProgressResponse() {
        final String explanation = "Cluster is still voting on which Flow is the correct flow for the cluster. "
                + flowElection.getStatusDescription();

        // NOTE(review): the 5 is presumably a "try again" delay in seconds - confirm against ConnectionResponse.
        final ConnectionResponse tryAgainResponse = new ConnectionResponse(5, explanation);

        final ConnectionResponseMessage responseMessage = new ConnectionResponseMessage();
        responseMessage.setConnectionResponse(tryAgainResponse);
        return responseMessage;
    }

    /**
     * Builds a connection response using the dataflow currently held by the {@code flowService}.
     * <p>
     * If the flow service is not available, or the dataflow cannot be obtained from it, a {@code null}
     * dataflow is passed on to the three-argument overload.
     *
     * @param request the connection request being answered
     * @param resolvedNodeIdentifier the identifier resolved for the requesting node
     * @return the response message to send back to the node
     */
    private ConnectionResponseMessage createConnectionResponse(final ConnectionRequest request,
            final NodeIdentifier resolvedNodeIdentifier) {
        DataFlow currentFlow;
        try {
            currentFlow = flowService == null ? null : flowService.createDataFlow();
        } catch (final IOException ioe) {
            logger.error("Unable to obtain current dataflow from FlowService in order to provide the flow to "
                    + resolvedNodeIdentifier + ". Will tell node to try again later", ioe);
            currentFlow = null;
        }

        return createConnectionResponse(request, resolvedNodeIdentifier, currentFlow);
    }

    /**
     * Builds the response to a connection request for the given cluster dataflow.
     * <p>
     * A "blocked by firewall" response is returned when the node's socket address is blocked, and a
     * response explaining that the dataflow is not yet available is returned when {@code clusterDataFlow}
     * is {@code null}. Otherwise a node event is recorded, the node's status is updated to CONNECTING, and
     * the response carries the resolved identifier, the dataflow, the instance ID, the current connection
     * statuses and the component revisions.
     *
     * @param request the connection request being answered
     * @param resolvedNodeIdentifier the identifier resolved for the requesting node
     * @param clusterDataFlow the dataflow to hand to the node, or {@code null} if none is available yet
     * @return the response message to send back to the node
     */
    private ConnectionResponseMessage createConnectionResponse(final ConnectionRequest request,
            final NodeIdentifier resolvedNodeIdentifier, final DataFlow clusterDataFlow) {
        final ConnectionResponseMessage responseMessage = new ConnectionResponseMessage();

        // A node whose socket address is blocked by the firewall gets a "blocked" response and nothing more.
        if (isBlockedByFirewall(resolvedNodeIdentifier.getSocketAddress())) {
            logger.info("Firewall blocked connection request from node " + resolvedNodeIdentifier);
            responseMessage.setConnectionResponse(ConnectionResponse.createBlockedByFirewallResponse());
            return responseMessage;
        }

        // Without a flow to hand out, the node cannot be admitted yet.
        if (clusterDataFlow == null) {
            responseMessage
                    .setConnectionResponse(new ConnectionResponse(5, "The cluster dataflow is not yet available"));
            return responseMessage;
        }

        // Record whether the request came from a node we already know about.
        final boolean knownNode = getConnectionStatus(resolvedNodeIdentifier) != null;
        final String eventMessage = knownNode
                ? "Connection requested from existing node. Setting status to connecting."
                : "Connection requested from new node. Setting status to connecting.";
        addNodeEvent(resolvedNodeIdentifier, eventMessage);

        // Set node's status to 'CONNECTING'
        final NodeConnectionStatus connectingStatus = new NodeConnectionStatus(resolvedNodeIdentifier,
                NodeConnectionState.CONNECTING, null, null, System.currentTimeMillis());
        updateNodeStatus(connectingStatus);

        responseMessage.setConnectionResponse(new ConnectionResponse(resolvedNodeIdentifier, clusterDataFlow,
                instanceId, getConnectionStatuses(), revisionManager.getAllRevisions().stream()
                        .map(revision -> ComponentRevision.fromRevision(revision))
                        .collect(Collectors.toList())));
        return responseMessage;
    }

    /**
     * Creates a copy of a node identifier that additionally carries the requestor's DN.
     *
     * @param nodeId the identifier whose ID, addresses, ports and site-to-site settings are copied
     * @param dn the DN to attach to the new identifier
     * @return a new identifier with the same values as {@code nodeId} plus the given DN
     */
    private NodeIdentifier addRequestorDn(final NodeIdentifier nodeId, final String dn) {
        final NodeIdentifier identifierWithDn = new NodeIdentifier(
                nodeId.getId(),
                nodeId.getApiAddress(),
                nodeId.getApiPort(),
                nodeId.getSocketAddress(),
                nodeId.getSocketPort(),
                nodeId.getSiteToSiteAddress(),
                nodeId.getSiteToSitePort(),
                nodeId.getSiteToSiteHttpApiPort(),
                nodeId.isSiteToSiteSecure(),
                dn);
        return identifierWithDn;
    }

    /**
     * Indicates whether this coordinator handles the given message.
     *
     * @param msg the message to inspect
     * @return {@code true} if the message is a connection request, a node status change, or a node
     *         connection status request
     */
    @Override
    public boolean canHandle(final ProtocolMessage msg) {
        final MessageType type = msg.getType();
        if (type == MessageType.CONNECTION_REQUEST) {
            return true;
        }
        if (type == MessageType.NODE_STATUS_CHANGE) {
            return true;
        }
        return type == MessageType.NODE_CONNECTION_STATUS_REQUEST;
    }

    /**
     * Determines whether an HTTP method is treated as a mutable request.
     *
     * @param method the HTTP method name, compared case-insensitively; may be {@code null}
     * @return {@code true} if the method is DELETE, POST or PUT; {@code false} otherwise
     */
    private boolean isMutableRequest(final String method) {
        if ("DELETE".equalsIgnoreCase(method)) {
            return true;
        }
        if ("POST".equalsIgnoreCase(method)) {
            return true;
        }
        return "PUT".equalsIgnoreCase(method);
    }

    /**
     * Invoked once an HTTP request has been replicated to the nodes in the cluster, giving the
     * coordinator the chance to disconnect nodes that failed to complete the request.
     * <p>
     * Nothing is done unless this node is the active cluster coordinator and the request is mutable
     * (see {@code isMutableRequest}). No node is disconnected when every node failed, or when the only
     * failures are attributed to a missing counter (see {@code isMissingCounter}).
     *
     * @param uriPath the path of the URI that was requested
     * @param method the HTTP method of the request
     * @param nodeResponses the responses received from the nodes
     */
    @Override
    public void afterRequest(final String uriPath, final String method, final Set<NodeResponse> nodeResponses) {
        // Only the active cluster coordinator replicates requests, so only it monitors the responses.
        if (!isActiveClusterCoordinator()) {
            return;
        }

        // Nodes are disconnected only for failures of mutable requests; for any other request they
        // keep their current state even if they had problems handling it.
        if (!isMutableRequest(method)) {
            return;
        }

        final HttpResponseMapper responseMapper = new StandardHttpResponseMapper(nifiProperties);
        final Set<NodeResponse> failedResponses = responseMapper.getProblematicNodeResponses(nodeResponses);

        final int failedCount = failedResponses.size();
        final int responseCount = nodeResponses.size();

        // When every node failed, disconnecting them all would help nobody.
        if (failedCount == responseCount) {
            logger.warn(
                    "All nodes failed to process URI {} {}. As a result, no node will be disconnected from cluster",
                    method, uriPath);
            return;
        }

        final boolean someNodesFailed = !failedResponses.isEmpty() && failedCount < responseCount;
        if (!someNodesFailed) {
            return;
        }

        // Failures caused solely by a missing counter must not get the nodes disconnected.
        if (isMissingCounter(failedResponses, uriPath)) {
            return;
        }

        // Disconnect every node that produced a problematic response.
        final Set<NodeIdentifier> failedNodeIds = failedResponses.stream()
                .map(nodeResponse -> nodeResponse.getNodeId())
                .collect(Collectors.toSet());
        logger.warn(String.format(
                "The following nodes failed to process URI %s '%s'.  Requesting each node disconnect from cluster.",
                uriPath, failedNodeIds));

        final String explanation = "Failed to process request " + method + " " + uriPath;
        for (final NodeIdentifier failedNodeId : failedNodeIds) {
            requestNodeDisconnect(failedNodeId, DisconnectionCode.FAILED_TO_SERVICE_REQUEST, explanation);
        }
    }

    /**
     * Determines whether every problematic response was a 404 NOT_FOUND for a counter URI.
     * Callers are expected to pass a non-empty set that does not cover all nodes in the cluster
     * (i.e. at least one node contained the counter in question).
     *
     * @param problematicNodeResponses The problematic node responses
     * @param uriPath The path of the URI for the request
     * @return {@code true} if the URI path matches the counter URI pattern and all problematic
     *         responses have status 404; {@code false} otherwise
     */
    private boolean isMissingCounter(final Set<NodeResponse> problematicNodeResponses, final String uriPath) {
        final boolean counterUri = COUNTER_URI_PATTERN.matcher(uriPath).matches();
        if (!counterUri) {
            return false;
        }

        return problematicNodeResponses.stream().allMatch(response -> response.getStatus() == 404);
    }

    /**
     * Records whether this node is connected to the cluster.
     * <p>
     * Once the node has connected, election of the dataflow is no longer required. Election matters only
     * at startup: if several nodes with different flows start at the same time, it prevents the wrong flow
     * from becoming the 'golden copy' merely because its node was elected active Cluster Coordinator.
     *
     * @param connected {@code true} if this node is now connected to the cluster
     */
    @Override
    public void setConnected(final boolean connected) {
        this.connected = connected;

        if (!connected) {
            return;
        }

        logger.info("This node is now connected to the cluster. Will no longer require election of DataFlow.");
        requireElection = false;
    }

    /**
     * Reports the connected flag last recorded via {@code setConnected}.
     *
     * @return the current value of the {@code connected} field
     */
    @Override
    public boolean isConnected() {
        return this.connected;
    }

    /**
     * Requests the cluster workload through the node protocol sender.
     *
     * @return the node workloads contained in the response, keyed by node identifier
     * @throws IOException declared by this method; presumably raised when the request cannot be sent -
     *             confirm against the protocol sender
     */
    @Override
    public Map<NodeIdentifier, NodeWorkload> getClusterWorkload() throws IOException {
        return nodeProtocolSender.clusterWorkload(new ClusterWorkloadRequestMessage()).getNodeWorkloads();
    }
}