com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt.java Source code

Java tutorial

Introduction

Here is the source code for com.emc.storageos.systemservices.impl.upgrade.CoordinatorClientExt.java

Source

/*
 * Copyright (c) 2012-2014 EMC Corporation
 * All Rights Reserved
 */

package com.emc.storageos.systemservices.impl.upgrade;

import static com.emc.storageos.coordinator.client.model.Constants.CONTROL_NODE_SYSSVC_ID_PATTERN;
import static com.emc.storageos.coordinator.client.model.Constants.DBSVC_NAME;
import static com.emc.storageos.coordinator.client.model.Constants.NEW_VERSIONS_LOCK;
import static com.emc.storageos.coordinator.client.model.Constants.NODE_INFO;
import static com.emc.storageos.coordinator.client.model.Constants.REMOTE_DOWNLOAD_LEADER;
import static com.emc.storageos.coordinator.client.model.Constants.REMOTE_DOWNLOAD_LOCK;
import static com.emc.storageos.coordinator.client.model.Constants.TARGET_INFO;
import static com.emc.storageos.coordinator.client.model.Constants.TARGET_INFO_LOCK;
import static com.emc.storageos.systemservices.mapper.ClusterInfoMapper.toClusterInfo;

import java.io.IOException;
import java.net.*;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import com.emc.storageos.coordinator.client.model.SiteMonitorResult;
import org.apache.commons.lang.StringUtils;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.apache.curator.framework.recipes.barriers.DistributedDoubleBarrier;
import org.apache.zookeeper.ZooKeeper.States;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.emc.storageos.coordinator.client.model.ConfigVersion;
import com.emc.storageos.coordinator.client.model.Constants;
import com.emc.storageos.coordinator.client.model.CoordinatorClassInfo;
import com.emc.storageos.coordinator.client.model.CoordinatorSerializable;
import com.emc.storageos.coordinator.client.model.PowerOffState;
import com.emc.storageos.coordinator.client.model.PropertyInfoExt;
import com.emc.storageos.coordinator.client.model.RepositoryInfo;
import com.emc.storageos.coordinator.client.model.Site;
import com.emc.storageos.coordinator.client.model.ProductName;
import com.emc.storageos.coordinator.client.model.SiteState;
import com.emc.storageos.coordinator.client.model.SoftwareVersion;
import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.coordinator.client.service.CoordinatorClient.LicenseType;
import com.emc.storageos.coordinator.client.service.DistributedPersistentLock;
import com.emc.storageos.coordinator.client.service.DrUtil;
import com.emc.storageos.coordinator.client.service.impl.CoordinatorClientImpl;
import com.emc.storageos.coordinator.common.Configuration;
import com.emc.storageos.coordinator.common.Service;
import com.emc.storageos.coordinator.common.impl.ConfigurationImpl;
import com.emc.storageos.coordinator.common.impl.ServiceImpl;
import com.emc.storageos.coordinator.common.impl.ZkConnection;
import com.emc.storageos.coordinator.exceptions.CoordinatorException;
import com.emc.storageos.db.common.DbConfigConstants;
import com.emc.storageos.db.common.DbServiceStatusChecker;
import com.emc.storageos.model.property.PropertiesMetadata;
import com.emc.storageos.model.property.PropertyInfo;
import com.emc.storageos.model.property.PropertyMetadata;
import com.emc.storageos.services.util.Strings;
import com.emc.storageos.svcs.errorhandling.resources.APIException;
import com.emc.storageos.systemservices.exceptions.CoordinatorClientException;
import com.emc.storageos.systemservices.exceptions.InvalidLockOwnerException;
import com.emc.storageos.systemservices.exceptions.SyssvcException;
import com.emc.storageos.systemservices.impl.SysSvcBeaconImpl;
import com.emc.storageos.systemservices.impl.client.SysClientFactory;
import com.emc.storageos.systemservices.impl.client.SysClientFactory.SysClient;
import com.emc.storageos.systemservices.impl.vdc.DbsvcQuorumMonitor;
import com.emc.vipr.model.sys.ClusterInfo;
import com.emc.vipr.model.sys.ClusterInfo.ClusterState;
import com.google.common.collect.ImmutableSet;

public class CoordinatorClientExt {
    // Class-wide SLF4J logger.
    private static final Logger _log = LoggerFactory.getLogger(CoordinatorClientExt.class);

    // Fixed role set handed back by getNodeRoles().
    private static final Set<String> CONTROL_NODE_ROLES = new ImmutableSet.Builder<String>().add("sys")
            .add("control").build();

    // NOTE(review): the constants below are not referenced in the visible part of this file;
    // their descriptions rely on their names and inline comments only.
    private static final String URI_INTERNAL_GET_CLUSTER_INFO = "/control/internal/cluster/info";
    private static final int COODINATOR_MONITORING_INTERVAL = 60; // in seconds
    private static final int DB_MONITORING_INTERVAL = 60; // in seconds
    private static final String DR_SWITCH_TO_ZK_OBSERVER_BARRIER = "/config/disasterRecoverySwitchToZkObserver";
    private static final int DR_SWITCH_BARRIER_TIMEOUT = 180; // barrier timeout in seconds
    private static final int ZK_LEADER_ELECTION_PORT = 2888;
    private static final int DUAL_ZK_LEADER_ELECTION_PORT = 2898;

    private static final int CHECK_ACTIVE_SITE_STABLE_CONNECT_TIMEOUT_MS = 10000; // 10 seconds
    private static final int CHECK_ACTIVE_SITE_STABLE_READ_TIMEOUT_MS = 5000; //5 seconds

    // Collaborators supplied through the setters below (setCoordinator, setServiceBeacon,
    // setService, setDbCommonInfo).
    private CoordinatorClient _coordinator;
    private SysSvcBeaconImpl _beacon;
    private ServiceImpl _svc;
    private Properties dbCommonInfo;
    // Lock handles; they start out null and are not assigned in the visible part of this file.
    private InterProcessLock _remoteDownloadLock = null;
    private volatile InterProcessLock _targetLock = null;
    private InterProcessLock _newVersionLock = null;
    // Node id used for external display/query purpose.
    // EX: vipr1, vipr2, dataservice-10_247_100_15
    // Copied from the service in setService().
    private String _myNodeId = null;
    // Copied from the service in setService().
    private String _myNodeName = null;
    // Service id is for internal use to talk to coordinator.
    // EX: syssvc-1, syssvc-2, syssvc-10_247_100_15
    // Copied from the service in setService().
    private String mySvcId = null;
    // Set through setNodeCount(); compared against the number of reporting control nodes
    // in verifyNodesPowerOffStateNotBefore().
    private int _nodeCount = 0;
    // Set through setVip().
    private String _vip;
    // Disaster-recovery helper; used by getClusterInfo(String) to look up the site.
    private DrUtil drUtil;
    private volatile boolean stopCoordinatorSvcMonitor; // default to false

    // Set through setStatusChecker().
    private DbServiceStatusChecker statusChecker = null;

    /**
     * Exposes the coordinator client this wrapper delegates to.
     *
     * @return the client previously supplied through {@link #setCoordinator(CoordinatorClient)}
     */
    public CoordinatorClient getCoordinatorClient() {
        return this._coordinator;
    }

    /**
     * Returns the ZooKeeper connection held by the coordinator client.
     * The client is cast to {@link CoordinatorClientImpl}, so any other implementation
     * results in a {@link ClassCastException}.
     *
     * @return the connection reported by the coordinator client implementation
     */
    public ZkConnection getZkConnection() {
        final CoordinatorClientImpl coordinatorImpl = (CoordinatorClientImpl) _coordinator;
        return coordinatorImpl.getZkConnection();
    }

    /**
     * Injects the beacon used to publish this node's service attributes.
     *
     * @param beacon beacon instance to use
     */
    public void setServiceBeacon(SysSvcBeaconImpl beacon) {
        this._beacon = beacon;
    }

    /**
     * Injects the service descriptor of this node and caches its node id,
     * node name and service id.
     *
     * @param service service descriptor; must not be null
     */
    public void setService(ServiceImpl service) {
        this._svc = service;
        this._myNodeId = service.getNodeId();
        this._myNodeName = service.getNodeName();
        this.mySvcId = service.getId();
    }

    /**
     * Injects the common database properties.
     *
     * @param dbCommonInfo properties to store
     */
    public void setDbCommonInfo(Properties dbCommonInfo) {
        final Properties injected = dbCommonInfo;
        this.dbCommonInfo = injected;
    }

    /**
     * Injects the coordinator client that all operations of this class delegate to.
     *
     * @param coordinator coordinator client to use
     */
    public void setCoordinator(CoordinatorClient coordinator) {
        this._coordinator = coordinator;
    }

    /**
     * Injects the disaster-recovery helper.
     *
     * @param drUtil helper instance to use
     */
    public void setDrUtil(DrUtil drUtil) {
        final DrUtil injected = drUtil;
        this.drUtil = injected;
    }

    /**
     * Returns the disaster-recovery helper.
     *
     * @return the helper previously supplied through {@link #setDrUtil(DrUtil)}
     */
    public DrUtil getDrUtil() {
        return drUtil;
    }

    /**
     * Fetches the property information from the coordinator.
     *
     * This is a straight delegation to the coordinator client; the existing
     * description states that the target properties are published to the
     * coordinator by syssvc and decoded into a property object on retrieval.
     *
     * @return property object as returned by the coordinator client
     * @throws CoordinatorException
     */
    public PropertyInfo getPropertyInfo() {
        final PropertyInfo properties = _coordinator.getPropertyInfo();
        return properties;
    }

    /**
     * Records the number of nodes in the cluster.
     *
     * @param count node count
     */
    public void setNodeCount(int count) {
        this._nodeCount = count;
    }

    /**
     * Records the cluster virtual IP.
     *
     * @param vip virtual IP value
     */
    public void setVip(String vip) {
        this._vip = vip;
    }

    /**
     * Returns the configured node count.
     *
     * @return value last passed to {@link #setNodeCount(int)}, or 0 if never set
     */
    public int getNodeCount() {
        return this._nodeCount;
    }

    /**
     * Returns the configured virtual IP.
     *
     * @return value last passed to {@link #setVip(String)}
     */
    public String getVip() {
        return this._vip;
    }

    /**
     * Injects the database service status checker.
     *
     * @param checker status checker to use
     */
    public void setStatusChecker(DbServiceStatusChecker checker) {
        this.statusChecker = checker;
    }

    /**
     * Tells whether this node's service id matches the control-node syssvc id pattern.
     *
     * @return true when the whole service id matches the pattern
     */
    public boolean isControlNode() {
        final boolean matchesControlPattern = CONTROL_NODE_SYSSVC_ID_PATTERN.matcher(mySvcId).matches();
        return matchesControlPattern;
    }

    /**
     * Returns the roles associated with the current node.
     * The same immutable constant set is returned on every call.
     *
     * @return the fixed set of control-node roles
     */
    public Set<String> getNodeRoles() {
        final Set<String> roles = CONTROL_NODE_ROLES;
        return roles;
    }

    /**
     * Publishes node info in session scope.
     * Session-scoped data disappears together with the session.
     *
     * The encoded state is stored as a service attribute (keyed by the attribute
     * name from the state's class info) and then published through the beacon.
     * A null state is ignored.
     *
     * @param state state to publish; ignored when null
     * @throws CoordinatorClientException when storing or publishing fails
     */
    public void setNodeSessionScopeInfo(final CoordinatorSerializable state) throws CoordinatorClientException {
        if (state != null) {
            try {
                final CoordinatorClassInfo classInfo = state.getCoordinatorClassInfo();
                _svc.setAttribute(classInfo.attribute, state.encodeAsString());
                _beacon.publish();
            } catch (Exception e) {
                _log.error("set node scope info error:", e);
                throw SyssvcException.syssvcExceptions.coordinatorClientError("Failed to set node session info");
            }
        }
    }

    /**
     * Persists node info in global scope for the local site.
     * Delegates to the site-aware overload with a null site id.
     *
     * @param info info to persist
     * @param kind configuration kind
     * @param id configuration id
     * @throws CoordinatorClientException when persisting fails
     */
    public void setNodeGlobalScopeInfo(final CoordinatorSerializable info, final String kind, final String id)
            throws CoordinatorClientException {
        final String localSite = null;
        setNodeGlobalScopeInfo(info, localSite, kind, id);
    }

    /**
     * Persists node info in global scope for the given site.
     *
     * The service id can be used as the "id" and the type of info as the "kind",
     * which allows a certain type of info about a particular node to be persisted
     * in the coordinator. Nothing happens when info or kind is null.
     *
     * @param info info to persist; ignored when null
     * @param siteId site the configuration belongs to
     * @param kind configuration kind; the call is ignored when null
     * @param id configuration id
     * @throws CoordinatorClientException when persisting fails
     */
    public void setNodeGlobalScopeInfo(final CoordinatorSerializable info, final String siteId, final String kind,
            final String id) throws CoordinatorClientException {
        final boolean nothingToPersist = (info == null) || (kind == null);
        if (nothingToPersist) {
            return;
        }

        try {
            final ConfigurationImpl nodeCfg = new ConfigurationImpl();
            nodeCfg.setId(id);
            nodeCfg.setKind(kind);
            nodeCfg.setConfig(NODE_INFO, info.encodeAsString());
            _coordinator.persistServiceConfiguration(siteId, nodeCfg);
        } catch (Exception e) {
            _log.error("Failed to set node global scope info", e);
            final String reason = "Failed to set node global scope info. " + e.getMessage();
            throw SyssvcException.syssvcExceptions.coordinatorClientError(reason);
        }
    }

    /**
     * Reads node info from the local site.
     * Delegates to the site-aware overload with a null site id.
     *
     * @param clazz type of the info to decode
     * @param kind configuration kind
     * @param id configuration id
     * @param <T> info type
     * @return the decoded info, or null when no such info is stored
     * @throws Exception when instantiation, lookup or decoding fails
     */
    public <T extends CoordinatorSerializable> T getNodeGlobalScopeInfo(final Class<T> clazz, final String kind,
            final String id) throws Exception {
        final String localSite = null;
        return getNodeGlobalScopeInfo(clazz, localSite, kind, id);
    }

    /**
     * Reads node info from a specific site.
     *
     * A prototype instance of the requested class is created first and is used to
     * decode the stored string, so the class needs an accessible no-arg constructor.
     *
     * @param clazz type of the info to decode
     * @param siteId site to read from
     * @param kind configuration kind
     * @param id configuration id
     * @param <T> info type
     * @return the decoded info, or null when the configuration or its node-info entry is missing
     * @throws Exception when instantiation, lookup or decoding fails
     */
    public <T extends CoordinatorSerializable> T getNodeGlobalScopeInfo(final Class<T> clazz, final String siteId,
            final String kind, final String id) throws Exception {
        final T prototype = clazz.newInstance();

        final Configuration stored = _coordinator.queryConfiguration(siteId, kind, id);
        if (stored == null) {
            return null;
        }

        final String encoded = stored.getConfig(NODE_INFO);
        if (encoded == null) {
            return null;
        }

        _log.debug("getNodeGlobalScopelInfo({}): info={}", clazz.getName(), Strings.repr(encoded));
        final T decoded = prototype.decodeFromString(encoded);
        _log.debug("getNodeGlobalScopelInfo({}): info={}", clazz.getName(), decoded);
        return decoded;
    }

    /**
     * Convenience overload of setTargetInfo that always verifies the cluster
     * is in an upgradable state before writing.
     *
     * @param info target info to store
     * @throws CoordinatorClientException when the target info cannot be set
     */
    public void setTargetInfo(final CoordinatorSerializable info) throws CoordinatorClientException {
        final boolean checkClusterUpgradable = true;
        setTargetInfo(info, checkClusterUpgradable);
    }

    /**
     * Set target info shared by all nodes.
     * checkClusterUpgradable = true is used for rest api's. It forces to check the cluster state stable or not.
     * checkClusterUpgradable = false will not check the cluster state. It is used when cluster is not in stable state,
     * but we have to set target info at the same time.
     *
     * The write happens while holding the target info lock; the lock is always
     * released afterwards. A null info is ignored.
     *
     * @param info target info to store; ignored when null
     * @param checkClusterUpgradable whether to require an upgradable cluster before writing
     * @throws CoordinatorClientException when the lock cannot be obtained, the cluster is not
     *             upgradable (and the check was requested), or the write fails
     */
    public void setTargetInfo(final CoordinatorSerializable info, boolean checkClusterUpgradable)
            throws CoordinatorClientException {
        if (info == null) {
            return;
        }

        if (!getTargetInfoLock()) {
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target state. Unable to obtain target lock");
        }

        try {
            // check we are in stable state if the caller asked for it
            if (checkClusterUpgradable && !isClusterUpgradable()) {
                throw APIException.serviceUnavailable.clusterStateNotStable();
            }
            _coordinator.setTargetInfo(info);
        } catch (Exception e) {
            // The wrapping exception only carries the message, so log the full cause here.
            _log.error("Failed to set target state", e);
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target state. " + e.getMessage());
        } finally {
            releaseTargetVersionLock();
        }
    }

    /**
     * Set target info shared by all nodes under an explicit configuration id and kind.
     *
     * The cluster must be upgradable. The write happens while holding the target
     * info lock; the lock is always released afterwards. A null info is ignored.
     *
     * @param info target info to store; ignored when null
     * @param id configuration id to store the info under
     * @param kind configuration kind to store the info under
     * @throws CoordinatorClientException when the lock cannot be obtained, the cluster is not
     *             upgradable, or the write fails
     */
    public void setTargetInfo(final CoordinatorSerializable info, String id, String kind)
            throws CoordinatorClientException {
        if (info == null) {
            return;
        }

        if (!getTargetInfoLock()) {
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target state. Unable to obtain target lock");
        }

        try {
            // check we are in stable state
            if (!isClusterUpgradable()) {
                throw APIException.serviceUnavailable.clusterStateNotStable();
            }
            ConfigurationImpl cfg = new ConfigurationImpl();
            cfg.setId(id);
            cfg.setKind(kind);
            cfg.setConfig(TARGET_INFO, info.encodeAsString());
            _coordinator.persistServiceConfiguration(cfg);
        } catch (Exception e) {
            // The wrapping exception only carries the message, so log the full cause here.
            _log.error("Failed to set target state for id={}, kind={}", id, kind, e);
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target state. " + e.getMessage());
        } finally {
            releaseTargetVersionLock();
        }
    }

    /**
     * Remove target info shared by all nodes.
     * checkClusterUpgradable = true is used for rest api's. It forces to check the cluster state stable or not.
     * checkClusterUpgradable = false will not check the cluster state. It is used when cluster is not in stable state,
     * but we have to remove target info at the same time.
     *
     * The configuration id and kind are taken from the info's class info. The removal
     * happens while holding the target info lock; the lock is always released
     * afterwards. A null info is ignored.
     *
     * @param info target info to remove; ignored when null
     * @param checkClusterUpgradable whether to require an upgradable cluster before removing
     * @throws CoordinatorClientException when the lock cannot be obtained, the cluster is not
     *             upgradable (and the check was requested), or the removal fails
     */
    public void removeTargetInfo(final CoordinatorSerializable info, boolean checkClusterUpgradable)
            throws CoordinatorClientException {
        if (info == null) {
            return;
        }

        final CoordinatorClassInfo coordinatorInfo = info.getCoordinatorClassInfo();
        String id = coordinatorInfo.id;
        String kind = coordinatorInfo.kind;

        if (!getTargetInfoLock()) {
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to remove target info. Unable to obtain target lock");
        }

        try {
            // check we are in stable state if the caller asked for it
            if (checkClusterUpgradable && !isClusterUpgradable()) {
                throw APIException.serviceUnavailable.clusterStateNotStable();
            }
            ConfigurationImpl cfg = new ConfigurationImpl();
            cfg.setId(id);
            cfg.setKind(kind);
            cfg.setConfig(TARGET_INFO, info.encodeAsString());
            _coordinator.removeServiceConfiguration(cfg);
            _log.info("Target info removed: {}", info);
        } catch (Exception e) {
            // The wrapping exception only carries the message, so log the full cause here.
            _log.error("Failed to remove target info for id={}, kind={}", id, kind, e);
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to remove target info. " + e.getMessage());
        } finally {
            releaseTargetVersionLock();
        }
    }

    /**
     * Remove target info shared by all nodes, addressed by an explicit configuration id and kind.
     *
     * The cluster must be upgradable. The removal happens while holding the target
     * info lock; the lock is always released afterwards. A null info is ignored.
     *
     * @param info target info to remove; ignored when null
     * @param id configuration id of the entry to remove
     * @param kind configuration kind of the entry to remove
     * @throws CoordinatorClientException when the lock cannot be obtained, the cluster is not
     *             upgradable, or the removal fails
     */
    public void removeTargetInfo(final CoordinatorSerializable info, String id, String kind)
            throws CoordinatorClientException {
        if (info == null) {
            return;
        }

        if (!getTargetInfoLock()) {
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to remove target info. Unable to obtain target lock");
        }

        try {
            // check we are in stable state
            if (!isClusterUpgradable()) {
                throw APIException.serviceUnavailable.clusterStateNotStable();
            }
            ConfigurationImpl cfg = new ConfigurationImpl();
            cfg.setId(id);
            cfg.setKind(kind);
            cfg.setConfig(TARGET_INFO, info.encodeAsString());
            _coordinator.removeServiceConfiguration(cfg);
            _log.info("Target info removed: {}", info);
        } catch (Exception e) {
            // The wrapping exception only carries the message, so log the full cause here.
            _log.error("Failed to remove target info for id={}, kind={}", id, kind, e);
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to remove target info. " + e.getMessage());
        } finally {
            releaseTargetVersionLock();
        }
    }

    /**
     * Reads the target info of the given type from the coordinator.
     *
     * @param clazz type of the target info
     * @param <T> target info type
     * @return whatever the coordinator client returns for this type
     * @throws CoordinatorException propagated from the coordinator client
     */
    public <T extends CoordinatorSerializable> T getTargetInfo(final Class<T> clazz) throws CoordinatorException {
        final T target = _coordinator.getTargetInfo(clazz);
        return target;
    }

    /**
     * Reads the target info of the given type stored under an explicit id and kind.
     *
     * @param clazz type of the target info
     * @param id configuration id
     * @param kind configuration kind
     * @param <T> target info type
     * @return whatever the coordinator client returns for this type, id and kind
     * @throws CoordinatorException propagated from the coordinator client
     */
    public <T extends CoordinatorSerializable> T getTargetInfo(final Class<T> clazz, String id, String kind)
            throws CoordinatorException {
        final T target = _coordinator.getTargetInfo(clazz, id, kind);
        return target;
    }

    /**
     * Returns all target properties: the global ones (shared by active/standby)
     * combined with the ones specific to the local site.
     *
     * When both sets exist, a new object is built with the global properties added
     * first and the site-specific ones added afterwards. When only one exists it is
     * returned as is.
     *
     * @return combined properties, or null when neither set exists
     * @throws Exception propagated from the coordinator client
     */
    public PropertyInfoExt getTargetProperties() throws Exception {
        final PropertyInfoExt globalProps = _coordinator.getTargetInfo(PropertyInfoExt.class);
        final PropertyInfoExt siteProps = _coordinator.getTargetInfo(PropertyInfoExt.class,
                _coordinator.getSiteId(), PropertyInfoExt.TARGET_PROPERTY);

        if (globalProps == null) {
            // may itself be null, in which case there is nothing to return
            return siteProps;
        }
        if (siteProps == null) {
            return globalProps;
        }

        final PropertyInfoExt merged = new PropertyInfoExt();
        for (Entry<String, String> global : globalProps.getAllProperties().entrySet()) {
            merged.addProperty(global.getKey(), global.getValue());
        }
        for (Entry<String, String> siteSpecific : siteProps.getAllProperties().entrySet()) {
            merged.addProperty(siteSpecific.getKey(), siteSpecific.getValue());
        }
        return merged;
    }

    /**
     * Update system properties to zookeeper.
     *
     * The given properties are split into global and site-specific ones according to
     * the global property metadata. A property without metadata, or whose site-specific
     * flag is unset, is treated as global. The global set is always persisted; the
     * site-specific set is persisted for the local site only when it is not empty.
     * All writes happen while holding the target info lock, which is always released
     * afterwards, and require the cluster to be upgradable.
     *
     * @param currentProps property names mapped to their new values
     * @throws CoordinatorClientException when the lock cannot be obtained, the cluster is not
     *             upgradable, or persisting fails
     */
    public void setTargetProperties(Map<String, String> currentProps) throws CoordinatorClientException {
        Map<String, PropertyMetadata> propsMetadata = PropertiesMetadata.getGlobalMetadata();
        // split properties as global, or site specific
        HashMap<String, String> globalProps = new HashMap<String, String>();
        HashMap<String, String> siteProps = new HashMap<String, String>();
        for (Map.Entry<String, String> prop : currentProps.entrySet()) {
            String key = prop.getKey();
            PropertyMetadata metadata = propsMetadata.get(key);
            if (metadata == null) {
                // Unknown key: avoid a NullPointerException and fall back to global scope.
                _log.warn("No metadata found for property {}; treating it as a global property", key);
            }
            // Boolean.TRUE.equals also guards against an unset (null) site-specific flag.
            if (metadata != null && Boolean.TRUE.equals(metadata.getSiteSpecific())) {
                siteProps.put(key, prop.getValue());
            } else {
                globalProps.put(key, prop.getValue());
            }
        }

        // update properties to zk
        if (!getTargetInfoLock()) {
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target state. Unable to obtain target lock");
        }

        try {
            // check we are in stable state
            if (!isClusterUpgradable()) {
                throw APIException.serviceUnavailable.clusterStateNotStable();
            }
            ConfigurationImpl globalCfg = new ConfigurationImpl();
            globalCfg.setId(PropertyInfoExt.TARGET_PROPERTY_ID);
            globalCfg.setKind(PropertyInfoExt.TARGET_PROPERTY);
            PropertyInfoExt globalPropInfo = new PropertyInfoExt(globalProps);
            globalCfg.setConfig(TARGET_INFO, globalPropInfo.encodeAsString());
            _coordinator.persistServiceConfiguration(globalCfg);
            _log.info("target properties changed successfully. target properties {}",
                    globalPropInfo.toString());

            if (!siteProps.isEmpty()) {
                setSiteSpecificProperties(siteProps, _coordinator.getSiteId());
                _log.info("site scope target properties changed successfully. target properties {}",
                        siteProps.toString());
            }
        } catch (Exception e) {
            // The wrapping exception only carries the message, so log the full cause here.
            _log.error("Failed to set target properties", e);
            throw SyssvcException.syssvcExceptions
                    .coordinatorClientError("Failed to set target info. " + e.getMessage());
        } finally {
            releaseTargetVersionLock();
        }
    }

    /**
     * Persists the given properties as the site-specific target properties of a site.
     * The configuration is stored with the site id as its id and the target-property kind.
     *
     * @param props site-specific properties to store
     * @param siteId id of the site the properties belong to
     */
    public void setSiteSpecificProperties(Map<String, String> props, String siteId) {
        final String encodedProps = new PropertyInfoExt(props).encodeAsString();

        final ConfigurationImpl cfg = new ConfigurationImpl();
        cfg.setId(siteId);
        cfg.setKind(PropertyInfoExt.TARGET_PROPERTY);
        cfg.setConfig(TARGET_INFO, encodedProps);

        _coordinator.persistServiceConfiguration(cfg);
    }

    /**
     * Reads the site-specific target properties of a site.
     *
     * @param siteId id of the site to read the properties for
     * @return the properties as returned by the coordinator client for that site
     */
    public PropertyInfoExt getSiteSpecificProperties(String siteId) {
        final PropertyInfoExt siteProps = _coordinator.getTargetInfo(PropertyInfoExt.class, siteId,
                PropertyInfoExt.TARGET_PROPERTY);
        return siteProps;
    }

    /**
     * Collects the node infos of the given type for the local site.
     *
     * @param clazz type of the node info
     * @param nodeIdFilter pattern handed to the coordinator client to select nodes
     * @param <T> node info type
     * @return node infos keyed by service, as returned by the coordinator client
     * @throws Exception propagated from the coordinator client
     */
    public <T extends CoordinatorSerializable> Map<Service, T> getAllNodeInfos(Class<T> clazz, Pattern nodeIdFilter)
            throws Exception {
        final String localSiteId = _coordinator.getSiteId();
        return getAllNodeInfos(clazz, nodeIdFilter, localSiteId);
    }

    /**
     * Collects the node infos of the given type for a specific site.
     *
     * @param clazz type of the node info
     * @param nodeIdFilter pattern handed to the coordinator client to select nodes
     * @param siteId id of the site to query
     * @param <T> node info type
     * @return node infos keyed by service, as returned by the coordinator client
     * @throws Exception propagated from the coordinator client
     */
    public <T extends CoordinatorSerializable> Map<Service, T> getAllNodeInfos(Class<T> clazz, Pattern nodeIdFilter,
            String siteId) throws Exception {
        final Map<Service, T> nodeInfos = _coordinator.getAllNodeInfos(clazz, nodeIdFilter, siteId);
        return nodeInfos;
    }

    /**
     * Reads the info of the given type published by a single node.
     *
     * @param node node to query
     * @param clazz type of the node info
     * @param <T> node info type
     * @return the node's info; never null
     * @throws CoordinatorClientException when the lookup fails or no info is found for the node
     */
    public <T extends CoordinatorSerializable> T getNodeInfo(String node, Class<T> clazz)
            throws CoordinatorClientException {
        final T state;
        try {
            state = _coordinator.getNodeInfo(_svc, node, clazz);
        } catch (Exception e) {
            throw SyssvcException.syssvcExceptions.coordinatorClientError(
                    MessageFormat.format("Failed to get node info for node={0}: {1}", node, e));
        }

        // Raised outside the try block so that it is not caught and wrapped a second time.
        if (state == null) {
            throw SyssvcException.syssvcExceptions.coordinatorClientError(
                    MessageFormat.format("Failed to get node info for node={0}: Can't find this node.", node));
        }
        return state;
    }

    /**
     * Collects the upgrade state of all the nodes in the cluster for the local site.
     *
     * @return the cluster info, or null when it cannot be assembled
     */
    public ClusterInfo getClusterInfo() {
        final String localSiteId = _coordinator.getSiteId();
        return getClusterInfo(localSiteId);
    }

    /**
     * Collects the upgrade state of all the control nodes of a site.
     *
     * The result is built from the target repository and property info, each control
     * node's repository info and config version, and the control-node state computed
     * by the coordinator for the site's node count.
     *
     * @param siteIdParam id of the site to query; the local site is used when null
     * @return the cluster info, or null when any step fails (the failure is logged)
     */
    public ClusterInfo getClusterInfo(String siteIdParam) {
        try {
            final String siteId;
            if (siteIdParam != null) {
                siteId = siteIdParam;
            } else {
                siteId = _coordinator.getSiteId();
            }

            // targets: repository and properties
            final RepositoryInfo repoTarget = _coordinator.getTargetInfo(RepositoryInfo.class);
            final PropertyInfoExt propTarget = _coordinator.getTargetInfo(PropertyInfoExt.class);

            // what every control node currently reports
            final Map<Service, RepositoryInfo> nodeRepos = getAllNodeInfos(RepositoryInfo.class,
                    CONTROL_NODE_SYSSVC_ID_PATTERN, siteId);
            final Map<Service, ConfigVersion> nodeConfigVersions = getAllNodeInfos(ConfigVersion.class,
                    CONTROL_NODE_SYSSVC_ID_PATTERN, siteId);

            final Site site = drUtil.getSiteFromLocalVdc(siteId);
            final int siteNodeCount = site.getNodeCount();
            final ClusterInfo.ClusterState nodesState = _coordinator.getControlNodesState(siteId, siteNodeCount);

            // assemble the cluster information from the pieces gathered above
            return toClusterInfo(nodesState, nodeRepos, nodeConfigVersions, repoTarget, propTarget);
        } catch (Exception e) {
            _log.info("Fail to get the cluster information ", e);
            return null;
        }
    }

    /**
     * Reads a target configuration from the coordinator and returns its properties.
     *
     * The stored target-info string is decoded and its properties are copied into
     * a new property object.
     *
     * @param kind configuration kind
     * @param id configuration id
     * @return the decoded properties, or null when no such configuration exists
     */
    public PropertyInfoExt getConfigFromCoordinator(String kind, String id) {
        final Configuration stored = _coordinator.queryConfiguration(kind, id);
        if (stored == null) {
            return null;
        }

        final String encoded = stored.getConfig(TARGET_INFO);
        final Map<String, String> decodedProps = new PropertyInfoExt().decodeFromString(encoded).getAllProperties();
        return new PropertyInfoExt(decodedProps);
    }

    /**
     * Checks whether the cluster is in an upgradable state.
     *
     * This is a straight delegation to the coordinator client. According to the
     * existing description, a cluster is stably upgradable when both control nodes
     * and extra nodes are upgradable, or when the control node is in the initializing
     * state that one control node sets after deployment.
     *
     * @return the coordinator client's verdict
     */
    public boolean isClusterUpgradable() {
        final boolean upgradable = _coordinator.isClusterUpgradable();
        return upgradable;
    }

    /**
     * Checks whether the control cluster is upgradable, i.e. whether the control
     * nodes' state is STABLE or INITIALIZING.
     * Note : INITIALIZING is a special state that it only happens when target info is not set
     *
     * @return true for STABLE or INITIALIZING; false otherwise, including an unknown (null) state
     */
    public boolean isControlClusterUpgradable() {
        final ClusterInfo.ClusterState current = _coordinator.getControlNodesState();
        if (current == null) {
            return false;
        }
        if (current.equals(ClusterInfo.ClusterState.STABLE)) {
            return true;
        }
        return current.equals(ClusterInfo.ClusterState.INITIALIZING);
    }

    /**
     * Verifies that no control node's poweroff state is before the given state.
     *
     * @param state state to compare each node's poweroff state against
     * @param checkNumOfControlNodes when true, the number of reporting control nodes must
     *            equal the configured node count
     * @return true when every reporting node is at or past the given state (and the node
     *         count matches, if requested); false otherwise or when the states cannot be read
     */
    public boolean verifyNodesPowerOffStateNotBefore(PowerOffState.State state, boolean checkNumOfControlNodes) {
        try {
            final Map<Service, PowerOffState> reported = getAllNodeInfos(PowerOffState.class,
                    CONTROL_NODE_SYSSVC_ID_PATTERN);

            final boolean countMismatch = reported.size() != getNodeCount();
            if (checkNumOfControlNodes && countMismatch) {
                return false;
            }

            for (PowerOffState nodeState : reported.values()) {
                final boolean behind = nodeState.getPowerOffState().compareTo(state) < 0;
                if (behind) {
                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            _log.info("Fail to get nodes' poweroff state information ", e);
            return false;
        }
    }

    /**
     * Looks up the endpoint of the service running on the node with the given node id.
     *
     * All services with this service's name and version are located through the
     * coordinator; the first one whose node id equals the given id wins.
     *
     * @param nodeId node id to look for
     * @return the matching service's endpoint, or null when none matches or the lookup fails
     */
    public URI getNodeEndpoint(String nodeId) {
        URI endpoint = null;
        try {
            final List<Service> candidates = _coordinator.locateAllServices(_svc.getName(), _svc.getVersion(),
                    (String) null, null);
            final Iterator<Service> it = candidates.iterator();
            while (endpoint == null && it.hasNext()) {
                final Service candidate = it.next();
                if (candidate.getNodeId().equals(nodeId)) {
                    return candidate.getEndpoint();
                }
            }
        } catch (Exception e) {
            _log.info("Fail to get the cluster information " + e.getMessage());
        }
        return endpoint;
    }

    /**
     * Looks up the endpoint of the service with the given syssvc id.
     *
     * All services with this service's name and version are located through the
     * coordinator; the first one whose id equals the given id wins.
     *
     * @param svcId service id to look for
     * @return the matching service's endpoint, or null when none matches or the lookup fails
     */
    public URI getNodeEndpointForSvcId(String svcId) {
        try {
            final List<Service> candidates = _coordinator.locateAllServices(_svc.getName(), _svc.getVersion(), null,
                    null);
            final Iterator<Service> it = candidates.iterator();
            while (it.hasNext()) {
                final Service candidate = it.next();
                final boolean idMatches = candidate.getId().equals(svcId);
                if (idMatches) {
                    return candidate.getEndpoint();
                }
            }
        } catch (Exception e) {
            _log.info("Fail to get the cluster information " + e.getMessage());
        }
        return null;
    }

    /**
     * Checks whether the given node holds the site-local persistent lock.
     *
     * A failure to read the lock or its owner is logged together with its cause
     * and reported as "not held".
     *
     * @param svcId
     *            - the node whose candidacy for lock owner to check
     * @param lockId
     *            - lock id
     * @return True - If node holds the lock false - otherwise
     */
    public boolean hasPersistentLock(String svcId, String lockId) throws Exception {
        try {
            DistributedPersistentLock lock = _coordinator.getSiteLocalPersistentLock(lockId);

            if (lock != null) {
                String lockOwner = lock.getLockOwner();
                if (lockOwner != null && lockOwner.equals(svcId)) {
                    _log.info("Current owner of the {} lock: {} ", lockId, lockOwner);
                    return true;
                }
            }
        } catch (Exception e) {
            // Pass the exception as the last argument so the cause and stack trace are logged.
            _log.error("Can not get {} lock owner ", lockId, e);
        }
        return false;
    }

    /**
     * Tries to acquire the site-local persistent lock on behalf of the given node.
     *
     * Any exception raised while fetching or acquiring the lock is logged and
     * reported as a failed acquisition.
     *
     * @param svcId
     *            - The candidate node who wants to grab the lock
     * @param lockId
     *            - lock id
     * @return True - If node can get the lock False - Otherwise
     */
    public boolean getPersistentLock(String svcId, String lockId) {
        boolean acquired = false;
        try {
            final DistributedPersistentLock persistentLock = _coordinator.getSiteLocalPersistentLock(lockId);
            _log.info("Acquiring the {} lock for {}...", lockId, svcId);
            acquired = persistentLock.acquireLock(svcId);
        } catch (Exception e) {
            _log.info("Can not get {} lock", lockId, e);
        }
        return acquired;
    }

    /**
     * The method to release the persistent upgrade lock. It first ensures that
     * the node holds the lock
     * 
     * @param svcId
     *            - which wants to release lock
     * @param lockId
     *            - lock id
     * @return true if succeed; false otherwise
     * @throws InvalidLockOwnerException
     */
    public boolean releasePersistentLock(String svcId, String lockId) throws Exception {
        final DistributedPersistentLock persistentLock = _coordinator.getSiteLocalPersistentLock(lockId);
        if (persistentLock == null) {
            return false;
        }

        final String currentOwner = persistentLock.getLockOwner();
        if (currentOwner == null) {
            // Nothing to release; reported as success.
            _log.info("{} lock is not held by any node", lockId);
            return true;
        }

        // Only the current owner is allowed to release the lock through this overload.
        if (!currentOwner.equals(svcId)) {
            throw SyssvcException.syssvcExceptions.invalidLockOwnerError("Lock owner is " + currentOwner);
        }

        if (persistentLock.releaseLock(currentOwner)) {
            _log.info("{} lock released by owner {} successfully", lockId, currentOwner);
            return true;
        }
        _log.info("{} lock released failed for owner {}", lockId, currentOwner);
        return false;
    }

    /**
     * The method to release the persistent upgrade lock.
     * Any node which calls the method can release the lock.
     * 
     * @param lockId
     *            - lock id
     * @return true if succeed; false otherwise
     * @throws Exception
     */
    public boolean releasePersistentLock(String lockId) throws Exception {
        final DistributedPersistentLock persistentLock = _coordinator.getSiteLocalPersistentLock(lockId);
        if (persistentLock == null) {
            return false;
        }

        final String currentOwner = persistentLock.getLockOwner();
        if (currentOwner == null) {
            // Nothing to release; reported as success.
            _log.info("Upgrade lock is not held by any node");
            return true;
        }

        // Released on behalf of whoever currently owns it, regardless of the caller.
        if (persistentLock.releaseLock(currentOwner)) {
            _log.info("Upgrade lock {} released successfully", lockId);
            return true;
        }
        _log.info("Upgrade lock {} released failed", lockId);
        return false;
    }

    /**
     * The method to identify and return the node which is currently holding the
     * persistent upgrade lock
     * 
     * @param lockId
     *            - lock id
     * @return NodeHandle - for node which holds the lock null - If no node
     *         holds the lock
     */
    public String getUpgradeLockOwner(String lockId) {
        // Plain delegation to the coordinator client. An earlier note here mentioned special
        // handling for 1.1 (when '-' was changed to '_'); no such handling is done in this method.
        final String owner = _coordinator.getUpgradeLockOwner(lockId);
        return owner;
    }

    /**
     * The method to check if the input node holds the non-persistent remote download
     * lock
     * 
     * @param svcId
     * 
     * @return True - if given node holds the lock false-otherwise
     */
    public boolean hasRemoteDownloadLock(String svcId) {
        // The lock holder is whichever node is currently published as remote download leader.
        final String currentLeader = getRemoteDownloadLeader();
        return svcId.equals(currentLeader);
    }

    /**
     * The method which tries to grab the non-persistent remote download lock
     * 
     * @param svcId
     *            - The candidate node which wants to get the lock
     * @return True - If node gets the lock False - Otherwise
     */
    public boolean getRemoteDownloadLock(String svcId) {
        try {
            final String currentLeader = this.getRemoteDownloadLeader();
            if (currentLeader != null && currentLeader.equals(svcId)) {
                // Already published as leader: nothing to acquire.
                return true;
            }

            // The lock handle is created lazily on first use.
            if (_remoteDownloadLock == null) {
                _remoteDownloadLock = _coordinator.getSiteLocalLock(REMOTE_DOWNLOAD_LOCK);
            }

            final boolean acquired = _remoteDownloadLock.acquire(2, TimeUnit.SECONDS);
            if (!acquired) {
                return false;
            }
            publishRemoteDownloadLeader(svcId);
            return true;
        } catch (Exception e) {
            _log.error("Can not get leader lock {}", svcId, e);
            return false;
        }
    }

    /**
     * The method to release the non-persistent remote download lock
     * 
     * @param svcId
     *            - which wants to release the lock
     */
    public void releaseRemoteDownloadLock(String svcId) {
        if (_remoteDownloadLock == null) {
            // The lock handle was never created, so there is nothing to release.
            return;
        }
        try {
            _remoteDownloadLock.release();
            // Clear the published leader once the lock itself has been released.
            publishRemoteDownloadLeader(null);
        } catch (Exception e) {
            _log.error("Can not release leader lock {}", svcId, e);
        }
    }

    /**
     * The method to publish/store the identification of the node which holds
     * the non-persistent lock. When the node releases the lock, it still needs
     * to call this method with "null" argument to clear/reset the lock owner
     * status
     * 
     * @param leader
     *            - Which hold the lock null - when any node release the lock
     */
    private void publishRemoteDownloadLeader(String leader) {
        // A null leader is stored as the empty string, which readers treat as "no leader".
        final String attributeValue = (leader == null) ? "" : leader;
        _svc.setAttribute(REMOTE_DOWNLOAD_LEADER, attributeValue);

        try {
            _log.info("Publishing leader: {}", leader);
            _beacon.publish();
        } catch (Exception ex) {
            _log.error("Failed to publish the leader", ex);
        }
    }

    /**
     * The method to retrieve the node holding the non-persistent remote download lock
     * 
     * @return id for node which holds the leader lock, null if no node
     *         holds the leader lock
     */
    public String getRemoteDownloadLeader() {
        try {
            // First look at the attribute held by this node's own service object.
            final String localLeader = _svc.getAttribute(REMOTE_DOWNLOAD_LEADER);
            if (localLeader != null && !localLeader.isEmpty()) {
                return localLeader;
            }

            // Otherwise return the first non-empty leader attribute advertised by any registered instance.
            final List<Service> peers = _coordinator.locateAllServices(_svc.getName(), _svc.getVersion(),
                    (String) null, null);
            for (Service peer : peers) {
                final String advertised = peer.getAttribute(REMOTE_DOWNLOAD_LEADER);
                if (advertised == null || advertised.isEmpty()) {
                    continue;
                }
                return advertised;
            }
        } catch (Exception ex) {
            _log.error("Failed probing for leader", ex);
        }
        return null;
    }

    /**
     * The method which tries to grab the non-persistent target version lock
     * 
     * @return True - If node gets the lock False - Otherwise
     */
    public boolean getTargetInfoLock() {
        boolean locked;
        try {
            // The lock handle is created lazily on first use.
            if (_targetLock == null) {
                _targetLock = _coordinator.getLock(TARGET_INFO_LOCK);
            }
            _targetLock.acquire();
            locked = true;
        } catch (Exception e) {
            _log.error("Can not get target version lock", e);
            locked = false;
        }
        return locked;
    }

    /**
     * The method to release the non-persistent target version lock
     * 
     */
    public void releaseTargetVersionLock() {
        if (_targetLock == null) {
            // The lock handle was never created, so there is nothing to release.
            return;
        }
        try {
            _targetLock.release();
        } catch (Exception e) {
            _log.error("Can not release target version lock", e);
        }
    }

    /**
     * Locate all registered instances of this service (same name and version as the
     * local service object) in the given site.
     *
     * @param siteId site to look in
     * @return services returned by the coordinator client
     */
    private List<Service> getAllServices(String siteId) throws Exception {
        final List<Service> registered = _coordinator.locateAllServices(siteId, _svc.getName(), _svc.getVersion(),
                null, null);
        return registered;
    }

    /**
     * Locate all registered instances of this service in the site reported by the
     * coordinator client.
     */
    private List<Service> getAllServices() throws Exception {
        final String localSiteId = _coordinator.getSiteId();
        return getAllServices(localSiteId);
    }

    /**
     * The utility method to find all the nodes which are available in the
     * cluster. When each node starts, the system management service on each
     * node registers itself with the coordinator. This method iterates
     * over that registration namespace to find all the nodes in the cluster
     * 
     * @return List of NodeHandles for all nodes in the cluster
     */
    public List<String> getAllNodes() {
        // Same as the site-specific overload, applied to the coordinator client's own site id.
        final String localSiteId = _coordinator.getSiteId();
        return getAllNodes(localSiteId);
    }

    /**
     * List the service ids of all instances of this service registered in a site.
     * Lookup failures are logged and result in whatever ids were collected so far
     * (possibly an empty list); the method never returns null.
     *
     * @param siteIdParam site to query; when null the coordinator client's own site id is used
     * @return list of non-null service ids
     */
    public List<String> getAllNodes(String siteIdParam) {
        String siteId = siteIdParam == null ? _coordinator.getSiteId() : siteIdParam;
        List<String> nodeIds = new ArrayList<>();
        try {
            for (Service svc : getAllServices(siteId)) {
                final String nodeId = svc.getId();
                if (nodeId != null) {
                    nodeIds.add(nodeId);
                }
            }
        } catch (Exception e) {
            // The throwable is the trailing argument, so no "{}" placeholder is needed for it;
            // the previous message left a literal "{}" in the log output.
            _log.error("getAllNodes(): Failed to get all nodeIds", e);
        }
        _log.info("getAllNodes(): Node Ids: {}", Strings.repr(nodeIds));
        return nodeIds;
    }

    /**
     * The utility method to find the corresponding nodeIds for the provided
     * node names. When each node starts, the system management service on each
     * node registers itself with the coordinator. This method iterates
     * over that registration namespace to find the nodes in the cluster
     *
     * @return node ids of the matching nodes in the cluster
     */
    public List<String> getMatchingNodeIds(List<String> nodeNames) {
        List<String> nodeIds = new ArrayList<String>();

        // if use short name is enabled allow matching short name to nodeId
        boolean useShortName = Boolean.parseBoolean(getPropertyInfo().getProperty("use_short_node_name"));

        try {
            for (Service svc : getAllServices()) {
                final String svcNodeName = svc.getNodeName();

                // A service matches on its full node name, or (when short names are enabled)
                // on the part of its node name before the first '.'.
                boolean matched = nodeNames.contains(svcNodeName);
                if (!matched && useShortName && svcNodeName != null) {
                    // Null names are skipped so one unnamed service cannot abort the whole scan.
                    matched = nodeNames.contains(svcNodeName.split("\\.")[0]);
                }

                if (matched) {
                    nodeIds.add(svc.getNodeId());
                }
            }
        } catch (Exception e) {
            // One placeholder for nodeNames; the throwable is the trailing argument and
            // must not have its own "{}".
            _log.error("getMatchingNodeIds(): Failed to get all nodeIds for nodeNames {}", nodeNames, e);
        }
        _log.info("getMatchingNodeIds(): Node Ids: {}", Strings.repr(nodeIds));
        return nodeIds;
    }

    /**
     * The utility method to find the corresponding nodeId for the provided
     * node name. When each node starts, the system management service on each
     * node registers itself with the coordinator. This method iterates
     * over that registration namespace to find the node in the cluster
     *
     * @return node id of the matching node in the cluster, or null if no node matches
     */
    public String getMatchingNodeId(String nodeName) {
        String nodeId = null;

        // if use short name is enabled allow matching short name to nodeId
        boolean useShortName = Boolean.parseBoolean(getPropertyInfo().getProperty("use_short_node_name"));

        try {
            for (Service svc : getAllServices()) {
                final String svcNodeName = svc.getNodeName();

                // A service matches on its full node name, or (when short names are enabled)
                // on the part of its node name before the first '.'.
                boolean matched = nodeName.equals(svcNodeName);
                if (!matched && useShortName && svcNodeName != null) {
                    // Null names are skipped so one unnamed service cannot abort the whole scan.
                    matched = nodeName.equals(svcNodeName.split("\\.")[0]);
                }

                if (matched) {
                    // No break: if several services match, the last one wins.
                    nodeId = svc.getNodeId();
                }
            }
        } catch (Exception e) {
            // One placeholder for nodeName; the throwable is the trailing argument and
            // must not have its own "{}".
            _log.error("getMatchingNodeId(): Failed to get all nodes while searching for {}", nodeName, e);
        }

        if (nodeId == null) {
            _log.error("getMatchingNodeId(): Failed to get nodeId for nodeName {}", nodeName);
        } else {
            _log.info("getMatchingNodeId(): Node Id: {}", nodeId);
        }

        return nodeId;
    }

    /**
     * The utility method to combine list of nodeIds and list of nodeNames into a single list of nodeIds
     * Duplicate nodes are removed.
     *
     * @return nodeIds for matching nodeNames and nodeIds combined
     */
    public List<String> combineNodeNamesWithNodeIds(List<String> nodeNames, List<String> nodeIds) {
        if (nodeNames.isEmpty()) {
            // No names to resolve: the id list is returned untouched.
            return nodeIds;
        }

        // Resolve names to ids; a size mismatch is rejected as an invalid "node name" parameter.
        final List<String> resolvedIds = getMatchingNodeIds(nodeNames);
        if (resolvedIds.size() != nodeNames.size()) {
            throw APIException.badRequests.parameterIsNotValid("node name");
        }

        // Append resolved ids to the caller's list in place, skipping ids already present.
        for (final String resolvedId : resolvedIds) {
            if (nodeIds.contains(resolvedId)) {
                continue;
            }
            nodeIds.add(resolvedId);
        }
        return nodeIds;
    }

    /**
     * The utility method to find the corresponding node name for the provided
     * node id. When each node starts, the system management service on each
     * node registers itself with the coordinator. This method iterates
     * over that registration namespace to find the node in the cluster
     *
     * @return node name of the matching node in the cluster, or null if no node matches
     */
    public String getMatchingNodeName(String nodeId) {
        String nodeName = null;
        try {
            // Return the node name of the first registered service whose node id matches.
            for (Service svc : getAllServices()) {
                if (nodeId.equals(svc.getNodeId())) {
                    nodeName = svc.getNodeName();
                    break;
                }
            }
        } catch (Exception e) {
            // One placeholder for nodeId; the throwable is the trailing argument and
            // must not have its own "{}".
            _log.error("getMatchingNodeName(): Failed to get all nodes while searching for {}", nodeId, e);
        }

        // The outcome is determined by whether a name was found. The previous check tested
        // nodeId, so a failed lookup was logged as success with a null name.
        if (nodeName == null) {
            _log.error("getMatchingNodeName(): Failed to get Node Name for Node Id {}", nodeId);
        } else {
            _log.info("getMatchingNodeName(): Node Name: {}", nodeName);
        }

        return nodeName;
    }

    /**
     * The utility method to get all controller nodes ids
     * Converts nodeCount to list of nodeIds regardless of availability
     *
     * @return List of nodeIds for all nodes in the cluster(get external id like vipr2)
     */
    public ArrayList<String> getAllNodeIds() {
        final ArrayList<String> ids = new ArrayList<String>();

        if (getMyNodeId().equals("standalone")) {
            // Standalone deployment: the single node id is the literal "standalone".
            ids.add("standalone");
        } else {
            // Otherwise ids are the product name followed by 1..nodeCount.
            int seq = 1;
            while (seq <= getNodeCount()) {
                ids.add(ProductName.getName() + seq);
                seq++;
            }
        }

        _log.info("getAllNodeIds(): Node Ids: {}", Strings.repr(ids));
        return ids;
    }

    /**
     * The utility method to find all the controller nodes which are not available in the
     * cluster(they might be powered off or the syssvc is off). When each node starts, the system management service on each
     * node registers itself with the coordinator. This method iterates
     * over that registration namespace to find all the nodes in the cluster
     * and thus find out the nodes that are not available.
     * 
     * @return List of node id for all unavailable nodes in the cluster(get external id like vipr2)
     */
    public ArrayList<String> getUnavailableControllerNodes() {
        ArrayList<String> fullList = new ArrayList<String>();
        // Held through the List interface: only contains() is needed, and downcasting to
        // ArrayList would fail with ClassCastException if getAllNodes() ever returned another List type.
        List<String> availableNodes = getAllNodes();

        // Nothing is reported as unavailable unless the node count is greater than 1.
        if (getNodeCount() > 1) {
            for (int i = 1; i <= getNodeCount(); i++) {
                // Registered ids are expected as "syssvc-<seq>"; a missing seq means that node is unavailable.
                if (!availableNodes.contains("syssvc-" + i)) {
                    fullList.add(ProductName.getName() + i);
                }
            }
        }

        _log.info("getUnavailableControllerNodes(): Node Ids: {}", Strings.repr(fullList));
        return fullList;
    }

    /**
     * Retrieve the repository info published for the given node.
     *
     * @param node node to query
     * @return the node's RepositoryInfo as returned by getNodeInfo
     */
    public RepositoryInfo getRepositoryInfo(String node) throws CoordinatorClientException {
        final RepositoryInfo repositoryInfo = getNodeInfo(node, RepositoryInfo.class);
        return repositoryInfo;
    }

    /**
     * The method to retrieve the available software versions for the given node
     * 
     * @param node
     *            - for which to find the available versions
     * @return List of available software versions on given node
     */
    public List<SoftwareVersion> getVersions(String node) throws CoordinatorClientException {
        // The versions are read from the node's repository info.
        final RepositoryInfo repositoryInfo = getNodeInfo(node, RepositoryInfo.class);
        return repositoryInfo.getVersions();
    }

    /**
     * Get id for "this" node
     */
    public String getMyNodeId() {
        // Plain accessor for the cached node id field.
        final String nodeId = _myNodeId;
        return nodeId;
    }

    /**
     * Get name for "this" node
     */
    public String getMyNodeName() {
        // Plain accessor for the cached node name field.
        final String nodeName = _myNodeName;
        return nodeName;
    }

    /**
     * Get service id (svc id) for "this" node
     */
    public String getMySvcId() {
        // Plain accessor for the cached service id field.
        final String svcId = mySvcId;
        return svcId;
    }

    /**
     * The method which tries to grab the non-persistent new version lock
     * 
     * @return True - If node gets the lock False - Otherwise
     */
    public boolean getNewVersionLock() {
        boolean locked;
        try {
            // The lock handle is created lazily on first use.
            if (_newVersionLock == null) {
                _newVersionLock = _coordinator.getSiteLocalLock(NEW_VERSIONS_LOCK);
            }
            _newVersionLock.acquire();
            locked = true;
        } catch (Exception e) {
            _log.error("Can not get new version lock", e);
            locked = false;
        }
        return locked;
    }

    /**
     * The method to release the non-persistent new version lock
     * 
     */
    public void releaseNewVersionLock() {
        if (_newVersionLock == null) {
            // The lock handle was never created, so there is nothing to release.
            return;
        }
        try {
            _newVersionLock.release();
        } catch (Exception e) {
            _log.error("Can not release new version lock", e);
        }
    }

    /**
     * Check to see if product is licensed for the specified license type.
     * 
     * @return true if the product is licensed for the specified type
     */

    public boolean isProductLicensed(LicenseType licenseType) {
        // Plain delegation to the coordinator client.
        final boolean licensed = _coordinator.isStorageProductLicensed(licenseType);
        return licensed;
    }

    /**
     * Get list of services for a Service.
     * 
     * @param service
     * @param version
     * @param tag
     * @param endpointKey
     * @return
     */
    public List<Service> locateAllServices(String service, String version, String tag, String endpointKey) {
        // Plain delegation to the coordinator client with the arguments passed through unchanged.
        final List<Service> located = _coordinator.locateAllServices(service, version, tag, endpointKey);
        return located;
    }

    /**
     * Check if connection to cluster's zookeeper is active
     * 
     * @return
     */
    public boolean isConnected() {
        // Plain delegation to the coordinator client.
        final boolean connected = _coordinator.isConnected();
        return connected;
    }

    /**
     * Get ip address from vipr end point
     * If IPV6 is configured, end point is in the format of https://[ipv6_addr]:port
     * Otherwise, the end point is https://ipv4_addr:port
     * 
     * @param endpointUri
     * @return
     */
    public String getIPAddrFromUri(URI endpointUri) {
        if (endpointUri == null) {
            return null;
        }

        // URI.getHost() returns null when the URI has no host component (for example an
        // opaque URI); report that as "no address" instead of failing with an NPE.
        String host = endpointUri.getHost();
        if (host == null) {
            return null;
        }

        if (host.startsWith("[")) {
            // ipv6 address: getHost() keeps the enclosing square brackets, strip them
            return host.substring(1, host.indexOf("]", 1));
        }
        return host;
    }

    /**
     * Check whether a dbsvc instance with this node's sequence suffix is registered
     * for the target db schema version.
     *
     * @return true if such a service is found; false if not found or the lookup
     *         fails with a CoordinatorException
     */
    public boolean isDBServiceStarted() {
        final List<Service> dbServices;
        try {
            dbServices = locateAllServices(DBSVC_NAME, _coordinator.getTargetDbSchemaVersion(), (String) null, null);
        } catch (CoordinatorException e) {
            return false;
        }

        // The expected id is "db" plus everything from the last '-' of this node's svc id onwards.
        final String nodeSuffix = mySvcId.substring(mySvcId.lastIndexOf("-"));
        final String expectedDbSvcId = "db" + nodeSuffix;
        for (Service dbService : dbServices) {
            if (dbService.getId().equals(expectedDbSvcId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Report whether db migration is done, as answered by the status checker.
     */
    public boolean isDBMigrationDone() {
        final boolean migrationDone = statusChecker.isMigrationDone();
        return migrationDone;
    }

    /**
     * Get the current db schema version as reported by the coordinator client.
     */
    public String getCurrentDbSchemaVersion() {
        final String schemaVersion = _coordinator.getCurrentDbSchemaVersion();
        return schemaVersion;
    }

    /**
     * Get node seq from given svc id like syssvc-1, db-1 etc
     * 
     * @param svcId svc id with format syssvc-1, syssvc-2 etc
     * @return node seq, i.e. the part of the svc id after the last '-' (1, 2 etc)
     */
    private String getNodeSeqFromSvcId(String svcId) {
        // Everything after the last '-' is the node sequence (the whole id when there is no '-').
        final int lastDash = svcId.lastIndexOf("-");
        return svcId.substring(lastDash + 1);
    }

    /**
     * Get a set with good nodes with available service and version
     * 
     * @param svcName - service name
     * @param version - service version
     * @return a Set instance with good node seq id(1, 2, or 3 etc).
     */
    private Set<String> getGoodNodes(String svcName, String version) {
        final Set<String> nodeSeqs = new HashSet<String>();
        // Every service instance located for this name and version counts as "good";
        // only its node sequence is recorded.
        final Iterator<Service> located = _coordinator.locateAllServices(svcName, version, (String) null, null)
                .iterator();
        while (located.hasNext()) {
            final Service instance = located.next();
            nodeSeqs.add(getNodeSeqFromSvcId(instance.getId()));
        }
        return nodeSeqs;
    }

    /**
     * Check if dbsvc/geodbsvc beacon is good or not
     * 
     * @param svcName either Constants.DBSVC_NAME or Constants.GEODBSVC_NAME
     * @return
     */
    public boolean isMyDbSvcGood(String svcName) {
        // This node is "good" when its sequence appears among the instances of svcName
        // located for the target db schema version.
        final String myNodeSeq = getNodeSeqFromSvcId(mySvcId);
        final String targetDbVersion = _coordinator.getTargetDbSchemaVersion();
        final Set<String> goodNodeSeqs = getGoodNodes(svcName, targetDbVersion);
        return goodNodeSeqs.contains(myNodeSeq);
    }

    /**
     * Get a syssvc endpoint URI for a node where has good dbsvc/geodbsvc
     */
    public URI getNodeEndpointWithGoodDbsvc() {
        try {
            final String targetDbVersion = _coordinator.getTargetDbSchemaVersion();
            final Set<String> dbGoodSeqs = getGoodNodes(Constants.DBSVC_NAME, targetDbVersion);
            final Set<String> geoDbGoodSeqs = getGoodNodes(Constants.GEODBSVC_NAME, targetDbVersion);

            final List<Service> syssvcInstances = _coordinator.locateAllServices(_svc.getName(), _svc.getVersion(),
                    (String) null, null);
            for (Service syssvc : syssvcInstances) {
                final String nodeSeq = getNodeSeqFromSvcId(syssvc.getId());
                final boolean dbGood = dbGoodSeqs.contains(nodeSeq);
                final boolean geoDbGood = geoDbGoodSeqs.contains(nodeSeq);

                // The first syssvc whose node has both dbsvc and geodbsvc registered wins.
                if (dbGood && geoDbGood) {
                    return syssvc.getEndpoint();
                }
                _log.info("Syssvc " + nodeSeq + " is ignored for its dbsvc state: "
                        + dbGood + ", " + geoDbGood);
            }
        } catch (Exception e) {
            // Best effort: a failed lookup is reported to the caller as "not found" (null).
            _log.info("Fail to get the cluster information " + e.getMessage());
        }
        return null;
    }

    /**
     * Initialization method.
     * On standby site, start a thread to monitor local coordinatorsvc status
     * On active site, start a thread to monitor db quorum of each standby site
     */
    public void start() {
        if (!drUtil.isStandby()) {
            // Not a standby site: periodically run the db quorum monitor, starting immediately.
            _log.info("Start monitoring db quorum on all standby sites");
            final ScheduledExecutorService quorumScheduler = Executors.newScheduledThreadPool(1,
                    new ThreadFactory() {
                        @Override
                        public Thread newThread(Runnable task) {
                            return new Thread(task, "DbsvcQuorumMonitor");
                        }
                    });
            quorumScheduler.scheduleAtFixedRate(new DbsvcQuorumMonitor(getMyNodeId(), _coordinator, dbCommonInfo),
                    0, DB_MONITORING_INTERVAL, TimeUnit.SECONDS);
            return;
        }

        _log.info("Start monitoring local coordinatorsvc status on standby site");
        final ScheduledExecutorService coordinatorScheduler = Executors.newScheduledThreadPool(1,
                new ThreadFactory() {
                    @Override
                    public Thread newThread(Runnable task) {
                        return new Thread(task, "CoordinatorsvcMonitor");
                    }
                });
        // The first run is delayed by three intervals. For DR switchover the original active is
        // stopped before the new active starts, so the original active may not see the new
        // active immediately after reboot.
        coordinatorScheduler.scheduleAtFixedRate(coordinatorSvcMonitor, 3 * COODINATOR_MONITORING_INTERVAL,
                COODINATOR_MONITORING_INTERVAL, TimeUnit.SECONDS);
    }

    /**
     * Set the stop flag for the coordinatorsvc monitor. The monitor checks this flag
     * at the start of each run and returns immediately while it is set.
     */
    public void stopCoordinatorSvcMonitor() {
        this.stopCoordinatorSvcMonitor = true;
        _log.info("coordinatorsvc monitor thread stopped");
    }

    /**
     * Clear the stop flag for the coordinatorsvc monitor so that its subsequent
     * runs perform their checks again.
     */
    public void startCoordinatorSvcMonitor() {
        this.stopCoordinatorSvcMonitor = false;
        _log.info("coordinatorsvc monitor thread started");
    }

    /**
     * Monitor local coordinatorsvc on standby site
     */
    private Runnable coordinatorSvcMonitor = new Runnable() {
        private String initZkMode; // ZK mode during syssvc startup

        /**
         * One monitoring pass. Does nothing while the stop flag is set.
         */
        @Override
        public void run() {
            if (stopCoordinatorSvcMonitor) {
                return;
            }

            try {
                checkLocalZKMode();
            } catch (Exception e) {
                // try catch exception to make sure next scheduled run can be launched.
                _log.error("Error occurs when monitor local zookeeper mode", e);
            }
        }

        /**
         * Read the local zookeeper mode and react to it: when this node holds the virtual IP,
         * check the zk modes of the local site's nodes; when the local node is a participant
         * (leader, follower or standalone), update the local site state and reconnect to the
         * active site if it is healthy again.
         */
        private void checkLocalZKMode() {
            try {
                String state = drUtil.getLocalCoordinatorMode(getMyNodeId());
                if (initZkMode == null) {
                    // Remember the first mode observed.
                    initZkMode = state;
                }

                _log.info("Local zookeeper mode: {} ", state);

                if (isVirtualIPHolder()) {
                    _log.info("Local node has vip, monitor other node zk states");
                    checkLocalSiteZKModes();
                }

                if (DrUtil.ZOOKEEPER_MODE_LEADER.equals(state) || DrUtil.ZOOKEEPER_MODE_FOLLOWER.equals(state)
                        || DrUtil.ZOOKEEPER_MODE_STANDALONE.equals(state)) {
                    // node is in participant mode, update the local site state accordingly
                    checkAndUpdateLocalSiteState();

                    // check if active site is back
                    if (isActiveSiteHealthy()) {
                        _log.info("Active site is back. Reconfig coordinatorsvc to observer mode");
                        reconnectZKToActiveSite();
                    } else {
                        _log.info("Active site is unavailable. Keep coordinatorsvc in current state {}", state);
                    }
                }
            } catch (Exception e) {
                _log.error("Exception while monitoring node state: ", e);
            }
        }

        /**
         * Update the standby site state when the active site is lost.
         * if SYNCED, change it to PAUSED.
         * if SYNCING/RESUMING/ADDING, change it to ERROR since it will never finish without the active site.
         */
        private void checkAndUpdateLocalSiteState() {
            Site localSite = drUtil.getLocalSite();

            if (SiteState.STANDBY_SYNCED.equals(localSite.getState())) {
                _log.info("Updating local site from {} to STANDBY_PAUSED since active is unreachable",
                        localSite.getState());
                localSite.setState(SiteState.STANDBY_PAUSED);
                _coordinator.persistServiceConfiguration(localSite.toConfiguration());
            } else if (SiteState.STANDBY_SYNCING.equals(localSite.getState())
                    || SiteState.STANDBY_RESUMING.equals(localSite.getState())
                    || SiteState.STANDBY_ADDING.equals(localSite.getState())) {
                _log.info("Updating local site from {} to STANDBY_ERROR since active is unreachable",
                        localSite.getState());

                // Record the interrupted state before overwriting it with ERROR.
                localSite.setLastState(localSite.getState());
                localSite.setState(SiteState.STANDBY_ERROR);
                _coordinator.persistServiceConfiguration(localSite.toConfiguration());
            }
        }

        /**
         * check all local nodes are in correct zk mode
         */
        private void checkLocalSiteZKModes() {
            List<String> readOnlyNodes = new ArrayList<>();
            List<String> observerNodes = new ArrayList<>();
            int numOnline = 0;

            for (String node : getAllNodeIds()) {
                String nodeState = drUtil.getLocalCoordinatorMode(node);
                if (nodeState == null) {
                    // A node without a reported mode is not counted as online.
                    _log.debug("State for {}: null", node);
                    continue;
                }

                if (DrUtil.ZOOKEEPER_MODE_READONLY.equals(nodeState)) {
                    // Found another node in read only
                    readOnlyNodes.add(node);
                } else if (DrUtil.ZOOKEEPER_MODE_OBSERVER.equals(nodeState)) {
                    // Found another node in observer
                    observerNodes.add(node);
                }
                _log.debug("State for {}: {}", node, nodeState);
                numOnline++;
            }

            // Online nodes that are neither read-only nor observer are counted as participants.
            int numParticipants = numOnline - readOnlyNodes.size() - observerNodes.size();
            int quorum = getNodeCount() / 2 + 1;

            _log.debug("Observer nodes: {}", observerNodes.size());
            _log.debug("Read Only nodes: {}", readOnlyNodes.size());
            _log.debug("Participant nodes: {}", numParticipants);
            _log.debug("nodes Online: {}", numOnline);

            // if there is a participant we need to reconfigure or it will be stuck there
            // if there are only participants no need to reconfigure
            // if there are only read only nodes and we have quorum we need to reconfigure
            if (0 < numParticipants && numParticipants < numOnline) {
                // The merge used to happen inside the log call's argument list, which logged the
                // boolean result of addAll() instead of the nodes. It is now an explicit statement
                // so the log shows the node list.
                // NOTE(review): the read-only nodes are still passed in both lists below, exactly
                // as before - confirm that reconfigZKToWritable tolerates the overlap.
                observerNodes.addAll(readOnlyNodes);
                _log.info("Nodes must have consistent zk mode. Reconfiguring all nodes to participant: {}",
                        observerNodes);
                reconfigZKToWritable(observerNodes, readOnlyNodes);
            } else if (readOnlyNodes.size() == numOnline && numOnline >= quorum) {
                _log.info("A quorum of nodes are read-only, Reconfiguring nodes to participant: {}", readOnlyNodes);
                reconfigZKToWritable(observerNodes, readOnlyNodes);
            }
        }

        /**
         * Reconnect to zookeeper in active site.
         * All nodes meet at a double barrier first; only when every node has entered is the
         * local coordinator reconfigured and restarted in observer mode.
         */
        private void reconnectZKToActiveSite() {
            DistributedDoubleBarrier barrier = _coordinator.getDistributedDoubleBarrier(
                    DR_SWITCH_TO_ZK_OBSERVER_BARRIER, getNodeCount());
            LocalRepository localRepository = LocalRepository.getInstance();
            try {
                boolean allEntered = barrier.enter(DR_SWITCH_BARRIER_TIMEOUT, TimeUnit.SECONDS);
                if (allEntered) {
                    try {
                        localRepository.reconfigCoordinator("observer");
                    } finally {
                        // Always leave the barrier, even when the reconfig fails.
                        leaveZKDoubleBarrier(barrier, DR_SWITCH_TO_ZK_OBSERVER_BARRIER);
                    }
                    localRepository.restartCoordinator("observer");
                } else {
                    _log.warn("All nodes unable to enter barrier {}. Try again later",
                            DR_SWITCH_TO_ZK_OBSERVER_BARRIER);
                    leaveZKDoubleBarrier(barrier, DR_SWITCH_TO_ZK_OBSERVER_BARRIER);
                }
            } catch (Exception ex) {
                // The throwable is the trailing argument and needs no "{}" placeholder.
                _log.warn("Unexpected errors during switching back to zk observer. Try again later.", ex);
            }
        }
    };

    /**
     * Leave the given distributed double barrier. A timeout or an error while
     * leaving is only logged as a warning; nothing is returned or thrown.
     *
     * @param barrier barrier to leave
     * @param path for logging barrier
     */
    private void leaveZKDoubleBarrier(DistributedDoubleBarrier barrier, String path) {
        try {
            _log.info("Leaving the barrier {}", path);
            final boolean left = barrier.leave(DR_SWITCH_BARRIER_TIMEOUT, TimeUnit.SECONDS);
            if (left) {
                return;
            }
            // leave() returned false within the timeout; only a warning is raised.
            _log.warn("Unable to leave barrier for {}", path);
        } catch (Exception ex) {
            _log.warn("Unexpected errors during leaving barrier", ex);
        }
    }

    /**
     * reconfigure ZooKeeper to participant mode within the local site
     */
    public void reconfigZKToWritable() {
        _log.info(
                "Standby is running in read-only mode due to connection loss with active site. Reconfig coordinatorsvc to writable");
        try {
            LocalRepository localRepository = LocalRepository.getInstance();
            // Reconfigure first, then restart so that the new mode takes effect
            localRepository.reconfigCoordinator("participant");
            localRepository.restartCoordinator("participant");
        } catch (Exception ex) {
            // This method switches to participant mode (not observer); log the throwable itself
            // so that the stack trace is kept
            _log.warn("Unexpected errors during reconfiguring zk to participant. Try again later.", ex);
        }
    }

    /**
     * reconfigure ZooKeeper to participant mode within the local site
     *
     * @param observerNodes to be reconfigured
     * @param readOnlyNodes to be reconfigured
     */
    public void reconfigZKToWritable(List<String> observerNodes, List<String> readOnlyNodes) {
        _log.info("Standby is running in read-only mode due to connection loss with active site. "
                + "Reconfig coordinatorsvc of all nodes to writable");

        try {
            // Observers first, then read-only nodes - same order as the two lists were handled before
            List<String> candidates = new ArrayList<String>(observerNodes);
            candidates.addAll(readOnlyNodes);

            String myNodeId = getMyNodeId();
            LocalRepository localRepository = LocalRepository.getInstance();
            // A node may be listed in both collections; it must be reconfigured/restarted only once
            Set<String> handledNodes = new HashSet<String>();
            boolean reconfigLocal = false;

            for (String node : candidates) {
                if (!handledNodes.add(node)) {
                    continue;
                }
                // The local node cannot reboot itself before others
                if (node.equals(myNodeId)) {
                    reconfigLocal = true;
                    continue;
                }
                localRepository.remoteReconfigCoordinator(node, "participant");
                localRepository.remoteRestartCoordinator(node, "participant");
            }

            // reconfigure local node last
            if (reconfigLocal) {
                reconfigZKToWritable();
            }
        } catch (Exception ex) {
            // Nodes are switched to participant mode here (not observer); keep the stack trace
            _log.warn("Unexpected errors during reconfiguring zk to participant on all nodes. Try again later.",
                    ex);
        }
    }

    /**
      * Get current ZK connection state
      * @return state
      * @throws Exception
      */
    public States getConnectionState() throws Exception {
        // The ZK connection is only reachable through the concrete client implementation
        CoordinatorClientImpl clientImpl = (CoordinatorClientImpl) getCoordinatorClient();
        return clientImpl.getZkConnection()
                .curator()
                .getZookeeperClient()
                .getZooKeeper()
                .getState();
    }

    /**
     * Get the list of nodes on which the specified service is available
     */
    public List<String> getServiceAvailableNodes(String serviceName) {
        List<String> nodeNames = new ArrayList<String>();
        try {
            String schemaVersion = _coordinator.getTargetDbSchemaVersion();
            // Each good node id is turned into a node name by prefixing the product name
            for (String nodeId : getGoodNodes(serviceName, schemaVersion)) {
                nodeNames.add(ProductName.getName() + nodeId);
            }
        } catch (Exception lookupError) {
            // Best effort: whatever was collected so far is still returned
            _log.info("Check service({}) beacon error", serviceName, lookupError);
        }
        _log.info("Get available nodes by check {}: {}", serviceName, nodeNames);
        return nodeNames;
    }

    public void blockUntilZookeeperIsWritableConnected(long sleepInterval) {
        // Polls the ZK connection state every sleepInterval milliseconds and returns only once it
        // is CONNECTED. An interruption does not abort the wait; it is remembered and the thread's
        // interrupt status is restored on return so that callers can still observe it.
        boolean interrupted = false;
        try {
            while (true) {
                try {
                    States state = getConnectionState();
                    if (state.equals(States.CONNECTED)) {
                        return;
                    }

                    _log.info("ZK connection state is {}, wait for connected", state);
                } catch (Exception e) {
                    // Exception goes in as the throwable argument - no placeholder needed
                    _log.error("Can't get Zk state", e);
                }

                try {
                    Thread.sleep(sleepInterval);
                } catch (InterruptedException e) {
                    // Keep waiting, but do not lose the interruption
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Check if DR active site is stable and there is ZK leader in active site
     *
     * @return true for stable, otherwise false
     */
    public boolean isActiveSiteHealthy() {
        DrUtil drUtil = new DrUtil(_coordinator);
        String activeSiteId = drUtil.getActiveSite().getUuid();

        boolean leaderAlive = false;
        boolean siteStable = false;

        // Only probe when an active site is known and it is not the local site
        boolean remoteActiveSiteKnown = !StringUtils.isEmpty(activeSiteId)
                && !drUtil.getLocalSite().getUuid().equals(activeSiteId);
        if (remoteActiveSiteKnown) {
            Site activeSite = drUtil.getSiteFromLocalVdc(activeSiteId);
            leaderAlive = isActiveSiteZKLeaderAlive(activeSite);
            siteStable = isActiveSiteStable(activeSite);
            _log.info("Active site ZK is alive: {}, active site stable is :{}", leaderAlive,
                    siteStable);
        } else {
            _log.info("Can't find active site id or local site is active, set active healthy as false");
        }

        // Persist the outcome of this check, creating the record when none exists yet
        SiteMonitorResult result = _coordinator.getTargetInfo(SiteMonitorResult.class);
        if (result == null) {
            result = new SiteMonitorResult();
        }
        result.setActiveSiteLeaderAlive(leaderAlive);
        result.setActiveSiteStable(siteStable);
        _coordinator.setTargetInfo(result);

        return leaderAlive && siteStable;
    }

    public boolean isActiveSiteZKLeaderAlive(Site activeSite) {
        // Check alive coordinatorsvc on active site, using the address family the site is configured for
        Collection<String> nodeAddrList = activeSite.isUsingIpv4()
                ? activeSite.getHostIPv4AddressMap().values()
                : activeSite.getHostIPv6AddressMap().values();

        // Without any address there is nothing to probe; report "not alive" instead of
        // failing with NoSuchElementException in the standalone branch below
        if (nodeAddrList.isEmpty()) {
            _log.warn("No node address found for active site, unable to check zookeeper leader.");
            return false;
        }

        boolean isLeaderAlive = false;
        if (nodeAddrList.size() > 1) {
            // Multiple nodes: a leader answering on any of them is enough
            for (String nodeAddr : nodeAddrList) {
                if (isZookeeperLeader(nodeAddr, ZK_LEADER_ELECTION_PORT)) {
                    isLeaderAlive = true;
                    break;
                }
            }
        } else { // standalone
            String nodeAddr = nodeAddrList.iterator().next();
            // check both election ports on the active site.
            isLeaderAlive = isZookeeperLeader(nodeAddr, ZK_LEADER_ELECTION_PORT)
                    || isZookeeperLeader(nodeAddr, DUAL_ZK_LEADER_ELECTION_PORT);
        }

        if (!isLeaderAlive) {
            _log.info("No zookeeper leader alive on active site.");
        }
        return isLeaderAlive;
    }

    public boolean isActiveSiteStable(Site activeSite) {
        // Build the base URL from the active site's VIP endpoint and this service's endpoint port
        String activeSiteUrl = String.format(SysClientFactory.BASE_URL_FORMAT, activeSite.getVipEndPoint(),
                _svc.getEndpoint().getPort());

        boolean stable = false;
        try {
            SysClient sysClient = SysClientFactory.getSysClient(URI.create(activeSiteUrl),
                    CHECK_ACTIVE_SITE_STABLE_READ_TIMEOUT_MS, CHECK_ACTIVE_SITE_STABLE_CONNECT_TIMEOUT_MS);
            ClusterInfo activeClusterInfo = sysClient.get(URI.create(URI_INTERNAL_GET_CLUSTER_INFO),
                    ClusterInfo.class, null);
            _log.info("Get cluster info from active site {}", activeClusterInfo.getCurrentState());
            // Stable only when the reported cluster state is exactly STABLE
            stable = ClusterState.STABLE.equals(ClusterState.valueOf(activeClusterInfo.getCurrentState()));
        } catch (Exception callError) {
            // Any failure while querying the active site counts as "not stable"
            _log.warn("Encounter error when call Sys API on active site{} ", callError.toString());
        }
        return stable;
    }

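    /**
     * A ZooKeeper leader node listens on 2888 (see coordinator-var.xml) for followers/observers.
     * We depend on this behaviour to check if leader election is started.
     *
     * @param nodeIP address of the node to probe
     * @param port port to connect to
     * @return true if a TCP connection to the given address and port succeeds, otherwise false
     */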
    private boolean isZookeeperLeader(String nodeIP, int port) {
        Socket sock = new Socket();
        try {
            sock.connect(new InetSocketAddress(nodeIP, port), 10000); // 10 seconds timeout
            return true;
        } catch (IOException ex) {
            _log.warn("Unexpected IO errors when checking local coordinator state. {}", ex.toString());
            return false;
        } finally {
            // Always release the socket, also when connect() failed or timed out
            try {
                sock.close();
            } catch (IOException ignored) {
                // Nothing useful can be done about a failing close; the probe result stands
            }
        }
    }

    /**
     * check whether current node is virtual IP holder
     */
    public boolean isVirtualIPHolder() {
        boolean holdsVip;
        try {
            //standby node with vip will monitor all node states
            InetAddress vipAddress = InetAddress.getByName(getVip());
            // The VIP is held locally when some network interface of this host is bound to it
            NetworkInterface owningInterface = NetworkInterface.getByInetAddress(vipAddress);
            holdsVip = owningInterface != null;
        } catch (Exception lookupError) {
            _log.error("Error occured while determining leading node for monitor", lookupError);
            holdsVip = false;
        }
        return holdsVip;
    }
}