Example usage of org.apache.zookeeper.Watcher

List of usage examples for org.apache.zookeeper.Watcher


On this page you can find example usages of org.apache.zookeeper.Watcher.



Source Link


From source file:org.apache.accumulo.fate.zookeeper.ZooLock.java

License:Apache License

/**
 * Tries to take the lock for the ephemeral node {@code myLock}; if another candidate is listed
 * ahead of it, installs a watch on that candidate and retries when it goes away.
 *
 * NOTE(review): this excerpt has lost lines (most closing braces and at least one statement
 * inside the candidate loop), so it does not compile as shown. Restore the missing lines from
 * the upstream ZooLock source before relying on it.
 *
 * @param myLock name of this instance's lock node, looked up among the children of {@code path}
 * @param lw watcher that is told via {@code failedToAcquireLock} when acquisition fails
 * @throws KeeperException declared; presumably propagated from the zooKeeper calls (confirm)
 * @throws InterruptedException declared; presumably propagated from the zooKeeper calls (confirm)
 * @throws IllegalStateException if {@code asyncLock} is null, or if the lock could be taken but
 *         {@code watchingParent} is false
 * @throws RuntimeException if {@code myLock} is not among the children of {@code path}
 */
private synchronized void lockAsync(final String myLock, final AsyncLockWatcher lw)
        throws KeeperException, InterruptedException {

    if (asyncLock == null) {
        throw new IllegalStateException("Called lockAsync() when asyncLock == null");
    }

    // Fetch the current candidate lock nodes under the lock path.
    List<String> children = zooKeeper.getChildren(path);

    // Our own node must still be listed among the candidates.
    if (!children.contains(myLock)) {
        throw new RuntimeException("Lock attempt ephemeral node no longer exist " + myLock);

    if (log.isTraceEnabled()) {
        log.trace("Candidate lock nodes");
        for (String child : children) {
            log.trace("- " + child);

    // Our node is the first candidate: record the lock as held by this instance.
    if (children.get(0).equals(myLock)) {
        log.trace("First candidate is my lock, acquiring");
        if (!watchingParent) {
            throw new IllegalStateException("Can not acquire lock, no longer watching parent : " + path);
        this.lockWatcher = lw;
        this.lock = myLock;
        asyncLock = null;
        lockWasAcquired = true;
    // Otherwise remember the candidate seen before ours.
    // NOTE(review): the body of the equals(myLock) check is missing from this excerpt;
    // presumably it stops the loop so that prev is the node directly ahead of myLock.
    String prev = null;
    for (String child : children) {
        if (child.equals(myLock)) {

        prev = child;

    final String lockToWatch = path + "/" + prev;
    log.trace("Establishing watch on " + lockToWatch);
    // Stat the node ahead of us while registering a watcher on it.
    Stat stat = zooKeeper.getStatus(lockToWatch, new Watcher() {

        public void process(WatchedEvent event) {
            if (log.isTraceEnabled()) {
                log.trace("Processing event:");
                log.trace("- type  " + event.getType());
                log.trace("- path  " + event.getPath());
                log.trace("- state " + event.getState());
            // Stays true unless one of the cases below handles the event.
            boolean renew = true;
            // The watched node was deleted: retry acquisition if we are still waiting.
            if (event.getType() == EventType.NodeDeleted && event.getPath().equals(lockToWatch)) {
                log.trace("Detected deletion of " + lockToWatch + ", attempting to acquire lock");
                synchronized (ZooLock.this) {
                    try {
                        if (asyncLock != null) {
                            lockAsync(myLock, lw);
                        } else if (log.isTraceEnabled()) {
                            log.trace("While waiting for another lock " + lockToWatch + " " + myLock
                                    + " was deleted");
                    } catch (Exception e) {
                        if (lock == null) {
                            // have not acquired lock yet
                renew = false;

            // Session expired or disconnected: report failure if the lock is not held yet.
            if (event.getState() == KeeperState.Expired || event.getState() == KeeperState.Disconnected) {
                synchronized (ZooLock.this) {
                    if (lock == null) {
                        lw.failedToAcquireLock(new Exception("Zookeeper Session expired / disconnected"));
                renew = false;
            // Any other event: re-register this watcher; a null Stat triggers another attempt.
            if (renew) {
                log.trace("Renewing watch on " + lockToWatch);
                try {
                    Stat restat = zooKeeper.getStatus(lockToWatch, this);
                    if (restat == null) {
                        lockAsync(myLock, lw);
                } catch (KeeperException e) {
                    lw.failedToAcquireLock(new Exception("Failed to renew watch on other master node"));
                } catch (InterruptedException e) {
                    lw.failedToAcquireLock(new Exception("Failed to renew watch on other master node"));


    // A null Stat (presumably the watched node is already gone; confirm) means try again now.
    if (stat == null)
        lockAsync(myLock, lw);

From source file:org.apache.accumulo.fate.zookeeper.ZooLock.java

License:Apache License

/**
 * Starts an asynchronous lock attempt: creates an ephemeral sequential node under {@code path},
 * watches it, and hands the node name to the private {@code lockAsync(String, AsyncLockWatcher)}.
 *
 * NOTE(review): this excerpt has lost lines (closing braces, the bodies of several branches and
 * of both catch clauses), so it does not compile as shown.
 *
 * @param lw watcher that is told via {@code failedToAcquireLock} when acquisition fails
 * @param data bytes passed to {@code putEphemeralSequential} for the new node
 * @throws IllegalStateException if {@code lockWatcher}, {@code lock} or {@code asyncLock} is
 *         already set
 */
public synchronized void lockAsync(final AsyncLockWatcher lw, byte data[]) {

    // Refuse to start while a lock is held or another attempt is pending.
    if (lockWatcher != null || lock != null || asyncLock != null) {
        throw new IllegalStateException();
    }

    lockWasAcquired = false;

    try {
        final String asyncLockPath = zooKeeper.putEphemeralSequential(path + "/" + LOCK_PREFIX, data);
        log.trace("Ephemeral node " + asyncLockPath + " created");
        // Stat the new node while registering a watcher on it.
        Stat stat = zooKeeper.getStatus(asyncLockPath, new Watcher() {

            // Reports the failed attempt and clears the pending attempt marker.
            private void failedToAcquireLock() {
                lw.failedToAcquireLock(new Exception("Lock deleted before acquired"));
                asyncLock = null;

            public void process(WatchedEvent event) {
                synchronized (ZooLock.this) {
                    // NOTE(review): the bodies of the first two branches are missing from this excerpt.
                    if (lock != null && event.getType() == EventType.NodeDeleted
                            && event.getPath().equals(path + "/" + lock)) {
                    } else if (asyncLock != null && event.getType() == EventType.NodeDeleted
                            && event.getPath().equals(path + "/" + asyncLock)) {
                    } else if (event.getState() != KeeperState.Disconnected
                            && event.getState() != KeeperState.Expired && (lock != null || asyncLock != null)) {
                        log.debug("Unexpected event watching lock node " + event + " " + asyncLockPath);
                        try {
                            // Re-register this watcher and check the node again.
                            Stat stat2 = zooKeeper.getStatus(asyncLockPath, this);
                            if (stat2 == null) {
                                if (lock != null)
                                else if (asyncLock != null)
                        } catch (Throwable e) {
                            log.error("Failed to stat lock node " + asyncLockPath, e);


        if (stat == null) {
            lw.failedToAcquireLock(new Exception("Lock does not exist after create"));

        // Keep only the node name: drop the path prefix and the following separator character.
        asyncLock = asyncLockPath.substring(path.length() + 1);

        lockAsync(asyncLock, lw);

    } catch (KeeperException e) {
    } catch (InterruptedException e) {

From source file:org.apache.accumulo.master.Master.java

License:Apache License

/**
 * Main loop of the master: starts the client and replication coordinator services, blocks until
 * the master lock is obtained, sets up watchers, FATE and replication, then polls until the
 * client service stops serving and begins shutdown.
 *
 * NOTE(review): this excerpt has lost many lines (closing braces, trailing call arguments,
 * thread starts and most of the shutdown sequence), so it does not compile as shown.
 *
 * @throws IOException also used to wrap KeeperException and InterruptedException raised while
 *         setting up the FATE store
 * @throws InterruptedException declared; origin is not visible in this excerpt
 * @throws KeeperException declared; origin is not visible in this excerpt
 */
public void run() throws IOException, InterruptedException, KeeperException {
    final String zroot = ZooUtil.getRoot(getInstance());

    // ACCUMULO-4424 Put up the Thrift servers before getting the lock as a sign of process health when a hot-standby
    //
    // Start the Master's Client service
    clientHandler = new MasterClientServiceHandler(this);
    // Ensure that calls before the master gets the lock fail
    Iface haProxy = HighlyAvailableServiceWrapper.service(clientHandler, this);
    Iface rpcProxy = RpcWrapper.service(haProxy, new Processor<Iface>(clientHandler));
    final Processor<Iface> processor;
    // With SASL the RPC proxy is additionally wrapped by TCredentialsUpdatingWrapper.
    if (ThriftServerType.SASL == getThriftServerType()) {
        Iface tcredsProxy = TCredentialsUpdatingWrapper.service(rpcProxy, clientHandler.getClass(),
        processor = new Processor<>(tcredsProxy);
    } else {
        processor = new Processor<>(rpcProxy);
    ServerAddress sa = TServerUtils.startServer(this, hostname, Property.MASTER_CLIENTPORT, processor, "Master",
            "Master Client Service Handler", null, Property.MASTER_MINTHREADS, Property.MASTER_THREADCHECK,
    clientService = sa.server;
    log.info("Started Master client service at {}", sa.address);

    // Start the replication coordinator which assigns tservers to service replication requests
    MasterReplicationCoordinator impl = new MasterReplicationCoordinator(this);
    ReplicationCoordinator.Iface haReplicationProxy = HighlyAvailableServiceWrapper.service(impl, this);
    ReplicationCoordinator.Processor<ReplicationCoordinator.Iface> replicationCoordinatorProcessor = new ReplicationCoordinator.Processor<>(
            RpcWrapper.service(impl, new ReplicationCoordinator.Processor<>(haReplicationProxy)));
    ServerAddress replAddress = TServerUtils.startServer(this, hostname,
            Property.MASTER_REPLICATION_COORDINATOR_PORT, replicationCoordinatorProcessor,
            "Master Replication Coordinator", "Replication Coordinator", null,

    log.info("Started replication coordinator service at " + replAddress.address);

    // block until we can obtain the ZK lock for the master
    getMasterLock(zroot + Constants.ZMASTER_LOCK);

    recoveryManager = new RecoveryManager(this);


    StatusThread statusThread = new StatusThread();

    MigrationCleanupThread migrationCleanupThread = new MigrationCleanupThread();


    ZooReaderWriter zReaderWriter = ZooReaderWriter.getInstance();

    // Watch the recovery node's children; the watcher re-registers itself on every event.
    zReaderWriter.getChildren(zroot + Constants.ZRECOVERY, new Watcher() {
        public void process(WatchedEvent event) {
            nextEvent.event("Noticed recovery changes", event.getType());
            try {
                // watcher only fires once, add it back
                ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, this);
            } catch (Exception e) {
                log.error("Failed to add log recovery watcher back", e);

    // Three tablet group watchers; the second and third are each given the previously added one.
    watchers.add(new TabletGroupWatcher(this, new MetaDataStateStore(this, this), null) {
        boolean canSuspendTablets() {
            // Always allow user data tablets to enter suspended state.
            return true;

    watchers.add(new TabletGroupWatcher(this, new RootTabletStateStore(this, this), watchers.get(0)) {
        boolean canSuspendTablets() {
            // Allow metadata tablets to enter suspended state only if so configured. Generally we'll want metadata tablets to
            // be immediately reassigned, even if there's a global table.suspension.duration setting.
            return getConfiguration().getBoolean(Property.MASTER_METADATA_SUSPENDABLE);

    watchers.add(new TabletGroupWatcher(this, new ZooTabletStateStore(new ZooStore(zroot)), watchers.get(1)) {
        boolean canSuspendTablets() {
            // Never allow root tablet to enter suspended state.
            return false;
    for (TabletGroupWatcher watcher : watchers) {

    // Once we are sure the upgrade is complete, we can safely allow fate use.

    try {
        // The trailing argument 1000 * 60 * 60 * 8 equals eight hours if it is in milliseconds (confirm unit).
        final AgeOffStore<Master> store = new AgeOffStore<>(
                new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(getInstance()) + Constants.ZFATE,
                1000 * 60 * 60 * 8);

        int threads = getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);

        fate = new Fate<>(this, store);

        // NOTE(review): the body of the scheduled task is missing from this excerpt.
        SimpleTimer.getInstance(getConfiguration()).schedule(new Runnable() {

            public void run() {
        }, 63000, 63000);
    } catch (KeeperException e) {
        throw new IOException(e);
    } catch (InterruptedException e) {
        throw new IOException(e);

    ZooKeeperInitialization.ensureZooKeeperInitialized(zReaderWriter, zroot);

    // Make sure that we have a secret key (either a new one or an old one from ZK) before we start
    // the master client service.
    if (null != authenticationTokenKeyManager && null != keyDistributor) {
        log.info("Starting delegation-token key manager");
        boolean logged = false;
        while (!authenticationTokenKeyManager.isInitialized()) {
            // Print out a status message when we start waiting for the key manager to get initialized
            if (!logged) {
                log.info("Waiting for AuthenticationTokenKeyManager to be initialized");
                logged = true;
            sleepUninterruptibly(200, TimeUnit.MILLISECONDS);
        // And log when we are initialized
        log.info("AuthenticationTokenSecretManager is initialized");

    String address = sa.address.toString();
    log.info("Setting master lock data to " + address);

    // Poll until the client service reports that it is serving.
    while (!clientService.isServing()) {
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);

    // Start the daemon to scan the replication table and make units of work
    replicationWorkDriver = new ReplicationDriver(this);

    // Start the daemon to assign work to tservers to replicate to our peers
    try {
        replicationWorkAssigner = new WorkDriver(this);
    } catch (AccumuloException | AccumuloSecurityException e) {
        log.error("Caught exception trying to initialize replication WorkDriver", e);
        throw new RuntimeException(e);

    // Advertise that port we used so peers don't have to be told what it is
            ZooUtil.getRoot(getInstance()) + Constants.ZMASTER_REPLICATION_COORDINATOR_ADDR,
            replAddress.address.toString().getBytes(UTF_8), NodeExistsPolicy.OVERWRITE);

    // Register replication metrics
    MasterMetricsFactory factory = new MasterMetricsFactory(getConfiguration(), this);
    Metrics replicationMetrics = factory.createReplicationMetrics();
    try {
    } catch (Exception e) {
        log.error("Failed to register replication metrics", e);

    // Stay here for as long as the client service is serving; shutdown follows.
    while (clientService.isServing()) {
        sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
    log.info("Shutting down fate.");

    log.info("Shutting down timekeeping.");

    final long deadline = System.currentTimeMillis() + MAX_CLEANUP_WAIT_TIME;
    // Signal that we want it to stop, and wait for it to do so.
    if (authenticationTokenKeyManager != null) {

    // quit, even if the tablet servers somehow jam up and the watchers
    // don't stop
    for (TabletGroupWatcher watcher : watchers) {

From source file:org.apache.accumulo.server.conf.ZooConfigurationFactory.java

License:Apache License

/**
 * Gets a configuration object for the given instance with the given parent. Repeated calls will
 * return the same object.
 *
 * @param inst
 *          instance; if null, instance is determined from HDFS
 * @param zcf
 *          {@link ZooCacheFactory} for building {@link ZooCache} to contact ZooKeeper (required)
 * @param parent
 *          parent configuration (required)
 * @return configuration
 */
// Looks up the ZooConfiguration stored in `instances` under the instance id and, when none is
// stored yet, builds one and stores it.
// NOTE(review): this excerpt has lost most closing braces and does not compile as shown.
ZooConfiguration getInstance(Instance inst, ZooCacheFactory zcf, AccumuloConfiguration parent) {
    String instanceId;
    if (inst == null) {
        // InstanceID should be the same across all volumes, so just choose one
        VolumeManager fs;
        try {
            fs = VolumeManagerImpl.get();
        } catch (IOException e) {
            throw new RuntimeException(e);
        Path instanceIdPath = Accumulo.getAccumuloInstanceIdPath(fs);
        instanceId = ZooUtil.getInstanceIDFromHdfs(instanceIdPath, parent);
    } else {
        instanceId = inst.getInstanceID();

    ZooConfiguration config;
    // Lookup and insertion into the instances map happen under its monitor.
    synchronized (instances) {
        config = instances.get(instanceId);
        if (config == null) {
            ZooCache propCache;

            // The purpose of this watcher is a hack. It forces the creation on a new zoocache instead
            // of using a shared one. This was done so that the zoocache
            // would update less, causing the configuration update count to changes less.
            Watcher watcher = new Watcher() {
                public void process(WatchedEvent arg0) {
            // Without an Instance, the ZooKeeper hosts and timeout are read from the parent configuration.
            if (inst == null) {
                propCache = zcf.getZooCache(parent.get(Property.INSTANCE_ZK_HOST),
                        (int) parent.getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT), watcher);
            } else {
                propCache = zcf.getZooCache(inst.getZooKeepers(), inst.getZooKeepersSessionTimeOut(), watcher);
            config = new ZooConfiguration(instanceId, propCache, parent);
            instances.put(instanceId, config);
    return config;

From source file:org.apache.accumulo.server.master.Master.java

License:Apache License

/**
 * Main loop of the master: obtains the master lock, sets up recovery, FATE and the tablet group
 * watchers, starts the client service, then waits while that service is serving.
 *
 * NOTE(review): this excerpt has lost many lines (closing braces, trailing call arguments, loop
 * bodies and the shutdown sequence), so it does not compile as shown.
 *
 * @throws IOException also used to wrap KeeperException and InterruptedException raised while
 *         setting up the FATE store
 * @throws InterruptedException declared; origin is not visible in this excerpt
 * @throws KeeperException declared; origin is not visible in this excerpt
 */
public void run() throws IOException, InterruptedException, KeeperException {
    final String zroot = ZooUtil.getRoot(instance);

    getMasterLock(zroot + Constants.ZMASTER_LOCK);

    recoveryManager = new RecoveryManager(this);


    StatusThread statusThread = new StatusThread();
    statusThread.start();

    MigrationCleanupThread migrationCleanupThread = new MigrationCleanupThread();


    // TODO: add shutdown for fate object - ACCUMULO-1307
    try {
        // The trailing argument 1000 * 60 * 60 * 8 equals eight hours if it is in milliseconds (confirm unit).
        final AgeOffStore<Master> store = new AgeOffStore<Master>(
                new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(instance) + Constants.ZFATE,
                1000 * 60 * 60 * 8);

        int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);

        fate = new Fate<Master>(this, store, threads);

        // NOTE(review): the body of the scheduled task is missing from this excerpt.
        SimpleTimer.getInstance().schedule(new Runnable() {

            public void run() {
        }, 63000, 63000);
    } catch (KeeperException e) {
        throw new IOException(e);
    } catch (InterruptedException e) {
        throw new IOException(e);

    // Watch the recovery node's children; the watcher re-registers itself on every event.
    ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, new Watcher() {
        public void process(WatchedEvent event) {
            nextEvent.event("Noticed recovery changes", event.getType());
            try {
                // watcher only fires once, add it back
                ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, this);
            } catch (Exception e) {
                log.error("Failed to add log recovery watcher back", e);

    // Tablet group watchers; the last one shown is given the previously added watcher.
    Credentials systemCreds = SystemCredentials.get();
    watchers.add(new TabletGroupWatcher(this, new MetaDataStateStore(instance, systemCreds, this), null));
    watchers.add(new TabletGroupWatcher(this, new RootTabletStateStore(instance, systemCreds, this),
    watchers.add(new TabletGroupWatcher(this, new ZooTabletStateStore(new ZooStore(zroot)), watchers.get(1)));
    for (TabletGroupWatcher watcher : watchers) {

    // Start the client service and log its address as the master lock data.
    Processor<Iface> processor = new Processor<Iface>(TraceWrap.service(new MasterClientServiceHandler()));
    ServerAddress sa = TServerUtils.startServer(getSystemConfiguration(), hostname, Property.MASTER_CLIENTPORT,
            processor, "Master", "Master Client Service Handler", null, Property.MASTER_MINTHREADS,
    clientService = sa.server;
    String address = sa.address.toString();
    log.info("Setting master lock data to " + address);

    // First wait for the service to come up, then stay here while it is serving.
    while (!clientService.isServing()) {
    while (clientService.isServing()) {

    final long deadline = System.currentTimeMillis() + MAX_CLEANUP_WAIT_TIME;

    // quit, even if the tablet servers somehow jam up and the watchers
    // don't stop
    for (TabletGroupWatcher watcher : watchers) {

From source file:org.apache.accumulo.server.monitor.Monitor.java

License:Apache License

/**
 * Asks the garbage collector for its status, finding its address through the children of the
 * GC lock node in ZooKeeper.
 *
 * NOTE(review): this excerpt has lost lines (closing braces, the end of the address expression
 * and the bodies of both finally blocks), so it does not compile as shown.
 *
 * @return the status returned by the GC client, or null when no lock child was found or an
 *         exception was caught (the exception is logged as a warning)
 */
private static GCStatus fetchGcStatus() {
    GCStatus result = null;
    HostAndPort address = null;
    try {
        // Read the gc location from its lock
        Instance instance = HdfsZooInstance.getInstance();
        String zooKeepers = instance.getZooKeepers();
        log.debug("connecting to zookeepers " + zooKeepers);
        // A dedicated ZooKeeper client is opened for this lookup.
        ZooKeeper zk = new ZooKeeper(zooKeepers,
                (int) config.getConfiguration().getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT), new Watcher() {
                    public void process(WatchedEvent event) {
        try {
            String path = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZGC_LOCK;
            // Passing null as the watcher: no watch is requested for these reads.
            List<String> locks = zk.getChildren(path, null);
            if (locks != null && locks.size() > 0) {
                // Only the first listed lock child is consulted for the address.
                address = new ServerServices(new String(zk.getData(path + "/" + locks.get(0), null, null)))
                GCMonitorService.Client client = ThriftUtil.getClient(new GCMonitorService.Client.Factory(),
                        address, config.getConfiguration());
                try {
                    result = client.getStatus(Tracer.traceInfo(), SystemCredentials.get().toThrift(instance));
                } finally {
        } finally {
    } catch (Exception ex) {
        log.warn("Unable to contact the garbage collector at " + address, ex);
    return result;

From source file:org.apache.accumulo.server.zookeeper.DistributedWorkQueue.java

License:Apache License

/**
 * Begins processing queued work: ensures the locks node exists, watches the children of
 * {@code path} for changes, processes the current children, and schedules a periodic re-check.
 *
 * NOTE(review): this excerpt has lost lines (closing braces, an else keyword and break
 * statements are likely missing), so it does not compile as shown.
 *
 * @param processor handed to {@code lookForWork} together with the child list
 * @param executorService stored in {@code threadPool}
 * @throws KeeperException declared; presumably propagated from the zoo calls (confirm)
 * @throws InterruptedException declared; presumably propagated from the zoo calls (confirm)
 */
public void startProcessing(final Processor processor, ThreadPoolExecutor executorService)
        throws KeeperException, InterruptedException {

    threadPool = executorService;

    zoo.mkdirs(path + "/" + LOCKS_NODE);

    // List the children while registering a watcher for later changes.
    List<String> children = zoo.getChildren(path, new Watcher() {
        public void process(WatchedEvent event) {
            switch (event.getType()) {
            case NodeChildrenChanged:
                // On a change to our own path, re-list (re-registering this watcher) and look for work.
                if (event.getPath().equals(path))
                    try {
                        lookForWork(processor, zoo.getChildren(path, this));
                    } catch (KeeperException e) {
                        log.error("Failed to look for work", e);
                    } catch (InterruptedException e) {
                        log.info("Interrupted looking for work", e);
                    log.info("Unexpected path for NodeChildrenChanged event " + event.getPath());
            case NodeCreated:
            case NodeDataChanged:
            case NodeDeleted:
            case None:
                log.info("Got unexpected zookeeper event: " + event.getType() + " for " + path);


    // Process whatever is already queued.
    lookForWork(processor, children);

    // Add a little jitter to avoid all the tservers slamming zookeeper at once
    SimpleTimer.getInstance(config).schedule(new Runnable() {
        public void run() {
            log.debug("Looking for work in " + path);
            try {
                lookForWork(processor, zoo.getChildren(path));
            } catch (KeeperException e) {
                log.error("Failed to look for work", e);
            } catch (InterruptedException e) {
                log.info("Interrupted looking for work", e);
    }, timerInitialDelay, timerPeriod);

From source file:org.apache.accumulo.server.zookeeper.DistributedWorkQueue.java

License:Apache License

/**
 * Loops until none of the given work IDs appears among the children of {@code path}.
 *
 * NOTE(review): this excerpt has lost lines (closing braces and the bodies of both
 * synchronized blocks; presumably a notify on {@code condVar} in the watcher and a wait in the
 * loop, to be confirmed against the upstream source), so it does not compile as shown.
 *
 * @param workIDs IDs to wait for; the loop ends once they are disjoint from the child list
 * @throws KeeperException declared; presumably propagated from the zoo calls (confirm)
 * @throws InterruptedException declared; presumably propagated from the zoo calls (confirm)
 */
public void waitUntilDone(Set<String> workIDs) throws KeeperException, InterruptedException {

    // Monitor object shared between the watcher callback and the loop below.
    final Object condVar = new Object();

    Watcher watcher = new Watcher() {
        @Override
        public void process(WatchedEvent event) {
            switch (event.getType()) {
            case NodeChildrenChanged:
                synchronized (condVar) {
            case NodeCreated:
            case NodeDataChanged:
            case NodeDeleted:
            case None:
                log.info("Got unexpected zookeeper event: " + event.getType() + " for " + path);


    List<String> children = zoo.getChildren(path, watcher);

    // Re-list the children (passing the watcher again) while any awaited ID is still present.
    while (!Collections.disjoint(children, workIDs)) {
        synchronized (condVar) {
        children = zoo.getChildren(path, watcher);

From source file:org.apache.activemq.partition.ZooKeeperPartitionBroker.java

License:Apache License

/**
 * Reads the partitioning configuration bytes from the plugin's ZooKeeper path and parses them
 * into {@code config}.
 *
 * NOTE(review): this excerpt has lost lines (closing braces, the statement that creates
 * {@code zk_client} and the body of the watcher), so it does not compile as shown.
 *
 * @throws Exception rethrown when connecting to ZooKeeper fails; load and parse failures are
 *         logged as warnings instead
 */
protected void reloadConfiguration() throws Exception {
    // Connect only when there is no client yet.
    if (zk_client == null) {
        LOG.debug("Connecting to ZooKeeper");
        try {
            LOG.debug("Connected to ZooKeeper");
        } catch (Exception e) {
            LOG.debug("Connection to ZooKeeper failed: " + e);
            throw e;

    byte[] data = null;
    try {
        Stat stat = new Stat();
        // Fetch the data while registering a watcher on the configuration node.
        data = zk_client.getData(plugin().getZkPath(), new Watcher() {
            public void process(WatchedEvent watchedEvent) {
                try {
                } catch (Exception e) {
        }, stat);
        // The read succeeded, so the polling fallback flag is cleared.
        reloadConfigOnPoll = false;
    } catch (Exception e) {
        LOG.warn("Could load partitioning configuration: " + e, e);
        // The read failed: set the flag so a later poll retries (presumably; the consumer is not shown).
        reloadConfigOnPoll = true;

    try {
        config = Partitioning.MAPPER.readValue(data, Partitioning.class);
    } catch (Exception e) {
        LOG.warn("Invalid partitioning configuration: " + e, e);

From source file:org.apache.airavata.gfac.monitor.util.CommonUtils.java

License:Apache License

/**
 * Update job count for a given set of paths.
 *
 * @param zk - zookeeper instance
 * @param changeCountMap - map of change job count with relevant path
 * @param isAdd - Should add or reduce existing job count by the given job count.
 */
// Adds to or subtracts from the job count stored at each path in changeCountMap, then writes
// the collected paths to the stat node.
// NOTE(review): this excerpt has lost lines (closing braces, the watcher body, the start of
// two statements and the code that fills changeZNodePaths), so it does not compile as shown.
public static void updateZkWithJobCount(ZooKeeper zk, final Map<String, Integer> changeCountMap,
        boolean isAdd) {
    StringBuilder changeZNodePaths = new StringBuilder();
    try {
        // Open a new client when none was supplied or the supplied one is not connected.
        if (zk == null || !zk.getState().isConnected()) {
            try {
                final CountDownLatch countDownLatch = new CountDownLatch(1);
                zk = new ZooKeeper(AiravataZKUtils.getZKhostPort(), 6000, new Watcher() {
                    public void process(WatchedEvent event) {
            } catch (ApplicationSettingsException e) {
                logger.error("Error while reading zookeeper hostport string");
            } catch (IOException e) {
                        "Error while reconnect attempt to zookeeper where zookeeper connection loss state");

        for (String path : changeCountMap.keySet()) {
            if (isAdd) {
                CommonUtils.checkAndCreateZNode(zk, path);
            byte[] byteData = zk.getData(path, null, null);
            String nodeData;
            // No data stored yet: write the requested count when adding, otherwise reset to "0".
            if (byteData == null) {
                if (isAdd) {
                    zk.setData(path, String.valueOf(changeCountMap.get(path)).getBytes(), -1);
                } else {
                    // This is not possible, but we handle in case there any data zookeeper communication failure
                    logger.warn("Couldn't reduce job count in " + path
                            + " as it returns null data. Hence reset the job count to 0");
                    zk.setData(path, "0".getBytes(), -1);
            } else {
                // Existing data is parsed as a decimal integer count.
                nodeData = new String(byteData);
                if (isAdd) {
                            String.valueOf(changeCountMap.get(path) + Integer.parseInt(nodeData)).getBytes(),
                } else {
                    int previousCount = Integer.parseInt(nodeData);
                    int removeCount = changeCountMap.get(path);
                    // Subtract only when the stored count is large enough; otherwise just log an error.
                    if (previousCount >= removeCount) {
                        zk.setData(path, String.valueOf(previousCount - removeCount).getBytes(), -1);
                    } else {
                        // This is not possible, do we need to reset the job count to 0 ?
                        logger.error("Requested remove job count is " + removeCount
                                + " which is higher than the existing job count " + previousCount + " in  "
                                + path + " path.");

        // update stat node to trigger orchestrator watchers
        if (changeCountMap.size() > 0) {
            // Drops the last character of the builder (presumably a trailing separator; confirm).
            changeZNodePaths.deleteCharAt(changeZNodePaths.length() - 1);
            zk.setData("/" + Constants.STAT, changeZNodePaths.toString().getBytes(), -1);
    } catch (KeeperException e) {
        logger.error("Error while writing job count to zookeeper", e);
    } catch (InterruptedException e) {
        logger.error("Error while writing job count to zookeeper", e);
