From source file:com.netflix.curator.framework.recipes.KillSession.java

License:Apache License

/**
 * Opens a second ZooKeeper handle on an existing session (same session id and
 * password) and closes it again; per the inline comment below, closing that
 * handle is expected to cause a session error in the main client.
 *
 * NOTE(review): this excerpt is truncated - the watcher body and several closing
 * braces are not visible, so what releases {@code zkLatch} cannot be confirmed here.
 *
 * @param connectString   connection string handed to the {@code ZooKeeper} constructor
 * @param sessionId       id of the session to attach to
 * @param sessionPassword password of the session to attach to
 * @throws Exception declared by the signature
 */
public static void kill(String connectString, long sessionId, byte[] sessionPassword) throws Exception {
    // Awaited below; presumably counted down by the watcher (body not shown - TODO confirm).
    final CountDownLatch zkLatch = new CountDownLatch(1);
    Watcher zkWatcher = new Watcher() {
        @Override
        public void process(WatchedEvent event) {
    // Attach to the existing session by id/password, using a 10000 ms session timeout.
    ZooKeeper zk = new ZooKeeper(connectString, 10000, zkWatcher, sessionId, sessionPassword);
    try {
        // Fail the assertion if the latch is not released within 10 seconds.
        Assert.assertTrue(zkLatch.await(10, TimeUnit.SECONDS));
    } finally {
        zk.close(); // this should cause a session error in the main client

From source file:com.netflix.curator.framework.recipes.leader.LeaderLatch.java

License:Apache License

/**
 * Reads the sorted children of {@code latchPath}, locates this participant's node
 * among them and, when it is not first, puts a watch on the node that sorts
 * immediately before it.
 *
 * NOTE(review): this excerpt is truncated - the body of the {@code ourIndex == 0}
 * branch, the body of the watcher's {@code try}, and the body of the final
 * {@code if} are not visible.
 *
 * @throws Exception if the latch path has no children, or propagated from the client calls
 */
private void checkForLeadership() throws Exception {
    List<String> sortedChildren = LockInternals.getSortedChildren(client, latchPath, LOCK_NAME, sorter);
    if (sortedChildren.size() == 0) {
        throw new Exception("no children - unexpected state");
    }

    // Position of our own node name within the sorted child list.
    // NOTE(review): indexOf returns -1 when the node is absent; no guard for that is
    // visible in this excerpt, and get(ourIndex - 1) below would then be called with -2.
    int ourIndex = sortedChildren.indexOf(ZKPaths.getNodeFromPath(ourPath));
    if (ourIndex == 0) {
        // First in the list. Branch body not shown; presumably this is where leadership is taken - TODO confirm.
    } else {
        final String ourPathWhenWatched = ourPath; // protected against a lost/suspended connection and an old watcher - I'm not sure if this is possible but it can't hurt
        // The child that sorts directly before ours.
        String watchPath = sortedChildren.get(ourIndex - 1);
        Watcher watcher = new Watcher() {
            public void process(WatchedEvent event) {
                // React only to deletion of the watched node, and only if ourPath is still
                // the same path it was when this watcher was created.
                if ((event.getType() == Event.EventType.NodeDeleted) && (ourPath != null)
                        && ourPath.equals(ourPathWhenWatched)) {
                    try {
                    } catch (Exception ex) {
                        log.error("An error ocurred checking the leadership.", ex);
        // checkExists() yielding null means the predecessor node no longer exists.
        if (client.checkExists().usingWatcher(watcher)
                .forPath(ZKPaths.makePath(latchPath, watchPath)) == null) {
            // The previous participant may be down, so we need to re-evaluate the list
            // to get the actual previous participant or get the leadership.

From source file:com.netflix.curator.framework.recipes.queue.SimpleDistributedQueue.java

License:Apache License

/**
 * Repeatedly asks {@code internalElement(true, watcher)} for an element and returns
 * it as soon as one is available; with a timeout, returns {@code null} once the
 * time budget is used up.
 *
 * NOTE(review): this excerpt is truncated - the watcher body, several closing braces
 * and the no-timeout {@code else} branch are not visible.
 *
 * @param timeout maximum time to wait, expressed in {@code unit}
 * @param unit    unit of {@code timeout}; {@code null} means no timeout is applied
 * @return the bytes returned by {@code internalElement}, or {@code null} when the timeout has elapsed
 * @throws Exception declared by the signature
 */
private byte[] internalPoll(long timeout, TimeUnit unit) throws Exception {

    long startMs = System.currentTimeMillis();
    // A null unit is the signal for "wait without a deadline".
    boolean hasTimeout = (unit != null);
    long maxWaitMs = hasTimeout ? TimeUnit.MILLISECONDS.convert(timeout, unit) : Long.MAX_VALUE;
    for (;;) {
        // A fresh latch and watcher per attempt; presumably the watcher releases the latch
        // (its body is not shown - TODO confirm).
        final CountDownLatch latch = new CountDownLatch(1);
        Watcher watcher = new Watcher() {
            public void process(WatchedEvent event) {
        byte[] bytes = internalElement(true, watcher);
        if (bytes != null) {
            return bytes;

        if (hasTimeout) {
            // Wait only for whatever is left of the overall budget.
            long elapsedMs = System.currentTimeMillis() - startMs;
            long thisWaitMs = maxWaitMs - elapsedMs;
            if (thisWaitMs <= 0) {
                return null;
            latch.await(thisWaitMs, TimeUnit.MILLISECONDS);
        } else {

From source file:com.netflix.curator.framework.recipes.queue.TestBoundedDistributedQueue.java

License:Apache License

/**
 * Has {@code CLIENT_QTY} producer tasks each put {@code ADD_ITEMS} items on the queue
 * at {@code PATH}, while a separate task samples the child count of {@code PATH};
 * finally asserts that every value in {@code counts} is at most
 * {@code MAX_ITEMS * SLOP_FACTOR}.
 *
 * NOTE(review): this excerpt is heavily truncated - the consumer callbacks, the
 * watcher body, the queue builder chain, the cleanup code and most closing braces
 * are not visible.
 *
 * @throws Exception declared by the signature
 */
@Test
public void testMulti() throws Exception {
    final String PATH = "/queue";
    final int CLIENT_QTY = 4;
    final int MAX_ITEMS = 10;
    final int ADD_ITEMS = MAX_ITEMS * 100;
    // Tolerance multiplier used in the final assertion on the observed counts.
    final int SLOP_FACTOR = 2;

    final QueueConsumer<String> consumer = new QueueConsumer<String>() {
        public void consumeMessage(String message) throws Exception {

        public void stateChanged(CuratorFramework client, ConnectionState newState) {

    final Timing timing = new Timing();
    final ExecutorService executor = Executors.newCachedThreadPool();
    ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<Void>(executor);

    final CuratorFramework client = CuratorFrameworkFactory.newClient(server.getConnectString(),
            timing.session(), timing.connection(), new RetryOneTime(1));
    try {

        final CountDownLatch isWaitingLatch = new CountDownLatch(1);
        final AtomicBoolean isDone = new AtomicBoolean(false);
        final List<Integer> counts = new CopyOnWriteArrayList<Integer>();
        final Object lock = new Object();
        // Sampling task: loops until interrupted, the client stops, or isDone is set.
        executor.submit(new Callable<Void>() {
            public Void call() throws Exception {
                Watcher watcher = new Watcher() {
                    public void process(WatchedEvent event) {
                        synchronized (lock) {

                while (!Thread.currentThread().isInterrupted() && client.isStarted() && !isDone.get()) {
                    synchronized (lock) {
                        // Number of children currently under PATH, re-registering the watcher.
                        int size = client.getChildren().usingWatcher(watcher).forPath(PATH).size();
                return null;

        // Producer tasks: one client and one queue per task.
        for (int i = 0; i < CLIENT_QTY; ++i) {
            final int index = i;
            completionService.submit(new Callable<Void>() {
                public Void call() throws Exception {
                    CuratorFramework client = null;
                    DistributedQueue<String> queue = null;

                    try {
                        client = CuratorFrameworkFactory.newClient(server.getConnectString(), timing.session(),
                                timing.connection(), new RetryOneTime(1));
                        queue = QueueBuilder.builder(client, consumer, serializer, PATH).executor(executor)

                        // Items are tagged "<producer index>-<item index>".
                        for (int i = 0; i < ADD_ITEMS; ++i) {
                            queue.put("" + index + "-" + i);
                    } finally {
                    return null;

        for (int i = 0; i < CLIENT_QTY; ++i) {

        synchronized (lock) {

        // Every recorded count must stay within the allowed bound.
        for (int count : counts) {
            Assert.assertTrue(counts.toString(), count <= (MAX_ITEMS * SLOP_FACTOR));
    } finally {

From source file:com.netflix.curator.test.KillSession.java

License:Apache License

/**
 * Kill the given ZK session
 *
 * @param client the client to kill
 * @param connectString server connection string
 * @param maxMs max time ms to wait for kill
 * @throws Exception errors
 */
public static void kill(ZooKeeper client, String connectString, int maxMs) throws Exception {
    // Start time, used below to enforce the maxMs limit while waiting for the session loss.
    long startTicks = System.currentTimeMillis();

    // NOTE(review): this excerpt is truncated - both watcher bodies, the remaining
    // ZooKeeper constructor arguments and several closing braces are not visible.
    final CountDownLatch sessionLostLatch = new CountDownLatch(1);
    Watcher sessionLostWatch = new Watcher() {
        public void process(WatchedEvent event) {
    // Register sessionLostWatch on the original client via an exists() call against a
    // path made unique with System.nanoTime().
    client.exists("/___CURATOR_KILL_SESSION___" + System.nanoTime(), sessionLostWatch);

    final CountDownLatch connectionLatch = new CountDownLatch(1);
    Watcher connectionWatcher = new Watcher() {
        public void process(WatchedEvent event) {
            if (event.getState() == Event.KeeperState.SyncConnected) {
    // Second handle that reuses the original client's session id.
    ZooKeeper zk = new ZooKeeper(connectString, maxMs, connectionWatcher, client.getSessionId(),
    try {
        // Give the duplicate handle up to maxMs to connect.
        if (!connectionLatch.await(maxMs, TimeUnit.MILLISECONDS)) {
            throw new Exception("KillSession could not establish duplicate session");
        try {
        } finally {
            // Clearing the reference makes the outer finally's null check skip this handle.
            zk = null;

        // Check every 100 ms until the client is no longer connected or the session-lost
        // latch is released; give up once more than maxMs has elapsed since the start.
        while (client.getState().isConnected() && !sessionLostLatch.await(100, TimeUnit.MILLISECONDS)) {
            long elapsed = System.currentTimeMillis() - startTicks;
            if (elapsed > maxMs) {
                throw new Exception("KillSession timed out waiting for session to expire");
    } finally {
        if (zk != null) {

From source file:com.ngdata.hbaseindexer.indexer.IndexerIT.java

License:Apache License

/**
 * Connects to ZooKeeper and, if {@code rootToDelete} exists, collects paths via
 * {@code collectChildren}, issues an asynchronous delete for each of them, and then
 * waits until {@code rootToDelete} no longer exists.
 *
 * NOTE(review): this excerpt is truncated - the bodies of the wait loops, the
 * {@code InterruptedException} handler and most closing braces are not visible.
 *
 * @param zkConnectString ZooKeeper connection string
 * @param rootToDelete    path whose existence is checked and awaited to disappear
 * @throws Exception declared by the signature; a {@code RuntimeException} is thrown when the
 *                   connection is not established within the session timeout or the state
 *                   is not cleared within 120000 ms
 */
private void cleanZooKeeper(String zkConnectString, String rootToDelete) throws Exception {
    int sessionTimeout = 10000;

    // The watcher only reports disconnects and session expiry on stderr.
    ZooKeeper zk = new ZooKeeper(zkConnectString, sessionTimeout, new Watcher() {
        @Override
        public void process(WatchedEvent event) {
            if (event.getState() == Watcher.Event.KeeperState.Disconnected) {
                System.err.println("ZooKeeper Disconnected.");
            } else if (event.getState() == Event.KeeperState.Expired) {
                System.err.println("ZooKeeper session expired.");

    // Wait for the CONNECTED state, for at most sessionTimeout milliseconds.
    long waitUntil = System.currentTimeMillis() + sessionTimeout;
    while (zk.getState() != CONNECTED && waitUntil > System.currentTimeMillis()) {
        try {
        } catch (InterruptedException e) {

    if (zk.getState() != CONNECTED) {
        throw new RuntimeException("Failed to connect to ZK within " + sessionTimeout + "ms.");

    if (zk.exists(rootToDelete, false) != null) {
        List<String> paths = new ArrayList<String>();
        collectChildren(rootToDelete, zk, paths);

        // Callback-style delete overload, called here with a null callback and null context.
        for (String path : paths) {
            zk.delete(path, -1, null, null);

        // The above deletes are async, wait for them to be finished
        long startWait = System.currentTimeMillis();
        while (zk.exists(rootToDelete, null) != null) {

            // Give up after 120000 ms (two minutes).
            if (System.currentTimeMillis() - startWait > 120000) {
                throw new RuntimeException("State was not cleared in ZK within the expected timeout");


From source file:com.nokia.dempsy.mpcluster.zookeeper.TestFullApp.java

License:Apache License

/**
 * Starts the application from a Spring context, checks that messages flow without
 * discards or failures, then closes a second ZooKeeper handle opened on the captured
 * session's id and asserts that the {@code MyMp} prototype keeps receiving messages
 * afterwards.
 *
 * NOTE(review): this excerpt is truncated - the code that advances {@code sessionCount},
 * the watcher body, the cleanup statements in {@code finally} and several closing
 * braces are not visible.
 *
 * @throws Throwable declared by the signature
 */
public void testStartForceMpDisconnectStop() throws Throwable {
    ClassPathXmlApplicationContext actx = null;
    Dempsy dempsy = null;

    try {
        logger.debug("Starting up the appliction context ...");
        actx = new ClassPathXmlApplicationContext(ctx);

        final FullApplication app = (FullApplication) actx.getBean("app");

        dempsy = (Dempsy) actx.getBean("dempsy");

        // Override the cluster session factory to keep track of the sessions asked for.
        // This is so that I can grab the ZookeeperSession that's being instantiated by
        // the MyMp cluster.
        zookeeperCluster = null;
        dempsy.setClusterSessionFactory(new ZookeeperSessionFactory<ClusterInformation, SlotInformation>(
                System.getProperty("zk_connect"), 5000) {
            int sessionCount = 0;

            public synchronized MpClusterSession<ClusterInformation, SlotInformation> createSession()
                    throws MpClusterException {
                MpClusterSession<ClusterInformation, SlotInformation> ret = super.createSession();

                // Keep a reference to the session created when sessionCount is 2.
                // NOTE(review): the statement that changes sessionCount is not visible here.
                if (sessionCount == 2)
                    zookeeperCluster = (ZookeeperSession<ClusterInformation, SlotInformation>) ret;
                return ret;


        Dempsy.Application.Cluster cluster = dempsy.getCluster(
                new ClusterId(FullApplication.class.getSimpleName(), MyAdaptor.class.getSimpleName()));
        Dempsy.Application.Cluster.Node node = cluster.getNodes().get(0);
        final StatsCollector collector = node.getStatsCollector();

        // this checks that the throughput works.
        assertTrue(poll(baseTimeoutMillis * 5, app, new Condition<Object>() {
            public boolean conditionMet(Object o) {
                return app.finalMessageCount.get() > 10;


        assertEquals(0, collector.getDiscardedMessageCount());
        assertEquals(0, collector.getMessageFailedCount());

        // ok ... so now we have stuff going all the way through. let's kick
        // the middle Mp's zookeeper cluster and see what happens.
        ZooKeeper origZk = zookeeperCluster.zkref.get();
        long sessionid = origZk.getSessionId();
        // Second handle opened with the same session id and a null password.
        ZooKeeper killer = new ZooKeeper(System.getProperty("zk_connect"), 5000, new Watcher() {
            public void process(WatchedEvent arg0) {
        }, sessionid, null);
        killer.close(); // tricks the server into expiring the other session

        //         // we should be getting failures now ... 
        //         // but it's possible that it can reconnect prior to actually seeing an error so if this 
        //         //   fails frequently we need to remove this test.
        //         assertTrue(poll(baseTimeoutMillis, app, new Condition()
        //         {
        //            @Override
        //            public boolean conditionMet(Object o)
        //            {
        //               return collector.getMessageFailedCount() > 1;
        //            }
        //         }));

        //... and then recover.

        // get the MyMp prototype
        cluster = dempsy
                .getCluster(new ClusterId(FullApplication.class.getSimpleName(), MyMp.class.getSimpleName()));
        node = cluster.getNodes().get(0);
        final MyMp prototype = (MyMp) node.getMpContainer().getPrototype();

        // so let's see where we are
        final long interimMessageCount = prototype.myMpReceived.get();

        // and now we should eventually get more as the session recovers.
        assertTrue(poll(baseTimeoutMillis * 5, app, new Condition<Object>() {
            public boolean conditionMet(Object o) {
                return prototype.myMpReceived.get() > interimMessageCount + 100;
    } finally {
        // NOTE(review): the statements guarded by these null checks are not visible here.
        if (dempsy != null)

        if (actx != null)

        if (dempsy != null)

From source file:com.nokia.dempsy.mpcluster.zookeeper.TestZookeeperClusterResilience.java

License:Apache License

/**
 * Creates a session and cluster before any server is running, expects cluster access
 * to fail with {@code MpClusterException}, then starts a server, closes a second
 * ZooKeeper handle opened on the session's id, and polls for recovery.
 *
 * NOTE(review): this excerpt is truncated - the bodies of the {@code try} blocks,
 * the bodies of the polling loops, the assertions and the cleanup statements are
 * not visible.
 *
 * @throws Throwable declared by the signature
 */
public void testNoServerOnStartup() throws Throwable {
    // create a session factory
    ZookeeperSessionFactory<String, String> factory = new ZookeeperSessionFactory<String, String>(
            "" + port, 5000);

    // create a session from the session factory
    ZookeeperSession<String, String> session = (ZookeeperSession<String, String>) factory.createSession();

    // create a cluster from the session
    MpCluster<String, String> cluster = session.getCluster(new ClusterId(appname, "testNoServerOnStartup"));

    // hook a test watch to make sure that callbacks work correctly
    TestWatcher callback = new TestWatcher();
    cluster.addWatcher(callback);


    // now accessing the cluster should get us an error.
    boolean gotCorrectError = false;
    try {
    } catch (MpClusterException e) {
        gotCorrectError = true;

    // now lets startup the server.
    ZookeeperTestServer server = null;
    try {
        server = new ZookeeperTestServer();

        // wait until this works.
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis()
                && !callback.called.get();)

        callback.called.set(false); // reset the callbacker ...

        // now see if the cluster works.

        // now we should be all happycakes ... but with the server running lets sever the connection
        // according to the zookeeper faq we can force a session expired to occur by closing the session from another client.
        // see: http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A4
        ZooKeeper origZk = session.zkref.get();
        long sessionid = origZk.getSessionId();
        callback.called.set(false); // reset the callbacker ...
        // Second handle opened with the same session id and a null password.
        ZooKeeper killer = new ZooKeeper("" + port, 5000, new Watcher() {
            public void process(WatchedEvent arg0) {
        }, sessionid, null);
        killer.close(); // tricks the server into expiring the other session

        // wait for the callback
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis()
                && !callback.called.get();)

        // unfortunately I cannot check the getActiveSlots for failure because there's a race condition I can't fix.
        //  No matter how fast I check it's possible that it's okay again OR that allSlots hasn't been cleared.
        // however, they should eventually recover.
        // Loop until the deadline passes or the flag is cleared inside the try block.
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis()
                && gotCorrectError;) {
            try {
                gotCorrectError = false;
            } catch (MpClusterException e) {


        // And join should work
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis()
                && gotCorrectError;) {
            try {
                gotCorrectError = false;
            } catch (MpClusterException e) {

    } finally {
        // NOTE(review): the statements guarded by these null checks are not visible here.
        if (server != null)

        if (session != null)

From source file:com.nokia.dempsy.mpcluster.zookeeper.TestZookeeperClusterResilience.java

License:Apache License

/**
 * Starts a test server, creates a session whose clusters override {@code process},
 * waits for {@code processCount} to reach 1, then closes a second ZooKeeper handle
 * opened on the session's id and expects a further process call and a watcher callback.
 *
 * NOTE(review): this excerpt is truncated - the body of the {@code process} override,
 * the statement that registers {@code callback}, the cleanup statements and several
 * closing braces are not visible.
 *
 * @throws Throwable declared by the signature
 */
public void testSessionExpired() throws Throwable {
    // now lets startup the server.
    ZookeeperTestServer server = null;
    ZookeeperSession<String, String> session = null;
    final AtomicLong processCount = new AtomicLong(0);
    // Latch awaited below before the watcher is added; held in a reference so the override can read it.
    final AtomicReference<CountDownLatch> processFinishLatch = new AtomicReference<CountDownLatch>();
    processFinishLatch.set(new CountDownLatch(1));

    try {
        server = new ZookeeperTestServer();

        session = new ZookeeperSession<String, String>("" + port, 5000) {
            public ZookeeperCluster makeZookeeperCluster(ClusterId clusterId) throws MpClusterException {
                return new ZookeeperCluster(clusterId) {
                    public void process(WatchedEvent event) {
                        if (processFinishLatch.get() != null)

        assertEquals(0, processCount.intValue()); // no calls yet

        // This will create the cluster itself and so will call process.
        MpCluster<String, String> cluster = session.getCluster(new ClusterId(appname, "testSessionExpired"));

        // now the count should reach 1
        assertTrue(poll(5000, null, new Condition<Object>() {
            public boolean conditionMet(Object o) {
                return processCount.intValue() == 1;
        TestWatcher callback = new TestWatcher();

        // wait until the process call is actually finished ...
        assertTrue(processFinishLatch.get().await(5, TimeUnit.SECONDS));

        // ... before adding the watcher. There's a race condition (without the latch)
        // where the watcher could get added but the process loop is still running.


        // now see if the cluster works.

        // cause a problem with the server running lets sever the connection
        // according to the zookeeper faq we can force a session expired to occur by closing the session from another client.
        // see: http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A4
        ZooKeeper origZk = session.zkref.get();
        long sessionid = origZk.getSessionId();
        // Second handle opened with the same session id and a null password.
        ZooKeeper killer = new ZooKeeper("" + port, 5000, new Watcher() {
            public void process(WatchedEvent arg0) {
        }, sessionid, null);

        // now the count should still be 1
        assertEquals(1, processCount.intValue());

        // and the callback wasn't called.

        killer.close(); // tricks the server into expiring the other session

        // now I should get a process call
        assertTrue(poll(5000, null, new Condition<Object>() {
            public boolean conditionMet(Object o) {
                return processCount.intValue() > 1;

        // and eventually a callback
        assertTrue(poll(5000, callback, new Condition<TestWatcher>() {
            public boolean conditionMet(TestWatcher o) {
                return o.called.get();
    } finally {
        // NOTE(review): the statements guarded by these null checks are not visible here.
        if (server != null)

        if (session != null)

From source file:com.nokia.dempsy.mpcluster.zookeeper.TestZookeeperClusterResilience.java

License:Apache License

/**
 * Runs the full application across three {@code Dempsy} instances, checks that
 * {@code finalMessageCount} is increasing, closes a second ZooKeeper handle opened on
 * the shared session's id, and checks that the count is still increasing afterwards.
 *
 * NOTE(review): this excerpt is truncated - the body of the {@code process} override,
 * the start loop body, the cleanup statements and several closing braces are not
 * visible; no assignment to {@code sessionRef} is visible either.
 *
 * @throws Throwable declared by the signature
 */
@Test
public void testSessionExpiredWithFullApp() throws Throwable {
    // now lets startup the server.
    ZookeeperTestServer server = null;
    final AtomicReference<ZookeeperSession> sessionRef = new AtomicReference<ZookeeperSession>();
    ZookeeperSession session = null;
    final AtomicLong processCount = new AtomicLong(0);

    Dempsy[] dempsy = new Dempsy[3];
    try {
        server = new ZookeeperTestServer();

        session = new ZookeeperSession("" + port, 5000) {
            public ZookeeperCluster makeZookeeperCluster(ClusterId clusterId) throws MpClusterException {
                return new ZookeeperCluster(clusterId) {
                    public void process(WatchedEvent event) {
                        //                     System.out.println("" + event);

        final FullApplication app = new FullApplication();
        ApplicationDefinition ad = app.getTopology();

        assertEquals(0, processCount.intValue()); // no calls yet

        // NOTE(review): the "new ZookeeperSessionFactory..." lines after dempsy[0] and dempsy[1]
        // look like the tail of a call whose first line is missing from this excerpt
        // (presumably setClusterSessionFactory - TODO confirm).
        dempsy[0] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),
                FullApplication.MyAdaptor.class.getSimpleName()), ad);
                new ZookeeperSessionFactory<ClusterInformation, SlotInformation>("" + port, 5000));

        dempsy[1] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),
                FullApplication.MyMp.class.getSimpleName()), ad);
                new ZookeeperSessionFactory<ClusterInformation, SlotInformation>("" + port, 5000));

        dempsy[2] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),
                FullApplication.MyRankMp.class.getSimpleName()), ad);
        //         dempsy[2].setClusterSessionFactory(new ZookeeperSessionFactory<ClusterInformation, SlotInformation>("" + port,5000));

        // The third instance gets a factory that hands out whatever sessionRef currently holds.
        dempsy[2].setClusterSessionFactory(new MpClusterSessionFactory<ClusterInformation, SlotInformation>() {
            public MpClusterSession<ClusterInformation, SlotInformation> createSession()
                    throws MpClusterException {
                return sessionRef.get();

        // start everything in reverse order
        for (int i = 2; i >= 0; i--)

        // make sure the final count is incrementing
        long curCount = app.finalMessageCount.get();
        assertTrue(poll(30000, curCount, new Condition<Long>() {

            public boolean conditionMet(Long o) {
                return app.finalMessageCount.get() > (o + 100L);


        // cause a problem with the server running lets sever the connection
        // according to the zookeeper faq we can force a session expired to occur by closing the session from another client.
        // see: http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A4
        ZooKeeper origZk = (ZooKeeper) session.zkref.get();
        long sessionid = origZk.getSessionId();
        // Second handle opened with the same session id and a null password.
        ZooKeeper killer = new ZooKeeper("" + port, 5000, new Watcher() {
            public void process(WatchedEvent arg0) {
        }, sessionid, null);

        killer.close(); // tricks the server into expiring the other session


        // make sure the final count is STILL incrementing
        curCount = app.finalMessageCount.get();
        assertTrue(poll(30000, curCount, new Condition<Long>() {

            public boolean conditionMet(Long o) {
                return app.finalMessageCount.get() > (o + 100L);


    } finally {
        // NOTE(review): the statements guarded by these checks are not visible here.
        if (server != null)

        if (session != null)

        for (int i = 0; i < 3; i++)
            if (dempsy[i] != null)