org.apache.accumulo.server.master.LiveTServerSet.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.accumulo.server.master.LiveTServerSet.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.server.master;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy.SKIP;

import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.impl.ClientContext;
import org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException;
import org.apache.accumulo.core.data.impl.KeyExtent;
import org.apache.accumulo.core.master.thrift.TabletServerStatus;
import org.apache.accumulo.core.rpc.ThriftUtil;
import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
import org.apache.accumulo.core.trace.Tracer;
import org.apache.accumulo.core.util.AddressUtil;
import org.apache.accumulo.core.util.ServerServices;
import org.apache.accumulo.core.zookeeper.ZooUtil;
import org.apache.accumulo.server.master.state.TServerInstance;
import org.apache.accumulo.server.util.Halt;
import org.apache.accumulo.server.util.time.SimpleTimer;
import org.apache.accumulo.server.zookeeper.ZooCache;
import org.apache.accumulo.server.zookeeper.ZooLock;
import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
import org.apache.hadoop.io.Text;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TTransport;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NotEmptyException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.net.HostAndPort;
import org.apache.accumulo.core.tabletserver.thrift.TUnloadTabletGoal;

public class LiveTServerSet implements Watcher {

    /**
     * Callback through which this set reports membership changes to its owner.
     */
    public interface Listener {
        /**
         * Invoked by {@code scanServers()} and {@code process(WatchedEvent)} when at least one tablet server
         * was added or removed.
         *
         * @param current
         *            the {@code LiveTServerSet} reporting the change
         * @param deleted
         *            instances that were dropped from the live set
         * @param added
         *            instances that were newly added to the live set
         */
        void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added);
    }

    private static final Logger log = LoggerFactory.getLogger(LiveTServerSet.class);

    // Listener notified whenever a scan or watched event adds or removes tablet servers
    private final Listener cback;
    // Supplies the instance, configuration and RPC credentials used throughout this class
    private final ClientContext context;
    // Created lazily by getZooCache(), which passes this Watcher to the ZooCache constructor
    private ZooCache zooCache;

    /**
     * Thrift handle used to send requests to a single tablet server.
     *
     * <p>
     * {@link #getTableMap(boolean)} and {@link #assignTablet(ZooLock, KeyExtent)} (for metadata extents) open
     * a dedicated transport and close it afterwards. Every other call obtains a client through
     * {@code ThriftUtil.getClient} and hands it back through {@code ThriftUtil.returnClient} in a
     * {@code finally} block.
     */
    public class TServerConnection {
        private final HostAndPort address;

        public TServerConnection(HostAndPort addr) throws TException {
            this.address = addr;
        }

        // Obtains a client for this server from ThriftUtil; the caller must hand it back via ThriftUtil.returnClient
        private TabletClientService.Client borrowClient() throws TException {
            return ThriftUtil.getClient(new TabletClientService.Client.Factory(), address, context);
        }

        // Builds a client on top of a transport the caller created and is responsible for closing
        private TabletClientService.Client clientOver(TTransport transport) throws TException {
            return ThriftUtil.createClient(new TabletClientService.Client.Factory(), transport);
        }

        // Wraps a row in a ByteBuffer, passing null through unchanged
        private ByteBuffer wrapOrNull(byte[] row) {
            if (row == null) {
                return null;
            }
            return ByteBuffer.wrap(row);
        }

        // Serializes the lock id of the given lock relative to the master lock path of this instance
        private String lockString(ZooLock mlock) {
            return mlock.getLockID()
                    .serialize(ZooUtil.getRoot(context.getInstance()) + Constants.ZMASTER_LOCK);
        }

        private void loadTablet(TabletClientService.Client client, ZooLock lock, KeyExtent extent)
                throws TException {
            client.loadTablet(Tracer.traceInfo(), context.rpcCreds(), lockString(lock), extent.toThrift());
        }

        /**
         * Asks the tablet server to load the given extent.
         *
         * @param lock
         *            lock whose serialized id is sent along with the request
         * @param extent
         *            tablet to load; metadata extents are sent over a dedicated transport
         */
        public void assignTablet(ZooLock lock, KeyExtent extent) throws TException {
            if (!extent.isMeta()) {
                final TabletClientService.Client borrowed = borrowClient();
                try {
                    loadTablet(borrowed, lock, extent);
                } finally {
                    ThriftUtil.returnClient(borrowed);
                }
                return;
            }

            // see ACCUMULO-3597
            final TTransport dedicated = ThriftUtil.createTransport(address, context);
            try {
                loadTablet(clientOver(dedicated), lock, extent);
            } finally {
                dedicated.close();
            }
        }

        /**
         * Asks the tablet server to unload the given extent, forwarding {@code goal} and {@code requestTime}
         * unchanged.
         */
        public void unloadTablet(ZooLock lock, KeyExtent extent, TUnloadTabletGoal goal, long requestTime)
                throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.unloadTablet(Tracer.traceInfo(), context.rpcCreds(), lockString(lock),
                        extent.toThrift(), goal, requestTime);
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /**
         * Fetches the tablet server status over a freshly created transport.
         *
         * @param usePooledConnection
         *            must be {@code false}; {@code true} is rejected
         * @throws UnsupportedOperationException
         *             if {@code usePooledConnection} is {@code true}
         */
        public TabletServerStatus getTableMap(boolean usePooledConnection)
                throws TException, ThriftSecurityException {
            if (usePooledConnection) {
                throw new UnsupportedOperationException();
            }

            final TTransport dedicated = ThriftUtil.createTransport(address, context);
            try {
                return clientOver(dedicated).getTabletServerStatus(Tracer.traceInfo(), context.rpcCreds());
            } finally {
                if (dedicated != null) {
                    dedicated.close();
                }
            }
        }

        /** Sends a halt request to the tablet server. */
        public void halt(ZooLock lock) throws TException, ThriftSecurityException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.halt(Tracer.traceInfo(), context.rpcCreds(), lockString(lock));
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /** Sends a fastHalt request to the tablet server. */
        public void fastHalt(ZooLock lock) throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.fastHalt(Tracer.traceInfo(), context.rpcCreds(), lockString(lock));
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /**
         * Sends a flush request for a table; a {@code null} start or end row is forwarded as {@code null}.
         */
        public void flush(ZooLock lock, String tableId, byte[] startRow, byte[] endRow) throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.flush(Tracer.traceInfo(), context.rpcCreds(), lockString(lock), tableId,
                        wrapOrNull(startRow), wrapOrNull(endRow));
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /** Sends a chop request for the given extent. */
        public void chop(ZooLock lock, KeyExtent extent) throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.chop(Tracer.traceInfo(), context.rpcCreds(), lockString(lock), extent.toThrift());
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /**
         * Sends a split request for the given extent at {@code splitPoint}. The {@code lock} argument is not
         * used by this call.
         */
        public void splitTablet(ZooLock lock, KeyExtent extent, Text splitPoint)
                throws TException, ThriftSecurityException, NotServingTabletException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                // Text's backing array may be longer than its content, so wrap only the valid prefix
                final ByteBuffer split = ByteBuffer.wrap(splitPoint.getBytes(), 0, splitPoint.getLength());
                borrowed.splitTablet(Tracer.traceInfo(), context.rpcCreds(), extent.toThrift(), split);
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /**
         * Sends a compact request for a table; a {@code null} start or end row is forwarded as {@code null}.
         */
        public void compact(ZooLock lock, String tableId, byte[] startRow, byte[] endRow) throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                borrowed.compact(Tracer.traceInfo(), context.rpcCreds(), lockString(lock), tableId,
                        wrapOrNull(startRow), wrapOrNull(endRow));
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

        /** Forwards {@code tid} to the tablet server's isActive call and returns its answer. */
        public boolean isActive(long tid) throws TException {
            final TabletClientService.Client borrowed = borrowClient();
            try {
                return borrowed.isActive(Tracer.traceInfo(), tid);
            } finally {
                ThriftUtil.returnClient(borrowed);
            }
        }

    }

    /**
     * Pairs the identity of a live tablet server with the connection used to reach it.
     */
    static class TServerInfo {
        TServerInstance instance;
        TServerConnection connection;

        TServerInfo(TServerInstance instance, TServerConnection connection) {
            this.instance = instance;
            this.connection = connection;
        }
    }

    // The set of active tservers with locks, indexed by their name in zookeeper.
    // Within this class the three maps below are only touched from synchronized methods.
    private Map<String, TServerInfo> current = new HashMap<>();
    // The same TServerInfo objects as above, indexed by TServerInstance
    private Map<TServerInstance, TServerInfo> currentInstances = new HashMap<>();

    // The set of entries in zookeeper without locks, and the first time (System.currentTimeMillis()) each was
    // noticed
    private Map<String, Long> locklessServers = new HashMap<>();

    /**
     * Creates an empty set; nothing is read from ZooKeeper here.
     *
     * @param context
     *            client context used for the instance, configuration and RPC credentials
     * @param cback
     *            listener to notify when tablet servers are added or removed
     */
    public LiveTServerSet(ClientContext context, Listener cback) {
        this.context = context;
        this.cback = cback;
    }

    /**
     * Returns the ZooCache used by this set, creating it on first use with this object as the constructor
     * argument.
     */
    public synchronized ZooCache getZooCache() {
        ZooCache cache = zooCache;
        if (cache == null) {
            cache = new ZooCache(this);
            zooCache = cache;
        }
        return cache;
    }

    /**
     * Runs {@link #scanServers()} once immediately and then hands a task that repeats the scan to the
     * {@code SimpleTimer}, scheduled with the arguments 0 and 5000 (presumably initial delay and period in
     * milliseconds - confirm against SimpleTimer).
     */
    public synchronized void startListeningForTabletServerChanges() {
        scanServers();

        final Runnable rescan = new Runnable() {
            @Override
            public void run() {
                scanServers();
            }
        };
        SimpleTimer.getInstance(context.getConfiguration()).schedule(rescan, 0, 5000);
    }

    /**
     * Re-checks every known tablet server plus every child currently listed under the tservers node, and
     * notifies the listener if any server was added or removed. Any exception is logged and not rethrown.
     */
    public synchronized void scanServers() {
        try {
            final Set<TServerInstance> added = new HashSet<>();
            final Set<TServerInstance> removed = new HashSet<>();
            final String tserversRoot = ZooUtil.getRoot(context.getInstance()) + Constants.ZTSERVERS;

            // Union of the servers we already track and the ones ZooKeeper lists right now; iterating over this
            // copy lets checkServer modify the tracking maps safely.
            final Set<String> candidates = new HashSet<>(current.keySet());
            candidates.addAll(getZooCache().getChildren(tserversRoot));

            // Forget lock-less entries whose node has disappeared
            locklessServers.keySet().retainAll(candidates);

            for (String child : candidates) {
                checkServer(added, removed, tserversRoot, child);
            }

            final boolean unchanged = removed.isEmpty() && added.isEmpty();
            if (!unchanged) {
                cback.update(this, removed, added);
            }
        } catch (Exception ex) {
            log.error("{}", ex.getMessage(), ex);
        }
    }

    /**
     * Deletes a tablet server node from ZooKeeper, tolerating the two expected races.
     *
     * @param serverNode
     *            full ZooKeeper path of the node to delete
     */
    private void deleteServerNode(String serverNode) throws InterruptedException, KeeperException {
        try {
            ZooReaderWriter.getInstance().delete(serverNode, -1);
        } catch (NotEmptyException | NoNodeException ignored) {
            // NotEmptyException - race condition: tserver created the lock after our last check; we'll see it at
            // the next check.
            // NoNodeException - someone else deleted it.
        }
    }

    /**
     * Brings the tracking maps in line with the ZooKeeper state of a single tablet server node.
     *
     * <p>
     * If the node holds lock data, the server is registered, or re-registered when its instance changed. If it
     * holds none, a tracked server is dropped and the node is remembered as lock-less; once it has been
     * lock-less for more than ten minutes the node itself is deleted.
     *
     * @param updates
     *            receives instances that were added
     * @param doomed
     *            receives instances that were removed
     * @param path
     *            ZooKeeper path of the tservers node
     * @param zPath
     *            name of the child node to check
     */
    private synchronized void checkServer(final Set<TServerInstance> updates, final Set<TServerInstance> doomed,
            final String path, final String zPath) throws TException, InterruptedException, KeeperException {

        final TServerInfo known = current.get(zPath);
        final String lockPath = path + "/" + zPath;
        final Stat stat = new Stat();
        final byte[] lockData = ZooLock.getLockData(getZooCache(), lockPath, stat);

        if (lockData != null) {
            locklessServers.remove(zPath);

            final ServerServices services = new ServerServices(new String(lockData, UTF_8));
            final HostAndPort client = services.getAddress(ServerServices.Service.TSERV_CLIENT);
            final TServerInstance instance = new TServerInstance(client, stat.getEphemeralOwner());

            if (known != null && known.instance.equals(instance)) {
                // same server, same session: nothing changed
                return;
            }

            if (known != null) {
                // the node is now held by a different instance; retire the old one
                doomed.add(known.instance);
            }
            updates.add(instance);
            final TServerInfo registered = new TServerInfo(instance, new TServerConnection(client));
            current.put(zPath, registered);
            if (known != null) {
                currentInstances.remove(known.instance);
            }
            currentInstances.put(instance, registered);
            return;
        }

        // No lock data: whatever we knew about this node is gone
        if (known != null) {
            doomed.add(known.instance);
            current.remove(zPath);
            currentInstances.remove(known.instance);
        }

        final long locklessGraceMillis = 10 * 60 * 1000;
        final Long firstSeen = locklessServers.get(zPath);
        final long now = System.currentTimeMillis();
        if (firstSeen == null) {
            locklessServers.put(zPath, now);
        } else if (now - firstSeen > locklessGraceMillis) {
            deleteServerNode(lockPath);
            locklessServers.remove(zPath);
        }
    }

    /**
     * Handles a ZooKeeper event: an event on the tservers node triggers a full {@link #scanServers()}, an event
     * on a direct child of it re-checks just that server and notifies the listener of any change. Events without
     * a path, or on any other path, are ignored.
     */
    @Override
    public void process(WatchedEvent event) {

        // It's important that these events are propagated by ZooCache, because this ensures that ZooCache has
        // already processed the event and cleared the relevant nodes before the code below reads from it.

        final String eventPath = event.getPath();
        if (eventPath == null) {
            return;
        }

        if (eventPath.endsWith(Constants.ZTSERVERS)) {
            scanServers();
            return;
        }

        if (!eventPath.contains(Constants.ZTSERVERS)) {
            return;
        }

        // do only if ZTSERVER is parent
        final int lastSlash = eventPath.lastIndexOf('/');
        if (lastSlash < 0 || !eventPath.substring(0, lastSlash).endsWith(Constants.ZTSERVERS)) {
            return;
        }

        final String server = eventPath.substring(lastSlash + 1);
        final Set<TServerInstance> added = new HashSet<>();
        final Set<TServerInstance> removed = new HashSet<>();
        final String tserversRoot = ZooUtil.getRoot(context.getInstance()) + Constants.ZTSERVERS;

        try {
            checkServer(added, removed, tserversRoot, server);
            final boolean unchanged = removed.isEmpty() && added.isEmpty();
            if (!unchanged) {
                this.cback.update(this, removed, added);
            }
        } catch (Exception ex) {
            log.error("Exception", ex);
        }
    }

    /**
     * Returns the connection for a live tablet server.
     *
     * @param server
     *            instance to look up; may be {@code null}
     * @return the connection, or {@code null} if {@code server} is {@code null} or not currently tracked
     */
    public synchronized TServerConnection getConnection(TServerInstance server) {
        if (server == null) {
            return null;
        }
        final TServerInfo tracked = currentInstances.get(server);
        return tracked == null ? null : tracked.connection;
    }

    /**
     * Returns a new set holding the currently tracked tablet server instances; later changes to this object do
     * not affect the returned set.
     */
    public synchronized Set<TServerInstance> getCurrentServers() {
        final Set<TServerInstance> snapshot = new HashSet<>(currentInstances.keySet());
        return snapshot;
    }

    /** Returns the number of tablet servers currently tracked by their ZooKeeper node name. */
    public synchronized int size() {
        final int liveCount = current.size();
        return liveCount;
    }

    /**
     * Looks up a currently tracked tablet server by its textual address, optionally followed by a session id
     * in square brackets.
     *
     * @param tabletServer
     *            address of the form {@code host:port} or {@code host:port[session]}
     * @return the matching instance, or {@code null} if none is tracked
     */
    public synchronized TServerInstance find(String tabletServer) {
        final TServerInstance match = find(current, tabletServer);
        return match;
    }

    /**
     * Searches the given servers for one whose location matches {@code tabletServer}.
     *
     * <p>
     * The string is either a plain address or an address followed by a session id in square brackets. When a
     * session id is present, only an instance with that exact session matches; otherwise the first instance at
     * the address is returned.
     *
     * @param servers
     *            candidate servers; only the values are inspected
     * @param tabletServer
     *            address, optionally suffixed with {@code [session]}
     * @return the matching instance, or {@code null} if there is none
     * @throws IllegalArgumentException
     *             if {@code tabletServer} is empty, or ends with {@code ']'} but contains no {@code '['}
     */
    TServerInstance find(Map<String, TServerInfo> servers, String tabletServer) {
        if (tabletServer.isEmpty()) {
            // Without this guard charAt(length - 1) fails with an opaque StringIndexOutOfBoundsException
            throw new IllegalArgumentException("Could not parse tabletserver '" + tabletServer + "'");
        }

        final HostAndPort addr;
        String sessionId = null;
        if (']' == tabletServer.charAt(tabletServer.length() - 1)) {
            final int index = tabletServer.indexOf('[');
            if (-1 == index) {
                throw new IllegalArgumentException("Could not parse tabletserver '" + tabletServer + "'");
            }
            addr = AddressUtil.parseAddress(tabletServer.substring(0, index), false);
            // Strip off the last bracket
            sessionId = tabletServer.substring(index + 1, tabletServer.length() - 1);
        } else {
            addr = AddressUtil.parseAddress(tabletServer, false);
        }

        for (TServerInfo candidate : servers.values()) {
            if (!candidate.instance.getLocation().equals(addr)) {
                continue;
            }
            // Return the instance if we have no desired session ID, or we match the desired session ID
            if (null == sessionId || sessionId.equals(candidate.instance.getSession())) {
                return candidate.instance;
            }
        }
        return null;
    }

    /**
     * Stops tracking the given tablet server and recursively deletes its node under the tservers path in
     * ZooKeeper. Does nothing if the server is not currently tracked.
     *
     * <p>
     * If the ZooKeeper delete fails, the error is logged and {@code Halt.halt} is called with status -1.
     *
     * @param server
     *            instance to remove
     */
    public synchronized void remove(TServerInstance server) {
        // The primary map is keyed by ZooKeeper node name, so locate that name from the instance
        String zPath = null;
        for (Entry<String, TServerInfo> entry : current.entrySet()) {
            if (entry.getValue().instance.equals(server)) {
                zPath = entry.getKey();
                break;
            }
        }
        if (zPath == null) {
            return;
        }
        current.remove(zPath);
        currentInstances.remove(server);

        log.info("Removing zookeeper lock for {}", server);
        String fullpath = ZooUtil.getRoot(context.getInstance()) + Constants.ZTSERVERS + "/" + zPath;
        try {
            ZooReaderWriter.getInstance().recursiveDelete(fullpath, SKIP);
        } catch (Exception e) {
            String msg = "error removing tablet server lock";
            // ACCUMULO-3651 Changed level to error and added FATAL to message for slf4j compatibility
            log.error("FATAL: {}", msg, e);
            Halt.halt(msg, -1);
        }
        // Drop any cached view of the deleted node so later reads see the removal
        getZooCache().clear(fullpath);
    }
}