Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.cloudata.core.master; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStreamReader; import java.net.InetAddress; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.data.Stat; import org.cloudata.core.client.CTable; import org.cloudata.core.client.Cell; import org.cloudata.core.client.Row; import org.cloudata.core.client.ScanCell; import 
org.cloudata.core.client.TabletLocationCache; import org.cloudata.core.client.scanner.ScannerFactory; import org.cloudata.core.client.scanner.TableScanner; import org.cloudata.core.commitlog.ServerMonitorInfo; import org.cloudata.core.common.Constants; import org.cloudata.core.common.GlobalConstants; import org.cloudata.core.common.CStatusHttpServer; import org.cloudata.core.common.conf.CloudataConf; import org.cloudata.core.common.exception.TableExistsException; import org.cloudata.core.common.io.CWritableUtils; import org.cloudata.core.common.ipc.AclManager; import org.cloudata.core.common.ipc.CRPC; import org.cloudata.core.common.ipc.CRPC.Server; import org.cloudata.core.common.lock.LockUtil; import org.cloudata.core.common.util.NetworkUtil; import org.cloudata.core.common.util.StringUtils; import org.cloudata.core.fs.CommitLogFileSystem; import org.cloudata.core.fs.CommitLogFileSystemIF; import org.cloudata.core.fs.CloudataFileSystem; import org.cloudata.core.fs.GPath; import org.cloudata.core.master.metrics.CloudataMasterMetrics; import org.cloudata.core.tablet.TableSchema; import org.cloudata.core.tablet.TableSchemaMap; import org.cloudata.core.tablet.TabletInfo; import org.cloudata.core.tabletserver.AsyncTask; import org.cloudata.core.tabletserver.AsyncTaskManager; import org.cloudata.core.tabletserver.AsyncTaskStatus; import org.cloudata.core.tabletserver.Tablet; import org.cloudata.core.tabletserver.TabletManagerProtocol; import org.cloudata.core.tabletserver.TabletServerInfo; /** * Cloudata? ? Master<p> * CloudataMaster? ? Tablet? . * ? CloudataMaster? ?? ?? (get, put)? ? . * * @author * */ public class CloudataMaster implements TableManagerProtocol, TabletMasterProtocol, Constants, Runnable, Watcher { public static final Log LOG = LogFactory.getLog(CloudataMaster.class.getName()); private CloudataConf conf; private CloudataFileSystem fs; /** * CloudataMaster host name(HOST_NAME:PORT) */ protected String hostName; /** * ? ? ? 
*/ private Server server; /** * ? (tableName -> table) */ private TableSchemaMap schemaMap; /** * Tablet ?(tableName -> [tabletName -> tablets]) */ protected Map<String, Map<String, TabletInfo>> tabletInfos = new HashMap<String, Map<String, TabletInfo>>(100); /** * ? ? Tablet ?(TabletName -> TabletInfo) */ private Map<String, TabletInfo> unassignedTablets = new HashMap<String, TabletInfo>(100); /** * ? Tablet ?(TabletName -> TabletInfo) */ private Map<String, TabletInfo> assigningTablets = new HashMap<String, TabletInfo>(100); /** * Live TabletServer ?(hostName -> TabletServerInfo */ private Map<String, TabletServerInfo> liveTabletServers = new HashMap<String, TabletServerInfo>(10); /** * ? TabletServer ? */ private Set<String> deadTabletServers = new HashSet<String>(); /** * ? TabletServer(Tablet ? ? TabletServer ?) */ private TabletServerInfo lastAssignedTabletServer; /** * Drop ? TabletServer ?(TableName -> TabletServer HostName) */ private Map<String, List<String>> dropingTabletServers = new HashMap<String, List<String>>(10); /** * Master startup Root Tablet? */ private AtomicBoolean endRootTableAssignment = new AtomicBoolean(false); /** * Tablet , lock monitor */ private Object tabletInfoMonitor = new Object(); /** * CloudataMaster Thread flag */ private boolean stopRequested = false; /** * Root, Meta loading? ? 
true */ private static boolean clusterReady = false; private ThreadGroup threadGroup; private CStatusHttpServer infoServer; public static CloudataMaster cloudataMaster; protected Date masterStartTime; protected Date masterInitTime; protected AsyncTaskManager asyncTaskManager = new AsyncTaskManager(); private ZooKeeper zk; private Object masterElectMonitor = new Object(); private boolean masterElected = false; private Map<String, ServerMonitorInfo> liveCommitLogServers = new HashMap<String, ServerMonitorInfo>(); private Set<String> deadCommitLogServers = new HashSet<String>(); private CloudataMasterMetrics masterMetrics; private Balancer balancer; private int initialTabletAssignCount; private int initialTabletEndCount; public CloudataMaster() { System.setProperty("java.net.preferIPv4Stack", "true"); masterStartTime = new Date(); cloudataMaster = this; } public void init(CloudataConf conf) throws IOException { InetSocketAddress serverAddress = NetworkUtil.getLocalAddress(conf.getInt("masterServer.port", 7000)); this.hostName = serverAddress.getHostName() + ":" + serverAddress.getPort(); this.threadGroup = new ThreadGroup("CloudataMaster_" + hostName); this.conf = conf; this.fs = CloudataFileSystem.get(conf); if (this.fs == null) { LOG.fatal("FileSystem is not ready. 
CloudataMaster shutdown"); shutdown(); } this.zk = LockUtil.getZooKeeper(conf, hostName, this); this.schemaMap = new TableSchemaMap(conf, zk); this.server = CRPC.getServer(zk, this, serverAddress.getHostName(), serverAddress.getPort(), conf.getInt("masterServer.handler.count", 10), false, conf); this.server.start(); LOG.info("Netune master started at " + hostName); } /** * CloudataMaster main thread */ public void run() { try { String masterPathNo = createMasterPath(); while (!stopRequested) { LOG.info(hostName + " starts master election"); if (isMaster(masterPathNo)) { break; } } // Master LOG.info(hostName + " get master lock."); masterInit(); } catch (Exception e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); } } @Override public void process(WatchedEvent event) { if (event.getType() == Event.EventType.None) { switch (event.getState()) { case SyncConnected: break; case Disconnected: LOG.warn("Disconnected:" + event); break; case Expired: LOG.info("Shutdown cause lock expired:" + event); shutdown(); break; } } } private String createMasterPath() throws Exception { if (zk.exists(LockUtil.getZKPath(conf, Constants.MASTER), false) == null) { try { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.MASTER), null, CreateMode.PERSISTENT); } catch (Exception e) { LOG.info("Can't create " + LockUtil.getZKPath(conf, Constants.MASTER) + " lock", e); } } String path = LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.MASTER) + "/Leader-", hostName.getBytes(), CreateMode.EPHEMERAL_SEQUENTIAL); String[] tokens = path.split("/"); return tokens[tokens.length - 1]; } private boolean isMaster(String masterPathNo) throws Exception { List<String> values = null; try { values = zk.getChildren(LockUtil.getZKPath(conf, Constants.MASTER), false); } catch (NoNodeException e) { } if (values == null) { return false; } Collections.sort(values); String leaderNode = values.get(0); Stat stat = zk.exists(LockUtil.getZKPath(conf, MASTER) + "/" 
+ leaderNode, new MasterNodeDeletedWatcher(leaderNode)); if (stat == null) { LOG.info("No master path [" + LockUtil.getZKPath(conf, MASTER) + "/" + leaderNode + "]"); return false; } boolean result = masterPathNo.equals(leaderNode); if (!result) { synchronized (masterElectMonitor) { LOG.info(hostName + " not master. watch master node [" + leaderNode + "]"); masterElectMonitor.wait(); } } return result; } public boolean isMasterElected() { return masterElected; } /** * Master lock? acquire ?? ? . * * @throws IOException */ private void masterInit() throws IOException { if (!fs.isReady()) { LOG.fatal("FileSystem is not ready. " + "check " + conf.get("cloudata.root") + " directory. CloudataMaster shutdown"); shutdown(); } masterInitTime = new Date(); masterElected = true; setClusterReady(false); masterMetrics = new CloudataMasterMetrics(conf); (new Thread(threadGroup, new UpdateMetricsThread())).start(); addLockEventHandler(); try { loadAllTableSchemas(conf); synchronized (Constants.SC_LOCK_PATH) { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.SC_LOCK_PATH), "0".getBytes(), CreateMode.PERSISTENT, true); LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.MC_LOCK_PATH), "0".getBytes(), CreateMode.PERSISTENT, true); } } catch (IOException e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); return; } Thread rootTabletAssignmentThread = new Thread(threadGroup, new RootTabletAssignmentThread()); rootTabletAssignmentThread.start(); InetSocketAddress infoServerAddress = NetworkUtil .getAddress(conf.get("masterServer.info.address", "0.0.0.0:57000")); try { this.infoServer = new CStatusHttpServer("master", infoServerAddress.getHostName(), infoServerAddress.getPort()); this.infoServer.start(); LOG.info("Info Http Server started: " + infoServerAddress.toString()); } catch (Exception e) { LOG.warn("Error while info server init:" + e.getMessage()); } // ? ? tablet drop? 
List<String> dropTables = null; try { dropTables = zk.getChildren(LockUtil.getZKPath(conf, Constants.TABLE_DROP), false); } catch (NoNodeException e) { } catch (Exception e) { throw new IOException(e); } if (dropTables != null) { for (String eachDropTable : dropTables) { asyncTaskManager.runAsyncTask(new TableDropTask(eachDropTable)); } } } AtomicInteger scLockNo = new AtomicInteger(0); AtomicInteger mcLockNo = new AtomicInteger(0); private void addDiskIOLock(String path, String name, AtomicInteger lockNo) throws IOException { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, path + "/" + name + "-" + lockNo.incrementAndGet()), "0".getBytes(), CreateMode.PERSISTENT, true); } /** * TabletServer ?. * * @param hostName * @return * @throws IOException */ protected TabletManagerProtocol connectTabletServer(String hostName) throws IOException { return connectTabletServer(conf, hostName); } static TabletManagerProtocol connectTabletServer(CloudataConf conf, String hostName) throws IOException { // LOG.debug("connectTabletServer:hostname=" + hostName); TabletManagerProtocol tabletServer = (TabletManagerProtocol) CRPC.getProxy(TabletManagerProtocol.class, TabletManagerProtocol.versionID, NetworkUtil.getAddress(hostName), conf); return tabletServer; } private TabletManagerProtocol connectTabletServer(TabletServerInfo tabletServerInfo) throws IOException { return connectTabletServer(tabletServerInfo.getHostName()); } /** * ROOT, META ?? ?/ ? ??? Tablet? ?/? ? ?? ready true ?. * * @param ready */ private void setClusterReady(boolean ready) throws IOException { clusterReady = ready; } /** * ROOT, META ? ? ? * * @return * @throws IOException */ public static boolean isClusterReady() throws IOException { return clusterReady; } /** * META tablet? . META table? Table? ?. 
* * @throws IOException */ private void assignMetaTablet(Map<String, TabletInfo> assignedTablets) throws IOException { // if meta not exists, make meta table boolean createdMetaTable = false; if (!schemaMap.contains(TABLE_NAME_META)) { createTable(GlobalConstants.META_TABLE, null); createdMetaTable = true; } Map<String, TabletInfo> metaTablets = tabletInfos.get(TABLE_NAME_META); //LOG.debug("Needed meta creation: " + createdMetaTable); if (createdMetaTable && metaTablets.isEmpty()) { // if empty, create // MaxTableName.MaxRow.Key TabletInfo tabletInfo = new TabletInfo(TABLE_NAME_META, Tablet.generateTabletName(TABLE_NAME_META), Row.Key.MIN_KEY, Tablet.generateMetaRowKey(null, Row.Key.MAX_KEY)); LOG.debug("Create META tablet:" + tabletInfo); synchronized (tabletInfoMonitor) { metaTablets.put(tabletInfo.getTabletName(), tabletInfo); unassignedTablets.put(tabletInfo.getTabletName(), tabletInfo); assignTablet(tabletInfo); } } else { LOG.debug("Load Tablet infos from META"); List<TabletInfo> tempMetaTablets = new ArrayList<TabletInfo>(); synchronized (tabletInfoMonitor) { tempMetaTablets.addAll(metaTablets.values()); } for (TabletInfo tabletInfo : tempMetaTablets) { String tabletName = tabletInfo.getTabletName(); // ? Tablet? if (!assignedTablets.containsKey(tabletName)) { synchronized (tabletInfoMonitor) { unassignedTablets.put(tabletInfo.getTabletName(), tabletInfo); try { assignTablet(tabletInfo); } catch (IOException e) { LOG.warn(e); // assignTablet ? 
} } } } } } private void loadAllTableSchemas(CloudataConf conf) throws IOException { List<String> tables = null; try { tables = zk.getChildren(LockUtil.getZKPath(conf, Constants.PATH_SCHEMA), false); } catch (NoNodeException e) { } catch (Exception e) { throw new IOException(e); } if (tables == null) { return; } ArrayList<TableSchema> tableSchemaList = new ArrayList<TableSchema>(); for (String eachTableName : tables) { TableSchema table = TableSchema.loadTableSchema(conf, zk, eachTableName); if (table == null) { LOG.error("No table schema data:" + eachTableName); continue; } LOG.info(table.getTableName() + " schema info loaded"); tableSchemaList.add(table); } schemaMap.load(tableSchemaList); } /** * master ? . * * @param conf * @return */ public static String getMasterServerHostName(CloudataConf conf, ZooKeeper zk) throws IOException { List<String> values = null; try { try { values = zk.getChildren(LockUtil.getZKPath(conf, Constants.MASTER), false); } catch (NoNodeException e) { } if (values == null || values.size() == 0) { return null; } Collections.sort(values); String leaderNode = values.get(0); if (zk.exists(LockUtil.getZKPath(conf, MASTER + "/" + leaderNode), false) == null) { LOG.info("No master path [" + LockUtil.getZKPath(conf, MASTER) + "/" + leaderNode + "]"); return null; } byte[] data; try { data = zk.getData(LockUtil.getZKPath(conf, MASTER + "/" + leaderNode), false, null); } catch (NoNodeException e) { return null; } if (data == null) { return null; } return new String(data); } catch (Exception e) { throw new IOException(e); } } /** * ?? ? . 
* * @throws IOException */ public static void format(CloudataConf conf, boolean force, String mode) throws IOException { if (force) { callFormat(conf, mode); } else { System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); System.out.println("Warning!!!!"); System.out.println("Format will remove all datas in " + conf.get("cloudata.root") + "," + conf.get("commitlog.image.dir")); System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); System.out.print("Continue format(Y|N): "); BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); String answer = reader.readLine(); if ("Y".equals(answer)) { callFormat(conf, mode); } else { System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); System.out.println("format is cancelled"); System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); } } } private static void callFormat(CloudataConf conf, String mode) throws IOException { System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); System.out.println("Start format " + mode); ZooKeeper zk = LockUtil.getZooKeeper(conf, "CloudataMaster", null); if ("file".equals(mode)) { doFormatFile(conf, zk); } else if ("lock".equals(mode)) { doFormatLock(conf, zk); } System.out.println("End format " + mode); System.out.println("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"); } public static void formatFileSystem(CloudataConf conf) throws IOException { CloudataFileSystem fs = CloudataFileSystem.get(conf); if (fs == null) { LOG.fatal("Can't format cause FileSystem Error."); return; } if (!fs.delete(new GPath(conf.get("cloudata.root")), true)) { LOG.warn("Can't all data file:" + conf.get("cloudata.root")); } // fs.close(); } private static void doFormatFile(CloudataConf conf, ZooKeeper zk) throws IOException { LOG.info("start deleting files"); formatFileSystem(conf); LOG.info("end deleting files"); CommitLogFileSystemIF commitLogFileSystem = CommitLogFileSystem.getCommitLogFileSystem(conf, null, zk); LOG.info("start deleting commitlog"); 
commitLogFileSystem.format(); CloudataFileSystem fs = CloudataFileSystem.get(conf); if (!fs.delete(new GPath(conf.get("commitlog.image.dir")), true)) { LOG.warn("Can't all data file:" + conf.get("commitlog.image.dir")); } LOG.info("end deleting commitlog"); } private static boolean doFormatLock(CloudataConf conf, ZooKeeper zk) throws IOException { try { if (zk.exists(LockUtil.getZKPath(conf, ""), false) == null) { } else { List<String> locks = null; try { locks = zk.getChildren(LockUtil.getZKPath(conf, ""), false); } catch (NoNodeException e) { } if (locks != null) { for (String eachLock : locks) { LOG.debug("Delete exists lock: " + "" + eachLock); } } LockUtil.delete(zk, LockUtil.getZKPath(conf, ""), true); } LOG.info("end deleting lock"); return true; } catch (Exception e) { LOG.error(e.getMessage(), e); return false; } finally { CloudataMaster.addUser(conf, zk, conf.getUserId()); CloudataMaster.addSuperGroupUser(conf, zk, conf.getUserId()); } } private static void releaseLock(CloudataConf conf) throws IOException { LOG.info("start release lock"); ZooKeeper zk = LockUtil.getZooKeeper(conf, "CloudataMaster", null); try { if (zk.exists(LockUtil.getZKPath(conf, ""), false) == null) { return; } LOG.debug("release exists lock: " + "" + Constants.ROOT_TABLET_HOST); try { zk.delete(LockUtil.getZKPath(conf, Constants.ROOT_TABLET_HOST), -1); } catch (NoNodeException e) { } deleteChildrenLock(conf, zk, Constants.SERVER); deleteChildrenLock(conf, zk, Constants.COMMITLOG_SERVER); deleteChildrenLock(conf, zk, Constants.MASTER); deleteChildrenLock(conf, zk, Constants.TABLETSERVER_SPLIT); deleteChildrenLock(conf, zk, Constants.MC_LOCK_PATH); deleteChildrenLock(conf, zk, Constants.SC_LOCK_PATH); LOG.info("end release lock"); } catch (Exception e) { LOG.error(e.getMessage(), e); } } private static void deleteChildrenLock(CloudataConf conf, ZooKeeper zk, String path) throws IOException { try { List<String> locks = null; try { locks = zk.getChildren(LockUtil.getZKPath(conf, path), 
false); } catch (NoNodeException e) { return; } if (locks != null) { for (String eachLock : locks) { String lockPath = LockUtil.getZKPath(conf, path + "/" + eachLock); LOG.debug("release exists lock: " + "" + lockPath); LockUtil.delete(zk, lockPath, true); } } } catch (Exception e) { throw new IOException(e); } } /** * * @param args * @throws IOException */ public static void main(String args[]) throws IOException { CloudataConf conf = new CloudataConf(); if (args.length > 0) { boolean force = false; if (args.length > 1 && args[1].equals("-noPrompt")) { force = true; } if ("-formatFile".equals(args[0])) { format(conf, force, "file"); return; } else if ("-formatLock".equals(args[0])) { format(conf, force, "lock"); return; } else if ("-releaseLock".equals(args[0])) { releaseLock(conf); return; } else if ("-drop".equals(args[0])) { String tableName = args[1]; forcedDropTable(tableName); } else { System.out.println("available arguments [-formatFile|-formatLock|-drop]"); } System.exit(0); } try { StringUtils.startupShutdownMessage(CloudataMaster.class, args, LOG); CloudataMaster masterServer = new CloudataMaster(); masterServer.init(conf); (new Thread(masterServer)).start(); } catch (Exception e) { LOG.fatal("Error while start CloudataMaster:" + e.getMessage(), e); } } /** * lock, schema ?? drop * * @param tableName * @throws IOException */ public static void forcedDropTable(String tableName) throws IOException { CloudataConf conf = new CloudataConf(); CloudataFileSystem fs = CloudataFileSystem.get(conf); ZooKeeper zk = LockUtil.getZooKeeper(conf, "CloudataMaster", null); try { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.TABLE_DROP + "/" + tableName), "0".getBytes(), CreateMode.PERSISTENT); } catch (Exception e) { throw new IOException(e); } finalizeDrop(null, conf, fs, zk, tableName, true); } /** * ?? ? . 
*/ /* public void addColumn(String tableName, ColumnInfo addedColumnInfo) throws IOException { AclManager.checkPermission(conf, zk, schemaMap, tableName, "r"); TableSchema tableSchema = schemaMap.get(tableName); if (tableSchema == null) { throw new IOException("Table not exists [" + tableName + "]"); } synchronized(tableSchema) { if (tableSchema.getColumnInfos().contains(addedColumnInfo)) { throw new IOException("Column already exists [" + addedColumnInfo + "]"); } tableSchema.addColumn(addedColumnInfo); try { zk.setData(LockUtil.getZKPath(conf, PATH_SCHEMA + "/" + tableName), LockUtil.getBytes(tableSchema), -1); } catch (Exception e) { tableSchema.removeColumn(addedColumnInfo); LOG.error(e.getMessage(), e); throw new IOException(e); } } } */ public void addColumn(String tableName, String addedColumnName) throws IOException { AclManager.checkPermission(conf, zk, schemaMap, tableName, "r"); TableSchema tableSchema = schemaMap.get(tableName); if (tableSchema == null) { throw new IOException("Table not exists [" + tableName + "]"); } synchronized (tableSchema) { if (tableSchema.getColumns().contains(addedColumnName)) { throw new IOException("Column already exists [" + addedColumnName + "]"); } tableSchema.addColumn(addedColumnName); try { zk.setData(LockUtil.getZKPath(conf, PATH_SCHEMA + "/" + tableName), LockUtil.getBytes(tableSchema), -1); } catch (Exception e) { tableSchema.removeColumn(addedColumnName); LOG.error(e.getMessage(), e); throw new IOException(e); } } } public TabletInfo addTablet(String tableName, Row.Key startRowKey, Row.Key endRowKey) throws IOException { if (tableName == null || tableName.equals(TABLE_NAME_ROOT) || tableName.equals(TABLE_NAME_META)) { throw new IOException("Check table name:" + tableName); } if (endRowKey == null || endRowKey.equals(Row.Key.MAX_KEY) || endRowKey.equals(Row.Key.MIN_KEY)) { throw new IOException("Check endRowKey:" + endRowKey); } if (schemaMap.contains(tableName)) { throw new IOException("No Table:" + tableName); } 
synchronized (tabletInfoMonitor) { Row.Key metaRowKey = Tablet.generateMetaRowKey(tableName, endRowKey); CTable ctable = CTable.openTable(conf, TABLE_NAME_META); if (ctable.hasValue(META_COLUMN_NAME_TABLETINFO, metaRowKey)) { throw new IOException("Already exists tablet:" + tableName + "," + endRowKey); } String tabletName = Tablet.generateTabletName(tableName); TabletInfo tabletInfo = new TabletInfo(tableName, tabletName, startRowKey, endRowKey); assignTablet(tabletInfo); return tabletInfo; } } /** * ?? ?. endRowKeys? ? ? tablet? ?? tablet? ? * min ~ max ? tablet ?. * * @param table * @param endRowKeys */ public void createTable(TableSchema table, Row.Key[] endRowKeys) throws IOException { // make table schema String tableName = table.getTableName(); LOG.debug("Create Table:" + tableName); String tabletDropLockPath = Constants.TABLET_DROP + "/" + tableName; try { if (zk.exists(LockUtil.getZKPath(conf, tabletDropLockPath), false) != null) { throw new TableExistsException("table [" + tableName + "] dropping"); } } catch (Exception e) { throw new IOException(e); } if (!schemaMap.putIfAbsent(tableName, table)) { throw new TableExistsException("already exists table [" + tableName + "]"); } table.saveTableSchema(conf, zk); synchronized (tabletInfoMonitor) { tabletInfos.put(tableName, new HashMap<String, TabletInfo>(100)); } // assign default tablet // default tablet row range : 0 ~ if (!TABLE_NAME_ROOT.equals(tableName) && !TABLE_NAME_META.equals(tableName)) { if (endRowKeys == null || endRowKeys.length == 0) { endRowKeys = new Row.Key[1]; endRowKeys[0] = Row.Key.MAX_KEY; } Row.Key startRowKey = Row.Key.MIN_KEY; for (int i = 0; i < endRowKeys.length; i++) { String tabletName = Tablet.generateTabletName(tableName); TabletInfo tabletInfo = new TabletInfo(tableName, tabletName, startRowKey, endRowKeys[i]); assignTablet(tabletInfo); startRowKey = endRowKeys[i]; } if (!Row.Key.MAX_KEY.equals(endRowKeys[endRowKeys.length - 1])) { String tabletName = 
Tablet.generateTabletName(tableName); TabletInfo tabletInfo = new TabletInfo(tableName, tabletName, startRowKey, Row.Key.MAX_KEY); assignTablet(tabletInfo); } } } public boolean assignTablet(TabletInfo tabletInfo) throws IOException { List<TabletServerInfo> tabletServers = new ArrayList<TabletServerInfo>(); synchronized (liveTabletServers) { tabletServers.addAll(liveTabletServers.values()); } return assignTablet(tabletInfo, tabletServers); } private boolean assignTablet(TabletInfo tabletInfo, Collection<TabletServerInfo> tabletServers) throws IOException { synchronized (tabletInfoMonitor) { if (assigningTablets.containsKey(tabletInfo)) { LOG.info("Already requested assigning(" + tabletInfo + ")"); return true; } unassignedTablets.remove(tabletInfo.getTabletName()); assigningTablets.put(tabletInfo.getTabletName(), tabletInfo); } int retry = 0; // TabletServer ? while (retry < 5) { int runningSize = tabletServers.size(); if (runningSize == 0) { LOG.debug("no active tablet server. can't assign tablet: " + tabletInfo); break; } TabletServerInfo tabletServerInfo = null; try { long startTime = System.currentTimeMillis(); TabletManagerProtocol tabletServer = null; while (true) { tabletServerInfo = selectTabletServerForAssignment(tabletServers); if (tabletServerInfo != null) { try { tabletServer = connectTabletServer(tabletServerInfo); break; } catch (IOException e) { tabletServers.remove(tabletServerInfo); } } if (System.currentTimeMillis() - startTime > (10 * 1000)) { break; } } if (tabletServerInfo == null) { LOG.error("Can't find proper tablet server(live TabletServer=" + runningSize); retry++; continue; } try { tabletServer.assignTablet(tabletInfo); } catch (IOException e) { // ? TabletServer? ? ? //FIXME ? Tablet? ? TabletServer? ? ?? 
if (!liveTabletServers.containsKey(tabletServerInfo.getHostName())) { LOG.error("Can't assign to " + tabletServerInfo.getHostName()); tabletServers.remove(tabletServerInfo); retry++; continue; } int errorRetry = 0; while (true) { try { TabletInfo assignedTabletInfo = tabletServer.getTabletInfo(tabletInfo.getTabletName()); if (assignedTabletInfo != null) { //?? ?? break; } } catch (IOException err) { LOG.warn("Error checking Tablet already assigned:" + tabletInfo + "," + tabletServerInfo); Thread.sleep(1000); errorRetry++; if (errorRetry >= 10) { throw new IOException(e); } } } } tabletInfo.setAssignedHostName(tabletServerInfo.getHostName()); tabletServerInfo.addNumOfTablets(); LOG.info("assignTablet: tabletName=" + tabletInfo.getTabletName() + ", assignedHost=" + tabletServerInfo.getHostName()); return true; } catch (Exception e) { LOG.warn("error while assignment. but retry:" + e.getMessage()); if (tabletServerInfo != null) { LOG.warn("Exception in assigning tablet : " + tabletInfo.getTabletName() + " to host : " + tabletServerInfo.getHostName() + ". Retry count : " + (retry + 1), e); } else { LOG.warn("Exception in assigning tablet : " + tabletInfo.getTabletName() + " to host null. 
Retry count : " + (retry + 1), e); } retry++; try { Thread.sleep(1 * 1000); } catch (InterruptedException e1) { } } } // if assignment fail synchronized (tabletInfoMonitor) { assigningTablets.remove(tabletInfo.getTabletName()); unassignedTablets.put(tabletInfo.getTabletName(), tabletInfo); } LOG.debug("Assignment fail:" + tabletInfo); return false; } private TabletServerInfo selectTabletServerForAssignment(Collection<TabletServerInfo> tabletServers) { List<TabletServerInfo> sortedTabletServers = new ArrayList<TabletServerInfo>(); sortedTabletServers.addAll(tabletServers); Collections.sort(sortedTabletServers, new TabletServerInfo.TabletServerInfoComparator()); TabletServerInfo selectedServer = sortedTabletServers.get(0); if (lastAssignedTabletServer == null) { lastAssignedTabletServer = selectedServer; return selectedServer; } else { synchronized (lastAssignedTabletServer) { if (sortedTabletServers.size() > 1 && selectedServer.equals(lastAssignedTabletServer)) { selectedServer = sortedTabletServers.get(1); lastAssignedTabletServer = selectedServer; return selectedServer; } else { lastAssignedTabletServer = selectedServer; return selectedServer; } } } } public AsyncTaskStatus getAsyncTaskStatus(String taskId) throws IOException { return asyncTaskManager.getAsyncTaskStatus(taskId); } public void removeAsyncTask(String taskId) throws IOException { asyncTaskManager.removeAsyncTask(taskId); } public String dropTable(String tableName) throws IOException { AclManager.checkOwner(conf, zk, schemaMap, tableName); String taskId = asyncTaskManager.runAsyncTask(new TableDropTask(tableName)); return taskId; } class TableDropTask extends AsyncTask { String tableName; public TableDropTask(String tableName) { this.tableName = tableName; } // FIXME Drop ?? schema ? META? ? ? 
/**
     * Runs the drop: rejects the request when a drop of the same table is still pending, creates
     * the drop lock node in ZooKeeper (failing when it already exists) and asks every live tablet
     * server to drop the table. Servers whose {@code dropTable} call returns true are collected
     * and later awaited.
     *
     * @throws Exception when the table is already being dropped, the lock exists, or any later
     *         step fails
     */
    public void exec() throws Exception {
      LOG.debug("Drop table:" + tableName);
      synchronized (dropingTabletServers) {
        if (dropingTabletServers.containsKey(tableName) && dropingTabletServers.get(tableName).size() > 0) {
          throw new IOException("Already dropping:" + tableName);
        }
      }

      String lockPath = Constants.TABLE_DROP + "/" + tableName;
      if (zk.exists(LockUtil.getZKPath(conf, lockPath), false) != null) {
        throw new IOException(
            "Can't drop table:" + tableName + "(already drop lock exists:" + lockPath + ")");
      } else {
        LockUtil.createNodes(zk, LockUtil.getZKPath(conf, lockPath), "0".getBytes(), CreateMode.PERSISTENT);
      }

      // Send the drop request to every live tablet server.
      synchronized (dropingTabletServers) {
        List<String> dropTabletServerList = new ArrayList<String>();
        for (String hostName : liveTabletServers.keySet()) {
          TabletManagerProtocol tabletServer = null;
          try {
            tabletServer = connectTabletServer(hostName);
            if (tabletServer.dropTable(getTaskId(), tableName)) {
              dropTabletServerList.add(hostName);
            }
          } catch (IOException e) {
            LOG.fatal("TabletServer[" + hostName + "] can't drop table:" + tableName + "."
                + " shutdown tabletserver", e);
            if (tabletServer == null) {
              // connectTabletServer itself failed: there is no proxy to call shutdown() on.
              continue;
            }
            try {
              tabletServer.shutdown();
            } catch (IOException err) {
              LOG.error("Can't shutdown tabletserver:" + hostName, err);
            }
          }
        }
        // Next: register the servers that accepted the request and wait for their reports.
dropingTabletServers.put(tableName, dropTabletServerList); long startTime = System.currentTimeMillis(); while (true) { dropingTabletServers.wait(10 * 1000); if (dropingTabletServers.get(tableName) == null) { break; } if (dropingTabletServers.get(tableName).isEmpty()) { dropingTabletServers.remove(tableName); break; } // 5 min /* long gap = System.currentTimeMillis() - startTime; if (gap > 5 * 60 * 1000) { LOG.error(tableName + " Drop timeout after " + gap); addErrorTrace(tableName + " Drop timeout after " + gap); dropingTabletServers.remove(tableName); break; } */ } } String errorMessage = finalizeDrop(CloudataMaster.this, conf, fs, zk, tableName, false); if (errorMessage != null && errorMessage.length() > 0) { addErrorTrace(errorMessage); } TabletLocationCache.getInstance(conf).removeTableSchemaCache(tableName); LOG.debug("Drop table end:" + tableName); } } public static String finalizeDrop(CloudataMaster cloudataMaster, CloudataConf conf, CloudataFileSystem fs, ZooKeeper zk, String tableName, boolean forced) throws IOException { String errorMessage = ""; // Schema try { LockUtil.delete(zk, LockUtil.getZKPath(conf, Constants.PATH_SCHEMA + "/" + tableName), true); } catch (Exception e) { if (forced) { errorMessage += "Can't delete table schema:" + Constants.PATH_SCHEMA + "/" + tableName + "\n"; } else { throw new IOException(e); } } // Table ?? 
Trash rename String tableDataPath = TableSchema.getTableDataPath(conf, tableName); if (fs.exists(new GPath(tableDataPath))) { String tableTrashPath = TableSchema.getTableDataTrashPath(conf, tableName); GPath dataTrashPath = new GPath(tableTrashPath); fs.mkdirs(dataTrashPath.getParent()); boolean renameToResult = fs.renameTo(new GPath(tableDataPath), dataTrashPath); if (!renameToResult) { LOG.error("Can't rename table file:" + tableDataPath + " to " + tableTrashPath + " : " + renameToResult); // LOG.fatal("CloudataMaster shutdown cause: Can't move table data file to trash while droping"); if (!"local".equals(conf.get("cloudata.filesystem"))) { errorMessage += "Can't rename table file:" + tableDataPath + " to " + tableTrashPath + " : " + renameToResult + "\n"; } } } // TODO CommitLog? Trash ?? // META ? ? ? Tablet? META CTable ctable = CTable.openTable(conf, TABLE_NAME_META); TableScanner scanner = null; List<ScanCell> scanCells = new ArrayList<ScanCell>(); try { scanner = ScannerFactory.openScanner(ctable, Tablet.generateMetaRowKey(tableName, Row.Key.MIN_KEY), Tablet.generateMetaRowKey(tableName, Row.Key.MAX_KEY), META_COLUMN_NAME_TABLETINFO, 20); ScanCell scanCell = null; while ((scanCell = scanner.next()) != null) { TabletInfo tabletInfo = new TabletInfo(); tabletInfo.readFields(scanCell.getBytes()); if (tableName.compareTo(tabletInfo.getTableName()) < 0) { break; } if (tableName.equals(tabletInfo.getTableName())) { scanCells.add(scanCell); } } } finally { if (scanner != null) scanner.close(); } if (scanCells != null) { for (ScanCell eachCell : scanCells) { TabletInfo tabletInfo = new TabletInfo(); tabletInfo.readFields(eachCell.getBytes()); try { if (tabletInfo.getAssignedHostName() != null) { if (CloudataMaster.connectTabletServer(conf, tabletInfo.getAssignedHostName()) .dropTable(null, tableName)) { continue; } } // TabletServer? META? . 
Row.Key rowKey = Tablet.generateMetaRowKey(tabletInfo.getTableName(), tabletInfo.getEndRowKey());
          ctable.remove(rowKey, Constants.META_COLUMN_NAME_TABLETINFO, new Cell.Key(tabletInfo.getTabletName()));

          CommitLogFileSystemIF commitLogFileSystem = CommitLogFileSystem.getCommitLogFileSystem(conf, null, zk);
          Tablet.deleteCommitLog(commitLogFileSystem, tabletInfo.getTableName(), tabletInfo.getTabletName());
        } catch (IOException err) {
          LOG.error("Can't delet META while table drop:" + tabletInfo + ":" + err.getMessage(), err);
          errorMessage += "Can't delet META while table drop:" + tabletInfo + ":" + err.getMessage() + "\n";
        }
      }
    }

    // Delete the drop lock node, if it is still there.
    String tableDropLockPath = Constants.TABLE_DROP + "/" + tableName;
    try {
      if (zk.exists(LockUtil.getZKPath(conf, tableDropLockPath), false) != null) {
        LockUtil.delete(zk, LockUtil.getZKPath(conf, tableDropLockPath));
      }
    } catch (Exception e) {
      if (!forced) {
        throw new IOException(e);
      }
    }

    // Remove the schema from the master's in-memory map.
    if (cloudataMaster != null) {
      cloudataMaster.schemaMap.remove(tableName);
    }

    LOG.debug("End finalizeDrop:" + tableName);
    return errorMessage;
  }

  /**
   * Records that a tablet server finished dropping a table and wakes up threads waiting on
   * {@code dropingTabletServers}.
   *
   * @param taskId id of the drop task (not used here)
   * @param hostName tablet server that finished
   * @param tableName table being dropped
   */
  public void endTableDrop(String taskId, String hostName, String tableName) throws IOException {
    synchronized (dropingTabletServers) {
      LOG.debug("Receive endTableDrop from tabletserver:" + hostName + "," + tableName);
      List<String> servers = dropingTabletServers.get(tableName);
      if (servers != null) {
        servers.remove(hostName);
      }
      dropingTabletServers.notifyAll();
    }
  }

  /**
   * Records that a tablet server failed to drop a table: the message is added to the async
   * task's error trace (when the task is still known), the server is removed from the pending
   * list and threads waiting on {@code dropingTabletServers} are woken up.
   *
   * @param taskId id of the drop task
   * @param hostName tablet server that reported the error
   * @param tableName table being dropped
   * @param message error text reported by the tablet server
   */
  public void errorTableDrop(String taskId, String hostName, String tableName, String message)
      throws IOException {
    synchronized (dropingTabletServers) {
      LOG.debug("Receive errorTableDrop from tabletserver:" + hostName + "," + tableName);
      AsyncTask task = asyncTaskManager.getAsyncTask(taskId);
      if (task != null) {
        task.addErrorTrace(hostName + ":" + message);
      }

      List<String> servers = dropingTabletServers.get(tableName);
      // Guard the local list (the original tested the unrelated field "server"), so a report for
      // a table that is no longer pending cannot raise a NullPointerException.
      if (servers != null) {
        servers.remove(hostName);
      }
      dropingTabletServers.notifyAll();
    }
  }

  public TabletInfo[]
getTablets(String tableName) { synchronized (tabletInfoMonitor) { Map<String, TabletInfo> tablets = tabletInfos.get(tableName); if (tablets == null) return null; Collection<TabletInfo> tabletInfoList = tablets.values(); return tabletInfoList.toArray(new TabletInfo[tabletInfoList.size()]); } } public TableSchema[] listTables() { Collection<TableSchema> tableList = schemaMap.values(); return tableList.toArray(new TableSchema[tableList.size()]); } public void endTabletAssignment(TabletInfo tabletInfo, boolean created) throws IOException { // FIXME ? ??? LOG.info("endTabletAssignment:" + tabletInfo + ", created:" + created + "," + tabletInfo.getAssignedHostName()); if (!assigningTablets.containsKey(tabletInfo.getTabletName())) { // assigningTablets? Tablet? split? ? ? . LOG.info("receive endTabletAssignment for tablet which is not assigningTablet"); return; } synchronized (tabletInfoMonitor) { Map<String, TabletInfo> tabletInfoMap = tabletInfos.get(tabletInfo.getTableName()); if (tabletInfoMap == null) { tabletInfoMap = new HashMap<String, TabletInfo>(50); tabletInfos.put(tabletInfo.getTableName(), tabletInfoMap); } tabletInfoMap.put(tabletInfo.getTabletName(), tabletInfo); unassignedTablets.remove(tabletInfo.getTabletName()); assigningTablets.remove(tabletInfo.getTabletName()); // tsReportedTabletInfos.put(tabletInfo.getTableName(), tabletInfo); initialTabletEndCount++; } if (TABLE_NAME_ROOT.equals(tabletInfo.getTableName())) { synchronized (endRootTableAssignment) { endRootTableAssignment.set(true); endRootTableAssignment.notifyAll(); } } else if (TABLE_NAME_META.equals(tabletInfo.getTableName())) { // FIXME META tablet? split? // if (!clusterReady) { // setClusterReady(true); // // ? ? ? ROOT -> META -> User Table ? ? // UserTabletAssignmentThread userTabletAssignmentThread = new // UserTabletAssignmentThread(tabletInfo); // userTabletAssignmentThread.start(); // //addRunningThread(userTabletAssignmentThread); // } } } /** * Tablet ? ? ? ? . 1. Tablet? ? ?? ? ?? ? 
* stop service ? ??? stop server ? ??.cancelStop ?? * ? ? unsigned tablet ?? . 2. ?? Tablet? Tablet? unsigned * tablet ?? . */ public void errorTabletAssignment(String hostName, TabletInfo tabletInfo) { LOG.info("errorTabletAssignment:" + tabletInfo); if (TABLE_NAME_ROOT.equals(tabletInfo.getTableName())) { LOG.fatal("CloudataMaster shutdown cause:CloudataMaster stop cause by ROOT tablet assignment fail"); shutdown(); return; } if (!assigningTablets.containsKey(tabletInfo.getTabletName())) { // assigningTablets? Tablet? split? ? ? . LOG.error("receive errorTabletAssignment for tablet which is not assigningTablet"); return; } synchronized (tabletInfoMonitor) { TabletServerInfo tabletServerInfo = liveTabletServers.get(hostName); if (tabletServerInfo != null) { tabletServerInfo.subtractNumOfTablets(); } assigningTablets.remove(tabletInfo.getTabletName()); if (!schemaMap.contains(tabletInfo.getTableName())) { LOG.warn("Garbage tablet:" + tabletInfo); return; } else { unassignedTablets.put(tabletInfo.getTabletName(), tabletInfo); LOG.error("Added unassignedTablets:" + tabletInfo); } } } public long getProtocolVersion(String protocol, long clientVersion) throws IOException { if (protocol.equals(TableManagerProtocol.class.getName())) { return TableManagerProtocol.versionID; } else if (protocol.equals(TabletMasterProtocol.class.getName())) { return TabletMasterProtocol.versionID; } else { throw new IOException("Unknown protocol to tablet server: " + protocol); } } public boolean checkServer() { return true; } /** * TabletServe Tablet ?? . 
*/ private List<TabletInfo> getTabletsFromTabletServer() throws IOException { List<TabletInfo> tabletInfos = new ArrayList<TabletInfo>(); List<String> servers = null; try { servers = zk.getChildren(LockUtil.getZKPath(conf, Constants.SERVER), false); } catch (NoNodeException e) { } catch (Exception e) { throw new IOException(e); } if (servers == null) { return tabletInfos; } for (String tabletServerHost : servers) { try { TabletManagerProtocol tabletServer = connectTabletServer(tabletServerHost); TabletInfo[] tsTabletInfos = tabletServer.reportTablets(); for (TabletInfo tabletInfo : tsTabletInfos) { tabletInfos.add(tabletInfo); } synchronized (liveTabletServers) { TabletServerInfo tabletServerInfo = liveTabletServers.get(tabletServerHost); if (tabletServerInfo == null) { tabletServerInfo = new TabletServerInfo(tabletServerHost); liveTabletServers.put(tabletServerHost, tabletServerInfo); } tabletServerInfo.setNumOfTablets(tsTabletInfos.length); } } catch (Exception e) { LOG.error("Can't receive tablet reports from:" + tabletServerHost, e); } } return tabletInfos; } public void reportTabletSplited(final TabletInfo targetTablet, final TabletInfo[] splitedTablets) { synchronized (tabletInfoMonitor) { try { removeTabletInAllVariable(targetTablet); Map<String, TabletInfo> tablets = tabletInfos.get(splitedTablets[0].getTableName()); if (tablets == null) { tablets = new HashMap<String, TabletInfo>(100); tabletInfos.put(splitedTablets[0].getTableName(), tablets); } tablets.put(splitedTablets[0].getTabletName(), splitedTablets[0]); tablets.put(splitedTablets[1].getTabletName(), splitedTablets[1]); } catch (Exception e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); return; } } Thread assignThread = new Thread(threadGroup, new Runnable() { public void run() { try { // ?? ? tablet server? stop? ? ? & false assignTablet(splitedTablets[0]); } catch (Exception e) { // ? Master? ? ? . 
} finally { // removeRunningThread(this); } } }); assignThread.start(); } private void removeTabletInAllVariable(TabletInfo tabletInfo) { Map<String, TabletInfo> tablets = tabletInfos.get(tabletInfo.getTableName()); if (tablets != null) { tablets.remove(tabletInfo.getTabletName()); } unassignedTablets.remove(tabletInfo.getTabletName()); assigningTablets.remove(tabletInfo.getTabletName()); } public void shutdown() { if (!conf.getBoolean("testmode", false)) { System.exit(0); } if (!stopRequested) { stopRequested = true; server.stop(); if (infoServer != null) { try { infoServer.stop(); } catch (InterruptedException e1) { } } // ThreadGroup threadGroup = Thread.currentThread().getThreadGroup(); Thread[] threads = new Thread[threadGroup.activeCount()]; threadGroup.enumerate(threads, true); for (Thread thread : threads) { try { // LOG.debug("Master shutdown:child thread:" + thread.getId()); thread.interrupt(); } catch (Exception e) { } } } LOG.debug("shutdown masterserver:" + hostName); } public boolean isShutdowned() { return stopRequested; } /** * Master ? Root, Meta Tablet? Tablet . * * @author babokim */ class RootTabletAssignmentThread implements Runnable { private void assignRootTablet() throws IOException { // if root not exists, make root table TabletInfo rootTabletInfo = null; if (schemaMap.contains(TABLE_NAME_ROOT)) { //TabletInfo LOG.info("loading root tablet info"); rootTabletInfo = Tablet.getRootTabletInfo(conf, zk); } else { LOG.info("create ROOT Tablet"); // ROOT user ? ? if (!existSuperGroup()) { LOG.info("Create supergroup user(" + conf.getUserId() + ")"); addUser(conf, zk, conf.getUserId()); addSuperGroupUser(conf, zk, conf.getUserId()); } // ? 
createTable(GlobalConstants.ROOT_TABLE, null);

        rootTabletInfo = new TabletInfo(TABLE_NAME_ROOT, Tablet.generateTabletName(TABLE_NAME_ROOT),
            Row.Key.MIN_KEY, Row.Key.MAX_KEY);

        // Store the ROOT tablet info in a persistent ZooKeeper node.
        try {
          LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.ROOT_TABLET),
              LockUtil.getBytes(rootTabletInfo), CreateMode.PERSISTENT);
        } catch (Exception e) {
          // Pass the exception to the logger so the cause of the shutdown is not lost.
          LOG.fatal("CloudataMaster shutdown cause:Fail ROOT tablet lock:" + Constants.ROOT_TABLET, e);
          shutdown();
          return;
        }
      }

      if (rootTabletInfo == null) {
        LOG.fatal("CloudataMaster shutdown cause:Can't get ROOT tablet info");
        shutdown();
        return;
      }

      // Wait for a tablet server to start. The wait is a single wait(), not a loop, so an
      // interrupt or a wake-up without a registered server also ends it.
      synchronized (liveTabletServers) {
        if (liveTabletServers.isEmpty()) {
          LOG.info("No live tablet server. wait until tablet server added.");
          try {
            liveTabletServers.wait();
          } catch (InterruptedException e) {
          }
          LOG.info("TabletServer added: # of TabletServers: " + liveTabletServers.size());
        }
      }

      synchronized (tabletInfoMonitor) {
        Map<String, TabletInfo> rootTablets = new HashMap<String, TabletInfo>();
        rootTablets.put(rootTabletInfo.getTabletName(), rootTabletInfo);
        tabletInfos.put(TABLE_NAME_ROOT, rootTablets);
      }

      // When the ROOT_TABLET_HOST node already exists the ROOT tablet is treated as assigned
      // (original explanation unreadable; it mentions a master restart while tablet servers
      // keep running).
boolean exists; try { exists = zk.exists(LockUtil.getZKPath(conf, Constants.ROOT_TABLET_HOST), false) != null; } catch (Exception e) { throw new IOException(e); } if (exists) { synchronized (endRootTableAssignment) { endRootTableAssignment.set(true); endRootTableAssignment.notifyAll(); } } else { unassignedTablets.put(rootTabletInfo.getTabletName(), rootTabletInfo); assignTablet(rootTabletInfo); } } public void run() { // assignment root tablet try { assignRootTablet(); } catch (Exception e) { LOG.fatal(e); shutdown(); } // check finished root tablet assignment synchronized (endRootTableAssignment) { if (!endRootTableAssignment.get()) { LOG.info("wait until ROOT tablet assigned"); try { endRootTableAssignment.wait(); } catch (InterruptedException e) { } LOG.info("ROOT tablet assigned."); } } // report from tablet server. find unassigned tablet Map<String, TabletInfo> assignedTablets = new HashMap<String, TabletInfo>(); try { List<TabletInfo> tsTabletInfos = getTabletsFromTabletServer(); for (TabletInfo eachTabletInfo : tsTabletInfos) { assignedTablets.put(eachTabletInfo.getTabletName(), eachTabletInfo); } } catch (Exception e) { LOG.fatal(e); shutdown(); } // scan root tablet finding all meta tablet list try { scanMetaTabletFromRoot(); } catch (Exception e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); return; } // assignment meta tablet try { assignMetaTablet(assignedTablets); } catch (IOException e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); return; } // MetaTablet? ? ? . while (true) { synchronized (tabletInfoMonitor) { if (assigningTablets.isEmpty() && unassignedTablets.isEmpty()) { break; } } try { Thread.sleep(1 * 1000); } catch (InterruptedException e) { return; } } clusterReady = true; // ? ? Tablet? . initUnassignedUserTablet(assignedTablets); // ? ? Tablet initialTabletAssignCount = unassignedTablets.size(); assignUserTablet(); // Drop ? ? . completeDropedTable(); // Split ? ? Tablet? ? 
// Clean up the split records that tablet servers left in ZooKeeper.
      finalizeSplitTemp();

      LOG.debug("End RootTabletAssignmentThread, TabletAssign thread start");

      // Background thread: every 10 seconds try to assign whatever is still unassigned.
      Thread assignThread = new Thread(threadGroup, new Runnable() {
        public void run() {
          while (true) {
            try {
              Thread.sleep(10 * 1000);
            } catch (InterruptedException e) {
              return;
            }
            assignUserTablet();
          }
        }
      });
      assignThread.start();
    }

    /**
     * Starts a {@link TableDropTask} for every table that has a node under the TABLE_DROP path.
     * Any failure shuts the master down.
     * <p>
     * NOTE(review): {@code TableDropTask.exec()} throws when the table's drop lock node already
     * exists, and the tables listed here are exactly those with such a node. Confirm that
     * resuming a drop this way can succeed.
     */
    private void completeDropedTable() {
      try {
        String dropLockPath = Constants.TABLE_DROP;
        List<String> dropTableNames = null;
        try {
          dropTableNames = zk.getChildren(LockUtil.getZKPath(conf, dropLockPath), false);
        } catch (NoNodeException e) {
          // No drop node: nothing is pending.
        }

        if (dropTableNames == null || dropTableNames.size() == 0) {
          return;
        }

        for (String eachTable : dropTableNames) {
          asyncTaskManager.runAsyncTask(new TableDropTask(eachTable));
        }
      } catch (Exception e) {
        LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e);
        shutdown();
      }
    }

    /**
     * Tries to assign every tablet currently in {@code unassignedTablets}. The map is copied
     * under the monitor so assignment itself runs without holding it.
     */
    private void assignUserTablet() {
      Map<String, TabletInfo> assignTargetTablets = new HashMap<String, TabletInfo>(10);
      synchronized (tabletInfoMonitor) {
        assignTargetTablets.putAll(unassignedTablets);
      }

      for (TabletInfo eachTabletInfo : assignTargetTablets.values()) {
        try {
          assignTablet(eachTabletInfo);
        } catch (IOException e) {
          // Ignored on purpose: the periodic assign thread calls this method again.
        }
      }
    }

    /**
     * Scans every META tablet, registers the user tablets found there in {@code tabletInfos}
     * and queues those missing from {@code assignedTablets} as unassigned.
     *
     * @param assignedTablets tablets reported by tablet servers, keyed by tablet name
     */
    private void initUnassignedUserTablet(Map<String, TabletInfo> assignedTablets) {
      List<TabletInfo> tempMetaTablets = new ArrayList<TabletInfo>();
      synchronized (tabletInfoMonitor) {
        Map<String, TabletInfo> metaTablets = tabletInfos.get(TABLE_NAME_META);
        tempMetaTablets.addAll(metaTablets.values());
      }

      for (TabletInfo metaTabletInfo : tempMetaTablets) {
        TableScanner scanner = null;
        try {
          try {
            scanner = ScannerFactory.openScanner(conf, metaTabletInfo, META_COLUMN_NAME_TABLETINFO);
          } catch (Exception e) {
            LOG.fatal("Can't META Scanner", e);
            shutdown();
            // No scanner was opened. Stop here instead of dereferencing the null scanner below.
            return;
          }

          Row scanRow = null;
          while
((scanRow = scanner.nextRow()) != null) {
            TabletInfo userTabletInfo = new TabletInfo();
            List<Cell> cells = scanRow.getCellList(META_COLUMN_NAME_TABLETINFO);

            // A META row is expected to hold one tablet cell; more than one ends the process.
            if (cells.size() > 1) {
              for (Cell eachCell : cells) {
                userTabletInfo.readFields(eachCell.getBytes());
                LOG.fatal("Too many cells in META:" + scanRow.getKey() + "," + eachCell.getKey());
              }
              System.exit(0);
            }

            Cell lastCell = cells.get(cells.size() - 1);
            try {
              userTabletInfo.readFields(lastCell.getBytes());
            } catch (EOFException e) {
              // Fall back to readOldFields when the normal read runs out of bytes.
              userTabletInfo.readOldFields(new DataInputStream(new ByteArrayInputStream(lastCell.getBytes())));
            }

            // Skip tablets whose table has no schema.
            String userTableName = userTabletInfo.getTableName();
            if (TableSchema.loadTableSchema(conf, zk, userTableName) == null) {
              continue;
            }

            Map<String, TabletInfo> tabletsOfTable = tabletInfos.get(userTableName);
            if (tabletsOfTable == null) {
              tabletsOfTable = new HashMap<String, TabletInfo>();
              tabletInfos.put(userTableName, tabletsOfTable);
            }
            String userTabletName = userTabletInfo.getTabletName();
            tabletsOfTable.put(userTabletName, userTabletInfo);

            // Not reported by any tablet server: it has to be assigned.
            if (!assignedTablets.containsKey(userTabletName)) {
              synchronized (tabletInfoMonitor) {
                unassignedTablets.put(userTabletName, userTabletInfo);
              }
            }
          }
        } catch (Exception e) {
          LOG.fatal("initUnassignedUserTablet: " + metaTabletInfo, e);
          shutdown();
        } finally {
          if (scanner != null) {
            try {
              scanner.close();
            } catch (IOException e) {
              e.printStackTrace();
            }
          }
        }
      }
    }

    /**
     * Scans the ROOT tablet and stores every META tablet found there in the META entry of
     * {@code tabletInfos}, creating that entry when it is missing.
     *
     * @throws IOException from opening, reading or closing the scanner
     */
    private void scanMetaTabletFromRoot() throws IOException {
      Map<String, TabletInfo> metaTabletInfos = tabletInfos.get(TABLE_NAME_META);
      if (metaTabletInfos == null) {
        metaTabletInfos = new HashMap<String, TabletInfo>(100);
        tabletInfos.put(TABLE_NAME_META, metaTabletInfos);
      }

      // The ROOT table is expected to have been registered with its single tablet already.
      TabletInfo rootTabletInfo = tabletInfos.get(TABLE_NAME_ROOT).values().iterator().next();

      // Read the META tablet entries stored in the ROOT tablet.
TableScanner scanner = ScannerFactory.openScanner(conf, rootTabletInfo, META_COLUMN_NAME_TABLETINFO); try { ScanCell scanCell = null; while ((scanCell = scanner.next()) != null) { TabletInfo metaTabletInfo = new TabletInfo(); metaTabletInfo.readFields(scanCell.getBytes()); metaTabletInfos.put(metaTabletInfo.getTabletName(), metaTabletInfo); } } finally { scanner.close(); } } }// end of RootTabletAssignmentThread class /** * ?? TabletServer Heartbeat TabletServer disable? * ? TabletServer? ? Tablet? TabletServer? . ? Thread? * unassignedTablet? . * * @author babokim */ class ReassignmentWhenTabletServerFailThread implements Runnable { // FIXME TabletServer fail ? ? ?? ? // fail ? ? private TabletServerInfo tabletServerInfo; public ReassignmentWhenTabletServerFailThread(TabletServerInfo tabletServerInfo) { this.tabletServerInfo = tabletServerInfo; } public void run() { String failedHostName = tabletServerInfo.getHostName(); LOG.debug("Failed TabletServer:" + failedHostName + ", time=" + System.currentTimeMillis() + ", Master table count=" + tabletInfos.size()); // Split META /? TabletServer fail ? CloudataMaster? ?? // . ? Tablet ? String splitLockPath = Constants.TABLETSERVER_SPLIT + "/" + failedHostName; List<String> splitedTablets = null; try { splitedTablets = zk.getChildren(LockUtil.getZKPath(conf, splitLockPath), false); } catch (NoNodeException e) { } catch (Exception e) { LOG.fatal("CloudataeMaster shutdown cause: can't drop info", e); shutdown(); return; } if (splitedTablets != null && splitedTablets.size() > 0) { LOG.fatal("CloudataMaster shutdown cause: tablet server failed while spliting"); shutdown(); return; } // FIXME tabletInfos ? synchronized ? ?. TabletInfo rootTablet = null; Set<TabletInfo> metaTabletInfos = new HashSet<TabletInfo>(); Set<TabletInfo> userTabletInfos = new HashSet<TabletInfo>(); synchronized (tabletInfoMonitor) { // Master server? ? tablet ?? fail? ? tablet server? // tablet? . 
// Pass 1: tablets registered in tabletInfos whose assigned host is the failed server.
        for (Map<String, TabletInfo> tabletsOfTable : tabletInfos.values()) {
          for (TabletInfo candidate : tabletsOfTable.values()) {
            if (!failedHostName.equals(candidate.getAssignedHostName())) {
              continue;
            }

            String owningTable = candidate.getTableName();
            if (TABLE_NAME_ROOT.equals(owningTable)) {
              rootTablet = candidate;
            } else if (TABLE_NAME_META.equals(owningTable)) {
              metaTabletInfos.add(candidate);
            } else {
              userTabletInfos.add(candidate);
            }

            LOG.debug("Tablet on the failed TabletServer(tabletInfos):" + candidate);
            assigningTablets.remove(candidate.getTabletName());
          }
        }

        // Pass 2: tablets that were still being assigned to the failed server. They stay in
        // assigningTablets here.
        for (TabletInfo candidate : assigningTablets.values()) {
          if (!failedHostName.equals(candidate.getAssignedHostName())) {
            continue;
          }

          String owningTable = candidate.getTableName();
          if (TABLE_NAME_ROOT.equals(owningTable)) {
            rootTablet = candidate;
          } else if (TABLE_NAME_META.equals(owningTable)) {
            metaTabletInfos.add(candidate);
          } else {
            userTabletInfos.add(candidate);
          }

          LOG.debug("Tablet on the failed TabletServer(assigningTablets):" + candidate);
        }

        // Next: leave out tablets of tables that are currently being dropped.
List<TabletInfo> removeTargets = new ArrayList<TabletInfo>(); for (TabletInfo tabletInfo : assigningTablets.values()) { if (dropingTabletServers.containsKey(tabletInfo.getTableName())) { removeTargets.add(tabletInfo); } } for (TabletInfo tabletInfo : removeTargets) { assigningTablets.remove(tabletInfo.getTabletName()); } removeTargets = new ArrayList<TabletInfo>(); for (TabletInfo tabletInfo : userTabletInfos) { if (dropingTabletServers.containsKey(tabletInfo.getTableName())) { removeTargets.add(tabletInfo); } } for (TabletInfo tabletInfo : removeTargets) { userTabletInfos.remove(tabletInfo.getTabletName()); } } // ROOT Tablet if (rootTablet != null) { try { assignTablet(rootTablet); } catch (Exception e) { LOG.error("ReassignmentWhenTabletServerFailThread: assign ROOT"); } } try { // ////////////////////////////////////////////////////// // META Tablet for (TabletInfo tabletInfo : metaTabletInfos) { try { assignTablet(tabletInfo); } catch (Exception e) { LOG.error("ReassignmentWhenTabletServerFailThread: assign META"); } } // ////////////////////////////////////////////////////// // User Tablet for (TabletInfo tabletInfo : userTabletInfos) { try { assignTablet(tabletInfo); } catch (Exception e) { LOG.error("ReassignmentWhenTabletServerFailThread: assign userTabletInfos: " + tabletInfo); } } } catch (Exception e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); } } } /** * TabletServer? Split ? ? Master report / ? ? * Split? ?? ? ? ?? ? tablet? ? ? . 
* * @throws IOException */ private void finalizeSplitTemp() { List<String> splitHostInfos = null; try { splitHostInfos = zk.getChildren(LockUtil.getZKPath(conf, Constants.TABLETSERVER_SPLIT), false); } catch (NoNodeException e) { } catch (Exception e) { LOG.warn("finalizeSplitTemp lockService.readDir error", e); } if (splitHostInfos == null || splitHostInfos.isEmpty()) { return; } try { for (String eachHostName : splitHostInfos) { LOG.info("finalizeSplitTemp:host=" + eachHostName); List<String> tabletSplitInfos = null; try { tabletSplitInfos = zk.getChildren( LockUtil.getZKPath(conf, Constants.TABLETSERVER_SPLIT + "/" + eachHostName), false); } catch (NoNodeException e) { } if (tabletSplitInfos == null || tabletSplitInfos.isEmpty()) { LockUtil.delete(zk, LockUtil.getZKPath(conf, Constants.TABLETSERVER_SPLIT + "/" + eachHostName), true); continue; } for (String splitTabletName : tabletSplitInfos) { LOG.info("finalizeSplitTemp:tablet=" + splitTabletName); String path = LockUtil.getZKPath(conf, Constants.TABLETSERVER_SPLIT + "/" + eachHostName + "/" + splitTabletName); byte[] splitInfo = zk.getData(path, false, null); DataInputStream din = new DataInputStream(new ByteArrayInputStream(splitInfo)); String tableName = CWritableUtils.readString(din); TabletInfo[] splitTabletInfos = new TabletInfo[2]; splitTabletInfos[0] = new TabletInfo(); splitTabletInfos[0].readFields(din); splitTabletInfos[1] = new TabletInfo(); splitTabletInfos[1].readFields(din); // ?? Tablet? ?. try { TabletInfo tabletInfo = connectTabletServer(hostName).getTabletInfo(splitTabletName); if (tabletInfo != null) { LOG.fatal("Tablet Split aborted abnormally." + splitTabletName + "," + splitTabletInfos[0] + "," + splitTabletInfos[1]); //shutdown(); } } catch (Exception e) { // ? 
Tablet LOG.error("finalizeSplitTemp:getTabletInfo Error:" + splitTabletName, e); } Map<String, TabletInfo> managedTabletInfos = tabletInfos.get(tableName); if (managedTabletInfos != null && managedTabletInfos.containsKey(splitTabletName)) { LOG.fatal("Tablet Split aborted abnormally." + splitTabletName + "," + splitTabletInfos[0] + "," + splitTabletInfos[1]); shutdown(); } else { // Tablet? ? GPath dataPath = Tablet.getTabletPath(conf, tableName, splitTabletName); LOG.info("delete tablet files while finalize split:" + splitTabletName + ":" + dataPath); long startTime = System.currentTimeMillis(); while (true) { boolean result = fs.delete(dataPath, true); if (result) { break; } try { Thread.sleep(100); } catch (InterruptedException e) { return; } if (System.currentTimeMillis() - startTime > 30 * 1000) { LOG.warn("Can't delete " + dataPath + " while droping " + splitTabletName); break; } } } } // for each tablet LockUtil.delete(zk, LockUtil.getZKPath(conf, Constants.TABLETSERVER_SPLIT + "/" + eachHostName), true); } // for each tabletserver } catch (Exception e) { LOG.fatal("CloudataMaster shutdown cause:" + e.getMessage(), e); shutdown(); } } private void addRunningTabletServer(String tabletServerHostName) throws IOException { LOG.info("Notice tablet server created: " + tabletServerHostName); TabletServerInfo tabletServerInfo = new TabletServerInfo(tabletServerHostName); if (liveTabletServers.containsKey(tabletServerHostName)) { return; } // call release all tablets. if tablet server reconnected. 
// connectTabletServer(tabletServerInfo).stopAllTablets(); liveTabletServers.put(tabletServerHostName, tabletServerInfo); int ratio = conf.getInt("cloudatamaster.iolock.ratio", 1); synchronized (Constants.SC_LOCK_PATH) { for (int i = 0; i < ratio; i++) { addDiskIOLock(Constants.SC_LOCK_PATH, Constants.SC_LOCK_NAME, scLockNo); addDiskIOLock(Constants.MC_LOCK_PATH, Constants.MC_LOCK_NAME, mcLockNo); } } if (deadTabletServers.contains(tabletServerHostName)) { deadTabletServers.remove(tabletServerHostName); } } class MasterNodeDeletedWatcher implements Watcher { String masterLockPathName; MasterNodeDeletedWatcher(String masterLockPathName) { this.masterLockPathName = masterLockPathName; } @Override public void process(WatchedEvent event) { if (event.getType() == Event.EventType.None) { switch (event.getState()) { case SyncConnected: break; case Disconnected: case Expired: LOG.info("Shutdown cause lock expired:" + event); shutdown(); break; } } else if (event.getType() == Event.EventType.NodeDeleted) { synchronized (masterElectMonitor) { masterElectMonitor.notifyAll(); } } } } class TabletServerWatcher implements Watcher { @Override public void process(WatchedEvent event) { if (event.getType() == Event.EventType.NodeChildrenChanged) { try { List<String> tabletServerLocks = null; try { tabletServerLocks = zk.getChildren(LockUtil.getZKPath(conf, Constants.SERVER), this); } catch (NoNodeException e) { } if (tabletServerLocks == null) { tabletServerLocks = new ArrayList<String>(); } synchronized (liveTabletServers) { Set<String> tmpTabletServers = new HashSet<String>(); tmpTabletServers.addAll(liveTabletServers.keySet()); for (String eachTabletServer : tabletServerLocks) { if (!tmpTabletServers.contains(eachTabletServer)) { addRunningTabletServer(eachTabletServer); } else { tmpTabletServers.remove(eachTabletServer); } } for (String failedTabletServer : tmpTabletServers) { processTabletServerFail(failedTabletServer); liveTabletServers.remove(failedTabletServer); } 
liveTabletServers.notifyAll(); } } catch (Exception e) { LOG.error(e.getMessage(), e); } } } } class CommitLogServerWatcher implements Watcher { @Override public void process(WatchedEvent event) { if (event.getType() == Event.EventType.NodeChildrenChanged) { try { List<String> commitLogServerLocks = null; try { commitLogServerLocks = zk.getChildren(LockUtil.getZKPath(conf, Constants.COMMITLOG_SERVER), this); } catch (NoNodeException e) { } if (commitLogServerLocks == null) { commitLogServerLocks = new ArrayList<String>(); } synchronized (liveCommitLogServers) { Set<String> tmpLiveCommitLogServers = new HashSet<String>(); tmpLiveCommitLogServers.addAll(liveCommitLogServers.keySet()); for (String eachCommitLogServer : commitLogServerLocks) { if (tmpLiveCommitLogServers.contains(eachCommitLogServer)) { tmpLiveCommitLogServers.remove(eachCommitLogServer); } else { liveCommitLogServers.put(eachCommitLogServer, new ServerMonitorInfo()); deadCommitLogServers.remove(eachCommitLogServer); } } for (String failedCommitLogServer : tmpLiveCommitLogServers) { deadCommitLogServers.add(failedCommitLogServer); liveCommitLogServers.remove(failedCommitLogServer); } } } catch (Exception e) { LOG.error(e.getMessage(), e); } } } } private void processTabletServerFail(String tabletServerHostName) { TabletServerInfo tabletServerInfo = new TabletServerInfo(tabletServerHostName); if (!liveTabletServers.containsKey(tabletServerHostName)) { return; } LOG.info("Notice tablet server failed: " + tabletServerHostName); liveTabletServers.remove(tabletServerHostName); Thread thread = new Thread(threadGroup, new ReassignmentWhenTabletServerFailThread(tabletServerInfo)); thread.start(); // Drop ? TabletServer? ?? synchronized (dropingTabletServers) { for (List<String> entry : dropingTabletServers.values()) { entry.remove(tabletServerHostName); } } deadTabletServers.add(tabletServerHostName); } private void addLockEventHandler() throws IOException { //TabletServer ?? 
synchronized (liveTabletServers) { try { if (zk.exists(LockUtil.getZKPath(conf, Constants.SERVER), false) == null) { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.SERVER), null, CreateMode.PERSISTENT); } } catch (NodeExistsException e) { } catch (Exception e) { throw new IOException(e); } List<String> servers; try { LOG.info("TabletServer lock watcher registered."); servers = zk.getChildren(LockUtil.getZKPath(conf, Constants.SERVER), new TabletServerWatcher()); } catch (Exception e) { throw new IOException(e); } if (servers != null) { for (String eachServer : servers) { addRunningTabletServer(eachServer); } liveTabletServers.notifyAll(); } else { LOG.info("No lock in " + Constants.SERVER); } } synchronized (liveCommitLogServers) { try { if (zk.exists(LockUtil.getZKPath(conf, Constants.COMMITLOG_SERVER), false) == null) { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, Constants.COMMITLOG_SERVER), null, CreateMode.PERSISTENT); } } catch (Exception e) { throw new IOException(e); } List<String> servers; try { LOG.info("CommitLogServer lock watcher registered."); servers = zk.getChildren(LockUtil.getZKPath(conf, Constants.COMMITLOG_SERVER), new CommitLogServerWatcher()); } catch (Exception e) { throw new IOException(e); } if (servers != null) { for (String eachServer : servers) { liveCommitLogServers.put(eachServer, new ServerMonitorInfo()); } } } } public String getHostName() { return this.hostName; } public String getTestHandlerKey() { return hostName; } public void test() { LOG.debug("called test:" + hostName); } public CloudataConf getConf() { return conf; } public TabletServerInfo[] getTabletServerInfos() { synchronized (liveTabletServers) { if (liveTabletServers.isEmpty()) { return null; } return liveTabletServers.values().toArray(new TabletServerInfo[] {}); } } public void addUser(String userId) throws IOException { // ? ? passwd ? 
if (!AclManager.isSuperGroup(conf, zk)) { throw new IOException("Can't access user management API"); } addUser(conf, zk, userId); } private static void addUser(CloudataConf conf, ZooKeeper zk, String userId) throws IOException { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, USERS + "/" + userId), userId.getBytes(), CreateMode.PERSISTENT); } private static void addSuperGroupUser(CloudataConf conf, ZooKeeper zk, String userId) throws IOException { LockUtil.createNodes(zk, LockUtil.getZKPath(conf, SUPERGROUP), userId.getBytes(), CreateMode.PERSISTENT); } private boolean existSuperGroup() throws IOException { try { return zk.exists(LockUtil.getZKPath(conf, SUPERGROUP), false) != null; } catch (Exception e) { throw new IOException(e); } } public void removeUser(String userId) throws IOException { if (!AclManager.isSuperGroup(conf, zk)) { throw new IOException("Can't access user management API"); } try { LockUtil.delete(zk, LockUtil.getZKPath(conf, USERS + "/" + userId), true); } catch (Exception e) { throw new IOException(e); } } public void addTablePermission(String tableName, String userId, String readWrite) throws IOException { AclManager.checkOwner(conf, zk, schemaMap, tableName); TableSchema tableSchema = schemaMap.get(tableName); if (tableSchema == null) { throw new IOException("No table:" + tableName); } tableSchema.addPermission(userId, readWrite); tableSchema.saveTableSchema(conf, zk); } public void removeTablePermission(String tableName, String userId) throws IOException { AclManager.checkOwner(conf, zk, schemaMap, tableName); TableSchema tableSchema = schemaMap.get(tableName); if (tableSchema == null) { throw new IOException("No table:" + tableName); } tableSchema.removePermission(userId); tableSchema.saveTableSchema(conf, zk); } protected Set<String> getDeadTabletServers() { return this.deadTabletServers; } protected Map<String, ServerMonitorInfo> getLiveCommitLogServers() { return this.liveCommitLogServers; } protected Set<String> 
getDeadCommitLogServers() { return this.deadCommitLogServers; } //Table Name -> # Tablet(total, unassign, assigning) protected Map<String, Integer[]> getNumTablets() { Map<String, Integer[]> result = new HashMap<String, Integer[]>(); synchronized (tabletInfos) { for (Map.Entry<String, Map<String, TabletInfo>> entry : tabletInfos.entrySet()) { String tableName = entry.getKey(); Map<String, TabletInfo> tableTablets = entry.getValue(); result.put(tableName, new Integer[] { tableTablets.keySet().size(), 0, 0 }); } } synchronized (tabletInfoMonitor) { for (TabletInfo eachTableInfo : unassignedTablets.values()) { String tableName = eachTableInfo.getTableName(); Integer[] nums = result.get(tableName); if (nums == null) { result.put(tableName, new Integer[] { 0, 1, 0 }); } else { nums[1]++; //result.put(tableName, nums); } } for (TabletInfo eachTableInfo : assigningTablets.values()) { String tableName = eachTableInfo.getTableName(); Integer[] nums = result.get(tableName); if (nums == null) { result.put(tableName, new Integer[] { 0, 0, 1 }); } else { nums[2]++; //result.put(tableName, nums); } } } return result; } public void heartbeatTS(String hostName, int tabletNum) throws IOException { TabletServerInfo info = liveTabletServers.get(hostName); if (info != null) { info.setLastHeartbeatTime(System.currentTimeMillis()); if (initialTabletEndCount > initialTabletAssignCount * 0.8) { info.setNumOfTablets(tabletNum); } } } public void heartbeatCS(String hostName, ServerMonitorInfo serverMonitorInfo) throws IOException { if (liveCommitLogServers.containsKey(hostName)) { serverMonitorInfo.setLastHeartbeatTime(System.currentTimeMillis()); liveCommitLogServers.put(hostName, serverMonitorInfo); } } /** * @return */ public String getLockStatus() throws IOException { List<String> scLocks = null; try { scLocks = zk.getChildren(LockUtil.getZKPath(conf, Constants.SC_LOCK_PATH), false); } catch (NoNodeException e) { return "Split Lock: 0/0 (current/total)"; } catch (Exception e) { throw 
new IOException(e); } if (scLocks == null || scLocks.size() == 0) { return "Split Lock: 0/0 (current/total)"; } int total = scLocks.size(); int locked = 0; for (String path : scLocks) { try { List<String> children = zk .getChildren(LockUtil.getZKPath(conf, Constants.SC_LOCK_PATH + "/" + path), false); if (children.size() > 0) { locked++; } } catch (Exception e) { throw new IOException(e); } } return "Split Lock: " + locked + "/" + total + " (current/total)"; } public synchronized void startBalancer() throws IOException { if (balancer == null) { balancer = new Balancer(); Thread balancerThread = new Thread(threadGroup, balancer); balancerThread.start(); } else { LOG.info("Balancer already started"); return; } } class Balancer implements Runnable { static final float BALANCE_RATE = 0.9f; List<TabletServerInfo> idleTabletServers = new ArrayList<TabletServerInfo>(); Map<String, TabletServerInfo> targetTabletServers = new HashMap<String, TabletServerInfo>(); List<TabletInfo> targetTablets = new ArrayList<TabletInfo>(); public void run() { try { LOG.info("Balancer.start"); List<TabletServerInfo> tabletServers = new ArrayList<TabletServerInfo>(); synchronized (liveTabletServers) { if (liveTabletServers.isEmpty()) { return; } tabletServers.addAll(liveTabletServers.values()); } int totalTablets = 0; for (TabletServerInfo eachTabletServer : tabletServers) { totalTablets += eachTabletServer.getNumOfTablets(); } int avgTablet = totalTablets / tabletServers.size(); int balancingTarget = 0; for (TabletServerInfo eachTabletServer : tabletServers) { if ((float) eachTabletServer.getNumOfTablets() * BALANCE_RATE < avgTablet) { balancingTarget += (avgTablet - eachTabletServer.getNumOfTablets() * BALANCE_RATE); idleTabletServers.add(eachTabletServer); } else { targetTabletServers.put(eachTabletServer.getHostName(), eachTabletServer); } } if (idleTabletServers.size() == 0 || targetTabletServers.size() == 0) { LOG.info("Balancer.stop cause " + idleTabletServers.size() + "," + 
targetTabletServers.size()); return; } int targetTabletNumPerTabletServer = balancingTarget / targetTabletServers.size(); if (targetTabletNumPerTabletServer > 50) { targetTabletNumPerTabletServer = 50; } synchronized (idleTabletServers) { for (TabletServerInfo eachTabletServer : targetTabletServers.values()) { try { TabletManagerProtocol tabletServer = CloudataMaster.this .connectTabletServer(eachTabletServer); LOG.info("Balancer.call electRebalacingTablets to " + eachTabletServer.getHostName() + "," + targetTabletNumPerTabletServer); tabletServer.electRebalacingTablets(targetTabletNumPerTabletServer); } catch (IOException e) { LOG.warn("Tablet Balancing Error:" + eachTabletServer.getHostName() + "," + e.getMessage(), e); } } while (true) { try { idleTabletServers.wait(); } catch (InterruptedException e) { } List<TabletInfo> tmpTargetTablets = new ArrayList<TabletInfo>(); synchronized (targetTablets) { tmpTargetTablets.addAll(targetTablets); targetTablets.clear(); } for (TabletInfo tabletInfo : tmpTargetTablets) { try { assignTablet(tabletInfo, idleTabletServers); } catch (IOException e) { LOG.error("Can't assign while rebalnacing: " + e.getMessage(), e); } } if (targetTabletServers.size() == 0) { break; } } } } finally { LOG.info("Balancer.end"); balancer = null; } } public void addTargetTablets(String tabletServerHostName, TabletInfo tabletInfo, boolean end) { synchronized (idleTabletServers) { if (end) { targetTabletServers.remove(tabletServerHostName); } if (tabletInfo != null) { /* synchronized (tabletInfoMonitor) { TabletServerInfo tabletServerInfo = liveTabletServers.get(tabletServerHostName); if (tabletServerInfo != null) { tabletServerInfo.subtractNumOfTablets(); } } */ synchronized (targetTablets) { targetTablets.add(tabletInfo); } } idleTabletServers.notify(); } } } public void doRebalacing(String tabletServerHostName, TabletInfo tabletInfo, boolean end) throws IOException { LOG.info("Balancer.received balancing tablet from " + tabletServerHostName + "," 
+ tabletInfo.getTabletName()); balancer.addTargetTablets(tabletServerHostName, tabletInfo, end); } class UpdateMetricsThread implements Runnable { public void run() { while (true) { try { Thread.sleep(10 * 1000); } catch (InterruptedException e) { return; } masterMetrics.setLiveTabletServerNum(liveTabletServers.size()); masterMetrics.setDeadTabletServerNum(deadTabletServers.size()); masterMetrics.setLiveCommitLogServerNum(liveCommitLogServers.size()); masterMetrics.setDeadCommitLogServerNum(deadCommitLogServers.size()); int numTablets = 0; synchronized (tabletInfos) { for (Map.Entry<String, Map<String, TabletInfo>> entry : tabletInfos.entrySet()) { numTablets += entry.getValue().keySet().size(); } } masterMetrics.setTotalTabletNum(numTablets); } } } }