Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.datanode; import static org.apache.hadoop.hdfs.server.namenode.NameNode.DATANODE_PROTOCOL_ADDRESS; import java.io.File; import java.io.IOException; import java.net.ConnectException; import java.net.InetSocketAddress; import java.net.NoRouteToHostException; import java.net.PortUnreachableException; import java.net.SocketTimeoutException; import java.net.UnknownHostException; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.AvatarZooKeeperClient; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.FastProtocolHDFS; import org.apache.hadoop.hdfs.FastWritableHDFS; import org.apache.hadoop.hdfs.protocol.AvatarProtocol; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DataTransferProtocol; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.UnregisteredDatanodeException; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.hdfs.util.InjectionEvent; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.InjectionHandler; import org.apache.hadoop.util.StringUtils; import org.apache.zookeeper.data.Stat; /** * This is an implementation of the AvatarDataNode, a wrapper * for a regular datanode that works with AvatarNode. * * The AvatarDataNode is needed to make a vanilla DataNode send * block reports to Primary and standby namenodes. The AvatarDataNode * does not know which one of the namenodes is primary and which is * secondary. * * Typically, an adminstrator will have to specify the pair of * AvatarNodes via fs1.default.name and fs2.default.name * */ public class AvatarDataNode extends DataNode { static { Configuration.addDefaultResource("avatar-default.xml"); Configuration.addDefaultResource("avatar-site.xml"); } public static final Log LOG = LogFactory.getLog(AvatarDataNode.class.getName()); public AvatarDataNode(Configuration conf, AbstractList<File> dataDirs, String dnThreadName) throws IOException { super(conf, dataDirs); AvatarDataNode.dnThreadName = dnThreadName; } @Override void startDataNode(Configuration conf, AbstractList<File> dataDirs) throws IOException { initGlobalSetting(conf, dataDirs); Collection<String> serviceIds = DFSUtil.getNameServiceIds(conf); List<String> defaultAddresses = getZnodePaths(serviceIds, conf); List<InetSocketAddress> nameAddrs0 = DFSUtil.getRPCAddresses("0", conf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> nameAddrs1 = DFSUtil.getRPCAddresses("1", conf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarAddrs0 = AvatarDataNode.getAvatarNodeAddresses("0", conf, serviceIds); List<InetSocketAddress> avatarAddrs1 = AvatarDataNode.getAvatarNodeAddresses("1", conf, serviceIds); namespaceManager = new AvatarNamespaceManager(nameAddrs0, nameAddrs1, avatarAddrs0, avatarAddrs1, defaultAddresses, DFSUtil.getNameServiceIds(conf)); initDataSetAndScanner(conf, dataDirs, nameAddrs0.size()); } @Override protected void notifyNamenodeReceivedBlock(int namespaceId, Block block, String delHint) throws IOException { if (block == null) { throw new IllegalArgumentException("Block is null"); } ((AvatarNamespaceManager) namespaceManager).notifyNamenodeReceivedBlock(namespaceId, block, delHint); } @Override protected void notifyNamenodeDeletedBlock(int namespaceId, Block block) throws IOException { if (block == null) { throw new IllegalArgumentException("Block is null"); } ((AvatarNamespaceManager) namespaceManager).notifyNamenodeDeletedBlock(namespaceId, block); } /** TODO: will add more details to this later on * Manages OfferService objects for the data node namespaces. * Each namespace has two OfferServices, one for pirmary and one for standby. * Creation, removal, starting, stopping, shutdown on OfferService * objects must be done via APIs in this class. */ class AvatarNamespaceManager extends NamespaceManager { private final Object refreshNamenodesLock = new Object(); AvatarNamespaceManager(List<InetSocketAddress> nameAddrs0, List<InetSocketAddress> nameAddrs1, List<InetSocketAddress> avatarAddrs0, List<InetSocketAddress> avatarAddrs1, List<String> defaultAddrs, Collection<String> nameserviceIds) throws IOException { Iterator<String> it = nameserviceIds.iterator(); for (int i = 0; i < nameAddrs0.size(); i++) { InetSocketAddress nameAddr0 = nameAddrs0.get(i); String nameserviceId = it.hasNext() ? it.next() : null; nameNodeThreads.put(nameAddr0, new ServicePair(nameAddr0, nameAddrs1.get(i), avatarAddrs0.get(i), avatarAddrs1.get(i), defaultAddrs.get(i), nameserviceId)); } } /** * Notify both namenode(s) that we have received a block */ protected void notifyNamenodeReceivedBlock(int namespaceId, Block block, String delHint) throws IOException { NamespaceService servicePair = get(namespaceId); if (servicePair == null) { throw new IOException("Cannot locate OfferService thread for namespace=" + namespaceId); } servicePair.notifyNamenodeReceivedBlock(block, delHint); } /** * Notify both namenode(s) that we have deleted a block */ protected void notifyNamenodeDeletedBlock(int namespaceId, Block block) throws IOException { NamespaceService servicePair = this.get(namespaceId); if (servicePair == null) { throw new IOException("Cannot locate OfferService thread for namespace=" + namespaceId); } servicePair.notifyNamenodeDeletedBlock(block); } void refreshNamenodes(List<InetSocketAddress> nameAddrs0, List<InetSocketAddress> nameAddrs1, List<InetSocketAddress> avatarAddrs0, List<InetSocketAddress> avatarAddrs1, List<String> defaultAddrs, Collection<String> nameserviceIds) throws IOException, InterruptedException { List<Integer> toStart = new ArrayList<Integer>(); List<String> toStartNameserviceIds = new ArrayList<String>(); List<NamespaceService> toStop = new ArrayList<NamespaceService>(); List<InetSocketAddress> toStopNNs = new ArrayList<InetSocketAddress>(); synchronized (refreshNamenodesLock) { synchronized (this) { for (InetSocketAddress nnAddr : nameNodeThreads.keySet()) { if (!nameAddrs0.contains(nnAddr)) { LOG.info("To remove service at " + nnAddr); toStopNNs.add(nnAddr); } } for (InetSocketAddress nnAddr : toStopNNs) { NamespaceService ns = remove(nnAddr); if (ns != null) { LOG.info("Removing service: " + nnAddr); toStop.add(ns); } } Iterator<String> it = nameserviceIds.iterator(); for (int i = 0; i < nameAddrs0.size(); i++) { String nameserviceId = it.hasNext() ? it.next() : null; InetSocketAddress nnAddr = nameAddrs0.get(i); if (!nameNodeThreads.containsKey(nnAddr)) { LOG.info("Adding service " + nameserviceId + " at " + nnAddr); toStart.add(i); toStartNameserviceIds.add(nameserviceId); } } it = toStartNameserviceIds.iterator(); for (Integer i : toStart) { InetSocketAddress nameAddr0 = nameAddrs0.get(i); nameNodeThreads.put(nameAddr0, new ServicePair(nameAddr0, nameAddrs1.get(i), avatarAddrs0.get(i), avatarAddrs1.get(i), defaultAddrs.get(i), it.next())); } } for (NamespaceService nsos : toStop) { nsos.stop(); } for (NamespaceService nsos : toStop) { nsos.join(); } startAll(); } } /** * Refreshes the corresponding offer service if there were any * changes for that avatarnode in the config file. * * @param zeroOrOne * whether to refresh for AvatarZero or AvatarOne */ void refreshOfferService(InetSocketAddress nameAddrs0, InetSocketAddress nameAddrs1, InetSocketAddress avatarAddrs0, InetSocketAddress avatarAddrs1, String serviceName) throws IOException { LOG.info("OfferService refresh called."); synchronized (refreshNamenodesLock) { synchronized (this) { Collection<NamespaceService> allServices = nameNodeThreads.values(); NamespaceService theServicePairToUpdate = null; for (NamespaceService aService : allServices) { if (aService.getNameserviceId().equalsIgnoreCase(serviceName)) { theServicePairToUpdate = aService; break; } } if (theServicePairToUpdate == null) { throw new IOException("Invalid service name."); } boolean wasZeroRefreshed = false; ServicePair toBeRefreshed = (ServicePair) theServicePairToUpdate; if (!(nameAddrs0.equals(toBeRefreshed.nameAddr1) && avatarAddrs0.equals(toBeRefreshed.avatarAddr1))) { LOG.info("Refreshing offer service to node zero for service: " + serviceName); logChangeOf(toBeRefreshed.nameAddr1, nameAddrs0); logChangeOf(toBeRefreshed.avatarAddr1, avatarAddrs0); toBeRefreshed.restartServiceZeroWith(nameAddrs0, avatarAddrs0); remapNameservice(toBeRefreshed.nameAddr1, nameAddrs0); wasZeroRefreshed = true; } boolean wasOneRefreshed = false; if (!(nameAddrs1.equals(toBeRefreshed.nameAddr2) && avatarAddrs1.equals(toBeRefreshed.avatarAddr2))) { LOG.info("Refreshing offer service to node zero for service: " + serviceName); logChangeOf(toBeRefreshed.nameAddr2, nameAddrs1); logChangeOf(toBeRefreshed.avatarAddr2, avatarAddrs1); toBeRefreshed.restartServiceOneWith(nameAddrs1, avatarAddrs1); wasOneRefreshed = true; } if (!wasZeroRefreshed && !wasOneRefreshed) { LOG.warn("Neither of the offerservices were refreshed"); } } } } } private static void logChangeOf(InetSocketAddress prev, InetSocketAddress next) { if (prev.equals(next)) { return; } LOG.info("From: <" + prev + "> To: <" + next + ">"); } public class ServicePair extends NamespaceService { String defaultAddr; InetSocketAddress nameAddr1; InetSocketAddress nameAddr2; DatanodeProtocol namenode1; DatanodeProtocol namenode2; AvatarProtocol avatarnode1; AvatarProtocol avatarnode2; InetSocketAddress avatarAddr1; InetSocketAddress avatarAddr2; boolean doneRegister1 = false; // not yet registered with namenode1 boolean doneRegister2 = false; // not yet registered with namenode2 OfferService offerService1; OfferService offerService2; volatile OfferService primaryOfferService = null; volatile InetSocketAddress primaryAddr = null; Thread of1; Thread of2; // the registration layout version is matching volatile boolean currentRegistrationLayoutMatch1 = true; volatile boolean currentRegistrationLayoutMatch2 = true; int namespaceId; String nameserviceId; Thread spThread; AvatarZooKeeperClient zkClient; private NamespaceInfo nsInfo; DatanodeRegistration nsRegistration; private UpgradeManagerDatanode upgradeManager; private volatile boolean initialized = false; private volatile boolean shouldServiceRun = true; volatile long lastBeingAlive = now(); private ServicePair(InetSocketAddress nameAddr1, InetSocketAddress nameAddr2, InetSocketAddress avatarAddr1, InetSocketAddress avatarAddr2, String defaultAddr, String nameserviceId) { this.nameAddr1 = nameAddr1; this.nameAddr2 = nameAddr2; this.avatarAddr1 = avatarAddr1; this.avatarAddr2 = avatarAddr2; this.defaultAddr = defaultAddr; this.nameserviceId = nameserviceId; zkClient = new AvatarZooKeeperClient(getConf(), null); this.nsRegistration = new DatanodeRegistration(getMachineName()); } private void setNamespaceInfo(NamespaceInfo nsinfo) { this.nsInfo = nsinfo; this.namespaceId = nsinfo.getNamespaceID(); namespaceManager.addNamespace(this); } private void setupNS() throws IOException { // handshake with NN NamespaceInfo nsInfo; nsInfo = handshake(true); setNamespaceInfo(nsInfo); synchronized (AvatarDataNode.this) { setupNSStorage(); } nsRegistration.setIpcPort(ipcServer.getListenerAddress().getPort()); nsRegistration.setInfoPort(infoServer.getPort()); } private void setupNSStorage() throws IOException { Configuration conf = getConf(); StartupOption startOpt = getStartupOption(conf); assert startOpt != null : "Startup option must be set."; boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false); if (simulatedFSDataset) { nsRegistration.setStorageID(storage.getStorageID()); //same as DN nsRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION; nsRegistration.storageInfo.namespaceID = nsInfo.namespaceID; } else { // read storage info, lock data dirs and transition fs state if necessary // first do it at the top level dataDirs // This is done only once when among all namespaces storage.recoverTransitionRead(AvatarDataNode.this, nsInfo, dataDirs, startOpt); // Then do it for this namespace's directory storage.recoverTransitionRead(AvatarDataNode.this, nsInfo.namespaceID, nsInfo, dataDirs, startOpt, nameserviceId); LOG.info("setting up storage: namespaceId=" + namespaceId + ";lv=" + storage.layoutVersion + ";nsInfo=" + nsInfo); nsRegistration.setStorageInfo(storage.getNStorage(nsInfo.namespaceID), storage.getStorageID()); data.initialize(storage); } data.addNamespace(namespaceId, storage.getNameSpaceDataDir(namespaceId), conf); if (blockScanner != null) { blockScanner.start(); blockScanner.addNamespace(namespaceId); } } @Override public UpgradeManagerDatanode getUpgradeManager() { synchronized (AvatarDataNode.this) { if (upgradeManager == null) upgradeManager = new UpgradeManagerDatanode(AvatarDataNode.this, namespaceId); } return upgradeManager; } public void processUpgradeCommand(UpgradeCommand comm) throws IOException { assert upgradeManager != null : "DataNode.upgradeManager is null."; upgradeManager.processUpgradeCommand(comm); } /** * Start distributed upgrade if it should be initiated by the data-node. */ private void startDistributedUpgradeIfNeeded() throws IOException { UpgradeManagerDatanode um = getUpgradeManager(); if (!um.getUpgradeState()) return; um.setUpgradeState(false, um.getUpgradeVersion()); um.startUpgrade(); return; } public void start() { if ((spThread != null) && (spThread.isAlive())) { //Thread is started already return; } LOG.info("start service " + this.nameserviceId); spThread = new Thread(this, dnThreadName + " for namespace " + namespaceId); spThread.setDaemon(true); spThread.start(); } public void stop() { stopServices(); if (spThread != null) { spThread.interrupt(); } } private void initProxy1() throws IOException { synchronized (avatarAddr1) { if (namenode1 == null) { namenode1 = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, DatanodeProtocol.versionID, nameAddr1, getConf()); } if (avatarnode1 == null) { avatarnode1 = (AvatarProtocol) RPC.getProxy(AvatarProtocol.class, AvatarProtocol.versionID, avatarAddr1, getConf()); } } } private void initProxy2() throws IOException { InjectionHandler.processEventIO(InjectionEvent.OFFERSERVICE_BEFORE_INIT_PROXY2, nameAddr2, avatarAddr2); synchronized (avatarAddr2) { if (namenode2 == null) { namenode2 = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, DatanodeProtocol.versionID, nameAddr2, getConf()); } if (avatarnode2 == null) { avatarnode2 = (AvatarProtocol) RPC.getProxy(AvatarProtocol.class, AvatarProtocol.versionID, avatarAddr2, getConf()); } } } private void restartServiceZeroWith(InetSocketAddress namenodeZero, InetSocketAddress avatarZero) throws IOException { synchronized (avatarAddr1) { nameAddr1 = namenodeZero; avatarAddr1 = avatarZero; } restartService1(); } private void restartServiceOneWith(InetSocketAddress namenodeOne, InetSocketAddress avatarOne) throws IOException { synchronized (avatarAddr2) { nameAddr2 = namenodeOne; avatarAddr2 = avatarOne; } restartService2(); } public void restartService1() throws IOException { // Rely on handshake to restart the service. synchronized (avatarAddr1) { stopService1(); joinService1(); doneRegister1 = false; } } void stopService1() { RPC.stopProxy(avatarnode1); RPC.stopProxy(namenode1); avatarnode1 = null; namenode1 = null; if (offerService1 != null) { offerService1.stop(); } if (of1 != null) { of1.interrupt(); } } void stopService2() { RPC.stopProxy(avatarnode2); RPC.stopProxy(namenode2); avatarnode2 = null; namenode2 = null; if (offerService2 != null) { offerService2.stop(); } if (of2 != null) { of2.interrupt(); } } private void joinService1() { if (of1 != null) { try { of1.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } private void joinService2() { if (of2 != null) { try { of2.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } public void restartService2() throws IOException { // Rely on handshake to restart the service. synchronized (avatarAddr2) { stopService2(); joinService2(); doneRegister2 = false; } } /** stop two offer services */ private void stopServices() { this.shouldServiceRun = false; LOG.info("stop services " + this.nameserviceId); stopService1(); stopService2(); if (zkClient != null) { try { zkClient.shutdown(); } catch (InterruptedException ie) { LOG.warn("Zk shutdown is interrupted: ", ie); } } } public void join() { joinServices(); if (spThread != null) { try { spThread.join(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } spThread = null; } } /** Join two offer services */ private void joinServices() { joinService1(); joinService2(); } public void cleanUp() { if (upgradeManager != null) upgradeManager.shutdownUpgrade(); namespaceManager.remove(this.getNNSocketAddress()); shouldServiceRun = false; try { RPC.stopProxy(namenode1); } catch (Exception e) { LOG.warn("Exception stop the namenode RPC threads", e); } try { RPC.stopProxy(namenode2); } catch (Exception e) { LOG.warn("Exception stop the namenode RPC threads", e); } if (blockScanner != null) { blockScanner.removeNamespace(this.getNamespaceId()); } if (data != null) { data.removeNamespace(this.getNamespaceId()); } if (storage != null) { storage.removeNamespaceStorage(this.getNamespaceId()); } } public void shutdown() { stop(); join(); } // connect to both name node if possible. // If doWait is true, then return only when at least one handshake is // successful. // private NamespaceInfo handshake(boolean startup) throws IOException { NamespaceInfo nsInfo = null; boolean firstIsPrimary = false; // When true indicates ZK is null and there is no primary. This is to // enable datanode startups during failover. The assumption is that the // layout version of the Standby and Primary would be consistent when // we failover and hence we can speak to any one of the nodes to find out // the NamespaceInfo. boolean noPrimary = false; do { if (startup) { // The startup option is used when the datanode is first created // We only need to connect to the primary at this point and as soon // as possible. So figure out who the primary is from the ZK try { getPrimaryAddr(); noPrimary = (this.primaryAddr == null); firstIsPrimary = nameAddr1.equals(primaryAddr); } catch (Exception ex) { LOG.error("Could not get the primary address from ZooKeeper", ex); } } try { if ((firstIsPrimary && startup) || !startup || noPrimary) { // only try to connect to the first NN if it is not the // startup connection or if it is primary on startup // This way if it is standby we are not wasting datanode startup // time initProxy1(); if (startup) { nsInfo = handshake(namenode1, nameAddr1); } } } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + nameAddr1 + " not available yet, Zzzzz..."); } catch (NoRouteToHostException nrhe) { LOG.info("NoRouteToHostException connecting to server. " + nameAddr1, nrhe); } catch (PortUnreachableException pue) { LOG.info("PortUnreachableException connecting to server. " + nameAddr1, pue); } catch (UnknownHostException uhe) { LOG.info("UnknownHostException connecting to server. " + nameAddr1, uhe); } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server timeout. " + nameAddr1); } catch (IOException ioe) { LOG.info("Problem connecting to server. " + nameAddr1, ioe); } try { if ((!firstIsPrimary && startup) || !startup || noPrimary) { initProxy2(); if (startup) { NamespaceInfo tempInfo = handshake(namenode2, nameAddr2); // During failover both layouts should match. if (noPrimary && nsInfo != null && tempInfo.getLayoutVersion() != nsInfo.getLayoutVersion()) { throw new IOException("Layout versions don't match on zero, one: " + nsInfo.getLayoutVersion() + ", " + tempInfo.getLayoutVersion()); } nsInfo = tempInfo; } } } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + nameAddr2 + " not available yet, Zzzzz..."); } catch (NoRouteToHostException nrhe) { LOG.info("NoRouteToHostException connecting to server. " + nameAddr2, nrhe); } catch (PortUnreachableException pue) { LOG.info("PortUnreachableException connecting to server. " + nameAddr2, pue); } catch (UnknownHostException uhe) { LOG.info("UnknownHostException connecting to server. " + nameAddr2, uhe); } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server timeout. " + nameAddr2); } catch (RemoteException re) { handleRegistrationError(re, nameAddr2); } catch (IOException ioe) { LOG.info("Problem connecting to server. " + nameAddr2, ioe); } } while (startup && nsInfo == null && shouldServiceRun); return nsInfo; } private NamespaceInfo handshake(DatanodeProtocol node, InetSocketAddress machine) throws IOException { NamespaceInfo nsInfo = null; while (shouldServiceRun) { try { nsInfo = node.versionRequest(); break; } catch (SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + machine); try { Thread.sleep(1000); } catch (InterruptedException ie) { } } } if (nsInfo == null) { // because shouldServiceRun becomes false return null; } LOG.info("Handshake with namenode server: " + machine); String errorMsg = null; // do not fail on incompatible build version if (!nsInfo.getBuildVersion().equals(Storage.getBuildVersion())) { errorMsg = "Incompatible build versions: namenode BV = " + nsInfo.getBuildVersion() + "; datanode BV = " + Storage.getBuildVersion(); LOG.warn(errorMsg); } if (FSConstants.LAYOUT_VERSION < nsInfo.getLayoutVersion()) { // datanode has a newer layout version - allowed LOG.warn("Datanode has newer layout versions than namenode: namenode LV = " + nsInfo.getLayoutVersion() + "; datanode BV = " + FSConstants.LAYOUT_VERSION + " Will continue assuming data node version."); nsInfo.layoutVersion = FSConstants.LAYOUT_VERSION; // to indicate that the upgrade should not be finalized // until we register to namenode with matching LV setRegistrationMatch(machine, false); } else if (FSConstants.LAYOUT_VERSION > nsInfo.getLayoutVersion()) { // namenode has newer layout version - disallowed errorMsg = "Datanode has older layout versions than namenode: namenode LV = " + nsInfo.getLayoutVersion() + "; datanode BV = " + FSConstants.LAYOUT_VERSION + " Datanode will shut down. namenode server: " + machine; LOG.fatal(errorMsg); try { node.errorReport(nsRegistration, DatanodeProtocol.NOTIFY, errorMsg); } catch (SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + machine); } shutdownDN(); throw new IOException(errorMsg); } else { // versions are matching // so we can process finalize upgrade commands // offer service will discard standby DNA_FINALIZE on its own setRegistrationMatch(machine, true); } return nsInfo; } /** * Returns true if we are able to successfully register with namenode */ boolean register(DatanodeProtocol node, InetSocketAddress machine, boolean dnaRegister) throws IOException { if (nsRegistration.getStorageID().equals("")) { setNewStorageID(nsRegistration); } DatanodeRegistration tmp = new DatanodeRegistration(nsRegistration.getName()); tmp.setInfoPort(nsRegistration.getInfoPort()); tmp.setIpcPort(nsRegistration.getIpcPort()); boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false); if (simulatedFSDataset) { tmp.setStorageID(storage.getStorageID()); //same as DN tmp.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION; tmp.storageInfo.namespaceID = nsInfo.namespaceID; } else { tmp.setStorageInfo(storage.getNStorage(namespaceId), storage.getStorageID()); } // reset name to machineName. Mainly for web interface. tmp.name = machineName + ":" + nsRegistration.getPort(); try { tmp = node.register(tmp, DataTransferProtocol.DATA_TRANSFER_VERSION); // if we successded registering for the first time, then we update // the global registration objct if (!doneRegister1 && !doneRegister2) { nsRegistration = tmp; } } catch (SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + machine); return false; } assert ("".equals(storage.getStorageID()) && !"".equals(nsRegistration.getStorageID())) || storage.getStorageID().equals(nsRegistration .getStorageID()) : "New storageID can be assigned only if data-node is not formatted"; if (storage.getStorageID().equals("")) { storage.setStorageID(nsRegistration.getStorageID()); storage.writeAll(); LOG.info("New storage id " + nsRegistration.getStorageID() + " is assigned to data-node " + nsRegistration.getName()); } if (!storage.getStorageID().equals(nsRegistration.getStorageID())) { throw new IOException("Inconsistent storage IDs. Name-node returned " + nsRegistration.getStorageID() + ". Expecting " + storage.getStorageID()); } // offerservice got DNA_REGISTER, so we might be talking to upgraded namenode // do the handshake again if (!getRegistrationMatch(machine) && dnaRegister) { handshake(node, machine); } sendBlocksBeingWrittenReport(node, namespaceId, nsRegistration); return true; } boolean isPrimaryOfferService(OfferService service) { return primaryOfferService == service; } /** * Return true if the last registration for the given offer service * had matching layout version, false otherwise. */ boolean shouldProcessFinalizeCommand(OfferService service) throws IOException { if (service == offerService1) { return currentRegistrationLayoutMatch1; } if (service == offerService2) { return currentRegistrationLayoutMatch2; } else { throw new IOException("Offer service not known!"); } } /** * Sets current registration matching layout for the given * namenode address. */ void setRegistrationMatch(InetSocketAddress nameNodeAddr, boolean value) throws IOException { if (nameAddr1.equals(nameNodeAddr)) { currentRegistrationLayoutMatch1 = value; } else if (nameAddr2.equals(nameNodeAddr)) { currentRegistrationLayoutMatch2 = value; } else { throw new IOException("Machine : " + nameNodeAddr + " is not configured as namenode"); } } boolean getRegistrationMatch(InetSocketAddress nameNodeAddr) { if (nameAddr1.equals(nameNodeAddr)) { return currentRegistrationLayoutMatch1; } else { return currentRegistrationLayoutMatch2; } } void setPrimaryOfferService(OfferService service) { this.primaryOfferService = service; if (service != null) LOG.info("Primary namenode is set to be " + service.avatarnodeAddress); else { LOG.info("Failover has happened. Stop accessing commands from " + "either namenode until the new primary is completely in" + "sync with all the datanodes"); } } boolean isPrimary(InetSocketAddress namenodeAddress) throws InterruptedException { getPrimaryAddr(); if (this.primaryAddr == null) { return false; } return this.primaryAddr.equals(namenodeAddress); } private void getPrimaryAddr() throws InterruptedException { try { Stat stat = new Stat(); this.primaryAddr = NetUtils .createSocketAddr(this.zkClient.getPrimaryAvatarAddress(this.defaultAddr, stat, false)); } catch (InterruptedException ie) { throw ie; } catch (Exception ex) { LOG.error("Could not get the primary from ZooKeeper", ex); this.primaryAddr = null; } } void handleRegistrationError(RemoteException re, InetSocketAddress failedNode) { // If either the primary or standby NN throws these exceptions, this // datanode will exit. I think this is the right behaviour because // the excludes list on both namenode better be the same. String reClass = re.getClassName(); if (failedNode.equals(primaryAddr) && (UnregisteredDatanodeException.class.getName().equals(reClass) || DisallowedDatanodeException.class.getName().equals(reClass) || IncorrectVersionException.class.getName().equals(reClass))) { LOG.warn("Shut down this service: ", re); this.shouldServiceRun = false; } else { LOG.warn(re); } } private void register1() throws IOException { synchronized (avatarAddr1) { InjectionHandler.processEventIO(InjectionEvent.AVATARDATANODE_BEFORE_START_OFFERSERVICE1); if (avatarnode1 != null && namenode1 != null && !doneRegister1 && register(namenode1, nameAddr1, false)) { InjectionHandler.processEvent(InjectionEvent.AVATARDATANODE_START_OFFERSERVICE1); doneRegister1 = true; offerService1 = new OfferService(AvatarDataNode.this, this, namenode1, nameAddr1, avatarnode1, avatarAddr1); of1 = new Thread(offerService1, "OfferService1 " + nameAddr1); of1.start(); } } } private void register2() throws IOException { synchronized (avatarAddr2) { InjectionHandler.processEventIO(InjectionEvent.AVATARDATANODE_BEFORE_START_OFFERSERVICE2); if (avatarnode2 != null && namenode2 != null && !doneRegister2 && register(namenode2, nameAddr2, false)) { InjectionHandler.processEvent(InjectionEvent.AVATARDATANODE_START_OFFERSERVICE2); doneRegister2 = true; offerService2 = new OfferService(AvatarDataNode.this, this, namenode2, nameAddr2, avatarnode2, avatarAddr2); of2 = new Thread(offerService2, "OfferService2 " + nameAddr2); of2.start(); } } } @Override public void run() { LOG.info(nsRegistration + "In AvatarDataNode.run, data = " + data); try { // set up namespace try { setupNS(); } catch (IOException ioe) { // Initial handshake, storage recovery or registration failed LOG.fatal(nsRegistration + " initialization failed for namespaceId " + namespaceId + " default addr: " + defaultAddr, ioe); return; } while (shouldServiceRun && shouldRun) { InetSocketAddress failedNode = null; try { // try handshaking with any namenode that we have not yet tried handshake(false); try { failedNode = nameAddr1; register1(); failedNode = nameAddr2; } finally { register2(); } this.initialized = true; startDistributedUpgradeIfNeeded(); } catch (RemoteException re) { handleRegistrationError(re, failedNode); } catch (IOException ioe) { //other io exception LOG.warn("IOException: ", ioe); } if (shouldServiceRun && shouldRun) { try { Thread.sleep(5000); } catch (InterruptedException ie) { } } } } catch (Exception e) { LOG.warn("Exception: ", e); } finally { LOG.info(nsRegistration + ":Finishing AvatarDataNode in: " + data); stopServices(); joinServices(); cleanUp(); } } /** * Notify both namenode(s) that we have received a block */ @Override public void notifyNamenodeReceivedBlock(Block block, String delHint) { if (offerService1 != null) { offerService1.notifyNamenodeReceivedBlock(block, delHint); } if (offerService2 != null) { offerService2.notifyNamenodeReceivedBlock(block, delHint); } } /** * Notify both namenode(s) that we have deleted a block */ @Override public void notifyNamenodeDeletedBlock(Block block) { if (offerService1 != null) { offerService1.notifyNamenodeDeletedBlock(block); } if (offerService2 != null) { offerService2.notifyNamenodeDeletedBlock(block); } } /** * Update received and retry list, when blocks are deleted */ void removeReceivedBlocks(Block[] list) { if (offerService1 != null) { offerService1.removeReceivedBlocks(list); } if (offerService2 != null) { offerService2.removeReceivedBlocks(list); } } @Override public DatanodeRegistration getNsRegistration() { return nsRegistration; } @Override public DatanodeProtocol getDatanodeProtocol() { return this.primaryOfferService.namenode; } @Override public InetSocketAddress getNNSocketAddress() { return this.nameAddr1; } @Override public int getNamespaceId() { return this.namespaceId; } @Override public String getNameserviceId() { return this.nameserviceId; } @Override public boolean initialized() { return initialized; } @Override public boolean isAlive() { return shouldServiceRun && spThread.isAlive(); } @Override public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { if (this.offerService1 != null) this.offerService1.reportBadBlocks(blocks); if (this.offerService2 != null) this.offerService2.reportBadBlocks(blocks); } @Override public LocatedBlock syncBlock(Block block, List<BlockRecord> syncList, boolean closeFile, List<InterDatanodeProtocol> datanodeProxies, long deadline) throws IOException { if (offerService1 != null && isPrimaryOfferService(offerService1)) return offerService1.syncBlock(block, syncList, closeFile, datanodeProxies, deadline); if (offerService2 != null && isPrimaryOfferService(offerService2)) return offerService2.syncBlock(block, syncList, closeFile, datanodeProxies, deadline); return null; } @Override public void scheduleBlockReport(long delay) { if (this.offerService1 != null) this.offerService1.scheduleBlockReport(delay); if (this.offerService2 != null) this.offerService2.scheduleBlockReport(delay); } // Only use for testing public void scheduleBlockReceivedAndDeleted(long delay) { if (this.offerService1 != null) this.offerService1.scheduleBlockReceivedAndDeleted(delay); if (this.offerService2 != null) this.offerService2.scheduleBlockReceivedAndDeleted(delay); } } /** * Tells the datanode to start the shutdown process. */ public synchronized void shutdownDN() { shouldRun = false; if (namespaceManager != null) { namespaceManager.stopAll(); } } public boolean shouldRun() { return shouldRun; } DataStorage getStorage() { return storage; } private static void printUsage() { System.err.println("Usage: java DataNode"); System.err.println(" [-rollback]"); } /** * Parse and verify command line arguments and set configuration parameters. * * @return false if passed argements are incorrect */ private static boolean parseArguments(String args[], Configuration conf) { int argsLen = (args == null) ? 0 : args.length; StartupOption startOpt = StartupOption.REGULAR; for (int i = 0; i < argsLen; i++) { String cmd = args[i]; if ("-r".equalsIgnoreCase(cmd) || "--rack".equalsIgnoreCase(cmd)) { LOG.error("-r, --rack arguments are not supported anymore. RackID " + "resolution is handled by the NameNode."); System.exit(-1); } else if ("-rollback".equalsIgnoreCase(cmd)) { startOpt = StartupOption.ROLLBACK; } else if ("-regular".equalsIgnoreCase(cmd)) { startOpt = StartupOption.REGULAR; } else return false; } setStartupOption(conf, startOpt); return true; } private static void setStartupOption(Configuration conf, StartupOption opt) { conf.set("dfs.datanode.startup", opt.toString()); } /** * Returns the IP address of the namenode */ static InetSocketAddress getNameNodeAddress(Configuration conf, String cname, String rpcKey, String cname2) { String fs = conf.get(cname); String fs1 = conf.get(rpcKey); String fs2 = conf.get(cname2); Configuration newconf = new Configuration(conf); newconf.set("fs.default.name", fs); if (fs1 != null) { newconf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, fs1); } if (fs2 != null) { newconf.set("dfs.namenode.dn-address", fs2); } return DataNode.getNameNodeAddress(newconf); } /** * Returns the IP:port address of the avatar node */ private static List<InetSocketAddress> getAvatarNodeAddresses(String suffix, Configuration conf, Collection<String> serviceIds) throws IOException { List<InetSocketAddress> namenodeAddresses = DFSUtil.getRPCAddresses(suffix, conf, serviceIds, FSConstants.DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarnodeAddresses = new ArrayList<InetSocketAddress>(namenodeAddresses.size()); for (InetSocketAddress namenodeAddress : namenodeAddresses) { avatarnodeAddresses.add(new InetSocketAddress(namenodeAddress.getAddress(), conf.getInt("dfs.avatarnode.port", namenodeAddress.getPort() + 1))); } return avatarnodeAddresses; } public static AvatarDataNode makeInstance(String[] dataDirs, Configuration conf) throws IOException { ArrayList<File> dirs = new ArrayList<File>(); for (int i = 0; i < dataDirs.length; i++) { File data = new File(dataDirs[i]); try { DiskChecker.checkDir(data); dirs.add(data); } catch (DiskErrorException e) { LOG.warn("Invalid directory in dfs.data.dir: " + e.getMessage()); } } if (dirs.size() > 0) { String dnThreadName = "AvatarDataNode: [" + StringUtils.arrayToString(dataDirs) + "]"; return new AvatarDataNode(conf, dirs, dnThreadName); } LOG.error("All directories in dfs.data.dir are invalid."); return null; } /** Instantiate a single datanode object. This must be run by invoking * {@link DataNode#runDatanodeDaemon(DataNode)} subsequently. */ public static AvatarDataNode instantiateDataNode(String args[], Configuration conf) throws IOException { if (conf == null) conf = new Configuration(); if (!parseArguments(args, conf)) { printUsage(); return null; } if (conf.get("dfs.network.script") != null) { LOG.error("This configuration for rack identification is not supported" + " anymore. RackID resolution is handled by the NameNode."); System.exit(-1); } String[] dataDirs = getListOfDataDirs(conf); return makeInstance(dataDirs, conf); } public static AvatarDataNode createDataNode(String args[], Configuration conf) throws IOException { AvatarDataNode dn = instantiateDataNode(args, conf); dn.runDatanodeDaemon(); return dn; } @Override public void refreshNamenodes(Configuration conf) throws IOException { LOG.info("refresh namenodes"); try { Collection<String> serviceIds = DFSUtil.getNameServiceIds(conf); List<InetSocketAddress> nameAddrs0 = DFSUtil.getRPCAddresses("0", conf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> nameAddrs1 = DFSUtil.getRPCAddresses("1", conf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarAddrs0 = getAvatarNodeAddresses("0", conf, serviceIds); List<InetSocketAddress> avatarAddrs1 = getAvatarNodeAddresses("1", conf, serviceIds); List<String> defaultAddresses = getZnodePaths(serviceIds, conf); ((AvatarNamespaceManager) namespaceManager).refreshNamenodes(nameAddrs0, nameAddrs1, avatarAddrs0, avatarAddrs1, defaultAddresses, serviceIds); } catch (InterruptedException e) { throw new IOException(e.getCause()); } } private static List<String> getZnodePaths(Collection<String> serviceIds, Configuration conf) { List<String> datanodeProtocolZnodePaths = new ArrayList<String>(Math.max(serviceIds.size(), 1)); if (serviceIds.isEmpty()) { datanodeProtocolZnodePaths.add(conf.get(NameNode.DATANODE_PROTOCOL_ADDRESS)); } else { for (String service : serviceIds) { datanodeProtocolZnodePaths.add(conf.get(NameNode.DATANODE_PROTOCOL_ADDRESS + "." + service)); } } return datanodeProtocolZnodePaths; } @Override public void refreshOfferService(String serviceNameToRefresh) throws IOException { Configuration currentConf = new Configuration(); Collection<String> serviceIds = DFSUtil.getNameServiceIds(currentConf); String[] allServices = serviceIds.toArray(new String[] {}); for (int i = 0; i < allServices.length; i++) { if (((String) allServices[i]).equalsIgnoreCase(serviceNameToRefresh)) { List<InetSocketAddress> nameAddrs0 = DFSUtil.getRPCAddresses("0", currentConf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> nameAddrs1 = DFSUtil.getRPCAddresses("1", currentConf, serviceIds, DATANODE_PROTOCOL_ADDRESS, DFS_NAMENODE_RPC_ADDRESS_KEY); List<InetSocketAddress> avatarAddrs0 = getAvatarNodeAddresses("0", currentConf, serviceIds); List<InetSocketAddress> avatarAddrs1 = getAvatarNodeAddresses("1", currentConf, serviceIds); ((AvatarNamespaceManager) namespaceManager).refreshOfferService(nameAddrs0.get(i), nameAddrs1.get(i), avatarAddrs0.get(i), avatarAddrs1.get(i), serviceNameToRefresh); return; } } throw new IOException("Service name (=" + serviceNameToRefresh + ") not found. "); } public static void main(String argv[]) { org.apache.hadoop.hdfs.DnsMonitorSecurityManager.setTheManager(); try { StringUtils.startupShutdownMessage(AvatarDataNode.class, argv, LOG); FastWritableHDFS.init(); FastProtocolHDFS.init(); AvatarDataNode avatarnode = createDataNode(argv, null); if (avatarnode != null) { avatarnode.waitAndShutdown(); } } catch (Throwable e) { LOG.error(StringUtils.stringifyException(e)); System.exit(-1); } } }