org.apache.jxtadoop.hdfs.server.datanode.DataNode.java Source code

Here is the source code for org.apache.jxtadoop.hdfs.server.datanode.DataNode.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jxtadoop.hdfs.server.datanode;

import java.awt.AWTException;
import java.awt.Image;
import java.awt.MenuItem;
import java.awt.PopupMenu;
import java.awt.SystemTray;
import java.awt.Toolkit;
import java.awt.TrayIcon;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.nio.channels.SocketChannel;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;

import net.jxta.peergroup.PeerGroup;
import net.jxta.socket.JxtaServerSocket;
import net.jxta.socket.JxtaSocket;
import net.jxta.socket.JxtaSocketAddress;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.jxtadoop.conf.Configuration;
import org.apache.jxtadoop.conf.Configured;
import org.apache.jxtadoop.hdfs.HDFSPolicyProvider;
import org.apache.jxtadoop.hdfs.desktop.DesktopTray;
import org.apache.jxtadoop.hdfs.p2p.DatanodePeer;
import org.apache.jxtadoop.hdfs.p2p.P2PConstants;
import org.apache.jxtadoop.hdfs.protocol.Block;
import org.apache.jxtadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.jxtadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.jxtadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.jxtadoop.hdfs.protocol.DatanodeID;
import org.apache.jxtadoop.hdfs.protocol.DatanodeInfo;
import org.apache.jxtadoop.hdfs.protocol.FSConstants;
import org.apache.jxtadoop.hdfs.protocol.LocatedBlock;
import org.apache.jxtadoop.hdfs.protocol.UnregisteredDatanodeException;
import org.apache.jxtadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.jxtadoop.hdfs.server.common.HdfsConstants;
import org.apache.jxtadoop.hdfs.server.common.GenerationStamp;
import org.apache.jxtadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.jxtadoop.hdfs.server.common.Storage;
import org.apache.jxtadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.jxtadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.jxtadoop.hdfs.server.namenode.NameNode;
import org.apache.jxtadoop.hdfs.server.protocol.BlockCommand;
import org.apache.jxtadoop.hdfs.server.protocol.BlockMetaDataInfo;
import org.apache.jxtadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.jxtadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.jxtadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.jxtadoop.hdfs.server.protocol.DisallowedDatanodeException;
import org.apache.jxtadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.jxtadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.jxtadoop.hdfs.server.protocol.UpgradeCommand;
import org.apache.jxtadoop.io.IOUtils;
import org.apache.jxtadoop.io.Text;
import org.apache.jxtadoop.ipc.RPC;
import org.apache.jxtadoop.ipc.RemoteException;
import org.apache.jxtadoop.ipc.Server;
import org.apache.jxtadoop.net.DNS;
import org.apache.jxtadoop.net.NetUtils;
import org.apache.jxtadoop.security.SecurityUtil;
import org.apache.jxtadoop.security.authorize.ConfiguredPolicy;
import org.apache.jxtadoop.security.authorize.PolicyProvider;
import org.apache.jxtadoop.security.authorize.ServiceAuthorizationManager;
import org.apache.jxtadoop.util.Daemon;
import org.apache.jxtadoop.util.DiskChecker;
import org.apache.jxtadoop.util.ReflectionUtils;
import org.apache.jxtadoop.util.StringUtils;
import org.apache.jxtadoop.util.DiskChecker.DiskErrorException;
import org.apache.jxtadoop.util.DiskChecker.DiskOutOfSpaceException;

/**********************************************************
 * DataNode is a class (and program) that stores a set of
 * blocks for a DFS deployment.  A single deployment can
 * have one or many DataNodes.  Each DataNode communicates
 * regularly with a single NameNode.  It also communicates
 * with client code and other DataNodes from time to time.
 *
 * DataNodes store a series of named blocks.  The DataNode
 * allows client code to read these blocks, or to write new
 * block data.  The DataNode may also, in response to instructions
 * from its NameNode, delete blocks or copy blocks to/from other
 * DataNodes.
 *
 * The DataNode maintains just one critical table:
 *   block-> stream of bytes (of BLOCK_SIZE or less)
 *
 * This info is stored on a local disk.  The DataNode
 * reports the table's contents to the NameNode upon startup
 * and every so often afterwards.
 *
 * DataNodes spend their lives in an endless loop of asking
 * the NameNode for something to do.  A NameNode cannot connect
 * to a DataNode directly; a NameNode simply returns values from
 * functions invoked by a DataNode.
 *
 * DataNodes maintain an open server socket so that client code 
 * or other DataNodes can read/write data.  The host/port for
 * this server is reported to the NameNode, which then sends that
 * information to clients or other DataNodes that might be interested.
 *
 **********************************************************/
@SuppressWarnings({ "unused" })
public class DataNode extends Configured
        implements InterDatanodeProtocol, ClientDatanodeProtocol, FSConstants, Runnable {
    public static final Log LOG = LogFactory.getLog(DataNode.class);

    static {
        Configuration.addDefaultResource("hdfs-default.xml");
        Configuration.addDefaultResource("hdfs-site.xml");
        Configuration.addDefaultResource("hdfs-p2p.xml");
    }
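
    /* A minimal lookup sketch: once the default resources above are registered,
     * every Configuration created afterwards resolves keys against them. This
     * assumes hdfs-p2p.xml supplies hadoop.p2p.info.timeout, as startDataNode()
     * below expects:
     *
     *   Configuration conf = new Configuration();
     *   int infoTimeout = Integer.parseInt(conf.get("hadoop.p2p.info.timeout"));
     */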

    public static final String DN_CLIENTTRACE_FORMAT = "src: %s" + // src IP
            ", dest: %s" + // dst IP
            ", bytes: %s" + // byte count
            ", op: %s" + // operation
            ", cliID: %s" + // DFSClient id
            ", srvID: %s" + // DatanodeRegistration
            ", blockid: %s"; // block id
    static final Log ClientTraceLog = LogFactory.getLog(DataNode.class.getName() + ".clienttrace");
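
    /* A minimal usage sketch for DN_CLIENTTRACE_FORMAT, one value per %s; the
     * literal values below are invented for illustration:
     *
     *   ClientTraceLog.info(String.format(DN_CLIENTTRACE_FORMAT,
     *       srcPeer, dstPeer, 65536, "HDFS_READ",
     *       "DFSClient_42", dnRegistration.getStorageID(), 1234L));
     */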

    public DatanodeProtocol namenode = null;
    public FSDatasetInterface data = null;
    public DatanodeRegistration dnRegistration = null;
    /**
     * The datanode peer, as defined by JXTA
     */
    private DatanodePeer dnpeer = null;

    volatile boolean shouldRun = true;
    private LinkedList<Block> receivedBlockList = new LinkedList<Block>();
    /** list of blocks being recovered */
    private final Map<Block, Block> ongoingRecovery = new HashMap<Block, Block>();
    private LinkedList<String> delHints = new LinkedList<String>();
    public final static String EMPTY_DEL_HINT = "";
    AtomicInteger xmitsInProgress = new AtomicInteger();
    Daemon dataXceiverServer = null;
    Daemon dataXceiver = null;
    ThreadGroup threadGroup = null;
    long blockReportInterval;
    //disallow the sending of BR before instructed to do so
    long lastBlockReport = 0;
    boolean resetBlockReportTime = true;
    long initialBlockReportDelay = BLOCKREPORT_INITIAL_DELAY * 1000L;
    long lastHeartbeat = 0;
    long heartBeatInterval;
    private DataStorage storage = null;
    DataNodeMetrics myMetrics;
    private InetSocketAddress selfAddr;
    private static DataNode datanodeObject = null;
    private Thread dataNodeThread = null;
    public String machineName;
    private static String dnThreadName;
    int socketTimeout;
    int socketWriteTimeout = 0;
    boolean transferToAllowed = true;
    int writePacketSize = 0;
    private boolean isConnected = false;

    public DataBlockScanner blockScanner = null;
    public Daemon blockScannerThread = null;

    private static final Random R = new Random();

    // For InterDataNodeProtocol
    public Server ipcServer;

    /**
     * Current system time.
     * @return current time in msec.
     */
    static long now() {
        return System.currentTimeMillis();
    }

    /**
     * Create the DataNode given a configuration and an array of dataDirs.
     * 'dataDirs' is where the blocks are stored.
     */
    DataNode(Configuration conf, AbstractList<File> dataDirs) throws IOException {
        super(conf);
        datanodeObject = this;

        dnpeer = new DatanodePeer("DN - " + System.getProperty("jxtadoop.datanode.id"));

        try {
            startDataNode(conf, dataDirs);
        } catch (IOException ie) {
            shutdown();
            throw ie;
        }
    }

    /**
     * This method starts the data node with the specified conf.
     * 
     * @param conf - the configuration
     *  if conf's CONFIG_PROPERTY_SIMULATED property is set
     *  then a simulated storage based data node is created.
     * 
     * @param dataDirs - only for a non-simulated storage data node
     * @throws IOException
     */
    void startDataNode(Configuration conf, AbstractList<File> dataDirs) throws IOException {
        dnpeer.initialize();
        dnpeer.start();

        while (dnpeer.getRpcSocketAddress() == null) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
        }

        LOG.debug("Datanode peer ID : " + dnpeer.getPeerID().toString());

        // use configured nameserver & interface to get local hostname
        /*if (conf.get("slave.host.name") != null) {
          machineName = conf.get("slave.host.name");   
        }
        if (machineName == null) {
          machineName = DNS.getDefaultHost(
                                 conf.get("dfs.datanode.dns.interface","default"),
                                 conf.get("dfs.datanode.dns.nameserver","default"));
        }*/
        machineName = dnpeer.getPeerIDwithoutURN();
        LOG.debug("Datanode registration name : " + machineName);

        // this.socketTimeout =  conf.getInt("dfs.socket.timeout",
        //                                   HdfsConstants.READ_TIMEOUT);
        //this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout",
        //                                      HdfsConstants.WRITE_TIMEOUT);
        /* Based on results on different platforms, we might need to set the default 
         * to false on some of them. */
        this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed", true);
        this.writePacketSize = conf.getInt("dfs.write.packet.size", 64 * 1024);
        /*String address = 
          NetUtils.getServerAddress(conf,
                            "dfs.datanode.bindAddress", 
                            "dfs.datanode.port",
                            "dfs.datanode.address");
        InetSocketAddress socAddr = NetUtils.createSocketAddr(address);
        int tmpPort = socAddr.getPort();*/
        storage = new DataStorage();
        // construct registration
        // this.dnRegistration = new DatanodeRegistration(machineName + ":" + tmpPort);
        this.dnRegistration = new DatanodeRegistration(machineName);

        // connect to name node
        //this.namenode = (DatanodeProtocol) RPC.waitForProxy(DatanodeProtocol.class,
        //                           DatanodeProtocol.versionID, nameNodeAddr, conf);
        this.namenode = (DatanodeProtocol) RPC.waitForProxy(DatanodeProtocol.class, DatanodeProtocol.versionID,
                dnpeer.getPeerGroup(), dnpeer.getRpcSocketAddress(), conf);
        // get version and id info from the name-node
        NamespaceInfo nsInfo = handshake();
        StartupOption startOpt = getStartupOption(conf);
        assert startOpt != null : "Startup option must be set.";

        boolean simulatedFSDataset = conf.getBoolean("dfs.datanode.simulateddatastorage", false);
        if (simulatedFSDataset) {
            setNewStorageID(dnRegistration);
            dnRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
            dnRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
            // it would have been better to pass storage as a parameter to
            // constructor below - need to augment ReflectionUtils used below.
            conf.set("StorageId", dnRegistration.getStorageID());
            try {
                //Equivalent of following (can't do because Simulated is in test dir)
                //  this.data = new SimulatedFSDataset(conf);
                this.data = (FSDatasetInterface) ReflectionUtils.newInstance(
                        Class.forName("org.apache.jxtadoop.hdfs.server.datanode.SimulatedFSDataset"), conf);
            } catch (ClassNotFoundException e) {
                throw new IOException(StringUtils.stringifyException(e));
            }
        } else { // real storage
            // read storage info, lock data dirs and transition fs state if necessary
            storage.recoverTransitionRead(nsInfo, dataDirs, startOpt);
            // adjust

            this.dnRegistration.setStorageInfo(storage);
            // initialize data node internal structure
            this.data = new FSDataset(storage, conf);
        }

        // find free port
        /*ServerSocket ss = (socketWriteTimeout > 0) ? 
              ServerSocketChannel.open().socket() : new ServerSocket();
        Server.bind(ss, socAddr, 0);
        ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE); 
        // adjust machine name with the actual port
        tmpPort = ss.getLocalPort();
        selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(),
                                 tmpPort);
        this.dnRegistration.setName(machineName + ":" + tmpPort);*/
        JxtaServerSocket ss = new JxtaServerSocket(dnpeer.getPeerGroup(), dnpeer.getInfoPipeAdvertisement(),
                Integer.parseInt(conf.get("hadoop.p2p.info.backlog")),
                Integer.parseInt(conf.get("hadoop.p2p.info.timeout")));

        LOG.info("Opened info server");

        this.threadGroup = new ThreadGroup("dataXceiverServer");
        this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(ss, conf, this));
        this.threadGroup.setDaemon(true); // auto destroy when empty

        this.blockReportInterval = conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
        this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay", BLOCKREPORT_INITIAL_DELAY)
                * 1000L;
        if (this.initialBlockReportDelay >= blockReportInterval) {
            this.initialBlockReportDelay = 0;
            LOG.info("dfs.blockreport.initialDelay is greater than " + "dfs.blockreport.intervalMsec."
                    + " Setting initial delay to 0 msec:");
        }
        this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;

        //initialize periodic block scanner
        String reason = null;
        if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
            reason = "verification is turned off by configuration";
        } else if (!(data instanceof FSDataset)) {
            reason = "verifcation is supported only with FSDataset";
        }
        if (reason == null) {
            blockScanner = new DataBlockScanner(this, (FSDataset) data, conf);
        } else {
            LOG.info("Periodic Block Verification is disabled because " + reason + ".");
        }

        myMetrics = new DataNodeMetrics(conf, dnRegistration.getStorageID());

        // set service-level authorization security policy
        if (conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
            PolicyProvider policyProvider = (PolicyProvider) (ReflectionUtils.newInstance(conf.getClass(
                    PolicyProvider.POLICY_PROVIDER_CONFIG, HDFSPolicyProvider.class, PolicyProvider.class), conf));
            SecurityUtil.setPolicy(new ConfiguredPolicy(conf, policyProvider));
        }

        //init ipc server
        /*InetSocketAddress ipcAddr = NetUtils.createSocketAddr(
            conf.get("dfs.datanode.ipc.address"));
        ipcServer = RPC.getServer(this, ipcAddr.getHostName(), ipcAddr.getPort(), 
            conf.getInt("dfs.datanode.handler.count", 3), false, conf);
        ipcServer.start();
        dnRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());
        */
        ipcServer = RPC.getServer(this, dnpeer.getPeerGroup(), dnpeer.getServerSocketAddress(),
                conf.getInt("dfs.datanode.handler.count", 3), false, conf);
        dnRegistration.setIpcPort(P2PConstants.RPCPIPEID);

        LOG.info("dnRegistration = " + dnRegistration);

    }

    /**
     * Creates either an NIO or a regular socket, depending on socketWriteTimeout.
     */
    protected Socket newSocket() throws IOException {
        return (socketWriteTimeout > 0) ? SocketChannel.open().socket() : new Socket();
    }

    private NamespaceInfo handshake() throws IOException {
        NamespaceInfo nsInfo = new NamespaceInfo();
        while (shouldRun) {
            try {
                nsInfo = namenode.versionRequest();
                break;
            } catch (SocketTimeoutException e) { // namenode is busy
                LOG.info("Problem connecting to server: " + getNameNodeAddr());
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        String errorMsg = null;
        // verify build version
        if (!nsInfo.getBuildVersion().equals(Storage.getBuildVersion())) {
            errorMsg = "Incompatible build versions: namenode BV = " + nsInfo.getBuildVersion() + "; datanode BV = "
                    + Storage.getBuildVersion();
            LOG.fatal(errorMsg);
            try {
                namenode.errorReport(dnRegistration, DatanodeProtocol.NOTIFY, errorMsg);
            } catch (SocketTimeoutException e) { // namenode is busy
                LOG.info("Problem connecting to server: " + getNameNodeAddr());
            }
            throw new IOException(errorMsg);
        }
        assert FSConstants.LAYOUT_VERSION == nsInfo
                .getLayoutVersion() : "Data-node and name-node layout versions must be the same."
                        + " Expected: " + FSConstants.LAYOUT_VERSION + " actual " + nsInfo.getLayoutVersion();
        return nsInfo;
    }

    /** Return the DataNode object
     * 
     */
    public static DataNode getDataNode() {
        return datanodeObject;
    }

    public static InterDatanodeProtocol createInterDataNodeProtocolProxy(DatanodeID datanodeid, Configuration conf)
            throws IOException {

        //InetSocketAddress addr = NetUtils.createSocketAddr(
        //    datanodeid.getHost() + ":" + datanodeid.getIpcPort());
        PeerGroup dnpg = DataNode.getDataNode().getDnPeer().getPeerGroup();
        JxtaSocketAddress jsa = DataNode.getDataNode().getDnPeer().getServerSocketAddress();

        if (InterDatanodeProtocol.LOG.isDebugEnabled()) {
            InterDatanodeProtocol.LOG.debug("InterDatanodeProtocol addr=" + datanodeid.getPeerId());
        }
        //return (InterDatanodeProtocol)RPC.getProxy(InterDatanodeProtocol.class,
        //    InterDatanodeProtocol.versionID, addr, conf);
        return (InterDatanodeProtocol) RPC.getServer(InterDatanodeProtocol.class, dnpg, jsa, conf);

    }

    public JxtaSocketAddress getNameNodeAddr() {
        try {
            return dnpeer.getRpcSocketAddress();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    public InetSocketAddress getSelfAddr() {
        return selfAddr;
    }

    DataNodeMetrics getMetrics() {
        return myMetrics;
    }

    DatanodePeer getDnPeer() {
        return dnpeer;
    }

    /**
     * Return the namenode's identifier
     */
    public String getNamenode() {
        //return namenode.toString();
        return "<namenode>";
    }

    public static void setNewStorageID(DatanodeRegistration dnReg) {
        /* Return 
         * "DS-randInt-ipaddr-peerid-currentTimeMillis"
         * It is considered extremely rare for all these numbers to match
         * on a different machine accidentally, for the following reasons:
         * a) SecureRandom(INT_MAX) is pretty much random (1 in 2 billion), and
         * b) Good chance the ip address would be different, and
         * c) Even on the same machine, the Datanode is designed to use different ports.
         * d) Good chance that these are started at different times.
         * For a conflict to occur, all four of the above have to match!
         * The format of this string can be changed anytime in the future without
         * affecting its functionality.
         */
        String ip = "unknownIP";
        try {
            ip = DNS.getDefaultIP("default");
        } catch (UnknownHostException ignored) {
            LOG.warn("Could not find ip address of \"default\" inteface.");
        }

        int rand = 0;
        try {
            rand = SecureRandom.getInstance("SHA1PRNG").nextInt(Integer.MAX_VALUE);
        } catch (NoSuchAlgorithmException e) {
            LOG.warn("Could not use SecureRandom");
            rand = R.nextInt(Integer.MAX_VALUE);
        }
        dnReg.storageID = "DS-" + rand + "-" + ip + "-" + dnReg.getPeerId() + "-" + System.currentTimeMillis();
    }
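
    /* For illustration, the concatenation above yields IDs of the form
     * "DS-<randInt>-<ip>-<peer id>-<millis>", e.g. (all values invented):
     *   DS-1643550417-192.168.1.12-urn:jxta:uuid-59616261-1294956380401
     */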

    /**
     * Register datanode
     * <p>
     * The datanode needs to register with the namenode on startup in order
     * 1) to report which storage it is serving now and 
     * 2) to receive a registrationID 
     * issued by the namenode to recognize registered datanodes.
     * 
     * @see FSNamesystem#registerDatanode(DatanodeRegistration)
     * @throws IOException
     */
    private void register() throws IOException {
        if (dnRegistration.getStorageID().equals("")) {
            setNewStorageID(dnRegistration);
        }
        while (shouldRun) {
            try {
                // reset name to machineName. Mainly for web interface.
                //dnRegistration.name = machineName + ":" + dnRegistration.getPort();
                dnRegistration.id = machineName;
                dnRegistration = namenode.register(dnRegistration);
                break;
            } catch (SocketTimeoutException e) { // namenode is busy
                LOG.info("Problem connecting to server: " + getNameNodeAddr());
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        assert ("".equals(storage.getStorageID()) && !"".equals(dnRegistration.getStorageID()))
                || storage.getStorageID().equals(dnRegistration
                        .getStorageID()) : "New storageID can be assigned only if data-node is not formatted";
        if (storage.getStorageID().equals("")) {
            storage.setStorageID(dnRegistration.getStorageID());
            storage.writeAll();
            LOG.info("New storage id " + dnRegistration.getStorageID() + " is assigned to data-node "
                    + dnRegistration.getPeerId());
        }
        if (!storage.getStorageID().equals(dnRegistration.getStorageID())) {
            throw new IOException("Inconsistent storage IDs. Name-node returned " + dnRegistration.getStorageID()
                    + ". Expecting " + storage.getStorageID());
        }

        // random short delay - helps scatter the BR from all DNs
        scheduleBlockReport(initialBlockReportDelay);
    }

    /**
     * Shut down this instance of the datanode.
     * Returns only after shutdown is complete.
     * This method can only be called by the offerService thread.
     * Otherwise, deadlock might occur.
     */
    public void shutdown() {
        if (ipcServer != null) {
            ipcServer.stop();
        }

        this.shouldRun = false;
        if (dataXceiverServer != null) {
            ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
            this.dataXceiverServer.interrupt();

            // wait for all data receiver threads to exit
            if (this.threadGroup != null) {
                while (true) {
                    this.threadGroup.interrupt();
                    LOG.info(
                            "Waiting for threadgroup to exit, active threads is " + this.threadGroup.activeCount());
                    if (this.threadGroup.activeCount() == 0) {
                        break;
                    }
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                    }
                }
            }
            // wait for dataXceiveServer to terminate
            try {
                this.dataXceiverServer.join();
            } catch (InterruptedException ie) {
            }
        }

        RPC.stopProxy(namenode); // stop the RPC threads

        if (upgradeManager != null)
            upgradeManager.shutdownUpgrade();
        if (blockScannerThread != null) {
            blockScannerThread.interrupt();
            try {
                blockScannerThread.join(3600000L); // wait for at most 1 hour
            } catch (InterruptedException ie) {
            }
        }
        if (storage != null) {
            try {
                this.storage.unlockAll();
            } catch (IOException ie) {
            }
        }
        if (dataNodeThread != null) {
            dataNodeThread.interrupt();
            try {
                dataNodeThread.join();
            } catch (InterruptedException ie) {
            }
        }
        if (data != null) {
            data.shutdown();
        }
        if (myMetrics != null) {
            myMetrics.shutdown();
        }
    }

    /* Check whether the disk is out of space or read-only when an
     * IOException occurs.
     * If so, handle the error. */
    protected void checkDiskError(IOException e) throws IOException {
        if (e.getMessage() != null && e.getMessage().startsWith("No space left on device")) {
            throw new DiskOutOfSpaceException("No space left on device");
        } else {
            checkDiskError();
        }
    }

    /* Check the data directories for disk errors and handle any error found. */
    protected void checkDiskError() throws IOException {
        try {
            data.checkDataDir();
        } catch (DiskErrorException de) {
            handleDiskError(de.getMessage());
        }
    }

    private void handleDiskError(String errMsgr) {
        LOG.warn("DataNode is shutting down.\n" + errMsgr);
        shouldRun = false;
        try {
            namenode.errorReport(dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
        } catch (IOException ignored) {
        }
    }

    /** Number of concurrent xceivers per node. */
    int getXceiverCount() {
        return threadGroup == null ? 0 : threadGroup.activeCount();
    }

    /**
     * Main loop for the DataNode.  Runs until shutdown,
     * forever calling remote NameNode functions.
     */
    public void offerService() throws Exception {

        LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + " Initial delay: "
                + initialBlockReportDelay + "msec");

        //
        // Now loop for a long time....
        //
        isConnected = true;

        while (shouldRun) {
            try {
                long startTime = now();

                LOG.debug("Connected to NN : " + this.isConnectedToNN());

                //
                // Every so often, send heartbeat or block-report
                //

                if (startTime - lastHeartbeat > heartBeatInterval) {
                    //
                    // All heartbeat messages include following info:
                    // -- Datanode name
                    // -- data transfer port
                    // -- Total capacity
                    // -- Bytes remaining
                    //
                    lastHeartbeat = startTime;
                    DatanodeCommand[] cmds = namenode.sendHeartbeat(dnRegistration, data.getCapacity(),
                            data.getDfsUsed(), data.getRemaining(), xmitsInProgress.get(), getXceiverCount());
                    myMetrics.heartbeats.inc(now() - startTime);
                    if (!isConnected)
                        isConnected = true;
                    //LOG.info("Just sent heartbeat, with name " + localName);
                    if (!processCommand(cmds))
                        continue;
                }

                // check if there are newly received blocks
                Block[] blockArray = null;
                String[] delHintArray = null;
                synchronized (receivedBlockList) {
                    synchronized (delHints) {
                        int numBlocks = receivedBlockList.size();
                        if (numBlocks > 0) {
                            if (numBlocks != delHints.size()) {
                                LOG.warn("Panic: receiveBlockList and delHints are not of the same length");
                            }
                            //
                            // Send newly-received blockids to namenode
                            //
                            blockArray = receivedBlockList.toArray(new Block[numBlocks]);
                            delHintArray = delHints.toArray(new String[numBlocks]);
                        }
                    }
                }
                if (blockArray != null) {
                    if (delHintArray == null || delHintArray.length != blockArray.length) {
                        LOG.warn("Panic: block array & delHintArray are not the same");
                    }
                    namenode.blockReceived(dnRegistration, blockArray, delHintArray);
                    synchronized (receivedBlockList) {
                        synchronized (delHints) {
                            for (int i = 0; i < blockArray.length; i++) {
                                receivedBlockList.remove(blockArray[i]);
                                delHints.remove(delHintArray[i]);
                            }
                        }
                    }
                }

                // send block report
                if (startTime - lastBlockReport > blockReportInterval) {
                    //
                    // Send latest blockinfo report if timer has expired.
                    // Get back a list of local block(s) that are obsolete
                    // and can be safely GC'ed.
                    //
                    long brStartTime = now();
                    Block[] bReport = data.getBlockReport();
                    DatanodeCommand cmd = namenode.blockReport(dnRegistration,
                            BlockListAsLongs.convertToArrayLongs(bReport));
                    long brTime = now() - brStartTime;
                    myMetrics.blockReports.inc(brTime);
                    LOG.info("BlockReport of " + bReport.length + " blocks got processed in " + brTime + " msecs");
                    //
                    // If we have sent the first block report, then wait a random
                    // time before we start the periodic block reports.
                    //
                    if (resetBlockReportTime) {
                        lastBlockReport = startTime - R.nextInt((int) (blockReportInterval));
                        resetBlockReportTime = false;
                    } else {
                        /* say the last block report was at 8:20:14. The current report 
                         * should have started around 9:20:14 (default 1 hour interval). 
                         * If current time is :
                         *   1) normal like 9:20:18, next report should be at 10:20:14
                         *   2) unexpected like 11:35:43, next report should be at 12:20:14
                         */
                        lastBlockReport += (now() - lastBlockReport) / blockReportInterval * blockReportInterval;
                    }
                    processCommand(cmd);
                }
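
                /* Worked example of the rescheduling above (times invented):
                 * with a 1 hour interval, if lastBlockReport was 8:20:14 and
                 * now() is 11:35:43, (now - lastBlockReport) / interval *
                 * interval is 3 hours, so lastBlockReport advances to 11:20:14
                 * and the next report fires at 12:20:14. */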

                // start block scanner
                if (blockScanner != null && blockScannerThread == null && upgradeManager.isUpgradeCompleted()) {
                    LOG.info("Starting Periodic block scanner.");
                    blockScannerThread = new Daemon(blockScanner);
                    blockScannerThread.start();
                }

                //
                // There is no work to do; sleep until the heartbeat timer elapses, 
                // or work arrives, and then iterate again.
                //
                long waitTime = heartBeatInterval - (System.currentTimeMillis() - lastHeartbeat);
                synchronized (receivedBlockList) {
                    if (waitTime > 0 && receivedBlockList.size() == 0) {
                        try {
                            receivedBlockList.wait(waitTime);
                        } catch (InterruptedException ie) {
                        }
                    }
                } // synchronized
            } catch (RemoteException re) {
                String reClass = re.getClassName();
                if (UnregisteredDatanodeException.class.getName().equals(reClass)
                        || DisallowedDatanodeException.class.getName().equals(reClass)
                        || IncorrectVersionException.class.getName().equals(reClass)) {
                    LOG.warn("DataNode is shutting down: " + StringUtils.stringifyException(re));
                    shutdown();
                    return;
                }
                isConnected = true;
                //LOG.warn(StringUtils.stringifyException(re));
                //LOG.warn(re.getMessage());
            } catch (IOException e) {
                isConnected = false;
                LOG.warn(e.getMessage());
            }
        } // while (shouldRun)
    } // offerService

    /**
     * Process an array of datanode commands
     * 
     * @param cmds an array of datanode commands
     * @return true if further processing may be required or false otherwise. 
     */
    private boolean processCommand(DatanodeCommand[] cmds) {
        if (cmds != null) {
            for (DatanodeCommand cmd : cmds) {
                try {
                    if (!processCommand(cmd)) {
                        return false;
                    }
                } catch (IOException ioe) {
                    LOG.warn("Error processing datanode Command", ioe);
                }
            }
        }
        return true;
    }

    /**
     * Process a single datanode command.
     * 
     * @param cmd the command to process; may be null
     * @return true if further processing may be required or false otherwise. 
     * @throws IOException
     */
    private boolean processCommand(DatanodeCommand cmd) throws IOException {
        if (cmd == null)
            return true;
        final BlockCommand bcmd = cmd instanceof BlockCommand ? (BlockCommand) cmd : null;

        switch (cmd.getAction()) {
        case DatanodeProtocol.DNA_TRANSFER:
            // Send a copy of a block to another datanode
            transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
            myMetrics.blocksReplicated.inc(bcmd.getBlocks().length);
            break;
        case DatanodeProtocol.DNA_INVALIDATE:
            //
            // Some local block(s) are obsolete and can be 
            // safely garbage-collected.
            //
            Block toDelete[] = bcmd.getBlocks();
            try {
                if (blockScanner != null) {
                    blockScanner.deleteBlocks(toDelete);
                }
                data.invalidate(toDelete);
            } catch (IOException e) {
                checkDiskError();
                throw e;
            }
            myMetrics.blocksRemoved.inc(toDelete.length);
            break;
        case DatanodeProtocol.DNA_SHUTDOWN:
            // shut down the data node
            this.shutdown();
            return false;
        case DatanodeProtocol.DNA_REGISTER:
            // namenode requested a registration - at start or if NN lost contact
            LOG.info("DatanodeCommand action: DNA_REGISTER");
            if (shouldRun) {
                register();
            }
            break;
        case DatanodeProtocol.DNA_FINALIZE:
            storage.finalizeUpgrade();
            break;
        case UpgradeCommand.UC_ACTION_START_UPGRADE:
            // start distributed upgrade here
            processDistributedUpgradeCommand((UpgradeCommand) cmd);
            break;
        case DatanodeProtocol.DNA_RECOVERBLOCK:
            recoverBlocks(bcmd.getBlocks(), bcmd.getTargets());
            break;
        default:
            LOG.warn("Unknown DatanodeCommand action: " + cmd.getAction());
        }
        return true;
    }

    // Distributed upgrade manager
    UpgradeManagerDatanode upgradeManager = new UpgradeManagerDatanode(this);

    private void processDistributedUpgradeCommand(UpgradeCommand comm) throws IOException {
        assert upgradeManager != null : "DataNode.upgradeManager is null.";
        upgradeManager.processUpgradeCommand(comm);
    }

    /**
     * Start distributed upgrade if it should be initiated by the data-node.
     */
    private void startDistributedUpgradeIfNeeded() throws IOException {
        UpgradeManagerDatanode um = DataNode.getDataNode().upgradeManager;
        assert um != null : "DataNode.upgradeManager is null.";
        if (!um.getUpgradeState())
            return;
        um.setUpgradeState(false, um.getUpgradeVersion());
        um.startUpgrade();
        return;
    }

    private void transferBlock(Block block, DatanodeInfo xferTargets[]) throws IOException {
        if (!data.isValidBlock(block)) {
            // block does not exist or is under-construction
            String errStr = "Can't send invalid block " + block;
            LOG.info(errStr);
            namenode.errorReport(dnRegistration, DatanodeProtocol.INVALID_BLOCK, errStr);
            return;
        }

        // Check if NN recorded length matches on-disk length 
        long onDiskLength = data.getLength(block);
        if (block.getNumBytes() > onDiskLength) {
            // A shorter on-disk length indicates corruption, so report the corrupt block to the NN
            namenode.reportBadBlocks(new LocatedBlock[] {
                    new LocatedBlock(block, new DatanodeInfo[] { new DatanodeInfo(dnRegistration) }) });
            LOG.info("Can't replicate block " + block + " because on-disk length " + onDiskLength
                    + " is shorter than NameNode recorded length " + block.getNumBytes());
            return;
        }

        int numTargets = xferTargets.length;
        if (numTargets > 0) {
            if (LOG.isInfoEnabled()) {
                StringBuilder xfersBuilder = new StringBuilder();
                for (int i = 0; i < numTargets; i++) {
                    xfersBuilder.append(xferTargets[i].getName());
                    xfersBuilder.append(" ");
                }
                LOG.info(dnRegistration + " Starting thread to transfer block " + block + " to " + xfersBuilder);
            }

            LOG.debug("Number of threads in Xceiver : " + this.threadGroup.activeCount());
            new Daemon(new DataTransfer(xferTargets, block, this)).start();
            //this.dataXceiver = new Daemon(threadGroup, new DataTransfer(xferTargets, block, this));
            //this.threadGroup.setDaemon(true); // auto destroy when empty
            //this.dataXceiver.start();
        }
    }

    private void transferBlocks(Block blocks[], DatanodeInfo xferTargets[][]) {
        for (int i = 0; i < blocks.length; i++) {
            try {
                transferBlock(blocks[i], xferTargets[i]);
            } catch (IOException ie) {
                LOG.warn("Failed to transfer block " + blocks[i], ie);
            }
        }
    }

    /*
     * Informing the name node could take a long long time! Should we wait
     * till namenode is informed before responding with success to the
     * client? For now we don't.
     */
    protected void notifyNamenodeReceivedBlock(Block block, String delHint) {
        if (block == null || delHint == null) {
            throw new IllegalArgumentException(block == null ? "Block is null" : "delHint is null");
        }
        synchronized (receivedBlockList) {
            synchronized (delHints) {
                receivedBlockList.add(block);
                delHints.add(delHint);
                receivedBlockList.notifyAll();
            }
        }
    }
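
    /* The notifyAll() above pairs with the receivedBlockList.wait(waitTime)
     * call in offerService(): a thread that finishes receiving a block wakes
     * the main loop early, so the block is reported to the namenode without
     * waiting out the full heartbeat interval. */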

    /* ********************************************************************
    Protocol when a client reads data from Datanode (Cur Ver: 9):
        
    Client's Request :
    =================
         
       Processed in DataXceiver:
       +----------------------------------------------+
       | Common Header   | 1 byte OP == OP_READ_BLOCK |
       +----------------------------------------------+
           
       Processed in readBlock() :
       +-------------------------------------------------------------------------+
       | 8 byte Block ID | 8 byte genstamp | 8 byte start offset | 8 byte length |
       +-------------------------------------------------------------------------+
       |   vInt length   |  <DFSClient id> |
       +-----------------------------------+
           
       Client sends optional response only at the end of receiving data.
             
    DataNode Response :
    ===================
         
      In readBlock() :
      If there is an error while initializing BlockSender :
         +---------------------------+
         | 2 byte OP_STATUS_ERROR    | and connection will be closed.
         +---------------------------+
      Otherwise
         +---------------------------+
         | 2 byte OP_STATUS_SUCCESS  |
         +---------------------------+
             
      Actual data, sent by BlockSender.sendBlock() :
          
        ChecksumHeader :
        +--------------------------------------------------+
        | 1 byte CHECKSUM_TYPE | 4 byte BYTES_PER_CHECKSUM |
        +--------------------------------------------------+
        Followed by actual data in the form of PACKETS: 
        +------------------------------------+
        | Sequence of data PACKETs ....      |
        +------------------------------------+
          
      A "PACKET" is defined further below.
          
      The client reads data until it receives a packet with 
      "LastPacketInBlock" set to true or with a zero length. If there is 
      no checksum error, it replies to DataNode with OP_STATUS_CHECKSUM_OK:
          
      Client optional response at the end of data transmission :
        +------------------------------+
        | 2 byte OP_STATUS_CHECKSUM_OK |
        +------------------------------+
          
      PACKET : Contains a packet header, checksum and data. Amount of data
      ======== carried is set by BUFFER_SIZE.
          
        +-----------------------------------------------------+
        | 4 byte packet length (excluding packet header)      |
        +-----------------------------------------------------+
        | 8 byte offset in the block | 8 byte sequence number |
        +-----------------------------------------------------+
        | 1 byte isLastPacketInBlock                          |
        +-----------------------------------------------------+
        | 4 byte Length of actual data                        |
        +-----------------------------------------------------+
        | x byte checksum data. x is defined below            |
        +-----------------------------------------------------+
        | actual data ......                                  |
        +-----------------------------------------------------+
            
        x = (length of data + BYTES_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
      CHECKSUM_SIZE
          
        CHECKSUM_SIZE depends on CHECKSUM_TYPE (usually, 4 for CRC32)
            
        The above packet format is used while writing data to DFS also.
        Not all the fields might be used while reading.
          
     ************************************************************************ */
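
    /* Worked example of the checksum-length formula above (sizes assumed):
     * with CRC32 (CHECKSUM_SIZE = 4) and BYTES_PER_CHECKSUM = 512, a packet
     * carrying 10000 bytes of data needs
     *   x = (10000 + 512 - 1) / 512 * 4 = 20 * 4 = 80
     * bytes of checksum data. */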

    /** Header size for a packet */
    public static final int PKT_HEADER_LEN = (4 + /* Packet payload length */
            8 + /* offset in block */
            8 + /* seqno */
            1 /* isLastPacketInBlock */);

    /**
     * Used for transferring a block of data.  This class
     * sends a piece of data to another DataNode.
     */
    class DataTransfer implements Runnable {
        DatanodeInfo targets[];
        Block b;
        DataNode datanode;

        /**
         * Connect to the first item in the target list.  Pass along the 
         * entire target list, the block, and the data.
         */
        public DataTransfer(DatanodeInfo targets[], Block b, DataNode datanode) throws IOException {
            this.targets = targets;
            this.b = b;
            this.datanode = datanode;
        }

        /**
         * Do the deed, write the bytes
         */
        public void run() {
            xmitsInProgress.getAndIncrement();
            //Socket sock = null;
            JxtaSocket jsock = null;
            DataOutputStream out = null;
            BlockSender blockSender = null;

            try {
                JxtaSocketAddress curTarget = dnpeer.getInfoSocketAddress(targets[0].getPeerId());
                //sock = newSocket();
                //NetUtils.connect(sock, curTarget, socketTimeout);
                jsock = dnpeer.getInfoSocket(targets[0].getPeerId());
                // jsock.setSoTimeout(targets.length * socketTimeout);
                if (jsock == null) {
                    throw new IOException("Failed to get jxta socket for data transfer");
                }
                //jsock.setSoTimeout(Integer.parseInt(conf.get("hadoop.p2p.rpc.timeout"))); -- No need as already done

                long writeTimeout = socketWriteTimeout
                        + HdfsConstants.WRITE_TIMEOUT_EXTENSION * (targets.length - 1);
                OutputStream baseStream = NetUtils.getOutputStream(jsock, writeTimeout);

                // NetUtils.getOutputStream(jsock, writeTimeout);
                //out = new DataOutputStream(new BufferedOutputStream(baseStream, 
                //                                                    SMALL_BUFFER_SIZE));
                out = new DataOutputStream(new BufferedOutputStream(baseStream));

                blockSender = new BlockSender(b, 0, b.getNumBytes(), false, false, false, datanode);
                DatanodeInfo srcNode = new DatanodeInfo(dnRegistration);

                //
                // Header info
                //
                out.writeShort(DataTransferProtocol.DATA_TRANSFER_VERSION);
                out.writeByte(DataTransferProtocol.OP_WRITE_BLOCK);
                out.writeLong(b.getBlockId());
                out.writeLong(b.getGenerationStamp());
                out.writeInt(0); // no pipelining
                out.writeBoolean(false); // not part of recovery
                Text.writeString(out, ""); // client
                out.writeBoolean(true); // sending src node information
                srcNode.write(out); // Write src node DatanodeInfo
                // write targets
                out.writeInt(targets.length - 1);
                for (int i = 1; i < targets.length; i++) {
                    targets[i].write(out);
                }
                // send data & checksum
                blockSender.sendBlock(out, baseStream, null);

                // no response necessary
                LOG.info(dnRegistration + ":Transmitted block " + b + " to "
                        + curTarget.getPeerAdvertisement().getPeerID().toString());

            } catch (SocketTimeoutException ste) {
                LOG.debug("Failed to get jxta socket to transfer data");
            } catch (IOException ie) {
                //LOG.warn(dnRegistration + ":Failed to transfer " + b + " to " + targets[0].getName()
                //    + " got " + StringUtils.stringifyException(ie));
                LOG.warn(dnRegistration + ":Failed to transfer " + b + " to " + targets[0].getName() + " got "
                        + ie.getMessage());
            } finally {
                xmitsInProgress.getAndDecrement();
                IOUtils.closeStream(blockSender);
                IOUtils.closeStream(out);
                //IOUtils.closeSocket(jsock);
            }
        }
    }

    /**
     * No matter what kind of exception we get, keep retrying offerService().
     * That's the loop that connects to the NameNode and provides basic DataNode
     * functionality.
     *
     * Only stop when "shouldRun" is turned off (which can only happen at shutdown).
     */
    public void run() {
        LOG.info(dnRegistration + ": In DataNode.run, data = " + data);

        // start dataXceiveServer
        dataXceiverServer.start();

        while (shouldRun) {
            try {
                startDistributedUpgradeIfNeeded();
                offerService();
            } catch (Exception ex) {
                //LOG.error("Exception: " + StringUtils.stringifyException(ex));
                LOG.error("Exception: " + ex.getMessage());
                if (shouldRun) {
                    try {
                        Thread.sleep(5000);
                    } catch (InterruptedException ie) {
                    }
                }
            }
        }

        LOG.info(dnRegistration + ":Finishing DataNode in: " + data);
        shutdown();
    }

    /** Start a single datanode daemon and wait for it to finish.
     *  If this thread is specifically interrupted, it will stop waiting.
     */
    public static void runDatanodeDaemon(DataNode dn) throws IOException {
        if (dn != null) {
            //register datanode
            dn.register();
            dn.dataNodeThread = new Thread(dn, dnThreadName);
            dn.dataNodeThread.setDaemon(true); // needed for JUnit testing
            dn.dataNodeThread.start();
        }
    }

    public static boolean isDatanodeUp(DataNode dn) {
        return dn.dataNodeThread != null && dn.dataNodeThread.isAlive();
    }

    /*
     * Return true if the DataNode is connected to the NameNode
     */
    public boolean isConnectedToNN() {
        return isConnected;
    }

    /** Instantiate a single datanode object. The instance must then be started
     *  by invoking {@link DataNode#runDatanodeDaemon(DataNode)}. 
     */
    public static DataNode instantiateDataNode(String args[], Configuration conf) throws IOException {
        if (conf == null)
            conf = new Configuration();
        if (!parseArguments(args, conf)) {
            printUsage();
            return null;
        }
        if (conf.get("dfs.network.script") != null) {
            LOG.error("This configuration for rack identification is not supported"
                    + " anymore. RackID resolution is handled by the NameNode.");
            System.exit(-1);
        }
        String[] dataDirs = conf.getStrings("dfs.data.dir");
        dnThreadName = "DataNode: [" + StringUtils.arrayToString(dataDirs) + "]";
        return makeInstance(dataDirs, conf);
    }

    /** Instantiate & Start a single datanode daemon and wait for it to finish.
     *  If this thread is specifically interrupted, it will stop waiting.
     */
    public static DataNode createDataNode(String args[], Configuration conf) throws IOException {
        DataNode dn = instantiateDataNode(args, conf);
        runDatanodeDaemon(dn);
        return dn;
    }
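
    /* A minimal embedding sketch, assuming the hdfs-*.xml resources on the
     * classpath supply the required settings (dfs.data.dir, the hadoop.p2p.*
     * keys, and so on):
     *
     *   Configuration conf = new Configuration();
     *   DataNode dn = DataNode.createDataNode(new String[0], conf);
     *   if (dn != null) {
     *       dn.join(); // block until the datanode thread exits
     *   }
     */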

    public void join() {
        if (dataNodeThread != null) {
            try {
                dataNodeThread.join();
            } catch (InterruptedException e) {
            }
        }
    }

    /**
     * Make an instance of DataNode after ensuring that at least one of the
     * given data directories (and their parent directories, if necessary)
     * can be created.
     * @param dataDirs List of directories, where the new DataNode instance should
     * keep its files.
     * @param conf Configuration instance to use.
     * @return DataNode instance for given list of data dirs and conf, or null if
     * no directory from this directory list can be created.
     * @throws IOException
     */
    public static DataNode makeInstance(String[] dataDirs, Configuration conf) throws IOException {
        ArrayList<File> dirs = new ArrayList<File>();
        for (int i = 0; i < dataDirs.length; i++) {
            File data = new File(dataDirs[i]);
            try {
                DiskChecker.checkDir(data);
                dirs.add(data);
            } catch (DiskErrorException e) {
                LOG.warn("Invalid directory in dfs.data.dir: " + e.getMessage());
            }
        }
        if (dirs.size() > 0)
            return new DataNode(conf, dirs);
        LOG.error("All directories in dfs.data.dir are invalid.");
        return null;
    }

    @Override
    public String toString() {
        return "DataNode{" + "data=" + data + ", localName='" + dnRegistration.getPeerId() + "'" + ", storageID='"
                + dnRegistration.getStorageID() + "'" + ", xmitsInProgress=" + xmitsInProgress.get() + "}";
    }

    private static void printUsage() {
        System.err.println("Usage: java DataNode");
        System.err.println("           [-rollback]");
    }

    /**
     * Parse and verify command line arguments and set configuration parameters.
     *
     * @return false if the passed arguments are incorrect
     */
    private static boolean parseArguments(String args[], Configuration conf) {
        int argsLen = (args == null) ? 0 : args.length;
        StartupOption startOpt = StartupOption.REGULAR;
        for (int i = 0; i < argsLen; i++) {
            String cmd = args[i];
            if ("-r".equalsIgnoreCase(cmd) || "--rack".equalsIgnoreCase(cmd)) {
                LOG.error("-r, --rack arguments are not supported anymore. RackID "
                        + "resolution is handled by the NameNode.");
                System.exit(-1);
            } else if ("-rollback".equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.ROLLBACK;
            } else if ("-regular".equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.REGULAR;
            } else
                return false;
        }
        setStartupOption(conf, startOpt);
        return true;
    }

    private static void setStartupOption(Configuration conf, StartupOption opt) {
        conf.set("dfs.datanode.startup", opt.toString());
    }

    static StartupOption getStartupOption(Configuration conf) {
        return StartupOption.valueOf(conf.get("dfs.datanode.startup", StartupOption.REGULAR.toString()));
    }
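
    /* For example, launching the datanode with "-rollback" makes
     * parseArguments() record StartupOption.ROLLBACK in the configuration;
     * getStartupOption(conf) then returns it, and startDataNode() hands it to
     * storage.recoverTransitionRead(...). */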

    /**
     * This method arranges for the data node to send a block report at the next heartbeat.
     */
    public void scheduleBlockReport(long delay) {
        if (delay > 0) { // send BR after random delay
            lastBlockReport = System.currentTimeMillis() - (blockReportInterval - R.nextInt((int) (delay)));
        } else { // send at next heartbeat
            lastBlockReport = lastHeartbeat - blockReportInterval;
        }
        resetBlockReportTime = true; // reset future BRs for randomness
    }
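
    /* Illustration of the delay arithmetic above (numbers invented): with
     * blockReportInterval = 1 hour and delay = 60000 ms, lastBlockReport lands
     * between (now - 1h) and (now - 1h + 60s), so the
     * "startTime - lastBlockReport > blockReportInterval" check in
     * offerService() trips within the next minute. */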

    /**
     * This method is used for testing. 
     * Examples are adding and deleting blocks directly.
     * The most common usage will be when the data node's storage is simulated.
     * 
     * @return the fsdataset that stores the blocks
     */
    public FSDatasetInterface getFSDataset() {
        return data;
    }

    /**
     * Entry point. Starts the datanode, wrapped in a system tray handler when
     * the desktop supports one.
     */
    public static void main(String args[]) {
        DesktopTray desktopTray = null;

        try {
            if (SystemTray.isSupported()) {
                // System Tray handling: delegate startup to the desktop tray
                desktopTray = new DesktopTray();
                desktopTray.init(DataNode.class, args, LOG);
            } else {
                StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
                DataNode datanode = createDataNode(args, null);

                if (datanode != null)
                    datanode.join();
            }
        } catch (Throwable e) {
            LOG.error(StringUtils.stringifyException(e));
            System.exit(-1);
        }
    }
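    /* Launch sketch (illustrative): SystemTray.isSupported() returns false on
     * a headless JVM, so a server-side start such as
     *
     *   java -Djava.awt.headless=true \
     *       org.apache.jxtadoop.hdfs.server.datanode.DataNode -rollback
     *
     * takes the non-tray branch and blocks in datanode.join().
     */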

    // InterDataNodeProtocol implementation
    /** {@inheritDoc} */
    public BlockMetaDataInfo getBlockMetaDataInfo(Block block) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("block=" + block);
        }
        Block stored = data.getStoredBlock(block.getBlockId());

        if (stored == null) {
            return null;
        }
        BlockMetaDataInfo info = new BlockMetaDataInfo(stored, blockScanner.getLastScanTime(stored));
        if (LOG.isDebugEnabled()) {
            LOG.debug("getBlockMetaDataInfo successful block=" + stored + " length " + stored.getNumBytes()
                    + " genstamp " + stored.getGenerationStamp());
        }

        // paranoia! verify that the contents of the stored block
        // match the block file on disk.
        data.validateBlockMetadata(stored);
        return info;
    }

    public Daemon recoverBlocks(final Block[] blocks, final DatanodeInfo[][] targets) {
        Daemon d = new Daemon(threadGroup, new Runnable() {
            /** Recover a list of blocks. It is run by the primary datanode. */
            public void run() {
                for (int i = 0; i < blocks.length; i++) {
                    try {
                        logRecoverBlock("NameNode", blocks[i], targets[i]);
                        recoverBlock(blocks[i], false, targets[i], true);
                    } catch (IOException e) {
                        LOG.warn("recoverBlocks FAILED, blocks[" + i + "]=" + blocks[i], e);
                    }
                }
            }
        });
        d.start();
        return d;
    }
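    /* Usage sketch (illustrative): recovery runs asynchronously on the primary
     * datanode; a caller that must wait can join the returned daemon:
     *
     *   Daemon d = datanode.recoverBlocks(blocks, targets);
     *   d.join();   // optional; recovery otherwise proceeds in the background
     */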

    /** {@inheritDoc} */
    public void updateBlock(Block oldblock, Block newblock, boolean finalize) throws IOException {
        LOG.info("oldblock=" + oldblock + "(length=" + oldblock.getNumBytes() + "), newblock=" + newblock
                + "(length=" + newblock.getNumBytes() + "), datanode=" + dnRegistration.getPeerId());
        data.updateBlock(oldblock, newblock);
        if (finalize) {
            data.finalizeBlock(newblock);
            myMetrics.blocksWritten.inc();
            notifyNamenodeReceivedBlock(newblock, EMPTY_DEL_HINT);
            LOG.info("Received block " + newblock + " of size " + newblock.getNumBytes()
                    + " as part of lease recovery.");
        }
    }
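    /* Note: within this class updateBlock() is driven by syncBlock() below,
     * where finalize mirrors the closeFile flag; a finalized replica is then
     * reported to the namenode like a freshly received block.
     */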

    /** {@inheritDoc} */
    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
        if (protocol.equals(InterDatanodeProtocol.class.getName())) {
            return InterDatanodeProtocol.versionID;
        } else if (protocol.equals(ClientDatanodeProtocol.class.getName())) {
            return ClientDatanodeProtocol.versionID;
        }
        throw new IOException("Unknown protocol to " + getClass().getSimpleName() + ": " + protocol);
    }
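    /* Handshake sketch (illustrative): an RPC client verifies the server-side
     * protocol version before use, e.g.
     *
     *   long v = datanode.getProtocolVersion(
     *           ClientDatanodeProtocol.class.getName(),
     *           ClientDatanodeProtocol.versionID);
     *
     * Any other protocol name raises the "Unknown protocol" IOException.
     */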

    /** A convenient class used in lease recovery */
    private static class BlockRecord {
        final DatanodeID id;
        final InterDatanodeProtocol datanode;
        final Block block;

        BlockRecord(DatanodeID id, InterDatanodeProtocol datanode, Block block) {
            this.id = id;
            this.datanode = datanode;
            this.block = block;
        }

        /** {@inheritDoc} */
        public String toString() {
            return "block:" + block + " node:" + id;
        }
    }

    /** Recover a block */
    private LocatedBlock recoverBlock(Block block, boolean keepLength, DatanodeID[] datanodeids, boolean closeFile)
            throws IOException {

        // If the block is already being recovered, then skip recovering it.
        // This can happen if the namenode and client start recovering the same
        // file at the same time.
        synchronized (ongoingRecovery) {
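            // The lookup key uses GenerationStamp.WILDCARD_STAMP so that a
            // concurrent recovery of the same block id is detected regardless
            // of which generation stamp each caller observed.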
            Block tmp = new Block();
            tmp.set(block.getBlockId(), block.getNumBytes(), GenerationStamp.WILDCARD_STAMP);
            if (ongoingRecovery.get(tmp) != null) {
                String msg = "Block " + block + " is already being recovered, "
                        + " ignoring this request to recover it.";
                LOG.info(msg);
                throw new IOException(msg);
            }
            ongoingRecovery.put(block, block);
        }
        try {
            List<BlockRecord> syncList = new ArrayList<BlockRecord>();
            long minlength = Long.MAX_VALUE;
            int errorCount = 0;

            // Check generation stamps: only replicas at least as new as the
            // requested block take part in the sync. With keepLength, only
            // replicas of exactly the requested length qualify; otherwise the
            // block is truncated to the shortest qualifying replica.
            for (DatanodeID id : datanodeids) {
                try {
                    InterDatanodeProtocol datanode = dnRegistration.equals(id) ? this
                            : DataNode.createInterDataNodeProtocolProxy(id, getConf());
                    BlockMetaDataInfo info = datanode.getBlockMetaDataInfo(block);
                    if (info != null && info.getGenerationStamp() >= block.getGenerationStamp()) {
                        if (keepLength) {
                            if (info.getNumBytes() == block.getNumBytes()) {
                                syncList.add(new BlockRecord(id, datanode, new Block(info)));
                            }
                        } else {
                            syncList.add(new BlockRecord(id, datanode, new Block(info)));
                            if (info.getNumBytes() < minlength) {
                                minlength = info.getNumBytes();
                            }
                        }
                    }
                } catch (IOException e) {
                    ++errorCount;
                    InterDatanodeProtocol.LOG.warn(
                            "Failed to getBlockMetaDataInfo for block (=" + block + ") from datanode (=" + id + ")",
                            e);
                }
            }

            if (syncList.isEmpty() && errorCount > 0) {
                throw new IOException(
                        "All datanodes failed: block=" + block + ", datanodeids=" + Arrays.asList(datanodeids));
            }
            if (!keepLength) {
                block.setNumBytes(minlength);
            }
            return syncBlock(block, syncList, closeFile);
        } finally {
            synchronized (ongoingRecovery) {
                ongoingRecovery.remove(block);
            }
        }
    }

    /** Block synchronization */
    private LocatedBlock syncBlock(Block block, List<BlockRecord> syncList, boolean closeFile) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("block=" + block + ", (length=" + block.getNumBytes() + "), syncList=" + syncList
                    + ", closeFile=" + closeFile);
        }

        // An empty syncList means that no datanode has the block,
        // so the block can be deleted.
        if (syncList.isEmpty()) {
            namenode.commitBlockSynchronization(block, 0, 0, closeFile, true, DatanodeID.EMPTY_ARRAY);
            return null;
        }

        List<DatanodeID> successList = new ArrayList<DatanodeID>();

        long generationstamp = namenode.nextGenerationStamp(block);
        Block newblock = new Block(block.getBlockId(), block.getNumBytes(), generationstamp);
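        // The fresh generation stamp from the namenode marks the synchronized
        // replicas as current; stale replicas that miss this update can later
        // be told apart by their older stamp.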

        for (BlockRecord r : syncList) {
            try {
                r.datanode.updateBlock(r.block, newblock, closeFile);
                successList.add(r.id);
            } catch (IOException e) {
                InterDatanodeProtocol.LOG
                        .warn("Failed to updateBlock (newblock=" + newblock + ", datanode=" + r.id + ")", e);
            }
        }

        if (!successList.isEmpty()) {
            DatanodeID[] nlist = successList.toArray(new DatanodeID[successList.size()]);

            namenode.commitBlockSynchronization(block, newblock.getGenerationStamp(), newblock.getNumBytes(),
                    closeFile, false, nlist);
            DatanodeInfo[] info = new DatanodeInfo[nlist.length];
            for (int i = 0; i < nlist.length; i++) {
                info[i] = new DatanodeInfo(nlist[i]);
            }
            return new LocatedBlock(newblock, info); // success
        }

        //failed
        StringBuilder b = new StringBuilder();
        for (BlockRecord r : syncList) {
            b.append("\n  " + r.id);
        }
        throw new IOException("Cannot recover " + block + ", none of these " + syncList.size()
                + " datanodes success {" + b + "\n}");
    }

    // ClientDataNodeProtocol implementation
    /** {@inheritDoc} */
    public LocatedBlock recoverBlock(Block block, boolean keepLength, DatanodeInfo[] targets) throws IOException {
        logRecoverBlock("Client", block, targets);
        return recoverBlock(block, keepLength, targets, false);
    }

    private static void logRecoverBlock(String who, Block block, DatanodeID[] targets) {
        StringBuilder msg = new StringBuilder(targets[0].getPeerId());
        for (int i = 1; i < targets.length; i++) {
            msg.append(", " + targets[i].getPeerId());
        }
        LOG.info(who + " calls recoverBlock(block=" + block + ", targets=[" + msg + "])");
    }
}