org.apache.hadoop.hdfs.server.namenode.AvatarNodeNew.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.AvatarNodeNew.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.DataOutputStream;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.net.InetSocketAddress;
import java.util.Date;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Collection;
import java.text.SimpleDateFormat;

import javax.security.auth.login.LoginException;

import org.apache.hadoop.ipc.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.avatarpro.tools.getHostIP;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.protocol.AvatarProtocol;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.Avatar;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.StartupOption;
import org.apache.hadoop.hdfs.protocol.AvatarConstants.InstanceId;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;

/**
 * This is an implementation of the AvatarNode, a hot
 * standby for the NameNode.
 * This is really cool, believe me!
 * The AvatarNode has two avatars.. the Standby avatar and the Active
 * avatar.
 * 
 * In the Standby avatar, the AvatarNode is consuming transaction logs
 * generated by the primary (via a transaction log stored in a shared device).
 * Typically, the primary Namenode is writing transactions to a NFS filesystem
 * and the Standby is reading the log from the same NFS filesystem. The 
 * Standby is also making periodic checkpoints to the primary namenode.
 * 
 * A manual command can switch the AvatarNode from the Standby avatar
 * to the Active avatar. In the Active avatar, the AvatarNode performs precisely
 * the same functionality as a real usual Namenode. The switching from 
 * Standby avatar to the Active avatar is fast and can typically occur 
 * within seconds.
 *
 * Typically, an administrator will require two shared mount points for
 * transaction logs. They have to be set in dfs.name.dir.shared0 and
 * dfs.name.dir.shared1 (similarly dfs.name.edits.dir.shared0 and
 * dfs.name.edits.dir.shared1 for edits). Then the administrator starts
 * the AvatarNode on two different machines as follows:
 *
 * bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -zero -active
 * bin/hadoop org.apache.hadoop.hdfs.server.namenode.AvatarNode -one -standby
 * The first AvatarNode uses dfs.name.dir.shared0 while the second
 * AvatarNode uses dfs.name.dir.shared1 to write its transaction logs.
 * Also, at startup, the first instance is the primary Namenode and the
 * second instance is the Standby.
 *
 * After a while, the administrator decides to change the avatar of the
 * second instance to Active. In this case, he/she has to first ensure that the
 * first instance is really dead. This code does not handle the
 * split-brain scenario where there are two active namenodes in one cluster.
 *
 */

public class AvatarNodeNew extends NameNode implements AvatarProtocol {

    // Logger for this class.
    public static final Log LOG = LogFactory.getLog(AvatarNodeNew.class.getName());
    // Set to true by parseArguments() when the sync startup option is given.
    private static boolean syncAtStartup = false;
    // Name of the lock file that copyFsImage() removes from a freshly synced directory.
    private static final String STORAGE_FILE_LOCK = "in_use.lock";
    // The following paths are appended to a storage directory name to locate
    // the edits log, the new edits log, the fstime file and the checkpoint image.
    private static final String EDITSFILE = "/current/edits";
    private static final String EDITSNEW = "/current/edits.new";
    private static final String TIMEFILE = "/current/fstime";
    private static final String IMAGENEW = "/current/fsimage.ckpt";
    // Timestamp format used for the suffix of moved-aside directories and in log messages.
    static final SimpleDateFormat dateForm = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");

    // The instanceId is assigned at startup time and does not change for
    // the lifetime of the Node. The administrator has to name each instance
    // of the AvatarNode with a different instanceId. The node number is used
    // by the AvatarNode to determine which shared devices it should use to
    // checkpoint the image.
    //
    private static InstanceId instance = InstanceId.NODEZERO;

    // The time when (and if) the fsimage was sync-ed from the remote AvatarNode
    volatile private static long startCheckpointTime;

    // Should the AvatarNode restart itself?
    volatile static boolean doRestart;

    /** RPC server */
    private Server server;
    /** RPC server address */
    private InetSocketAddress serverAddress;
    private Avatar currentAvatar; // the current incarnation of this node
    private StandbyNew standby; // the standby object
    private Configuration confg; // config for the standby namenode
    private Configuration startupConf; // config for the namenode
    private Thread standbyThread; // the standby daemon thread

    /**
     * Creates an AvatarNode on top of a NameNode built from {@code conf} and
     * starts the AvatarNode RPC server. This constructor does not assign an
     * avatar and does not start a standby thread.
     *
     * @param conf the configuration handed to the NameNode and to the RPC server
     * @throws IOException if the NameNode or the RPC server cannot be created
     */
    AvatarNodeNew(Configuration conf) throws IOException {
        super(conf); // build the encapsulated namenode first
        initialize(conf); // then create and start the avatar RPC server
    }

    /**
     * The startup Conf is the original configuration of the AvatarNode. It is used by the
     * secondary namenode to talk to the primary namenode.
     * The conf is the modified configuration that is used by the standby namenode
     */
    AvatarNodeNew(Configuration startupConf, Configuration conf, Avatar avatar) throws IOException {
        super(conf);
        initialize(conf);
        this.currentAvatar = avatar;
        this.startupConf = startupConf;
        this.confg = conf;

        if (avatar != Avatar.STANDBY) {
            // Only a hot standby needs safemode and the transaction-consuming thread.
            return;
        }

        // A standby must not do active replication of blocks, so keep the
        // encapsulated namenode in safemode.
        setSafeMode(SafeModeAction.SAFEMODE_ENTER);

        // The Standby object does the actual work of processing transactions
        // from the primary and of checkpointing; it runs on its own thread.
        this.standby = new StandbyNew(this, startupConf, conf);
        this.standbyThread = new Thread(this.standby);
        this.standbyThread.start();
    }

    /**
     * Wait for the StandbyNode to exit. If it does, then stop the underlying namenode.
     */
    void waitForRestart() {
        if (standbyThread == null) {
            // No standby thread: simply wait for the encapsulated namenode.
            super.join();

            // Stopping an RPC server does not shut down its handler threads
            // synchronously, so wait for some time before the server is restarted.
            try {
                Thread.sleep(60000);
            } catch (InterruptedException ignored) {
                // an interrupt only cuts the pause short
            }
            return;
        }

        // This is the standby avatarnode: wait for the Standby to exit.
        try {
            standbyThread.join();
        } catch (InterruptedException ignored) {
            // deliberately ignored
        }
        standbyThread = null;
        LOG.info("waitForRestart Standby thread exited.");

        // Still in standby mode means a restart from scratch is needed.
        if (getAvatar() == Avatar.STANDBY) {
            LOG.info("waitForRestart Stopping encapsulated namenode.");
            super.stop(); // terminate encapsulated namenode
            super.join(); // wait for encapsulated namenode to exit

            if (server != null) {
                LOG.info("waitForRestart Stopping avatarnode rpcserver.");
                server.stop();
                try {
                    server.join();
                } catch (InterruptedException ignored) {
                    // deliberately ignored
                }
            }
        }
        LOG.info("waitForRestart exiting");
    }

    /**
     * Initialize AvatarNode
     * @param conf the configuration
     */
    private void initialize(Configuration conf) throws IOException {
        InetSocketAddress socAddr = AvatarNodeNew.getAddress(conf);

        int handlerCount = conf.getInt("hdfs.avatarnode.handler.count", 3);
        try {
            UserGroupInformation.setCurrentUser(UnixUserGroupInformation.login(conf));
        } catch (LoginException e) {
            // A failed login does not abort startup (same as before), but it is
            // no longer dropped without a trace.
            LOG.warn("Unable to log in the current user; continuing AvatarNode startup.", e);
        }

        // create rpc server
        this.server = RPC.getServer(this, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf);

        // The rpc-server port can be ephemeral... ensure we have the
        // correct info
        this.serverAddress = this.server.getListenerAddress();
        LOG.info("AvatarNode up at: " + this.serverAddress);
        this.server.start();
    }

    /**
     * If the specified protocol is AvatarProtocol, then return the
     * AvatarProtocol version id, otherwise delegate to the underlying
     * namenode.
     */
    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
        boolean isAvatarProtocol = protocol.equals(AvatarProtocol.class.getName());
        if (!isAvatarProtocol) {
            // Anything else is answered by the encapsulated namenode.
            return super.getProtocolVersion(protocol, clientVersion);
        }
        return AvatarProtocol.versionID;
    }

    //
    // methods to support Avatar Protocol
    //

    /**
     * @inheritDoc
     */
    public synchronized Avatar getAvatar() {
        // Holds the same monitor as setAvatar(), which also is a synchronized
        // instance method, so a read never interleaves with an avatar switch.
        return currentAvatar;
    }

    /**
     * @inheritDoc
     */
    public synchronized void setAvatar(Avatar avatar) throws IOException {
        if (avatar == currentAvatar) {
            LOG.info("Trying to change avatar to " + avatar + " but am already in that state.");
            return;
        }
        if (avatar == Avatar.STANDBY) { // ACTIVE to STANDBY
            String msg = "Changing state from active to standby is not allowed. "
                    + "If you really want to pause your primary, put it in safemode.";
            LOG.warn(msg);
            throw new IOException(msg);
        }

        // STANDBY to ACTIVE
        if (standby == null) {
            // The Standby object only exists when the node was constructed as a
            // standby; without it there is nothing to quiesce, so fail with a
            // descriptive IOException instead of a NullPointerException.
            String msg = "Failed to change avatar from " + currentAvatar + " to " + avatar
                    + " because this node is not running a Standby.";
            LOG.warn(msg);
            throw new IOException(msg);
        }

        // Check to see if the primary is somehow checkpointing itself. If so, then
        // refuse to switch to active mode. This check is not foolproof but is a
        // defensive mechanism to prevent administrator errors.
        if (standby.hasStaleCheckpoint()) {
            String msg = "Failed to change avatar from " + currentAvatar + " to " + avatar
                    + " because the Standby has not yet consumed all transactions.";
            LOG.warn(msg);
            throw new IOException(msg);
        }
        standby.quiesce();
        setSafeMode(SafeModeAction.SAFEMODE_LEAVE);

        LOG.info("Changed avatar from " + currentAvatar + " to " + avatar);
        currentAvatar = avatar;
    }

    /**
     * @inheritDoc
     */
    public Block[] blockReceivedNew(DatanodeRegistration nodeReg, Block blocks[], String delHints[])
            throws IOException {
        // Let the encapsulated namenode process the report first.
        super.blockReceived(nodeReg, blocks, delHints);

        List<Block> orphans = new ArrayList<Block>();
        for (Block candidate : blocks) {
            synchronized (namesystem) {
                BlockInfo known = namesystem.blocksMap.getStoredBlock(candidate);
                boolean belongsToFile = known != null && known.getINode() != null;
                if (belongsToFile) {
                    continue;
                }
                // The block is not attached to any file: hand it back to the caller.
                LOG.info("blockReceived request received for " + candidate + " on " + nodeReg.getName() + " size "
                        + candidate.getNumBytes() + " But it does not belong to any file." + " Retry later.");
                orphans.add(candidate);
            }
        }
        return orphans.toArray(new Block[orphans.size()]);
    }

    /**
     * Returns the hostname:port for the AvatarNode. The default
     * port for the AvatarNode is one more than the port of the
     * underlying namenode.
     */
    public static InetSocketAddress getAddress(Configuration conf) {
        InetSocketAddress namenodeAddr = NameNode.getAddress(conf);
        // Unless dfs.avatarnode.port is configured, use the namenode port plus one.
        int fallbackPort = namenodeAddr.getPort() + 1;
        int avatarPort = conf.getInt("dfs.avatarnode.port", fallbackPort);
        return new InetSocketAddress(namenodeAddr.getHostName(), avatarPort);
    }

    /**
     * Help message for a user
     */
    private static void printUsage() {
        // Options are printed in this order, each as "[name]" and joined by " | ".
        StartupOption[] listed = { StartupOption.STANDBY, StartupOption.SYNC, StartupOption.NODEZERO,
                StartupOption.NODEONE, StartupOption.FORMAT, StartupOption.UPGRADE, StartupOption.ROLLBACK,
                StartupOption.FINALIZE, StartupOption.IMPORT };

        // The program name was previously misspelled as "AvatareNode".
        StringBuilder usage = new StringBuilder("Usage: java AvatarNode ");
        for (int i = 0; i < listed.length; i++) {
            if (i > 0) {
                usage.append(" | ");
            }
            usage.append("[").append(listed[i].getName()).append("]");
        }
        System.err.println(usage.toString());
    }

    /**
     * validates command line arguments
     */
    static void validateStartupOptions(StartupOption startOpt) throws IOException {
        if (!syncAtStartup) {
            // Without sync every startup option is acceptable.
            return;
        }
        boolean conflictsWithSync = startOpt == StartupOption.FORMAT || startOpt == StartupOption.FINALIZE;
        if (!conflictsWithSync) {
            return;
        }
        // sync cannot be specified along with format or finalize
        String msg = "Option " + StartupOption.SYNC + " cannot be specified along with " + startOpt;
        LOG.warn(msg);
        throw new IOException(msg);
    }

    /**
     * Analyze the command line options
     */
    private static StartupOption parseArguments(String args[]) {
        int argsLen = (args == null) ? 0 : args.length;
        // With no role argument the node starts as ACTIVE.
        StartupOption startOpt = StartupOption.ACTIVE;
        for (int i = 0; i < argsLen; i++) {
            String cmd = args[i];
            if (StartupOption.STANDBY.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.STANDBY;
            } else if (StartupOption.SYNC.getName().equalsIgnoreCase(cmd)) {
                // side effect: remembered in a static flag, not in the return value
                syncAtStartup = true;
            } else if (StartupOption.NODEZERO.getName().equalsIgnoreCase(cmd)) {
                instance = InstanceId.NODEZERO;
            } else if (StartupOption.NODEONE.getName().equalsIgnoreCase(cmd)) {
                instance = InstanceId.NODEONE;
            } else if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.FORMAT;
            } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.REGULAR;
            } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.UPGRADE;
            } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.ROLLBACK;
            } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.FINALIZE;
            } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
                startOpt = StartupOption.IMPORT;
            } else if (StartupOption.ACTIVE.getName().equalsIgnoreCase(cmd)) {
                // The class documentation starts the first node with "-zero -active",
                // so the explicit active option must be accepted too. It is tested
                // last so that every argument recognised before keeps its meaning.
                startOpt = StartupOption.ACTIVE;
            } else {
                // Unknown argument: the caller prints the usage message. Static
                // flags set by earlier arguments are left as they are.
                return null;
            }
        }
        return startOpt;
    }

    /**
     * Records the startup command in the configuration
     */
    private static void setStartupOption(Configuration conf, StartupOption opt) {
        // Stored under "dfs.avatarnode.startup" using opt.toString(), not opt.getName().
        conf.set("dfs.avatarnode.startup", opt.toString());
    }

    /**
     * Builds an AvatarNode from command line arguments.
     *
     * @param argv command line arguments, parsed by parseArguments
     * @param conf the startup configuration; a fresh Configuration is used when null
     * @return the new node, or null when the arguments are invalid (the usage
     *         message is printed in that case)
     * @throws IOException if the startup options conflict or the shared
     *         directories cannot be prepared
     */
    public static AvatarNodeNew createAvatarNode(String argv[], Configuration conf) throws IOException {
        // The configuration as it is at startup; it is kept next to the derived one.
        Configuration startupConf = (conf != null) ? conf : new Configuration();

        StartupOption startOpt = parseArguments(argv);
        if (startOpt == null) {
            printUsage();
            return null;
        }
        setStartupOption(startupConf, startOpt);

        // sync cannot be specified along with format or finalize
        validateStartupOptions(startOpt);

        // If sync is requested, then we copy only the fsimage
        //  (and not the transaction logs) from the other node.
        // If we are NODEONE, then modify the configuration to
        // set fs.name.dir, fs.default.name and dfs.http.address.
        Configuration nodeConf = copyFsImage(startupConf, startOpt);

        // namenode options: both of these end the JVM instead of starting a node.
        if (startOpt == StartupOption.FORMAT) {
            boolean aborted = format(nodeConf, true);
            System.exit(aborted ? 1 : 0);
        }
        if (startOpt == StartupOption.FINALIZE) {
            boolean aborted = finalize(nodeConf, true);
            System.exit(aborted ? 1 : 0);
        }

        return new AvatarNodeNew(startupConf, nodeConf, startOpt.toAvatar());
    }

    /**
     * Return the configuration that should be used by this instance of AvatarNode
     * Copy fsimages from the remote shared device. 
     */
    static Configuration copyFsImage(Configuration conf, StartupOption opt) throws IOException {
        String img0 = conf.get("dfs.name.dir.shared0");
        String img1 = conf.get("dfs.name.dir.shared1");
        String edit0 = conf.get("dfs.name.edits.dir.shared0");
        String edit1 = conf.get("dfs.name.edits.dir.shared1");
        Collection<String> namedirs = conf.getStringCollection("dfs.name.dir");
        Collection<String> editsdir = conf.getStringCollection("dfs.name.edits.dir");
        String msg = "";

        // All four shared locations are mandatory; every missing key is reported
        // in one message. The messages now carry the real key names ("shared",
        // not "share").
        if (img0 == null || img0.isEmpty()) {
            msg += "No values specified in dfs.name.dir.shared0";
        }
        if (img1 == null || img1.isEmpty()) {
            msg += " No values specified in dfs.name.dir.shared1";
        }
        if (edit0 == null || edit0.isEmpty()) {
            msg += " No values specified in dfs.name.edits.dir.shared0";
        }
        if (edit1 == null || edit1.isEmpty()) {
            msg += " No values specified in dfs.name.edits.dir.shared1";
        }
        if (msg.length() != 0) {
            LOG.info(msg);
            throw new IOException(msg);
        }

        // verify that the shared directories are not specified as dfs.name.dir
        for (String str : namedirs) {
            if (str.equalsIgnoreCase(img0)) {
                msg = "The name specified in dfs.name.dir.shared0 " + img0 + " is already part of dfs.name.dir ";
            }
            if (str.equalsIgnoreCase(img1)) {
                msg += " The name specified in dfs.name.dir.shared1 " + img1 + " is already part of dfs.name.dir ";
            }
        }
        if (msg.length() != 0) {
            LOG.info(msg);
            throw new IOException(msg);
        }

        // verify that the shared edits directories are not specified as
        // dfs.name.edits.dir. The messages report the edits values and the edits
        // key; they used to print the image directories and dfs.name.dir.
        for (String str : editsdir) {
            if (str.equalsIgnoreCase(edit0)) {
                msg = "The name specified in dfs.name.edits.dir.shared0 " + edit0
                        + " is already part of dfs.name.edits.dir ";
            }
            if (str.equalsIgnoreCase(edit1)) {
                msg += " The name specified in dfs.name.edits.dir.shared1 " + edit1
                        + " is already part of dfs.name.edits.dir ";
            }
        }
        if (msg.length() != 0) {
            LOG.info(msg);
            throw new IOException(msg);
        }

        // if we are starting as the standby then
        // record the fstime of the checkpoint that we are about to sync from
        if (opt == StartupOption.STANDBY) {
            setStartCheckpointTime(conf);
        }

        String mdate = dateForm.format(new Date(now()));
        FileSystem localFs = FileSystem.getLocal(conf).getRaw();

        //
        // if we are instance one then copy from primary to secondary
        // otherwise copy from secondary to primary.
        //
        File src = null;
        File dest = null;
        File srcedit = null;
        File destedit = null;
        if (instance == InstanceId.NODEONE) {
            src = new File(img0);
            dest = new File(img1);
            srcedit = new File(edit0);
            destedit = new File(edit1);
        } else if (instance == InstanceId.NODEZERO) {
            src = new File(img1);
            dest = new File(img0);
            srcedit = new File(edit1);
            destedit = new File(edit0);
        }

        // copy fsimage directory if needed
        if (src.exists() && syncAtStartup) {
            moveAsideIfExists(dest, mdate);
            copyDirOrFail(localFs, src, dest, conf);

            // Remove the lock file and a leftover fsimage.ckpt from the newly
            // synced directory; neither removal is required to succeed.
            new File(dest, STORAGE_FILE_LOCK).delete();
            new File(dest.toString() + IMAGENEW).delete();

            // Now, copy from the now-updated shared directory to all other
            // local dirs specified in dfs.name.dir
            for (String str : namedirs) {
                File local = new File(str);
                moveAsideIfExists(local, mdate);
                copyDirOrFail(localFs, dest, local, conf);
            }
        }

        // copy edits directory if needed
        if (srcedit.exists() && syncAtStartup) {
            moveAsideIfExists(destedit, mdate);
            copyDirOrFail(localFs, srcedit, destedit, conf);

            // Remove the lock file from the newly synced directory
            File lockfile = new File(destedit, STORAGE_FILE_LOCK);
            if (!lockfile.delete()) {
                throw new IOException("Unable to delete lock file " + lockfile);
            }

            // Remove edits and edits.new. Create empty edits file.
            File efile = new File(destedit.toString() + EDITSFILE);
            if (!efile.delete()) {
                throw new IOException("Unable to delete edits file " + efile);
            }
            new File(destedit + EDITSNEW).delete();
            createEditsFile(destedit.toString());

            // Now, copy from the now-updated shared directory to all other
            // local dirs specified in dfs.name.edits.dir
            for (String str : editsdir) {
                File local = new File(str);
                moveAsideIfExists(local, mdate);
                copyDirOrFail(localFs, destedit, local, conf);
            }
        }

        // This instance's shared directories and the suffix of its per-instance keys.
        String sharedImage = null;
        String sharedEdits = null;
        String suffix = null;
        if (instance == InstanceId.NODEONE) {
            sharedImage = img1;
            sharedEdits = edit1;
            suffix = "1";
        } else if (instance == InstanceId.NODEZERO) {
            sharedImage = img0;
            sharedEdits = edit0;
            suffix = "0";
        }

        // allocate a new configuration and update dfs.name.dir and
        // dfs.name.edits.dir appropriately.
        // The shared device should be the first in the list.
        Configuration newconf = new Configuration(conf);
        newconf.set("dfs.name.dir", sharedFirstDirList(sharedImage, namedirs));
        newconf.set("dfs.name.edits.dir", sharedFirstDirList(sharedEdits, editsdir));

        // if we are starting as the other namenode, then change the
        // default URL to make the namenode attach to the appropriate URL
        if (suffix != null) {
            String[] overridable = { "fs.default.name", "dfs.http.address", "dfs.namenode.dn-address" };
            for (String key : overridable) {
                String value = conf.get(key + suffix);
                if (value != null) {
                    newconf.set(key, value);
                }
            }
        }
        return newconf;
    }

    /**
     * Renames an existing directory to a timestamped name so that a fresh copy
     * can take its place. Does nothing when the directory does not exist.
     */
    private static void moveAsideIfExists(File dir, String mdate) throws IOException {
        if (!dir.exists()) {
            return;
        }
        // NOTE(review): File.pathSeparator is the path-list separator, not the
        // directory separator, so the backup is named "<dir><sep><timestamp>"
        // next to the original - confirm this is the intended naming.
        File tmp = new File(dir + File.pathSeparator + mdate);
        if (!dir.renameTo(tmp)) {
            throw new IOException("Unable to rename " + dir + " to " + tmp);
        }
        LOG.info("Moved aside " + dir + " as " + tmp);
    }

    /** Copies one local directory to another and fails loudly if the copy reports failure. */
    private static void copyDirOrFail(FileSystem localFs, File from, File to, Configuration conf)
            throws IOException {
        if (!FileUtil.copy(localFs, new Path(from.toString()), localFs, new Path(to.toString()), false, conf)) {
            String msg = "Error copying " + from + " to " + to;
            LOG.error(msg);
            throw new IOException(msg);
        }
        LOG.info("Copied " + from + " into " + to);
    }

    /** Builds a comma separated directory list that starts with the shared directory. */
    private static String sharedFirstDirList(String shared, Collection<String> dirs) {
        StringBuilder buf = new StringBuilder();
        if (shared != null) {
            buf.append(shared);
        }
        for (String str : dirs) {
            buf.append(",");
            buf.append(str);
        }
        return buf.toString();
    }

    /**
     * Returns the address of the remote namenode
     */
    static InetSocketAddress getRemoteNamenodeAddress(Configuration conf) throws IOException {
        // The remote node is the other instance, so read the other instance's key.
        String remoteKey;
        if (instance == InstanceId.NODEZERO) {
            remoteKey = "fs.default.name1";
        } else if (instance == InstanceId.NODEONE) {
            remoteKey = "fs.default.name0";
        } else {
            throw new IOException("Unknown instance " + instance);
        }

        String remoteFs = conf.get(remoteKey);
        if (remoteFs == null) {
            // No per-instance value: resolve against the configuration as given.
            return NameNode.getAddress(conf);
        }
        // Work on a copy so that the caller's configuration stays untouched.
        Configuration remoteConf = new Configuration(conf);
        remoteConf.set("fs.default.name", remoteFs);
        return NameNode.getAddress(remoteConf);
    }

    /**
     * Returns the name of the http server of the remote namenode
     */
    static String getRemoteNamenodeHttpName(Configuration conf) throws IOException {
        // Each instance looks up the http address key of the other instance.
        String remoteHttpKey;
        if (instance == InstanceId.NODEONE) {
            remoteHttpKey = "dfs.http.address0";
        } else if (instance == InstanceId.NODEZERO) {
            remoteHttpKey = "dfs.http.address1";
        } else {
            throw new IOException("Unknown instance " + instance);
        }
        return conf.get(remoteHttpKey);
    }

    /**
     * Create an empty edits log
     */
    static void createEditsFile(String editDir) throws IOException {
        File editfile = new File(editDir + EDITSFILE);
        FileOutputStream fp = new FileOutputStream(editfile);
        try {
            DataOutputBuffer buf = new DataOutputBuffer(1024);
            try {
                // The only content written is the layout version.
                buf.writeInt(FSConstants.LAYOUT_VERSION);
                buf.writeTo(fp);
            } finally {
                buf.close();
            }
        } finally {
            // Always release the file handle, even when writing failed.
            fp.close();
        }
    }

    /**
     * Return the edits file of the remote NameNode
     */
    File getRemoteEditsFile(Configuration conf) throws IOException {
        // The remote edits live in the shared edits directory of the other instance.
        String sharedEditsKey;
        if (instance == InstanceId.NODEONE) {
            sharedEditsKey = "dfs.name.edits.dir.shared0";
        } else if (instance == InstanceId.NODEZERO) {
            sharedEditsKey = "dfs.name.edits.dir.shared1";
        } else {
            LOG.info("Instance is invalid. " + instance);
            throw new IOException("Instance is invalid. " + instance);
        }
        String remoteDir = conf.get(sharedEditsKey);
        return new File(remoteDir + EDITSFILE);
    }

    /**
     * Return the edits.new file of the remote NameNode
     */
    File getRemoteEditsFileNew(Configuration conf) throws IOException {
        boolean isZero = instance == InstanceId.NODEZERO;
        boolean isOne = instance == InstanceId.NODEONE;
        if (!isZero && !isOne) {
            LOG.info("Instance is invalid. " + instance);
            throw new IOException("Instance is invalid. " + instance);
        }
        // Node zero reads the shared1 directory and node one reads shared0.
        String remoteDir = isZero
                ? conf.get("dfs.name.edits.dir.shared1")
                : conf.get("dfs.name.edits.dir.shared0");
        return new File(remoteDir + EDITSNEW);
    }

    /**
     * Return the fstime file of the remote NameNode
     */
    File getRemoteTimeFile(Configuration conf) throws IOException {
        if (instance == InstanceId.NODEZERO) {
            // node zero looks into the shared edits directory of node one
            return new File(conf.get("dfs.name.edits.dir.shared1") + TIMEFILE);
        }
        if (instance == InstanceId.NODEONE) {
            // node one looks into the shared edits directory of node zero
            return new File(conf.get("dfs.name.edits.dir.shared0") + TIMEFILE);
        }
        LOG.info("Instance is invalid. " + instance);
        throw new IOException("Instance is invalid. " + instance);
    }

    /**
     * Reads the timestamp of the last checkpoint from the remote fstime file.
     *
     * The file is looked up in the shared edits directory of the peer
     * (<code>dfs.name.edits.dir.shared1</code> for NODEZERO,
     * <code>dfs.name.edits.dir.shared0</code> for NODEONE) and its first
     * eight bytes are read as a long.
     *
     * @param conf configuration holding the shared edits directories
     * @return the long value stored at the start of the fstime file
     * @throws IOException if the instance id is invalid or the file cannot
     *         be read; in the latter case the original failure is attached
     *         as the cause
     */
    static long readRemoteFstime(Configuration conf) throws IOException {
        String edit = null;
        if (instance == InstanceId.NODEZERO) {
            edit = conf.get("dfs.name.edits.dir.shared1");
        } else if (instance == InstanceId.NODEONE) {
            edit = conf.get("dfs.name.edits.dir.shared0");
        } else {
            LOG.info("Instance is invalid. " + instance);
            throw new IOException("Instance is invalid. " + instance);
        }
        File timeFile = new File(edit + TIMEFILE);
        DataInputStream in = null;
        try {
            in = new DataInputStream(new FileInputStream(timeFile));
            return in.readLong();
        } catch (IOException e) {
            // Add whatever can be learned about the file to the error text.
            String msg = "Error reading checkpoint time file " + timeFile;
            if (!timeFile.exists()) {
                msg += " file does not exist.";
            } else if (!timeFile.canRead()) {
                msg += " cannot read file of size " + timeFile.length() + " last modified "
                        + dateForm.format(new Date(timeFile.lastModified()));
            }
            LOG.error(msg, e);
            // Keep the underlying exception as the cause so its stack trace
            // is not lost; the text stays in the message for remote callers.
            throw new IOException(msg + " " + e, e);
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Returns the starting checkpoint time of this AvatarNode.
     *
     * @return the current value of the static startCheckpointTime field
     */
    static long getStartCheckpointTime() {
        return startCheckpointTime;
    }

    /**
     * Sets the starting checkpoint time of this AvatarNode to the value
     * that readRemoteFstime(conf) returns.
     *
     * @param conf configuration passed through to readRemoteFstime
     * @throws IOException if readRemoteFstime fails
     */
    static void setStartCheckpointTime(Configuration conf) throws IOException {
        startCheckpointTime = readRemoteFstime(conf);
    }

    /**
     * Indicates that the AvatarNode should restart by setting the static
     * doRestart flag to true.
     */
    static void doRestart() {
        doRestart = true;
    }

    /**
     * Tells whether the remote namenode currently has both an edits and an
     * edits.new file.
     *
     * @param conf configuration used to locate the remote files
     * @return true only if both files exist
     * @throws IOException if the remote file locations cannot be resolved
     */
    boolean twoEditsFile(Configuration conf) throws IOException {
        final File remoteEdits = getRemoteEditsFile(conf);
        final File remoteEditsNew = getRemoteEditsFileNew(conf);
        if (!remoteEdits.exists()) {
            return false;
        }
        return remoteEditsNew.exists();
    }

    /**
     * Reports the length of the remote namenode's edits file.
     *
     * @param conf configuration used to locate the remote edits file
     * @return the value of File.length() for that file
     * @throws IOException if the remote file location cannot be resolved
     */
    long editSize(Configuration conf) throws IOException {
        final File remoteEdits = getRemoteEditsFile(conf);
        return remoteEdits.length();
    }

    /**
     * Reads the system clock.
     *
     * @return the current time in milliseconds, as reported by
     *         System.currentTimeMillis()
     */
    static long now() {
        final long millis = System.currentTimeMillis();
        return millis;
    }

    /**
     * Verify that configured directories exist, then
     * interactively confirm that formatting is desired
     * for each existing directory and format them.
     *
     * Formatting is refused outright when
     * <code>dfs.namenode.support.allowformat</code> is set to false.
     *
     * @param conf configuration naming the image and edits directories
     * @param isConfirmationNeeded whether to prompt on stderr/stdin for
     *        every existing image directory before formatting
     * @return true if formatting was aborted, false otherwise
     * @throws IOException if formatting is disallowed by configuration or
     *         an I/O error occurs
     */
    private static boolean format(Configuration conf, boolean isConfirmationNeeded) throws IOException {
        boolean allowFormat = conf.getBoolean("dfs.namenode.support.allowformat", true);
        if (!allowFormat) {
            throw new IOException("The option dfs.namenode.support.allowformat is "
                    + "set to false for this filesystem, so it " + "cannot be formatted. You will need to set "
                    + "dfs.namenode.support.allowformat parameter " + "to true in order to format this filesystem");
        }
        Collection<File> dirsToFormat = FSNamesystem.getNamespaceDirs(conf);
        Collection<File> editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf);
        if (isConfirmationNeeded) {
            for (File curDir : dirsToFormat) {
                if (!curDir.exists()) {
                    continue;
                }
                System.err.print("Re-format filesystem in " + curDir + " ? (Y or N) ");
                if (System.in.read() != 'Y') {
                    System.err.println("Format aborted in " + curDir);
                    return true;
                }
                // Discard the rest of the line (the enter key). Stop at end of
                // input as well, otherwise a closed stdin would spin forever.
                int c;
                do {
                    c = System.in.read();
                } while (c != '\n' && c != -1);
            }
        }

        FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat, editDirsToFormat), conf);
        nsys.dir.fsImage.format();
        return false;
    }

    /**
     * Finalizes a previous upgrade of the configured image directories,
     * optionally asking for confirmation on stderr/stdin first.
     *
     * @param conf configuration naming the image and edits directories
     * @param isConfirmationNeeded whether to prompt before finalizing
     * @return true if finalizing was aborted, false otherwise
     * @throws IOException if an I/O error occurs
     */
    private static boolean finalize(Configuration conf, boolean isConfirmationNeeded) throws IOException {
        Collection<File> dirsToFormat = FSNamesystem.getNamespaceDirs(conf);
        Collection<File> editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf);
        FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat, editDirsToFormat), conf);
        System.err.print("\"finalize\" will remove the previous state of the files system.\n"
                + "Recent upgrade will become permanent.\n" + "Rollback option will not be available anymore.\n");
        if (isConfirmationNeeded) {
            System.err.print("Finalize filesystem state ? (Y or N) ");
            if (System.in.read() != 'Y') {
                System.err.println("Finalize aborted.");
                return true;
            }
            // Discard the rest of the line (the enter key). Stop at end of
            // input as well, otherwise a closed stdin would spin forever.
            int c;
            do {
                c = System.in.read();
            } while (c != '\n' && c != -1);
        }
        nsys.dir.fsImage.finalizeUpgrade();
        return false;
    }

    /**
     * Command line entry point.
     *
     * Creates an AvatarNodeNew from the arguments and calls its
     * waitForRestart(). Any Throwable is logged. The whole sequence is
     * repeated for as long as the static doRestart flag is found set
     * after an iteration.
     *
     * @param argv command line arguments handed to createAvatarNode
     */
    public static void main(String argv[]) throws Exception {
        System.out.println("--------------AvatarNode.main()-----0------");
        for (;;) {
            // Clear the flag first; it is set again when a restart is requested.
            doRestart = false;
            try {
                StringUtils.startupShutdownMessage(AvatarNodeNew.class, argv, LOG);
                final AvatarNodeNew node = createAvatarNode(argv, null);
                if (node != null) {
                    node.waitForRestart();
                }
            } catch (Throwable failure) {
                LOG.error(StringUtils.stringifyException(failure));
                if (doRestart) {
                    LOG.error("AvatarNode restarting...");
                }
            }
            if (!doRestart) {
                break;
            }
        }
    }

    //--------------------------------------
    /**
     * Builds the socket address made of the local IP reported by
     * getHostIP.getLocalIP() and the AvatarNode port.
     *
     * The port comes from <code>dfs.avatarnode.port</code> and defaults to
     * the NameNode port plus one.
     *
     * @param conf configuration to read the ports from
     * @return the resulting socket address
     */
    public static InetSocketAddress getLocalAddress(Configuration conf) {
        final int fallbackPort = NameNode.getAddress(conf).getPort() + 1;
        final int avatarPort = conf.getInt("dfs.avatarnode.port", fallbackPort);
        return new InetSocketAddress(getHostIP.getLocalIP(), avatarPort);
    }

    /**
     * Builds the socket address used for the avatar shell.
     *
     * The host comes from <code>dfs.avatarshell.ip</code> and defaults to
     * the NameNode host name; the port comes from
     * <code>dfs.avatarnode.port</code> and defaults to the NameNode port
     * plus one.
     *
     * @param conf configuration to read host and port from
     * @return the resulting socket address
     */
    public static InetSocketAddress getAvatarShellAddress(Configuration conf) {
        final InetSocketAddress nameNodeAddr = NameNode.getAddress(conf);
        final int avatarPort = conf.getInt("dfs.avatarnode.port", nameNodeAddr.getPort() + 1);
        final String shellHost = conf.get("dfs.avatarshell.ip", nameNodeAddr.getHostName());
        return new InetSocketAddress(shellHost, avatarPort);
    }

    //added by wanglei --2011.11.16 --start
    /**
     * Copies the first configured image directory (<code>dfs.name.dir</code>)
     * and the first configured edits directory
     * (<code>dfs.name.edits.dir</code>) to the given NFS locations.
     *
     * A source directory that does not exist is skipped. After the image
     * copy the lock file and the IMAGENEW file are deleted from the
     * destination; the edits destination is left exactly as copied.
     *
     * @param NFSEditPath destination directory for the edits directory
     * @param NFSImagePath destination directory for the image directory
     * @throws IOException if an existing destination cannot be renamed or
     *         a copy fails
     */
    public void copyEditLogAndFSImageToNFS(String NFSEditPath, String NFSImagePath) throws IOException {
        final String[] nameDirs = this.startupConf.getStrings("dfs.name.dir");
        final String[] editDirs = this.startupConf.getStrings("dfs.name.edits.dir");

        final FileSystem rawLocalFs = FileSystem.getLocal(this.confg).getRaw();

        final File imageSource = new File(nameDirs[0]);
        final File editSource = new File(editDirs[0]);
        final File imageTarget = new File(NFSImagePath);
        final File editTarget = new File(NFSEditPath);
        // One timestamp is shared by both moved-aside directory names.
        final String stamp = dateForm.format(new Date(now()));

        if (imageSource.exists()) {
            moveAsideAndCopyDir(rawLocalFs, imageSource, imageTarget, stamp);

            // Remove the lock file from the newly synced directory
            new File(imageTarget, STORAGE_FILE_LOCK).delete();

            // Remove fsimage.ckpt if it exists.
            new File(imageTarget.toString() + IMAGENEW).delete();
        }

        if (editSource.exists()) {
            moveAsideAndCopyDir(rawLocalFs, editSource, editTarget, stamp);
        }
    }

    /**
     * Renames an existing destination to a sibling carrying the timestamp,
     * then copies the source directory to the destination path.
     */
    private void moveAsideAndCopyDir(FileSystem fs, File source, File target, String stamp) throws IOException {
        if (target.exists()) {
            // pathSeparator (not separator) keeps the backup next to the
            // destination instead of inside it.
            final File aside = new File(target + File.pathSeparator + stamp);
            if (!target.renameTo(aside)) {
                throw new IOException("Unable to rename " + target + " to " + aside);
            }
            LOG.info("Moved aside " + target + " as " + aside);
        }

        final boolean copied = FileUtil.copy(fs, new Path(source.toString()), fs, new Path(target.toString()),
                false, this.startupConf);
        if (!copied) {
            final String failure = "Error copying " + source + " to " + target;
            LOG.error(failure);
            throw new IOException(failure);
        }
        LOG.info("Copied " + source + " into " + target);
    }

    /**
     * Registers the given NFS directories as additional storage directories
     * of the FSImage and copies the current image and edit log into them.
     *
     * The edit log is locked and closed while it is copied; it is reopened
     * and unlocked afterwards even when the copy fails.
     *
     * @param NFSEditPath NFS directory that receives the edits directory
     * @param NFSImagePath NFS directory that receives the image directory
     * @return true if the directories were added and both copies finished;
     *         false if addStorageDirectories declined or an I/O error
     *         occurred (the error is logged)
     */
    public boolean AddEditLogAndFSImagePath(String NFSEditPath, String NFSImagePath) {
        try {
            if (!this.getFSImage().addStorageDirectories(NFSEditPath, NFSImagePath)) {
                return false;
            }

            this.copyFSImageToNFS(NFSImagePath);

            this.getFSImage().getEditLog().editLock();
            try {
                this.getFSImage().getEditLog().close();
                try {
                    this.copyEditLogToNFS(NFSEditPath);
                } finally {
                    // The log was closed above; reopen it whether or not
                    // the copy succeeded.
                    this.getFSImage().getEditLog().open();
                }
            } finally {
                // Never leave the edit log locked behind a failure.
                this.getFSImage().getEditLog().editUnlock();
            }
            return true;
        } catch (IOException e) {
            // Report the failure to the caller instead of claiming success.
            LOG.error("Failed to add edit log path " + NFSEditPath + " and image path " + NFSImagePath, e);
            return false;
        }
    }

    //added by wanglei --2011.11.16 --end
    //added by wanglei --2011.12.26 --start
    /**
     * Looks up the address of the remote namenode in the startup
     * configuration.
     *
     * A NODEZERO instance reads <code>fs.default.name1</code>; a NODEONE
     * instance reads <code>fs.default.name0</code>.
     *
     * @return the configured value, as returned by the configuration
     * @throws IOException if the instance id is neither NODEZERO nor NODEONE
     */
    public String getRemoteAvatarnodeAddress() throws IOException {
        if (instance == InstanceId.NODEZERO) {
            return this.startupConf.get("fs.default.name1");
        }
        if (instance == InstanceId.NODEONE) {
            return this.startupConf.get("fs.default.name0");
        }
        throw new IOException("Unknown instance " + instance);
    }

    //added by wanglei --2011.12.26 --end
    //added by wanglei --2012.2.11 --start
    /**
     * Copies the first directory configured in <code>dfs.name.dir</code> to
     * the given NFS location.
     *
     * Nothing is done if the source directory does not exist. An existing
     * destination is first renamed to a sibling whose name carries the
     * current timestamp. After the copy, the lock file and the IMAGENEW
     * file are deleted from the destination.
     *
     * @param NFSImagePath destination directory for the image directory
     * @throws IOException if the existing destination cannot be renamed or
     *         the copy fails
     */
    public void copyFSImageToNFS(String NFSImagePath) throws IOException {
        final String[] nameDirs = this.startupConf.getStrings("dfs.name.dir");
        final FileSystem rawLocalFs = FileSystem.getLocal(this.confg).getRaw();
        final File source = new File(nameDirs[0]);
        final File target = new File(NFSImagePath);
        final String stamp = dateForm.format(new Date(now()));

        if (!source.exists()) {
            return;
        }

        if (target.exists()) {
            // pathSeparator (not separator) keeps the backup next to the
            // destination instead of inside it.
            final File aside = new File(target + File.pathSeparator + stamp);
            if (!target.renameTo(aside)) {
                throw new IOException("Unable to rename " + target + " to " + aside);
            }
            LOG.info("Moved aside " + target + " as " + aside);
        }

        final boolean copied = FileUtil.copy(rawLocalFs, new Path(source.toString()), rawLocalFs,
                new Path(target.toString()), false, this.startupConf);
        if (!copied) {
            final String failure = "Error copying " + source + " to " + target;
            LOG.error(failure);
            throw new IOException(failure);
        }
        LOG.info("Copied " + source + " into " + target);

        // Remove the lock file from the newly synced directory
        new File(target, STORAGE_FILE_LOCK).delete();

        // Remove fsimage.ckpt if it exists.
        new File(target.toString() + IMAGENEW).delete();
    }

    /**
     * Copies the first directory configured in
     * <code>dfs.name.edits.dir</code> to the given NFS location.
     *
     * Nothing is done if the source directory does not exist. An existing
     * destination is first renamed to a sibling whose name carries the
     * current timestamp.
     *
     * @param NFSEditPath destination directory for the edits directory
     * @throws IOException if the existing destination cannot be renamed or
     *         the copy fails
     */
    public void copyEditLogToNFS(String NFSEditPath) throws IOException {
        final String[] editDirs = this.startupConf.getStrings("dfs.name.edits.dir");
        final FileSystem rawLocalFs = FileSystem.getLocal(this.confg).getRaw();
        final File source = new File(editDirs[0]);
        final File target = new File(NFSEditPath);
        final String stamp = dateForm.format(new Date(now()));

        if (!source.exists()) {
            return;
        }

        if (target.exists()) {
            // pathSeparator (not separator) keeps the backup next to the
            // destination instead of inside it.
            final File aside = new File(target + File.pathSeparator + stamp);
            if (!target.renameTo(aside)) {
                throw new IOException("Unable to rename " + target + " to " + aside);
            }
            LOG.info("Moved aside " + target + " as " + aside);
        }

        final boolean copied = FileUtil.copy(rawLocalFs, new Path(source.toString()), rawLocalFs,
                new Path(target.toString()), false, this.startupConf);
        if (!copied) {
            final String failure = "Error copying " + source + " to " + target;
            LOG.error(failure);
            throw new IOException(failure);
        }
        LOG.info("Copied " + source + " into " + target);
    }
    //added by wanglei --2012.2.11 --end

    /**
     * Download <code>fsimage</code>
     * files from the name-node.
     *
     * The image is requested from the HTTP address returned by
     * getRemoteNamenodeHttpName(conf) and written to
     * <code>current/fsimage</code> below every directory listed in
     * <code>dfs.name.dir</code>.
     *
     * @param conf configuration naming the remote node and the local
     *        image directories
     * @throws IOException if no image directory is configured or the
     *         transfer fails
     */
    public static void downloadFsimageFile(Configuration conf) throws IOException {
        String fsName = getRemoteNamenodeHttpName(conf);
        // get fsimage
        String fileid = "getimage=1";
        Collection<String> namedirs = conf.getStringCollection("dfs.name.dir");
        if (namedirs.isEmpty()) {
            throw new IOException("No checkpoint targets.");
        }
        File[] srcNames = new File[namedirs.size()];
        int i = 0;
        for (String namedir : namedirs) {
            // Advance the index so every configured directory gets its own
            // slot instead of all of them overwriting slot 0.
            srcNames[i++] = new File(namedir, "/current/fsimage");
        }

        TransferFsImage.getFileClient(fsName, fileid, srcNames);
        LOG.info("Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");
    }

    /**
     * Download  <code>edits</code>
     * files from the name-node.
     *
     * The edits file is requested from the HTTP address returned by
     * getRemoteNamenodeHttpName(conf) and written to
     * <code>current/edits</code> below every directory listed in
     * <code>dfs.name.edits.dir</code>.
     *
     * @param conf configuration naming the remote node and the local
     *        edits directories
     * @throws IOException if no edits directory is configured or the
     *         transfer fails
     */
    public static void downloadEditFile(Configuration conf) throws IOException {

        String fsName = getRemoteNamenodeHttpName(conf);

        // get edits file
        String fileid = "getedit=1";
        Collection<String> editdirs = conf.getStringCollection("dfs.name.edits.dir");
        if (editdirs.isEmpty()) {
            throw new IOException("No checkpoint targets.");
        }
        File[] srcNames = new File[editdirs.size()];
        int i = 0;
        for (String editdir : editdirs) {
            // Advance the index so every configured directory gets its own
            // slot instead of all of them overwriting slot 0.
            srcNames[i++] = new File(editdir, "/current/edits");
        }
        TransferFsImage.getFileClient(fsName, fileid, srcNames);
        LOG.info("Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");
    }

    /**
     * Downloads the fsimage file and then the edits file from the remote
     * name-node.
     *
     * This is best effort: an IOException from either download is logged
     * and not propagated, and the edits download is skipped when the image
     * download fails.
     *
     * @param conf configuration handed to both download methods
     */
    static void downloadEditFileAndFsimageFile(Configuration conf) {
        try {
            downloadFsimageFile(conf);
            downloadEditFile(conf);
        } catch (IOException e) {
            // Record the failure in the node's log rather than on stderr.
            LOG.error("Failed to download fsimage and edits from the remote name-node", e);
        }

    }

}