// org.apache.hadoop.hdfs.server.namenode.SnapshotNode.java source code
//
// Java tutorial
//
// Introduction
//
// Here is the source code for org.apache.hadoop.hdfs.server.namenode.SnapshotNode.java
//
// Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.commons.logging.*;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSLocatedBlocks;
import org.apache.hadoop.hdfs.DFSInputStream;
import org.apache.hadoop.hdfs.protocol.*;
import org.apache.hadoop.hdfs.server.common.Storage.*;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.FSImage.CheckpointStates;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.*;
import org.apache.hadoop.hdfs.server.namenode.WaitingRoom.*;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.SnapshotProtocol;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.ipc.*;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.*;

/**********************************************************
 * The SnapshotNode is responsible for taking periodic 
 * snapshots of the HDFS. The current design only allows
 * one SnapshotNode per cluster.
 *
 * The SnapshotNode is a daemon that periodically wakes
 * up (determined by the schedule specified in the configuration),
 * triggers a periodic snapshot and then goes back to sleep.
 * The SnapshotNode uses the Namesystem's jetty server to 
 * retrieve files.
 *
 **********************************************************/
public class SnapshotNode implements SnapshotProtocol {

    public static final Log LOG = LogFactory.getLog(SnapshotNode.class);

    public static final String CURRENT_DIR = "/current"; // subdir of tempDir holding downloaded image/edits files
    public static String SSNAME = "dfs_snapshot_"; // prefix of snapshot file names stored under ssDir

    private Configuration conf; // configuration this node was started with

    private String fileServer; // jetty image server namenode listens on (host:port)
    private FileSystem dfs; // file system snapshots are stored in

    private String tempDir; // local temp dir to download files from namenode
    private String ssDir; // DFS path to store finished snapshots in

    private Daemon purgeThread; // waiting room purger thread

    private ExecutorService leaseUpdateThreadPool; // workers refreshing lengths of leased files
    private int maxLeaseUpdateThreads; // cap on lease-update worker threads

    private Server server; // RPC Server answering SnapshotProtocol calls
    private InetSocketAddress serverAddress = null; // actual (possibly ephemeral) RPC server address

    private NamenodeProtocol namenode; // RPC proxy to the namenode
    private InetSocketAddress nameNodeAddr; // namenode RPC address
    /**
     * Creates a SnapshotNode and starts its daemons and RPC server.
     * On initialization failure the node logs the error (with its cause)
     * and shuts down whatever was already started.
     * @param conf configuration to run with
     */
    public SnapshotNode(Configuration conf) {
        try {
            this.conf = conf;
            init();
        } catch (IOException e) {
            // BUG FIX: log the exception itself instead of dropping the cause.
            LOG.error("Failed to start SnapshotNode", e);
            shutdown();
        }
    }

    /**
     * Initialize SnapshotNode: resolve snapshot/temp directories, ensure the
     * snapshot dir exists in DFS, start the waiting-room purger, connect to
     * the namenode, and start the SnapshotProtocol RPC server.
     * @throws IOException if file system access or RPC setup fails
     */
    private void init() throws IOException {
        ssDir = conf.get("fs.snapshot.dir", "/.SNAPSHOT"); // DFS dir for finished snapshots
        tempDir = conf.get("fs.snapshot.tempdir", "/tmp/snapshot"); // local staging dir

        fileServer = getImageServer(); // namenode's jetty image server (host:port)
        dfs = FileSystem.get(conf);

        // Make sure the snapshot directory exists in DFS
        Path ssPath = new Path(ssDir);
        if (!dfs.exists(ssPath)) {
            dfs.mkdirs(ssPath);
        }

        maxLeaseUpdateThreads = conf.getInt("fs.snapshot.leaseupdatethreads", 100);

        // Waiting room purge thread
        purgeThread = new Daemon((new WaitingRoom(conf)).getPurger());
        purgeThread.start();

        // Get namenode rpc connection (blocks until the namenode is reachable)
        nameNodeAddr = NameNode.getAddress(conf);
        namenode = (NamenodeProtocol) RPC.waitForProxy(NamenodeProtocol.class, NamenodeProtocol.versionID,
                nameNodeAddr, conf);

        // Snapshot RPC Server
        InetSocketAddress socAddr = SnapshotNode.getAddress(conf);
        int handlerCount = conf.getInt("fs.snapshot.handler.count", 10);
        server = RPC.getServer(this, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf);
        // The rpc-server port can be ephemeral... ensure we have the correct info
        serverAddress = server.getListenerAddress();
        LOG.info("SnapshotNode up at: " + serverAddress);

        server.start(); // start rpc server
    }

    /** Parses a "host:port" string into a socket address. */
    private static InetSocketAddress getAddress(String address) {
        final InetSocketAddress parsed = NetUtils.createSocketAddr(address);
        return parsed;
    }

    /**
     * Returns the address the SnapshotNode RPC server should bind to, as
     * configured by "fs.snapshot.server.address" (default "localhost:60000").
     * @param conf configuration to read the address from
     */
    public static InetSocketAddress getAddress(Configuration conf) {
        // Let Configuration supply the default instead of a manual null check
        // (the original also built the default via an odd string+int concat).
        String nodeport = conf.get("fs.snapshot.server.address", "localhost:60000");
        return getAddress(nodeport);
    }

    /**
     * Returns the version of SnapshotProtocol when asked about it; any other
     * protocol name is rejected.
     */
    @Override
    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
        if (!protocol.equals(SnapshotProtocol.class.getName())) {
            throw new IOException("Unknown protocol to snapshot node: " + protocol);
        }
        return SnapshotProtocol.versionID;
    }

    /** Delegates protocol-signature computation to the common helper. */
    @Override
    public ProtocolSignature getProtocolSignature(String protocol, long clientVersion, int clientMethodsHash)
            throws IOException {
        final ProtocolSignature signature =
                ProtocolSignature.getProtocolSignature(this, protocol, clientVersion, clientMethodsHash);
        return signature;
    }

    /**
     * Ensures the local download area (tempDir and tempDir/current) exists and
     * empties it so a fresh set of checkpoint files can be fetched.
     * @throws IOException if either directory cannot be created, is not a
     *         directory, or its contents cannot be listed
     */
    void prepareDownloadDirs() throws IOException {
        ensureDir(new File(tempDir), "Temp Dir: " + tempDir);
        File current = ensureDir(new File(tempDir + CURRENT_DIR), "Current in Temp Dir: " + tempDir + CURRENT_DIR);

        // Delete all previously downloaded files. BUG FIX: listFiles() returns
        // null on I/O error rather than throwing, which NPE'd the old loop.
        File[] stale = current.listFiles();
        if (stale == null) {
            throw new IOException("Could not list contents of " + current);
        }
        for (File f : stale) {
            if (!f.delete()) {
                // Best-effort cleanup; surface failures instead of ignoring them
                LOG.warn("Could not delete stale download file " + f);
            }
        }
    }

    /** Creates {@code dir} if needed and verifies it is a directory. */
    private static File ensureDir(File dir, String desc) throws IOException {
        if (!dir.exists())
            dir.mkdirs();
        if (!dir.isDirectory())
            throw new IOException(desc + " is not a directory.");
        return dir;
    }

    /**
     * Shutdown snapshot node and attached daemons: the waiting-room purger,
     * the namenode RPC proxy, and the SnapshotProtocol RPC server.
     * Safe to call even when init() failed part-way through.
     */
    public void shutdown() {
        // Reuse the single implementation of purger shutdown instead of
        // duplicating it here (keeps the two methods consistent).
        shutdownWaitingRoomPurger();

        // Guard against init() having failed before the proxy was created
        if (namenode != null) {
            RPC.stopProxy(namenode);
        }
        if (server != null)
            server.stop();
    }

    /**
     * Stops only the waiting-room purger daemon, leaving the RPC server and
     * namenode proxy running.
     */
    public void shutdownWaitingRoomPurger() {
        if (purgeThread == null) {
            return; // never started
        }
        ((WaitingRoomPurger) purgeThread.getRunnable()).shutdown();
    }

    // SNAPSHOT PROTOCOL //

    /**
     * Lists the ids of all snapshots currently stored under ssDir.
     * @return snapshot ids (file names with the SSNAME prefix stripped)
     * @throws FileNotFoundException if the snapshot dir does not exist
     * @throws IOException if ssDir is not a directory
     */
    @Override
    public String[] listSnapshots() throws IOException {
        Path ssPath = new Path(ssDir);

        if (!dfs.exists(ssPath)) {
            throw new FileNotFoundException("Snapshot dir doesn't exist");
        }

        FileStatus ssStatus = dfs.getFileStatus(ssPath);
        if (!ssStatus.isDir()) {
            throw new IOException("ssDir " + ssDir + " is not a directory");
        }

        FileStatus[] files = dfs.listStatus(ssPath);
        List<String> ssIds = new ArrayList<String>();

        // Collect snapshot ids, skipping dirs and unrelated files.
        // BUG FIX: use the shared SSNAME constant instead of re-hard-coding
        // the "dfs_snapshot_" literal and its length (13).
        for (FileStatus ss : files) {
            if (ss.isDir())
                continue; // skips dirs
            String name = ss.getPath().getName();
            if (!name.startsWith(SSNAME))
                continue;
            ssIds.add(name.substring(SSNAME.length()));
        }

        return ssIds.toArray(new String[ssIds.size()]);
    }

    /** Returns the file status of the snapshot file for the given id. */
    @Override
    public FileStatus getSnapshotFileStatus(String id) throws IOException {
        final Path snapshotFile = new Path(ssDir + "/" + SSNAME + id);
        return dfs.getFileStatus(snapshotFile);
    }

    /** Deletes the snapshot file for the given id; returns true on success. */
    @Override
    public boolean deleteSnapshot(String id) throws IOException {
        final Path target = new Path(ssDir + "/" + SSNAME + id);
        return dfs.delete(target, false); // non-recursive: snapshots are plain files
    }

    /**
     * Loads the named snapshot image into a fresh namesystem and returns the
     * located blocks for every file at or under {@code path} in it.
     * @param snapshotId id of the snapshot to inspect
     * @param path file or directory path inside the snapshot
     * @throws IOException if the path does not exist in the snapshot
     */
    @Override
    public LocatedBlocksWithMetaInfo[] getLocatedBlocks(String snapshotId, String path) throws IOException {
        FSImage fsImage = new FSImage();
        FSNamesystem namesystem = new FSNamesystem(fsImage, conf);
        Path ssPath = new Path(ssDir + "/" + SSNAME + snapshotId);

        FSDataInputStream in = dfs.open(ssPath);
        try {
            fsImage.loadFSImage(new File(ssPath.toString()), in);
        } finally {
            in.close(); // BUG FIX: stream was never closed
        }

        try {
            INode inode = namesystem.dir.getInode(path);
            if (inode == null) {
                throw new IOException("File/dir at " + path + " does not exist in snapshot " + snapshotId);
            }

            List<LocatedBlocksWithMetaInfo> blocks = new ArrayList<LocatedBlocksWithMetaInfo>();
            getAllLocatedBlocks(inode, blocks); // fill blocks with LocatedBlocks for all files

            return blocks.toArray(new LocatedBlocksWithMetaInfo[blocks.size()]);
        } finally {
            fsImage.close(); // BUG FIX: was skipped when the path was missing or an error occurred
        }
    }

    /**
     * Creates a snapshot with the given id: downloads the namenode's image and
     * edit logs, merges them in memory, optionally refreshes lengths of leased
     * (under-construction) files, and saves the result under ssDir.
     * @param snapshotId id for the new snapshot
     * @param updateLeases whether to refresh lengths of files under construction
     * @throws IOException if any step of snapshot creation fails
     */
    @Override
    public void createSnapshot(String snapshotId, boolean updateLeases) throws IOException {
        // Create new SnapshotStore backed by the local temp dir
        SnapshotStorage ssStore = new SnapshotStorage(conf, Util.stringAsURI(tempDir));
        try {
            // Download image & edit files from namenode
            downloadSnapshotFiles(ssStore);

            // Merge image and edit files
            doMerge(ssStore);

            // Update file lengths for leased files (optional)
            if (updateLeases) {
                updateLeasedFiles(ssStore);
            }

            // Save snapshot
            saveSnapshot(ssStore, snapshotId);
        } finally {
            ssStore.close(); // BUG FIX: was leaked when any step above threw
        }
    }

    /**
     * Recursively collects located blocks (with metadata refreshed by the
     * namenode) for every file at or under {@code inode}.
     */
    private void getAllLocatedBlocks(INode inode, List<LocatedBlocksWithMetaInfo> blocks) throws IOException {
        if (!inode.isDirectory()) {
            INodeFile file = (INodeFile) inode;
            List<LocatedBlock> located = new ArrayList<LocatedBlock>();
            for (BlockInfo blk : file.getBlocks()) {
                // DatanodeInfo is unavailable, so set as empty for now
                located.add(new LocatedBlock(blk, new DatanodeInfo[0]));
            }

            LocatedBlocks locatedBlocks = new LocatedBlocks(file.computeContentSummary().getLength(), // flength
                    located, // blks
                    false); // isUnderConstruction

            // Update DatanodeInfo from NN
            blocks.add(namenode.updateDatanodeInfo(locatedBlocks));
            return;
        }

        // Directory: recurse into each child
        for (INode child : ((INodeDirectory) inode).getChildren()) {
            getAllLocatedBlocks(child, blocks);
        }
    }

    /**
     * Writes the merged snapshot to a temp file in DFS and then renames it
     * into place under ssDir.
     * @param ssStore storage holding the merged in-memory image
     * @param id id of the snapshot being saved
     * @throws IOException if writing or the final rename fails
     */
    void saveSnapshot(SnapshotStorage ssStore, String id) throws IOException {
        // Create new snapshot in temp file
        Path tmpPath = new Path("/tmp/" + SSNAME + id);
        FSDataOutputStream out = dfs.create(tmpPath);
        try {
            ssStore.saveSnapshot(tmpPath.toString(), out);
        } finally {
            out.close(); // BUG FIX: was leaked if saveSnapshot threw
        }

        // Rename snapshot into its final location
        Path ssPath = new Path(ssDir + "/" + SSNAME + id);
        if (!dfs.rename(tmpPath, ssPath)) {
            throw new IOException("Could not rename temp snapshot file");
        }
    }

    /**
     * Merges the downloaded image and edit logs in memory.
     * NOTE(review): the FSNamesystem constructed here looks unused, but its
     * constructor presumably attaches the namesystem to ssStore, which
     * doMerge() relies on — confirm before removing.
     */
    void doMerge(SnapshotStorage ssStore) throws IOException {
        FSNamesystem namesystem = new FSNamesystem(ssStore, conf);
        ssStore.doMerge();
    }

    /**
     * Creates a snapshot whose id is the current system time, with lease
     * updating enabled.
     */
    void createSnapshot() throws IOException {
        String timeBasedId = Long.toString(System.currentTimeMillis());
        createSnapshot(timeBasedId, true);
    }

    /** Creates a snapshot with the given id, with lease updating enabled. */
    void createSnapshot(String id) throws IOException {
        final boolean updateLeases = true;
        createSnapshot(id, updateLeases);
    }

    /**
     * Tries to get the most up to date lengths of files under construction.
     * Worker threads query datanodes for each leased path; blocks that could
     * not be resolved there are queued by the workers and their lengths are
     * fetched from the namenode in one batch at the end.
     * @param ssStore storage whose namesystem holds the lease information
     * @throws IOException if the workers do not finish within 20 minutes or
     *         the waiting thread is interrupted
     */
    void updateLeasedFiles(SnapshotStorage ssStore) throws IOException {
        FSNamesystem fsNamesys = ssStore.getFSNamesystem();
        List<Block> blocksForNN = new ArrayList<Block>();

        leaseUpdateThreadPool = new ThreadPoolExecutor(1, maxLeaseUpdateThreads, 60, TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>());
        ((ThreadPoolExecutor) leaseUpdateThreadPool).allowCoreThreadTimeOut(true);

        // Try to update lengths for leases from DN
        LightWeightLinkedSet<Lease> sortedLeases = fsNamesys.leaseManager.getSortedLeases();
        Iterator<Lease> itr = sortedLeases.iterator();
        while (itr.hasNext()) {
            Lease lease = itr.next();
            for (String path : lease.getPaths()) {
                // Update file lengths using worker threads to increase throughput
                leaseUpdateThreadPool.execute(new LeaseUpdateWorker(conf, path, fsNamesys, blocksForNN));
            }
        }

        try {
            leaseUpdateThreadPool.shutdown();
            // Wait till update tasks finish successfully (max 20 mins)
            if (!leaseUpdateThreadPool.awaitTermination(1200, TimeUnit.SECONDS)) {
                throw new IOException("Updating lease files failed");
            }
        } catch (InterruptedException e) {
            // BUG FIX: restore the interrupt status and preserve the cause;
            // the original dropped both.
            Thread.currentThread().interrupt();
            throw new IOException("Snapshot creation interrupted while updating leased files", e);
        }

        // Fetch block lengths for renamed/deleted leases from NN
        long[] blockIds = new long[blocksForNN.size()];

        for (int i = 0; i < blocksForNN.size(); ++i) {
            blockIds[i] = blocksForNN.get(i).getBlockId();
        }

        long[] lengths = namenode.getBlockLengths(blockIds);

        for (int i = 0; i < blocksForNN.size(); ++i) {
            if (lengths[i] == -1) {
                // Couldn't update block length, keep preferred length
                LOG.error("Couldn't update length for block " + blocksForNN.get(i));
            } else {
                blocksForNN.get(i).setNumBytes(lengths[i]);
            }
        }
    }

    /**
     * Download fsimage, edits and edits.new files from the name-node.
     * Files will be downloaded in CURRENT_DIR. The loop repeats until a
     * consistent set is fetched: the namenode's checkpoint signature must be
     * unchanged across the download and no checkpoint upload may have
     * completed in between.
     * @throws IOException
     */
    void downloadSnapshotFiles(SnapshotStorage ssStore) throws IOException {
        CheckpointSignature start = namenode.getCheckpointSignature();
        ssStore.storage.setStorageInfo(start);
        CheckpointSignature end = null;
        boolean success;

        do {
            // Clear temp files left over from any previous attempt
            prepareDownloadDirs();

            // get fsimage
            File[] srcNames = ssStore.getImageFiles();
            assert srcNames.length == 1 : "No snapshot temporary dir.";
            TransferFsImage.downloadImageToStorage(fileServer, HdfsConstants.INVALID_TXID, ssStore, true, srcNames);
            LOG.info("Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");

            // get edits file
            srcNames = ssStore.getEditsFiles();
            assert srcNames.length == 1 : "No snapshot temporary dir.";
            TransferFsImage.downloadEditsToStorage(fileServer, new RemoteEditLog(), ssStore, false);
            LOG.info("Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");

            // get edits.new file (only if in the middle of ckpt)
            try {
                srcNames = ssStore.getEditsNewFiles();
                assert srcNames.length == 1 : "No snapshot temporary dir.";
                TransferFsImage.downloadEditsToStorage(fileServer, new RemoteEditLog(), ssStore, true);
                LOG.info("Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");
            } catch (FileNotFoundException e) {
                // do nothing — no edits.new means no checkpoint was in progress
            }

            end = namenode.getCheckpointSignature();

            // Are the downloaded files consistent? They are when the checkpoint
            // time did not advance and no checkpoint upload finished while we
            // were downloading.
            success = end.checkpointTime == start.checkpointTime
                    && end.checkpointState != CheckpointStates.UPLOAD_DONE;

            start = end; // retry from the newest signature if inconsistent
        } while (!success);
    }

    /**
     * Returns the jetty image server (host:port) that the Namenode is
     * listening on.
     * @throws IOException if the default file system is not HDFS
     */
    private String getImageServer() throws IOException {
        final URI fsName = FileSystem.getDefaultUri(conf);
        final String scheme = fsName.getScheme();

        if (!"hdfs".equals(scheme)) {
            throw new IOException("This is not a DFS");
        }

        return NetUtils.getServerAddress(conf, "dfs.info.bindAddress", "dfs.info.port", "dfs.http.address");
    }

    /**
     * FSImage specialization that stages a namenode image plus edit logs in a
     * local temp directory and merges them in memory before the snapshot is
     * written out.
     */
    static class SnapshotStorage extends FSImage {
        Configuration conf;
        File tempDir; // local staging directory holding downloaded files
        DataOutputStream out; // NOTE(review): appears unused here — confirm before removing

        public SnapshotStorage(Configuration conf, URI tempDir) throws IOException {
            super(tempDir);
            this.conf = conf;
            this.tempDir = new File(tempDir.getPath());
        }

        /**
         * Merge image and edit log (in memory).
         * Files to merge include fsimage, edits, and possibly edits.new
         * (the latter exists only when a checkpoint was in progress when the
         * files were downloaded).
         * @throws IOException if the snapshot temp directory cannot be located
         */
        void doMerge() throws IOException {
            StorageDirectory sdTemp = null;
            Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE_AND_EDITS);
            if (it.hasNext()) {
                sdTemp = it.next();
            } else {
                throw new IOException("Could not locate snapshot temp directory.");
            }

            // Load the base image, then replay edits (and edits.new if present)
            loadFSImage(NNStorage.getStorageFile(sdTemp, NameNodeFile.IMAGE));
            Collection<EditLogInputStream> editStreams = new ArrayList<EditLogInputStream>();
            EditLogInputStream is = new EditLogFileInputStream(
                    NNStorage.getStorageFile(sdTemp, NameNodeFile.EDITS));
            editStreams.add(is);
            File editsNew = NNStorage.getStorageFile(sdTemp, NameNodeFile.EDITS_NEW);
            if (editsNew.exists()) {
                is = new EditLogFileInputStream(editsNew);
                editStreams.add(is);
            }
            loadEdits(editStreams);
        }

        /** 
         * Writes snapshot to the OutputStream.
         * @param dest destination path recorded for the image
         * @param out Stream to write snapshot to
         */
        void saveSnapshot(String dest, DataOutputStream out) throws IOException {
            saveFSImage(dest, out);
        }
    }

    /**
     * Worker that refreshes the length of the last (in-flight) block of one
     * leased file. It first asks the datanodes via a DFSClient read; if the
     * file was renamed/deleted in the meantime, the block is set to the
     * preferred length and queued so the caller can batch-query the namenode.
     */
    private class LeaseUpdateWorker implements Runnable {
        String path; // leased file whose last-block length should be refreshed
        Configuration conf;
        List<Block> blocks; // shared sink of blocks still needing NN lengths
        FSNamesystem fsNamesys;

        public LeaseUpdateWorker(Configuration conf, String path, FSNamesystem namesystem, List<Block> blocks) {
            this.path = path;
            this.conf = conf;
            this.blocks = blocks;
            this.fsNamesys = namesystem;
        }

        @Override
        public void run() {
            LOG.info("Trying to update lease for file at " + path);

            // Verify that path exists in namespace and is under construction.
            // BUG FIX: the original dereferenced node for the
            // isUnderConstruction() check even when it was null, throwing NPE
            // for files that had vanished from the namespace.
            INodeFile node = null;
            try {
                node = fsNamesys.dir.getFileINode(path);
            } catch (IOException e) {
                LOG.error(StringUtils.stringifyException(e));
            }

            if (node == null || !node.isUnderConstruction()) {
                // Could not find a leased inode in FSNamespace, quit now
                LOG.error("Couldn't update length for leased file at " + path + " because file not in namespace");
                return;
            }

            BlockInfo[] blks = node.getBlocks();

            // If NN has not leased out any block, return
            if (blks.length == 0)
                return;

            int index = blks.length - 1; // index of last file block

            LOG.info("Block at index " + index + " being written for file at  " + path);

            // Pessimistically update last block length from DataNode. 
            // File could have been renamed, and a new file created in its place.
            boolean updatedFromDN = false;
            try {
                DFSClient client = new DFSClient(conf);
                try {
                    DFSInputStream stm = client.open(path);
                    try {
                        DFSLocatedBlocks locBlks = stm.fetchLocatedBlocks();
                        if (locBlks.locatedBlockCount() >= blks.length && blks[index] != null
                                && locBlks.get(index) != null
                                && blks[index].getBlockId() == locBlks.get(index).getBlock().getBlockId()) {
                            blks[index].setNumBytes(locBlks.get(index).getBlock().getNumBytes());
                            updatedFromDN = true;
                        }
                    } finally {
                        stm.close(); // BUG FIX: was leaked on the successful-update early return
                    }
                } finally {
                    client.close(); // BUG FIX: was leaked on early-return and error paths
                }
            } catch (IOException e) {
                LOG.error(StringUtils.stringifyException(e));
            }

            if (updatedFromDN) {
                return; // DN gave us the current length; nothing more to do
            }

            // If file was renamed/deleted, set block length to preferred size
            // and add it to list of blocks which we should try to update from NN
            LOG.info("Couldn't update block " + blks[index] + " for file " + "at " + path
                    + " from DN. Setting length to preferred length "
                    + "and queuing block to be checked from NN for updated length.");
            blks[index].setNumBytes(node.getPreferredBlockSize());

            synchronized (blocks) {
                blocks.add(blks[index]);
            }
        }
    }
}