org.apache.hadoop.hbase.backup.util.RestoreServerUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.backup.util.RestoreServerUtil.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreServerFactory;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.IncrementalRestoreService;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * A collection for methods used by multiple classes to restore HBase tables.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class RestoreServerUtil {

    public static final Log LOG = LogFactory.getLog(RestoreServerUtil.class);

    private final String[] ignoreDirs = { "recovered.edits" };

    protected Configuration conf = null;

    protected Path backupRootPath;

    protected String backupId;

    protected FileSystem fs;
    private final String RESTORE_TMP_PATH = "/tmp";
    private final Path restoreTmpPath;

    // store table name and snapshot dir mapping
    private final HashMap<TableName, Path> snapshotMap = new HashMap<>();

    public RestoreServerUtil(Configuration conf, final Path backupRootPath, final String backupId)
            throws IOException {
        this.conf = conf;
        this.backupRootPath = backupRootPath;
        this.backupId = backupId;
        this.fs = backupRootPath.getFileSystem(conf);
        this.restoreTmpPath = new Path(
                conf.get("hbase.fs.tmp.dir") != null ? conf.get("hbase.fs.tmp.dir") : RESTORE_TMP_PATH, "restore");
    }

    /**
     * return value represent path for:
     * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn"
     * @param tabelName table name
     * @return path to table archive
     * @throws IOException exception
     */
    Path getTableArchivePath(TableName tableName) throws IOException {
        Path baseDir = new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
                HConstants.HFILE_ARCHIVE_DIRECTORY);
        Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
        Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
        Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
        if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
            LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exists");
            tableArchivePath = null; // empty table has no archive
        }
        return tableArchivePath;
    }

    /**
     * Gets region list
     * @param tableName table name
     * @return RegionList region list
     * @throws FileNotFoundException exception
     * @throws IOException exception
     */
    ArrayList<Path> getRegionList(TableName tableName) throws FileNotFoundException, IOException {
        Path tableArchivePath = this.getTableArchivePath(tableName);
        ArrayList<Path> regionDirList = new ArrayList<Path>();
        FileStatus[] children = fs.listStatus(tableArchivePath);
        for (FileStatus childStatus : children) {
            // here child refer to each region(Name)
            Path child = childStatus.getPath();
            regionDirList.add(child);
        }
        return regionDirList;
    }

    /**
     * During incremental backup operation. Call WalPlayer to replay WAL in backup image Currently
     * tableNames and newTablesNames only contain single table, will be expanded to multiple tables in
     * the future
     * @param logDir : incremental backup folders, which contains WAL
     * @param tableNames : source tableNames(table names were backuped)
     * @param newTableNames : target tableNames(table names to be restored to)
     * @throws IOException exception
     */
    public void incrementalRestoreTable(Path[] logDirs, TableName[] tableNames, TableName[] newTableNames)
            throws IOException {

        if (tableNames.length != newTableNames.length) {
            throw new IOException("Number of source tables and target tables does not match!");
        }

        // for incremental backup image, expect the table already created either by user or previous
        // full backup. Here, check that all new tables exists
        try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) {
            for (TableName tableName : newTableNames) {
                if (!admin.tableExists(tableName)) {
                    admin.close();
                    throw new IOException("HBase table " + tableName
                            + " does not exist. Create the table first, e.g. by restoring a full backup.");
                }
            }
            IncrementalRestoreService restoreService = BackupRestoreServerFactory
                    .getIncrementalRestoreService(conf);

            restoreService.run(logDirs, tableNames, newTableNames);
        }
    }

    public void fullRestoreTable(Path tableBackupPath, TableName tableName, TableName newTableName,
            boolean converted, boolean truncateIfExists) throws IOException {
        restoreTableAndCreate(tableName, newTableName, tableBackupPath, converted, truncateIfExists);
    }

    /**
     * return value represent path for:
     * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/.hbase-snapshot"
     * @param backupRootPath backup root path
     * @param tableName table name
     * @param backupId backup Id
     * @return path for snapshot
     */
    static Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
        return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
                HConstants.SNAPSHOT_DIR_NAME);
    }

    /**
     * return value represent path for:
     * "..../default/t1_dn/backup_1396650096738/.hbase-snapshot/snapshot_1396650097621_default_t1_dn"
     * this path contains .snapshotinfo, .tabledesc (0.96 and 0.98) this path contains .snapshotinfo,
     * .data.manifest (trunk)
     * @param tableName table name
     * @return path to table info
     * @throws FileNotFoundException exception
     * @throws IOException exception
     */
    Path getTableInfoPath(TableName tableName) throws FileNotFoundException, IOException {
        Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
        Path tableInfoPath = null;

        // can't build the path directly as the timestamp values are different
        FileStatus[] snapshots = fs.listStatus(tableSnapShotPath);
        for (FileStatus snapshot : snapshots) {
            tableInfoPath = snapshot.getPath();
            // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
            if (tableInfoPath.getName().endsWith("data.manifest")) {
                break;
            }
        }
        return tableInfoPath;
    }

    /**
     * @param tableName is the table backed up
     * @return {@link HTableDescriptor} saved in backup image of the table
     */
    HTableDescriptor getTableDesc(TableName tableName) throws FileNotFoundException, IOException {
        Path tableInfoPath = this.getTableInfoPath(tableName);
        SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
        SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
        HTableDescriptor tableDescriptor = manifest.getTableDescriptor();
        if (!tableDescriptor.getTableName().equals(tableName)) {
            LOG.error("couldn't find Table Desc for table: " + tableName + " under tableInfoPath: "
                    + tableInfoPath.toString());
            LOG.error("tableDescriptor.getNameAsString() = " + tableDescriptor.getNameAsString());
            throw new FileNotFoundException("couldn't find Table Desc for table: " + tableName
                    + " under tableInfoPath: " + tableInfoPath.toString());
        }
        return tableDescriptor;
    }

    /**
     * Duplicate the backup image if it's on local cluster
     * @see HStore#bulkLoadHFile(String, long)
     * @see HRegionFileSystem#bulkLoadStoreFile(String familyName, Path srcPath, long seqNum)
     * @param tableArchivePath archive path
     * @return the new tableArchivePath
     * @throws IOException exception
     */
    Path checkLocalAndBackup(Path tableArchivePath) throws IOException {
        // Move the file if it's on local cluster
        boolean isCopyNeeded = false;

        FileSystem srcFs = tableArchivePath.getFileSystem(conf);
        FileSystem desFs = FileSystem.get(conf);
        if (tableArchivePath.getName().startsWith("/")) {
            isCopyNeeded = true;
        } else {
            // This should match what is done in @see HRegionFileSystem#bulkLoadStoreFile(String, Path,
            // long)
            if (srcFs.getUri().equals(desFs.getUri())) {
                LOG.debug("cluster hold the backup image: " + srcFs.getUri() + "; local cluster node: "
                        + desFs.getUri());
                isCopyNeeded = true;
            }
        }
        if (isCopyNeeded) {
            LOG.debug("File " + tableArchivePath + " on local cluster, back it up before restore");
            if (desFs.exists(restoreTmpPath)) {
                try {
                    desFs.delete(restoreTmpPath, true);
                } catch (IOException e) {
                    LOG.debug("Failed to delete path: " + restoreTmpPath
                            + ", need to check whether restore target DFS cluster is healthy");
                }
            }
            FileUtil.copy(srcFs, tableArchivePath, desFs, restoreTmpPath, false, conf);
            LOG.debug("Copied to temporary path on local cluster: " + restoreTmpPath);
            tableArchivePath = restoreTmpPath;
        }
        return tableArchivePath;
    }

    private void restoreTableAndCreate(TableName tableName, TableName newTableName, Path tableBackupPath,
            boolean converted, boolean truncateIfExists) throws IOException {
        if (newTableName == null || newTableName.equals("")) {
            newTableName = tableName;
        }

        FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

        // get table descriptor first
        HTableDescriptor tableDescriptor = null;

        Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);

        if (fileSys.exists(tableSnapshotPath)) {
            // snapshot path exist means the backup path is in HDFS
            // check whether snapshot dir already recorded for target table
            if (snapshotMap.get(tableName) != null) {
                SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
                SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
                tableDescriptor = manifest.getTableDescriptor();
            } else {
                tableDescriptor = getTableDesc(tableName);
                snapshotMap.put(tableName, getTableInfoPath(tableName));
            }
            if (tableDescriptor == null) {
                LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
            }
        } else if (converted) {
            // first check if this is a converted backup image
            LOG.error("convert will be supported in a future jira");
        }

        Path tableArchivePath = getTableArchivePath(tableName);
        if (tableArchivePath == null) {
            if (tableDescriptor != null) {
                // find table descriptor but no archive dir means the table is empty, create table and exit
                if (LOG.isDebugEnabled()) {
                    LOG.debug("find table descriptor but no archive dir for table " + tableName
                            + ", will only create table");
                }
                tableDescriptor.setName(newTableName);
                checkAndCreateTable(tableBackupPath, tableName, newTableName, null, tableDescriptor,
                        truncateIfExists);
                return;
            } else {
                throw new IllegalStateException(
                        "Cannot restore hbase table because directory '" + " tableArchivePath is null.");
            }
        }

        if (tableDescriptor == null) {
            tableDescriptor = new HTableDescriptor(newTableName);
        } else {
            tableDescriptor.setName(newTableName);
        }

        if (!converted) {
            // record all region dirs:
            // load all files in dir
            try {
                ArrayList<Path> regionPathList = getRegionList(tableName);

                // should only try to create the table with all region informations, so we could pre-split
                // the regions in fine grain
                checkAndCreateTable(tableBackupPath, tableName, newTableName, regionPathList, tableDescriptor,
                        truncateIfExists);
                if (tableArchivePath != null) {
                    // start real restore through bulkload
                    // if the backup target is on local cluster, special action needed
                    Path tempTableArchivePath = checkLocalAndBackup(tableArchivePath);
                    if (tempTableArchivePath.equals(tableArchivePath)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("TableArchivePath for bulkload using existPath: " + tableArchivePath);
                        }
                    } else {
                        regionPathList = getRegionList(tempTableArchivePath); // point to the tempDir
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("TableArchivePath for bulkload using tempPath: " + tempTableArchivePath);
                        }
                    }

                    LoadIncrementalHFiles loader = createLoader(tempTableArchivePath, false);
                    for (Path regionPath : regionPathList) {
                        String regionName = regionPath.toString();
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Restoring HFiles from directory " + regionName);
                        }
                        String[] args = { regionName, newTableName.getNameAsString() };
                        loader.run(args);
                    }
                }
                // we do not recovered edits
            } catch (Exception e) {
                throw new IllegalStateException("Cannot restore hbase table", e);
            }
        } else {
            LOG.debug("convert will be supported in a future jira");
        }
    }

    /**
     * Gets region list
     * @param tableArchivePath table archive path
     * @return RegionList region list
     * @throws FileNotFoundException exception
     * @throws IOException exception
     */
    ArrayList<Path> getRegionList(Path tableArchivePath) throws FileNotFoundException, IOException {
        ArrayList<Path> regionDirList = new ArrayList<Path>();
        FileStatus[] children = fs.listStatus(tableArchivePath);
        for (FileStatus childStatus : children) {
            // here child refer to each region(Name)
            Path child = childStatus.getPath();
            regionDirList.add(child);
        }
        return regionDirList;
    }

    /**
     * Counts the number of files in all subdirectories of an HBase table, i.e. HFiles.
     * @param regionPath Path to an HBase table directory
     * @return the number of files all directories
     * @throws IOException exception
     */
    int getNumberOfFilesInDir(Path regionPath) throws IOException {
        int result = 0;

        if (!fs.exists(regionPath) || !fs.getFileStatus(regionPath).isDirectory()) {
            throw new IllegalStateException("Cannot restore hbase table because directory '" + regionPath.toString()
                    + "' is not a directory.");
        }

        FileStatus[] tableDirContent = fs.listStatus(regionPath);
        for (FileStatus subDirStatus : tableDirContent) {
            FileStatus[] colFamilies = fs.listStatus(subDirStatus.getPath());
            for (FileStatus colFamilyStatus : colFamilies) {
                FileStatus[] colFamilyContent = fs.listStatus(colFamilyStatus.getPath());
                result += colFamilyContent.length;
            }
        }
        return result;
    }

    /**
     * Counts the number of files in all subdirectories of an HBase tables, i.e. HFiles. And finds the
     * maximum number of files in one HBase table.
     * @param tableArchivePath archive path
     * @return the maximum number of files found in 1 HBase table
     * @throws IOException exception
     */
    int getMaxNumberOfFilesInSubDir(Path tableArchivePath) throws IOException {
        int result = 1;
        ArrayList<Path> regionPathList = getRegionList(tableArchivePath);
        // tableArchivePath = this.getTableArchivePath(tableName);

        if (regionPathList == null || regionPathList.size() == 0) {
            throw new IllegalStateException(
                    "Cannot restore hbase table because directory '" + tableArchivePath + "' is not a directory.");
        }

        for (Path regionPath : regionPathList) {
            result = Math.max(result, getNumberOfFilesInDir(regionPath));
        }
        return result;
    }

    /**
     * Create a {@link LoadIncrementalHFiles} instance to be used to restore the HFiles of a full
     * backup.
     * @return the {@link LoadIncrementalHFiles} instance
     * @throws IOException exception
     */
    private LoadIncrementalHFiles createLoader(Path tableArchivePath, boolean multipleTables) throws IOException {
        // set configuration for restore:
        // LoadIncrementalHFile needs more time
        // <name>hbase.rpc.timeout</name> <value>600000</value>
        // calculates
        Integer milliSecInMin = 60000;
        Integer previousMillis = this.conf.getInt("hbase.rpc.timeout", 0);
        Integer numberOfFilesInDir = multipleTables ? getMaxNumberOfFilesInSubDir(tableArchivePath)
                : getNumberOfFilesInDir(tableArchivePath);
        Integer calculatedMillis = numberOfFilesInDir * milliSecInMin; // 1 minute per file
        Integer resultMillis = Math.max(calculatedMillis, previousMillis);
        if (resultMillis > previousMillis) {
            LOG.info("Setting configuration for restore with LoadIncrementalHFile: " + "hbase.rpc.timeout to "
                    + calculatedMillis / milliSecInMin + " minutes, to handle the number of files in backup "
                    + tableArchivePath);
            this.conf.setInt("hbase.rpc.timeout", resultMillis);
        }

        // By default, it is 32 and loader will fail if # of files in any region exceed this
        // limit. Bad for snapshot restore.
        this.conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
        LoadIncrementalHFiles loader = null;
        try {
            loader = new LoadIncrementalHFiles(this.conf);
        } catch (Exception e1) {
            throw new IOException(e1);
        }
        return loader;
    }

    /**
     * Calculate region boundaries and add all the column families to the table descriptor
     * @param regionDirList region dir list
     * @return a set of keys to store the boundaries
     */
    byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws FileNotFoundException, IOException {
        TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
        // Build a set of keys to store the boundaries
        byte[][] keys = null;
        // calculate region boundaries and add all the column families to the table descriptor
        for (Path regionDir : regionDirList) {
            LOG.debug("Parsing region dir: " + regionDir);
            Path hfofDir = regionDir;

            if (!fs.exists(hfofDir)) {
                LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
            }

            FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
            if (familyDirStatuses == null) {
                throw new IOException("No families found in " + hfofDir);
            }

            for (FileStatus stat : familyDirStatuses) {
                if (!stat.isDirectory()) {
                    LOG.warn("Skipping non-directory " + stat.getPath());
                    continue;
                }
                boolean isIgnore = false;
                String pathName = stat.getPath().getName();
                for (String ignore : ignoreDirs) {
                    if (pathName.contains(ignore)) {
                        LOG.warn("Skipping non-family directory" + pathName);
                        isIgnore = true;
                        break;
                    }
                }
                if (isIgnore) {
                    continue;
                }
                Path familyDir = stat.getPath();
                LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
                // Skip _logs, etc
                if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
                    continue;
                }

                // start to parse hfile inside one family dir
                Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
                for (Path hfile : hfiles) {
                    if (hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
                            || StoreFileInfo.isReference(hfile.getName())
                            || HFileLink.isHFileLink(hfile.getName())) {
                        continue;
                    }
                    HFile.Reader reader = HFile.createReader(fs, hfile, new CacheConfig(conf), conf);
                    final byte[] first, last;
                    try {
                        reader.loadFileInfo();
                        first = reader.getFirstRowKey();
                        last = reader.getLastRowKey();
                        LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
                                + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

                        // To eventually infer start key-end key boundaries
                        Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0;
                        map.put(first, value + 1);
                        value = map.containsKey(last) ? (Integer) map.get(last) : 0;
                        map.put(last, value - 1);
                    } finally {
                        reader.close();
                    }
                }
            }
        }
        keys = LoadIncrementalHFiles.inferBoundaries(map);
        return keys;
    }

    /**
     * Prepare the table for bulkload, most codes copied from
     * {@link LoadIncrementalHFiles#createTable(String, String)}
     * @param tableBackupPath path
     * @param tableName table name
     * @param targetTableName target table name
     * @param regionDirList region directory list
     * @param htd table descriptor
     * @throws IOException exception
     */
    private void checkAndCreateTable(Path tableBackupPath, TableName tableName, TableName targetTableName,
            ArrayList<Path> regionDirList, HTableDescriptor htd, boolean truncateIfExists) throws IOException {
        HBaseAdmin hbadmin = null;
        Connection conn = null;
        try {
            conn = ConnectionFactory.createConnection(conf);
            hbadmin = (HBaseAdmin) conn.getAdmin();
            boolean createNew = false;
            if (hbadmin.tableExists(targetTableName)) {
                if (truncateIfExists) {
                    LOG.info("Truncating exising target table '" + targetTableName + "', preserving region splits");
                    hbadmin.disableTable(targetTableName);
                    hbadmin.truncateTable(targetTableName, true);
                } else {
                    LOG.info("Using exising target table '" + targetTableName + "'");
                }
            } else {
                createNew = true;
            }
            if (createNew) {
                LOG.info("Creating target table '" + targetTableName + "'");
                // if no region directory given, create the table and return
                if (regionDirList == null || regionDirList.size() == 0) {
                    hbadmin.createTable(htd);
                    return;
                }
                byte[][] keys = generateBoundaryKeys(regionDirList);
                // create table using table descriptor and region boundaries
                hbadmin.createTable(htd, keys);
            }
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            if (hbadmin != null) {
                hbadmin.close();
            }
            if (conn != null) {
                conn.close();
            }
        }
    }

}