org.apache.hadoop.hbase.io.HFileLink.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.io.HFileLink.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;

/**
 * HFileLink describes a link to an hfile.
 *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
 * HFileLink allows to access the referenced hfile regardless of the location where it is.
 *
 * <p>Searches for hfiles in the following order and locations:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * The link checks first in the original path if it is not present
 * it fallbacks to the archived path.
 */
@InterfaceAudience.Private
public class HFileLink extends FileLink {
    private static final Log LOG = LogFactory.getLog(HFileLink.class);

    /**
     * A non-capture group, for HFileLink, so that this can be embedded.
     * The HFileLink describe a link to an hfile in a different table/region
     * and the name is in the form: table=region-hfile.
     * <p>
     * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
     * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
     * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
     * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
     */
    public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s",
            TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
            HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

    /** Define the HFile Link name parser in the form of: table=region-hfile */
    //made package private for testing
    static final Pattern LINK_NAME_PATTERN = Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
            TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
            HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));

    /**
     * The pattern should be used for hfile and reference links
     * that can be found in /hbase/table/region/family/
     */
    private static final Pattern REF_OR_HFILE_LINK_PATTERN = Pattern
            .compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX,
                    TableName.VALID_TABLE_QUALIFIER_REGEX, HRegionInfo.ENCODED_REGION_NAME_REGEX));

    private final Path archivePath;
    private final Path originPath;
    private final Path tempPath;

    /**
     * @param conf {@link Configuration} from which to extract specific archive locations
     * @param path The path of the HFile Link.
     * @throws IOException on unexpected error.
     */
    public HFileLink(Configuration conf, Path path) throws IOException {
        this(FSUtils.getRootDir(conf), HFileArchiveUtil.getArchivePath(conf), path);
    }

    /**
     * @param rootDir Path to the root directory where hbase files are stored
     * @param archiveDir Path to the hbase archive directory
     * @param path The path of the HFile Link.
     */
    public HFileLink(final Path rootDir, final Path archiveDir, final Path path) {
        Path hfilePath = getRelativeTablePath(path);
        this.tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
        this.originPath = new Path(rootDir, hfilePath);
        this.archivePath = new Path(archiveDir, hfilePath);
        setLocations(originPath, tempPath, archivePath);
    }

    /**
     * Create an HFileLink relative path for the table/region/family/hfile location
     * @param table Table name
     * @param region Region Name
     * @param family Family Name
     * @param hfile HFile Name
     * @return the relative Path to open the specified table/region/family/hfile link
     */
    public static Path createPath(final TableName table, final String region, final String family,
            final String hfile) {
        if (HFileLink.isHFileLink(hfile)) {
            return new Path(family, hfile);
        }
        return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
    }

    /**
     * Create an HFileLink instance from table/region/family/hfile location
     * @param conf {@link Configuration} from which to extract specific archive locations
     * @param table Table name
     * @param region Region Name
     * @param family Family Name
     * @param hfile HFile Name
     * @return Link to the file with the specified table/region/family/hfile location
     * @throws IOException on unexpected error.
     */
    public static HFileLink create(final Configuration conf, final TableName table, final String region,
            final String family, final String hfile) throws IOException {
        return new HFileLink(conf, createPath(table, region, family, hfile));
    }

    /**
     * @return the origin path of the hfile.
     */
    public Path getOriginPath() {
        return this.originPath;
    }

    /**
     * @return the path of the archived hfile.
     */
    public Path getArchivePath() {
        return this.archivePath;
    }

    /**
     * @param path Path to check.
     * @return True if the path is a HFileLink.
     */
    public static boolean isHFileLink(final Path path) {
        return isHFileLink(path.getName());
    }

    /**
     * @param fileName File name to check.
     * @return True if the path is a HFileLink.
     */
    public static boolean isHFileLink(String fileName) {
        Matcher m = LINK_NAME_PATTERN.matcher(fileName);
        if (!m.matches())
            return false;
        return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
    }

    /**
     * Convert a HFileLink path to a table relative path.
     * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
     *      becomes: /hbase/testtb/4567/cf/abcd
     *
     * @param path HFileLink path
     * @return Relative table path
     * @throws IOException on unexpected error.
     */
    private static Path getRelativeTablePath(final Path path) {
        // table=region-hfile
        Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
        if (!m.matches()) {
            throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink name!");
        }

        // Convert the HFileLink name into a real table/region/cf/hfile path.
        TableName tableName = TableName.valueOf(m.group(1), m.group(2));
        String regionName = m.group(3);
        String hfileName = m.group(4);
        String familyName = path.getParent().getName();
        Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
        return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName)));
    }

    /**
     * Get the HFile name of the referenced link
     *
     * @param fileName HFileLink file name
     * @return the name of the referenced HFile
     */
    public static String getReferencedHFileName(final String fileName) {
        Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
        if (!m.matches()) {
            throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
        }
        return (m.group(4));
    }

    /**
     * Get the Region name of the referenced link
     *
     * @param fileName HFileLink file name
     * @return the name of the referenced Region
     */
    public static String getReferencedRegionName(final String fileName) {
        Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
        if (!m.matches()) {
            throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
        }
        return (m.group(3));
    }

    /**
     * Get the Table name of the referenced link
     *
     * @param fileName HFileLink file name
     * @return the name of the referenced Table
     */
    public static TableName getReferencedTableName(final String fileName) {
        Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
        if (!m.matches()) {
            throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
        }
        return (TableName.valueOf(m.group(1), m.group(2)));
    }

    /**
     * Returns true if the HFileLink exists
     */
    public boolean exists(final FileSystem fs) throws IOException {
        return fs.exists(this.originPath) || fs.exists(this.tempPath) || fs.exists(this.archivePath);
    }

    /**
     * Create a new HFileLink name
     *
     * @param hfileRegionInfo - Linked HFile Region Info
     * @param hfileName - Linked HFile name
     * @return file name of the HFile Link
     */
    public static String createHFileLinkName(final HRegionInfo hfileRegionInfo, final String hfileName) {
        return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(), hfileName);
    }

    /**
     * Create a new HFileLink name
     *
     * @param tableName - Linked HFile table name
     * @param regionName - Linked HFile region name
     * @param hfileName - Linked HFile name
     * @return file name of the HFile Link
     */
    public static String createHFileLinkName(final TableName tableName, final String regionName,
            final String hfileName) {
        String s = String.format("%s=%s-%s", tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
                regionName, hfileName);
        return s;
    }

    /**
     * Create a new HFileLink
     *
     * <p>It also adds a back-reference to the hfile back-reference directory
     * to simplify the reference-count and the cleaning process.
     *
     * @param conf {@link Configuration} to read for the archive directory name
     * @param fs {@link FileSystem} on which to write the HFileLink
     * @param dstFamilyPath - Destination path (table/region/cf/)
     * @param hfileRegionInfo - Linked HFile Region Info
     * @param hfileName - Linked HFile name
     * @return true if the file is created, otherwise the file exists.
     * @throws IOException on file or parent directory creation failure
     */
    public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath,
            final HRegionInfo hfileRegionInfo, final String hfileName) throws IOException {
        TableName linkedTable = hfileRegionInfo.getTable();
        String linkedRegion = hfileRegionInfo.getEncodedName();
        return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName);
    }

    /**
     * Create a new HFileLink
     *
     * <p>It also adds a back-reference to the hfile back-reference directory
     * to simplify the reference-count and the cleaning process.
     *
     * @param conf {@link Configuration} to read for the archive directory name
     * @param fs {@link FileSystem} on which to write the HFileLink
     * @param dstFamilyPath - Destination path (table/region/cf/)
     * @param linkedTable - Linked Table Name
     * @param linkedRegion - Linked Region Name
     * @param hfileName - Linked HFile name
     * @return true if the file is created, otherwise the file exists.
     * @throws IOException on file or parent directory creation failure
     */
    public static boolean create(final Configuration conf, final FileSystem fs, final Path dstFamilyPath,
            final TableName linkedTable, final String linkedRegion, final String hfileName) throws IOException {
        String familyName = dstFamilyPath.getName();
        String regionName = dstFamilyPath.getParent().getName();
        String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent()).getNameAsString();

        String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
        String refName = createBackReferenceName(tableName, regionName);

        // Make sure the destination directory exists
        fs.mkdirs(dstFamilyPath);

        // Make sure the FileLink reference directory exists
        Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName);
        Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
        fs.mkdirs(backRefssDir);

        // Create the reference for the link
        Path backRefPath = new Path(backRefssDir, refName);
        fs.createNewFile(backRefPath);
        try {
            // Create the link
            return fs.createNewFile(new Path(dstFamilyPath, name));
        } catch (IOException e) {
            LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
            // Revert the reference if the link creation failed
            fs.delete(backRefPath, false);
            throw e;
        }
    }

    /**
     * Create a new HFileLink starting from a hfileLink name
     *
     * <p>It also adds a back-reference to the hfile back-reference directory
     * to simplify the reference-count and the cleaning process.
     *
     * @param conf {@link Configuration} to read for the archive directory name
     * @param fs {@link FileSystem} on which to write the HFileLink
     * @param dstFamilyPath - Destination path (table/region/cf/)
     * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
     * @return true if the file is created, otherwise the file exists.
     * @throws IOException on file or parent directory creation failure
     */
    public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
            final Path dstFamilyPath, final String hfileLinkName) throws IOException {
        Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
        if (!m.matches()) {
            throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
        }
        return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3), m.group(4));
    }

    /**
     * Create the back reference name
     */
    //package-private for testing
    static String createBackReferenceName(final String tableNameStr, final String regionName) {

        return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
    }

    /**
     * Get the full path of the HFile referenced by the back reference
     *
     * @param rootDir root hbase directory
     * @param linkRefPath Link Back Reference path
     * @return full path of the referenced hfile
     * @throws IOException on unexpected error.
     */
    public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
        Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
        TableName linkTableName = p.getFirst();
        String linkRegionName = p.getSecond();

        String hfileName = getBackReferenceFileName(linkRefPath.getParent());
        Path familyPath = linkRefPath.getParent().getParent();
        Path regionPath = familyPath.getParent();
        Path tablePath = regionPath.getParent();

        String linkName = createHFileLinkName(FSUtils.getTableName(tablePath), regionPath.getName(), hfileName);
        Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
        Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
        return new Path(new Path(regionDir, familyPath.getName()), linkName);
    }

    static Pair<TableName, String> parseBackReferenceName(String name) {
        int separatorIndex = name.indexOf('.');
        String linkRegionName = name.substring(0, separatorIndex);
        String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM);
        TableName linkTableName = TableName.valueOf(tableSubstr);
        return new Pair<TableName, String>(linkTableName, linkRegionName);
    }

    /**
     * Get the full path of the HFile referenced by the back reference
     *
     * @param conf {@link Configuration} to read for the archive directory name
     * @param linkRefPath Link Back Reference path
     * @return full path of the referenced hfile
     * @throws IOException on unexpected error.
     */
    public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
            throws IOException {
        return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
    }

}