org.apache.hadoop.hbase.regionserver.StoreFileInfo.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.regionserver.StoreFileInfo.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.util.FSUtils;

/**
 * Describe a StoreFile (hfile, reference, link)
 */
@InterfaceAudience.Private
public class StoreFileInfo {
    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);

    /**
     * A non-capture group, for hfiles, so that this can be embedded.
     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
     */
    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";

    /** Regex that will work for hfiles */
    private static final Pattern HFILE_NAME_PATTERN = Pattern.compile("^(" + HFILE_NAME_REGEX + ")");

    /**
     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
     * If reference, then the regex has more than just one group.
     * Group 1, hfile/hfilelink pattern, is this file's id.
     * Group 2 '(.+)' is the reference's parent region name.
     */
    private static final Pattern REF_NAME_PATTERN = Pattern
            .compile(String.format("^(%s|%s)\\.(.+)$", HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));

    // Configuration
    private Configuration conf;

    // HDFS blocks distribution information
    private HDFSBlocksDistribution hdfsBlocksDistribution = null;

    // If this storefile references another, this is the reference instance.
    private final Reference reference;

    // If this storefile is a link to another, this is the link instance.
    private final HFileLink link;

    // FileSystem information for the file.
    private final FileStatus fileStatus;

    private RegionCoprocessorHost coprocessorHost;

    /**
     * Create a Store File Info
     * @param conf the {@link Configuration} to use
     * @param fs The current file system to use.
     * @param path The {@link Path} of the file
     */
    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path) throws IOException {
        this(conf, fs, fs.getFileStatus(path));
    }

    /**
     * Create a Store File Info
     * @param conf the {@link Configuration} to use
     * @param fs The current file system to use.
     * @param fileStatus The {@link FileStatus} of the file
     */
    public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
            throws IOException {
        this.conf = conf;
        this.fileStatus = fileStatus;
        Path p = fileStatus.getPath();
        if (HFileLink.isHFileLink(p)) {
            // HFileLink
            this.reference = null;
            this.link = new HFileLink(conf, p);
            if (LOG.isTraceEnabled())
                LOG.trace(p + " is a link");
        } else if (isReference(p)) {
            this.reference = Reference.read(fs, p);
            Path referencePath = getReferredToFile(p);
            if (HFileLink.isHFileLink(referencePath)) {
                // HFileLink Reference
                this.link = new HFileLink(conf, referencePath);
            } else {
                // Reference
                this.link = null;
            }
            if (LOG.isTraceEnabled())
                LOG.trace(p + " is a " + reference.getFileRegion() + " reference to " + referencePath);
        } else if (isHFile(p)) {
            // HFile
            this.reference = null;
            this.link = null;
        } else {
            throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
        }
    }

    /**
     * Sets the region coprocessor env.
     * @param coprocessorHost
     */
    public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
        this.coprocessorHost = coprocessorHost;
    }

    /*
     * @return the Reference object associated to this StoreFileInfo.
     *         null if the StoreFile is not a reference.
     */
    public Reference getReference() {
        return this.reference;
    }

    /** @return True if the store file is a Reference */
    public boolean isReference() {
        return this.reference != null;
    }

    /** @return True if the store file is a top Reference */
    public boolean isTopReference() {
        return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
    }

    /** @return True if the store file is a link */
    public boolean isLink() {
        return this.link != null && this.reference == null;
    }

    /** @return the HDFS block distribution */
    public HDFSBlocksDistribution getHDFSBlockDistribution() {
        return this.hdfsBlocksDistribution;
    }

    /**
     * Open a Reader for the StoreFile
     * @param fs The current file system to use.
     * @param cacheConf The cache configuration and block cache reference.
     * @return The StoreFile.Reader for the file
     */
    public StoreFile.Reader open(final FileSystem fs, final CacheConfig cacheConf) throws IOException {
        FSDataInputStreamWrapper in;
        FileStatus status;

        if (this.link != null) {
            // HFileLink
            in = new FSDataInputStreamWrapper(fs, this.link);
            status = this.link.getFileStatus(fs);
        } else if (this.reference != null) {
            // HFile Reference
            Path referencePath = getReferredToFile(this.getPath());
            in = new FSDataInputStreamWrapper(fs, referencePath);
            status = fs.getFileStatus(referencePath);
        } else {
            in = new FSDataInputStreamWrapper(fs, this.getPath());
            status = fileStatus;
        }
        long length = status.getLen();
        if (this.reference != null) {
            hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
        } else {
            hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
        }
        StoreFile.Reader reader = null;
        if (this.coprocessorHost != null) {
            reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length, cacheConf,
                    reference);
        }
        if (reader == null) {
            if (this.reference != null) {
                reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference, conf);
            } else {
                reader = new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, conf);
            }
        }
        if (this.coprocessorHost != null) {
            reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length, cacheConf,
                    reference, reader);
        }
        return reader;
    }

    /**
     * Compute the HDFS Block Distribution for this StoreFile
     */
    public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) throws IOException {
        FileStatus status = getReferencedFileStatus(fs);
        if (this.reference != null) {
            return computeRefFileHDFSBlockDistribution(fs, reference, status);
        } else {
            return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
        }
    }

    /**
     * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
     * @param fs The current file system to use.
     * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
     */
    public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
        FileStatus status;
        if (this.reference != null) {
            if (this.link != null) {
                // HFileLink Reference
                status = link.getFileStatus(fs);
            } else {
                // HFile Reference
                Path referencePath = getReferredToFile(this.getPath());
                status = fs.getFileStatus(referencePath);
            }
        } else {
            if (this.link != null) {
                // HFileLink
                status = link.getFileStatus(fs);
            } else {
                status = this.fileStatus;
            }
        }
        return status;
    }

    /** @return The {@link Path} of the file */
    public Path getPath() {
        return this.fileStatus.getPath();
    }

    /** @return The {@link FileStatus} of the file */
    public FileStatus getFileStatus() {
        return this.fileStatus;
    }

    /** @return Get the modification time of the file. */
    public long getModificationTime() {
        return this.fileStatus.getModificationTime();
    }

    @Override
    public String toString() {
        return this.getPath() + (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
    }

    /**
     * @param path Path to check.
     * @return True if the path has format of a HFile.
     */
    public static boolean isHFile(final Path path) {
        return isHFile(path.getName());
    }

    public static boolean isHFile(final String fileName) {
        Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
        return m.matches() && m.groupCount() > 0;
    }

    /**
     * @param path Path to check.
     * @return True if the path has format of a HStoreFile reference.
     */
    public static boolean isReference(final Path path) {
        return isReference(path.getName());
    }

    /**
     * @param name file name to check.
     * @return True if the path has format of a HStoreFile reference.
     */
    public static boolean isReference(final String name) {
        Matcher m = REF_NAME_PATTERN.matcher(name);
        return m.matches() && m.groupCount() > 1;
    }

    /*
     * Return path to the file referred to by a Reference.  Presumes a directory
     * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
     * @param p Path to a Reference file.
     * @return Calculated path to parent region file.
     * @throws IllegalArgumentException when path regex fails to match.
     */
    public static Path getReferredToFile(final Path p) {
        Matcher m = REF_NAME_PATTERN.matcher(p.getName());
        if (m == null || !m.matches()) {
            LOG.warn("Failed match of store file name " + p.toString());
            throw new IllegalArgumentException("Failed match of store file name " + p.toString());
        }

        // Other region name is suffix on the passed Reference file name
        String otherRegion = m.group(2);
        // Tabledir is up two directories from where Reference was written.
        Path tableDir = p.getParent().getParent().getParent();
        String nameStrippedOfSuffix = m.group(1);
        LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);

        // Build up new path with the referenced region in place of our current
        // region in the reference path.  Also strip regionname suffix from name.
        return new Path(new Path(new Path(tableDir, otherRegion), p.getParent().getName()), nameStrippedOfSuffix);
    }

    /**
     * Validate the store file name.
     * @param fileName name of the file to validate
     * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
     */
    public static boolean validateStoreFileName(final String fileName) {
        if (HFileLink.isHFileLink(fileName) || isReference(fileName))
            return (true);
        return !fileName.contains("-");
    }

    /**
     * Return if the specified file is a valid store file or not.
     * @param fileStatus The {@link FileStatus} of the file
     * @return <tt>true</tt> if the file is valid
     */
    public static boolean isValid(final FileStatus fileStatus) throws IOException {
        final Path p = fileStatus.getPath();

        if (fileStatus.isDirectory())
            return false;

        // Check for empty hfile. Should never be the case but can happen
        // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
        // NOTE: that the HFileLink is just a name, so it's an empty file.
        if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
            LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
            return false;
        }

        return validateStoreFileName(p.getName());
    }

    /**
     * helper function to compute HDFS blocks distribution of a given reference
     * file.For reference file, we don't compute the exact value. We use some
     * estimate instead given it might be good enough. we assume bottom part
     * takes the first half of reference file, top part takes the second half
     * of the reference file. This is just estimate, given
     * midkey ofregion != midkey of HFile, also the number and size of keys vary.
     * If this estimate isn't good enough, we can improve it later.
     * @param fs  The FileSystem
     * @param reference  The reference
     * @param status  The reference FileStatus
     * @return HDFS blocks distribution
     */
    private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs,
            final Reference reference, final FileStatus status) throws IOException {
        if (status == null) {
            return null;
        }

        long start = 0;
        long length = 0;

        if (Reference.isTopFileRegion(reference.getFileRegion())) {
            start = status.getLen() / 2;
            length = status.getLen() - status.getLen() / 2;
        } else {
            start = 0;
            length = status.getLen() / 2;
        }
        return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
    }
}