Java tutorial
/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.util; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.ClusterId; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HDFSBlocksDistribution; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionPlacementMaintainer; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.FSProtos; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.FSConstants; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import com.google.common.primitives.Ints; import com.google.protobuf.InvalidProtocolBufferException; /** * Utility methods for interacting with the underlying file system. 
*/
@InterfaceAudience.Private
public abstract class FSUtils {
  private static final Log LOG = LogFactory.getLog(FSUtils.class);

  /** Full access permissions (starting point for a umask) */
  public static final String FULL_RWX_PERMISSIONS = "777";
  private static final String THREAD_POOLSIZE = "hbase.client.localityCheck.threadPoolSize";
  private static final int DEFAULT_THREAD_POOLSIZE = 2;

  /** True when the <code>os.name</code> system property starts with "Windows". */
  public static final boolean WINDOWS = System.getProperty("os.name").startsWith("Windows");

  /** Not publicly instantiable; the constructor is visible to subclasses only. */
  protected FSUtils() {
  }

  /**
   * Tests whether the path component of <code>path</code> starts with the path component of
   * <code>rootPath</code>. Only the path components of the two URIs are compared, so differing
   * schemes do not prevent a match.
   * @param rootPath the prefix to look for
   * @param path the path, as a String, that is checked for the prefix
   * @return True if <code>path</code> starts with <code>rootPath</code>
   */
  public static boolean isStartingWithPath(final Path rootPath, final String path) {
    final String rootComponent = rootPath.toUri().getPath();
    final String candidateComponent = new Path(path).toUri().getPath();
    return candidateComponent.startsWith(rootComponent);
  }

  /**
   * String flavour of {@link #isMatchingTail(Path, Path)}: wraps <code>pathTail</code> in a
   * {@link Path} and delegates.
   * @param pathToSearch Path we will be trying to match.
   * @param pathTail the tail to look for, as a String
   * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
   */
  public static boolean isMatchingTail(final Path pathToSearch, String pathTail) {
    final Path tail = new Path(pathTail);
    return isMatchingTail(pathToSearch, tail);
  }

  /**
   * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the
   * '/a/b/c' part. If you passed in 'hdfs://a/b/c and b/c, it would return true. Does not consider
   * schema; i.e. if schemas different but path or subpath matches, the two will equate.
   * Paths whose depths differ never match.
   * @param pathToSearch Path we will be trying to match.
* @param pathTail * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code> */ public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) { if (pathToSearch.depth() != pathTail.depth()) return false; Path tailPath = pathTail; String tailName; Path toSearch = pathToSearch; String toSearchName; boolean result = false; do { tailName = tailPath.getName(); if (tailName == null || tailName.length() <= 0) { result = true; break; } toSearchName = toSearch.getName(); if (toSearchName == null || toSearchName.length() <= 0) break; // Move up a parent on each path for next go around. Path doesn't let us go off the end. tailPath = tailPath.getParent(); toSearch = toSearch.getParent(); } while (tailName.equals(toSearchName)); return result; } public static FSUtils getInstance(FileSystem fs, Configuration conf) { String scheme = fs.getUri().getScheme(); if (scheme == null) { LOG.warn("Could not find scheme for uri " + fs.getUri() + ", default to hdfs"); scheme = "hdfs"; } Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." + scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl FSUtils fsUtils = (FSUtils) ReflectionUtils.newInstance(fsUtilsClass, conf); return fsUtils; } /** * Delete if exists. * @param fs filesystem object * @param dir directory to delete * @return True if deleted <code>dir</code> * @throws IOException e */ public static boolean deleteDirectory(final FileSystem fs, final Path dir) throws IOException { return fs.exists(dir) && fs.delete(dir, true); } /** * Return the number of bytes that large input files should be optimally * be split into to minimize i/o time. 
* * use reflection to search for getDefaultBlockSize(Path f) * if the method doesn't exist, fall back to using getDefaultBlockSize() * * @param fs filesystem object * @return the default block size for the path's filesystem * @throws IOException e */ public static long getDefaultBlockSize(final FileSystem fs, final Path path) throws IOException { Method m = null; Class<? extends FileSystem> cls = fs.getClass(); try { m = cls.getMethod("getDefaultBlockSize", new Class<?>[] { Path.class }); } catch (NoSuchMethodException e) { LOG.info("FileSystem doesn't support getDefaultBlockSize"); } catch (SecurityException e) { LOG.info("Doesn't have access to getDefaultBlockSize on FileSystems", e); m = null; // could happen on setAccessible() } if (m == null) { return fs.getDefaultBlockSize(path); } else { try { Object ret = m.invoke(fs, path); return ((Long) ret).longValue(); } catch (Exception e) { throw new IOException(e); } } } /* * Get the default replication. * * use reflection to search for getDefaultReplication(Path f) * if the method doesn't exist, fall back to using getDefaultReplication() * * @param fs filesystem object * @param f path of file * @return default replication for the path's filesystem * @throws IOException e */ public static short getDefaultReplication(final FileSystem fs, final Path path) throws IOException { Method m = null; Class<? 
extends FileSystem> cls = fs.getClass(); try { m = cls.getMethod("getDefaultReplication", new Class<?>[] { Path.class }); } catch (NoSuchMethodException e) { LOG.info("FileSystem doesn't support getDefaultReplication"); } catch (SecurityException e) { LOG.info("Doesn't have access to getDefaultReplication on FileSystems", e); m = null; // could happen on setAccessible() } if (m == null) { return fs.getDefaultReplication(path); } else { try { Object ret = m.invoke(fs, path); return ((Number) ret).shortValue(); } catch (Exception e) { throw new IOException(e); } } } /** * Returns the default buffer size to use during writes. * * The size of the buffer should probably be a multiple of hardware * page size (4096 on Intel x86), and it determines how much data is * buffered during read and write operations. * * @param fs filesystem object * @return default buffer size to use during writes */ public static int getDefaultBufferSize(final FileSystem fs) { return fs.getConf().getInt("io.file.buffer.size", 4096); } /** * Create the specified file on the filesystem. 
By default, this will: * <ol> * <li>overwrite the file if it exists</li> * <li>apply the umask in the configuration (if it is enabled)</li> * <li>use the fs configured buffer size (or 4096 if not set)</li> * <li>use the default replication</li> * <li>use the default block size</li> * <li>not track progress</li> * </ol> * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @param perm permissions * @param favoredNodes * @return output stream to the created file * @throws IOException if the file cannot be created */ public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException { if (fs instanceof HFileSystem) { FileSystem backingFs = ((HFileSystem) fs).getBackingFs(); if (backingFs instanceof DistributedFileSystem) { // Try to use the favoredNodes version via reflection to allow backwards- // compatibility. try { return (FSDataOutputStream) (DistributedFileSystem.class .getDeclaredMethod("create", Path.class, FsPermission.class, boolean.class, int.class, short.class, long.class, Progressable.class, InetSocketAddress[].class) .invoke(backingFs, path, perm, true, getDefaultBufferSize(backingFs), getDefaultReplication(backingFs, path), getDefaultBlockSize(backingFs, path), null, favoredNodes)); } catch (InvocationTargetException ite) { // Function was properly called, but threw it's own exception. 
throw new IOException(ite.getCause()); } catch (NoSuchMethodException e) { LOG.debug("DFS Client does not support most favored nodes create; using default create"); if (LOG.isTraceEnabled()) LOG.trace("Ignoring; use default create", e); } catch (IllegalArgumentException e) { LOG.debug("Ignoring (most likely Reflection related exception) " + e); } catch (SecurityException e) { LOG.debug("Ignoring (most likely Reflection related exception) " + e); } catch (IllegalAccessException e) { LOG.debug("Ignoring (most likely Reflection related exception) " + e); } } } return create(fs, path, perm, true); } /** * Create the specified file on the filesystem. By default, this will: * <ol> * <li>apply the umask in the configuration (if it is enabled)</li> * <li>use the fs configured buffer size (or 4096 if not set)</li> * <li>use the default replication</li> * <li>use the default block size</li> * <li>not track progress</li> * </ol> * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @param perm * @param overwrite Whether or not the created file should be overwritten. * @return output stream to the created file * @throws IOException if the file cannot be created */ public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm, boolean overwrite) throws IOException { if (LOG.isTraceEnabled()) { LOG.trace("Creating file=" + path + " with permission=" + perm + ", overwrite=" + overwrite); } return fs.create(path, perm, overwrite, getDefaultBufferSize(fs), getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null); } /** * Get the file permissions specified in the configuration, if they are * enabled. * * @param fs filesystem that the file will be created on. 
* @param conf configuration to read for determining if permissions are * enabled and which to use * @param permssionConfKey property key in the configuration to use when * finding the permission * @return the permission to use when creating a new file on the fs. If * special permissions are not specified in the configuration, then * the default permissions on the the fs will be returned. */ public static FsPermission getFilePermissions(final FileSystem fs, final Configuration conf, final String permssionConfKey) { boolean enablePermissions = conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false); if (enablePermissions) { try { FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS); // make sure that we have a mask, if not, go default. String mask = conf.get(permssionConfKey); if (mask == null) return FsPermission.getFileDefault(); // appy the umask FsPermission umask = new FsPermission(mask); return perm.applyUMask(umask); } catch (IllegalArgumentException e) { LOG.warn("Incorrect umask attempted to be created: " + conf.get(permssionConfKey) + ", using default file permissions.", e); return FsPermission.getFileDefault(); } } return FsPermission.getFileDefault(); } /** * Checks to see if the specified file system is available * * @param fs filesystem * @throws IOException e */ public static void checkFileSystemAvailable(final FileSystem fs) throws IOException { if (!(fs instanceof DistributedFileSystem)) { return; } IOException exception = null; DistributedFileSystem dfs = (DistributedFileSystem) fs; try { if (dfs.exists(new Path("/"))) { return; } } catch (IOException e) { exception = RemoteExceptionHandler.checkIOException(e); } try { fs.close(); } catch (Exception e) { LOG.error("file system close failed: ", e); } IOException io = new IOException("File system is not available"); io.initCause(exception); throw io; } /** * We use reflection because {@link DistributedFileSystem#setSafeMode( * FSConstants.SafeModeAction action, boolean isChecked)} is not in 
hadoop 1.1 * * @param dfs * @return whether we're in safe mode * @throws IOException */ private static boolean isInSafeMode(DistributedFileSystem dfs) throws IOException { boolean inSafeMode = false; try { Method m = DistributedFileSystem.class.getMethod("setSafeMode", new Class<?>[] { org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.class, boolean.class }); inSafeMode = (Boolean) m.invoke(dfs, org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET, true); } catch (Exception e) { if (e instanceof IOException) throw (IOException) e; // Check whether dfs is on safemode. inSafeMode = dfs.setSafeMode(org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET); } return inSafeMode; } /** * Check whether dfs is in safemode. * @param conf * @throws IOException */ public static void checkDfsSafeMode(final Configuration conf) throws IOException { boolean isInSafeMode = false; FileSystem fs = FileSystem.get(conf); if (fs instanceof DistributedFileSystem) { DistributedFileSystem dfs = (DistributedFileSystem) fs; isInSafeMode = isInSafeMode(dfs); } if (isInSafeMode) { throw new IOException("File system is in safemode, it can't be written now"); } } /** * Verifies current version of file system * * @param fs filesystem object * @param rootdir root hbase directory * @return null if no version file exists, version string otherwise. * @throws IOException e * @throws org.apache.hadoop.hbase.exceptions.DeserializationException */ public static String getVersion(FileSystem fs, Path rootdir) throws IOException, DeserializationException { Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME); FileStatus[] status = null; try { // hadoop 2.0 throws FNFE if directory does not exist. // hadoop 1.0 returns null if directory does not exist. 
status = fs.listStatus(versionFile); } catch (FileNotFoundException fnfe) { return null; } if (status == null || status.length == 0) return null; String version = null; byte[] content = new byte[(int) status[0].getLen()]; FSDataInputStream s = fs.open(versionFile); try { IOUtils.readFully(s, content, 0, content.length); if (ProtobufUtil.isPBMagicPrefix(content)) { version = parseVersionFrom(content); } else { // Presume it pre-pb format. InputStream is = new ByteArrayInputStream(content); DataInputStream dis = new DataInputStream(is); try { version = dis.readUTF(); } finally { dis.close(); } // Update the format LOG.info("Updating the hbase.version file format with version=" + version); setVersion(fs, rootdir, version, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } } catch (EOFException eof) { LOG.warn("Version file was empty, odd, will try to set it."); } finally { s.close(); } return version; } /** * Parse the content of the ${HBASE_ROOTDIR}/hbase.version file. * @param bytes The byte content of the hbase.version file. * @return The version found in the file as a String. * @throws DeserializationException */ static String parseVersionFrom(final byte[] bytes) throws DeserializationException { ProtobufUtil.expectPBMagicPrefix(bytes); int pblen = ProtobufUtil.lengthOfPBMagic(); FSProtos.HBaseVersionFileContent.Builder builder = FSProtos.HBaseVersionFileContent.newBuilder(); FSProtos.HBaseVersionFileContent fileContent; try { fileContent = builder.mergeFrom(bytes, pblen, bytes.length - pblen).build(); return fileContent.getVersion(); } catch (InvalidProtocolBufferException e) { // Convert throw new DeserializationException(e); } } /** * Create the content to write into the ${HBASE_ROOTDIR}/hbase.version file. * @param version Version to persist * @return Serialized protobuf with <code>version</code> content and a bit of pb magic for a prefix. 
*/ static byte[] toVersionByteArray(final String version) { FSProtos.HBaseVersionFileContent.Builder builder = FSProtos.HBaseVersionFileContent.newBuilder(); return ProtobufUtil.prependPBMagic(builder.setVersion(version).build().toByteArray()); } /** * Verifies current version of file system * * @param fs file system * @param rootdir root directory of HBase installation * @param message if true, issues a message on System.out * * @throws IOException e * @throws DeserializationException */ public static void checkVersion(FileSystem fs, Path rootdir, boolean message) throws IOException, DeserializationException { checkVersion(fs, rootdir, message, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } /** * Verifies current version of file system * * @param fs file system * @param rootdir root directory of HBase installation * @param message if true, issues a message on System.out * @param wait wait interval * @param retries number of times to retry * * @throws IOException e * @throws DeserializationException */ public static void checkVersion(FileSystem fs, Path rootdir, boolean message, int wait, int retries) throws IOException, DeserializationException { String version = getVersion(fs, rootdir); if (version == null) { if (!metaRegionExists(fs, rootdir)) { // rootDir is empty (no version file and no root region) // just create new version file (HBASE-1195) setVersion(fs, rootdir, wait, retries); return; } } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0) return; // version is deprecated require migration // Output on stdout so user sees it in terminal. String msg = "HBase file layout needs to be upgraded." + " You have version " + version + " and I want version " + HConstants.FILE_SYSTEM_VERSION + ". Is your hbase.rootdir valid? If so, you may need to run " + "'hbase hbck -fixVersionFile'."; if (message) { System.out.println("WARNING! 
" + msg); } throw new FileSystemVersionException(msg); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir) throws IOException { setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root * @param wait time to wait for retry * @param retries number of times to retry before failing * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries) throws IOException { setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root directory * @param version version to set * @param wait time to wait for retry * @param retries number of times to retry before throwing an IOException * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir, String version, int wait, int retries) throws IOException { Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME); while (true) { try { FSDataOutputStream s = fs.create(versionFile); s.write(toVersionByteArray(version)); s.close(); LOG.debug("Created version file at " + rootdir.toString() + " with version=" + version); return; } catch (IOException e) { if (retries > 0) { LOG.warn("Unable to create version file at " + rootdir.toString() + ", retrying", e); fs.delete(versionFile, false); try { if (wait > 0) { Thread.sleep(wait); } } catch (InterruptedException ie) { throw (InterruptedIOException) new InterruptedIOException().initCause(ie); } retries--; } else { throw e; } } } } /** * Checks that a cluster ID file exists in the HBase root directory * @param fs the root directory FileSystem * @param rootdir the HBase root directory in HDFS * @param wait how long to wait between retries 
* @return <code>true</code> if the file exists, otherwise <code>false</code> * @throws IOException if checking the FileSystem fails */ public static boolean checkClusterIdExists(FileSystem fs, Path rootdir, int wait) throws IOException { while (true) { try { Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); return fs.exists(filePath); } catch (IOException ioe) { if (wait > 0) { LOG.warn("Unable to check cluster ID file in " + rootdir.toString() + ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe)); try { Thread.sleep(wait); } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } } else { throw ioe; } } } } /** * Returns the value of the unique cluster ID stored for this HBase instance. * @param fs the root directory FileSystem * @param rootdir the path to the HBase root directory * @return the unique cluster identifier * @throws IOException if reading the cluster ID file fails */ public static ClusterId getClusterId(FileSystem fs, Path rootdir) throws IOException { Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); ClusterId clusterId = null; FileStatus status = fs.exists(idPath) ? fs.getFileStatus(idPath) : null; if (status != null) { int len = Ints.checkedCast(status.getLen()); byte[] content = new byte[len]; FSDataInputStream in = fs.open(idPath); try { in.readFully(content); } catch (EOFException eof) { LOG.warn("Cluster ID file " + idPath.toString() + " was empty"); } finally { in.close(); } try { clusterId = ClusterId.parseFrom(content); } catch (DeserializationException e) { throw new IOException("content=" + Bytes.toString(content), e); } // If not pb'd, make it so. 
if (!ProtobufUtil.isPBMagicPrefix(content)) { String cid = null; in = fs.open(idPath); try { cid = in.readUTF(); clusterId = new ClusterId(cid); } catch (EOFException eof) { LOG.warn("Cluster ID file " + idPath.toString() + " was empty"); } finally { in.close(); } rewriteAsPb(fs, rootdir, idPath, clusterId); } return clusterId; } else { LOG.warn("Cluster ID file does not exist at " + idPath.toString()); } return clusterId; } /** * @param cid * @throws IOException */ private static void rewriteAsPb(final FileSystem fs, final Path rootdir, final Path p, final ClusterId cid) throws IOException { // Rewrite the file as pb. Move aside the old one first, write new // then delete the moved-aside file. Path movedAsideName = new Path(p + "." + System.currentTimeMillis()); if (!fs.rename(p, movedAsideName)) throw new IOException("Failed rename of " + p); setClusterId(fs, rootdir, cid, 100); if (!fs.delete(movedAsideName, false)) { throw new IOException("Failed delete of " + movedAsideName); } LOG.debug("Rewrote the hbase.id file as pb"); } /** * Writes a new unique identifier for this cluster to the "hbase.id" file * in the HBase root directory * @param fs the root directory FileSystem * @param rootdir the path to the HBase root directory * @param clusterId the unique identifier to store * @param wait how long (in milliseconds) to wait between retries * @throws IOException if writing to the FileSystem fails and no wait value */ public static void setClusterId(FileSystem fs, Path rootdir, ClusterId clusterId, int wait) throws IOException { while (true) { try { Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); FSDataOutputStream s = fs.create(filePath); try { s.write(clusterId.toByteArray()); } finally { s.close(); } if (LOG.isDebugEnabled()) { LOG.debug("Created cluster ID file at " + filePath.toString() + " with ID: " + clusterId); } return; } catch (IOException ioe) { if (wait > 0) { LOG.warn("Unable to create cluster ID file in " + rootdir.toString() + ", 
retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe)); try { Thread.sleep(wait); } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } } else { throw ioe; } } } } /** * Verifies root directory path is a valid URI with a scheme * * @param root root directory path * @return Passed <code>root</code> argument. * @throws IOException if not a valid URI with a scheme */ public static Path validateRootPath(Path root) throws IOException { try { URI rootURI = new URI(root.toString()); String scheme = rootURI.getScheme(); if (scheme == null) { throw new IOException("Root directory does not have a scheme"); } return root; } catch (URISyntaxException e) { IOException io = new IOException("Root directory path is not a valid " + "URI -- check your " + HConstants.HBASE_DIR + " configuration"); io.initCause(e); throw io; } } /** * Checks for the presence of the root path (using the provided conf object) in the given path. If * it exists, this method removes it and returns the String representation of remaining relative path. * @param path * @param conf * @return String representation of the remaining relative path * @throws IOException */ public static String removeRootPath(Path path, final Configuration conf) throws IOException { Path root = FSUtils.getRootDir(conf); String pathStr = path.toString(); // check that the path is absolute... it has the root path in it. if (!pathStr.startsWith(root.toString())) return pathStr; // if not, return as it is. return pathStr.substring(root.toString().length() + 1);// remove the "/" too. } /** * If DFS, check safe mode and if so, wait until we clear it. 
* @param conf configuration * @param wait Sleep between retries * @throws IOException e */ public static void waitOnSafeMode(final Configuration conf, final long wait) throws IOException { FileSystem fs = FileSystem.get(conf); if (!(fs instanceof DistributedFileSystem)) return; DistributedFileSystem dfs = (DistributedFileSystem) fs; // Make sure dfs is not in safe mode while (isInSafeMode(dfs)) { LOG.info("Waiting for dfs to exit safe mode..."); try { Thread.sleep(wait); } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } } } /** * Return the 'path' component of a Path. In Hadoop, Path is an URI. This * method returns the 'path' component of a Path's URI: e.g. If a Path is * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>, * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>. * This method is useful if you want to print out a Path without qualifying * Filesystem instance. * @param p Filesystem Path whose 'path' component we are to return. * @return Path portion of the Filesystem */ public static String getPath(Path p) { return p.toUri().getPath(); } /** * @param c configuration * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from * configuration as a qualified Path. 
* @throws IOException e */ public static Path getRootDir(final Configuration c) throws IOException { Path p = new Path(c.get(HConstants.HBASE_DIR)); FileSystem fs = p.getFileSystem(c); return p.makeQualified(fs); } public static void setRootDir(final Configuration c, final Path root) throws IOException { c.set(HConstants.HBASE_DIR, root.toString()); } public static void setFsDefault(final Configuration c, final Path root) throws IOException { c.set("fs.defaultFS", root.toString()); // for hadoop 0.21+ } /** * Checks if meta region exists * * @param fs file system * @param rootdir root directory of HBase installation * @return true if exists * @throws IOException e */ @SuppressWarnings("deprecation") public static boolean metaRegionExists(FileSystem fs, Path rootdir) throws IOException { Path metaRegionDir = HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO); return fs.exists(metaRegionDir); } /** * Compute HDFS blocks distribution of a given file, or a portion of the file * @param fs file system * @param status file status of the file * @param start start position of the portion * @param length length of the portion * @return The HDFS blocks distribution */ static public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs, FileStatus status, long start, long length) throws IOException { HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution(); BlockLocation[] blockLocations = fs.getFileBlockLocations(status, start, length); for (BlockLocation bl : blockLocations) { String[] hosts = bl.getHosts(); long len = bl.getLength(); blocksDistribution.addHostsAndBlockWeight(hosts, len); } return blocksDistribution; } /** * Runs through the hbase rootdir and checks all stores have only * one file in them -- that is, they've been major compacted. Looks * at root and meta tables too. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this hbase install is major compacted. 
* @throws IOException e */ public static boolean isMajorCompacted(final FileSystem fs, final Path hbaseRootDir) throws IOException { List<Path> tableDirs = getTableDirs(fs, hbaseRootDir); for (Path d : tableDirs) { FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs)); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // Else its a region name. Now look in region for families. FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs)); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); // Now in family make sure only one file. FileStatus[] familyStatus = fs.listStatus(family); if (familyStatus.length > 1) { LOG.debug(family.toString() + " has " + familyStatus.length + " files."); return false; } } } } return true; } // TODO move this method OUT of FSUtils. No dependencies to HMaster /** * Returns the total overall fragmentation percentage. Includes hbase:meta and * -ROOT- as well. * * @param master The master defining the HBase root and file system. * @return A map for each table and its percentage. * @throws IOException When scanning the directory fails. */ public static int getTotalTableFragmentation(final HMaster master) throws IOException { Map<String, Integer> map = getTableFragmentation(master); return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1; } /** * Runs through the HBase rootdir and checks how many stores for each table * have more than one file in them. Checks -ROOT- and hbase:meta too. The total * percentage across all tables is stored under the special key "-TOTAL-". * * @param master The master defining the HBase root and file system. * @return A map for each table and its percentage. * * @throws IOException When scanning the directory fails. 
*/ public static Map<String, Integer> getTableFragmentation(final HMaster master) throws IOException { Path path = getRootDir(master.getConfiguration()); // since HMaster.getFileSystem() is package private FileSystem fs = path.getFileSystem(master.getConfiguration()); return getTableFragmentation(fs, path); } /** * Runs through the HBase rootdir and checks how many stores for each table * have more than one file in them. Checks -ROOT- and hbase:meta too. The total * percentage across all tables is stored under the special key "-TOTAL-". * * @param fs The file system to use. * @param hbaseRootDir The root directory to scan. * @return A map for each table and its percentage. * @throws IOException When scanning the directory fails. */ public static Map<String, Integer> getTableFragmentation(final FileSystem fs, final Path hbaseRootDir) throws IOException { Map<String, Integer> frags = new HashMap<String, Integer>(); int cfCountTotal = 0; int cfFragTotal = 0; DirFilter df = new DirFilter(fs); List<Path> tableDirs = getTableDirs(fs, hbaseRootDir); for (Path d : tableDirs) { int cfCount = 0; int cfFrag = 0; FileStatus[] regionDirs = fs.listStatus(d, df); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // else its a region name, now look in region for families FileStatus[] familyDirs = fs.listStatus(dd, df); for (FileStatus familyDir : familyDirs) { cfCount++; cfCountTotal++; Path family = familyDir.getPath(); // now in family make sure only one file FileStatus[] familyStatus = fs.listStatus(family); if (familyStatus.length > 1) { cfFrag++; cfFragTotal++; } } } // compute percentage per table and store in result list frags.put(FSUtils.getTableName(d).getNameAsString(), Math.round((float) cfFrag / cfCount * 100)); } // set overall percentage for all tables frags.put("-TOTAL-", Math.round((float) cfFragTotal / cfCountTotal * 100)); return frags; } /** * Expects to find 
-ROOT- directory. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this a pre020 layout. * @throws IOException e */ public static boolean isPre020FileLayout(final FileSystem fs, final Path hbaseRootDir) throws IOException { Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"), "70236052"), "info"), "mapfiles"); return fs.exists(mapfiles); } /** * Runs through the hbase rootdir and checks all stores have only * one file in them -- that is, they've been major compacted. Looks * at root and meta tables too. This version differs from * {@link #isMajorCompacted(FileSystem, Path)} in that it expects a * pre-0.20.0 hbase layout on the filesystem. Used migrating. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this hbase install is major compacted. * @throws IOException e */ public static boolean isMajorCompactedPre020(final FileSystem fs, final Path hbaseRootDir) throws IOException { // Presumes any directory under hbase.rootdir is a table. List<Path> tableDirs = getTableDirs(fs, hbaseRootDir); for (Path d : tableDirs) { // Inside a table, there are compaction.dir directories to skip. // Otherwise, all else should be regions. Then in each region, should // only be family directories. Under each of these, should be a mapfile // and info directory and in these only one file. if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) { continue; } FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs)); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // Else its a region name. Now look in region for families. FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs)); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); FileStatus[] infoAndMapfile = fs.listStatus(family); // Assert that only info and mapfile in family dir. 
if (infoAndMapfile.length != 0 && infoAndMapfile.length != 2) { LOG.debug(family.toString() + " has more than just info and mapfile: " + infoAndMapfile.length); return false; } // Make sure directory named info or mapfile. for (int ll = 0; ll < 2; ll++) { if (infoAndMapfile[ll].getPath().getName().equals("info") || infoAndMapfile[ll].getPath().getName().equals("mapfiles")) continue; LOG.debug("Unexpected directory name: " + infoAndMapfile[ll].getPath()); return false; } // Now in family, there are 'mapfile' and 'info' subdirs. Just // look in the 'mapfile' subdir. FileStatus[] familyStatus = fs.listStatus(new Path(family, "mapfiles")); if (familyStatus.length > 1) { LOG.debug(family.toString() + " has " + familyStatus.length + " files."); return false; } } } } return true; } /** * Returns the {@link org.apache.hadoop.fs.Path} object representing the table directory under * path rootdir * * @param rootdir qualified path of HBase root directory * @param tableName name of table * @return {@link org.apache.hadoop.fs.Path} for table */ public static Path getTableDir(Path rootdir, final TableName tableName) { return new Path(getNamespaceDir(rootdir, tableName.getNamespaceAsString()), tableName.getQualifierAsString()); } /** * Returns the {@link org.apache.hadoop.hbase.TableName} object representing * the table directory under * path rootdir * * @param tablePath path of table * @return {@link org.apache.hadoop.fs.Path} for table */ public static TableName getTableName(Path tablePath) { return TableName.valueOf(tablePath.getParent().getName(), tablePath.getName()); } /** * Returns the {@link org.apache.hadoop.fs.Path} object representing * the namespace directory under path rootdir * * @param rootdir qualified path of HBase root directory * @param namespace namespace name * @return {@link org.apache.hadoop.fs.Path} for table */ public static Path getNamespaceDir(Path rootdir, final String namespace) { return new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR, new 
Path(namespace))); } /** * A {@link PathFilter} that returns only regular files. */ static class FileFilter implements PathFilter { private final FileSystem fs; public FileFilter(final FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path p) { try { return fs.isFile(p); } catch (IOException e) { LOG.debug("unable to verify if path=" + p + " is a regular file", e); return false; } } } /** * Directory filter that doesn't include any of the directories in the specified blacklist */ public static class BlackListDirFilter implements PathFilter { private final FileSystem fs; private List<String> blacklist; /** * Create a filter on the give filesystem with the specified blacklist * @param fs filesystem to filter * @param directoryNameBlackList list of the names of the directories to filter. If * <tt>null</tt>, all directories are returned */ @SuppressWarnings("unchecked") public BlackListDirFilter(final FileSystem fs, final List<String> directoryNameBlackList) { this.fs = fs; blacklist = (List<String>) (directoryNameBlackList == null ? Collections.emptyList() : directoryNameBlackList); } @Override public boolean accept(Path p) { boolean isValid = false; try { if (blacklist.contains(p.getName().toString())) { isValid = false; } else { isValid = fs.getFileStatus(p).isDirectory(); } } catch (IOException e) { LOG.warn("An error occurred while verifying if [" + p.toString() + "] is a valid directory. Returning 'not valid' and continuing.", e); } return isValid; } } /** * A {@link PathFilter} that only allows directories. */ public static class DirFilter extends BlackListDirFilter { public DirFilter(FileSystem fs) { super(fs, null); } } /** * A {@link PathFilter} that returns usertable directories. 
To get all directories use the * {@link BlackListDirFilter} with a <tt>null</tt> blacklist */ public static class UserTableDirFilter extends BlackListDirFilter { public UserTableDirFilter(FileSystem fs) { super(fs, HConstants.HBASE_NON_TABLE_DIRS); } } /** * Heuristic to determine whether is safe or not to open a file for append * Looks both for dfs.support.append and use reflection to search * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush() * @param conf * @return True if append support */ public static boolean isAppendSupported(final Configuration conf) { boolean append = conf.getBoolean("dfs.support.append", false); if (append) { try { // TODO: The implementation that comes back when we do a createWriter // may not be using SequenceFile so the below is not a definitive test. // Will do for now (hdfs-200). SequenceFile.Writer.class.getMethod("syncFs", new Class<?>[] {}); append = true; } catch (SecurityException e) { } catch (NoSuchMethodException e) { append = false; } } if (!append) { // Look for the 0.21, 0.22, new-style append evidence. try { FSDataOutputStream.class.getMethod("hflush", new Class<?>[] {}); append = true; } catch (NoSuchMethodException e) { append = false; } } return append; } /** * @param conf * @return True if this filesystem whose scheme is 'hdfs'. * @throws IOException */ public static boolean isHDFS(final Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); String scheme = fs.getUri().getScheme(); return scheme.equalsIgnoreCase("hdfs"); } /** * Recover file lease. Used when a file might be suspect * to be had been left open by another process. 
* @param fs FileSystem handle * @param p Path of file to recover lease * @param conf Configuration handle * @throws IOException */ public abstract void recoverFileLease(final FileSystem fs, final Path p, Configuration conf, CancelableProgressable reporter) throws IOException; public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir) throws IOException { List<Path> tableDirs = new LinkedList<Path>(); for (FileStatus status : fs.globStatus(new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) { tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath())); } return tableDirs; } /** * @param fs * @param rootdir * @return All the table directories under <code>rootdir</code>. Ignore non table hbase folders such as * .logs, .oldlogs, .corrupt folders. * @throws IOException */ public static List<Path> getLocalTableDirs(final FileSystem fs, final Path rootdir) throws IOException { // presumes any directory under hbase.rootdir is a table FileStatus[] dirs = fs.listStatus(rootdir, new UserTableDirFilter(fs)); List<Path> tabledirs = new ArrayList<Path>(dirs.length); for (FileStatus dir : dirs) { tabledirs.add(dir.getPath()); } return tabledirs; } /** * Checks if the given path is the one with 'recovered.edits' dir. * @param path * @return True if we recovered edits */ public static boolean isRecoveredEdits(Path path) { return path.toString().contains(HConstants.RECOVERED_EDITS_DIR); } /** * Filter for all dirs that don't start with '.' */ public static class RegionDirFilter implements PathFilter { // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names. 
final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$"); final FileSystem fs; public RegionDirFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { if (!regionDirPattern.matcher(rd.getName()).matches()) { return false; } try { return fs.getFileStatus(rd).isDirectory(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. LOG.warn("Skipping file " + rd + " due to IOException", ioe); return false; } } } /** * Given a particular table dir, return all the regiondirs inside it, excluding files such as * .tableinfo * @param fs A file system for the Path * @param tableDir Path to a specific table directory <hbase.rootdir>/<tabledir> * @return List of paths to valid region directories in table dir. * @throws IOException */ public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException { // assumes we are in a table dir. FileStatus[] rds = fs.listStatus(tableDir, new RegionDirFilter(fs)); List<Path> regionDirs = new ArrayList<Path>(rds.length); for (FileStatus rdfs : rds) { Path rdPath = rdfs.getPath(); regionDirs.add(rdPath); } return regionDirs; } /** * Filter for all dirs that are legal column family names. This is generally used for colfam * dirs <hbase.rootdir>/<tabledir>/<regiondir>/<colfamdir>. */ public static class FamilyDirFilter implements PathFilter { final FileSystem fs; public FamilyDirFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { try { // throws IAE if invalid HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(rd.getName())); } catch (IllegalArgumentException iae) { // path name is an invalid family name and thus is excluded. return false; } try { return fs.getFileStatus(rd).isDirectory(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. 
LOG.warn("Skipping file " + rd + " due to IOException", ioe); return false; } } } /** * Given a particular region dir, return all the familydirs inside it * * @param fs A file system for the Path * @param regionDir Path to a specific region directory * @return List of paths to valid family directories in region dir. * @throws IOException */ public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException { // assumes we are in a region dir. FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs)); List<Path> familyDirs = new ArrayList<Path>(fds.length); for (FileStatus fdfs : fds) { Path fdPath = fdfs.getPath(); familyDirs.add(fdPath); } return familyDirs; } /** * Filter for HFiles that excludes reference files. */ public static class HFileFilter implements PathFilter { // This pattern will accept 0.90+ style hex hfies files but reject reference files final public static Pattern hfilePattern = Pattern.compile("^([0-9a-f]+)$"); final FileSystem fs; public HFileFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { if (!hfilePattern.matcher(rd.getName()).matches()) { return false; } try { // only files return !fs.getFileStatus(rd).isDirectory(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. LOG.warn("Skipping file " + rd + " due to IOException", ioe); return false; } } } /** * @param conf * @return Returns the filesystem of the hbase rootdir. * @throws IOException */ public static FileSystem getCurrentFileSystem(Configuration conf) throws IOException { return getRootDir(conf).getFileSystem(conf); } /** * Runs through the HBase rootdir/tablename and creates a reverse lookup map for * table StoreFile names to the full Path. * <br> * Example...<br> * Key = 3944417774205889744 <br> * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744 * * @param map map to add values. 
If null, this method will create and populate one to return * @param fs The file system to use. * @param hbaseRootDir The root directory to scan. * @param tableName name of the table to scan. * @return Map keyed by StoreFile name with a value of the full Path. * @throws IOException When scanning the directory fails. */ public static Map<String, Path> getTableStoreFilePathMap(Map<String, Path> map, final FileSystem fs, final Path hbaseRootDir, TableName tableName) throws IOException { if (map == null) { map = new HashMap<String, Path>(); } // only include the directory paths to tables Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName); // Inside a table, there are compaction.dir directories to skip. Otherwise, all else // should be regions. PathFilter df = new BlackListDirFilter(fs, HConstants.HBASE_NON_TABLE_DIRS); FileStatus[] regionDirs = fs.listStatus(tableDir); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // else its a region name, now look in region for families FileStatus[] familyDirs = fs.listStatus(dd, df); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); // now in family, iterate over the StoreFiles and // put in map FileStatus[] familyStatus = fs.listStatus(family); for (FileStatus sfStatus : familyStatus) { Path sf = sfStatus.getPath(); map.put(sf.getName(), sf); } } } return map; } /** * Runs through the HBase rootdir and creates a reverse lookup map for * table StoreFile names to the full Path. * <br> * Example...<br> * Key = 3944417774205889744 <br> * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744 * * @param fs The file system to use. * @param hbaseRootDir The root directory to scan. * @return Map keyed by StoreFile name with a value of the full Path. * @throws IOException When scanning the directory fails. 
*/ public static Map<String, Path> getTableStoreFilePathMap(final FileSystem fs, final Path hbaseRootDir) throws IOException { Map<String, Path> map = new HashMap<String, Path>(); // if this method looks similar to 'getTableFragmentation' that is because // it was borrowed from it. // only include the directory paths to tables for (Path tableDir : FSUtils.getTableDirs(fs, hbaseRootDir)) { getTableStoreFilePathMap(map, fs, hbaseRootDir, FSUtils.getTableName(tableDir)); } return map; } /** * Calls fs.listStatus() and treats FileNotFoundException as non-fatal * This accommodates differences between hadoop versions, where hadoop 1 * does not throw a FileNotFoundException, and return an empty FileStatus[] * while Hadoop 2 will throw FileNotFoundException. * * @param fs file system * @param dir directory * @param filter path filter * @return null if dir is empty or doesn't exist, otherwise FileStatus array */ public static FileStatus[] listStatus(final FileSystem fs, final Path dir, final PathFilter filter) throws IOException { FileStatus[] status = null; try { status = filter == null ? 
fs.listStatus(dir) : fs.listStatus(dir, filter); } catch (FileNotFoundException fnfe) { // if directory doesn't exist, return null if (LOG.isTraceEnabled()) { LOG.trace(dir + " doesn't exist"); } } if (status == null || status.length < 1) return null; return status; } /** * Calls fs.listStatus() and treats FileNotFoundException as non-fatal * This would accommodates differences between hadoop versions * * @param fs file system * @param dir directory * @return null if dir is empty or doesn't exist, otherwise FileStatus array */ public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException { return listStatus(fs, dir, null); } /** * Calls fs.delete() and returns the value returned by the fs.delete() * * @param fs * @param path * @param recursive * @return the value returned by the fs.delete() * @throws IOException */ public static boolean delete(final FileSystem fs, final Path path, final boolean recursive) throws IOException { return fs.delete(path, recursive); } /** * Calls fs.exists(). Checks if the specified path exists * * @param fs * @param path * @return the value returned by fs.exists() * @throws IOException */ public static boolean isExists(final FileSystem fs, final Path path) throws IOException { return fs.exists(path); } /** * Throw an exception if an action is not permitted by a user on a file. 
* * @param ugi * the user * @param file * the file * @param action * the action */ public static void checkAccess(UserGroupInformation ugi, FileStatus file, FsAction action) throws AccessControlException { if (ugi.getShortUserName().equals(file.getOwner())) { if (file.getPermission().getUserAction().implies(action)) { return; } } else if (contains(ugi.getGroupNames(), file.getGroup())) { if (file.getPermission().getGroupAction().implies(action)) { return; } } else if (file.getPermission().getOtherAction().implies(action)) { return; } throw new AccessControlException("Permission denied:" + " action=" + action + " path=" + file.getPath() + " user=" + ugi.getShortUserName()); } private static boolean contains(String[] groups, String user) { for (String group : groups) { if (group.equals(user)) { return true; } } return false; } /** * Log the current state of the filesystem from a certain root directory * @param fs filesystem to investigate * @param root root file/directory to start logging from * @param LOG log to output information * @throws IOException if an unexpected exception occurs */ public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG) throws IOException { LOG.debug("Current file system:"); logFSTree(LOG, fs, root, "|-"); } /** * Recursive helper to log the state of the FS * * @see #logFileSystemState(FileSystem, Path, Log) */ private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix) throws IOException { FileStatus[] files = FSUtils.listStatus(fs, root, null); if (files == null) return; for (FileStatus file : files) { if (file.isDirectory()) { LOG.debug(prefix + file.getPath().getName() + "/"); logFSTree(LOG, fs, file.getPath(), prefix + "---"); } else { LOG.debug(prefix + file.getPath().getName()); } } } public static boolean renameAndSetModifyTime(final FileSystem fs, final Path src, final Path dest) throws IOException { // set the modify time for TimeToLive Cleaner fs.setTimes(src, 
EnvironmentEdgeManager.currentTimeMillis(), -1); return fs.rename(src, dest); } /** * This function is to scan the root path of the file system to get the * degree of locality for each region on each of the servers having at least * one block of that region. * This is used by the tool {@link RegionPlacementMaintainer} * * @param conf * the configuration to use * @return the mapping from region encoded name to a map of server names to * locality fraction * @throws IOException * in case of file system errors or interrupts */ public static Map<String, Map<String, Float>> getRegionDegreeLocalityMappingFromFS(final Configuration conf) throws IOException { return getRegionDegreeLocalityMappingFromFS(conf, null, conf.getInt(THREAD_POOLSIZE, DEFAULT_THREAD_POOLSIZE)); } /** * This function is to scan the root path of the file system to get the * degree of locality for each region on each of the servers having at least * one block of that region. * * @param conf * the configuration to use * @param desiredTable * the table you wish to scan locality for * @param threadPoolSize * the thread pool size to use * @return the mapping from region encoded name to a map of server names to * locality fraction * @throws IOException * in case of file system errors or interrupts */ public static Map<String, Map<String, Float>> getRegionDegreeLocalityMappingFromFS(final Configuration conf, final String desiredTable, int threadPoolSize) throws IOException { Map<String, Map<String, Float>> regionDegreeLocalityMapping = new ConcurrentHashMap<String, Map<String, Float>>(); getRegionLocalityMappingFromFS(conf, desiredTable, threadPoolSize, null, regionDegreeLocalityMapping); return regionDegreeLocalityMapping; } /** * This function is to scan the root path of the file system to get either the * mapping between the region name and its best locality region server or the * degree of locality of each region on each of the servers having at least * one block of that region. 
The output map parameters are both optional. * * @param conf * the configuration to use * @param desiredTable * the table you wish to scan locality for * @param threadPoolSize * the thread pool size to use * @param regionToBestLocalityRSMapping * the map into which to put the best locality mapping or null * @param regionDegreeLocalityMapping * the map into which to put the locality degree mapping or null, * must be a thread-safe implementation * @throws IOException * in case of file system errors or interrupts */ private static void getRegionLocalityMappingFromFS(final Configuration conf, final String desiredTable, int threadPoolSize, Map<String, String> regionToBestLocalityRSMapping, Map<String, Map<String, Float>> regionDegreeLocalityMapping) throws IOException { FileSystem fs = FileSystem.get(conf); Path rootPath = FSUtils.getRootDir(conf); long startTime = EnvironmentEdgeManager.currentTimeMillis(); Path queryPath; // The table files are in ${hbase.rootdir}/data/<namespace>/<table>/* if (null == desiredTable) { queryPath = new Path(new Path(rootPath, HConstants.BASE_NAMESPACE_DIR).toString() + "/*/*/*/"); } else { queryPath = new Path(FSUtils.getTableDir(rootPath, TableName.valueOf(desiredTable)).toString() + "/*/"); } // reject all paths that are not appropriate PathFilter pathFilter = new PathFilter() { @Override public boolean accept(Path path) { // this is the region name; it may get some noise data if (null == path) { return false; } // no parent? 
Path parent = path.getParent(); if (null == parent) { return false; } String regionName = path.getName(); if (null == regionName) { return false; } if (!regionName.toLowerCase().matches("[0-9a-f]+")) { return false; } return true; } }; FileStatus[] statusList = fs.globStatus(queryPath, pathFilter); if (null == statusList) { return; } else { LOG.debug("Query Path: " + queryPath + " ; # list of files: " + statusList.length); } // lower the number of threads in case we have very few expected regions threadPoolSize = Math.min(threadPoolSize, statusList.length); // run in multiple threads ThreadPoolExecutor tpe = new ThreadPoolExecutor(threadPoolSize, threadPoolSize, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(statusList.length)); try { // ignore all file status items that are not of interest for (FileStatus regionStatus : statusList) { if (null == regionStatus) { continue; } if (!regionStatus.isDirectory()) { continue; } Path regionPath = regionStatus.getPath(); if (null == regionPath) { continue; } tpe.execute(new FSRegionScanner(fs, regionPath, regionToBestLocalityRSMapping, regionDegreeLocalityMapping)); } } finally { tpe.shutdown(); int threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 60 * 1000); try { // here we wait until TPE terminates, which is either naturally or by // exceptions in the execution of the threads while (!tpe.awaitTermination(threadWakeFrequency, TimeUnit.MILLISECONDS)) { // printing out rough estimate, so as to not introduce // AtomicInteger LOG.info("Locality checking is underway: { Scanned Regions : " + tpe.getCompletedTaskCount() + "/" + tpe.getTaskCount() + " }"); } } catch (InterruptedException e) { throw (InterruptedIOException) new InterruptedIOException().initCause(e); } } long overhead = EnvironmentEdgeManager.currentTimeMillis() - startTime; String overheadMsg = "Scan DFS for locality info takes " + overhead + " ms"; LOG.info(overheadMsg); } /** * Do our short circuit read setup. 
* Checks buffer size to use and whether to do checksumming in hbase or hdfs.
 * @param conf
 */
public static void setupShortCircuitRead(final Configuration conf) {
  // The user is not expected to set "dfs.client.read.shortcircuit.skip.checksum".
  final boolean skipChecksum =
      conf.getBoolean("dfs.client.read.shortcircuit.skip.checksum", false);
  final boolean hbaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
  if (skipChecksum) {
    final String hint;
    if (hbaseChecksum) {
      hint = " HBase checksum doesn't require "
          + "it, see https://issues.apache.org/jira/browse/HBASE-6868.";
    } else {
      hint = "";
    }
    LOG.warn("Configuration \"dfs.client.read.shortcircuit.skip.checksum\" should not "
        + "be set to true." + hint);
    // fails here whenever assertions are enabled
    assert !skipChecksum;
  }
  checkShortCircuitReadBufferSize(conf);
}

/**
 * Check if short circuit read buffer size is set and if not, set it to hbase value.
 * @param conf
 */
public static void checkShortCircuitReadBufferSize(final Configuration conf) {
  final int fallbackSize = HConstants.DEFAULT_BLOCKSIZE * 2;
  final int unset = -1;
  // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
  final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
  if (conf.getInt(dfsKey, unset) == unset) {
    // The dfs key is normally absent: install the size configured under the
    // "hbase."-prefixed key, or the fallback when that is absent as well.
    final int wantedSize = conf.getInt("hbase." + dfsKey, fallbackSize);
    conf.setIfUnset(dfsKey, Integer.toString(wantedSize));
  }
  // otherwise a size is already configured and is left untouched
}
}