// Java tutorial
/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.moz.fiji.mapreduce.tools; import java.io.IOException; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.HConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.moz.fiji.annotations.ApiAudience; import com.moz.fiji.common.flags.Flag; import com.moz.fiji.mapreduce.HFileLoader; import com.moz.fiji.schema.Fiji; import com.moz.fiji.schema.FijiTable; import com.moz.fiji.schema.FijiURI; import com.moz.fiji.schema.tools.BaseTool; import com.moz.fiji.schema.tools.FijiToolLauncher; import com.moz.fiji.schema.util.ResourceUtils; /** Bulk loads HFiles into a Fiji table. 
*/ @ApiAudience.Private public final class FijiBulkLoad extends BaseTool { private static final Logger LOG = LoggerFactory.getLogger(FijiBulkLoad.class); @Flag(name = "hfile", usage = "Path of the directory containing HFile(s) to bulk-load. " + "Typically --hfile=hdfs://hdfs-cluster-address/path/to/hfile.dir/") private String mHFileFlag = null; @Flag(name = "table", usage = "URI of the Fiji table to bulk-load into.") private String mTableURIFlag = null; @Flag(name = "timeout-seconds", usage = "Timeout in seconds to wait for a bulk-load to " + "succeed. Default is 60 seconds") private final Long mTimeoutSeconds = 60L; @Flag(name = "chmod-background", usage = "When true, while bulk loading, periodically scan the " + "hfile directory recursively to add all read-write permissions to files created by " + "splits so that they will be usable by HBase. See " + "https://issues.apache.org/jira/browse/HBASE-6422.") private Boolean mChmodBackground = false; /** URI of the Fiji table to bulk-load into. */ private FijiURI mTableURI = null; /** Path of the HFile(s) to bulk-load. */ private Path mHFile = null; /** {@inheritDoc} */ @Override public String getName() { return "bulk-load"; } /** {@inheritDoc} */ @Override public String getDescription() { return "Bulk load HFiles into a table"; } /** {@inheritDoc} */ @Override public String getCategory() { return "Bulk"; } /** * Recursively grant additional read and write permissions to all. There is no * built-in way in the Hadoop Java API to recursively set permissions on a directory, * so we implement it here. * * @param path The Path to the directory to chmod. * @throws IOException on IOException. 
*/ private void recursiveGrantAllReadWritePermissions(Path path) throws IOException { FileSystem hdfs = path.getFileSystem(getConf()); recursiveGrantAllReadWritePermissions(hdfs, hdfs.getFileStatus(path)); } /** * Helper method used by recursiveGrantAllReadWritePermissions to actually grant the * additional read and write permissions to all. It deals with FileStatus objects * since that is the object that supports listStatus. * * @param hdfs The FileSystem on which the file exists. * @param status The status of the file whose permissions are checked and on whose children * this method is called recursively. * @throws IOException on IOException. */ private void recursiveGrantAllReadWritePermissions(FileSystem hdfs, FileStatus status) throws IOException { final FsPermission currentPermissions = status.getPermission(); if (!currentPermissions.getOtherAction().implies(FsAction.READ_WRITE)) { LOG.info("Adding a+rw to permissions for {}: {}", status.getPath(), currentPermissions); hdfs.setPermission(status.getPath(), new FsPermission(currentPermissions.getUserAction(), currentPermissions.getGroupAction().or(FsAction.READ_WRITE), currentPermissions.getOtherAction().or(FsAction.READ_WRITE))); } // Recurse into any files and directories in the path. // We must use listStatus because listFiles does not list subdirectories. 
FileStatus[] subStatuses = hdfs.listStatus(status.getPath()); for (FileStatus subStatus : subStatuses) { if (!subStatus.equals(status)) { recursiveGrantAllReadWritePermissions(hdfs, subStatus); } } } /** {@inheritDoc} */ @Override protected void validateFlags() throws Exception { super.validateFlags(); Preconditions.checkArgument((mTableURIFlag != null) && !mTableURIFlag.isEmpty(), "Specify the table to bulk-load into with " + "--table=fiji://hbase-address/fiji-instance/table"); mTableURI = FijiURI.newBuilder(mTableURIFlag).build(); Preconditions.checkArgument(mTableURI.getTable() != null, "Specify the table to bulk-load into with " + "--table=fiji://hbase-address/fiji-instance/table"); Preconditions.checkArgument((mHFileFlag != null) && !mHFileFlag.isEmpty(), "Specify the HFiles to bulk-load. " + "E.g. --hfile=hdfs://hdfs-cluster-address/path/to/hfile.dir/"); mHFile = new Path(mHFileFlag); } // For some reason checkstyle complains if we include an explicit @inheritDoc annotation here. @Override protected int run(List<String> nonFlagArgs) throws Exception { final Fiji fiji = Fiji.Factory.open(mTableURI, getConf()); try { final FijiTable table = fiji.openTable(mTableURI.getTable()); try { // Load the HFiles. // // TODO: Consolidate this logic in a single central place: We must consolidate the // logic to properly initialize a Configuration object to target a specific HBase // cluster (hence the manual override of the ZooKeeper quorum/client-port). // // The reason for this manual override here is : FijiBulkLoad needs a // Configuration to create an HBaseLoader for the HBase instance targeted at from // the table URI. FijiTable does not expose its internal Configuration and // Fiji.getConf() is deprecated, so we have to construct one externally. 
final Configuration conf = getConf(); conf.set(HConstants.ZOOKEEPER_QUORUM, Joiner.on(",").join(mTableURI.getZookeeperQuorumOrdered())); conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, mTableURI.getZookeeperClientPort()); final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(2); Future<?> chmodTask = null; try { // We have to continually run chmod a+rw so that files newly created by splits will // be usable by hbase. See https://issues.apache.org/jira/browse/HBASE-6422. // HBase proposes to solve this with the Secure Bulk Loader, but, for now, this // band-aid works. if (mChmodBackground) { final Runnable chmodRunnable = new Runnable() { static final int MAX_CONSECUTIVE_ERRORS = 5; private int mNumConsecutiveErrors = 0; /** {@inheritDoc} */ @Override public void run() { try { recursiveGrantAllReadWritePermissions(mHFile); mNumConsecutiveErrors = 0; } catch (IOException ex) { LOG.warn("recursiveGrantAllReadWritePermissions raised exception: {}", ex); mNumConsecutiveErrors += 1; if (mNumConsecutiveErrors >= MAX_CONSECUTIVE_ERRORS) { throw new RuntimeException("too many IOExceptions", ex); } } } }; chmodTask = executorService.scheduleAtFixedRate(chmodRunnable, 0, 1, TimeUnit.SECONDS); } // NOTE: HFileLoader never uses conf. 
final HFileLoader hFileLoader = HFileLoader.create(conf); // launch the load on a separate thread and wait to cancel if timeout is exceeded final Callable<Void> hFileLoadCallable = new Callable<Void>() { public Void call() throws Exception { hFileLoader.load(mHFile, table); return null; } }; final Future<Void> hFileLoadTask = executorService.submit(hFileLoadCallable); try { hFileLoadTask.get(mTimeoutSeconds, TimeUnit.SECONDS); } catch (TimeoutException ex) { getPrintStream() .println("Bulk-load failed due to a timeout after " + mTimeoutSeconds + "s."); hFileLoadTask.cancel(true); return FAILURE; } catch (ExecutionException executionException) { // try to unpack the exception that bulk load raised Exception cause = null; try { cause = (Exception) executionException.getCause(); if (cause == null) { // There was no cause? Fall back to the original ExecutionException. cause = executionException; } } catch (ClassCastException castException) { // Cause wasn't an exception? Fall back to the original ExecutionException. cause = executionException; } throw cause; } } finally { if (chmodTask != null) { chmodTask.cancel(false); } executorService.shutdown(); } return SUCCESS; } finally { ResourceUtils.releaseOrLog(table); } } finally { ResourceUtils.releaseOrLog(fiji); } } /** * Program entry point. * * @param args The command-line arguments. * @throws Exception If there is an error. */ public static void main(String[] args) throws Exception { System.exit(new FijiToolLauncher().run(new FijiBulkLoad(), args)); } }