Java tutorial
/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.bento;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.security.AccessController;
import java.security.PrivilegedAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>This class provides mini Hadoop and HBase clusters that can be run in a process on a
 * local machine.</p>
 *
 * <p>The actual functionality is mostly provided by extending {@link HBaseTestingUtility}.
 * Some changes are made to the testing utility to ensure cluster data is persistent between
 * runs of the cluster.</p>
 */
class BentoHBaseTestingUtility extends HBaseTestingUtility {
  private static final Logger LOG = LoggerFactory.getLogger(BentoHBaseTestingUtility.class);

  /** A directory containing the storage for the mini cluster. */
  private File mDataTestDir;

  /** File for the directory containing cluster data, including dfs data. */
  private File mClusterTestDir;

  /**
   * Constructs a new BentoHBaseTestingUtility and sets the location for its data.
   *
   * @param conf The configuration to use for the started clusters.
   * @param dataTestDir The directory used for cluster storage.
   * @param clusterTestDir The directory used for cluster data, including dfs data.
   * @throws Exception If an error occurred while trying to set the data location.
   */
  public BentoHBaseTestingUtility(Configuration conf, File dataTestDir, File clusterTestDir)
      throws Exception {
    super(conf);
    mDataTestDir = dataTestDir;
    mClusterTestDir = clusterTestDir;
    // We need to set these private fields to ensure that dfs data will be preserved
    // across runs. Unfortunately this can't be done through subclassing, because all
    // methods related to the initialization of these fields are private.
    setField(HBaseTestingUtility.class, this, "dataTestDir", mDataTestDir);
    setField(HBaseTestingUtility.class, this, "clusterTestDir", mClusterTestDir);
  }

  /**
   * Shuts down the mini cluster. We override this method so that only the mini HBase
   * cluster is stopped and the dfs contents are not erased.
   *
   * @throws Exception If a component of the mini cluster failed to shut down.
   * @see #startMiniCluster(int)
   */
  @Override
  public void shutdownMiniCluster() throws Exception {
    LOG.info("Shutting down bento cluster.");
    shutdownMiniHBaseCluster();
    LOG.info("bento cluster is down.");
  }

  /**
   * Starts a mini dfs cluster. We override this method in our child class so we can disable
   * formatting the filesystem between runs, and so we can pass configuration options for the
   * namenode port and namenode UI address.
   *
   * @param servers How many datanodes to start.
   * @param hosts Hostnames for the datanodes to run on.
   * @throws Exception If an error occurs when starting up the cluster.
   * @see #shutdownMiniDFSCluster()
   * @return The mini dfs cluster created.
   */
  @Override
  public MiniDFSCluster startMiniDFSCluster(int servers, final String[] hosts)
      throws Exception {
    // Check that there is not already a cluster running.
    isRunningCluster();

    // We have to set this property as it is used by MiniCluster.
    System.setProperty("test.build.data", mClusterTestDir.toString());

    // Some tests also do this:
    //   System.getProperty("test.cache.data", "build/test/cache");
    // It's also deprecated.
    System.setProperty("test.cache.data", mClusterTestDir.toString());

    // Use configuration-provided values for the namenode port and namenode UI port, or use
    // accepted defaults.
    Configuration conf = getConfiguration();
    int nameNodePort = FileSystem.get(conf).getUri().getPort();
    int nameNodeUiPort = getPortFromConfiguration("dfs.http.address", 50070);
    MiniDFSCluster dfsCluster = null;
    MiniDFSCluster.Builder options = new MiniDFSCluster.Builder(conf)
        .nameNodePort(nameNodePort)
        .nameNodeHttpPort(nameNodeUiPort)
        .numDataNodes(servers)
        .manageNameDfsDirs(true)
        .manageDataDfsDirs(true)
        .hosts(hosts);

    // Ok, now we can start. First try it without reformatting.
    try {
      LOG.debug("Attempting to use existing cluster storage.");
      dfsCluster = options.format(false).build();
    } catch (InconsistentFSStateException e) {
      LOG.debug("Couldn't use existing storage. Attempting to format and try again.");
      dfsCluster = options.format(true).build();
    }

    // Set this just-started cluster as our filesystem.
    FileSystem fs = dfsCluster.getFileSystem();
    conf.set("fs.defaultFS", fs.getUri().toString());
    // Do old style too just to be safe.
    conf.set("fs.default.name", fs.getUri().toString());

    // Wait for the cluster to be totally up.
    dfsCluster.waitClusterUp();

    // Save the dfsCluster in the private field of the parent class.
    setField(HBaseTestingUtility.class, this, "dfsCluster", dfsCluster);
    return dfsCluster;
  }

  /**
   * Starts a <code>MiniMRCluster</code>. We override this method so we can pass configuration
   * options for the jobtracker port.
   *
   * @param servers The number of <code>TaskTracker</code>s to start.
   * @throws IOException When starting the cluster fails.
   */
  @Override
  public void startMiniMapReduceCluster(final int servers) throws IOException {
    LOG.info("Starting mini mapreduce cluster...");
    // These are needed for the new and improved Map/Reduce framework.
    Configuration conf = getConfiguration();
    String logDir = conf.get("hadoop.log.dir");
    String tmpDir = conf.get("hadoop.tmp.dir");
    if (logDir == null) {
      logDir = tmpDir;
    }
    System.setProperty("hadoop.log.dir", logDir);

    // Use the bento extension of MiniMRCluster that avoids overwriting some parameters we pass
    // in the configuration.
    MiniMRCluster mrCluster = new BentoMiniMRCluster(servers,
        FileSystem.get(conf).getUri().toString(), 1, conf);
    LOG.info("Mini mapreduce cluster started");

    // Save the mrCluster in the private field of the parent class.
    setField(HBaseTestingUtility.class, this, "mrCluster", mrCluster);
  }

  /**
   * Uses reflection to set a field in an instance of a class. Any exceptions thrown during this
   * process are converted to {@link RuntimeException}.
   *
   * @param clazz The class of the instance whose field will be set.
   * @param instance The class instance.
   * @param fieldName The name of the field to set.
   * @param value The value for the field.
   * @param <T> The type of the class whose field will be set.
   */
  private <T> void setField(Class<T> clazz, Object instance, String fieldName, Object value) {
    try {
      final Field field = clazz.getDeclaredField(fieldName);
      AccessController.doPrivileged(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
          field.setAccessible(true);
          return null;
        }
      });
      field.set(instance, value);
    } catch (Exception e) {
      throw new RuntimeException("There was a problem using reflection while configuring the "
          + "mini cluster.", e);
    }
  }

  /**
   * Reads a URL from a key in a configuration, obtains the port from the URL, and returns the
   * result. Any exceptions that occur while parsing the URL are converted to runtime
   * exceptions.
   *
   * @param key The configuration key holding the URL.
   * @param defaultPort A default port to use if the configuration is missing the specified key.
   * @return The port.
   */
  private int getPortFromConfiguration(String key, int defaultPort) {
    return getConfiguration().getSocketAddr(key, "localhost", defaultPort).getPort();
  }

  /**
   * An extension of {@link MiniMRCluster} that avoids overwriting the value of the job tracker
   * info port in the configuration used to start the job tracker.
   */
  class BentoMiniMRCluster extends MiniMRCluster {
    /**
     * Constructs a new instance.
     *
     * @param numServers The number of task trackers to start.
     * @param namenode URI to the namenode.
     * @param numDir The number of local directories to use.
     * @param conf The configuration to use when starting the cluster.
     * @throws IOException If there is a problem starting the cluster.
     */
    public BentoMiniMRCluster(int numServers, String namenode, int numDir, Configuration conf)
        throws IOException {
      // Despite passing "0" for the job tracker and task tracker ports here, their values
      // will still be taken from the configuration, due to our changes to createJobConf.
      // If either of these are not set in the configuration, they will start on a random port.
      super(0, 0, numServers, namenode, numDir, null, null, null, new JobConf(conf));
    }

    /**
     * Creates a job conf for use with the started clusters. This implementation simply returns
     * the passed conf.
     *
     * @param conf The configuration to use when starting the clusters.
     * @return The same configuration.
     */
    @Override
    public JobConf createJobConf(JobConf conf) {
      return conf;
    }
  }
}
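To make the listing concrete, here is a minimal sketch of how this utility might be driven. It is an illustration only: the BentoClusterDemo class name, the /tmp storage paths, and the fs.defaultFS value are assumptions, not part of the code above. The sketch lives in the org.kiji.bento package because BentoHBaseTestingUtility is package-private, and it relies on the fact that startMiniDFSCluster derives the namenode port from fs.defaultFS.

package org.kiji.bento;

import java.io.File;

import org.apache.hadoop.conf.Configuration;

/** Hypothetical driver showing one way to exercise BentoHBaseTestingUtility. */
public class BentoClusterDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumption: startMiniDFSCluster reads the namenode port from fs.defaultFS.
    conf.set("fs.defaultFS", "hdfs://localhost:8020");

    // Assumed storage locations; because the private dataTestDir/clusterTestDir
    // fields are set in the constructor, data here survives across runs.
    BentoHBaseTestingUtility bento = new BentoHBaseTestingUtility(
        conf, new File("/tmp/bento-data"), new File("/tmp/bento-cluster"));

    bento.startMiniDFSCluster(1, (String[]) null);  // one datanode, no fixed hostnames
    bento.startMiniMapReduceCluster(1);             // one task tracker

    // ... run HBase or MapReduce work against the clusters here ...

    // Stops the mini HBase cluster only; dfs contents are intentionally left in place.
    bento.shutdownMiniCluster();
  }
}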