Java tutorial
/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.client.minicluster; import java.lang.reflect.Method; import eu.stratosphere.nephele.instance.HardwareDescriptionFactory; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import eu.stratosphere.api.common.io.FileInputFormat; import eu.stratosphere.api.common.io.FileOutputFormat; import eu.stratosphere.configuration.ConfigConstants; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.configuration.GlobalConfiguration; import eu.stratosphere.nephele.client.JobClient; import eu.stratosphere.nephele.jobgraph.JobGraph; import eu.stratosphere.nephele.jobmanager.JobManager; import eu.stratosphere.nephele.jobmanager.JobManager.ExecutionMode; public class NepheleMiniCluster { private static final Log LOG = LogFactory.getLog(NepheleMiniCluster.class); private static final int DEFAULT_JM_RPC_PORT = 6498; private static final int DEFAULT_TM_RPC_PORT = 6501; private static final int DEFAULT_TM_DATA_PORT = 7501; private static final long DEFAULT_MEMORY_SIZE = -1; private static final int DEFAULT_NUM_TASK_MANAGER = 1; private static final boolean DEFAULT_LAZY_MEMORY_ALLOCATION = true; // -------------------------------------------------------------------------------------------- private final Object startStopLock = new Object(); private int jobManagerRpcPort = DEFAULT_JM_RPC_PORT; private int taskManagerRpcPort = DEFAULT_TM_RPC_PORT; private int taskManagerDataPort = DEFAULT_TM_DATA_PORT; private int numTaskManager = DEFAULT_NUM_TASK_MANAGER; private long memorySize = DEFAULT_MEMORY_SIZE; private String configDir; private String hdfsConfigFile; private boolean lazyMemoryAllocation = DEFAULT_LAZY_MEMORY_ALLOCATION; private boolean defaultOverwriteFiles = false; private boolean defaultAlwaysCreateDirectory = false; private JobManager jobManager; // ------------------------------------------------------------------------ // Constructor and feature / properties setup // ------------------------------------------------------------------------ public int getJobManagerRpcPort() { return jobManagerRpcPort; } public void setJobManagerRpcPort(int jobManagerRpcPort) { this.jobManagerRpcPort = jobManagerRpcPort; } public int getTaskManagerRpcPort() { return taskManagerRpcPort; } public void setTaskManagerRpcPort(int taskManagerRpcPort) { this.taskManagerRpcPort = taskManagerRpcPort; } public int getTaskManagerDataPort() { return taskManagerDataPort; } public void setTaskManagerDataPort(int taskManagerDataPort) { this.taskManagerDataPort = taskManagerDataPort; } public long getMemorySize() { return memorySize; } public void setMemorySize(long memorySize) { this.memorySize = memorySize; } public String getConfigDir() { return configDir; } public void setConfigDir(String configDir) { this.configDir = configDir; } public String getHdfsConfigFile() { return hdfsConfigFile; } public void setHdfsConfigFile(String hdfsConfigFile) { this.hdfsConfigFile = hdfsConfigFile; } public boolean isLazyMemoryAllocation() { return lazyMemoryAllocation; } public void setLazyMemoryAllocation(boolean lazyMemoryAllocation) { this.lazyMemoryAllocation = lazyMemoryAllocation; } public boolean isDefaultOverwriteFiles() { return defaultOverwriteFiles; } public void setDefaultOverwriteFiles(boolean defaultOverwriteFiles) { this.defaultOverwriteFiles = defaultOverwriteFiles; } public boolean isDefaultAlwaysCreateDirectory() { return defaultAlwaysCreateDirectory; } public void setDefaultAlwaysCreateDirectory(boolean defaultAlwaysCreateDirectory) { this.defaultAlwaysCreateDirectory = defaultAlwaysCreateDirectory; } public void setNumTaskManager(int numTaskManager) { this.numTaskManager = numTaskManager; } public int getNumTaskManager() { return numTaskManager; } // ------------------------------------------------------------------------ // Life cycle and Job Submission // ------------------------------------------------------------------------ public JobClient getJobClient(JobGraph jobGraph) throws Exception { Configuration configuration = jobGraph.getJobConfiguration(); configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost"); configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort); return new JobClient(jobGraph, configuration); } public void start() throws Exception { synchronized (startStopLock) { // set up the global configuration if (this.configDir != null) { GlobalConfiguration.loadConfiguration(configDir); } else { Configuration conf = getMiniclusterDefaultConfig(jobManagerRpcPort, taskManagerRpcPort, taskManagerDataPort, memorySize, hdfsConfigFile, lazyMemoryAllocation, defaultOverwriteFiles, defaultAlwaysCreateDirectory, numTaskManager); GlobalConfiguration.includeConfiguration(conf); } // force the input/output format classes to load the default values from the configuration. // we need to do this here, because the format classes may have been initialized before the mini cluster was started initializeIOFormatClasses(); // before we start the JobManager, we need to make sure that there are no lingering IPC threads from before // check that all threads are done before we return Thread[] allThreads = new Thread[Thread.activeCount()]; int numThreads = Thread.enumerate(allThreads); for (int i = 0; i < numThreads; i++) { Thread t = allThreads[i]; String name = t.getName(); if (name.startsWith("IPC")) { t.join(); } } // start the job manager jobManager = new JobManager(ExecutionMode.LOCAL); waitForJobManagerToBecomeReady(numTaskManager); } } public void stop() throws Exception { synchronized (this.startStopLock) { if (jobManager != null) { jobManager.shutdown(); jobManager = null; } } } // ------------------------------------------------------------------------ // Network utility methods // ------------------------------------------------------------------------ private void waitForJobManagerToBecomeReady(int numTaskManagers) throws InterruptedException { while (jobManager.getNumberOfTaskTrackers() < numTaskManagers) { Thread.sleep(50); } } private static void initializeIOFormatClasses() { try { Method im = FileInputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration"); im.setAccessible(true); im.invoke(null); Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration"); om.setAccessible(true); om.invoke(null); } catch (Exception e) { LOG.error( "Cannot (re) initialize the globally loaded defaults. Some classes might mot follow the specified default behavior."); } } public static Configuration getMiniclusterDefaultConfig(int jobManagerRpcPort, int taskManagerRpcPort, int taskManagerDataPort, long memorySize, String hdfsConfigFile, boolean lazyMemory, boolean defaultOverwriteFiles, boolean defaultAlwaysCreateDirectory, int numTaskManager) { final Configuration config = new Configuration(); // addresses and ports config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost"); config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort); config.setInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, taskManagerRpcPort); config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, taskManagerDataPort); // with the low dop, we can use few RPC handlers config.setInteger(ConfigConstants.JOB_MANAGER_IPC_HANDLERS_KEY, 2); config.setBoolean(ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY, lazyMemory); // polling interval config.setInteger(ConfigConstants.JOBCLIENT_POLLING_INTERVAL_KEY, 2); // hdfs if (hdfsConfigFile != null) { config.setString(ConfigConstants.HDFS_DEFAULT_CONFIG, hdfsConfigFile); } // file system behavior config.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, defaultOverwriteFiles); config.setBoolean(ConfigConstants.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY, defaultAlwaysCreateDirectory); if (memorySize < 0) { memorySize = HardwareDescriptionFactory.extractFromSystem().getSizeOfFreeMemory(); // at this time, we need to scale down the memory, because we cannot dedicate all free memory to the // memory manager. we have to account for the buffer pools as well, and the job manager#s data structures long bufferMem = GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS) * GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY, ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE); memorySize = (long) (0.8 * (memorySize - bufferMem)); //convert from bytes to mega bytes memorySize >>>= 20; } memorySize /= numTaskManager; config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, memorySize / numTaskManager); config.setInteger(ConfigConstants.LOCAL_INSTANCE_MANAGER_NUMBER_TASK_MANAGER, numTaskManager); return config; } }