eu.stratosphere.client.minicluster.NepheleMiniCluster.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.client.minicluster.NepheleMiniCluster.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.client.minicluster;

import java.lang.reflect.Method;

import eu.stratosphere.nephele.instance.HardwareDescriptionFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.nephele.client.JobClient;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobmanager.JobManager;
import eu.stratosphere.nephele.jobmanager.JobManager.ExecutionMode;

/**
 * A small, embedded Nephele cluster that runs a {@link JobManager} in {@link ExecutionMode#LOCAL} mode
 * inside the current JVM.
 *
 * <p>Typical usage: configure the instance through the setters, call {@link #start()}, obtain a
 * {@link JobClient} for each job via {@link #getJobClient(JobGraph)}, and finally call {@link #stop()}.
 *
 * <p>{@link #start()} and {@link #stop()} are serialized through an internal lock. The property setters are
 * not synchronized; values are read when {@link #start()} (or {@link #getJobClient(JobGraph)}) is invoked.
 */
public class NepheleMiniCluster {

    private static final Log LOG = LogFactory.getLog(NepheleMiniCluster.class);

    private static final int DEFAULT_JM_RPC_PORT = 6498;

    private static final int DEFAULT_TM_RPC_PORT = 6501;

    private static final int DEFAULT_TM_DATA_PORT = 7501;

    /** A negative memory size means "derive the size from the free memory of the system". */
    private static final long DEFAULT_MEMORY_SIZE = -1;

    private static final int DEFAULT_NUM_TASK_MANAGER = 1;

    private static final boolean DEFAULT_LAZY_MEMORY_ALLOCATION = true;

    // --------------------------------------------------------------------------------------------

    /** Guards the life cycle transitions performed by {@link #start()} and {@link #stop()}. */
    private final Object startStopLock = new Object();

    private int jobManagerRpcPort = DEFAULT_JM_RPC_PORT;

    private int taskManagerRpcPort = DEFAULT_TM_RPC_PORT;

    private int taskManagerDataPort = DEFAULT_TM_DATA_PORT;

    private int numTaskManager = DEFAULT_NUM_TASK_MANAGER;

    private long memorySize = DEFAULT_MEMORY_SIZE;

    /** Optional configuration directory; when set, it replaces the generated default configuration. */
    private String configDir;

    private String hdfsConfigFile;

    private boolean lazyMemoryAllocation = DEFAULT_LAZY_MEMORY_ALLOCATION;

    private boolean defaultOverwriteFiles = false;

    private boolean defaultAlwaysCreateDirectory = false;

    /** The running job manager, or {@code null} if the cluster is not started. */
    private JobManager jobManager;

    // ------------------------------------------------------------------------
    //  Constructor and feature / properties setup
    // ------------------------------------------------------------------------

    /**
     * @return the RPC port configured for the job manager
     */
    public int getJobManagerRpcPort() {
        return jobManagerRpcPort;
    }

    /**
     * @param jobManagerRpcPort the RPC port to configure for the job manager
     */
    public void setJobManagerRpcPort(int jobManagerRpcPort) {
        this.jobManagerRpcPort = jobManagerRpcPort;
    }

    /**
     * @return the RPC port configured for the task manager
     */
    public int getTaskManagerRpcPort() {
        return taskManagerRpcPort;
    }

    /**
     * @param taskManagerRpcPort the RPC port to configure for the task manager
     */
    public void setTaskManagerRpcPort(int taskManagerRpcPort) {
        this.taskManagerRpcPort = taskManagerRpcPort;
    }

    /**
     * @return the data port configured for the task manager
     */
    public int getTaskManagerDataPort() {
        return taskManagerDataPort;
    }

    /**
     * @param taskManagerDataPort the data port to configure for the task manager
     */
    public void setTaskManagerDataPort(int taskManagerDataPort) {
        this.taskManagerDataPort = taskManagerDataPort;
    }

    /**
     * @return the configured total memory size; a negative value means the size is derived from the
     *         system's free memory when the default configuration is built
     */
    public long getMemorySize() {
        return memorySize;
    }

    /**
     * @param memorySize the total memory size to split across the task managers, or a negative value to
     *        derive it from the system's free memory (presumably in megabytes, matching the
     *        auto-detected value - confirm against the task manager configuration)
     */
    public void setMemorySize(long memorySize) {
        this.memorySize = memorySize;
    }

    /**
     * @return the configuration directory, or {@code null} if the generated default configuration is used
     */
    public String getConfigDir() {
        return configDir;
    }

    /**
     * @param configDir the directory to load the global configuration from; when non-null, the other
     *        properties of this object are not used to build a configuration in {@link #start()}
     */
    public void setConfigDir(String configDir) {
        this.configDir = configDir;
    }

    /**
     * @return the HDFS configuration file, or {@code null} if none is set
     */
    public String getHdfsConfigFile() {
        return hdfsConfigFile;
    }

    /**
     * @param hdfsConfigFile the HDFS configuration file to register in the default configuration, may be
     *        {@code null}
     */
    public void setHdfsConfigFile(String hdfsConfigFile) {
        this.hdfsConfigFile = hdfsConfigFile;
    }

    /**
     * @return whether lazy memory allocation is configured for the task manager
     */
    public boolean isLazyMemoryAllocation() {
        return lazyMemoryAllocation;
    }

    /**
     * @param lazyMemoryAllocation whether to configure lazy memory allocation for the task manager
     */
    public void setLazyMemoryAllocation(boolean lazyMemoryAllocation) {
        this.lazyMemoryAllocation = lazyMemoryAllocation;
    }

    /**
     * @return the value configured as the file system's default overwrite flag
     */
    public boolean isDefaultOverwriteFiles() {
        return defaultOverwriteFiles;
    }

    /**
     * @param defaultOverwriteFiles the value to configure as the file system's default overwrite flag
     */
    public void setDefaultOverwriteFiles(boolean defaultOverwriteFiles) {
        this.defaultOverwriteFiles = defaultOverwriteFiles;
    }

    /**
     * @return the value configured as the file system's "always create output directory" flag
     */
    public boolean isDefaultAlwaysCreateDirectory() {
        return defaultAlwaysCreateDirectory;
    }

    /**
     * @param defaultAlwaysCreateDirectory the value to configure as the file system's "always create
     *        output directory" flag
     */
    public void setDefaultAlwaysCreateDirectory(boolean defaultAlwaysCreateDirectory) {
        this.defaultAlwaysCreateDirectory = defaultAlwaysCreateDirectory;
    }

    /**
     * @param numTaskManager the number of task managers that {@link #start()} waits for; must be at least
     *        one when the default configuration is generated
     */
    public void setNumTaskManager(int numTaskManager) {
        this.numTaskManager = numTaskManager;
    }

    /**
     * @return the number of task managers that {@link #start()} waits for
     */
    public int getNumTaskManager() {
        return numTaskManager;
    }

    // ------------------------------------------------------------------------
    // Life cycle and Job Submission
    // ------------------------------------------------------------------------

    /**
     * Creates a client for submitting the given job to this mini cluster.
     *
     * <p>Note that the job manager address ("localhost") and RPC port are written into the job graph's own
     * job configuration, which is then handed to the client.
     *
     * @param jobGraph the job to create the client for
     * @return a new client bound to the given job graph
     * @throws Exception if the {@link JobClient} cannot be created
     */
    public JobClient getJobClient(JobGraph jobGraph) throws Exception {
        Configuration configuration = jobGraph.getJobConfiguration();
        configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
        configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
        return new JobClient(jobGraph, configuration);
    }

    /**
     * Starts the mini cluster: sets up the global configuration, re-initializes the file format defaults,
     * waits for lingering IPC threads to finish, starts a local job manager and blocks until the configured
     * number of task managers has registered.
     *
     * @throws IllegalArgumentException if the default configuration is generated and the configured number
     *         of task managers is smaller than one
     * @throws Exception if the configuration cannot be set up, the job manager cannot be started, or the
     *         calling thread is interrupted while waiting
     */
    public void start() throws Exception {
        synchronized (startStopLock) {
            // NOTE(review): calling start() twice without stop() replaces the 'jobManager' reference
            // without shutting down the previous instance - confirm whether callers rely on that.

            // set up the global configuration
            if (this.configDir != null) {
                GlobalConfiguration.loadConfiguration(configDir);
            } else {
                Configuration conf = getMiniclusterDefaultConfig(jobManagerRpcPort, taskManagerRpcPort,
                        taskManagerDataPort, memorySize, hdfsConfigFile, lazyMemoryAllocation,
                        defaultOverwriteFiles, defaultAlwaysCreateDirectory, numTaskManager);
                GlobalConfiguration.includeConfiguration(conf);
            }

            // force the input/output format classes to load the default values from the configuration.
            // we need to do this here, because the format classes may have been initialized before the mini cluster was started
            initializeIOFormatClasses();

            // before we start the JobManager, we need to make sure that there are no lingering IPC threads from before
            // check that all threads are done before we return
            Thread[] allThreads = new Thread[Thread.activeCount()];
            int numThreads = Thread.enumerate(allThreads);

            for (int i = 0; i < numThreads; i++) {
                Thread t = allThreads[i];
                String name = t.getName();
                if (name.startsWith("IPC")) {
                    t.join();
                }
            }

            // start the job manager
            jobManager = new JobManager(ExecutionMode.LOCAL);

            waitForJobManagerToBecomeReady(numTaskManager);
        }
    }

    /**
     * Shuts down the job manager if one is running. Calling this method on a cluster that is not started
     * has no effect.
     *
     * @throws Exception declared for callers; propagated from the job manager shutdown if it fails
     */
    public void stop() throws Exception {
        synchronized (this.startStopLock) {
            if (jobManager != null) {
                jobManager.shutdown();
                jobManager = null;
            }
        }
    }

    // ------------------------------------------------------------------------
    // Network utility methods
    // ------------------------------------------------------------------------

    /**
     * Blocks, polling every 50 milliseconds, until the job manager reports at least the given number of
     * task trackers. There is no timeout.
     *
     * @param numTaskManagers the number of task trackers to wait for
     * @throws InterruptedException if the calling thread is interrupted while sleeping
     */
    private void waitForJobManagerToBecomeReady(int numTaskManagers) throws InterruptedException {
        while (jobManager.getNumberOfTaskTrackers() < numTaskManagers) {
            Thread.sleep(50);
        }
    }

    /**
     * Reflectively invokes the static {@code initDefaultsFromConfiguration} method on
     * {@link FileInputFormat} and {@link FileOutputFormat} so that both pick up the current global
     * configuration. Failures are logged (including the cause) and otherwise ignored, because the cluster
     * can still run with the previously loaded defaults.
     */
    private static void initializeIOFormatClasses() {
        try {
            Method im = FileInputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
            im.setAccessible(true);
            im.invoke(null);

            Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
            om.setAccessible(true);
            om.invoke(null);
        } catch (Exception e) {
            // pass the exception along so the reason for the failure is not lost
            LOG.error(
                    "Cannot (re) initialize the globally loaded defaults. Some classes might mot follow the specified default behavior.",
                    e);
        }
    }

    /**
     * Builds the default configuration for a mini cluster running on "localhost".
     *
     * <p>If {@code memorySize} is negative, the total memory is derived from the free memory of the
     * system: the network buffer memory (number of buffers times buffer size, read from the
     * {@link GlobalConfiguration}) is subtracted, 80% of the remainder is kept, and the result is converted
     * from bytes to megabytes. The total is then divided evenly among the task managers.
     *
     * @param jobManagerRpcPort the RPC port of the job manager
     * @param taskManagerRpcPort the RPC port of the task manager
     * @param taskManagerDataPort the data port of the task manager
     * @param memorySize the total memory to split across all task managers, or a negative value to derive
     *        it from the system's free memory
     * @param hdfsConfigFile the HDFS configuration file, or {@code null} to leave it unset
     * @param lazyMemory whether the task manager allocates its memory lazily
     * @param defaultOverwriteFiles the file system's default overwrite flag
     * @param defaultAlwaysCreateDirectory the file system's "always create output directory" flag
     * @param numTaskManager the number of local task managers, must be at least one
     * @return a new configuration holding the settings described above
     * @throws IllegalArgumentException if {@code numTaskManager} is smaller than one
     */
    public static Configuration getMiniclusterDefaultConfig(int jobManagerRpcPort, int taskManagerRpcPort,
            int taskManagerDataPort, long memorySize, String hdfsConfigFile, boolean lazyMemory,
            boolean defaultOverwriteFiles, boolean defaultAlwaysCreateDirectory, int numTaskManager) {
        // fail early with a descriptive message instead of an ArithmeticException (zero) or a
        // negative per-task-manager memory size (negative count) further down
        if (numTaskManager < 1) {
            throw new IllegalArgumentException(
                    "The number of task managers must be at least one, but was " + numTaskManager + ".");
        }

        final Configuration config = new Configuration();

        // addresses and ports
        config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
        config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
        config.setInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, taskManagerRpcPort);
        config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, taskManagerDataPort);

        // with the low dop, we can use few RPC handlers
        config.setInteger(ConfigConstants.JOB_MANAGER_IPC_HANDLERS_KEY, 2);

        config.setBoolean(ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY, lazyMemory);

        // polling interval
        config.setInteger(ConfigConstants.JOBCLIENT_POLLING_INTERVAL_KEY, 2);

        // hdfs
        if (hdfsConfigFile != null) {
            config.setString(ConfigConstants.HDFS_DEFAULT_CONFIG, hdfsConfigFile);
        }

        // file system behavior
        config.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, defaultOverwriteFiles);
        config.setBoolean(ConfigConstants.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY,
                defaultAlwaysCreateDirectory);

        if (memorySize < 0) {
            memorySize = HardwareDescriptionFactory.extractFromSystem().getSizeOfFreeMemory();

            // at this time, we need to scale down the memory, because we cannot dedicate all free memory to the
            // memory manager. we have to account for the buffer pools as well, and the job manager's data structures
            long bufferMem = GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
                    ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS)
                    * GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
                            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

            // NOTE(review): if the free memory is smaller than the buffer memory this becomes negative,
            // and the unsigned shift below turns it into a very large value - confirm desired handling.
            memorySize = (long) (0.8 * (memorySize - bufferMem));

            //convert from bytes to mega bytes
            memorySize >>>= 20;
        }

        // split the total budget across the task managers exactly once
        final long memoryPerTaskManager = memorySize / numTaskManager;

        config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, memoryPerTaskManager);

        config.setInteger(ConfigConstants.LOCAL_INSTANCE_MANAGER_NUMBER_TASK_MANAGER, numTaskManager);

        return config;
    }
}