org.apache.flink.client.minicluster.NepheleMiniCluster.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.flink.client.minicluster.NepheleMiniCluster.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.client.minicluster;

import java.lang.reflect.Method;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.io.FileOutputFormat;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.runtime.ExecutionMode;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.instance.HardwareDescriptionFactory;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.JobManager;

/**
 * A small embedded cluster for local execution and tests.
 *
 * <p>The cluster is configured through the setters of this class, brought up with
 * {@link #start()} (which publishes the configuration through {@link GlobalConfiguration} and
 * creates a {@link JobManager} with {@link ExecutionMode#LOCAL}) and torn down with
 * {@link #stop()}. Jobs are submitted through the client returned by
 * {@link #getJobClient(JobGraph)}.
 *
 * <p>{@link #start()} and {@link #stop()} are serialized against each other through an internal
 * lock. The property setters and getters are not synchronized; properties are expected to be set
 * before {@link #start()} is called.
 */
public class NepheleMiniCluster {

    private static final Log LOG = LogFactory.getLog(NepheleMiniCluster.class);

    private static final int DEFAULT_JM_RPC_PORT = 6498;

    private static final int DEFAULT_TM_RPC_PORT = 6501;

    private static final int DEFAULT_TM_DATA_PORT = 7501;

    /** A negative memory size makes the default configuration derive the size from free memory. */
    private static final long DEFAULT_MEMORY_SIZE = -1;

    private static final int DEFAULT_NUM_TASK_MANAGER = 1;

    private static final boolean DEFAULT_LAZY_MEMORY_ALLOCATION = true;

    // NOTE(review): -1 is passed to the configuration unchanged; presumably it means
    // "let the task manager decide" - confirm against the task manager implementation.
    private static final int DEFAULT_TASK_MANAGER_NUM_SLOTS = -1;

    // --------------------------------------------------------------------------------------------

    /** Guards the start-up and shut-down sequences and the {@link #jobManager} reference. */
    private final Object startStopLock = new Object();

    private int jobManagerRpcPort = DEFAULT_JM_RPC_PORT;

    private int taskManagerRpcPort = DEFAULT_TM_RPC_PORT;

    private int taskManagerDataPort = DEFAULT_TM_DATA_PORT;

    private int numTaskTracker = DEFAULT_NUM_TASK_MANAGER;

    private int taskManagerNumSlots = DEFAULT_TASK_MANAGER_NUM_SLOTS;

    private long memorySize = DEFAULT_MEMORY_SIZE;

    /** Optional configuration directory; when set, it replaces the generated default configuration. */
    private String configDir;

    private String hdfsConfigFile;

    private boolean lazyMemoryAllocation = DEFAULT_LAZY_MEMORY_ALLOCATION;

    private boolean defaultOverwriteFiles = false;

    private boolean defaultAlwaysCreateDirectory = false;

    /** The running job manager, or {@code null} while the cluster is stopped. */
    private JobManager jobManager;

    // ------------------------------------------------------------------------
    //  Constructor and feature / properties setup
    // ------------------------------------------------------------------------

    /**
     * Returns the job manager RPC port. Note that {@link #start()} overwrites this value when the
     * {@code forkNumber} system property is set to a valid number.
     */
    public int getJobManagerRpcPort() {
        return jobManagerRpcPort;
    }

    public void setJobManagerRpcPort(int jobManagerRpcPort) {
        this.jobManagerRpcPort = jobManagerRpcPort;
    }

    /**
     * Returns the task manager RPC port. Note that {@link #start()} overwrites this value when the
     * {@code forkNumber} system property is set to a valid number.
     */
    public int getTaskManagerRpcPort() {
        return taskManagerRpcPort;
    }

    public void setTaskManagerRpcPort(int taskManagerRpcPort) {
        this.taskManagerRpcPort = taskManagerRpcPort;
    }

    /**
     * Returns the task manager data port. Note that {@link #start()} overwrites this value when the
     * {@code forkNumber} system property is set to a valid number.
     */
    public int getTaskManagerDataPort() {
        return taskManagerDataPort;
    }

    public void setTaskManagerDataPort(int taskManagerDataPort) {
        this.taskManagerDataPort = taskManagerDataPort;
    }

    public long getMemorySize() {
        return memorySize;
    }

    /**
     * Sets the total memory size that is split evenly among the task managers. A negative value
     * (the default) makes the default configuration derive the size from the free memory.
     *
     * <p>NOTE(review): the unit is presumably megabytes, because the derived value is converted
     * to megabytes before it is stored under the same configuration key - confirm.
     */
    public void setMemorySize(long memorySize) {
        this.memorySize = memorySize;
    }

    public String getConfigDir() {
        return configDir;
    }

    /**
     * Sets a configuration directory. When it is non-null, {@link #start()} loads the global
     * configuration from it and ignores the other properties of this object for configuration
     * purposes (the number of task managers is still used to wait for start-up).
     */
    public void setConfigDir(String configDir) {
        this.configDir = configDir;
    }

    public String getHdfsConfigFile() {
        return hdfsConfigFile;
    }

    public void setHdfsConfigFile(String hdfsConfigFile) {
        this.hdfsConfigFile = hdfsConfigFile;
    }

    public boolean isLazyMemoryAllocation() {
        return lazyMemoryAllocation;
    }

    public void setLazyMemoryAllocation(boolean lazyMemoryAllocation) {
        this.lazyMemoryAllocation = lazyMemoryAllocation;
    }

    public boolean isDefaultOverwriteFiles() {
        return defaultOverwriteFiles;
    }

    public void setDefaultOverwriteFiles(boolean defaultOverwriteFiles) {
        this.defaultOverwriteFiles = defaultOverwriteFiles;
    }

    public boolean isDefaultAlwaysCreateDirectory() {
        return defaultAlwaysCreateDirectory;
    }

    public void setDefaultAlwaysCreateDirectory(boolean defaultAlwaysCreateDirectory) {
        this.defaultAlwaysCreateDirectory = defaultAlwaysCreateDirectory;
    }

    /** Sets the number of task managers that {@link #start()} configures and waits for. */
    public void setNumTaskTracker(int numTaskTracker) {
        this.numTaskTracker = numTaskTracker;
    }

    public int getNumTaskTracker() {
        return numTaskTracker;
    }

    public void setTaskManagerNumSlots(int taskManagerNumSlots) {
        this.taskManagerNumSlots = taskManagerNumSlots;
    }

    public int getTaskManagerNumSlots() {
        return taskManagerNumSlots;
    }

    // ------------------------------------------------------------------------
    // Life cycle and Job Submission
    // ------------------------------------------------------------------------

    /**
     * Creates a client for submitting the given job graph to this cluster.
     *
     * <p>The job graph's own job configuration is modified in place: the job manager address is
     * set to {@code localhost} and the job manager port to this cluster's current RPC port.
     *
     * @param jobGraph the job to create the client for
     * @return a job client that uses this class's class loader
     * @throws Exception if the client cannot be created
     */
    public JobClient getJobClient(JobGraph jobGraph) throws Exception {
        Configuration configuration = jobGraph.getJobConfiguration();
        configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
        configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
        return new JobClient(jobGraph, configuration, getClass().getClassLoader());
    }

    /**
     * Starts the cluster.
     *
     * <p>The sequence is: pick fork-specific ports if running in a forked test JVM, publish the
     * configuration, re-initialize the file format defaults, wait for leftover IPC threads to
     * finish, create the job manager and block until the configured number of task managers has
     * registered.
     *
     * @throws Exception if the configuration cannot be set up, the job manager cannot be created,
     *         or the calling thread is interrupted while waiting
     */
    public void start() throws Exception {
        adjustPortsForTestFork();

        synchronized (startStopLock) {
            // set up the global configuration
            if (this.configDir != null) {
                GlobalConfiguration.loadConfiguration(configDir);
            } else {
                Configuration conf = getMiniclusterDefaultConfig(jobManagerRpcPort, taskManagerRpcPort,
                        taskManagerDataPort, memorySize, hdfsConfigFile, lazyMemoryAllocation,
                        defaultOverwriteFiles, defaultAlwaysCreateDirectory, taskManagerNumSlots, numTaskTracker);
                GlobalConfiguration.includeConfiguration(conf);
            }

            // force the input/output format classes to load the default values from the configuration.
            // we need to do this here, because the format classes may have been initialized before the
            // mini cluster was started
            initializeIOFormatClasses();

            // before we start the JobManager, we need to make sure that there are no lingering IPC
            // threads from a previous run
            awaitLingeringIpcThreads();

            // start the job manager
            jobManager = new JobManager(ExecutionMode.LOCAL);

            waitForJobManagerToBecomeReady(numTaskTracker);
        }
    }

    /**
     * Shuts down the job manager if one is running; does nothing otherwise.
     *
     * @throws Exception if the job manager shutdown fails; the reference is kept in that case
     */
    public void stop() throws Exception {
        synchronized (this.startStopLock) {
            if (jobManager != null) {
                jobManager.shutdown();
                jobManager = null;
            }
        }
    }

    /**
     * Derives distinct ports from the {@code forkNumber} system property, so that several
     * surefire/failsafe forks can run mini clusters in parallel. The ports stay untouched when the
     * property is absent, not a number (for example when running inside an IDE), or {@code -1}.
     */
    private void adjustPortsForTestFork() {
        final String forkNumberString = System.getProperty("forkNumber");
        if (forkNumberString == null) {
            // property not set at all, e.g. when running inside an IDE
            return;
        }

        final int forkNumber;
        try {
            forkNumber = Integer.parseInt(forkNumberString);
        } catch (NumberFormatException ignored) {
            // the property is not properly set (e.g. an unresolved placeholder), keep the ports
            return;
        }

        if (forkNumber == -1) {
            return;
        }

        // each fork gets its own window of 300 ports
        final int basePort = 1024 + forkNumber * 300;
        jobManagerRpcPort = basePort;
        taskManagerRpcPort = basePort + 100;
        taskManagerDataPort = basePort + 200;
    }

    /**
     * Blocks until every currently enumerated thread whose name starts with {@code "IPC"} has
     * terminated. The calling thread is skipped, because joining on itself would never return.
     *
     * @throws InterruptedException if the calling thread is interrupted while joining
     */
    private static void awaitLingeringIpcThreads() throws InterruptedException {
        final Thread self = Thread.currentThread();
        final Thread[] allThreads = new Thread[Thread.activeCount()];
        final int numThreads = Thread.enumerate(allThreads);

        for (int i = 0; i < numThreads; i++) {
            final Thread t = allThreads[i];
            if (t != self && t.getName().startsWith("IPC")) {
                t.join();
            }
        }
    }

    // ------------------------------------------------------------------------
    // Network utility methods
    // ------------------------------------------------------------------------

    /**
     * Polls the job manager every 50 milliseconds until at least the given number of task managers
     * is reported. There is no timeout: the call blocks for as long as the number is not reached.
     *
     * @param numTaskManagers the number of task managers to wait for
     * @throws InterruptedException if the calling thread is interrupted while sleeping
     */
    private void waitForJobManagerToBecomeReady(int numTaskManagers) throws InterruptedException {
        while (jobManager.getNumberOfTaskManagers() < numTaskManagers) {
            Thread.sleep(50);
        }
    }

    /**
     * Reflectively invokes the static {@code initDefaultsFromConfiguration} methods of
     * {@link FileInputFormat} and {@link FileOutputFormat}. Failures are logged together with their
     * cause and otherwise ignored, so that start-up continues on a best-effort basis.
     */
    private static void initializeIOFormatClasses() {
        try {
            Method im = FileInputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
            im.setAccessible(true);
            im.invoke(null);

            Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
            om.setAccessible(true);
            om.invoke(null);
        } catch (Exception e) {
            // keep the cause in the log, otherwise the failure cannot be diagnosed
            LOG.error(
                    "Cannot (re) initialize the globally loaded defaults. Some classes might mot follow the specified default behavior.",
                    e);
        }
    }

    /**
     * Builds the configuration that the mini cluster uses when no configuration directory is given.
     *
     * @param jobManagerRpcPort the job manager RPC port
     * @param taskManagerRpcPort the task manager RPC port
     * @param taskManagerDataPort the task manager data port
     * @param memorySize the total memory to split among the task managers; when negative, it is
     *        derived from the free memory reported by {@link HardwareDescriptionFactory}, reduced by
     *        the network buffer memory of all task managers and scaled by the default memory fraction
     * @param hdfsConfigFile the HDFS configuration file, or {@code null} to leave the entry unset
     * @param lazyMemory the value for the lazy memory allocation entry
     * @param defaultOverwriteFiles the value for the default file overwrite entry
     * @param defaultAlwaysCreateDirectory the value for the "always create output directory" entry
     * @param taskManagerNumSlots the number of task slots per task manager, stored unchanged
     * @param numTaskManager the number of task managers; must be at least 1
     * @return a new configuration holding the entries described above
     * @throws IllegalArgumentException if {@code numTaskManager} is smaller than 1
     */
    public static Configuration getMiniclusterDefaultConfig(int jobManagerRpcPort, int taskManagerRpcPort,
            int taskManagerDataPort, long memorySize, String hdfsConfigFile, boolean lazyMemory,
            boolean defaultOverwriteFiles, boolean defaultAlwaysCreateDirectory, int taskManagerNumSlots,
            int numTaskManager) {
        // the memory is divided by the number of task managers below; reject values that would
        // fail with a bare "/ by zero" or yield a negative per-task-manager memory size
        if (numTaskManager < 1) {
            throw new IllegalArgumentException(
                    "The number of task managers must be at least 1, but was " + numTaskManager + ".");
        }

        final Configuration config = new Configuration();

        // addresses and ports
        config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost");
        config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRpcPort);
        config.setInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, taskManagerRpcPort);
        config.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, taskManagerDataPort);

        // with the low dop, we can use few RPC handlers
        config.setInteger(ConfigConstants.JOB_MANAGER_IPC_HANDLERS_KEY, 2);

        config.setBoolean(ConfigConstants.TASK_MANAGER_MEMORY_LAZY_ALLOCATION_KEY, lazyMemory);

        // polling interval
        config.setInteger(ConfigConstants.JOBCLIENT_POLLING_INTERVAL_KEY, 2);

        // hdfs
        if (hdfsConfigFile != null) {
            config.setString(ConfigConstants.HDFS_DEFAULT_CONFIG, hdfsConfigFile);
        }

        // file system behavior
        config.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, defaultOverwriteFiles);
        config.setBoolean(ConfigConstants.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY,
                defaultAlwaysCreateDirectory);

        if (memorySize < 0) {
            memorySize = HardwareDescriptionFactory.extractFromSystem().getSizeOfFreeMemory();

            // at this time, we need to scale down the memory, because we cannot dedicate all free memory
            // to the memory manager. we have to account for the buffer pools as well, and the job
            // manager's data structures
            long bufferMem = GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY,
                    ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_NUM_BUFFERS)
                    * GlobalConfiguration.getLong(ConfigConstants.TASK_MANAGER_NETWORK_BUFFER_SIZE_KEY,
                            ConfigConstants.DEFAULT_TASK_MANAGER_NETWORK_BUFFER_SIZE);

            memorySize = memorySize - (bufferMem * numTaskManager);

            // apply the fraction that makes sure memory is left to the heap for other data structures and UDFs.
            memorySize = (long) (memorySize * ConfigConstants.DEFAULT_MEMORY_MANAGER_MEMORY_FRACTION);

            // convert from bytes to megabytes
            // NOTE(review): if the free memory is smaller than the buffer memory, memorySize is
            // negative here and the unsigned shift turns it into a very large positive value.
            // Confirm how the task manager is expected to behave in that case before changing it.
            memorySize >>>= 20;
        }

        // every task manager gets an equal share
        memorySize /= numTaskManager;

        config.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, memorySize);

        config.setInteger(ConfigConstants.LOCAL_INSTANCE_MANAGER_NUMBER_TASK_MANAGER, numTaskManager);

        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, taskManagerNumSlots);

        return config;
    }
}