cascading.platform.hadoop2.Hadoop2MR1Platform.java Source code

Introduction

Here is the source code for cascading.platform.hadoop2.Hadoop2MR1Platform.java. This class implements the Cascading test platform for Apache Hadoop 2.x running the MR1 MapReduce API: it can configure pure local mode, start in-process MiniDFS/MiniMR clusters, or target a remote cluster selected through system properties such as mapred.jar and fs.defaultFS.

Source

/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.platform.hadoop2;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import cascading.flow.FlowConnector;
import cascading.flow.FlowProcess;
import cascading.flow.FlowProps;
import cascading.flow.FlowSession;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
import cascading.flow.hadoop2.Hadoop2MR1Planner;
import cascading.platform.hadoop.BaseHadoopPlatform;
import cascading.util.Util;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRClientCluster;
import org.apache.hadoop.mapred.MiniMRClientClusterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class Hadoop2MR1Platform is automatically loaded and injected into a {@link cascading.PlatformTestCase} instance
 * so that all *PlatformTest classes can be tested against Apache Hadoop 2.x.
 */
public class Hadoop2MR1Platform extends BaseHadoopPlatform<JobConf> {
    private static final Logger LOG = LoggerFactory.getLogger(Hadoop2MR1Platform.class);
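    // A single mini DFS/MR cluster pair is shared JVM-wide across all tests.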
    private static transient MiniDFSCluster dfs;
    private static transient MiniMRClientCluster mr;

    public Hadoop2MR1Platform() {
    }

    @Override
    public String getName() {
        return "hadoop2-mr1";
    }

    @Override
    public FlowConnector getFlowConnector(Map<Object, Object> properties) {
        return new Hadoop2MR1FlowConnector(properties);
    }

    @Override
    public void setNumMapTasks(Map<Object, Object> properties, int numMapTasks) {
        properties.put("mapreduce.job.maps", Integer.toString(numMapTasks));
    }

    @Override
    public void setNumReduceTasks(Map<Object, Object> properties, int numReduceTasks) {
        properties.put("mapreduce.job.reduces", Integer.toString(numReduceTasks));
    }

    @Override
    public Integer getNumMapTasks(Map<Object, Object> properties) {
        if (properties.get("mapreduce.job.maps") == null)
            return null;

        return Integer.parseInt(properties.get("mapreduce.job.maps").toString());
    }

    @Override
    public Integer getNumReduceTasks(Map<Object, Object> properties) {
        if (properties.get("mapreduce.job.reduces") == null)
            return null;

        return Integer.parseInt(properties.get("mapreduce.job.reduces").toString());
    }

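    // Returns a fresh JobConf copy so callers cannot mutate the shared configuration.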
    public JobConf getConfiguration() {
        return new JobConf(configuration);
    }

    @Override
    public FlowProcess getFlowProcess() {
        return new HadoopFlowProcess(FlowSession.NULL, getConfiguration(), true);
    }

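    // Lazily builds the shared Hadoop configuration: pure local mode, a remote
    // cluster driven by system properties, or in-process mini clusters. Later
    // calls are no-ops once 'configuration' has been set.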
    @Override
    public synchronized void setUp() throws IOException {
        if (configuration != null)
            return;

        if (!isUseCluster()) {
            LOG.info("not using cluster");
            configuration = new JobConf();

            // enforce settings to make local mode behave the same across distributions
            configuration.set("fs.defaultFS", "file:///");
            configuration.set("mapreduce.framework.name", "local");

            // only set the staging dir if one has not already been provided
            String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

            if (Util.isEmpty(stagingDir))
                configuration.set("mapreduce.jobtracker.staging.root.dir",
                        System.getProperty("user.dir") + "/build/tmp/cascading/staging");

            fileSys = FileSystem.get(configuration);
        } else {
            LOG.info("using cluster");

            if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
                System.setProperty("hadoop.log.dir", "build/test/log");

            if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
                System.setProperty("hadoop.tmp.dir", "build/test/tmp");

            new File(System.getProperty("hadoop.log.dir")).mkdirs(); // return value ignored; the directory may already exist

            JobConf conf = new JobConf();

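            // A non-empty "mapred.jar" system property points at a pre-built job
            // jar for an existing remote cluster; otherwise in-process mini
            // clusters are started below.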
            if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
                LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
                configuration = conf;

                ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

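                // Optional endpoint overrides, applied only when the corresponding
                // system property is set; both the deprecated MR1 keys and their
                // YARN-era replacements are honored.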
                if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                    LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                    configuration.set("fs.default.name", System.getProperty("fs.default.name"));
                }

                if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                    LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                    configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                }

                if (!Util.isEmpty(System.getProperty("fs.defaultFS"))) {
                    LOG.info("using {}={}", "fs.defaultFS", System.getProperty("fs.defaultFS"));
                    configuration.set("fs.defaultFS", System.getProperty("fs.defaultFS"));
                }

                if (!Util.isEmpty(System.getProperty("yarn.resourcemanager.address"))) {
                    LOG.info("using {}={}", "yarn.resourcemanager.address",
                            System.getProperty("yarn.resourcemanager.address"));
                    configuration.set("yarn.resourcemanager.address",
                            System.getProperty("yarn.resourcemanager.address"));
                }

                if (!Util.isEmpty(System.getProperty("mapreduce.jobhistory.address"))) {
                    LOG.info("using {}={}", "mapreduce.jobhistory.address",
                            System.getProperty("mapreduce.jobhistory.address"));
                    configuration.set("mapreduce.jobhistory.address",
                            System.getProperty("mapreduce.jobhistory.address"));
                }

                configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
                configuration.set("mapreduce.framework.name", "yarn");

                fileSys = FileSystem.get(configuration);
            } else {
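                // No job jar supplied: stand up an in-process MiniDFSCluster and
                // MiniMRClientCluster so tests need no external Hadoop installation.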
                conf.setBoolean("yarn.is.minicluster", true);
                //      conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
                //      conf.set( "yarn.scheduler.capacity.root.queues", "default" );
                //      conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
                // disable blacklisting hosts not to fail localhost during unit tests
                conf.setBoolean("yarn.app.mapreduce.am.job.node-blacklisting.enable", false);

                dfs = new MiniDFSCluster(conf, 4, true, null);
                fileSys = dfs.getFileSystem();

                FileSystem.setDefaultUri(conf, fileSys.getUri());

                mr = MiniMRClientClusterFactory.create(this.getClass(), 4, conf);

                configuration = mr.getConfig();
            }

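            // Shared cluster tuning: modest child heap, unlimited JVM reuse,
            // fast client polling, and no speculative execution, keeping task
            // counts deterministic.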
            configuration.set("mapred.child.java.opts", "-Xmx512m");
            configuration.setInt("mapreduce.job.jvm.numtasks", -1);
            configuration.setInt("mapreduce.client.completion.pollinterval", 50);
            configuration.setInt("mapreduce.client.progressmonitor.pollinterval", 50);
            configuration.setBoolean("mapreduce.map.speculative", false);
            configuration.setBoolean("mapreduce.reduce.speculative", false);
        }

        configuration.setInt("mapreduce.job.maps", numMappers);
        configuration.setInt("mapreduce.job.reduces", numReducers);

        Map<Object, Object> globalProperties = getGlobalProperties();

        if (logger != null)
            globalProperties.put("log4j.logger", logger);

        FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

        Hadoop2MR1Planner.copyProperties(configuration, globalProperties); // copy any external properties

        Hadoop2MR1Planner.copyConfiguration(properties, configuration); // put all properties on the jobconf
    }
}
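
Example Usage

The following sketch is hypothetical and not part of the listing above: it assumes the cascading-platform test artifacts and a Hadoop 2.x client are on the classpath, and the class name Hadoop2MR1PlatformSketch is invented for illustration. It shows how the platform's public methods fit together: setUp() prepares the shared configuration, the task-count setters write the "mapreduce.job.maps"/"mapreduce.job.reduces" keys into a properties map, and getFlowConnector(properties) yields a Hadoop2MR1FlowConnector for assembling flows.

import java.util.HashMap;
import java.util.Map;

import cascading.flow.FlowConnector;
import cascading.platform.hadoop2.Hadoop2MR1Platform;

public class Hadoop2MR1PlatformSketch {
    public static void main(String[] args) throws Exception {
        Hadoop2MR1Platform platform = new Hadoop2MR1Platform();

        // Idempotent: setUp() returns early once 'configuration' is non-null.
        // Local mode vs. (mini) cluster is decided by isUseCluster(), which is
        // inherited from BaseHadoopPlatform and not shown in the listing above.
        platform.setUp();

        Map<Object, Object> properties = new HashMap<Object, Object>();
        platform.setNumMapTasks(properties, 4);    // mapreduce.job.maps=4
        platform.setNumReduceTasks(properties, 1); // mapreduce.job.reduces=1

        FlowConnector connector = platform.getFlowConnector(properties);

        // From here, define source/sink Taps and a Pipe assembly, then:
        // connector.connect(sourceTap, sinkTap, pipe).complete();
    }
}

In practice this class is not driven by hand; as the Javadoc notes, cascading.PlatformTestCase loads and injects it automatically when tests run against the "hadoop2-mr1" platform.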