org.pentaho.hadoop.shim.common.HadoopShimImpl.java Source code

Introduction

Here is the source code for org.pentaho.hadoop.shim.common.HadoopShimImpl.java, a Hadoop shim implementation from the Pentaho Big Data project. The class registers the Hive JDBC driver and the HDFS VFS provider, supplies default NameNode and JobTracker ports, and handles context-classloader switching for configuration creation and job submission.

Source

/*******************************************************************************
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.HadoopConfigurationFileSystemManager;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;
import org.pentaho.hdfs.vfs.HDFSFileProvider;

import java.io.IOException;
import java.net.URI;
import java.util.List;

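/**
 * Hadoop shim implementation for this distribution. It registers the Hive
 * JDBC driver and the HDFS VFS provider, supplies the default NameNode and
 * JobTracker ports, and manages the thread context classloader around
 * configuration creation and job submission.
 */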
public class HadoopShimImpl extends CommonHadoopShim {

    static {
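        // Map the "hive2" JDBC scheme to the Hive JDBC driver bundled with this shim.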
        JDBC_DRIVER_MAP.put("hive2", org.apache.hive.jdbc.HiveDriver.class);
    }

    @Override
    protected String getDefaultNamenodePort() {
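        // 8020 is the standard HDFS NameNode IPC port.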
        return "8020";
    }

    @Override
    protected String getDefaultJobtrackerPort() {
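        // 8021 is the classic MRv1 JobTracker port.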
        return "8021";
    }

    @Override
    public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
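        // Verify the HADOOP_HOME/winutils setup before registering anything (needed on Windows).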
        validateHadoopHomeWithWinutils();
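        // Register the HDFS VFS provider so "hdfs" URLs resolve through this configuration.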
        fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
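        // Install a DistributedCacheUtil that also records added files on the job classpath.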
        setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {

            public void addFileToClassPath(Path file, Configuration conf) throws IOException {
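                // Append the file to the comma-separated mapred.job.classpath.files property.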
                String classpath = conf.get("mapred.job.classpath.files");
                conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                        : classpath + getClusterPathSeparator() + file.toString());
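                // Qualify the path against its file system and register it with the distributed cache.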
                FileSystem fs = FileSystem.get(file.toUri(), conf);
                URI uri = fs.makeQualified(file).toUri();

                DistributedCache.addCacheFile(uri, conf);
            }

            public String getClusterPathSeparator() {
                // Use a comma rather than an OS-specific separator (see https://issues.apache.org/jira/browse/HADOOP-4864)
                return System.getProperty("hadoop.cluster.path.separator", ",");
            }
        });
    }

    @Override
    public RunningJob submitJob(org.pentaho.hadoop.shim.api.Configuration c) throws IOException {
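        // Submit with the shim's classloader as the thread context classloader so
        // Hadoop classes resolve from the shim, then restore the caller's classloader.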
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            return c.submit();
        } catch (InterruptedException | ClassNotFoundException e) {
            throw new RuntimeException(e);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    }

    @Override
    public org.pentaho.hadoop.shim.api.Configuration createConfiguration() {
        // Set the context class loader when instantiating the configuration
        // since org.apache.hadoop.conf.Configuration uses it to load resources
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            return new ConfigurationProxyV2();
        } catch (IOException e) {
            throw new RuntimeException("Unable to create configuration for new mapreduce api: ", e);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    }

    @Override
    public void configureConnectionInformation(String namenodeHost, String namenodePort, String jobtrackerHost,
            String jobtrackerPort, org.pentaho.hadoop.shim.api.Configuration conf, List<String> logMessages)
            throws Exception {

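        // The pentaho.runtime.* override properties, when set, take precedence
        // over the host/port arguments.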
        String runtimeFsDefaultName = conf.get("pentaho.runtime.fs.default.name");
        String runtimeFsDefaultScheme = conf.get("pentaho.runtime.fs.default.scheme", "hdfs");
        String runtimeJobTracker = conf.get("pentaho.runtime.job.tracker");
        if (runtimeFsDefaultName == null) {
            if (namenodeHost == null || namenodeHost.trim().length() == 0) {
                throw new Exception("No hdfs host specified!");
            }

            if (namenodePort != null && namenodePort.trim().length() != 0 && !"-1".equals(namenodePort.trim())) {
                namenodePort = ":" + namenodePort;
            } else {
                // A NameNode URL without a port is valid, for example when
                // HDFS runs in HA mode (BAD-358)
                namenodePort = "";
                logMessages.add("No hdfs port specified - HA? ");
            }

            runtimeFsDefaultName = runtimeFsDefaultScheme + "://" + namenodeHost + namenodePort;
        }

        if (runtimeJobTracker == null) {
            if (jobtrackerHost == null || jobtrackerHost.trim().length() == 0) {
                throw new Exception("No job tracker host specified!");
            }

            if (jobtrackerPort == null || jobtrackerPort.trim().length() == 0) {
                jobtrackerPort = getDefaultJobtrackerPort();
                logMessages.add("No job tracker port specified - using default: " + jobtrackerPort);
            }
            runtimeJobTracker = jobtrackerHost + ":" + jobtrackerPort;
        }

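        // Apply the resolved values under the legacy MRv1 property names.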
        conf.set("fs.default.name", runtimeFsDefaultName);
        conf.set("mapred.job.tracker", runtimeJobTracker);
    }
}
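
Example Usage

The sketch below shows one way the shim's configuration methods could be driven. It is a hypothetical example rather than part of the Pentaho sources: it assumes the shim and a compatible Hadoop client are on the classpath, and the host names and ports are placeholders.

import org.pentaho.hadoop.shim.common.HadoopShimImpl;

import java.util.ArrayList;
import java.util.List;

public class HadoopShimImplExample {

    public static void main(String[] args) throws Exception {
        HadoopShimImpl shim = new HadoopShimImpl();

        // createConfiguration() swaps in the shim's classloader while the
        // underlying Hadoop Configuration loads its resources.
        org.pentaho.hadoop.shim.api.Configuration conf = shim.createConfiguration();

        // Resolve fs.default.name and mapred.job.tracker from host/port values;
        // messages about defaulted values accumulate in logMessages.
        List<String> logMessages = new ArrayList<>();
        shim.configureConnectionInformation(
                "namenode.example.com", "8020",
                "jobtracker.example.com", "8021",
                conf, logMessages);

        logMessages.forEach(System.out::println);
        System.out.println("fs.default.name = " + conf.get("fs.default.name"));
    }
}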