org.pentaho.hadoop.shim.mapr410.HadoopShim.java Source code


Introduction

Here is the source code for org.pentaho.hadoop.shim.mapr410.HadoopShim.java, the MapR 4.1.0 shim for Pentaho Big Data. It extends CommonHadoopShim to handle MapR-specific connection configuration, configuration creation, filesystem provider registration, and job submission.

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.mapr410;

import java.io.IOException;
import java.util.List;

import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.HadoopConfigurationFileSystemManager;
import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.common.CommonHadoopShim;
import org.pentaho.hadoop.shim.common.ShimUtils;
import org.pentaho.hdfs.vfs.MapRFileProvider;
import org.apache.hadoop.mapreduce.Job;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;

public class HadoopShim extends CommonHadoopShim {
    protected static final String SUPER_USER = "authentication.superuser.provider";
    protected static final String PROVIDER_LIST = "authentication.provider.list";
    protected static final String DEFAULT_CLUSTER = "/";
    protected static final String MFS_SCHEME = "maprfs://";
    protected static final String[] EMPTY_CONNECTION_INFO = new String[2];

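    // Register the Hive 2 JDBC driver in the driver map inherited from
    // CommonHadoopShim, so "hive2" JDBC URLs resolve through this shim.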
    static {
        JDBC_DRIVER_MAP.put("hive2", org.apache.hive.jdbc.HiveDriver.class);
    }

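    // Namenode and jobtracker connection info is not exposed for MapR; both
    // getters below return a fixed two-element array of nulls.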
    @Override
    public String[] getNamenodeConnectionInfo(Configuration c) {
        return EMPTY_CONNECTION_INFO;
    }

    @Override
    public String[] getJobtrackerConnectionInfo(Configuration c) {
        return EMPTY_CONNECTION_INFO;
    }

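    /**
     * Normalizes the filesystem and job tracker addresses for MapR. With no
     * host, the default cluster "/" is used; with a host but no port, the
     * host is treated as a named cluster and rewritten to "/mapr/&lt;host&gt;";
     * with both, a plain "&lt;host&gt;:&lt;port&gt;" address is used.
     */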
    @Override
    public void configureConnectionInformation(String namenodeHost, String namenodePort, String jobtrackerHost,
            String jobtrackerPort, Configuration conf, List<String> logMessages) throws Exception {
        if (namenodeHost == null || namenodeHost.trim().length() == 0) {
            namenodeHost = DEFAULT_CLUSTER;
            logMessages.add("Using MapR default cluster for filesystem");
        } else if (namenodePort == null || namenodePort.trim().length() == 0) {
            logMessages.add("Using MapR CLDB named cluster: " + namenodeHost + " for filesystem");
            namenodeHost = "/mapr/" + namenodeHost;
        } else {
            logMessages.add("Using filesystem at " + namenodeHost + ":" + namenodePort);
            namenodeHost = namenodeHost + ":" + namenodePort;
        }

        if (jobtrackerHost == null || jobtrackerHost.trim().length() == 0) {
            jobtrackerHost = DEFAULT_CLUSTER;
            logMessages.add("Using MapR default cluster for job tracker");
        } else if (jobtrackerPort == null || jobtrackerPort.trim().length() == 0) {
            logMessages.add("Using MapR CLDB named cluster: " + jobtrackerHost + " for job tracker");
            jobtrackerHost = "/mapr/" + jobtrackerHost;
        } else {
            logMessages.add("Using job tracker at " + jobtrackerHost + ":" + jobtrackerPort);
            jobtrackerHost = jobtrackerHost + ":" + jobtrackerPort;
        }

        String fsDefaultName = MFS_SCHEME + namenodeHost;
        String jobTracker = MFS_SCHEME + jobtrackerHost;
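        // fs.default.name and mapred.job.tracker are left unset here (note the
        // commented-out calls below); the MapR client is expected to resolve
        // cluster addresses from its own configuration.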
        //conf.set( "fs.default.name", fsDefaultName );
        //conf.set( "mapred.job.tracker", jobTracker );
        conf.set("fs.maprfs.impl", MapRFileProvider.FS_MAPR_IMPL);
    }

    @Override
    public Configuration createConfiguration() {
        Configuration result;
        // Set the context class loader when instantiating the configuration
        // since org.apache.hadoop.conf.Configuration uses it to load resources
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            result = new ConfigurationProxyV2();
        } catch (IOException e) {
            throw new RuntimeException("Unable to create configuration for new mapreduce api: ", e);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
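        // Also load hbase-site.xml so HBase client settings are picked up by
        // this configuration.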
        ShimUtils.asConfiguration(result).addResource("hbase-site.xml");
        return result;
    }

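    /**
     * Called when the shim is loaded: registers the MapR VFS file provider
     * for the maprfs scheme and installs the MapR-specific distributed cache
     * utility.
     */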
    @Override
    public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
        fsm.addProvider(config, MapRFileProvider.SCHEME, config.getIdentifier(), new MapRFileProvider());
        setDistributedCacheUtil(new MapR4DistributedCacheUtilImpl(config));
    }

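    /**
     * Submits a job through the new mapreduce API, swapping in the shim's
     * classloader for the duration of the call (as in createConfiguration).
     */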
    @Override
    public RunningJob submitJob(Configuration c) throws IOException {
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            Job job = ((ConfigurationProxyV2) c).getJob();
            job.submit();
            return new RunningJobProxyV2(job);
        } catch (InterruptedException e) {
            // Restore the interrupt flag before wrapping, so callers can
            // still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    }
}
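
Usage

The sketch below shows one way the shim's configuration methods could be exercised. It is illustrative only: the cluster name "demo.cluster" and the class name HadoopShimUsage are made up, and running it for real would require the MapR client libraries and the remaining Pentaho shim classes on the classpath.

import java.util.ArrayList;
import java.util.List;

import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.mapr410.HadoopShim;

public class HadoopShimUsage {
    public static void main(String[] args) throws Exception {
        HadoopShim shim = new HadoopShim();

        // Build a configuration through the shim; this swaps in the shim's
        // classloader internally and adds hbase-site.xml as a resource.
        Configuration conf = shim.createConfiguration();

        // Host given without a port: the shim rewrites it to the
        // /mapr/<cluster> CLDB path form (see configureConnectionInformation).
        List<String> log = new ArrayList<String>();
        shim.configureConnectionInformation("demo.cluster", "", "demo.cluster", "", conf, log);

        for (String message : log) {
            System.out.println(message);
        }
    }
}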