org.apache.sqoop.submission.spark.SqoopSparkClientFactory.java Source code


Introduction

Here is the source code for org.apache.sqoop.submission.spark.SqoopSparkClientFactory.java
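As the listing below shows, the class is a small factory: it builds a Spark configuration map from built-in defaults (a local master, the application name sqoop-spark, and the Kryo serializer), overlays any spark* properties found in a spark-defaults.conf on the classpath, and hands the result to a LocalSqoopSparkClient. Submission through YARN is still marked as a TODO.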

Source

/*
 * Copyright (C) 2016 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.submission.spark;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkException;

public class SqoopSparkClientFactory {

    protected static final Log LOG = LogFactory.getLog(SqoopSparkClientFactory.class);
    private static final String SPARK_DEFAULT_CONF_FILE = "spark-defaults.conf";
    private static final String SPARK_DEFAULT_MASTER = "local";
    private static final String SPARK_DEFAULT_APP_NAME = "sqoop-spark";
    private static final String SPARK_DEFAULT_SERIALIZER = "org.apache.spark.serializer.KryoSerializer";

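    /**
     * Creates a Spark client from the given Sqoop configuration. Any master
     * that does not start with "yarn" is served by a shared local Spark
     * context; YARN submission is not hooked up yet and returns null.
     */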
    public static SqoopSparkClientManager createSqoopSparkClient(SqoopConf sqoopConf)
            throws IOException, SparkException {

        Map<String, String> sparkConf = prepareSparkConfMapFromSqoopConfig(sqoopConf);
        // Submit the Spark job through a local Spark context unless the master
        // is a YARN master; YARN jobs would go through a remote submitter.
        String master = sparkConf.get(Constants.SPARK_MASTER);
        if (!master.startsWith("yarn")) {
            // With local spark context, all user sessions share the same spark context.
            return LocalSqoopSparkClient.getInstance(generateSparkConf(sparkConf));
        } else {
            LOG.info("Using yarn submitter");
            // TODO: hook up the YARN submitter.
            return null;
        }
    }

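    /**
     * Builds the Spark configuration map: built-in defaults for the master,
     * application name, and serializer first, then any "spark*" properties
     * from a spark-defaults.conf found on the classpath, which override the
     * defaults.
     */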
    public static Map<String, String> prepareSparkConfMapFromSqoopConfig(SqoopConf sqoopConf) {
        Map<String, String> sparkConf = new HashMap<String, String>();
        // set default spark configurations.
        sparkConf.put(Constants.SPARK_MASTER, SPARK_DEFAULT_MASTER);
        sparkConf.put(Constants.SPARK_APP_NAME, SPARK_DEFAULT_APP_NAME);
        sparkConf.put(Constants.SPARK_SERIALIZER, SPARK_DEFAULT_SERIALIZER);

        // Log the Sqoop-level properties; note that they are only logged here,
        // not copied into the Spark configuration map.
        for (Map.Entry<String, String> p : sqoopConf.getProps().entrySet()) {
            LOG.info("Sqoop property: " + p.getKey() + " = " + p.getValue());
        }
        // Load properties from spark-defaults.conf if the file is on the classpath.
        try (InputStream inputStream = SqoopSparkClientFactory.class.getClassLoader()
                .getResourceAsStream(SPARK_DEFAULT_CONF_FILE)) {
            if (inputStream != null) {
                LOG.info("Loading Spark properties from: " + SPARK_DEFAULT_CONF_FILE);
                Properties properties = new Properties();
                properties.load(new InputStreamReader(inputStream, CharsetNames.UTF_8));
                for (String propertyName : properties.stringPropertyNames()) {
                    // Only merge Spark settings; anything else is ignored.
                    if (propertyName.startsWith("spark")) {
                        String value = properties.getProperty(propertyName);
                        sparkConf.put(propertyName, value);
                        LOG.info(String.format("Loaded Spark property from %s (%s -> %s).",
                                SPARK_DEFAULT_CONF_FILE, propertyName, value));
                    }
                }
            }
        } catch (IOException e) {
            LOG.warn("Failed to read Spark configuration file: " + SPARK_DEFAULT_CONF_FILE, e);
        }
        return sparkConf;
    }

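    /**
     * Copies the prepared map into a SparkConf. The false argument tells
     * SparkConf not to load defaults from Java system properties, so only
     * the entries in the map are applied.
     */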
    static SparkConf generateSparkConf(Map<String, String> conf) {
        SparkConf sparkConf = new SparkConf(false);
        for (Map.Entry<String, String> entry : conf.entrySet()) {
            sparkConf.set(entry.getKey(), entry.getValue());
        }
        return sparkConf;
    }

}
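
For context, here is a minimal sketch of driving the package-private generateSparkConf helper directly. It must live in the same package because the method is package-private, and the literal keys below assume Constants.SPARK_MASTER, SPARK_APP_NAME, and SPARK_SERIALIZER resolve to the standard spark.master, spark.app.name, and spark.serializer names; treat those literals as assumptions, since the Constants class is not shown on this page.

package org.apache.sqoop.submission.spark;

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;

public class GenerateSparkConfExample {

    public static void main(String[] args) {
        // Mirror the factory's built-in defaults by hand. The key names are
        // assumptions standing in for the unshown Constants class.
        Map<String, String> conf = new HashMap<String, String>();
        conf.put("spark.master", "local");
        conf.put("spark.app.name", "sqoop-spark");
        conf.put("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

        // generateSparkConf copies every entry into a SparkConf that ignores
        // JVM system properties, so the result contains exactly these entries.
        SparkConf sparkConf = SqoopSparkClientFactory.generateSparkConf(conf);
        System.out.println(sparkConf.get("spark.app.name")); // prints sqoop-spark
    }
}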