org.apache.sqoop.mapredsparkcommon.MRConfigurationUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sqoop.mapredsparkcommon.MRConfigurationUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapredsparkcommon;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.PropertyConfigurator;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.json.JSONUtils;
import org.apache.sqoop.json.util.SchemaSerialization;
import org.apache.sqoop.model.ConfigUtils;
import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.utils.ClassUtils;
import org.json.simple.JSONObject;

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

/**
 * Helper class to store and load various information in/from MapReduce configuration
 * object and JobConf object.
 */
public final class MRConfigurationUtils {

    // ---------------------------------------------------------------------
    // Properties holding the class name of configuration objects.
    // The "*_NAME" / "*_OBJ" variants are used by the "Unsafe" methods, which
    // keep both the class name and the JSON payload in the Configuration
    // object; the plain variants pair with a secret key in the credentials
    // store.
    // ---------------------------------------------------------------------

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.link";

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_NAME = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.link.name";

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_OBJ = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.link.obj";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.link";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_NAME = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.link.name";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_OBJ = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.link.obj";

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.job";

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_NAME = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.job.name";

    private static final String MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_OBJ = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.from.job.obj";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.job";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_NAME = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.job.name";

    private static final String MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_OBJ = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.connector.to.job.obj";

    private static final String MR_JOB_CONFIG_DRIVER_CONFIG_CLASS = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.class.driver";

    // ---------------------------------------------------------------------
    // Names (and their Text form, as required by the credentials API) under
    // which the JSON payloads are kept in the credentials store.
    // ---------------------------------------------------------------------

    private static final String MR_JOB_CONFIG_FROM_CONNECTOR_LINK = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.connector.from.link";

    private static final Text MR_JOB_CONFIG_FROM_CONNECTOR_LINK_KEY = new Text(MR_JOB_CONFIG_FROM_CONNECTOR_LINK);

    private static final String MR_JOB_CONFIG_TO_CONNECTOR_LINK = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.connector.to.link";

    private static final Text MR_JOB_CONFIG_TO_CONNECTOR_LINK_KEY = new Text(MR_JOB_CONFIG_TO_CONNECTOR_LINK);

    private static final String MR_JOB_CONFIG_FROM_JOB_CONFIG = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.connector.from.job";

    private static final Text MR_JOB_CONFIG_FROM_JOB_CONFIG_KEY = new Text(MR_JOB_CONFIG_FROM_JOB_CONFIG);

    private static final String MR_JOB_CONFIG_TO_JOB_CONFIG = MRJobConstants.PREFIX_JOB_CONFIG
            + "config.connector.to.job";

    private static final Text MR_JOB_CONFIG_TO_JOB_CONFIG_KEY = new Text(MR_JOB_CONFIG_TO_JOB_CONFIG);

    private static final String MR_JOB_CONFIG_DRIVER_CONFIG = MRJobConstants.PREFIX_JOB_CONFIG + "config.driver";

    private static final Text MR_JOB_CONFIG_DRIVER_CONFIG_KEY = new Text(MR_JOB_CONFIG_DRIVER_CONFIG);

    private static final String SCHEMA_FROM = MRJobConstants.PREFIX_JOB_CONFIG + "schema.connector.from";

    private static final Text SCHEMA_FROM_KEY = new Text(SCHEMA_FROM);

    private static final String SCHEMA_TO = MRJobConstants.PREFIX_JOB_CONFIG + "schema.connector.to";

    private static final Text SCHEMA_TO_KEY = new Text(SCHEMA_TO);

    private MRConfigurationUtils() {
        // Instantiation is prohibited
    }

    /**
     * Persist Connector configuration object for link.
     *
     * The class name goes into the job's Configuration, the JSON form of the
     * object into the job's credentials store (UTF-8 encoded).
     *
     * @param type Direction (FROM or TO) the configuration belongs to; any other
     *             value is ignored
     * @param job MapReduce job object
     * @param obj Configuration object
     */
    public static void setConnectorLinkConfig(Direction type, Job job, Object obj) {
        switch (type) {
        case FROM:
            storeInCredentials(job, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK,
                    MR_JOB_CONFIG_FROM_CONNECTOR_LINK_KEY, obj);
            break;

        case TO:
            storeInCredentials(job, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK,
                    MR_JOB_CONFIG_TO_CONNECTOR_LINK_KEY, obj);
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Persist Connector configuration object for link directly in the Mapreduce Configuration object
     * instead of the Hadoop credentials store.
     *
     * @param type Direction (FROM or TO) the configuration belongs to; any other
     *             value is ignored
     * @param conf MapReduce configuration object
     * @param obj Link configuration object
     */
    public static void setConnectorLinkConfigUnsafe(Direction type, Configuration conf, Object obj) {
        switch (type) {
        case FROM:
            storeInConfiguration(conf, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_NAME,
                    MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_OBJ, obj);
            break;

        case TO:
            storeInConfiguration(conf, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_NAME,
                    MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_OBJ, obj);
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Persist Connector configuration objects for job.
     *
     * The class name goes into the job's Configuration, the JSON form of the
     * object into the job's credentials store (UTF-8 encoded).
     *
     * @param type Direction (FROM or TO) the configuration belongs to; any other
     *             value is ignored
     * @param job MapReduce job object
     * @param obj Configuration object
     */
    public static void setConnectorJobConfig(Direction type, Job job, Object obj) {
        switch (type) {
        case FROM:
            storeInCredentials(job, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB,
                    MR_JOB_CONFIG_FROM_JOB_CONFIG_KEY, obj);
            break;

        case TO:
            storeInCredentials(job, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB,
                    MR_JOB_CONFIG_TO_JOB_CONFIG_KEY, obj);
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Persist Connector configuration objects for job directly in the Mapreduce Configuration object
     * instead of the Hadoop credentials store.
     *
     * @param type Direction (FROM or TO) the configuration belongs to; any other
     *             value is ignored
     * @param conf MapReduce configuration object
     * @param obj Configuration object
     */
    public static void setConnectorJobConfigUnsafe(Direction type, Configuration conf, Object obj) {
        switch (type) {
        case FROM:
            storeInConfiguration(conf, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_NAME,
                    MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_OBJ, obj);
            break;

        case TO:
            storeInConfiguration(conf, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_NAME,
                    MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_OBJ, obj);
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Persist driver configuration object for job.
     *
     * @param job MapReduce job object
     * @param obj Configuration object
     */
    public static void setDriverConfig(Job job, Object obj) {
        storeInCredentials(job, MR_JOB_CONFIG_DRIVER_CONFIG_CLASS, MR_JOB_CONFIG_DRIVER_CONFIG_KEY, obj);
    }

    /**
     * Persist Connector generated schema in the job's credentials store.
     *
     * @param type  Direction of schema we are persisting; values other than
     *              FROM and TO are ignored
     * @param job MapReduce Job object
     * @param schema Schema
     */
    public static void setConnectorSchema(Direction type, Job job, Schema schema) {
        String jsonSchema = SchemaSerialization.extractSchema(schema).toJSONString();
        switch (type) {
        case FROM:
            job.getCredentials().addSecretKey(SCHEMA_FROM_KEY, toBytes(jsonSchema));
            break;

        case TO:
            job.getCredentials().addSecretKey(SCHEMA_TO_KEY, toBytes(jsonSchema));
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Persist Connector generated schema directly in the Configuration object
     * instead of putting into the Hadoop credentials store.
     *
     * @param type  Direction of schema we are persisting; values other than
     *              FROM and TO are ignored
     * @param conf  Configuration object
     * @param schema Schema
     */
    public static void setConnectorSchemaUnsafe(Direction type, Configuration conf, Schema schema) {
        String jsonSchema = SchemaSerialization.extractSchema(schema).toJSONString();
        switch (type) {
        case FROM:
            conf.set(SCHEMA_FROM, jsonSchema);
            break;

        case TO:
            conf.set(SCHEMA_TO, jsonSchema);
            break;

        default:
            // Nothing is persisted for any other direction
            break;
        }
    }

    /**
     * Retrieve Connector configuration object for link.
     *
     * The configuration is read from the credentials store, so the given object
     * is cast to {@link JobConf}; passing any other Configuration subtype fails
     * with a ClassCastException.
     *
     * @param type Direction (FROM or TO) of the requested configuration
     * @param configuration MapReduce configuration object (must be a JobConf)
     * @return Configuration object, or null for any other direction or when no
     *         instance of the configuration class could be created
     */
    public static Object getConnectorLinkConfig(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return loadConfiguration((JobConf) configuration, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK,
                    MR_JOB_CONFIG_FROM_CONNECTOR_LINK_KEY);

        case TO:
            return loadConfiguration((JobConf) configuration, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK,
                    MR_JOB_CONFIG_TO_CONNECTOR_LINK_KEY);

        default:
            return null;
        }
    }

    /**
     * Retrieve Connector configuration object for link using the Mapreduce configuration object instead of the
     * Hadoop credentials store.
     *
     * No cast to JobConf is performed here, to avoid the ClassCastException
     * thrown while using the Spark engine.
     *
     * @param type Direction (FROM or TO) of the requested configuration
     * @param configuration MapReduce configuration object
     * @return Configuration object, or null for any other direction or when no
     *         instance of the configuration class could be created
     */
    public static Object getConnectorLinkConfigUnsafe(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return loadConfigurationUnsafe(configuration, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_NAME,
                    MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_LINK_OBJ);

        case TO:
            return loadConfigurationUnsafe(configuration, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_NAME,
                    MR_JOB_CONFIG_CLASS_TO_CONNECTOR_LINK_OBJ);

        default:
            return null;
        }
    }

    /**
     * Retrieve Connector configuration object for job using the Mapreduce configuration object instead of the
     * Hadoop credentials store.
     *
     * No cast to JobConf is performed here, to avoid the ClassCastException
     * thrown while using the Spark engine.
     *
     * @param type Direction (FROM or TO) of the requested configuration
     * @param configuration MapReduce configuration object
     * @return Configuration object, or null for any other direction or when no
     *         instance of the configuration class could be created
     */
    public static Object getConnectorJobConfigUnsafe(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return loadConfigurationUnsafe(configuration, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_NAME,
                    MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB_OBJ);

        case TO:
            return loadConfigurationUnsafe(configuration, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_NAME,
                    MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB_OBJ);

        default:
            return null;
        }
    }

    /**
     * Retrieve Connector configuration object for job.
     *
     * The configuration is read from the credentials store, so the given object
     * is cast to {@link JobConf}; passing any other Configuration subtype fails
     * with a ClassCastException.
     *
     * @param type Direction (FROM or TO) of the requested configuration
     * @param configuration MapReduce configuration object (must be a JobConf)
     * @return Configuration object, or null for any other direction or when no
     *         instance of the configuration class could be created
     */
    public static Object getConnectorJobConfig(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return loadConfiguration((JobConf) configuration, MR_JOB_CONFIG_CLASS_FROM_CONNECTOR_JOB,
                    MR_JOB_CONFIG_FROM_JOB_CONFIG_KEY);

        case TO:
            return loadConfiguration((JobConf) configuration, MR_JOB_CONFIG_CLASS_TO_CONNECTOR_JOB,
                    MR_JOB_CONFIG_TO_JOB_CONFIG_KEY);

        default:
            return null;
        }
    }

    /**
     * Retrieve Framework configuration object for job.
     *
     * @param configuration MapReduce configuration object (must be a JobConf)
     * @return Configuration object, or null when no instance of the
     *         configuration class could be created
     */
    public static Object getDriverConfig(Configuration configuration) {
        return loadConfiguration((JobConf) configuration, MR_JOB_CONFIG_DRIVER_CONFIG_CLASS,
                MR_JOB_CONFIG_DRIVER_CONFIG_KEY);
    }

    /**
     * Retrieve Connector generated schema from the credentials store.
     *
     * @param type The FROM or TO connector
     * @param configuration MapReduce configuration object (must be a JobConf)
     * @return Stored schema, or null when none is stored or the direction is
     *         neither FROM nor TO
     */
    public static Schema getConnectorSchema(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return getSchemaFromBytes(((JobConf) configuration).getCredentials().getSecretKey(SCHEMA_FROM_KEY));

        case TO:
            return getSchemaFromBytes(((JobConf) configuration).getCredentials().getSecretKey(SCHEMA_TO_KEY));

        default:
            return null;
        }
    }

    /**
     * Retrieve Connector generated schema directly from the Configuration object instead of the Hadoop
     * credentials store.
     *
     * @param type The FROM or TO connector
     * @param configuration MapReduce configuration object
     * @return Stored schema, or null when none is stored or the direction is
     *         neither FROM nor TO
     */
    public static Schema getConnectorSchemaUnsafe(Direction type, Configuration configuration) {
        switch (type) {
        case FROM:
            return getSchemaFromString(configuration.get(SCHEMA_FROM));

        case TO:
            return getSchemaFromString(configuration.get(SCHEMA_TO));

        default:
            return null;
        }
    }

    /**
     * Configure log4j from the {@code /META-INF/log4j.properties} resource
     * visible to the given class.
     *
     * This is best effort: any failure is reported on standard error and
     * otherwise ignored, so that a logging problem never aborts the caller.
     *
     * @param klass Class whose class loader is used to locate the resource
     */
    public static void configureLogging(Class<?> klass) {
        // try-with-resources guarantees the stream is closed once the properties are read
        try (InputStream resourceAsStream = klass.getResourceAsStream("/META-INF/log4j.properties")) {
            if (resourceAsStream == null) {
                System.err.println("Unable to configure logging for sqoop: "
                        + "resource /META-INF/log4j.properties was not found");
                return;
            }

            Properties props = new Properties();
            props.load(resourceAsStream);
            PropertyConfigurator.configure(props);
        } catch (Exception e) {
            System.err.println("Encountered exception while configuring logging for sqoop: " + e);
        }
    }

    /**
     * Store the class name of {@code obj} in the job's Configuration and its
     * JSON representation in the job's credentials store.
     *
     * @param job MapReduce job object
     * @param classProperty Configuration property receiving the class name
     * @param valueKey Credentials key receiving the JSON representation
     * @param obj Configuration object to persist
     */
    private static void storeInCredentials(Job job, String classProperty, Text valueKey, Object obj) {
        job.getConfiguration().set(classProperty, obj.getClass().getName());
        job.getCredentials().addSecretKey(valueKey, toBytes(ConfigUtils.toJson(obj)));
    }

    /**
     * Store both the class name of {@code obj} and its JSON representation as
     * plain properties of the given Configuration.
     *
     * @param conf Configuration receiving both properties
     * @param classProperty Property receiving the class name
     * @param valueProperty Property receiving the JSON representation
     * @param obj Configuration object to persist
     */
    private static void storeInConfiguration(Configuration conf, String classProperty, String valueProperty,
            Object obj) {
        conf.set(classProperty, obj.getClass().getName());
        conf.set(valueProperty, ConfigUtils.toJson(obj));
    }

    /**
     * Encode text for the credentials store.
     *
     * A fixed charset is used so that the JVM writing the secret and the JVM
     * reading it agree on the encoding regardless of their platform defaults.
     *
     * @param value Text to encode
     * @return UTF-8 bytes of the text
     */
    private static byte[] toBytes(String value) {
        return value.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Deserialize schema from JSON encoded (UTF-8) bytes.
     *
     * This method is null safe.
     *
     * @param bytes JSON representation of the schema, may be null
     * @return Restored schema, or null when {@code bytes} is null
     */
    private static Schema getSchemaFromBytes(byte[] bytes) {
        if (bytes == null) {
            return null;
        }

        return getSchemaFromString(new String(bytes, StandardCharsets.UTF_8));
    }

    /**
     * Deserialize schema from JSON encoded string.
     *
     * This method is null safe.
     *
     * @param jsonString JSON representation of the schema, may be null
     * @return Restored schema, or null when {@code jsonString} is null
     */
    private static Schema getSchemaFromString(String jsonString) {
        if (jsonString == null) {
            return null;
        }

        JSONObject jsonSchema = JSONUtils.parse(jsonString);
        return SchemaSerialization.restoreSchema(jsonSchema);
    }

    /**
     * Load configuration instance serialized in Hadoop credentials cache.
     *
     * @param configuration JobConf object associated with the job
     * @param classProperty Property with stored configuration class name
     * @param valueProperty Credentials key with stored JSON representation of
     *                      the configuration object
     * @return New instance with loaded data, or null when no instance of the
     *         configuration class could be created
     * @throws IllegalStateException when the class could be instantiated but
     *         no serialized configuration exists under {@code valueProperty}
     */
    private static Object loadConfiguration(JobConf configuration, String classProperty, Text valueProperty) {
        // Create new instance of configuration class
        Object object = ClassUtils.instantiate(configuration.get(classProperty));
        if (object == null) {
            return null;
        }

        byte[] serialized = configuration.getCredentials().getSecretKey(valueProperty);
        if (serialized == null) {
            // Fail with a diagnosable message instead of a bare NullPointerException
            throw new IllegalStateException("Configuration class is registered under '" + classProperty
                    + "' but no serialized configuration was found in the credentials store under '"
                    + valueProperty + "'");
        }

        // Fill it with JSON data
        ConfigUtils.fillValues(new String(serialized, StandardCharsets.UTF_8), object);

        // And give it back
        return object;
    }

    /**
     * Load configuration instance stored in the Mapreduce configuration object
     * instead of the Hadoop credentials cache.
     *
     * @param configuration Configuration object associated with the job
     * @param classProperty Property with stored configuration class name
     * @param valueProperty Property with stored JSON representation of the
     *                      configuration object
     * @return New instance with loaded data, or null when no instance of the
     *         configuration class could be created
     */
    private static Object loadConfigurationUnsafe(Configuration configuration, String classProperty,
            String valueProperty) {
        // Create new instance of configuration class
        Object object = ClassUtils.instantiate(configuration.get(classProperty));
        if (object == null) {
            return null;
        }

        // Fill it with JSON data
        ConfigUtils.fillValues(configuration.get(valueProperty), object);

        // And give it back
        return object;
    }
}