org.apache.sqoop.tool.SqoopTool.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sqoop.tool.SqoopTool.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.tool;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.sqoop.util.ClassLoaderStack;
import org.apache.sqoop.config.ConfigurationHelper;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.cli.SqoopParser;
import com.cloudera.sqoop.cli.ToolOptions;
import com.cloudera.sqoop.tool.ToolDesc;

/**
 * Base class for Sqoop subprograms (e.g., ImportTool, ExportTool).
 * Allows subprograms to configure the arguments they accept and
 * provides an entry point to the subprogram.
 */
public abstract class SqoopTool {

    /** Logger named after this class. */
    public static final Log LOG = LogFactory.getLog(SqoopTool.class.getName());

    /**
     * Configuration key that specifies the set of ToolPlugin instances to load
     * before determining which SqoopTool instance to load.
     */
    public static final String TOOL_PLUGINS_KEY = "sqoop.tool.plugins";

    /** Registry mapping each tool name to the class that implements it. */
    private static final Map<String, Class<? extends SqoopTool>> TOOLS;
    /** Registry mapping each tool name to its user-friendly description. */
    private static final Map<String, String> DESCRIPTIONS;

    static {
        // All SqoopTool instances should be registered here so that
        // they can be found internally.
        // Both registries are TreeMaps, so they stay sorted by tool name.
        TOOLS = new TreeMap<String, Class<? extends SqoopTool>>();
        DESCRIPTIONS = new TreeMap<String, String>();

        // Built-in tools; registerTool() throws if a name is registered twice.
        registerTool("codegen", CodeGenTool.class, "Generate code to interact with database records");
        registerTool("create-hive-table", CreateHiveTableTool.class, "Import a table definition into Hive");
        registerTool("eval", EvalSqlTool.class, "Evaluate a SQL statement and display the results");
        registerTool("export", ExportTool.class, "Export an HDFS directory to a database table");
        registerTool("import", ImportTool.class, "Import a table from a database to HDFS");
        registerTool("import-all-tables", ImportAllTablesTool.class, "Import tables from a database to HDFS");
        registerTool("help", HelpTool.class, "List available commands");
        registerTool("list-databases", ListDatabasesTool.class, "List available databases on a server");
        registerTool("list-tables", ListTablesTool.class, "List available tables in a database");
        registerTool("merge", MergeTool.class, "Merge results of incremental imports");
        registerTool("metastore", MetastoreTool.class, "Run a standalone Sqoop metastore");
        registerTool("job", JobTool.class, "Work with saved jobs");
        registerTool("version", VersionTool.class, "Display version information");
    }

    /**
     * Registers a tool, together with its description, under the given name.
     *
     * @param toolName the name users invoke the tool by.
     * @param cls the class that implements the tool.
     * @param description a short, user-friendly summary of what the tool does.
     * @throws RuntimeException if a tool is already registered under toolName.
     */
    private static void registerTool(String toolName, Class<? extends SqoopTool> cls, String description) {
        final Class<? extends SqoopTool> registered = TOOLS.get(toolName);
        if (registered == null) {
            // Name is free: record the class and its description side by side.
            TOOLS.put(toolName, cls);
            DESCRIPTIONS.put(toolName, description);
            return;
        }

        // Name collision: refuse to start rather than replace an existing tool.
        final String message = "A plugin is attempting to register a tool with name " + toolName
                + ", but this tool already exists (" + registered.getName() + ")";
        throw new RuntimeException(message);
    }

    /**
     * Registers the tool described by a ToolDesc by unpacking its name,
     * implementing class and description.
     *
     * @param toolDescription the descriptor of the tool to register.
     */
    private static void registerTool(ToolDesc toolDescription) {
        final String name = toolDescription.getName();
        final Class<? extends SqoopTool> toolClass = toolDescription.getToolClass();
        final String summary = toolDescription.getDesc();
        registerTool(name, toolClass, summary);
    }

    /**
     * Loads the plugins named in sqoop-site.xml or other configuration
     * (e.g., tools.d/) and registers every tool they provide, so that
     * tools can be defined outside of Sqoop itself.
     *
     * @param conf the configuration to read plugin class names from.
     * @return the Configuration used to load the plugins.
     */
    public static Configuration loadPlugins(Configuration conf) {
        final Configuration pluginConf = loadPluginsFromConfDir(conf);
        final List<ToolPlugin> toolPlugins =
                ConfigurationHelper.getInstances(pluginConf, TOOL_PLUGINS_KEY, ToolPlugin.class);

        for (ToolPlugin toolPlugin : toolPlugins) {
            LOG.debug("Loading plugin: " + toolPlugin.getClass().getName());
            // Each plugin may contribute any number of tools.
            for (ToolDesc toolDesc : toolPlugin.getTools()) {
                LOG.debug("  Adding tool: " + toolDesc.getName() + " -> " + toolDesc.getToolClass().getName());
                registerTool(toolDesc);
            }
        }

        return pluginConf;
    }

    /**
     * If $SQOOP_CONF_DIR/tools.d/ exists and sqoop.tool.plugins is not set,
     * then we look through the files in that directory; they should contain
     * lines of the form 'plugin.class.name[=/path/to/containing.jar]'.
     *
     * <p>Put all plugin.class.names into the Configuration, and load any
     * specified jars into the ClassLoader. Files are processed in sorted
     * file-name order; directory entries that are not regular files are
     * skipped.
     * </p>
     *
     * <p>If tools.d exists but its contents cannot be listed (File.list()
     * returns null, e.g. on an I/O error), a warning is logged and no plugin
     * files are read.
     * </p>
     *
     * @param conf the current configuration to populate with class names.
     * @return conf again, after possibly populating sqoop.tool.plugins.
     */
    private static Configuration loadPluginsFromConfDir(Configuration conf) {
        if (conf.get(TOOL_PLUGINS_KEY) != null) {
            LOG.debug(TOOL_PLUGINS_KEY + " is set; ignoring tools.d");
            return conf;
        }

        String confDirName = System.getenv("SQOOP_CONF_DIR");
        if (null == confDirName) {
            LOG.warn("$SQOOP_CONF_DIR has not been set in the environment. "
                    + "Cannot check for additional configuration.");
            return conf;
        }

        File confDir = new File(confDirName);
        File toolsDir = new File(confDir, "tools.d");

        if (toolsDir.exists() && toolsDir.isDirectory()) {
            // We have a tools.d subdirectory. Get the file list, sort it,
            // and process them in order.
            String[] fileNames = toolsDir.list();
            if (null == fileNames) {
                // File.list() returns null when the directory cannot be read;
                // sorting or iterating that would throw a NullPointerException.
                LOG.warn("Could not list the contents of " + toolsDir
                        + "; no plugins will be loaded from tools.d");
            } else {
                Arrays.sort(fileNames);

                for (String fileName : fileNames) {
                    File f = new File(toolsDir, fileName);
                    if (f.isFile()) {
                        loadPluginsFromFile(conf, f);
                    }
                }
            }
        }

        // Set the classloader in this configuration so that it will use
        // the jars we just loaded in. This happens whenever SQOOP_CONF_DIR
        // is set, even if tools.d is absent.
        conf.setClassLoader(Thread.currentThread().getContextClassLoader());
        return conf;
    }

    /**
     * Parses one tools.d file and records every ToolPlugin implementation
     * it names. Each property key is a plugin class name; a non-empty value
     * is the jar that contains it, which is added to the classloader stack.
     * I/O failures are logged rather than propagated.
     *
     * @param conf the configuration to populate.
     * @param f the file containing the configuration data to add.
     */
    private static void loadPluginsFromFile(Configuration conf, File f) {
        Reader reader = null;
        try {
            // tools.d files use Java properties-file syntax.
            reader = new InputStreamReader(new FileInputStream(f));
            final Properties pluginProps = new Properties();
            pluginProps.load(reader);

            for (Map.Entry<Object, Object> pluginEntry : pluginProps.entrySet()) {
                // Key: ToolPlugin class name.
                final String pluginClass = pluginEntry.getKey().toString();
                addPlugin(conf, pluginClass);

                // Value: optional path of the jar providing that class.
                final String jarPath = pluginEntry.getValue().toString();
                if (jarPath.length() == 0) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Added plugin " + pluginClass + " specified by " + f);
                    }
                    continue;
                }

                ClassLoaderStack.addJarFile(jarPath, pluginClass);
                LOG.debug("Added plugin " + pluginClass + " in jar " + jarPath + " specified by " + f);
            }
        } catch (IOException ioe) {
            LOG.error("Error loading ToolPlugin information from file " + f + ": "
                    + StringUtils.stringifyException(ioe));
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException closeFailure) {
                    LOG.warn("Error closing file " + f + ": " + closeFailure);
                }
            }
        }
    }

    /**
     * Appends a plugin class name to the comma-separated list stored under
     * TOOL_PLUGINS_KEY, starting the list if it is unset or empty.
     *
     * @param conf the configuration holding the plugin list.
     * @param pluginName the plugin class name to append.
     */
    private static void addPlugin(Configuration conf, String pluginName) {
        final String current = conf.get(TOOL_PLUGINS_KEY);
        final boolean listIsEmpty = (current == null) || (current.length() == 0);
        final String updated = listIsEmpty ? pluginName : current + "," + pluginName;
        conf.set(TOOL_PLUGINS_KEY, updated);
    }

    /**
     * @return a read-only view of the names of the available tools, in
     * sorted order. The view reflects tools registered later (e.g., by
     * plugins); attempts to modify it throw UnsupportedOperationException.
     */
    public static Set<String> getToolNames() {
        // Never hand out the live key set: removing a name through it would
        // drop the tool from TOOLS while leaving its entry in DESCRIPTIONS.
        return Collections.unmodifiableSet(TOOLS.keySet());
    }

    /**
     * Instantiates the tool registered under the given name and sets the
     * new instance's tool name to that name.
     *
     * @param toolName the registered name of the tool to create.
     * @return the SqoopTool instance with the provided name, or null
     * if no such tool exists or it could not be instantiated (the failure
     * is logged).
     */
    public static SqoopTool getTool(String toolName) {
        final Class<? extends SqoopTool> toolClass = TOOLS.get(toolName);
        if (toolClass == null) {
            return null;
        }

        try {
            final SqoopTool instance = toolClass.newInstance();
            instance.setToolName(toolName);
            return instance;
        } catch (Exception e) {
            LOG.error(StringUtils.stringifyException(e));
            return null;
        }
    }

    /**
     * Looks up the description a tool was registered with.
     *
     * @param toolName the registered name of the tool.
     * @return the user-friendly description for a tool, or null if the tool
     * cannot be found.
     */
    public static String getToolDescription(String toolName) {
        final String description = DESCRIPTIONS.get(toolName);
        return description;
    }

    /** The name of the current tool. */
    private String toolName;

    /** Arguments that remained unparsed after parseArguments. */
    protected String[] extraArguments;

    /**
     * Creates a tool whose name defaults to its runtime class name wrapped
     * in angle brackets. getTool() replaces this placeholder with the
     * registered name via setToolName().
     */
    public SqoopTool() {
        this.toolName = "<" + this.getClass().getName() + ">";
    }

    /**
     * Creates a tool with an explicit name.
     * @param name the name reported by getToolName().
     */
    public SqoopTool(String name) {
        this.toolName = name;
    }

    /**
     * @return the name of this tool.
     */
    public String getToolName() {
        return this.toolName;
    }

    /**
     * Sets the name of this tool.
     * @param name the new tool name.
     */
    protected void setToolName(String name) {
        this.toolName = name;
    }

    /**
     * Main body of code to run the tool. Every concrete tool must
     * implement this.
     * @param options the SqoopOptions configured via
     * configureOptions()/applyOptions().
     * @return an integer return code for external programs to consume. 0
     * represents success; nonzero means failure.
     */
    public abstract int run(SqoopOptions options);

    /**
     * Configure the command-line arguments we expect to receive.
     * parseArguments() calls this on a fresh ToolOptions before parsing
     * the tool-specific arguments.
     * @param opts a ToolOptions that should be populated with sets of
     * RelatedOptions for the tool.
     */
    public void configureOptions(ToolOptions opts) {
        // Default implementation does nothing.
    }

    /**
     * Writes this tool's help text to standard output: a usage line, the
     * tool-specific options, then the generic Hadoop command-line usage.
     *
     * @param opts the configured tool options
     */
    public void printHelp(ToolOptions opts) {
        final String usageLine = "usage: sqoop " + getToolName() + " [GENERIC-ARGS] [TOOL-ARGS]";
        System.out.println(usageLine);
        System.out.println("");

        // Tool-specific options come first.
        opts.printHelp();
        System.out.println("");

        // Then the generic Hadoop options, which ToolRunner knows how to print.
        System.out.println("Generic Hadoop command-line arguments:");
        System.out.println("(must preceed any tool-specific arguments)");
        ToolRunner.printGenericCommandUsage(System.out);
    }

    /**
     * Generate the SqoopOptions containing actual argument values from
     * the extracted CommandLine arguments. parseArguments() calls this
     * after the tool-specific arguments have been parsed.
     * @param in the CLI CommandLine that contains the user's set Options.
     * @param out the SqoopOptions with all fields applied.
     * @throws InvalidOptionsException if there's a problem.
     */
    public void applyOptions(CommandLine in, SqoopOptions out) throws InvalidOptionsException {
        // Default implementation does nothing.
    }

    /**
     * Validates options and ensures that any required options are
     * present and that any mutually-exclusive options are not selected.
     * Note that parseArguments() in this class does not call this method.
     * @param options the SqoopOptions to validate.
     * @throws InvalidOptionsException if there's a problem.
     */
    public void validateOptions(SqoopOptions options) throws InvalidOptionsException {
        // Default implementation does nothing.
    }

    /**
     * Builds a fully configured SqoopOptions from command-line arguments.
     * The arguments are applied to a SqoopOptions and its embedded
     * configuration (i.e., through GenericOptionsParser), and whatever
     * is left unparsed is stored in the extraArguments field.
     *
     * @param args the arguments to parse.
     * @param conf if non-null, set as the configuration for the returned
     * SqoopOptions, overriding any configuration it already had.
     * @param in a (perhaps partially-configured) SqoopOptions. If null,
     * then a new SqoopOptions will be used. If this has a null configuration
     * and conf is null, then a new Configuration will be inserted in this.
     * @param useGenericOptions if true, will also parse generic Hadoop
     * options into the Configuration.
     * @return a SqoopOptions that is fully configured by a given tool.
     * @throws ParseException if the generic or tool-specific arguments
     * cannot be parsed.
     * @throws SqoopOptions.InvalidOptionsException if applyOptions()
     * rejects the parsed arguments.
     */
    public SqoopOptions parseArguments(String[] args, Configuration conf, SqoopOptions in,
            boolean useGenericOptions) throws ParseException, SqoopOptions.InvalidOptionsException {
        final SqoopOptions out = (in != null) ? in : new SqoopOptions();

        if (conf != null) {
            // An explicit configuration wins over whatever the options held.
            out.setConf(conf);
        } else if (out.getConf() == null) {
            // Nobody supplied a configuration; start from a fresh one.
            out.setConf(new Configuration());
        }

        // This tool is the "active" tool; bind it in the SqoopOptions.
        //TODO(jarcec): Remove the cast when SqoopOptions will be moved
        //              to apache package
        out.setActiveSqoopTool((com.cloudera.sqoop.tool.SqoopTool) this);

        // Arguments that remain once the generic parser (if any) is done.
        final String[] toolArgs;
        if (!useGenericOptions) {
            toolArgs = args;
        } else {
            try {
                toolArgs = ConfigurationHelper.parseGenericOptions(out.getConf(), args);
            } catch (IOException ioe) {
                final ParseException wrapped = new ParseException("Could not parse generic arguments");
                wrapped.initCause(ioe);
                throw wrapped;
            }
        }

        // Parse tool-specific arguments.
        final ToolOptions toolOptions = new ToolOptions();
        configureOptions(toolOptions);
        final CommandLineParser parser = new SqoopParser();
        final CommandLine cmdLine = parser.parse(toolOptions.merge(), toolArgs, true);
        applyOptions(cmdLine, out);
        this.extraArguments = cmdLine.getArgs();
        return out;
    }

    /**
     * Appends 'extra' to extraArguments. A null array on either side is
     * treated as empty; extraArguments is always replaced by a new array.
     *
     * @param extra the arguments to append; may be null.
     */
    public void appendArgs(String[] extra) {
        final String[] existing = this.extraArguments;
        final int existingCount = (existing != null) ? existing.length : 0;
        final int extraCount = (extra != null) ? extra.length : 0;

        final String[] combined = new String[existingCount + extraCount];
        if (existingCount > 0) {
            System.arraycopy(existing, 0, combined, 0, existingCount);
        }
        if (extraCount > 0) {
            System.arraycopy(extra, 0, combined, existingCount, extraCount);
        }

        this.extraArguments = combined;
    }

    /**
     * Lets a tool name the dependency jars it needs bundled with a
     * MapReduce job executed by Sqoop. The jar containing the SqoopTool
     * instance itself will already be handled by Sqoop.
     *
     * <p>Called by JobBase.cacheJars().</p>
     *
     * <p>
     * The jars are not loaded into the current VM by this method; they are
     * assumed to be already on the classpath if they are needed on the
     * client side (or otherwise classloaded by the tool itself). The list
     * only specifies jars to be added to the distributed cache. A tool can
     * classload these jars by running loadDependencyJars().
     * </p>
     *
     * <p>See also: c.c.s.util.Jars.getJarPathForClass()</p>
     *
     * @return the dependency jar filenames; empty by default.
     */
    public List<String> getDependencyJars() {
        // Default behavior: no additional dependencies.
        final List<String> noDependencies = Collections.emptyList();
        return noDependencies;
    }

    /**
     * Adds every jar reported by getDependencyJars() to the current
     * classloader stack, then points the options' configuration at the
     * thread's context classloader. A [third-party] tool may call this
     * before doing work, to ensure that all of its dependencies get
     * classloaded properly. Note that dependencies will not be available
     * until after the tool is already constructed.
     *
     * @param options the options whose configuration receives the classloader.
     * @throws IOException if a dependency jar cannot be added.
     */
    protected void loadDependencyJars(SqoopOptions options) throws IOException {
        final List<String> jarFiles = getDependencyJars();
        if (jarFiles != null) {
            for (String jarFile : jarFiles) {
                LOG.debug("Loading dependency: " + jarFile);
                ClassLoaderStack.addJarFile(jarFile, null);
            }

            // Make the configuration resolve classes through the loader
            // that now includes the jars added above.
            final Configuration toolConf = options.getConf();
            toolConf.setClassLoader(Thread.currentThread().getContextClassLoader());
        }
    }

    /**
     * @return the name of this tool, as reported by getToolName().
     */
    @Override
    public String toString() {
        final String name = this.getToolName();
        return name;
    }
}