com.cloudera.sqoop.manager.SqlManager.java Source code

Introduction

Here is the source code for com.cloudera.sqoop.manager.SqlManager.java.

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.manager;

import java.sql.Timestamp;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.hive.HiveTypes;
import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
import com.cloudera.sqoop.mapreduce.DataDrivenImportJob;
import com.cloudera.sqoop.mapreduce.HBaseImportJob;
import com.cloudera.sqoop.mapreduce.ImportJobBase;
import com.cloudera.sqoop.mapreduce.JdbcExportJob;
import com.cloudera.sqoop.mapreduce.JdbcUpdateExportJob;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.ResultSetPrinter;

import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Statement;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.util.StringUtils;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * ConnManager implementation for generic SQL-compliant database.
 * This is an abstract class; it requires a database-specific
 * ConnManager implementation to actually create the connection.
 */
public abstract class SqlManager extends ConnManager {

    public static final Log LOG = LogFactory.getLog(SqlManager.class.getName());

    /** Substring that must appear in free-form queries submitted by users.
     * This is the string '$CONDITIONS'.
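     * For example, a user's free-form query might read:
     * "SELECT * FROM employees WHERE $CONDITIONS".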
     */
    public static final String SUBSTITUTE_TOKEN = DataDrivenDBInputFormat.SUBSTITUTE_TOKEN;

    protected static final int DEFAULT_FETCH_SIZE = 1000;

    protected SqoopOptions options;
    private Statement lastStatement;

    /**
     * Constructs the SqlManager.
     * @param opts the SqoopOptions describing the user's requested action.
     */
    public SqlManager(final SqoopOptions opts) {
        this.options = opts;
        initOptionDefaults();
    }

    /**
     * Sets default values for options that were not provided by the user.
     * Only options with database-specific defaults should be configured here.
     */
    protected void initOptionDefaults() {
        if (options.getFetchSize() == null) {
            LOG.info("Using default fetchSize of " + DEFAULT_FETCH_SIZE);
            options.setFetchSize(DEFAULT_FETCH_SIZE);
        }
    }

    /**
     * @return the SQL query to use in getColumnNames() in case this logic must
     * be tuned per-database, but the main extraction loop is still inheritable.
     */
    protected String getColNamesQuery(String tableName) {
        // adding where clause to prevent loading a big table
        return "SELECT t.* FROM " + escapeTableName(tableName) + " AS t WHERE 1=0";
    }

    /** {@inheritDoc} */
    @Override
    public String[] getColumnNames(String tableName) {
        String stmt = getColNamesQuery(tableName);
        return getColumnNamesForRawQuery(stmt);
    }

    /** {@inheritDoc} */
    @Override
    public String[] getColumnNamesForQuery(String query) {
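        // Replace the $CONDITIONS token so the query returns immediately, with zero rows.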
        String rawQuery = query.replace(SUBSTITUTE_TOKEN, " (1 = 0) ");
        return getColumnNamesForRawQuery(rawQuery);
    }

    /**
     * Get column names for a query statement that we do not modify further.
     */
    public String[] getColumnNamesForRawQuery(String stmt) {
        ResultSet results;
        try {
            results = execute(stmt);
        } catch (SQLException sqlE) {
            LOG.error("Error executing statement: " + sqlE.toString(), sqlE);
            release();
            return null;
        }

        try {
            int cols = results.getMetaData().getColumnCount();
            ArrayList<String> columns = new ArrayList<String>();
            ResultSetMetaData metadata = results.getMetaData();
            for (int i = 1; i < cols + 1; i++) {
                String colName = metadata.getColumnName(i);
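                // Expression columns may be unnamed; fall back to the label, then to a synthetic name.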
                if (colName == null || colName.equals("")) {
                    colName = metadata.getColumnLabel(i);
                    if (null == colName) {
                        colName = "_RESULT_" + i;
                    }
                }
                columns.add(colName);
            }
            return columns.toArray(new String[0]);
        } catch (SQLException sqlException) {
            LOG.error("Error reading from database: " + sqlException.toString(), sqlException);
            return null;
        } finally {
            try {
                results.close();
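                // Auto-commit is disabled in makeConnection(), so commit to end the metadata transaction.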
                getConnection().commit();
            } catch (SQLException sqlE) {
                LOG.warn("SQLException closing ResultSet: " + sqlE.toString(), sqlE);
            }

            release();
        }
    }

    /**
     * @return the SQL query to use in getColumnTypes() in case this logic must
     * be tuned per-database, but the main extraction loop is still inheritable.
     */
    protected String getColTypesQuery(String tableName) {
        return getColNamesQuery(tableName);
    }

    @Override
    public Map<String, Integer> getColumnTypes(String tableName) {
        String stmt = getColTypesQuery(tableName);
        return getColumnTypesForRawQuery(stmt);
    }

    @Override
    public Map<String, Integer> getColumnTypesForQuery(String query) {
        // Manipulate the query to return immediately, with zero rows.
        String rawQuery = query.replace(SUBSTITUTE_TOKEN, " (1 = 0) ");
        return getColumnTypesForRawQuery(rawQuery);
    }

    /**
     * Get column types for a query statement that we do not modify further.
     */
    protected Map<String, Integer> getColumnTypesForRawQuery(String stmt) {
        ResultSet results;
        try {
            results = execute(stmt);
        } catch (SQLException sqlE) {
            LOG.error("Error executing statement: " + sqlE.toString());
            release();
            return null;
        }

        try {
            Map<String, Integer> colTypes = new HashMap<String, Integer>();

            int cols = results.getMetaData().getColumnCount();
            ResultSetMetaData metadata = results.getMetaData();
            for (int i = 1; i < cols + 1; i++) {
                int typeId = metadata.getColumnType(i);
                String colName = metadata.getColumnName(i);
                if (colName == null || colName.equals("")) {
                    colName = metadata.getColumnLabel(i);
                }

                colTypes.put(colName, Integer.valueOf(typeId));
            }

            return colTypes;
        } catch (SQLException sqlException) {
            LOG.error("Error reading from database: " + sqlException.toString());
            return null;
        } finally {
            try {
                results.close();
                getConnection().commit();
            } catch (SQLException sqlE) {
                LOG.warn("SQLException closing ResultSet: " + sqlE.toString());
            }

            release();
        }
    }

    @Override
    public ResultSet readTable(String tableName, String[] columns) throws SQLException {
        if (columns == null) {
            columns = getColumnNames(tableName);
        }

        StringBuilder sb = new StringBuilder();
        sb.append("SELECT ");
        boolean first = true;
        for (String col : columns) {
            if (!first) {
                sb.append(", ");
            }
            sb.append(escapeColName(col));
            first = false;
        }
        sb.append(" FROM ");
        sb.append(escapeTableName(tableName));
        sb.append(" AS "); // needed for hsqldb; doesn't hurt anyone else.
        sb.append(escapeTableName(tableName));

        String sqlCmd = sb.toString();
        LOG.debug("Reading table with command: " + sqlCmd);
        return execute(sqlCmd);
    }

    @Override
    public String[] listDatabases() {
        // TODO(aaron): Implement this!
        LOG.error("Generic SqlManager.listDatabases() not implemented.");
        return null;
    }

    @Override
    public String[] listTables() {
        ResultSet results = null;
        String[] tableTypes = { "TABLE" };
        try {
            try {
                DatabaseMetaData metaData = this.getConnection().getMetaData();
                results = metaData.getTables(null, null, null, tableTypes);
            } catch (SQLException sqlException) {
                LOG.error("Error reading database metadata: " + sqlException.toString());
                return null;
            }

            if (null == results) {
                return null;
            }

            try {
                ArrayList<String> tables = new ArrayList<String>();
                while (results.next()) {
                    String tableName = results.getString("TABLE_NAME");
                    tables.add(tableName);
                }

                return tables.toArray(new String[0]);
            } catch (SQLException sqlException) {
                LOG.error("Error reading from database: " + sqlException.toString());
                return null;
            }
        } finally {
            if (null != results) {
                try {
                    results.close();
                    getConnection().commit();
                } catch (SQLException sqlE) {
                    LOG.warn("Exception closing ResultSet: " + sqlE.toString());
                }
            }
        }
    }

    @Override
    public String getPrimaryKey(String tableName) {
        try {
            DatabaseMetaData metaData = this.getConnection().getMetaData();
            ResultSet results = metaData.getPrimaryKeys(null, null, tableName);
            if (null == results) {
                return null;
            }

            try {
                if (results.next()) {
                    return results.getString("COLUMN_NAME");
                } else {
                    return null;
                }
            } finally {
                results.close();
                getConnection().commit();
            }
        } catch (SQLException sqlException) {
            LOG.error("Error reading primary key metadata: " + sqlException.toString());
            return null;
        }
    }

    /**
     * Retrieve the actual connection from the outer ConnManager.
     */
    public abstract Connection getConnection() throws SQLException;

    /**
     * Determine what column to use to split the table.
     * @param opts the SqoopOptions controlling this import.
     * @param tableName the table to import.
     * @return the splitting column, if one is set or inferrable, or null
     * otherwise.
     */
    protected String getSplitColumn(SqoopOptions opts, String tableName) {
        String splitCol = opts.getSplitByCol();
        if (null == splitCol && null != tableName) {
            // If the user didn't specify a splitting column, try to infer one.
            splitCol = getPrimaryKey(tableName);
        }

        return splitCol;
    }

    /**
     * Offers the ConnManager an opportunity to validate that the
     * options specified in the ImportJobContext are valid.
     * @throws ImportException if the import is misconfigured.
     */
    protected void checkTableImportOptions(ImportJobContext context) throws IOException, ImportException {
        String tableName = context.getTableName();
        SqoopOptions opts = context.getOptions();

        // Default implementation: check that the split column is set
        // correctly.
        String splitCol = getSplitColumn(opts, tableName);
        if (null == splitCol && opts.getNumMappers() > 1) {
            // Can't infer a primary key.
            throw new ImportException("No primary key could be found for table " + tableName
                    + ". Please specify one with --split-by or perform " + "a sequential import with '-m 1'.");
        }

    }

    /**
     * Default implementation of importTable() is to launch a MapReduce job
     * via DataDrivenImportJob to read the table with DataDrivenDBInputFormat.
     */
    public void importTable(ImportJobContext context) throws IOException, ImportException {
        String tableName = context.getTableName();
        String jarFile = context.getJarFile();
        SqoopOptions opts = context.getOptions();

        context.setConnManager(this);

        ImportJobBase importer;
        if (opts.getHBaseTable() != null) {
            // Import to HBase.
            importer = new HBaseImportJob(opts, context);
        } else {
            // Import to HDFS.
            importer = new DataDrivenImportJob(opts, context.getInputFormat(), context);
        }

        checkTableImportOptions(context);

        String splitCol = getSplitColumn(opts, tableName);
        importer.runImport(tableName, jarFile, splitCol, opts.getConf());
    }

    /**
     * Default implementation of importQuery() is to launch a MapReduce job
     * via DataDrivenImportJob to read the table with DataDrivenDBInputFormat,
     * using its free-form query importer.
     */
    public void importQuery(ImportJobContext context) throws IOException, ImportException {
        String jarFile = context.getJarFile();
        SqoopOptions opts = context.getOptions();

        context.setConnManager(this);

        ImportJobBase importer;
        if (opts.getHBaseTable() != null) {
            // Import to HBase.
            importer = new HBaseImportJob(opts, context);
        } else {
            // Import to HDFS.
            importer = new DataDrivenImportJob(opts, context.getInputFormat(), context);
        }

        String splitCol = getSplitColumn(opts, null);
        if (null == splitCol && opts.getNumMappers() > 1) {
            // Can't infer a primary key.
            throw new ImportException("A split-by column must be specified for "
                    + "parallel free-form query imports. Please specify one with "
                    + "--split-by or perform a sequential import with '-m 1'.");
        }

        importer.runImport(null, jarFile, splitCol, opts.getConf());
    }

    /**
     * Executes an arbitrary SQL statement.
     * @param stmt The SQL statement to execute
     * @param fetchSize Overrides default or parameterized fetch size
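     * @param args Values bound, in order, to the statement's positional parameters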
     * @return A ResultSet encapsulating the results or null on error
     */
    protected ResultSet execute(String stmt, Integer fetchSize, Object... args) throws SQLException {
        // Release any previously-open statement.
        release();

        PreparedStatement statement = null;
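        // A forward-only, read-only result set lets many JDBC drivers stream
        // rows instead of buffering the entire result in memory.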
        statement = this.getConnection().prepareStatement(stmt, ResultSet.TYPE_FORWARD_ONLY,
                ResultSet.CONCUR_READ_ONLY);
        if (fetchSize != null) {
            LOG.debug("Using fetchSize for next query: " + fetchSize);
            statement.setFetchSize(fetchSize);
        }
        this.lastStatement = statement;
        if (null != args) {
            for (int i = 0; i < args.length; i++) {
                statement.setObject(i + 1, args[i]);
            }
        }

        LOG.info("Executing SQL statement: " + stmt);
        return statement.executeQuery();
    }

    /**
     * Executes an arbitrary SQL Statement.
     * @param stmt The SQL statement to execute
     * @return A ResultSet encapsulating the results or null on error
     */
    protected ResultSet execute(String stmt, Object... args) throws SQLException {
        return execute(stmt, options.getFetchSize(), args);
    }

    /**
     * Resolve a database-specific type to the Java type that should contain it.
     * @param sqlType
     * @return the name of a Java type to hold the sql datatype, or null if none.
     */
    public String toJavaType(int sqlType) {
        // Mappings taken from:
        // http://java.sun.com/j2se/1.3/docs/guide/jdbc/getstart/mapping.html
        if (sqlType == Types.INTEGER) {
            return "Integer";
        } else if (sqlType == Types.VARCHAR) {
            return "String";
        } else if (sqlType == Types.CHAR) {
            return "String";
        } else if (sqlType == Types.LONGVARCHAR) {
            return "String";
        } else if (sqlType == Types.NUMERIC) {
            return "java.math.BigDecimal";
        } else if (sqlType == Types.DECIMAL) {
            return "java.math.BigDecimal";
        } else if (sqlType == Types.BIT) {
            return "Boolean";
        } else if (sqlType == Types.BOOLEAN) {
            return "Boolean";
        } else if (sqlType == Types.TINYINT) {
            return "Integer";
        } else if (sqlType == Types.SMALLINT) {
            return "Integer";
        } else if (sqlType == Types.BIGINT) {
            return "Long";
        } else if (sqlType == Types.REAL) {
            return "Float";
        } else if (sqlType == Types.FLOAT) {
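            // JDBC FLOAT is double precision by specification, hence Double rather than Float.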
            return "Double";
        } else if (sqlType == Types.DOUBLE) {
            return "Double";
        } else if (sqlType == Types.DATE) {
            return "java.sql.Date";
        } else if (sqlType == Types.TIME) {
            return "java.sql.Time";
        } else if (sqlType == Types.TIMESTAMP) {
            return "java.sql.Timestamp";
        } else if (sqlType == Types.BINARY || sqlType == Types.VARBINARY) {
            return BytesWritable.class.getName();
        } else if (sqlType == Types.CLOB) {
            return ClobRef.class.getName();
        } else if (sqlType == Types.BLOB || sqlType == Types.LONGVARBINARY) {
            return BlobRef.class.getName();
        } else {
            // TODO(aaron): Support DISTINCT, ARRAY, STRUCT, REF, JAVA_OBJECT.
            // Return null indicating database-specific manager should return a
            // java data type if it can find one for any nonstandard type.
            return null;
        }
    }

    /**
     * Resolve a database-specific type to Hive data type.
     * @param sqlType     sql type
     * @return            hive type
     */
    public String toHiveType(int sqlType) {
        return HiveTypes.toHiveType(sqlType);
    }

    public void close() throws SQLException {
        release();
    }

    /**
     * Prints the contents of a ResultSet to the specified PrintWriter.
     * The ResultSet is closed at the end of this method.
     * @param results the ResultSet to print.
     * @param pw the location to print the data to.
     */
    protected void formatAndPrintResultSet(ResultSet results, PrintWriter pw) {
        try {
            try {
                int cols = results.getMetaData().getColumnCount();
                pw.println("Got " + cols + " columns back");
                if (cols > 0) {
                    ResultSetMetaData rsmd = results.getMetaData();
                    String schema = rsmd.getSchemaName(1);
                    String table = rsmd.getTableName(1);
                    if (null != schema) {
                        pw.println("Schema: " + schema);
                    }

                    if (null != table) {
                        pw.println("Table: " + table);
                    }
                }
            } catch (SQLException sqlE) {
                LOG.error("SQLException reading result metadata: " + sqlE.toString());
            }

            try {
                new ResultSetPrinter().printResultSet(pw, results);
            } catch (IOException ioe) {
                LOG.error("IOException writing results: " + ioe.toString());
                return;
            }
        } finally {
            try {
                results.close();
                getConnection().commit();
            } catch (SQLException sqlE) {
                LOG.warn("SQLException closing ResultSet: " + sqlE.toString());
            }

            release();
        }
    }

    /**
     * Poor man's SQL query interface; used for debugging.
     * @param s the SQL statement to execute.
     */
    public void execAndPrint(String s) {
        ResultSet results = null;
        try {
            results = execute(s);
        } catch (SQLException sqlE) {
            LOG.error("Error executing statement: " + StringUtils.stringifyException(sqlE));
            release();
            return;
        }

        PrintWriter pw = new PrintWriter(System.out, true);
        try {
            formatAndPrintResultSet(results, pw);
        } finally {
            pw.flush(); // closing this writer would also close System.out
        }
    }

    /**
     * Create a connection to the database; usually used only from within
     * getConnection(), which enforces a singleton guarantee around the
     * Connection object.
     */
    protected Connection makeConnection() throws SQLException {

        Connection connection;
        String driverClass = getDriverClass();

        try {
            Class.forName(driverClass);
        } catch (ClassNotFoundException cnfe) {
            throw new RuntimeException("Could not load db driver class: " + driverClass, cnfe);
        }

        String username = options.getUsername();
        String password = options.getPassword();
        if (null == username) {
            connection = DriverManager.getConnection(options.getConnectString());
        } else {
            connection = DriverManager.getConnection(options.getConnectString(), username, password);
        }

        // We only use this for metadata queries. Loosest semantics are okay.
        connection.setTransactionIsolation(getMetadataIsolationLevel());
        connection.setAutoCommit(false);

        return connection;
    }

    /**
     * @return the transaction isolation level to use for metadata queries
     * (queries executed by the ConnManager itself).
     */
    protected int getMetadataIsolationLevel() {
        return Connection.TRANSACTION_READ_COMMITTED;
    }

    /**
     * Export data stored in HDFS into a table in a database.
     */
    public void exportTable(ExportJobContext context) throws IOException, ExportException {
        context.setConnManager(this);
        JdbcExportJob exportJob = new JdbcExportJob(context);
        exportJob.runExport();
    }

    public void release() {
        if (null != this.lastStatement) {
            try {
                this.lastStatement.close();
            } catch (SQLException e) {
                LOG.warn("Exception closing executed Statement: " + e);
            }

            this.lastStatement = null;
        }
    }

    /** {@inheritDoc} */
    @Override
    public void updateTable(ExportJobContext context) throws IOException, ExportException {
        context.setConnManager(this);
        JdbcUpdateExportJob exportJob = new JdbcUpdateExportJob(context);
        exportJob.runExport();
    }

    /**
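     * Subclasses may override this for databases with a different syntax;
     * Oracle's manager, for instance, uses "SELECT SYSDATE FROM dual".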
     * @return a SQL query to retrieve the current timestamp from the db.
     */
    protected String getCurTimestampQuery() {
        return "SELECT CURRENT_TIMESTAMP()";
    }

    /** {@inheritDoc} */
    @Override
    public Timestamp getCurrentDbTimestamp() {
        release(); // Release any previous ResultSet.

        Statement s = null;
        ResultSet rs = null;
        try {
            Connection c = getConnection();
            s = c.createStatement();
            rs = s.executeQuery(getCurTimestampQuery());
            if (rs == null || !rs.next()) {
                return null; // empty ResultSet.
            }

            return rs.getTimestamp(1);
        } catch (SQLException sqlE) {
            LOG.warn("SQL exception accessing current timestamp: " + sqlE);
            return null;
        } finally {
            try {
                if (null != rs) {
                    rs.close();
                }
            } catch (SQLException sqlE) {
                LOG.warn("SQL Exception closing resultset: " + sqlE);
            }

            try {
                if (null != s) {
                    s.close();
                }
            } catch (SQLException sqlE) {
                LOG.warn("SQL Exception closing statement: " + sqlE);
            }
        }
    }

    @Override
    public long getTableRowCount(String tableName) throws SQLException {
        release(); // Release any previous ResultSet
        long result = -1;
        String countQuery = "SELECT COUNT(*) FROM " + tableName;
        Statement stmt = null;
        ResultSet rset = null;
        try {
            Connection conn = getConnection();
            stmt = conn.createStatement();
            rset = stmt.executeQuery(countQuery);
            rset.next();
            result = rset.getLong(1);
        } catch (SQLException ex) {
            LOG.error("Unable to query count * for table " + tableName, ex);
            throw ex;
        } finally {
            if (rset != null) {
                try {
                    rset.close();
                } catch (SQLException ex) {
                    LOG.error("Unable to close result set", ex);
                }
            }
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException ex) {
                    LOG.error("Unable to close statement", ex);
                }
            }
        }
        return result;
    }

    @Override
    public void deleteAllRecords(String tableName) throws SQLException {
        release(); // Release any previous ResultSet
        String deleteQuery = "DELETE FROM " + tableName;
        Statement stmt = null;
        try {
            Connection conn = getConnection();
            stmt = conn.createStatement();
            int updateCount = stmt.executeUpdate(deleteQuery);
            conn.commit();
            LOG.info("Deleted " + updateCount + " records from " + tableName);
        } catch (SQLException ex) {
            LOG.error("Unable to execute delete query: " + deleteQuery, ex);
            throw ex;
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException ex) {
                    LOG.error("Unable to close statement", ex);
                }
            }
        }
    }

    @Override
    public void migrateData(String fromTable, String toTable) throws SQLException {
        release(); // Release any previous ResultSet
        String updateQuery = "INSERT INTO " + toTable + " ( SELECT * FROM " + fromTable + " )";

        String deleteQuery = "DELETE FROM " + fromTable;
        Statement stmt = null;
        try {
            Connection conn = getConnection();
            stmt = conn.createStatement();

            // Insert data from the fromTable to the toTable
            int updateCount = stmt.executeUpdate(updateQuery);
            LOG.info("Migrated " + updateCount + " records from " + fromTable + " to " + toTable);

            // Delete the records from the fromTable
            int deleteCount = stmt.executeUpdate(deleteQuery);

            // If the counts do not match, fail the transaction
            if (updateCount != deleteCount) {
                conn.rollback();
                throw new RuntimeException("Inconsistent record counts");
            }
            conn.commit();
        } catch (SQLException ex) {
            LOG.error("Unable to migrate data from " + fromTable + " to " + toTable, ex);
            throw ex;
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException ex) {
                    LOG.error("Unable to close statement", ex);
                }
            }
        }
    }
}
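
Usage note

The listing leaves getConnection() and getDriverClass() abstract. Below is a
minimal sketch of how a database-specific manager typically completes them,
modeled loosely on Sqoop's GenericJdbcManager: it supplies the JDBC driver
class name and caches the single Connection produced by the inherited
makeConnection(). The class name ExampleJdbcManager and the driver string are
illustrative, not part of the original source.

package com.cloudera.sqoop.manager;

import java.sql.Connection;
import java.sql.SQLException;

import com.cloudera.sqoop.SqoopOptions;

public class ExampleJdbcManager extends SqlManager {

    private final String driverClass; // e.g. "org.hsqldb.jdbcDriver" (illustrative)
    private Connection connection;    // lazily-created singleton

    public ExampleJdbcManager(final String driverClass, final SqoopOptions opts) {
        super(opts);
        this.driverClass = driverClass;
    }

    @Override
    public String getDriverClass() {
        return driverClass;
    }

    @Override
    public Connection getConnection() throws SQLException {
        if (null == this.connection) {
            // makeConnection() (inherited) sets the metadata isolation level
            // and disables auto-commit, as shown in the listing above.
            this.connection = makeConnection();
        }
        return this.connection;
    }

    @Override
    public void close() throws SQLException {
        super.close(); // releases any statement still held by the manager
        if (null != this.connection) {
            this.connection.close();
            this.connection = null;
        }
    }
}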