co.cask.hydrator.plugin.db.batch.sink.ETLDBOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.hydrator.plugin.db.batch.sink.ETLDBOutputFormat.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.db.batch.sink;

import co.cask.hydrator.plugin.DBUtils;
import co.cask.hydrator.plugin.JDBCDriverShim;
import co.cask.hydrator.plugin.db.batch.NoOpCommitConnection;
import com.google.common.base.Throwables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.sql.Connection;
import java.sql.Driver;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

/**
 * Class that extends {@link DBOutputFormat} to load the database driver class correctly.
 *
 * <p>When {@link DriverManager} has no suitable driver for the configured connection URL, the driver class
 * named by {@link DBConfiguration#DRIVER_CLASS_PROPERTY} is loaded through the configuration's class loader
 * and registered through a {@link JDBCDriverShim}. The shim is deregistered again when the record writer
 * returned by {@link #getRecordWriter(TaskAttemptContext)} is closed.
 *
 * @param <K> - Key passed to this class to be written
 * @param <V> - Value passed to this class to be written. The value is ignored.
 */
public class ETLDBOutputFormat<K extends DBWritable, V> extends DBOutputFormat<K, V> {
    /**
     * Boolean configuration key, treated as {@code false} when unset. When {@code true} the connection is
     * wrapped in a {@link NoOpCommitConnection}; otherwise auto-commit is switched off on the connection.
     */
    public static final String AUTO_COMMIT_ENABLED = "co.cask.hydrator.db.output.autocommit.enabled";

    private static final Logger LOG = LoggerFactory.getLogger(ETLDBOutputFormat.class);
    // Driver instance created from the configured driver class; cached so it is instantiated only once.
    private Driver driver;
    // Shim currently registered with DriverManager on behalf of 'driver'; null when nothing is registered.
    private JDBCDriverShim driverShim;

    /**
     * Opens a connection, prepares the statement produced by {@link #constructQuery(String, String[])} and
     * returns a record writer that executes the batch and commits on close.
     *
     * @param context task context whose configuration supplies the database settings
     * @return a record writer bound to a freshly opened connection
     * @throws IOException if the connection, the statement or the writer cannot be created; the original
     *                     failure is attached as the cause
     */
    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
        Configuration conf = context.getConfiguration();
        DBConfiguration dbConf = new DBConfiguration(conf);
        String tableName = dbConf.getOutputTableName();
        String[] fieldNames = dbConf.getOutputFieldNames();

        if (fieldNames == null) {
            fieldNames = new String[dbConf.getOutputFieldCount()];
        }

        Connection connection = null;
        PreparedStatement statement = null;
        try {
            connection = getConnection(conf);
            statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
            return new DBRecordWriter(connection, statement) {

                private boolean emptyData = true;

                // Mirrors the close method of DBOutputFormat, except that executeBatch/commit are skipped
                // when nothing was written. There might be reducers that don't receive any data, and some
                // databases don't support committing an empty batch.
                // Unlike the parent implementation, every cleanup step (statement, connection, driver shim)
                // is attempted even when an earlier step failed; the first failure is the one reported.
                @Override
                public void close(TaskAttemptContext context) throws IOException {
                    IOException failure = null;
                    try {
                        if (!emptyData) {
                            getStatement().executeBatch();
                            getConnection().commit();
                        }
                    } catch (SQLException e) {
                        try {
                            getConnection().rollback();
                        } catch (SQLException ex) {
                            LOG.warn(StringUtils.stringifyException(ex));
                        }
                        failure = new IOException(e.getMessage(), e);
                    } finally {
                        failure = releaseResources(getStatement(), getConnection(), failure);
                    }

                    if (failure != null) {
                        throw failure;
                    }
                }

                @Override
                public void write(K key, V value) throws IOException {
                    super.write(key, value);
                    emptyData = false;
                }
            };
        } catch (Exception ex) {
            // Nothing was handed to the caller, so whatever was opened so far must be released here.
            closeQuietly(statement, connection);
            throw new IOException(ex.getMessage(), ex);
        }
    }

    /**
     * Opens a JDBC connection for the given configuration, registering the configured driver through a
     * {@link JDBCDriverShim} first when {@link DriverManager} knows no suitable driver for the URL.
     *
     * @param conf configuration holding the URL, driver class, optional credentials and
     *             {@link #AUTO_COMMIT_ENABLED}
     * @return the connection, wrapped in a {@link NoOpCommitConnection} when auto-commit is enabled
     */
    private Connection getConnection(Configuration conf) {
        // Kept separately from the (possibly wrapped) result so it can be closed if set-up fails.
        Connection rawConnection = null;
        try {
            String url = conf.get(DBConfiguration.URL_PROPERTY);
            try {
                // throws SQLException if no suitable driver is found
                DriverManager.getDriver(url);
            } catch (SQLException e) {
                if (driverShim == null) {
                    if (driver == null) {
                        ClassLoader classLoader = conf.getClassLoader();
                        @SuppressWarnings("unchecked")
                        Class<? extends Driver> driverClass = (Class<? extends Driver>) classLoader
                                .loadClass(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
                        driver = driverClass.newInstance();

                        // De-register the default driver that gets registered when driver class is loaded.
                        DBUtils.deregisterAllDrivers(driverClass);
                    }

                    driverShim = new JDBCDriverShim(driver);
                    DriverManager.registerDriver(driverShim);
                    LOG.debug("Registered JDBC driver via shim {}. Actual Driver {}.", driverShim, driver);
                }
            }

            String username = conf.get(DBConfiguration.USERNAME_PROPERTY);
            if (username == null) {
                rawConnection = DriverManager.getConnection(url);
            } else {
                rawConnection = DriverManager.getConnection(url, username,
                        conf.get(DBConfiguration.PASSWORD_PROPERTY));
            }

            Connection connection = rawConnection;
            boolean autoCommitEnabled = conf.getBoolean(AUTO_COMMIT_ENABLED, false);
            if (autoCommitEnabled) {
                // hack to work around jdbc drivers like the hive driver that throw exceptions on commit
                connection = new NoOpCommitConnection(rawConnection);
            } else {
                connection.setAutoCommit(false);
            }
            connection.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);
            return connection;
        } catch (Exception e) {
            closeQuietly(null, rawConnection);
            throw Throwables.propagate(e);
        }
    }

    /**
     * Builds the statement text via the parent class and removes a trailing {@code ';'} when present.
     *
     * @param table      name of the target table
     * @param fieldNames names of the columns to write
     * @return the parent's query without a terminating semicolon
     */
    @Override
    public String constructQuery(String table, String[] fieldNames) {
        String query = super.constructQuery(table, fieldNames);
        // Strip the ';' at the end since Oracle doesn't like it.
        // TODO: Perhaps do a conditional if we can find a way to tell that this is going to Oracle
        // However, tested this to work on Mysql and Oracle
        if (query.endsWith(";")) {
            return query.substring(0, query.length() - 1);
        }
        // No terminator to strip; returning the query untouched avoids chopping a meaningful character.
        return query;
    }

    /**
     * Closes the statement and the connection and deregisters the driver shim, attempting every step even
     * if an earlier one fails.
     *
     * @param statement  statement to close
     * @param connection connection to close
     * @param failure    failure already recorded by the caller, or {@code null}
     * @return the first failure encountered (the caller's one takes precedence), or {@code null} if none
     */
    private IOException releaseResources(PreparedStatement statement, Connection connection,
                                         IOException failure) {
        IOException result = failure;
        try {
            statement.close();
        } catch (SQLException e) {
            result = keepFirst(result, new IOException(e.getMessage(), e));
        }
        try {
            connection.close();
        } catch (SQLException e) {
            result = keepFirst(result, new IOException(e.getMessage(), e));
        }
        try {
            DriverManager.deregisterDriver(driverShim);
            // Forget the shim so that a later getConnection call registers a fresh one instead of assuming
            // the deregistered shim is still known to DriverManager.
            driverShim = null;
        } catch (SQLException e) {
            result = keepFirst(result, new IOException(e));
        }
        return result;
    }

    /**
     * Returns {@code primary} when it is set, logging {@code secondary} so that it is not lost; otherwise
     * returns {@code secondary}.
     */
    private static IOException keepFirst(IOException primary, IOException secondary) {
        if (primary == null) {
            return secondary;
        }
        LOG.warn(StringUtils.stringifyException(secondary));
        return primary;
    }

    /**
     * Best-effort close used on failure paths; problems are logged rather than thrown so that they do not
     * hide the failure being reported. Either argument may be {@code null}.
     */
    private static void closeQuietly(PreparedStatement statement, Connection connection) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                LOG.warn(StringUtils.stringifyException(e));
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                LOG.warn(StringUtils.stringifyException(e));
            }
        }
    }
}