com.cloudera.sqoop.testutil.ExportJobTestCase.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.sqoop.testutil.ExportJobTestCase.java

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.testutil;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.StringUtils;

import com.cloudera.sqoop.Sqoop;
import com.cloudera.sqoop.mapreduce.ExportOutputFormat;
import com.cloudera.sqoop.tool.ExportTool;

/**
 * Class that implements common methods required for tests which export data
 * from HDFS to databases, to verify correct export.
 */
public class ExportJobTestCase extends BaseSqoopTestCase {

    public static final Log LOG = LogFactory.getLog(ExportJobTestCase.class.getName());

    protected String getTablePrefix() {
        return "EXPORT_TABLE_";
    }

    /**
     * @return the maximum rows to fold into an INSERT statement.
     * HSQLDB can only support the single-row INSERT syntax. Other databases
     * can support greater numbers of rows per statement.
     */
    protected int getMaxRowsPerStatement() {
        return 1;
    }

    /**
     * @return a connection to the database under test.
     */
    protected Connection getConnection() {
        try {
            return getTestServer().getConnection();
        } catch (SQLException sqlE) {
            LOG.error("Could not get connection to test server: " + sqlE);
            return null;
        }
    }

    /**
     * Create the argv to pass to Sqoop.
     * @param includeHadoopFlags if true, then include -D various.settings=values
     * @param rowsPerStmt number of rows to export in a single INSERT statement.
     * @param statementsPerTx ## of statements to use in a transaction.
     * @return the argv as an array of strings.
     */
    protected String[] getArgv(boolean includeHadoopFlags, int rowsPerStmt, int statementsPerTx,
            String... additionalArgv) {
        ArrayList<String> args = new ArrayList<String>();

        if (includeHadoopFlags) {
            CommonArgs.addHadoopFlags(args);
            args.add("-D");
            int realRowsPerStmt = Math.min(rowsPerStmt, getMaxRowsPerStatement());
            if (realRowsPerStmt != rowsPerStmt) {
                LOG.warn("Rows per statement set to " + realRowsPerStmt + " by getMaxRowsPerStatement() limit.");
            }
            args.add(ExportOutputFormat.RECORDS_PER_STATEMENT_KEY + "=" + realRowsPerStmt);
            args.add("-D");
            args.add(ExportOutputFormat.STATEMENTS_PER_TRANSACTION_KEY + "=" + statementsPerTx);
        }

        // Any additional Hadoop flags (-D foo=bar) are prepended.
        if (null != additionalArgv) {
            boolean prevIsFlag = false;
            for (String arg : additionalArgv) {
                if (arg.equals("-D")) {
                    args.add(arg);
                    prevIsFlag = true;
                } else if (prevIsFlag) {
                    args.add(arg);
                    prevIsFlag = false;
                }
            }
        }

        // The sqoop-specific additional args are then added.
        if (null != additionalArgv) {
            boolean prevIsFlag = false;
            for (String arg : additionalArgv) {
                if (arg.equals("-D")) {
                    prevIsFlag = true;
                    continue;
                } else if (prevIsFlag) {
                    prevIsFlag = false;
                    continue;
                } else {
                    // normal argument.
                    args.add(arg);
                }
            }
        }

        args.add("--table");
        args.add(getTableName());
        args.add("--export-dir");
        args.add(getTablePath().toString());
        args.add("--connect");
        args.add(getConnectString());
        args.add("--fields-terminated-by");
        args.add("\\t");
        args.add("--lines-terminated-by");
        args.add("\\n");
        args.add("-m");
        args.add("1");

        LOG.debug("args:");
        for (String a : args) {
            LOG.debug("  " + a);
        }

        return args.toArray(new String[0]);
    }

    /** When exporting text columns, what should the text contain? */
    protected String getMsgPrefix() {
        return "textfield";
    }

    /** @return the minimum 'id' value in the table */
    protected int getMinRowId(Connection conn) throws SQLException {
        PreparedStatement statement = conn.prepareStatement("SELECT MIN(id) FROM " + getTableName(),
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        int minVal = 0;
        try {
            ResultSet rs = statement.executeQuery();
            try {
                rs.next();
                minVal = rs.getInt(1);
            } finally {
                rs.close();
            }
        } finally {
            statement.close();
        }

        return minVal;
    }

    /** @return the maximum 'id' value in the table */
    protected int getMaxRowId(Connection conn) throws SQLException {
        PreparedStatement statement = conn.prepareStatement("SELECT MAX(id) FROM " + getTableName(),
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        int maxVal = 0;
        try {
            ResultSet rs = statement.executeQuery();
            try {
                rs.next();
                maxVal = rs.getInt(1);
            } finally {
                rs.close();
            }
        } finally {
            statement.close();
        }

        return maxVal;
    }

    /**
     * Check that we got back the expected row set.
     * @param expectedNumRecords The number of records we expected to load
     * into the database.
     */
    protected void verifyExport(int expectedNumRecords) throws IOException, SQLException {
        Connection conn = getConnection();
        verifyExport(expectedNumRecords, conn);
    }

    /**
     * Check that we got back the expected row set.
     * @param expectedNumRecords The number of records we expected to load
     * into the database.
     * @param conn the db connection to use.
     */
    protected void verifyExport(int expectedNumRecords, Connection conn) throws IOException, SQLException {
        LOG.info("Verifying export: " + getTableName());
        // Check that we got back the correct number of records.
        PreparedStatement statement = conn.prepareStatement("SELECT COUNT(*) FROM " + getTableName(),
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        int actualNumRecords = 0;
        ResultSet rs = null;
        try {
            rs = statement.executeQuery();
            try {
                rs.next();
                actualNumRecords = rs.getInt(1);
            } finally {
                rs.close();
            }
        } finally {
            statement.close();
        }

        assertEquals("Got back unexpected row count", expectedNumRecords, actualNumRecords);

        if (expectedNumRecords == 0) {
            return; // Nothing more to verify.
        }

        // Check that we start with row 0.
        int minVal = getMinRowId(conn);
        assertEquals("Minimum row was not zero", 0, minVal);

        // Check that the last row we loaded is numRows - 1
        int maxVal = getMaxRowId(conn);
        assertEquals("Maximum row had invalid id", expectedNumRecords - 1, maxVal);

        // Check that the string values associated with these points match up.
        statement = conn.prepareStatement("SELECT msg FROM " + getTableName() + " WHERE id = " + minVal,
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        String minMsg = "";
        try {
            rs = statement.executeQuery();
            try {
                rs.next();
                minMsg = rs.getString(1);
            } finally {
                rs.close();
            }
        } finally {
            statement.close();
        }

        assertEquals("Invalid msg field for min value", getMsgPrefix() + minVal, minMsg);

        statement = conn.prepareStatement("SELECT msg FROM " + getTableName() + " WHERE id = " + maxVal,
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        String maxMsg = "";
        try {
            rs = statement.executeQuery();
            try {
                rs.next();
                maxMsg = rs.getString(1);
            } finally {
                rs.close();
            }
        } finally {
            statement.close();
        }

        assertEquals("Invalid msg field for min value", getMsgPrefix() + maxVal, maxMsg);
    }

    /**
     * Run a MapReduce-based export (using the argv provided to control
     * execution).
     * @return the generated jar filename
     */
    protected List<String> runExport(String[] argv) throws IOException {
        // run the tool through the normal entry-point.
        int ret;
        List<String> generatedJars = null;
        try {
            ExportTool exporter = new ExportTool();
            Sqoop sqoop = new Sqoop(exporter);
            ret = Sqoop.runSqoop(sqoop, argv);
            generatedJars = exporter.getGeneratedJarFiles();
        } catch (Exception e) {
            LOG.error("Got exception running Sqoop: " + StringUtils.stringifyException(e));
            ret = 1;
        }

        // expect a successful return.
        if (0 != ret) {
            throw new IOException("Failure during job; return status " + ret);
        }

        return generatedJars;
    }

}