com.cloudera.sqoop.manager.PostgresqlImportTest.java Source code

Introduction

Here is the source code for com.cloudera.sqoop.manager.PostgresqlImportTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.manager;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.File;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Before;
import org.junit.Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.util.FileListing;

/**
 * Test the PostgresqlManager and DirectPostgresqlManager implementations.
 * The former uses the postgres JDBC driver to perform an import;
 * the latter uses pg_dump to facilitate it.
 *
 * Since this test requires a PostgreSQL installation on your local machine,
 * this class is named in such a way that Hadoop's default QA process does not
 * run it. You need to run this manually with -Dtestcase=PostgresqlImportTest or
 * -Dthirdparty=true.
 *
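 * For example (assuming the standard Ant test target of the Sqoop build of
 * this era):
 *     $ ant test -Dtestcase=PostgresqlImportTest
 *     $ ant test -Dthirdparty=true
 *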
 * You need to put Postgresql's JDBC driver library into a location where
 * Hadoop can access it (e.g., $HADOOP_HOME/lib).
 *
 * To configure a postgresql database to allow local connections, put the
 * following in /etc/postgresql/8.3/main/pg_hba.conf:
 *     local  all all trust
 *     host all all 127.0.0.1/32 trust
 *     host all all ::1/128      trust
 *
 * ... and comment out any other lines referencing 127.0.0.1 or ::1.
 *
 * Also in the file /etc/postgresql/8.3/main/postgresql.conf, uncomment
 * the line that starts with listen_addresses and set its value to '*' as
 * follows
 *     listen_addresses = '*'
 *
 * For postgresql 8.1, this may be in /var/lib/pgsql/data, instead.  You may
 * need to restart the postgresql service after modifying this file.
 *
 * You should also create a sqooptest user and database:
 *
 * $ sudo -u postgres psql -U postgres template1
 * template1=> CREATE USER sqooptest;
 * template1=> CREATE DATABASE sqooptest;
 * template1=> GRANT ALL ON DATABASE sqooptest TO sqooptest;
 * template1=> \q
 *
 */
public class PostgresqlImportTest extends ImportJobTestCase {

    public static final Log LOG = LogFactory.getLog(PostgresqlImportTest.class.getName());

    static final String HOST_URL = System.getProperty("sqoop.test.postgresql.connectstring.host_url",
            "jdbc:postgresql://localhost/");
    static final String DATABASE_USER = System.getProperty("sqoop.test.postgresql.connectstring.username",
            "sqooptest");
    static final String DATABASE_NAME = System.getProperty("sqoop.test.postgresql.connectstring.database",
            "sqooptest");
    static final String PASSWORD = System.getProperty("sqoop.test.postgresql.connectstring.password");

    static final String TABLE_NAME = "EMPLOYEES_PG";
    static final String NULL_TABLE_NAME = "NULL_EMPLOYEES_PG";
    static final String SPECIAL_TABLE_NAME = "EMPLOYEES_PG's";
    static final String DIFFERENT_TABLE_NAME = "DIFFERENT_TABLE";
    static final String SCHEMA_PUBLIC = "public";
    static final String SCHEMA_SPECIAL = "special";
    static final String CONNECT_STRING = HOST_URL + DATABASE_NAME;

    // Use the PostgreSQL connect string configured above instead of the default in-memory HSQLDB test server.
    @Override
    protected boolean useHsqldbTestServer() {
        return false;
    }

    @Before
    public void setUp() {
        super.setUp();

        LOG.debug("Setting up another postgresql test: " + CONNECT_STRING);

        setUpData(TABLE_NAME, SCHEMA_PUBLIC, false);
        setUpData(NULL_TABLE_NAME, SCHEMA_PUBLIC, true);
        setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC, false);
        setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL, false);

        LOG.debug("setUp complete.");
    }

    public void setUpData(String tableName, String schema, boolean nullEntry) {
        SqoopOptions options = new SqoopOptions(CONNECT_STRING, tableName);
        options.setUsername(DATABASE_USER);
        options.setPassword(PASSWORD);

        ConnManager manager = null;
        Connection connection = null;
        Statement st = null;

        try {
            manager = new PostgresqlManager(options);
            connection = manager.getConnection();
            connection.setAutoCommit(false);
            st = connection.createStatement();

            // Create the schema in a crude "if not exists" fashion: always issue
            // the CREATE and ignore any error if the schema is already there.
            try {
                st.executeUpdate("CREATE SCHEMA " + manager.escapeTableName(schema));
                connection.commit();
            } catch (SQLException e) {
                LOG.info("Couldn't create schema " + schema
                        + " (this is OK as long as the schema already exists).");
                connection.rollback();
            }

            String fullTableName = manager.escapeTableName(schema) + "." + manager.escapeTableName(tableName);
            LOG.info("Creating table: " + fullTableName);

            try {
                // Try to remove the table first. DROP TABLE IF EXISTS didn't
                // get added until pg 8.3, so we just use "DROP TABLE" and ignore
                // any exception here if one occurs.
                st.executeUpdate("DROP TABLE " + fullTableName);
            } catch (SQLException e) {
                LOG.info("Couldn't drop table " + schema + "." + tableName + " (ok)");
                // Now we need to reset the transaction.
                connection.rollback();
            }

            st.executeUpdate("CREATE TABLE " + fullTableName + " (" + manager.escapeColName("id")
                    + " INT NOT NULL PRIMARY KEY, " + manager.escapeColName("name") + " VARCHAR(24) NOT NULL, "
                    + manager.escapeColName("start_date") + " DATE, " + manager.escapeColName("Salary") + " FLOAT, "
                    + manager.escapeColName("Fired") + " BOOL, " + manager.escapeColName("dept") + " VARCHAR(32))");

            st.executeUpdate("INSERT INTO " + fullTableName
                    + " VALUES(1,'Aaron','2009-05-14',1000000.00,TRUE,'engineering')");
            st.executeUpdate("INSERT INTO " + fullTableName + " VALUES(2,'Bob','2009-04-20',400.00,TRUE,'sales')");
            st.executeUpdate(
                    "INSERT INTO " + fullTableName + " VALUES(3,'Fred','2009-01-23',15.00,FALSE,'marketing')");
            if (nullEntry) {
                st.executeUpdate("INSERT INTO " + fullTableName + " VALUES(4,'Mike',NULL,NULL,NULL,NULL)");

            }
            connection.commit();
        } catch (SQLException sqlE) {
            LOG.error("Encountered SQL Exception: " + sqlE);
            sqlE.printStackTrace();
            fail("SQLException when running test setUp(): " + sqlE);
        } finally {
            try {
                if (null != st) {
                    st.close();
                }

                if (null != manager) {
                    manager.close();
                }
            } catch (SQLException sqlE) {
                LOG.warn("Got SQLException when closing connection: " + sqlE);
            }
        }

        LOG.debug("setUp complete.");
    }

    private String[] getArgv(boolean isDirect, String tableName, String... extraArgs) {
        ArrayList<String> args = new ArrayList<String>();

        CommonArgs.addHadoopFlags(args);

        args.add("--table");
        args.add(tableName);
        args.add("--warehouse-dir");
        args.add(getWarehouseDir());
        args.add("--connect");
        args.add(CONNECT_STRING);
        args.add("--username");
        args.add(DATABASE_USER);
        args.add("--where");
        args.add("id > 1");
        args.add("-m");
        args.add("1");

        if (isDirect) {
            args.add("--direct");
        }

        for (String arg : extraArgs) {
            args.add(arg);
        }

        return args.toArray(new String[0]);
    }

    private void doImportAndVerify(boolean isDirect, String[] expectedResults, String tableName,
            String... extraArgs) throws IOException {

        Path warehousePath = new Path(this.getWarehouseDir());
        Path tablePath = new Path(warehousePath, tableName);

        // if importing with merge step, directory should exist and output should be from a reducer
        boolean isMerge = Arrays.asList(extraArgs).contains("--merge-key");
        Path filePath = new Path(tablePath, isMerge ? "part-r-00000" : "part-m-00000");

        File tableFile = new File(tablePath.toString());
        if (tableFile.exists() && tableFile.isDirectory() && !isMerge) {
            // remove the directory before running the import.
            FileListing.recursiveDeleteDir(tableFile);
        }

        String[] argv = getArgv(isDirect, tableName, extraArgs);
        try {
            runImport(argv);
        } catch (IOException ioe) {
            LOG.error("Got IOException during import: " + ioe.toString());
            ioe.printStackTrace();
            fail(ioe.toString());
        }

        File f = new File(filePath.toString());
        assertTrue("Could not find imported data file, " + f, f.exists());
        BufferedReader r = null;
        try {
            // Read through the file and make sure it's all there.
            r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
            for (String expectedLine : expectedResults) {
                assertEquals(expectedLine, r.readLine());
            }
        } catch (IOException ioe) {
            LOG.error("Got IOException verifying results: " + ioe.toString());
            ioe.printStackTrace();
            fail(ioe.toString());
        } finally {
            IOUtils.closeStream(r);
        }
    }

    @Test
    public void testJdbcBasedImport() throws IOException {
        String[] expectedResults = { "2,Bob,2009-04-20,400.0,true,sales",
                "3,Fred,2009-01-23,15.0,false,marketing", };

        doImportAndVerify(false, expectedResults, TABLE_NAME);
    }

    @Test
    public void testDirectImport() throws IOException {
        String[] expectedResults = { "2,Bob,2009-04-20,400,TRUE,sales", "3,Fred,2009-01-23,15,FALSE,marketing", };

        doImportAndVerify(true, expectedResults, TABLE_NAME);
    }

    @Test
    public void testListTables() throws IOException {
        SqoopOptions options = new SqoopOptions(new Configuration());
        options.setConnectString(CONNECT_STRING);
        options.setUsername(DATABASE_USER);

        ConnManager mgr = new PostgresqlManager(options);
        String[] tables = mgr.listTables();
        Arrays.sort(tables);
        assertTrue(TABLE_NAME + " is not found!", Arrays.binarySearch(tables, TABLE_NAME) >= 0);
    }

    @Test
    public void testTableNameWithSpecialCharacter() throws IOException {
        String[] expectedResults = { "2,Bob,2009-04-20,400.0,true,sales",
                "3,Fred,2009-01-23,15.0,false,marketing", };

        doImportAndVerify(false, expectedResults, SPECIAL_TABLE_NAME);
    }

    @Test
    public void testIncrementalImport() throws IOException {
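        // An empty expected-result set: the test only verifies that the incremental
        // import runs and writes an output file, not its contents.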
        String[] expectedResults = {};

        String[] extraArgs = { "--incremental", "lastmodified", "--check-column", "start_date", };

        doImportAndVerify(false, expectedResults, TABLE_NAME, extraArgs);
    }

    @Test
    public void testDirectIncrementalImport() throws IOException {
        String[] expectedResults = {};

        String[] extraArgs = { "--incremental", "lastmodified", "--check-column", "start_date", };

        doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);
    }

    @Test
    public void testDirectIncrementalImportMerge() throws IOException {
        String[] expectedResults = {};

        String[] extraArgs = { "--incremental", "lastmodified", "--check-column", "start_date", };

        doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);

        extraArgs = new String[] { "--incremental", "lastmodified", "--check-column", "start_date", "--merge-key",
                "id", "--last-value", "2009-04-20" };

        doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);
    }

    @Test
    public void testDifferentSchemaImport() throws IOException {
        String[] expectedResults = { "2,Bob,2009-04-20,400.0,true,sales",
                "3,Fred,2009-01-23,15.0,false,marketing", };

        String[] extraArgs = { "--", "--schema", SCHEMA_SPECIAL, };

        doImportAndVerify(false, expectedResults, DIFFERENT_TABLE_NAME, extraArgs);
    }

    @Test
    public void testDifferentSchemaImportDirect() throws IOException {
        String[] expectedResults = { "2,Bob,2009-04-20,400,TRUE,sales", "3,Fred,2009-01-23,15,FALSE,marketing", };

        String[] extraArgs = { "--", "--schema", SCHEMA_SPECIAL, };

        doImportAndVerify(true, expectedResults, DIFFERENT_TABLE_NAME, extraArgs);
    }

    @Test
    public void testNullEscapeCharacters() throws Exception {
        String[] expectedResults = { "2,Bob,2009-04-20,400,TRUE,sales", "3,Fred,2009-01-23,15,FALSE,marketing",
                "4,Mike,\\N,\\N,\\N,\\N", };

        String[] extraArgs = { "--null-string", "\\\\\\\\N", "--null-non-string", "\\\\\\\\N", };

        doImportAndVerify(true, expectedResults, NULL_TABLE_NAME, extraArgs);
    }

    @Test
    public void testDifferentBooleanValues() throws Exception {
        String[] expectedResults = { "2,Bob,2009-04-20,400,REAL_TRUE,sales",
                "3,Fred,2009-01-23,15,REAL_FALSE,marketing", };

        String[] extraArgs = { "--", "--boolean-true-string", "REAL_TRUE", "--boolean-false-string",
                "REAL_FALSE", };

        doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);
    }
}