/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.manager;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.util.FileListing;

/**
 * Test the PostgresqlManager and DirectPostgresqlManager implementations.
 * The former uses the postgres JDBC driver to perform an import;
 * the latter uses pg_dump to facilitate it.
 *
 * Since this requires a Postgresql installation on your local machine to use,
 * this class is named in such a way that Hadoop's default QA process does not
 * run it. You need to run this manually with -Dtestcase=PostgresqlImportTest
 * or -Dthirdparty=true.
 *
 * You need to put Postgresql's JDBC driver library into a location where
 * Hadoop can access it (e.g., $HADOOP_HOME/lib).
 *
 * To configure a postgresql database to allow local connections, put the
 * following in /etc/postgresql/8.3/main/pg_hba.conf:
 *
 *   local all all trust
 *   host  all all 127.0.0.1/32 trust
 *   host  all all ::1/128      trust
 *
 * ... and comment out any other lines referencing 127.0.0.1 or ::1.
 *
 * Also in the file /etc/postgresql/8.3/main/postgresql.conf, uncomment
 * the line that starts with listen_addresses and set its value to '*' as
 * follows:
 *
 *   listen_addresses = '*'
 *
 * For postgresql 8.1, this may be in /var/lib/pgsql/data, instead. You may
 * need to restart the postgresql service after modifying this file.
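 *
 * (With the "trust" entries above, local connections are accepted without a
 * password, which is why the sqoop.test.postgresql.connectstring.password
 * property used below can normally be left unset.)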
 *
 * You should also create a sqooptest user and database:
 *
 *   $ sudo -u postgres psql -U postgres template1
 *   template1=> CREATE USER sqooptest;
 *   template1=> CREATE DATABASE sqooptest;
 *   template1=> GRANT ALL ON DATABASE sqooptest TO sqooptest;
 *   template1=> \q
 */
public class PostgresqlImportTest extends ImportJobTestCase {

  public static final Log LOG = LogFactory.getLog(
      PostgresqlImportTest.class.getName());

  static final String HOST_URL = System.getProperty(
      "sqoop.test.postgresql.connectstring.host_url",
      "jdbc:postgresql://localhost/");
  static final String DATABASE_USER = System.getProperty(
      "sqoop.test.postgresql.connectstring.username", "sqooptest");
  static final String DATABASE_NAME = System.getProperty(
      "sqoop.test.postgresql.connectstring.database", "sqooptest");
  static final String PASSWORD = System.getProperty(
      "sqoop.test.postgresql.connectstring.password");

  static final String TABLE_NAME = "EMPLOYEES_PG";
  static final String NULL_TABLE_NAME = "NULL_EMPLOYEES_PG";
  static final String SPECIAL_TABLE_NAME = "EMPLOYEES_PG's";
  static final String DIFFERENT_TABLE_NAME = "DIFFERENT_TABLE";
  static final String SCHEMA_PUBLIC = "public";
  static final String SCHEMA_SPECIAL = "special";
  static final String CONNECT_STRING = HOST_URL + DATABASE_NAME;

  @Override
  protected boolean useHsqldbTestServer() {
    return false;
  }

  @Before
  public void setUp() {
    super.setUp();

    LOG.debug("Setting up another postgresql test: " + CONNECT_STRING);

    setUpData(TABLE_NAME, SCHEMA_PUBLIC, false);
    setUpData(NULL_TABLE_NAME, SCHEMA_PUBLIC, true);
    setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC, false);
    setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL, false);

    LOG.debug("setUp complete.");
  }
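  /**
   * (Re)creates the given table in the given schema and loads the fixed
   * three-row employee fixture; when nullEntry is set, a fourth row
   * consisting entirely of NULL values is added as well.
   */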
  public void setUpData(String tableName, String schema, boolean nullEntry) {
    SqoopOptions options = new SqoopOptions(CONNECT_STRING, tableName);
    options.setUsername(DATABASE_USER);
    options.setPassword(PASSWORD);

    ConnManager manager = null;
    Connection connection = null;
    Statement st = null;

    try {
      manager = new PostgresqlManager(options);
      connection = manager.getConnection();
      connection.setAutoCommit(false);
      st = connection.createStatement();

      // Create the schema in a dummy way: always issue CREATE SCHEMA and
      // ignore the error if the schema already exists.
      try {
        st.executeUpdate("CREATE SCHEMA " + manager.escapeTableName(schema));
        connection.commit();
      } catch (SQLException e) {
        LOG.info("Couldn't create schema " + schema
            + " (this is fine as long as the schema already exists).");
        connection.rollback();
      }

      String fullTableName = manager.escapeTableName(schema)
          + "." + manager.escapeTableName(tableName);
      LOG.info("Creating table: " + fullTableName);

      try {
        // Try to remove the table first. DROP TABLE IF EXISTS didn't
        // get added until pg 8.3, so we just use "DROP TABLE" and ignore
        // any exception here if one occurs.
        st.executeUpdate("DROP TABLE " + fullTableName);
      } catch (SQLException e) {
        LOG.info("Couldn't drop table " + schema + "." + tableName + " (ok)");
        // Now we need to reset the transaction.
        connection.rollback();
      }

      st.executeUpdate("CREATE TABLE " + fullTableName + " ("
          + manager.escapeColName("id") + " INT NOT NULL PRIMARY KEY, "
          + manager.escapeColName("name") + " VARCHAR(24) NOT NULL, "
          + manager.escapeColName("start_date") + " DATE, "
          + manager.escapeColName("Salary") + " FLOAT, "
          + manager.escapeColName("Fired") + " BOOL, "
          + manager.escapeColName("dept") + " VARCHAR(32))");

      st.executeUpdate("INSERT INTO " + fullTableName
          + " VALUES(1,'Aaron','2009-05-14',1000000.00,TRUE,'engineering')");
      st.executeUpdate("INSERT INTO " + fullTableName
          + " VALUES(2,'Bob','2009-04-20',400.00,TRUE,'sales')");
      st.executeUpdate("INSERT INTO " + fullTableName
          + " VALUES(3,'Fred','2009-01-23',15.00,FALSE,'marketing')");
      if (nullEntry) {
        st.executeUpdate("INSERT INTO " + fullTableName
            + " VALUES(4,'Mike',NULL,NULL,NULL,NULL)");
      }

      connection.commit();
    } catch (SQLException sqlE) {
      LOG.error("Encountered SQL Exception: " + sqlE);
      sqlE.printStackTrace();
      fail("SQLException when running test setUp(): " + sqlE);
    } finally {
      try {
        if (null != st) {
          st.close();
        }
        if (null != manager) {
          manager.close();
        }
      } catch (SQLException sqlE) {
        LOG.warn("Got SQLException when closing connection: " + sqlE);
      }
    }

    LOG.debug("setUp complete.");
  }

  private String[] getArgv(boolean isDirect, String tableName,
      String... extraArgs) {
    ArrayList<String> args = new ArrayList<String>();

    CommonArgs.addHadoopFlags(args);

    args.add("--table");
    args.add(tableName);
    args.add("--warehouse-dir");
    args.add(getWarehouseDir());
    args.add("--connect");
    args.add(CONNECT_STRING);
    args.add("--username");
    args.add(DATABASE_USER);
    args.add("--where");
    args.add("id > 1");
    args.add("-m");
    args.add("1");

    if (isDirect) {
      args.add("--direct");
    }

    for (String arg : extraArgs) {
      args.add(arg);
    }

    return args.toArray(new String[0]);
  }
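  // For orientation: the argument list assembled by getArgv() corresponds
  // roughly to the following command-line invocation (a sketch based on the
  // flags above, not output captured from a real run):
  //
  //   sqoop import --connect jdbc:postgresql://localhost/sqooptest \
  //       --username sqooptest --table EMPLOYEES_PG --where 'id > 1' \
  //       -m 1 --warehouse-dir <warehouse-dir> [--direct] [extra args ...]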
  private void doImportAndVerify(boolean isDirect, String[] expectedResults,
      String tableName, String... extraArgs) throws IOException {
    Path warehousePath = new Path(this.getWarehouseDir());
    Path tablePath = new Path(warehousePath, tableName);

    // If importing with a merge step, the directory should already exist and
    // the output should come from a reducer (part-r-*) rather than from a
    // map-only task (part-m-*).
    boolean isMerge = Arrays.asList(extraArgs).contains("--merge-key");
    Path filePath = new Path(tablePath,
        isMerge ? "part-r-00000" : "part-m-00000");

    File tableFile = new File(tablePath.toString());
    if (tableFile.exists() && tableFile.isDirectory() && !isMerge) {
      // remove the directory before running the import.
      FileListing.recursiveDeleteDir(tableFile);
    }

    String[] argv = getArgv(isDirect, tableName, extraArgs);
    try {
      runImport(argv);
    } catch (IOException ioe) {
      LOG.error("Got IOException during import: " + ioe.toString());
      ioe.printStackTrace();
      fail(ioe.toString());
    }

    File f = new File(filePath.toString());
    assertTrue("Could not find imported data file, " + f, f.exists());
    BufferedReader r = null;
    try {
      // Read through the file and make sure it's all there.
      r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
      for (String expectedLine : expectedResults) {
        assertEquals(expectedLine, r.readLine());
      }
    } catch (IOException ioe) {
      LOG.error("Got IOException verifying results: " + ioe.toString());
      ioe.printStackTrace();
      fail(ioe.toString());
    } finally {
      IOUtils.closeStream(r);
    }
  }

  @Test
  public void testJdbcBasedImport() throws IOException {
    String[] expectedResults = {
        "2,Bob,2009-04-20,400.0,true,sales",
        "3,Fred,2009-01-23,15.0,false,marketing",
    };

    doImportAndVerify(false, expectedResults, TABLE_NAME);
  }

  @Test
  public void testDirectImport() throws IOException {
    // The direct (pg_dump) code path renders the same rows slightly
    // differently than the JDBC path: 400 instead of 400.0, and TRUE/FALSE
    // instead of true/false.
    String[] expectedResults = {
        "2,Bob,2009-04-20,400,TRUE,sales",
        "3,Fred,2009-01-23,15,FALSE,marketing",
    };

    doImportAndVerify(true, expectedResults, TABLE_NAME);
  }

  @Test
  public void testListTables() throws IOException {
    SqoopOptions options = new SqoopOptions(new Configuration());
    options.setConnectString(CONNECT_STRING);
    options.setUsername(DATABASE_USER);

    ConnManager mgr = new PostgresqlManager(options);
    String[] tables = mgr.listTables();
    Arrays.sort(tables);
    assertTrue(TABLE_NAME + " is not found!",
        Arrays.binarySearch(tables, TABLE_NAME) >= 0);
  }

  @Test
  public void testTableNameWithSpecialCharacter() throws IOException {
    String[] expectedResults = {
        "2,Bob,2009-04-20,400.0,true,sales",
        "3,Fred,2009-01-23,15.0,false,marketing",
    };

    doImportAndVerify(false, expectedResults, SPECIAL_TABLE_NAME);
  }

  @Test
  public void testIncrementalImport() throws IOException {
    String[] expectedResults = { };

    String[] extraArgs = {
        "--incremental", "lastmodified",
        "--check-column", "start_date",
    };

    doImportAndVerify(false, expectedResults, TABLE_NAME, extraArgs);
  }

  @Test
  public void testDirectIncrementalImport() throws IOException {
    String[] expectedResults = { };

    String[] extraArgs = {
        "--incremental", "lastmodified",
        "--check-column", "start_date",
    };

    doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);
  }

  @Test
  public void testDirectIncrementalImportMerge() throws IOException {
    String[] expectedResults = { };

    String[] extraArgs = {
        "--incremental", "lastmodified",
        "--check-column", "start_date",
    };

    doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);

    // Run a second incremental import with a merge step keyed on id.
    extraArgs = new String[] {
        "--incremental", "lastmodified",
        "--check-column", "start_date",
        "--merge-key", "id",
        "--last-value", "2009-04-20",
    };

    doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs);
  }

  @Test
  public void testDifferentSchemaImport() throws IOException {
    String[] expectedResults = {
        "2,Bob,2009-04-20,400.0,true,sales",
        "3,Fred,2009-01-23,15.0,false,marketing",
    };

    String[] extraArgs = {
        "--",
        "--schema", SCHEMA_SPECIAL,
    };

    doImportAndVerify(false, expectedResults, DIFFERENT_TABLE_NAME, extraArgs);
  }

  @Test
  public void testDifferentSchemaImportDirect() throws IOException {
    String[] expectedResults = {
        "2,Bob,2009-04-20,400,TRUE,sales",
        "3,Fred,2009-01-23,15,FALSE,marketing",
    };

    String[] extraArgs = {
        "--",
        "--schema", SCHEMA_SPECIAL,
    };

    doImportAndVerify(true, expectedResults, DIFFERENT_TABLE_NAME, extraArgs);
  }

  @Test
  public void testNullEscapeCharacters() throws Exception {
    String[] expectedResults = {
        "2,Bob,2009-04-20,400,TRUE,sales",
        "3,Fred,2009-01-23,15,FALSE,marketing",
        "4,Mike,\\N,\\N,\\N,\\N",
    };

    String[] extraArgs = {
        "--null-string", "\\\\\\\\N",
        "--null-non-string", "\\\\\\\\N",
    };

    doImportAndVerify(true, expectedResults, NULL_TABLE_NAME, extraArgs);
  }
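  // Note on the escaping in testNullEscapeCharacters: the Java literal
  // "\\\\\\\\N" is the four-backslash runtime string \\\\N. The extra
  // backslashes appear to be consumed by the further layers of unescaping
  // between the option value and the generated output, so the data file
  // ends up containing the two-character sequence \N, as the expected
  // results show. (This reading is inferred from the test data itself.)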
"3,Fred,2009-01-23,15,REAL_FALSE,marketing", }; String[] extraArgs = { "--", "--boolean-true-string", "REAL_TRUE", "--boolean-false-string", "REAL_FALSE", }; doImportAndVerify(true, expectedResults, TABLE_NAME, extraArgs); } }