Java tutorial: LobAvroImportTestCase, an abstract test case for Sqoop BLOB/CLOB imports to Avro
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.testutil;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.sql.SQLException;
import java.util.ArrayList;

import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.FsInput;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.sqoop.io.CodecMap;
import org.apache.sqoop.lib.BlobRef;

/**
 * Tests BLOB/CLOB import for Avro.
 */
public abstract class LobAvroImportTestCase extends ImportJobTestCase {

  private Log log;

  public LobAvroImportTestCase() {
    this.log = LogFactory.getLog(LobAvroImportTestCase.class.getName());
  }

  /**
   * @return the Log object to use for reporting during this test
   */
  protected abstract Log getLogger();

  /**
   * @return a "friendly" name for the database, e.g. "mysql" or "oracle".
   */
  protected abstract String getDbFriendlyName();

  @Override
  protected String getTablePrefix() {
    return "LOB_" + getDbFriendlyName().toUpperCase() + "_";
  }

  @Override
  protected boolean useHsqldbTestServer() {
    // Hsqldb does not support BLOB/CLOB.
    return false;
  }

  @Override
  public void tearDown() {
    try {
      // Clean up the database on our way out.
      dropTableIfExists(getTableName());
    } catch (SQLException e) {
      log.warn("Error trying to drop table '" + getTableName()
          + "' on tearDown: " + e);
    }
    super.tearDown();
  }

  protected String[] getArgv(String... additionalArgs) {
    // Import every column of the table.
    String[] colNames = getColNames();
    String splitByCol = colNames[0];
    String columnsString = "";
    for (String col : colNames) {
      columnsString += col + ",";
    }

    ArrayList<String> args = new ArrayList<String>();

    CommonArgs.addHadoopFlags(args);

    args.add("--table");
    args.add(getTableName());
    args.add("--columns");
    args.add(columnsString);
    args.add("--split-by");
    args.add(splitByCol);
    args.add("--warehouse-dir");
    args.add(getWarehouseDir());
    args.add("--connect");
    args.add(getConnectString());
    args.add("--as-avrodatafile");
    args.add("--num-mappers");
    args.add("2");

    for (String arg : additionalArgs) {
      args.add(arg);
    }

    return args.toArray(new String[0]);
  }
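  /*
   * For illustration (table and column names here are hypothetical): with a
   * table LOB_MYSQL_1 holding a single column DATA_COL0, getArgv() above
   * builds arguments equivalent to the command line
   *
   *   sqoop import --table LOB_MYSQL_1 --columns DATA_COL0, \
   *     --split-by DATA_COL0 --warehouse-dir <dir> --connect <jdbc-url> \
   *     --as-avrodatafile --num-mappers 2
   *
   * Note that the string-append loop leaves a trailing comma in the
   * --columns value; these tests pass it through to Sqoop as-is.
   */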
  protected String getBlobType() {
    return "BLOB";
  }

  protected String getBlobInsertStr(String blobData) {
    return "'" + blobData + "'";
  }

  /**
   * Return the current table number as a string. In tests, the table number
   * is used to name .lob files.
   * @return current table number.
   */
  private String getTableNum() {
    return getTableName().substring(getTablePrefix().length());
  }

  /**
   * Return an instance of DataFileReader for the given filename.
   * @param filename path that we're opening a reader for.
   * @return instance of DataFileReader.
   * @throws IOException
   */
  private DataFileReader<GenericRecord> read(Path filename)
      throws IOException {
    Configuration conf = getConf();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FsInput fsInput = new FsInput(filename, conf);
    DatumReader<GenericRecord> datumReader =
        new GenericDatumReader<GenericRecord>();
    return new DataFileReader<GenericRecord>(fsInput, datumReader);
  }

  /**
   * Import blob data that is smaller than inline lob limit. Blob data
   * should be saved as Avro bytes.
   * @throws IOException
   * @throws SQLException
   */
  public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };

    createTableWithColTypes(types, vals);

    runImport(getArgv());

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());

    assertEquals(getColName(0), expectedVal, returnVal);
  }

  /**
   * Import blob data that is larger than inline lob limit. The reference
   * file should be saved as Avro bytes. Blob data should be saved in LOB
   * file format.
   * @throws IOException
   * @throws SQLException
   */
  public void testBlobAvroImportExternal() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String data = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(data) };

    createTableWithColTypes(types, vals);

    // Set inline lob limit to a small value so that blob data will be
    // written to an external file.
    runImport(getArgv("--inline-lob-limit", "1"));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that the reference file is written in Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    String expectedVal = "externalLob(lf,_lob/large_obj_task_local_000"
        + getTableNum() + "_m_0000000.lob,68," + data.length() + ")";

    assertEquals(expectedVal, returnVal);

    // Verify that blob data stored in the external lob file is correct.
    BlobRef br = BlobRef.parse(returnVal);
    Path lobFileDir = new Path(getWarehouseDir(), getTableName());
    InputStream in = br.getDataStream(getConf(), lobFileDir);

    byte[] bufArray = new byte[data.length()];
    int chars = in.read(bufArray);
    in.close();

    assertEquals(data.length(), chars);

    returnVal = new String(bufArray);
    expectedVal = data;
    assertEquals(getColName(0), expectedVal, returnVal);
  }
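  /*
   * A note on the reference string checked above (field meanings inferred
   * from org.apache.sqoop.lib.LobRef; treat the exact layout as an
   * implementation detail): an external LOB is recorded in the Avro file as
   * a string such as
   *
   *   externalLob(lf,_lob/large_obj_task_local_0001_m_0000000.lob,68,23)
   *
   * where "lf" marks LobFile-based storage, the second field is the .lob
   * file's path relative to the warehouse directory, 68 is the byte offset
   * of the record within that file, and the final field is the LOB's length.
   */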
  /**
   * Import blob data that is smaller than inline lob limit and compress
   * with deflate codec. Blob data should be encoded and saved as Avro
   * bytes.
   * @throws IOException
   * @throws SQLException
   */
  public void testBlobCompressedAvroImportInline()
      throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };

    createTableWithColTypes(types, vals);

    runImport(getArgv("--compression-codec", CodecMap.DEFLATE));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that the data block of the Avro file is compressed with
    // deflate codec.
    assertEquals(CodecMap.DEFLATE,
        reader.getMetaString(DataFileConstants.CODEC));

    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());

    assertEquals(getColName(0), expectedVal, returnVal);
  }

  /**
   * Import blob data that is larger than inline lob limit and compress with
   * deflate codec. The reference file should be encoded and saved as Avro
   * bytes. Blob data should be saved in LOB file format without
   * compression.
   * @throws IOException
   * @throws SQLException
   */
  public void testBlobCompressedAvroImportExternal()
      throws IOException, SQLException {
    String[] types = { getBlobType() };
    String data = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(data) };

    createTableWithColTypes(types, vals);

    // Set inline lob limit to a small value so that blob data will be
    // written to an external file.
    runImport(getArgv("--inline-lob-limit", "1",
        "--compression-codec", CodecMap.DEFLATE));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that the data block of the Avro file is compressed with
    // deflate codec.
    assertEquals(CodecMap.DEFLATE,
        reader.getMetaString(DataFileConstants.CODEC));

    // Verify that the reference file is written in Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    String expectedVal = "externalLob(lf,_lob/large_obj_task_local_000"
        + getTableNum() + "_m_0000000.lob,68," + data.length() + ")";

    assertEquals(expectedVal, returnVal);

    // Verify that blob data stored in the external lob file is correct.
    BlobRef br = BlobRef.parse(returnVal);
    Path lobFileDir = new Path(getWarehouseDir(), getTableName());
    InputStream in = br.getDataStream(getConf(), lobFileDir);

    byte[] bufArray = new byte[data.length()];
    int chars = in.read(bufArray);
    in.close();

    assertEquals(data.length(), chars);

    returnVal = new String(bufArray);
    expectedVal = data;
    assertEquals(getColName(0), expectedVal, returnVal);
  }
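  /*
   * Note: --compression-codec applies to the Avro container file only. Its
   * data blocks are deflate-compressed, which the tests above confirm via
   * the "avro.codec" file metadata (DataFileConstants.CODEC). The external
   * .lob file is written by the LOB subsystem without compression, which is
   * why the raw bytes read back from it match the original data directly.
   */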
  /**
   * Import multiple columns of blob data. Blob data should be saved as
   * Avro bytes.
   * @throws IOException
   * @throws SQLException
   */
  public void testBlobAvroImportMultiCols() throws IOException, SQLException {
    String[] types = { getBlobType(), getBlobType(), getBlobType(), };
    String expectedVal1 = "This is short BLOB data1";
    String expectedVal2 = "This is short BLOB data2";
    String expectedVal3 = "This is short BLOB data3";
    String[] vals = { getBlobInsertStr(expectedVal1),
        getBlobInsertStr(expectedVal2),
        getBlobInsertStr(expectedVal3), };

    createTableWithColTypes(types, vals);

    runImport(getArgv());

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal1, returnVal);

    buf = (ByteBuffer) record.get(getColName(1));
    returnVal = new String(buf.array());
    assertEquals(getColName(1), expectedVal2, returnVal);

    buf = (ByteBuffer) record.get(getColName(2));
    returnVal = new String(buf.array());
    assertEquals(getColName(2), expectedVal3, returnVal);
  }

  public void testClobAvroImportInline() throws IOException, SQLException {
    // TODO: add tests for CLOB support for Avro import
  }

  public void testClobAvroImportExternal() throws IOException, SQLException {
    // TODO: add tests for CLOB support for Avro import
  }

  public void testClobCompressedAvroImportInline()
      throws IOException, SQLException {
    // TODO: add tests for CLOB support for Avro import
  }

  public void testClobCompressedAvroImportExternal()
      throws IOException, SQLException {
    // TODO: add tests for CLOB support for Avro import
  }

  public void testClobAvroImportMultiCols() throws IOException, SQLException {
    // TODO: add tests for CLOB support for Avro import
  }
}
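Because the class above is abstract, each database under test gets a thin concrete subclass that supplies a logger and the database's "friendly" name; connection details flow in through the ImportJobTestCase plumbing. Below is a minimal sketch of what such a subclass might look like. The class name and the choice of MySQL are illustrative assumptions, not part of the file above.

package com.cloudera.sqoop.testutil;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Hypothetical MySQL-flavored subclass (illustrative only). The real suite
 * wires connection strings and credentials through its test infrastructure;
 * only the two abstract hooks are shown here.
 */
public class MySQLLobAvroImportTest extends LobAvroImportTestCase {

  public static final Log LOG =
      LogFactory.getLog(MySQLLobAvroImportTest.class.getName());

  @Override
  protected Log getLogger() {
    return LOG;
  }

  @Override
  protected String getDbFriendlyName() {
    // Feeds getTablePrefix(), yielding table names like LOB_MYSQL_1.
    return "MySQL";
  }
}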