Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.gobblin.source.jdbc; import java.io.IOException; import java.sql.Blob; import java.sql.Clob; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.sql.Types; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOrderBy; import org.apache.calcite.sql.SqlSelect; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Joiner; import com.google.gson.Gson; import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import org.apache.gobblin.configuration.ConfigurationKeys; import org.apache.gobblin.configuration.WorkUnitState; import org.apache.gobblin.password.PasswordManager; import org.apache.gobblin.source.extractor.DataRecordException; import org.apache.gobblin.source.extractor.exception.HighWatermarkException; import org.apache.gobblin.source.extractor.exception.RecordCountException; import org.apache.gobblin.source.extractor.exception.SchemaException; import org.apache.gobblin.source.extractor.extract.Command; import org.apache.gobblin.source.extractor.extract.CommandOutput; import org.apache.gobblin.source.extractor.extract.QueryBasedExtractor; import org.apache.gobblin.source.extractor.extract.SourceSpecificLayer; import org.apache.gobblin.source.jdbc.JdbcCommand.JdbcCommandType; import org.apache.gobblin.source.extractor.resultset.RecordSetList; import org.apache.gobblin.source.extractor.schema.ColumnAttributes; import org.apache.gobblin.source.extractor.schema.ColumnNameCase; import org.apache.gobblin.source.extractor.schema.Schema; import org.apache.gobblin.source.extractor.utils.Utils; import org.apache.gobblin.source.extractor.watermark.Predicate; import org.apache.gobblin.source.extractor.watermark.WatermarkType; import org.apache.gobblin.source.workunit.WorkUnit; /** * Extract data using JDBC protocol * * @author nveeramr */ public abstract class JdbcExtractor extends QueryBasedExtractor<JsonArray, JsonElement> implements SourceSpecificLayer<JsonArray, JsonElement>, JdbcSpecificLayer { private static final Gson gson = new Gson(); private List<String> headerRecord; private boolean firstPull = true; private CommandOutput<?, ?> dataResponse = null; protected String extractSql; protected long sampleRecordCount; protected JdbcProvider jdbcSource; protected Connection dataConnection; protected int timeOut; private List<ColumnAttributes> columnAliasMap = new ArrayList<>(); private Map<String, Schema> metadataColumnMap = new HashMap<>(); private List<String> metadataColumnList = new ArrayList<>(); private String inputColumnProjection; private String outputColumnProjection; private long totalRecordCount = 0; private boolean nextRecord = true; private int unknownColumnCounter = 1; protected boolean enableDelimitedIdentifier = false; private Logger log = LoggerFactory.getLogger(JdbcExtractor.class); /** * Metadata column mapping to lookup columns specified in input query * * @return metadata(schema) column mapping */ public Map<String, Schema> getMetadataColumnMap() { return this.metadataColumnMap; } /** * @param metadataColumnMap metadata column mapping */ public void setMetadataColumnMap(Map<String, Schema> metadataColumnMap) { this.metadataColumnMap = metadataColumnMap; } /** * Metadata column list * * @return metadata(schema) column list */ public List<String> getMetadataColumnList() { return this.metadataColumnList; } /** * @param metadataColumnList metadata column list */ public void setMetadataColumnList(List<String> metadataColumnList) { this.metadataColumnList = metadataColumnList; } /** * Sample Records specified in input query * * @return sample record count */ public long getSampleRecordCount() { return this.sampleRecordCount; } /** * @param sampleRecordCount sample record count */ public void setSampleRecordCount(long sampleRecordCount) { this.sampleRecordCount = sampleRecordCount; } /** * query to extract data from data source * * @return query */ public String getExtractSql() { return this.extractSql; } /** * @param extractSql extract query */ public void setExtractSql(String extractSql) { this.extractSql = extractSql; } /** * output column projection with aliases specified in input sql * * @return column projection */ public String getOutputColumnProjection() { return this.outputColumnProjection; } /** * @param outputColumnProjection output column projection */ public void setOutputColumnProjection(String outputColumnProjection) { this.outputColumnProjection = outputColumnProjection; } /** * input column projection with source columns specified in input sql * * @return column projection */ public String getInputColumnProjection() { return this.inputColumnProjection; } /** * @param inputColumnProjection input column projection */ public void setInputColumnProjection(String inputColumnProjection) { this.inputColumnProjection = inputColumnProjection; } /** * source column and alias mapping * * @return map of column name and alias name */ public List<ColumnAttributes> getColumnAliasMap() { return this.columnAliasMap; } /** * add column and alias mapping * * @param columnAliasMap column alias mapping */ public void addToColumnAliasMap(ColumnAttributes columnAliasMap) { this.columnAliasMap.add(columnAliasMap); } /** * check whether is first pull or not * * @return true, for the first run and it will be set to false after the * first run */ public boolean isFirstPull() { return this.firstPull; } /** * @param firstPull */ public void setFirstPull(boolean firstPull) { this.firstPull = firstPull; } /** * Header record to convert csv to json * * @return header record with list of columns */ protected List<String> getHeaderRecord() { return this.headerRecord; } /** * @param headerRecord list of column names */ protected void setHeaderRecord(List<String> headerRecord) { this.headerRecord = headerRecord; } /** * @return connection timeout */ public int getTimeOut() { return this.timeOut; } /** * @return true, if records available. Otherwise, false */ public boolean hasNextRecord() { return this.nextRecord; } /** * @param nextRecord next Record */ public void setNextRecord(boolean nextRecord) { this.nextRecord = nextRecord; } /** * @param timeOut connection timeout */ @Override public void setTimeOut(int timeOut) { this.timeOut = timeOut; } /** * @return private static final Gson factory */ public Gson getGson() { return this.gson; } public JdbcExtractor(WorkUnitState workUnitState) { super(workUnitState); } @Override public void extractMetadata(String schema, String entity, WorkUnit workUnit) throws SchemaException, IOException { this.log.info("Extract metadata using JDBC"); String inputQuery = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY); if (workUnitState.getPropAsBoolean(ConfigurationKeys.SOURCE_QUERYBASED_IS_METADATA_COLUMN_CHECK_ENABLED, Boolean.valueOf(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_IS_METADATA_COLUMN_CHECK_ENABLED)) && hasJoinOperation(inputQuery)) { throw new RuntimeException("Query across multiple tables not supported"); } String watermarkColumn = workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY); this.enableDelimitedIdentifier = workUnitState.getPropAsBoolean( ConfigurationKeys.ENABLE_DELIMITED_IDENTIFIER, ConfigurationKeys.DEFAULT_ENABLE_DELIMITED_IDENTIFIER); JsonObject defaultWatermark = this.getDefaultWatermark(); String derivedWatermarkColumnName = defaultWatermark.get("columnName").getAsString(); this.setSampleRecordCount(this.exractSampleRecordCountFromQuery(inputQuery)); inputQuery = this.removeSampleClauseFromQuery(inputQuery); JsonArray targetSchema = new JsonArray(); List<String> headerColumns = new ArrayList<>(); try { List<Command> cmds = this.getSchemaMetadata(schema, entity); CommandOutput<?, ?> response = this.executePreparedSql(cmds); JsonArray array = this.getSchema(response); this.buildMetadataColumnMap(array); this.parseInputQuery(inputQuery); List<String> sourceColumns = this.getMetadataColumnList(); for (ColumnAttributes colMap : this.columnAliasMap) { String alias = colMap.getAliasName(); String columnName = colMap.getColumnName(); String sourceColumnName = colMap.getSourceColumnName(); if (this.isMetadataColumn(columnName, sourceColumns)) { String targetColumnName = this.getTargetColumnName(columnName, alias); Schema obj = this.getUpdatedSchemaObject(columnName, alias, targetColumnName); String jsonStr = gson.toJson(obj); JsonObject jsonObject = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject(); targetSchema.add(jsonObject); headerColumns.add(targetColumnName); sourceColumnName = getLeftDelimitedIdentifier() + sourceColumnName + getRightDelimitedIdentifier(); this.columnList.add(sourceColumnName); } } if (this.hasMultipleWatermarkColumns(watermarkColumn)) { derivedWatermarkColumnName = getLeftDelimitedIdentifier() + derivedWatermarkColumnName + getRightDelimitedIdentifier(); this.columnList.add(derivedWatermarkColumnName); headerColumns.add(derivedWatermarkColumnName); targetSchema.add(defaultWatermark); this.workUnitState.setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, derivedWatermarkColumnName); } String outputColProjection = Joiner.on(",").useForNull("null").join(this.columnList); outputColProjection = outputColProjection.replace(derivedWatermarkColumnName, Utils.getCoalesceColumnNames(watermarkColumn) + " AS " + derivedWatermarkColumnName); this.setOutputColumnProjection(outputColProjection); String extractQuery = this.getExtractQuery(schema, entity, inputQuery); this.setHeaderRecord(headerColumns); this.setOutputSchema(targetSchema); this.setExtractSql(extractQuery); // this.workUnit.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, // this.escapeCharsInColumnName(this.workUnit.getProp(ConfigurationKeys.SOURCE_ENTITY), // ConfigurationKeys.ESCAPE_CHARS_IN_COLUMN_NAME, "_")); this.log.info("Schema:" + targetSchema); this.log.info("Extract query: " + this.getExtractSql()); } catch (RuntimeException | IOException | SchemaException e) { throw new SchemaException("Failed to get metadata using JDBC; error - " + e.getMessage(), e); } } /** * Build/Format input query in the required format * * @param schema * @param entity * @param inputQuery * @return formatted extract query */ private String getExtractQuery(String schema, String entity, String inputQuery) { String inputColProjection = this.getInputColumnProjection(); String outputColProjection = this.getOutputColumnProjection(); String query = inputQuery; if (query == null) { // if input query is null, build the query from metadata query = "SELECT " + outputColProjection + " FROM " + schema + "." + entity; } else { // replace input column projection with output column projection if (StringUtils.isNotBlank(inputColProjection)) { query = query.replace(inputColProjection, outputColProjection); } } query = addOptionalWatermarkPredicate(query); return query; } /** * @param query * @return query with watermark predicate symbol */ protected String addOptionalWatermarkPredicate(String query) { String watermarkPredicateSymbol = ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL; if (!query.contains(watermarkPredicateSymbol)) { query = SqlQueryUtils.addPredicate(query, watermarkPredicateSymbol); } return query; } /** * Update schema of source column Update column name with target column * name/alias Update watermark, nullable and primary key flags * * @param sourceColumnName * @param targetColumnName * @return schema object of a column */ private Schema getUpdatedSchemaObject(String sourceColumnName, String alias, String targetColumnName) { // Check for source column and alias Schema obj = this.getMetadataColumnMap().get(sourceColumnName.toLowerCase()); if (obj == null && alias != null) { obj = this.getMetadataColumnMap().get(alias.toLowerCase()); } if (obj == null) { obj = getCustomColumnSchema(targetColumnName); } else { String watermarkColumn = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY); String primarykeyColumn = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY); boolean isMultiColumnWatermark = this.hasMultipleWatermarkColumns(watermarkColumn); obj.setColumnName(targetColumnName); boolean isWatermarkColumn = this.isWatermarkColumn(watermarkColumn, sourceColumnName); if (isWatermarkColumn) { this.updateDeltaFieldConfig(sourceColumnName, targetColumnName); } else if (alias != null) { // Check for alias isWatermarkColumn = this.isWatermarkColumn(watermarkColumn, alias); this.updateDeltaFieldConfig(alias, targetColumnName); } // If there is only one watermark column, then consider it as a // watermark. Otherwise add a default watermark column in the end if (!isMultiColumnWatermark) { obj.setWaterMark(isWatermarkColumn); } // override all columns to nullable except primary key and watermark // columns if ((isWatermarkColumn && !isMultiColumnWatermark) || this.getPrimarykeyIndex(primarykeyColumn, sourceColumnName) > 0) { obj.setNullable(false); } else { obj.setNullable(true); } // set primary key index for all the primary key fields int primarykeyIndex = this.getPrimarykeyIndex(primarykeyColumn, sourceColumnName); if (primarykeyIndex > 0 && (!sourceColumnName.equalsIgnoreCase(targetColumnName))) { this.updatePrimaryKeyConfig(sourceColumnName, targetColumnName); } obj.setPrimaryKey(primarykeyIndex); } return obj; } /** * Get target column name if column is not found in metadata, then name it * as unknown column If alias is not found, target column is nothing but * source column * * @param sourceColumnName * @param alias * @return targetColumnName */ private String getTargetColumnName(String sourceColumnName, String alias) { String targetColumnName = alias; Schema obj = this.getMetadataColumnMap().get(sourceColumnName.toLowerCase()); if (obj == null) { targetColumnName = (targetColumnName == null ? "unknown" + this.unknownColumnCounter : targetColumnName); this.unknownColumnCounter++; } else { targetColumnName = (StringUtils.isNotBlank(targetColumnName) ? targetColumnName : sourceColumnName); } targetColumnName = this.toCase(targetColumnName); return Utils.escapeSpecialCharacters(targetColumnName, ConfigurationKeys.ESCAPE_CHARS_IN_COLUMN_NAME, "_"); } /** * Build metadata column map with column name and column schema object. * Build metadata column list with list columns in metadata * * @param array Schema of all columns */ private void buildMetadataColumnMap(JsonArray array) { if (array != null) { for (JsonElement columnElement : array) { Schema schemaObj = gson.fromJson(columnElement, Schema.class); String columnName = schemaObj.getColumnName(); this.metadataColumnMap.put(columnName.toLowerCase(), schemaObj); this.metadataColumnList.add(columnName.toLowerCase()); } } } /** * Update water mark column property if there is an alias defined in query * * @param srcColumnName source column name * @param tgtColumnName target column name */ private void updateDeltaFieldConfig(String srcColumnName, String tgtColumnName) { if (this.workUnitState.contains(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY)) { String watermarkCol = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY); this.workUnitState.setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, watermarkCol.replaceAll(srcColumnName, tgtColumnName)); } } /** * Update primary key column property if there is an alias defined in query * * @param srcColumnName source column name * @param tgtColumnName target column name */ private void updatePrimaryKeyConfig(String srcColumnName, String tgtColumnName) { if (this.workUnitState.contains(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY)) { String primarykey = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY); this.workUnitState.setProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, primarykey.replaceAll(srcColumnName, tgtColumnName)); } } /** * If input query is null or '*' in the select list, consider all columns. * * @return true, to select all colums. else, false. */ private boolean isSelectAllColumns() { String columnProjection = this.getInputColumnProjection(); if (columnProjection == null || columnProjection.trim().equals("*") || columnProjection.contains(".*")) { return true; } return false; } /** * Parse query provided in pull file Set input column projection - column * projection in the input query Set columnAlias map - column and its alias * mentioned in input query * * @param query input query */ private void parseInputQuery(String query) { List<String> projectedColumns = new ArrayList<>(); if (StringUtils.isNotBlank(query)) { String queryLowerCase = query.toLowerCase(); int startIndex = queryLowerCase.indexOf("select ") + 7; int endIndex = queryLowerCase.indexOf(" from "); if (startIndex >= 0 && endIndex >= 0) { String columnProjection = query.substring(startIndex, endIndex); this.setInputColumnProjection(columnProjection); // parse the select list StringBuffer sb = new StringBuffer(); int bracketCount = 0; for (int i = 0; i < columnProjection.length(); i++) { char c = columnProjection.charAt(i); if (c == '(') { bracketCount++; } if (c == ')') { bracketCount--; } if (bracketCount != 0) { sb.append(c); } else { if (c != ',') { sb.append(c); } else { projectedColumns.add(sb.toString()); sb = new StringBuffer(); } } } projectedColumns.add(sb.toString()); } } if (this.isSelectAllColumns()) { List<String> columnList = this.getMetadataColumnList(); for (String columnName : columnList) { ColumnAttributes col = new ColumnAttributes(); col.setColumnName(columnName); col.setAliasName(columnName); col.setSourceColumnName(columnName); this.addToColumnAliasMap(col); } } else { for (String projectedColumn : projectedColumns) { String column = projectedColumn.trim(); String alias = null; String sourceColumn = column; int spaceOccurences = StringUtils.countMatches(column.trim(), " "); if (spaceOccurences > 0) { // separate column and alias if they are separated by "as" // or space int lastSpaceIndex = column.toLowerCase().lastIndexOf(" as "); sourceColumn = column.substring(0, lastSpaceIndex); alias = column.substring(lastSpaceIndex + 4); } // extract column name if projection has table name in it String columnName = sourceColumn; if (sourceColumn.contains(".")) { columnName = sourceColumn.substring(sourceColumn.indexOf(".") + 1); } ColumnAttributes col = new ColumnAttributes(); col.setColumnName(columnName); col.setAliasName(alias); col.setSourceColumnName(sourceColumn); this.addToColumnAliasMap(col); } } } /** * Execute query using JDBC simple Statement Set fetch size * * @param cmds commands - query, fetch size * @return JDBC ResultSet * @throws Exception */ private CommandOutput<?, ?> executeSql(List<Command> cmds) { String query = null; int fetchSize = 0; for (Command cmd : cmds) { if (cmd instanceof JdbcCommand) { JdbcCommandType type = (JdbcCommandType) cmd.getCommandType(); switch (type) { case QUERY: query = cmd.getParams().get(0); break; case FETCHSIZE: fetchSize = Integer.parseInt(cmd.getParams().get(0)); break; default: this.log.error("Command " + type.toString() + " not recognized"); break; } } } this.log.info("Executing query:" + query); ResultSet resultSet = null; try { this.jdbcSource = createJdbcSource(); if (this.dataConnection == null) { this.dataConnection = this.jdbcSource.getConnection(); } Statement statement = this.dataConnection.createStatement(); if (fetchSize != 0 && this.getExpectedRecordCount() > 2000) { statement.setFetchSize(fetchSize); } final boolean status = statement.execute(query); if (status == false) { this.log.error("Failed to execute sql:" + query); } resultSet = statement.getResultSet(); } catch (Exception e) { this.log.error("Failed to execute sql:" + query + " ;error-" + e.getMessage(), e); } CommandOutput<JdbcCommand, ResultSet> output = new JdbcCommandOutput(); output.put((JdbcCommand) cmds.get(0), resultSet); return output; } /** * Execute query using JDBC PreparedStatement to pass query parameters Set * fetch size * * @param cmds commands - query, fetch size, query parameters * @return JDBC ResultSet * @throws Exception */ private CommandOutput<?, ?> executePreparedSql(List<Command> cmds) { String query = null; List<String> queryParameters = null; int fetchSize = 0; for (Command cmd : cmds) { if (cmd instanceof JdbcCommand) { JdbcCommandType type = (JdbcCommandType) cmd.getCommandType(); switch (type) { case QUERY: query = cmd.getParams().get(0); break; case QUERYPARAMS: queryParameters = cmd.getParams(); break; case FETCHSIZE: fetchSize = Integer.parseInt(cmd.getParams().get(0)); break; default: this.log.error("Command " + type.toString() + " not recognized"); break; } } } this.log.info("Executing query:" + query); ResultSet resultSet = null; try { this.jdbcSource = createJdbcSource(); if (this.dataConnection == null) { this.dataConnection = this.jdbcSource.getConnection(); } PreparedStatement statement = this.dataConnection.prepareStatement(query, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); int parameterPosition = 1; if (queryParameters != null && queryParameters.size() > 0) { for (String parameter : queryParameters) { statement.setString(parameterPosition, parameter); parameterPosition++; } } if (fetchSize != 0) { statement.setFetchSize(fetchSize); } final boolean status = statement.execute(); if (status == false) { this.log.error("Failed to execute sql:" + query); } resultSet = statement.getResultSet(); } catch (Exception e) { this.log.error("Failed to execute sql:" + query + " ;error-" + e.getMessage(), e); } CommandOutput<JdbcCommand, ResultSet> output = new JdbcCommandOutput(); output.put((JdbcCommand) cmds.get(0), resultSet); return output; } /** * Create JDBC source to get connection * * @return JDBCSource */ protected JdbcProvider createJdbcSource() { String driver = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_DRIVER); String userName = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME); String password = PasswordManager.getInstance(this.workUnitState) .readPassword(this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD)); String connectionUrl = this.getConnectionUrl(); String proxyHost = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL); int proxyPort = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT) != null ? this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT) : -1; if (this.jdbcSource == null || this.jdbcSource.isClosed()) { this.jdbcSource = new JdbcProvider(driver, connectionUrl, userName, password, 1, this.getTimeOut(), "DEFAULT", proxyHost, proxyPort); return this.jdbcSource; } else { return this.jdbcSource; } } @Override public long getMaxWatermark(String schema, String entity, String watermarkColumn, List<Predicate> predicateList, String watermarkSourceFormat) throws HighWatermarkException { this.log.info("Get high watermark using JDBC"); long calculatedHighWatermark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE; try { List<Command> cmds = this.getHighWatermarkMetadata(schema, entity, watermarkColumn, predicateList); CommandOutput<?, ?> response = this.executeSql(cmds); calculatedHighWatermark = this.getHighWatermark(response, watermarkColumn, watermarkSourceFormat); return calculatedHighWatermark; } catch (Exception e) { throw new HighWatermarkException("Failed to get high watermark using JDBC; error - " + e.getMessage(), e); } } @Override public long getSourceCount(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws RecordCountException { this.log.info("Get source record count using JDBC"); long count = 0; try { List<Command> cmds = this.getCountMetadata(schema, entity, workUnit, predicateList); CommandOutput<?, ?> response = this.executeSql(cmds); count = this.getCount(response); this.log.info("Source record count:" + count); return count; } catch (Exception e) { throw new RecordCountException( "Failed to get source record count using JDBC; error - " + e.getMessage(), e); } } @Override public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException, IOException { Iterator<JsonElement> rs = null; List<Command> cmds; try { if (isFirstPull()) { this.log.info("Get data recordset using JDBC"); cmds = this.getDataMetadata(schema, entity, workUnit, predicateList); this.dataResponse = this.executePreparedSql(cmds); this.setFirstPull(false); } rs = this.getData(this.dataResponse); return rs; } catch (Exception e) { throw new DataRecordException("Failed to get record set using JDBC; error - " + e.getMessage(), e); } } @Override public JsonArray getSchema(CommandOutput<?, ?> response) throws SchemaException, IOException { this.log.debug("Extract schema from resultset"); ResultSet resultset = null; Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator(); if (itr.hasNext()) { resultset = itr.next(); } else { throw new SchemaException("Failed to get schema from database - Resultset has no records"); } JsonArray fieldJsonArray = new JsonArray(); try { while (resultset.next()) { Schema schema = new Schema(); String columnName = resultset.getString(1); schema.setColumnName(columnName); String dataType = resultset.getString(2); String elementDataType = "string"; List<String> mapSymbols = null; JsonObject newDataType = this.convertDataType(columnName, dataType, elementDataType, mapSymbols); schema.setDataType(newDataType); schema.setLength(resultset.getLong(3)); schema.setPrecision(resultset.getInt(4)); schema.setScale(resultset.getInt(5)); schema.setNullable(resultset.getBoolean(6)); schema.setFormat(resultset.getString(7)); schema.setComment(resultset.getString(8)); schema.setDefaultValue(null); schema.setUnique(false); String jsonStr = gson.toJson(schema); JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject(); fieldJsonArray.add(obj); } } catch (Exception e) { throw new SchemaException("Failed to get schema from database; error - " + e.getMessage(), e); } return fieldJsonArray; } @Override public long getHighWatermark(CommandOutput<?, ?> response, String watermarkColumn, String watermarkColumnFormat) throws HighWatermarkException { this.log.debug("Extract high watermark from resultset"); ResultSet resultset = null; Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator(); if (itr.hasNext()) { resultset = itr.next(); } else { throw new HighWatermarkException( "Failed to get high watermark from database - Resultset has no records"); } Long HighWatermark; try { String watermark; if (resultset.next()) { watermark = resultset.getString(1); } else { watermark = null; } if (watermark == null) { return ConfigurationKeys.DEFAULT_WATERMARK_VALUE; } if (watermarkColumnFormat != null) { SimpleDateFormat inFormat = new SimpleDateFormat(watermarkColumnFormat); Date date = null; try { date = inFormat.parse(watermark); } catch (ParseException e) { this.log.error("ParseException: " + e.getMessage(), e); } SimpleDateFormat outFormat = new SimpleDateFormat("yyyyMMddHHmmss"); HighWatermark = Long.parseLong(outFormat.format(date)); } else { HighWatermark = Long.parseLong(watermark); } } catch (Exception e) { throw new HighWatermarkException( "Failed to get high watermark from database; error - " + e.getMessage(), e); } return HighWatermark; } @Override public long getCount(CommandOutput<?, ?> response) throws RecordCountException { this.log.debug("Extract source record count from resultset"); ResultSet resultset = null; long count = 0; Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator(); if (itr.hasNext()) { resultset = itr.next(); try { if (resultset.next()) { count = resultset.getLong(1); } } catch (Exception e) { throw new RecordCountException( "Failed to get source record count from database; error - " + e.getMessage(), e); } } else { throw new RuntimeException( "Failed to get source record count from database - Resultset has no records"); } return count; } @Override public Iterator<JsonElement> getData(CommandOutput<?, ?> response) throws DataRecordException, IOException { this.log.debug("Extract data records from resultset"); RecordSetList<JsonElement> recordSet = this.getNewRecordSetList(); if (response == null || !this.hasNextRecord()) { return recordSet.iterator(); } ResultSet resultset = null; Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator(); if (itr.hasNext()) { resultset = itr.next(); } else { throw new DataRecordException( "Failed to get source record count from database - Resultset has no records"); } try { final ResultSetMetaData resultsetMetadata = resultset.getMetaData(); int batchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE, 0); batchSize = (batchSize == 0 ? ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE : batchSize); int recordCount = 0; while (resultset.next()) { final int numColumns = resultsetMetadata.getColumnCount(); JsonObject jsonObject = new JsonObject(); for (int i = 1; i < numColumns + 1; i++) { final String columnName = this.getHeaderRecord().get(i - 1); jsonObject.addProperty(columnName, parseColumnAsString(resultset, resultsetMetadata, i)); } recordSet.add(jsonObject); recordCount++; this.totalRecordCount++; // Insert records in record set until it reaches the batch size if (recordCount >= batchSize) { this.log.info("Total number of records processed so far: " + this.totalRecordCount); return recordSet.iterator(); } } this.setNextRecord(false); this.log.info("Total number of records processed so far: " + this.totalRecordCount); return recordSet.iterator(); } catch (Exception e) { throw new DataRecordException("Failed to get records from database; error - " + e.getMessage(), e); } } /* * For Blob data, need to get the bytes and use base64 encoding to encode the byte[] * When reading from the String, need to use base64 decoder * String tmp = ... ( get the String value ) * byte[] foo = Base64.decodeBase64(tmp); */ private String readBlobAsString(Blob logBlob) throws SQLException { if (logBlob == null) { return StringUtils.EMPTY; } byte[] ba = logBlob.getBytes(1L, (int) (logBlob.length())); if (ba == null) { return StringUtils.EMPTY; } String baString = Base64.encodeBase64String(ba); return baString; } /* * For Clob data, we need to use the substring function to extract the string */ private String readClobAsString(Clob logClob) throws SQLException { if (logClob == null) { return StringUtils.EMPTY; } long length = logClob.length(); return logClob.getSubString(1, (int) length); } /** * HACK: there is a bug in the MysqlExtractor where tinyint columns are always treated as ints. * There are MySQL jdbc driver setting (tinyInt1isBit=true and transformedBitIsBoolean=false) that * can cause tinyint(1) columns to be treated as BIT/BOOLEAN columns. The default behavior is to * treat tinyint(1) as BIT. * * Currently, {@link MysqlExtractor#getDataTypeMap()} uses the information_schema to check types. * That does not do the above conversion. {@link #parseColumnAsString(ResultSet, ResultSetMetaData, int)} * which does the above type mapping. * * On the other hand, SqlServerExtractor treats BIT columns as Booleans. So we can be in a bind * where sometimes BIT has to be converted to an int (for backwards compatibility in MySQL) and * sometimes to a Boolean (for SqlServer). * * This function adds configurable behavior depending on the Extractor type. **/ protected boolean convertBitToBoolean() { return true; } private String parseColumnAsString(final ResultSet resultset, final ResultSetMetaData resultsetMetadata, int i) throws SQLException { if (isBlob(resultsetMetadata.getColumnType(i))) { return readBlobAsString(resultset.getBlob(i)); } if (isClob(resultsetMetadata.getColumnType(i))) { return readClobAsString(resultset.getClob(i)); } if ((resultsetMetadata.getColumnType(i) == Types.BIT || resultsetMetadata.getColumnType(i) == Types.BOOLEAN) && convertBitToBoolean()) { return Boolean.toString(resultset.getBoolean(i)); } return resultset.getString(i); } private static boolean isBlob(int columnType) { return columnType == Types.LONGVARBINARY || columnType == Types.BINARY; } private static boolean isClob(int columnType) { return columnType == Types.CLOB; } protected static Command getCommand(String query, JdbcCommandType commandType) { return new JdbcCommand().build(Arrays.asList(query), commandType); } protected static Command getCommand(int fetchSize, JdbcCommandType commandType) { return new JdbcCommand().build(Arrays.asList(Integer.toString(fetchSize)), commandType); } protected static Command getCommand(List<String> params, JdbcCommandType commandType) { return new JdbcCommand().build(params, commandType); } /** * Concatenate all predicates with "and" clause * * @param predicateList list of predicate(filter) conditions * @return predicate */ protected String concatPredicates(List<Predicate> predicateList) { List<String> conditions = new ArrayList<>(); for (Predicate predicate : predicateList) { conditions.add(predicate.getCondition()); } return Joiner.on(" and ").skipNulls().join(conditions); } /** * Schema of default watermark column-required if there are multiple watermarks * * @return column schema */ private JsonObject getDefaultWatermark() { Schema schema = new Schema(); String dataType; String columnName = "derivedwatermarkcolumn"; schema.setColumnName(columnName); WatermarkType wmType = WatermarkType.valueOf(this.workUnitState .getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "TIMESTAMP").toUpperCase()); switch (wmType) { case TIMESTAMP: dataType = "timestamp"; break; case DATE: dataType = "date"; break; default: dataType = "int"; break; } String elementDataType = "string"; List<String> mapSymbols = null; JsonObject newDataType = this.convertDataType(columnName, dataType, elementDataType, mapSymbols); schema.setDataType(newDataType); schema.setWaterMark(true); schema.setPrimaryKey(0); schema.setLength(0); schema.setPrecision(0); schema.setScale(0); schema.setNullable(false); schema.setFormat(null); schema.setComment("Default watermark column"); schema.setDefaultValue(null); schema.setUnique(false); String jsonStr = gson.toJson(schema); JsonObject obj = gson.fromJson(jsonStr, JsonObject.class).getAsJsonObject(); return obj; } /** * Schema of a custom column - required if column not found in metadata * * @return column schema */ private Schema getCustomColumnSchema(String columnName) { Schema schema = new Schema(); String dataType = "string"; schema.setColumnName(columnName); String elementDataType = "string"; List<String> mapSymbols = null; JsonObject newDataType = this.convertDataType(columnName, dataType, elementDataType, mapSymbols); schema.setDataType(newDataType); schema.setWaterMark(false); schema.setPrimaryKey(0); schema.setLength(0); schema.setPrecision(0); schema.setScale(0); schema.setNullable(true); schema.setFormat(null); schema.setComment("Custom column"); schema.setDefaultValue(null); schema.setUnique(false); return schema; } /** * Check if the SELECT query has join operation */ public static boolean hasJoinOperation(String selectQuery) { if (selectQuery == null || selectQuery.length() == 0) { return false; } SqlParser sqlParser = SqlParser.create(selectQuery); try { SqlNode all = sqlParser.parseQuery(); SqlSelect query; if (all instanceof SqlSelect) { query = (SqlSelect) all; } else if (all instanceof SqlOrderBy) { query = (SqlSelect) ((SqlOrderBy) all).query; } else { throw new UnsupportedOperationException( "The select query is type of " + all.getClass() + " which is not supported here"); } return query.getFrom().getKind() == SqlKind.JOIN; } catch (SqlParseException e) { return false; } } /** * New record set for iterator * * @return RecordSetList */ private static RecordSetList<JsonElement> getNewRecordSetList() { return new RecordSetList<>(); } /** * Change the column name case to upper, lower or nochange; Default nochange * * @return column name with the required case */ private String toCase(String targetColumnName) { String columnName = targetColumnName; ColumnNameCase caseType = ColumnNameCase.valueOf(this.workUnitState .getProp(ConfigurationKeys.SOURCE_COLUMN_NAME_CASE, ConfigurationKeys.DEFAULT_COLUMN_NAME_CASE) .toUpperCase()); switch (caseType) { case TOUPPER: columnName = targetColumnName.toUpperCase(); break; case TOLOWER: columnName = targetColumnName.toLowerCase(); break; default: columnName = targetColumnName; break; } return columnName; } /** * Default DelimitedIdentifier is 'double quotes', * but that would make the column name case sensitive in some of the systems, e.g. Oracle. * Queries may fail if * (1) enableDelimitedIdentifier is true, and * (2) Queried system is case sensitive when using double quotes as delimited identifier, and * (3) Intended column name does not match the column name in the schema including case. * * @return leftDelimitedIdentifier */ public String getLeftDelimitedIdentifier() { return this.enableDelimitedIdentifier ? "\"" : ""; } public String getRightDelimitedIdentifier() { return this.enableDelimitedIdentifier ? "\"" : ""; } @Override public void closeConnection() throws Exception { if (this.dataConnection != null) { try { this.dataConnection.close(); } catch (SQLException e) { this.log.error("Failed to close connection ;error-" + e.getMessage(), e); } } this.jdbcSource.close(); } }