gobblin.source.extractor.extract.jdbc.SqlServerExtractor.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.source.extractor.extract.jdbc.SqlServerExtractor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.source.extractor.extract.jdbc;

import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.utils.Utils;
import gobblin.source.extractor.watermark.Predicate;
import gobblin.source.extractor.watermark.WatermarkType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;

import com.google.common.collect.ImmutableMap;
import com.google.gson.JsonElement;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.extractor.extract.Command;
import gobblin.source.workunit.WorkUnit;
import lombok.extern.slf4j.Slf4j;

/**
 * SqlServer extractor using JDBC protocol
 *
 * @author nveeramr
 */
@Slf4j
public class SqlServerExtractor extends JdbcExtractor {
    private static final String TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss";
    private static final String DATE_FORMAT = "yyyy-MM-dd";
    private static final String HOUR_FORMAT = "HH";
    private static final long SAMPLERECORDCOUNT = -1;

    public SqlServerExtractor(WorkUnitState workUnitState) {
        super(workUnitState);
    }

    @Override
    public List<Command> getSchemaMetadata(String schema, String entity) throws SchemaException {
        log.debug("Build query to get schema");
        List<Command> commands = new ArrayList<>();
        List<String> queryParams = Arrays.asList(entity, schema);

        String metadataSql = "select " + " col.column_name, " + " col.data_type, "
                + " case when CHARACTER_OCTET_LENGTH is null then 0 else 0 end as length, "
                + " case when NUMERIC_PRECISION is null then 0 else NUMERIC_PRECISION end as precesion, "
                + " case when NUMERIC_SCALE is null then 0 else NUMERIC_SCALE end as scale, "
                + " case when is_nullable='NO' then 'false' else 'true' end as nullable, " + " '' as format, "
                + " '' as comment " + " from information_schema.COLUMNS col "
                + " WHERE upper(col.table_name)=upper(?) AND upper(col.table_schema)=upper(?) "
                + " order by col.ORDINAL_POSITION ";

        commands.add(JdbcExtractor.getCommand(metadataSql, JdbcCommand.JdbcCommandType.QUERY));
        commands.add(JdbcExtractor.getCommand(queryParams, JdbcCommand.JdbcCommandType.QUERYPARAMS));
        return commands;
    }

    @Override
    public List<Command> getHighWatermarkMetadata(String schema, String entity, String watermarkColumn,
            List<Predicate> predicateList) throws HighWatermarkException {
        log.debug("Build query to get high watermark");
        List<Command> commands = new ArrayList<>();

        String columnProjection = "max(" + Utils.getCoalesceColumnNames(watermarkColumn) + ")";
        String watermarkFilter = this.concatPredicates(predicateList);
        String query = this.getExtractSql();

        if (StringUtils.isBlank(watermarkFilter)) {
            watermarkFilter = "1=1";
        }
        query = query.replace(this.getOutputColumnProjection(), columnProjection)
                .replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);

        commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
        return commands;
    }

    @Override
    public List<Command> getCountMetadata(String schema, String entity, WorkUnit workUnit,
            List<Predicate> predicateList) throws RecordCountException {
        log.debug("Build query to get source record count");
        List<Command> commands = new ArrayList<>();

        String columnProjection = "COUNT(1)";
        String watermarkFilter = this.concatPredicates(predicateList);
        String query = this.getExtractSql();

        if (StringUtils.isBlank(watermarkFilter)) {
            watermarkFilter = "1=1";
        }
        query = query.replace(this.getOutputColumnProjection(), columnProjection)
                .replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL, watermarkFilter);
        String sampleFilter = this.constructSampleClause();

        if (!StringUtils.isEmpty(sampleFilter)) {
            String col = sampleFilter + " 1 as col ";
            query = "SELECT COUNT(1) FROM (" + query.replace(" COUNT(1) ", col) + ")temp";
        }
        commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
        return commands;
    }

    @Override
    public List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit,
            List<Predicate> predicateList) throws DataRecordException {
        log.debug("Build query to extract data");
        List<Command> commands = new ArrayList<>();
        int fetchSize = this.workUnitState.getPropAsInt(
                ConfigurationKeys.SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE,
                ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_JDBC_RESULTSET_FETCH_SIZE);
        log.info("Setting jdbc resultset fetch size as " + fetchSize);

        String watermarkFilter = this.concatPredicates(predicateList);
        String query = this.getExtractSql();
        if (StringUtils.isBlank(watermarkFilter)) {
            watermarkFilter = "1=1";
        }

        query = query.replace(ConfigurationKeys.DEFAULT_SOURCE_QUERYBASED_WATERMARK_PREDICATE_SYMBOL,
                watermarkFilter);
        String sampleFilter = this.constructSampleClause();

        if (!StringUtils.isEmpty(sampleFilter)) {
            String columnProjection = this.getOutputColumnProjection();
            String newColumnProjection = sampleFilter + " " + columnProjection;
            query = query.replace(columnProjection, newColumnProjection);
        }

        commands.add(JdbcExtractor.getCommand(query, JdbcCommand.JdbcCommandType.QUERY));
        commands.add(JdbcExtractor.getCommand(fetchSize, JdbcCommand.JdbcCommandType.FETCHSIZE));
        return commands;
    }

    @Override
    public Map<String, String> getDataTypeMap() {
        Map<String, String> dataTypeMap = ImmutableMap.<String, String>builder().put("smallint", "int")
                .put("tinyint", "int").put("int", "int").put("bigint", "long").put("decimal", "double")
                .put("numeric", "double").put("float", "float").put("real", "double").put("money", "double")
                .put("smallmoney", "double").put("binary", "string").put("varbinary", "string")
                .put("char", "string").put("varchar", "string").put("nchar", "string").put("nvarchar", "string")
                .put("text", "string").put("ntext", "string").put("image", "string").put("hierarchyid", "string")
                .put("uniqueidentifier", "string").put("date", "date").put("datetime", "timestamp")
                .put("datetime2", "timestamp").put("datetimeoffset", "timestamp").put("smalldatetime", "timestamp")
                .put("time", "time").put("bit", "boolean").build();
        return dataTypeMap;
    }

    @Override
    public Iterator<JsonElement> getRecordSetFromSourceApi(String schema, String entity, WorkUnit workUnit,
            List<Predicate> predicateList) throws IOException {
        return null;
    }

    @Override
    public String getConnectionUrl() {
        String host = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
        String port = this.workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PORT);
        String url = "jdbc:sqlserver://" + host.trim() + ":" + port;
        return url;
    }

    @Override
    public long exractSampleRecordCountFromQuery(String query) {
        if (StringUtils.isBlank(query)) {
            return SAMPLERECORDCOUNT;
        }

        long recordcount = SAMPLERECORDCOUNT;
        String inputQuery = query.toLowerCase();

        int limitStartIndex = inputQuery.indexOf(" top ");
        int limitEndIndex = getLimitEndIndex(inputQuery, limitStartIndex);
        if (limitStartIndex > 0) {
            String limitValue = query.substring(limitStartIndex + 5, limitEndIndex);
            try {
                recordcount = Long.parseLong(limitValue);
            } catch (Exception e) {
                log.error("Ignoring incorrct limit value in input query:" + limitValue);
            }
        }
        return recordcount;
    }

    @Override
    public String removeSampleClauseFromQuery(String query) {
        if (StringUtils.isBlank(query)) {
            return null;
        }

        String outputQuery = query;
        String inputQuery = query.toLowerCase();
        int limitStartIndex = inputQuery.indexOf(" top ");
        int limitEndIndex = getLimitEndIndex(inputQuery, limitStartIndex);
        if (limitStartIndex > 0) {
            outputQuery = query.substring(0, limitStartIndex) + " " + query.substring(limitEndIndex);
        }
        return outputQuery;
    }

    private static int getLimitEndIndex(String inputQuery, int limitStartIndex) {
        int limitEndIndex = -1;
        if (limitStartIndex > 0) {
            limitEndIndex = limitStartIndex + 5;
            String remainingQuery = inputQuery.substring(limitEndIndex);
            boolean numFound = false;

            int pos = 0;
            while (pos < remainingQuery.length()) {
                char ch = remainingQuery.charAt(pos);
                if (ch == ' ' && !numFound) {
                    pos++;
                    continue;
                } else if (numFound && (!Character.isDigit(ch))) {
                    break;
                } else {
                    numFound = true;
                }
                pos++;
            }
            limitEndIndex = limitEndIndex + pos;
        }
        return limitEndIndex;
    }

    @Override
    public String constructSampleClause() {
        long sampleRowCount = this.getSampleRecordCount();
        if (sampleRowCount >= 0) {
            return " top " + sampleRowCount;
        }
        return "";
    }

    @Override
    public String getWatermarkSourceFormat(WatermarkType watermarkType) {
        String columnFormat = null;
        switch (watermarkType) {
        case TIMESTAMP:
            columnFormat = "yyyy-MM-dd HH:mm:ss";
            break;
        case DATE:
            columnFormat = "yyyy-MM-dd";
            break;
        default:
            log.error("Watermark type " + watermarkType.toString() + " not recognized");
        }
        return columnFormat;
    }

    @Override
    public String getHourPredicateCondition(String column, long value, String valueFormat, String operator) {
        log.debug("Getting hour predicate for Sqlserver");
        String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, HOUR_FORMAT);
        return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
    }

    @Override
    public String getDatePredicateCondition(String column, long value, String valueFormat, String operator) {
        log.debug("Getting date predicate for Sqlserver");
        String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, DATE_FORMAT);
        return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
    }

    @Override
    public String getTimestampPredicateCondition(String column, long value, String valueFormat, String operator) {
        log.debug("Getting timestamp predicate for Sqlserver");
        String formattedvalue = Utils.toDateTimeFormat(Long.toString(value), valueFormat, TIMESTAMP_FORMAT);
        return Utils.getCoalesceColumnNames(column) + " " + operator + " '" + formattedvalue + "'";
    }
}