gobblin.source.extractor.extract.restapi.RestApiExtractor.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.source.extractor.extract.restapi.RestApiExtractor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.source.extractor.extract.restapi;

import com.google.common.collect.ImmutableList;

import gobblin.source.extractor.exception.RestApiConnectionException;
import gobblin.source.extractor.exception.RestApiProcessingException;
import gobblin.source.extractor.utils.Utils;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.common.base.Splitter;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.watermark.Predicate;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.exception.HighWatermarkException;
import gobblin.source.extractor.exception.RecordCountException;
import gobblin.source.extractor.exception.SchemaException;
import gobblin.source.extractor.extract.QueryBasedExtractor;
import gobblin.source.extractor.extract.Command;
import gobblin.source.extractor.extract.CommandOutput;
import gobblin.source.extractor.extract.SourceSpecificLayer;
import gobblin.source.extractor.schema.Schema;
import gobblin.source.workunit.WorkUnit;
import lombok.extern.slf4j.Slf4j;

/**
 * Base extractor for sources that are read through a REST API.
 *
 * <p>The class itself declares no type parameters: it fixes the schema type to
 * {@link JsonArray} and the record type to {@link JsonElement}.
 */
@Slf4j
public abstract class RestApiExtractor extends QueryBasedExtractor<JsonArray, JsonElement>
        implements SourceSpecificLayer<JsonArray, JsonElement>, RestApiSpecificLayer {
    /** Shared Gson instance used to convert column metadata to and from {@link Schema} objects. */
    private static final Gson GSON = new Gson();
    /** Not assigned anywhere in this class. NOTE(review): presumably set by subclasses - confirm. */
    protected String instanceUrl;
    /** Data query returned by {@code buildDataQuery}; assigned at the end of {@code extractMetadata}. */
    protected String updatedQuery;

    /** Connector obtained from {@code getConnector} in the constructor; used for every connect/getResponse call here. */
    protected final RestApiConnector connector;

    /**
     * Creates the extractor and obtains its connector from {@code getConnector(state)}.
     *
     * <p>Note: {@code getConnector} is an overridable method invoked during construction, so a
     * subclass implementation runs before that subclass's own fields have been initialized.
     *
     * @param state work unit state passed to the superclass constructor and to {@code getConnector}
     */
    public RestApiExtractor(WorkUnitState state) {
        super(state);
        this.connector = getConnector(state);
    }

    /**
     * Supplies the connector this extractor talks to. Within this class it is called only from
     * the constructor, which stores the result in {@code connector}.
     *
     * @param state the work unit state that was given to the constructor
     * @return the connector to use for all REST calls made by this extractor
     */
    protected abstract RestApiConnector getConnector(WorkUnitState state);

    /**
     * Builds the query used to pull data, restricting its select list to the columns
     * collected in {@code this.columnList}.
     *
     * <ul>
     *   <li>No input query (null or empty) and a non-empty column list: returns
     *       {@code SELECT <columns> FROM <entity>}.</li>
     *   <li>Input query containing {@code "select "} followed later by {@code " from "}
     *       (both matched case-insensitively): only the text between those two keywords is
     *       swapped for the comma-joined column list; the rest of the query is untouched.</li>
     *   <li>Anything else: the input query is returned exactly as given.</li>
     * </ul>
     *
     * @param inputQuery user-supplied query; may be {@code null} or empty
     * @param entity name placed after {@code FROM} when the query is generated from metadata
     * @return the query to run, or {@code inputQuery} unchanged when there is nothing to rewrite
     */
    protected String buildDataQuery(String inputQuery, String entity) {
        String dataQuery = inputQuery;

        if (Strings.isNullOrEmpty(inputQuery)) {
            // An empty query is treated like a missing one, matching extractMetadata.
            if (!this.columnList.isEmpty()) {
                dataQuery = "SELECT " + Joiner.on(",").join(this.columnList) + " FROM " + entity;
            }
        } else {
            String selectKeyword = "select ";
            int selectIndex = indexOfIgnoreCase(inputQuery, selectKeyword, 0);
            if (selectIndex >= 0) {
                int columnsStartIndex = selectIndex + selectKeyword.length();
                // Look for FROM only after the select list starts, so end can never precede start.
                int columnsEndIndex = indexOfIgnoreCase(inputQuery, " from ", columnsStartIndex);
                if (columnsEndIndex >= 0) {
                    // Splice by position: a textual replace would also rewrite any other
                    // occurrence of the column-list text (e.g. inside the table name or WHERE clause).
                    dataQuery = inputQuery.substring(0, columnsStartIndex)
                            + Joiner.on(",").join(this.columnList)
                            + inputQuery.substring(columnsEndIndex);
                }
            }
        }

        log.info("Updated data query: {}", dataQuery);
        return dataQuery;
    }

    /**
     * Case-insensitive {@code indexOf} that works on the original string, so the returned
     * offset is always valid for {@code text} (lower-casing a copy can change its length).
     */
    private static int indexOfIgnoreCase(String text, String token, int fromIndex) {
        int lastStart = text.length() - token.length();
        for (int i = Math.max(fromIndex, 0); i <= lastStart; i++) {
            if (text.regionMatches(true, i, token, 0, token.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Fetches the column metadata of {@code entity} through the connector, derives the output
     * schema from it and prepares the data query.
     *
     * <p>For every column returned by the source:
     * <ul>
     *   <li>the watermark flag and primary-key index are set from the {@code extract.delta.fields}
     *       and {@code extract.primary.key.fields} properties;</li>
     *   <li>watermark columns are forced non-nullable, columns that are neither watermark nor
     *       primary key are forced nullable, and primary-key columns keep the nullability
     *       reported by the source;</li>
     *   <li>the column is kept when there is no usable input query, when the query selects
     *       {@code *}, or when the query names the column - unless it is listed in the excluded
     *       columns (which are only honoured when no input query is configured).</li>
     * </ul>
     * Kept columns are appended to {@code this.columnList}; the resulting schema is passed to
     * {@code setOutputSchema} and the rewritten query is stored in {@code this.updatedQuery}.
     *
     * @param schema source schema name, forwarded to {@code getSchemaMetadata}
     * @param entity source entity name, forwarded to {@code getSchemaMetadata} and {@code buildDataQuery}
     * @param workUnit not used by this implementation
     * @throws SchemaException if the connection fails or any step of the schema retrieval throws
     */
    @Override
    public void extractMetadata(String schema, String entity, WorkUnit workUnit) throws SchemaException {
        log.info("Extract Metadata using Rest Api");
        JsonArray columnArray = new JsonArray();
        String inputQuery = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_QUERY);
        List<String> columnListInQuery = null;
        if (!Strings.isNullOrEmpty(inputQuery)) {
            columnListInQuery = Utils.getColumnListFromQuery(inputQuery);
        }

        // Excluded columns only apply when the query is generated rather than user-supplied.
        String excludedColumns = workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXCLUDED_COLUMNS);
        List<String> columnListExcluded = ImmutableList.<String>of();
        if (Strings.isNullOrEmpty(inputQuery) && !Strings.isNullOrEmpty(excludedColumns)) {
            Splitter splitter = Splitter.on(",").omitEmptyStrings().trimResults();
            columnListExcluded = splitter.splitToList(excludedColumns.toLowerCase());
        }

        try {
            boolean success = this.connector.connect();
            if (!success) {
                throw new SchemaException("Failed to connect.");
            }
            log.debug("Connected successfully.");
            List<Command> cmds = this.getSchemaMetadata(schema, entity);
            CommandOutput<?, ?> response = this.connector.getResponse(cmds);
            JsonArray array = this.getSchema(response);

            // These do not depend on the column being processed, so resolve them once.
            String deltaFields = workUnitState.getProp("extract.delta.fields");
            String primaryKeyFields = workUnitState.getProp("extract.primary.key.fields");
            boolean selectAllColumns = inputQuery == null || columnListInQuery == null
                    || (columnListInQuery.size() == 1 && "*".equals(columnListInQuery.get(0)));

            for (JsonElement columnElement : array) {
                Schema obj = GSON.fromJson(columnElement, Schema.class);
                String columnName = obj.getColumnName();

                boolean watermarkColumn = this.isWatermarkColumn(deltaFields, columnName);
                int primaryKeyIndex = this.getPrimarykeyIndex(primaryKeyFields, columnName);

                obj.setWaterMark(watermarkColumn);
                if (watermarkColumn) {
                    obj.setNullable(false);
                } else if (primaryKeyIndex == 0) {
                    // set all columns as nullable except primary key and watermark columns
                    obj.setNullable(true);
                }
                obj.setPrimaryKey(primaryKeyIndex);

                // If input query is null or provided '*' in the query select all columns.
                // Else, consider only the columns mentioned in the column list
                boolean selected = selectAllColumns
                        || (!columnListInQuery.isEmpty() && this.isMetadataColumn(columnName, columnListInQuery));
                if (selected && !columnListExcluded.contains(columnName.trim().toLowerCase())) {
                    this.columnList.add(columnName);
                    // Round-trip through JSON text to get the column as a JsonObject.
                    columnArray.add(GSON.fromJson(GSON.toJson(obj), JsonObject.class));
                }
            }

            this.updatedQuery = buildDataQuery(inputQuery, entity);
            log.info("Schema:{}", columnArray);
            this.setOutputSchema(columnArray);
        } catch (RuntimeException | RestApiConnectionException | RestApiProcessingException | IOException
                | SchemaException e) {
            throw new SchemaException("Failed to get schema using rest api; error - " + e.getMessage(), e);
        }
    }

    /**
     * Asks the source for its current high watermark.
     *
     * <p>Connects through the connector, runs the commands produced by
     * {@code getHighWatermarkMetadata} and converts the response with {@code getHighWatermark}.
     *
     * @param schema source schema name
     * @param entity source entity name
     * @param watermarkColumn column the watermark is computed on
     * @param predicateList predicates forwarded to {@code getHighWatermarkMetadata}
     * @param watermarkSourceFormat format forwarded to {@code getHighWatermark}
     * @return the high watermark extracted from the response
     * @throws HighWatermarkException if connecting fails or any step throws; the original
     *         exception is attached as the cause
     */
    @Override
    public long getMaxWatermark(String schema, String entity, String watermarkColumn, List<Predicate> predicateList,
            String watermarkSourceFormat) throws HighWatermarkException {
        log.info("Get high watermark using Rest Api");
        try {
            if (!this.connector.connect()) {
                throw new HighWatermarkException("Failed to connect.");
            }
            log.debug("Connected successfully.");

            List<Command> watermarkCommands =
                    this.getHighWatermarkMetadata(schema, entity, watermarkColumn, predicateList);
            CommandOutput<?, ?> watermarkResponse = this.connector.getResponse(watermarkCommands);
            long highWatermark = this.getHighWatermark(watermarkResponse, watermarkColumn, watermarkSourceFormat);
            log.info("High watermark:" + highWatermark);
            return highWatermark;
        } catch (Exception e) {
            throw new HighWatermarkException(
                    "Failed to get high watermark using rest api; error - " + e.getMessage(), e);
        }
    }

    /**
     * Asks the source how many records match the given predicates.
     *
     * <p>Connects through the connector, runs the commands produced by {@code getCountMetadata}
     * and converts the response with {@code getCount}.
     *
     * @param schema source schema name
     * @param entity source entity name
     * @param workUnit work unit forwarded to {@code getCountMetadata}
     * @param predicateList predicates forwarded to {@code getCountMetadata}
     * @return the record count extracted from the response
     * @throws RecordCountException if connecting fails or any step throws; the original
     *         exception is attached as the cause
     */
    @Override
    public long getSourceCount(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList)
            throws RecordCountException {
        log.info("Get source record count using Rest Api");
        try {
            if (!this.connector.connect()) {
                throw new RecordCountException("Failed to connect.");
            }
            log.debug("Connected successfully.");

            List<Command> countCommands = this.getCountMetadata(schema, entity, workUnit, predicateList);
            CommandOutput<?, ?> countResponse = this.connector.getResponse(countCommands);
            long recordCount = getCount(countResponse);
            log.info("Source record count:" + recordCount);
            return recordCount;
        } catch (Exception e) {
            throw new RecordCountException("Failed to get record count using rest api; error - " + e.getMessage(),
                    e);
        }
    }

    /**
     * Fetches the next batch of records from the source.
     *
     * <p>The connector is (re)connected only when it reports its connection as closed. If
     * {@code getPullStatus()} is false nothing is requested and {@code null} is returned.
     * Otherwise the request is built from {@code getNextUrl()} when one is set, or from
     * {@code getDataMetadata} when it is not, and the response is converted with {@code getData}.
     *
     * @param schema source schema name
     * @param entity source entity name
     * @param workUnit work unit forwarded to {@code getDataMetadata}
     * @param predicateList predicates forwarded to {@code getDataMetadata}
     * @return an iterator over the fetched records, or {@code null} when the pull status is false
     * @throws DataRecordException if connecting fails or any step throws; the original
     *         exception is attached as the cause
     */
    @Override
    public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit,
            List<Predicate> predicateList) throws DataRecordException {
        log.debug("Get data records using Rest Api");
        try {
            // connect() is attempted only for a closed connection; an open one counts as success.
            if (this.connector.isConnectionClosed() && !this.connector.connect()) {
                throw new DataRecordException("Failed to connect.");
            }
            log.debug("Connected successfully.");

            if (!this.getPullStatus()) {
                return null;
            }

            List<Command> fetchCommands;
            if (this.getNextUrl() != null) {
                fetchCommands = RestApiConnector.constructGetCommand(this.getNextUrl());
            } else {
                fetchCommands = this.getDataMetadata(schema, entity, workUnit, predicateList);
            }
            CommandOutput<?, ?> fetchResponse = this.connector.getResponse(fetchCommands);
            return this.getData(fetchResponse);
        } catch (Exception e) {
            throw new DataRecordException("Failed to get records using rest api; error - " + e.getMessage(), e);
        }
    }

    /**
     * Forwards {@code timeOut} to the connector as its auth-token timeout.
     *
     * @param timeOut value handed to the connector's {@code setAuthTokenTimeout}; its unit is
     *        not visible from this class
     */
    @Override
    public void setTimeOut(int timeOut) {
        this.connector.setAuthTokenTimeout(timeOut);
    }

}