org.apache.atlas.sqoop.hook.SqoopHook.java Source code

Introduction

Here is the source code for org.apache.atlas.sqoop.hook.SqoopHook.java. SqoopHook extends Sqoop's SqoopJobDataPublisher: when a Sqoop job completes, it builds Referenceable entities for the database store, the Hive database and table involved, and the Sqoop process that links them, and publishes them to the Atlas server as an entity-create notification.
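
Per the Apache Atlas Sqoop bridge documentation, the hook is typically enabled by setting the sqoop.job.data.publish.class property in sqoop-site.xml to org.apache.atlas.sqoop.hook.SqoopHook; the class itself adds sqoop-site.xml as a default Hadoop configuration resource in its static initializer.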

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.atlas.sqoop.hook;

import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasConstants;
import org.apache.atlas.hive.bridge.HiveMetaStoreBridge;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.hook.AtlasHook;
import org.apache.atlas.hook.AtlasHookException;
import org.apache.atlas.notification.hook.HookNotification;
import org.apache.atlas.sqoop.model.SqoopDataTypes;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang3.StringUtils;
import org.apache.sqoop.SqoopJobDataPublisher;
import org.apache.sqoop.util.ImportException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * SqoopHook sends Sqoop job lineage information to the Atlas server.
 */
public class SqoopHook extends SqoopJobDataPublisher {

    private static final Logger LOG = LoggerFactory.getLogger(SqoopHook.class);
    public static final String CONF_PREFIX = "atlas.hook.sqoop.";
    public static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";

    public static final String ATLAS_CLUSTER_NAME = "atlas.cluster.name";
    public static final String DEFAULT_CLUSTER_NAME = "primary";

    public static final String USER = "userName";
    public static final String DB_STORE_TYPE = "dbStoreType";
    public static final String DB_STORE_USAGE = "storeUse";
    public static final String SOURCE = "source";
    public static final String DESCRIPTION = "description";
    public static final String STORE_URI = "storeUri";
    public static final String OPERATION = "operation";
    public static final String START_TIME = "startTime";
    public static final String END_TIME = "endTime";
    public static final String CMD_LINE_OPTS = "commandlineOpts";
    // multiple inputs and outputs for process
    public static final String INPUTS = "inputs";
    public static final String OUTPUTS = "outputs";

    static {
        org.apache.hadoop.conf.Configuration.addDefaultResource("sqoop-site.xml");
    }

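    /**
     * Builds a Referenceable of type HiveDataTypes.HIVE_DB for the Hive database touched by the
     * Sqoop job, keyed by the cluster-qualified database name from HiveMetaStoreBridge.
     */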
    public Referenceable createHiveDatabaseInstance(String clusterName, String dbName) {
        Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName());
        dbRef.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
        dbRef.set(AtlasClient.NAME, dbName);
        dbRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                HiveMetaStoreBridge.getDBQualifiedName(clusterName, dbName));
        return dbRef;
    }

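    /**
     * Builds a Referenceable of type HiveDataTypes.HIVE_TABLE for the Hive table, linked to its
     * owning database and keyed by the cluster-qualified table name.
     */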
    public Referenceable createHiveTableInstance(String clusterName, Referenceable dbRef, String tableName,
            String dbName) {
        Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
        tableRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                HiveMetaStoreBridge.getTableQualifiedName(clusterName, dbName, tableName));
        tableRef.set(AtlasClient.NAME, tableName.toLowerCase());
        tableRef.set(HiveMetaStoreBridge.DB, dbRef);
        return tableRef;
    }

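    /**
     * Builds a Referenceable of type SqoopDataTypes.SQOOP_DBDATASTORE for the database end of the
     * Sqoop job. Either a store table or a store query must be present; otherwise an
     * ImportException is thrown.
     */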
    private Referenceable createDBStoreInstance(SqoopJobDataPublisher.Data data) throws ImportException {

        Referenceable storeRef = new Referenceable(SqoopDataTypes.SQOOP_DBDATASTORE.getName());
        String table = data.getStoreTable();
        String query = data.getStoreQuery();
        if (StringUtils.isBlank(table) && StringUtils.isBlank(query)) {
            throw new ImportException("Table and query cannot both be empty for DBStoreInstance");
        }

        String usage = StringUtils.isNotBlank(table) ? "TABLE" : "QUERY";
        String source = StringUtils.isNotBlank(table) ? table : query;
        String name = getSqoopDBStoreName(data);
        storeRef.set(AtlasClient.NAME, name);
        storeRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, name);
        storeRef.set(SqoopHook.DB_STORE_TYPE, data.getStoreType());
        storeRef.set(SqoopHook.DB_STORE_USAGE, usage);
        storeRef.set(SqoopHook.STORE_URI, data.getUrl());
        storeRef.set(SqoopHook.SOURCE, source);
        storeRef.set(SqoopHook.DESCRIPTION, "");
        storeRef.set(AtlasClient.OWNER, data.getUser());
        return storeRef;
    }

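    /**
     * Builds a Referenceable of type SqoopDataTypes.SQOOP_PROCESS linking the DB store and the
     * Hive table. For an import the DB store is the input and the Hive table the output; for an
     * export the direction is reversed. User, start/end times and command-line options are recorded.
     */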
    private Referenceable createSqoopProcessInstance(Referenceable dbStoreRef, Referenceable hiveTableRef,
            SqoopJobDataPublisher.Data data, String clusterName) {
        Referenceable procRef = new Referenceable(SqoopDataTypes.SQOOP_PROCESS.getName());
        final String sqoopProcessName = getSqoopProcessName(data, clusterName);
        procRef.set(AtlasClient.NAME, sqoopProcessName);
        procRef.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, sqoopProcessName);
        procRef.set(SqoopHook.OPERATION, data.getOperation());
        if (isImportOperation(data)) {
            procRef.set(SqoopHook.INPUTS, dbStoreRef);
            procRef.set(SqoopHook.OUTPUTS, hiveTableRef);
        } else {
            procRef.set(SqoopHook.INPUTS, hiveTableRef);
            procRef.set(SqoopHook.OUTPUTS, dbStoreRef);
        }
        procRef.set(SqoopHook.USER, data.getUser());
        procRef.set(SqoopHook.START_TIME, new Date(data.getStartTime()));
        procRef.set(SqoopHook.END_TIME, new Date(data.getEndTime()));

        Map<String, String> sqoopOptionsMap = new HashMap<>();
        Properties options = data.getOptions();
        for (Object k : options.keySet()) {
            sqoopOptionsMap.put((String) k, (String) options.get(k));
        }
        procRef.set(SqoopHook.CMD_LINE_OPTS, sqoopOptionsMap);

        return procRef;
    }

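    /**
     * Derives a unique process name that mirrors the Sqoop command line: operation, connect URL,
     * table or query, and the Hive database, table and cluster the job targets.
     */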
    static String getSqoopProcessName(Data data, String clusterName) {
        StringBuilder name = new StringBuilder(
                String.format("sqoop %s --connect %s", data.getOperation(), data.getUrl()));
        if (StringUtils.isNotEmpty(data.getStoreTable())) {
            name.append(" --table ").append(data.getStoreTable());
        }
        if (StringUtils.isNotEmpty(data.getStoreQuery())) {
            name.append(" --query ").append(data.getStoreQuery());
        }
        name.append(String.format(" --hive-%s --hive-database %s --hive-table %s --hive-cluster %s",
                data.getOperation(), data.getHiveDB().toLowerCase(), data.getHiveTable().toLowerCase(),
                clusterName));
        return name.toString();
    }

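    /**
     * Derives a unique name for the DB data store from the store type, connect URL and the
     * table or query the job reads or writes.
     */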
    static String getSqoopDBStoreName(SqoopJobDataPublisher.Data data) {
        StringBuilder name = new StringBuilder(String.format("%s --url %s", data.getStoreType(), data.getUrl()));
        if (StringUtils.isNotEmpty(data.getStoreTable())) {
            name.append(" --table ").append(data.getStoreTable());
        }
        if (StringUtils.isNotEmpty(data.getStoreQuery())) {
            name.append(" --query ").append(data.getStoreQuery());
        }
        return name.toString();
    }

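    /**
     * Returns true when the Sqoop operation is an import (data flowing into Hive), false otherwise.
     */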
    static boolean isImportOperation(SqoopJobDataPublisher.Data data) {
        return "import".equalsIgnoreCase(data.getOperation());
    }

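    /**
     * Called by Sqoop once the job finishes. Builds the DB store, Hive database, Hive table and
     * Sqoop process entities and sends them to Atlas as a single entity-create notification,
     * retried up to atlas.hook.sqoop.numRetries times (default 3).
     */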
    @Override
    public void publish(SqoopJobDataPublisher.Data data) throws AtlasHookException {
        try {
            Configuration atlasProperties = ApplicationProperties.get();
            String clusterName = atlasProperties.getString(ATLAS_CLUSTER_NAME, DEFAULT_CLUSTER_NAME);

            Referenceable dbStoreRef = createDBStoreInstance(data);
            Referenceable dbRef = createHiveDatabaseInstance(clusterName, data.getHiveDB());
            Referenceable hiveTableRef = createHiveTableInstance(clusterName, dbRef, data.getHiveTable(),
                    data.getHiveDB());
            Referenceable procRef = createSqoopProcessInstance(dbStoreRef, hiveTableRef, data, clusterName);

            int maxRetries = atlasProperties.getInt(HOOK_NUM_RETRIES, 3);
            HookNotification.HookNotificationMessage message = new HookNotification.EntityCreateRequest(
                    AtlasHook.getUser(), dbStoreRef, dbRef, hiveTableRef, procRef);
            AtlasHook.notifyEntities(Arrays.asList(message), maxRetries);
        } catch (Exception e) {
            throw new AtlasHookException("SqoopHook.publish() failed.", e);
        }
    }
}
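
Example

The snippet below is a minimal sketch, not part of the Atlas sources, showing how the Referenceable helpers above can be called directly. The cluster, database and table names are hypothetical, and the Atlas typesystem and Hadoop client jars are assumed to be on the classpath.

package org.apache.atlas.sqoop.hook;

import org.apache.atlas.typesystem.Referenceable;

public class SqoopHookExample {
    public static void main(String[] args) {
        SqoopHook hook = new SqoopHook();

        // Hypothetical cluster, database and table names, for illustration only.
        Referenceable dbRef = hook.createHiveDatabaseInstance("primary", "default");
        Referenceable tableRef = hook.createHiveTableInstance("primary", dbRef, "customers", "default");

        // The qualified names follow HiveMetaStoreBridge's conventions for Hive entities.
        System.out.println(dbRef);
        System.out.println(tableRef);
    }
}

In real use Sqoop invokes publish(SqoopJobDataPublisher.Data) itself after the job finishes; the sketch only exercises the entity-building helpers.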