Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.atlas.reporting; import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer.PROVENANCE_BATCH_SIZE; import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer.PROVENANCE_START_POSITION; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.time.Instant; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.ServiceLoader; import java.util.Set; import java.util.function.Consumer; import java.util.stream.Stream; import org.apache.atlas.ApplicationProperties; import org.apache.atlas.AtlasServiceException; import org.apache.commons.lang3.StringUtils; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.config.SslConfigs; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; import org.apache.nifi.annotation.behavior.Stateful; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.lifecycle.OnStopped; import org.apache.nifi.annotation.lifecycle.OnUnscheduled; import org.apache.nifi.atlas.hook.NiFiAtlasHook; import org.apache.nifi.atlas.NiFiAtlasClient; import org.apache.nifi.atlas.NiFiFlow; import org.apache.nifi.atlas.NiFiFlowAnalyzer; import org.apache.nifi.atlas.provenance.AnalysisContext; import org.apache.nifi.atlas.provenance.StandardAnalysisContext; import org.apache.nifi.atlas.provenance.lineage.CompleteFlowPathLineage; import org.apache.nifi.atlas.provenance.lineage.LineageStrategy; import org.apache.nifi.atlas.provenance.lineage.SimpleFlowPathLineage; import org.apache.nifi.atlas.resolver.ClusterResolver; import org.apache.nifi.atlas.resolver.ClusterResolvers; import org.apache.nifi.atlas.resolver.RegexClusterResolver; import org.apache.nifi.atlas.security.AtlasAuthN; import org.apache.nifi.atlas.security.Basic; import org.apache.nifi.atlas.security.Kerberos; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.PropertyValue; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.state.Scope; import org.apache.nifi.context.PropertyContext; import org.apache.nifi.controller.ConfigurationContext; import org.apache.nifi.controller.status.ProcessGroupStatus; import org.apache.nifi.kerberos.KerberosCredentialsService; import org.apache.nifi.expression.ExpressionLanguageScope; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.provenance.ProvenanceEventRecord; import org.apache.nifi.provenance.ProvenanceRepository; import org.apache.nifi.reporting.AbstractReportingTask; import org.apache.nifi.reporting.EventAccess; import org.apache.nifi.reporting.ReportingContext; import org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer; import org.apache.nifi.ssl.SSLContextService; import com.sun.jersey.api.client.ClientResponse; @Tags({ "atlas", "lineage" }) @CapabilityDescription("Report NiFi flow data set level lineage to Apache Atlas." + " End-to-end lineages across NiFi environments and other systems can be reported if those are" + " connected by different protocols and data set, such as NiFi Site-to-Site, Kafka topic or Hive tables ... etc." + " Atlas lineage reported by this reporting task can be useful to grasp the high level relationships between processes and data sets," + " in addition to NiFi provenance events providing detailed event level lineage." + " See 'Additional Details' for further description and limitations.") @Stateful(scopes = Scope.LOCAL, description = "Stores the Reporting Task's last event Id so that on restart the task knows where it left off.") @DynamicProperty(name = "hostnamePattern.<ClusterName>", value = "hostname Regex patterns", description = RegexClusterResolver.PATTERN_PROPERTY_PREFIX_DESC, expressionLanguageScope = ExpressionLanguageScope.VARIABLE_REGISTRY) // In order for each reporting task instance to have its own static objects such as KafkaNotification. @RequiresInstanceClassLoading public class ReportLineageToAtlas extends AbstractReportingTask { static final PropertyDescriptor ATLAS_URLS = new PropertyDescriptor.Builder().name("atlas-urls") .displayName("Atlas URLs") .description("Comma separated URL of Atlas Servers" + " (e.g. http://atlas-server-hostname:21000 or https://atlas-server-hostname:21443)." + " For accessing Atlas behind Knox gateway, specify Knox gateway URL" + " (e.g. https://knox-hostname:8443/gateway/{topology-name}/atlas).") .required(true).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); static final AllowableValue ATLAS_AUTHN_BASIC = new AllowableValue("basic", "Basic", "Use username and password."); static final AllowableValue ATLAS_AUTHN_KERBEROS = new AllowableValue("kerberos", "Kerberos", "Use Kerberos keytab file."); static final PropertyDescriptor ATLAS_AUTHN_METHOD = new PropertyDescriptor.Builder() .name("atlas-authentication-method").displayName("Atlas Authentication Method") .description("Specify how to authenticate this reporting task to Atlas server.").required(true) .allowableValues(ATLAS_AUTHN_BASIC, ATLAS_AUTHN_KERBEROS).defaultValue(ATLAS_AUTHN_BASIC.getValue()) .build(); public static final PropertyDescriptor ATLAS_USER = new PropertyDescriptor.Builder().name("atlas-username") .displayName("Atlas Username").description("User name to communicate with Atlas.").required(false) .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); public static final PropertyDescriptor ATLAS_PASSWORD = new PropertyDescriptor.Builder().name("atlas-password") .displayName("Atlas Password").description("Password to communicate with Atlas.").required(false) .sensitive(true).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); static final PropertyDescriptor ATLAS_CONF_DIR = new PropertyDescriptor.Builder().name("atlas-conf-dir") .displayName("Atlas Configuration Directory") .description("Directory path that contains 'atlas-application.properties' file." + " If not specified and 'Create Atlas Configuration File' is disabled," + " then, 'atlas-application.properties' file under root classpath is used.") .required(false).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); public static final PropertyDescriptor ATLAS_NIFI_URL = new PropertyDescriptor.Builder().name("atlas-nifi-url") .displayName("NiFi URL for Atlas") .description("NiFi URL is used in Atlas to represent this NiFi cluster (or standalone instance)." + " It is recommended to use one that can be accessible remotely instead of using 'localhost'.") .required(true).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.URL_VALIDATOR).build(); public static final PropertyDescriptor ATLAS_DEFAULT_CLUSTER_NAME = new PropertyDescriptor.Builder() .name("atlas-default-cluster-name").displayName("Atlas Default Cluster Name") .description("Cluster name for Atlas entities reported by this ReportingTask." + " If not specified, 'atlas.cluster.name' in Atlas Configuration File is used." + " Cluster name mappings can be configured by user defined properties." + " See additional detail for detail.") .required(false).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); static final PropertyDescriptor ATLAS_CONF_CREATE = new PropertyDescriptor.Builder().name("atlas-conf-create") .displayName("Create Atlas Configuration File") .description( "If enabled, 'atlas-application.properties' file will be created in 'Atlas Configuration Directory'" + " automatically when this Reporting Task starts." + " Note that the existing configuration file will be overwritten.") .required(true).expressionLanguageSupported(ExpressionLanguageScope.NONE) .allowableValues("true", "false").defaultValue("false").build(); static final PropertyDescriptor SSL_CONTEXT_SERVICE = new PropertyDescriptor.Builder() .name("ssl-context-service").displayName("SSL Context Service") .description("Specifies the SSL Context Service to use for communicating with Atlas and Kafka.") .required(false).identifiesControllerService(SSLContextService.class).build(); static final PropertyDescriptor KAFKA_BOOTSTRAP_SERVERS = new PropertyDescriptor.Builder() .name("kafka-bootstrap-servers").displayName("Kafka Bootstrap Servers") .description( "Kafka Bootstrap Servers to send Atlas hook notification messages based on NiFi provenance events." + " E.g. 'localhost:9092'" + " NOTE: Once this reporting task has started, restarting NiFi is required to changed this property" + " as Atlas library holds a unmodifiable static reference to Kafka client.") .required(false).expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) .addValidator(StandardValidators.NON_BLANK_VALIDATOR).build(); static final AllowableValue SEC_PLAINTEXT = new AllowableValue("PLAINTEXT", "PLAINTEXT", "PLAINTEXT"); static final AllowableValue SEC_SSL = new AllowableValue("SSL", "SSL", "SSL"); static final AllowableValue SEC_SASL_PLAINTEXT = new AllowableValue("SASL_PLAINTEXT", "SASL_PLAINTEXT", "SASL_PLAINTEXT"); static final AllowableValue SEC_SASL_SSL = new AllowableValue("SASL_SSL", "SASL_SSL", "SASL_SSL"); static final PropertyDescriptor KAFKA_SECURITY_PROTOCOL = new PropertyDescriptor.Builder() .name("kafka-security-protocol").displayName("Kafka Security Protocol") .description("Protocol used to communicate with Kafka brokers to send Atlas hook notification messages." + " Corresponds to Kafka's 'security.protocol' property.") .required(true).expressionLanguageSupported(ExpressionLanguageScope.NONE) .allowableValues(SEC_PLAINTEXT, SEC_SSL, SEC_SASL_PLAINTEXT, SEC_SASL_SSL) .defaultValue(SEC_PLAINTEXT.getValue()).build(); public static final PropertyDescriptor NIFI_KERBEROS_PRINCIPAL = new PropertyDescriptor.Builder() .name("nifi-kerberos-principal").displayName("NiFi Kerberos Principal") .description("The Kerberos principal for this NiFi instance to access Atlas API and Kafka brokers." + " If not set, it is expected to set a JAAS configuration file in the JVM properties defined in the bootstrap.conf file." + " This principal will be set into 'sasl.jaas.config' Kafka's property.") .required(false).addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY).build(); public static final PropertyDescriptor NIFI_KERBEROS_KEYTAB = new PropertyDescriptor.Builder() .name("nifi-kerberos-keytab").displayName("NiFi Kerberos Keytab") .description("The Kerberos keytab for this NiFi instance to access Atlas API and Kafka brokers." + " If not set, it is expected to set a JAAS configuration file in the JVM properties defined in the bootstrap.conf file." + " This principal will be set into 'sasl.jaas.config' Kafka's property.") .required(false).addValidator(StandardValidators.FILE_EXISTS_VALIDATOR) .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY).build(); public static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder() .name("kerberos-credentials-service").displayName("Kerberos Credentials Service") .description( "Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos") .identifiesControllerService(KerberosCredentialsService.class).required(false).build(); static final PropertyDescriptor KAFKA_KERBEROS_SERVICE_NAME = new PropertyDescriptor.Builder() .name("kafka-kerberos-service-name").displayName("Kafka Kerberos Service Name") .description("The Kerberos principal name that Kafka runs for Atlas notification." + " This can be defined either in Kafka's JAAS config or in Kafka's config." + " Corresponds to Kafka's 'security.protocol' property." + " It is ignored unless one of the SASL options of the <Security Protocol> are selected.") .required(false).addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY).defaultValue("kafka").build(); static final AllowableValue LINEAGE_STRATEGY_SIMPLE_PATH = new AllowableValue("SimplePath", "Simple Path", "Map NiFi provenance events and target Atlas DataSets to statically created 'nifi_flow_path' Atlas Processes." + " See also 'Additional Details'."); static final AllowableValue LINEAGE_STRATEGY_COMPLETE_PATH = new AllowableValue("CompletePath", "Complete Path", "Create separate 'nifi_flow_path' Atlas Processes for each distinct input and output DataSet combinations" + " by looking at the complete route for a given FlowFile. See also 'Additional Details."); static final PropertyDescriptor NIFI_LINEAGE_STRATEGY = new PropertyDescriptor.Builder() .name("nifi-lineage-strategy").displayName("NiFi Lineage Strategy") .description("Specifies granularity on how NiFi data flow should be reported to Atlas." + " NOTE: It is strongly recommended to keep using the same strategy once this reporting task started to keep Atlas data clean." + " Switching strategies will not delete Atlas entities created by the old strategy." + " Having mixed entities created by different strategies makes Atlas lineage graph noisy." + " For more detailed description on each strategy and differences, refer 'NiFi Lineage Strategy' section in Additional Details.") .required(true).allowableValues(LINEAGE_STRATEGY_SIMPLE_PATH, LINEAGE_STRATEGY_COMPLETE_PATH) .defaultValue(LINEAGE_STRATEGY_SIMPLE_PATH.getValue()).build(); private static final String ATLAS_PROPERTIES_FILENAME = "atlas-application.properties"; private static final String ATLAS_PROPERTY_CLUSTER_NAME = "atlas.cluster.name"; private static final String ATLAS_PROPERTY_ENABLE_TLS = "atlas.enableTLS"; private static final String ATLAS_KAFKA_PREFIX = "atlas.kafka."; private static final String ATLAS_PROPERTY_KAFKA_BOOTSTRAP_SERVERS = ATLAS_KAFKA_PREFIX + "bootstrap.servers"; private static final String ATLAS_PROPERTY_KAFKA_CLIENT_ID = ATLAS_KAFKA_PREFIX + ProducerConfig.CLIENT_ID_CONFIG; private final ServiceLoader<ClusterResolver> clusterResolverLoader = ServiceLoader.load(ClusterResolver.class); private volatile AtlasAuthN atlasAuthN; private volatile Properties atlasProperties; private volatile boolean isTypeDefCreated = false; private volatile String defaultClusterName; private volatile ProvenanceEventConsumer consumer; private volatile ClusterResolvers clusterResolvers; private volatile NiFiAtlasHook nifiAtlasHook; private volatile LineageStrategy lineageStrategy; @Override protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { final List<PropertyDescriptor> properties = new ArrayList<>(); properties.add(ATLAS_URLS); properties.add(ATLAS_AUTHN_METHOD); properties.add(ATLAS_USER); properties.add(ATLAS_PASSWORD); properties.add(ATLAS_CONF_DIR); properties.add(ATLAS_NIFI_URL); properties.add(ATLAS_DEFAULT_CLUSTER_NAME); properties.add(NIFI_LINEAGE_STRATEGY); properties.add(PROVENANCE_START_POSITION); properties.add(PROVENANCE_BATCH_SIZE); properties.add(SSL_CONTEXT_SERVICE); // Following properties are required if ATLAS_CONF_CREATE is enabled. // Otherwise should be left blank. properties.add(ATLAS_CONF_CREATE); properties.add(KERBEROS_CREDENTIALS_SERVICE); properties.add(NIFI_KERBEROS_PRINCIPAL); properties.add(NIFI_KERBEROS_KEYTAB); properties.add(KAFKA_KERBEROS_SERVICE_NAME); properties.add(KAFKA_BOOTSTRAP_SERVERS); properties.add(KAFKA_SECURITY_PROTOCOL); return properties; } @Override protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) { for (ClusterResolver resolver : clusterResolverLoader) { final PropertyDescriptor propertyDescriptor = resolver .getSupportedDynamicPropertyDescriptor(propertyDescriptorName); if (propertyDescriptor != null) { return propertyDescriptor; } } return null; } private void parseAtlasUrls(final PropertyValue atlasUrlsProp, final Consumer<String> urlStrConsumer) { final String atlasUrlsStr = atlasUrlsProp.evaluateAttributeExpressions().getValue(); if (atlasUrlsStr != null && !atlasUrlsStr.isEmpty()) { Arrays.stream(atlasUrlsStr.split(",")).map(String::trim).forEach(urlStrConsumer); } } @Override protected Collection<ValidationResult> customValidate(ValidationContext context) { final Collection<ValidationResult> results = new ArrayList<>(); final boolean isSSLContextServiceSet = context.getProperty(SSL_CONTEXT_SERVICE).isSet(); final ValidationResult.Builder invalidSSLService = new ValidationResult.Builder() .subject(SSL_CONTEXT_SERVICE.getDisplayName()).valid(false); parseAtlasUrls(context.getProperty(ATLAS_URLS), input -> { final ValidationResult.Builder builder = new ValidationResult.Builder() .subject(ATLAS_URLS.getDisplayName()).input(input); try { final URL url = new URL(input); if ("https".equalsIgnoreCase(url.getProtocol()) && !isSSLContextServiceSet) { results.add(invalidSSLService.explanation("required by HTTPS Atlas access").build()); } else { results.add(builder.explanation("Valid URI").valid(true).build()); } } catch (Exception e) { results.add(builder.explanation("Contains invalid URI: " + e).valid(false).build()); } }); final String atlasAuthNMethod = context.getProperty(ATLAS_AUTHN_METHOD).getValue(); final AtlasAuthN atlasAuthN = getAtlasAuthN(atlasAuthNMethod); results.addAll(atlasAuthN.validate(context)); clusterResolverLoader.forEach(resolver -> results.addAll(resolver.validate(context))); if (context.getProperty(ATLAS_CONF_CREATE).asBoolean()) { Stream.of(ATLAS_CONF_DIR, ATLAS_DEFAULT_CLUSTER_NAME, KAFKA_BOOTSTRAP_SERVERS) .filter(p -> !context.getProperty(p).isSet()) .forEach(p -> results.add(new ValidationResult.Builder().subject(p.getDisplayName()) .explanation("required to create Atlas configuration file.").valid(false).build())); validateKafkaProperties(context, results, isSSLContextServiceSet, invalidSSLService); } return results; } private void validateKafkaProperties(ValidationContext context, Collection<ValidationResult> results, boolean isSSLContextServiceSet, ValidationResult.Builder invalidSSLService) { final String kafkaSecurityProtocol = context.getProperty(KAFKA_SECURITY_PROTOCOL).getValue(); if ((SEC_SSL.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol)) && !isSSLContextServiceSet) { results.add(invalidSSLService.explanation("required by SSL Kafka connection").build()); } final String explicitPrincipal = context.getProperty(NIFI_KERBEROS_PRINCIPAL).evaluateAttributeExpressions() .getValue(); final String explicitKeytab = context.getProperty(NIFI_KERBEROS_KEYTAB).evaluateAttributeExpressions() .getValue(); final KerberosCredentialsService credentialsService = context .getProperty(ReportLineageToAtlas.KERBEROS_CREDENTIALS_SERVICE) .asControllerService(KerberosCredentialsService.class); String principal; String keytab; if (credentialsService == null) { principal = explicitPrincipal; keytab = explicitKeytab; } else { principal = credentialsService.getPrincipal(); keytab = credentialsService.getKeytab(); } if (SEC_SASL_PLAINTEXT.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol)) { if (!context.getProperty(KAFKA_KERBEROS_SERVICE_NAME).isSet()) { results.add(new ValidationResult.Builder().subject(KAFKA_KERBEROS_SERVICE_NAME.getDisplayName()) .explanation("Required by Kafka SASL authentication.").valid(false).build()); } if (keytab == null || principal == null) { results.add(new ValidationResult.Builder().subject("Kerberos Authentication") .explanation( "Keytab and Principal are required for Kerberos authentication with Apache Kafka.") .valid(false).build()); } } } @OnScheduled public void setup(ConfigurationContext context) throws IOException { // initAtlasClient has to be done first as it loads AtlasProperty. initAtlasProperties(context); initLineageStrategy(context); initClusterResolvers(context); } private void initLineageStrategy(ConfigurationContext context) throws IOException { nifiAtlasHook = new NiFiAtlasHook(); final String strategy = context.getProperty(NIFI_LINEAGE_STRATEGY).getValue(); if (LINEAGE_STRATEGY_SIMPLE_PATH.equals(strategy)) { lineageStrategy = new SimpleFlowPathLineage(); } else if (LINEAGE_STRATEGY_COMPLETE_PATH.equals(strategy)) { lineageStrategy = new CompleteFlowPathLineage(); } lineageStrategy.setLineageContext(nifiAtlasHook); initProvenanceConsumer(context); } private void initClusterResolvers(ConfigurationContext context) { final Set<ClusterResolver> loadedClusterResolvers = new LinkedHashSet<>(); clusterResolverLoader.forEach(resolver -> { resolver.configure(context); loadedClusterResolvers.add(resolver); }); clusterResolvers = new ClusterResolvers(Collections.unmodifiableSet(loadedClusterResolvers), defaultClusterName); } private void initAtlasProperties(ConfigurationContext context) throws IOException { List<String> urls = new ArrayList<>(); parseAtlasUrls(context.getProperty(ATLAS_URLS), urls::add); final boolean isAtlasApiSecure = urls.stream().anyMatch(url -> url.toLowerCase().startsWith("https")); final String atlasAuthNMethod = context.getProperty(ATLAS_AUTHN_METHOD).getValue(); final String confDirStr = context.getProperty(ATLAS_CONF_DIR).evaluateAttributeExpressions().getValue(); final File confDir = confDirStr != null && !confDirStr.isEmpty() ? new File(confDirStr) : null; atlasProperties = new Properties(); final File atlasPropertiesFile = new File(confDir, ATLAS_PROPERTIES_FILENAME); final Boolean createAtlasConf = context.getProperty(ATLAS_CONF_CREATE).asBoolean(); if (!createAtlasConf) { // Load existing properties file. if (atlasPropertiesFile.isFile()) { getLogger().info("Loading {}", new Object[] { atlasPropertiesFile }); try (InputStream in = new FileInputStream(atlasPropertiesFile)) { atlasProperties.load(in); } } else { final String fileInClasspath = "/" + ATLAS_PROPERTIES_FILENAME; try (InputStream in = ReportLineageToAtlas.class.getResourceAsStream(fileInClasspath)) { getLogger().info("Loading {} from classpath", new Object[] { fileInClasspath }); if (in == null) { throw new ProcessException(String.format( "Could not find %s in classpath." + " Please add it to classpath," + " or specify %s a directory containing Atlas properties file," + " or enable %s to generate it.", fileInClasspath, ATLAS_CONF_DIR.getDisplayName(), ATLAS_CONF_CREATE.getDisplayName())); } atlasProperties.load(in); } } } // Resolve default cluster name. defaultClusterName = context.getProperty(ATLAS_DEFAULT_CLUSTER_NAME).evaluateAttributeExpressions() .getValue(); if (defaultClusterName == null || defaultClusterName.isEmpty()) { // If default cluster name is not specified by processor configuration, then load it from Atlas config. defaultClusterName = atlasProperties.getProperty(ATLAS_PROPERTY_CLUSTER_NAME); } // If default cluster name is still not defined, processor should not be able to start. if (defaultClusterName == null || defaultClusterName.isEmpty()) { throw new ProcessException("Default cluster name is not defined."); } atlasAuthN = getAtlasAuthN(atlasAuthNMethod); atlasAuthN.configure(context); // Create Atlas configuration file if necessary. if (createAtlasConf) { atlasProperties.put(ATLAS_PROPERTY_CLUSTER_NAME, defaultClusterName); atlasProperties.put(ATLAS_PROPERTY_ENABLE_TLS, String.valueOf(isAtlasApiSecure)); setKafkaConfig(atlasProperties, context); atlasAuthN.populateProperties(atlasProperties); try (FileOutputStream fos = new FileOutputStream(atlasPropertiesFile)) { String ts = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSX").withZone(ZoneOffset.UTC) .format(Instant.now()); atlasProperties.store(fos, "Generated by Apache NiFi ReportLineageToAtlas ReportingTask at " + ts); } } getLogger().debug("Force reloading Atlas application properties."); ApplicationProperties.forceReload(); if (confDir != null) { // If atlasConfDir is not set, atlas-application.properties will be searched under classpath. Properties props = System.getProperties(); final String atlasConfProp = "atlas.conf"; props.setProperty(atlasConfProp, confDir.getAbsolutePath()); getLogger().debug("{} has been set to: {}", new Object[] { atlasConfProp, props.getProperty(atlasConfProp) }); } } /** * In order to avoid authentication expiration issues (i.e. Kerberos ticket and DelegationToken expiration), * create Atlas client instance at every onTrigger execution. */ private NiFiAtlasClient createNiFiAtlasClient(ReportingContext context) { List<String> urls = new ArrayList<>(); parseAtlasUrls(context.getProperty(ATLAS_URLS), urls::add); try { return new NiFiAtlasClient(atlasAuthN.createClient(urls.toArray(new String[] {}))); } catch (final NullPointerException e) { throw new ProcessException(String.format("Failed to initialize Atlas client due to %s." + " Make sure 'atlas-application.properties' is in the directory specified with %s" + " or under root classpath if not specified.", e, ATLAS_CONF_DIR.getDisplayName()), e); } } private AtlasAuthN getAtlasAuthN(String atlasAuthNMethod) { final AtlasAuthN atlasAuthN; switch (atlasAuthNMethod) { case "basic": atlasAuthN = new Basic(); break; case "kerberos": atlasAuthN = new Kerberos(); break; default: throw new IllegalArgumentException( atlasAuthNMethod + " is not supported as an Atlas authentication method."); } return atlasAuthN; } private void initProvenanceConsumer(final ConfigurationContext context) throws IOException { consumer = new ProvenanceEventConsumer(); consumer.setStartPositionValue(context.getProperty(PROVENANCE_START_POSITION).getValue()); consumer.setBatchSize(context.getProperty(PROVENANCE_BATCH_SIZE).asInteger()); consumer.addTargetEventType(lineageStrategy.getTargetEventTypes()); consumer.setLogger(getLogger()); consumer.setScheduled(true); } @OnUnscheduled public void onUnscheduled() { if (consumer != null) { // Tell provenance consumer to stop pulling more provenance events. // This should be called from @OnUnscheduled to stop the loop in the thread called from onTrigger. consumer.setScheduled(false); } } @OnStopped public void onStopped() { if (nifiAtlasHook != null) { nifiAtlasHook.close(); nifiAtlasHook = null; } } @Override public void onTrigger(ReportingContext context) { final String clusterNodeId = context.getClusterNodeIdentifier(); final boolean isClustered = context.isClustered(); if (isClustered && isEmpty(clusterNodeId)) { // Clustered, but this node's ID is unknown. Not ready for processing yet. return; } // If standalone or being primary node in a NiFi cluster, this node is responsible for doing primary tasks. final boolean isResponsibleForPrimaryTasks = !isClustered || getNodeTypeProvider().isPrimary(); final NiFiAtlasClient atlasClient = createNiFiAtlasClient(context); // Create Entity defs in Atlas if there's none yet. if (!isTypeDefCreated) { try { if (isResponsibleForPrimaryTasks) { // Create NiFi type definitions in Atlas type system. atlasClient.registerNiFiTypeDefs(false); } else { // Otherwise, just check existence of NiFi type definitions. if (!atlasClient.isNiFiTypeDefsRegistered()) { getLogger().debug("NiFi type definitions are not ready in Atlas type system yet."); return; } } isTypeDefCreated = true; } catch (AtlasServiceException e) { throw new RuntimeException( "Failed to check and create NiFi flow type definitions in Atlas due to " + e, e); } } // Regardless of whether being a primary task node, each node has to analyse NiFiFlow. // Assuming each node has the same flow definition, that is guaranteed by NiFi cluster management mechanism. final NiFiFlow nifiFlow = createNiFiFlow(context, atlasClient); if (isResponsibleForPrimaryTasks) { try { atlasClient.registerNiFiFlow(nifiFlow); } catch (AtlasServiceException e) { throw new RuntimeException("Failed to register NiFI flow. " + e, e); } } // NOTE: There is a race condition between the primary node and other nodes. // If a node notifies an event related to a NiFi component which is not yet created by NiFi primary node, // then the notification message will fail due to having a reference to a non-existing entity. nifiAtlasHook.setAtlasClient(atlasClient); consumeNiFiProvenanceEvents(context, nifiFlow); } private NiFiFlow createNiFiFlow(ReportingContext context, NiFiAtlasClient atlasClient) { final ProcessGroupStatus rootProcessGroup = context.getEventAccess().getGroupStatus("root"); final String flowName = rootProcessGroup.getName(); final String nifiUrl = context.getProperty(ATLAS_NIFI_URL).evaluateAttributeExpressions().getValue(); final String clusterName; try { final String nifiHostName = new URL(nifiUrl).getHost(); clusterName = clusterResolvers.fromHostNames(nifiHostName); } catch (MalformedURLException e) { throw new IllegalArgumentException("Failed to parse NiFi URL, " + e.getMessage(), e); } NiFiFlow existingNiFiFlow = null; try { // Retrieve Existing NiFiFlow from Atlas. existingNiFiFlow = atlasClient.fetchNiFiFlow(rootProcessGroup.getId(), clusterName); } catch (AtlasServiceException e) { if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) { getLogger().debug("Existing flow was not found for {}@{}", new Object[] { rootProcessGroup.getId(), clusterName }); } else { throw new RuntimeException("Failed to fetch existing NiFI flow. " + e, e); } } final NiFiFlow nifiFlow = existingNiFiFlow != null ? existingNiFiFlow : new NiFiFlow(rootProcessGroup.getId()); nifiFlow.setFlowName(flowName); nifiFlow.setUrl(nifiUrl); nifiFlow.setClusterName(clusterName); final NiFiFlowAnalyzer flowAnalyzer = new NiFiFlowAnalyzer(); flowAnalyzer.analyzeProcessGroup(nifiFlow, rootProcessGroup); flowAnalyzer.analyzePaths(nifiFlow); return nifiFlow; } private void consumeNiFiProvenanceEvents(ReportingContext context, NiFiFlow nifiFlow) { final EventAccess eventAccess = context.getEventAccess(); final AnalysisContext analysisContext = new StandardAnalysisContext(nifiFlow, clusterResolvers, // FIXME: This class cast shouldn't be necessary to query lineage. Possible refactor target in next major update. (ProvenanceRepository) eventAccess.getProvenanceRepository()); consumer.consumeEvents(context, (componentMapHolder, events) -> { for (ProvenanceEventRecord event : events) { try { lineageStrategy.processEvent(analysisContext, nifiFlow, event); } catch (Exception e) { // If something went wrong, log it and continue with other records. getLogger().error("Skipping failed analyzing event {} due to {}.", new Object[] { event, e, e }); } } nifiAtlasHook.commitMessages(); }); } private void setKafkaConfig(Map<Object, Object> mapToPopulate, PropertyContext context) { final String kafkaBootStrapServers = context.getProperty(KAFKA_BOOTSTRAP_SERVERS) .evaluateAttributeExpressions().getValue(); mapToPopulate.put(ATLAS_PROPERTY_KAFKA_BOOTSTRAP_SERVERS, kafkaBootStrapServers); mapToPopulate.put(ATLAS_PROPERTY_KAFKA_CLIENT_ID, String.format("%s.%s", getName(), getIdentifier())); final String kafkaSecurityProtocol = context.getProperty(KAFKA_SECURITY_PROTOCOL).getValue(); mapToPopulate.put(ATLAS_KAFKA_PREFIX + "security.protocol", kafkaSecurityProtocol); // Translate SSLContext Service configuration into Kafka properties final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE) .asControllerService(SSLContextService.class); if (sslContextService != null && sslContextService.isKeyStoreConfigured()) { mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, sslContextService.getKeyStoreFile()); mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, sslContextService.getKeyStorePassword()); final String keyPass = sslContextService.getKeyPassword() == null ? sslContextService.getKeyStorePassword() : sslContextService.getKeyPassword(); mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEY_PASSWORD_CONFIG, keyPass); mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, sslContextService.getKeyStoreType()); } if (sslContextService != null && sslContextService.isTrustStoreConfigured()) { mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, sslContextService.getTrustStoreFile()); mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, sslContextService.getTrustStorePassword()); mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, sslContextService.getTrustStoreType()); } if (SEC_SASL_PLAINTEXT.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol)) { setKafkaJaasConfig(mapToPopulate, context); } } /** * Populate Kafka JAAS properties for Atlas notification. * Since Atlas 0.8.1 uses Kafka client 0.10.0.0, we can not use 'sasl.jaas.config' property * as it is available since 0.10.2, implemented by KAFKA-4259. * Instead, this method uses old property names. * @param mapToPopulate Map of configuration properties * @param context Context */ private void setKafkaJaasConfig(Map<Object, Object> mapToPopulate, PropertyContext context) { String keytab; String principal; final String explicitPrincipal = context.getProperty(NIFI_KERBEROS_PRINCIPAL).evaluateAttributeExpressions() .getValue(); final String explicitKeytab = context.getProperty(NIFI_KERBEROS_KEYTAB).evaluateAttributeExpressions() .getValue(); final KerberosCredentialsService credentialsService = context .getProperty(ReportLineageToAtlas.KERBEROS_CREDENTIALS_SERVICE) .asControllerService(KerberosCredentialsService.class); if (credentialsService == null) { principal = explicitPrincipal; keytab = explicitKeytab; } else { principal = credentialsService.getPrincipal(); keytab = credentialsService.getKeytab(); } String serviceName = context.getProperty(KAFKA_KERBEROS_SERVICE_NAME).evaluateAttributeExpressions() .getValue(); if (StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal) && StringUtils.isNotBlank(serviceName)) { mapToPopulate.put("atlas.jaas.KafkaClient.loginModuleControlFlag", "required"); mapToPopulate.put("atlas.jaas.KafkaClient.loginModuleName", "com.sun.security.auth.module.Krb5LoginModule"); mapToPopulate.put("atlas.jaas.KafkaClient.option.keyTab", keytab); mapToPopulate.put("atlas.jaas.KafkaClient.option.principal", principal); mapToPopulate.put("atlas.jaas.KafkaClient.option.serviceName", serviceName); mapToPopulate.put("atlas.jaas.KafkaClient.option.storeKey", "True"); mapToPopulate.put("atlas.jaas.KafkaClient.option.useKeyTab", "True"); mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.loginModuleControlFlag", "required"); mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.loginModuleName", "com.sun.security.auth.module.Krb5LoginModule"); mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.option.useTicketCache", "true"); mapToPopulate.put(ATLAS_KAFKA_PREFIX + "sasl.kerberos.service.name", serviceName); } } }