gobblin.service.modules.flow.IdentityFlowToJobSpecCompiler.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.service.modules.flow.IdentityFlowToJobSpecCompiler.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.service.modules.flow;

import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;

import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import lombok.Getter;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.Meter;
import com.codahale.metrics.Timer;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigValueFactory;

import gobblin.annotation.Alpha;
import gobblin.configuration.ConfigurationKeys;
import gobblin.instrumented.Instrumented;
import gobblin.metrics.MetricContext;
import gobblin.metrics.Tag;
import gobblin.runtime.api.FlowSpec;
import gobblin.runtime.api.JobSpec;
import gobblin.runtime.api.JobTemplate;
import gobblin.runtime.api.Spec;
import gobblin.runtime.api.SpecCompiler;
import gobblin.runtime.api.SpecExecutorInstance;
import gobblin.runtime.api.SpecExecutorInstanceProducer;
import gobblin.runtime.api.SpecNotFoundException;
import gobblin.runtime.api.TopologySpec;
import gobblin.runtime.job_catalog.FSJobCatalog;
import gobblin.runtime.job_spec.ResolvedJobSpec;
import gobblin.service.ServiceConfigKeys;
import gobblin.service.ServiceMetricNames;
import gobblin.util.ConfigUtils;

/***
 * Take in a logical {@link Spec} ie flow and compile corresponding materialized job {@link Spec}
 * and its mapping to {@link SpecExecutorInstance}.
 */
@Alpha
public class IdentityFlowToJobSpecCompiler implements SpecCompiler {

    private final Map<URI, TopologySpec> topologySpecMap;
    private final Config config;
    private final Logger log;
    private final Optional<FSJobCatalog> templateCatalog;

    protected final MetricContext metricContext;
    @Getter
    private Optional<Meter> flowCompilationSuccessFulMeter;
    @Getter
    private Optional<Meter> flowCompilationFailedMeter;
    @Getter
    private Optional<Timer> flowCompilationTimer;

    public IdentityFlowToJobSpecCompiler(Config config) {
        this(config, true);
    }

    public IdentityFlowToJobSpecCompiler(Config config, boolean instrumentationEnabled) {
        this(config, Optional.<Logger>absent(), instrumentationEnabled);
    }

    public IdentityFlowToJobSpecCompiler(Config config, Optional<Logger> log) {
        this(config, log, true);
    }

    public IdentityFlowToJobSpecCompiler(Config config, Optional<Logger> log, boolean instrumentationEnabled) {
        this.log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass());
        if (instrumentationEnabled) {
            this.metricContext = Instrumented.getMetricContext(ConfigUtils.configToState(config),
                    IdentityFlowToJobSpecCompiler.class);
            this.flowCompilationSuccessFulMeter = Optional
                    .of(this.metricContext.meter(ServiceMetricNames.FLOW_COMPILATION_SUCCESSFUL_METER));
            this.flowCompilationFailedMeter = Optional
                    .of(this.metricContext.meter(ServiceMetricNames.FLOW_COMPILATION_FAILED_METER));
            this.flowCompilationTimer = Optional
                    .<Timer>of(this.metricContext.timer(ServiceMetricNames.FLOW_COMPILATION_TIMER));
        } else {
            this.metricContext = null;
            this.flowCompilationSuccessFulMeter = Optional.absent();
            this.flowCompilationFailedMeter = Optional.absent();
            this.flowCompilationTimer = Optional.absent();
        }

        this.topologySpecMap = Maps.newConcurrentMap();
        this.config = config;
        /***
         * For multi-tenancy, the following needs to be added:
         * 1. Change singular templateCatalog to Map<URI, JobCatalogWithTemplates> to support multiple templateCatalogs
         * 2. Pick templateCatalog from JobCatalogWithTemplates based on URI, and try to resolve JobSpec using that
         */
        try {
            if (this.config.hasPath(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY)
                    && StringUtils.isNotBlank(
                            this.config.getString(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY))) {
                Config templateCatalogCfg = config.withValue(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
                        this.config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
                this.templateCatalog = Optional.of(new FSJobCatalog(templateCatalogCfg));
            } else {
                this.templateCatalog = Optional.absent();
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not initialize IdentityFlowToJobSpecCompiler because of "
                    + "TemplateCatalog initialization failure", e);
        }
    }

    @Override
    public Map<Spec, SpecExecutorInstanceProducer> compileFlow(Spec spec) {
        Preconditions.checkNotNull(spec);
        Preconditions.checkArgument(spec instanceof FlowSpec,
                "IdentityFlowToJobSpecCompiler only converts FlowSpec to JobSpec");

        long startTime = System.nanoTime();
        Map<Spec, SpecExecutorInstanceProducer> specExecutorInstanceMap = Maps.newLinkedHashMap();

        FlowSpec flowSpec = (FlowSpec) spec;
        String source = flowSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY);
        String destination = flowSpec.getConfig().getString(ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
        log.info(String.format("Compiling flow for source: %s and destination: %s", source, destination));

        JobSpec jobSpec;
        JobSpec.Builder jobSpecBuilder = JobSpec.builder(flowSpec.getUri()).withConfig(flowSpec.getConfig())
                .withDescription(flowSpec.getDescription()).withVersion(flowSpec.getVersion());

        if (flowSpec.getTemplateURIs().isPresent() && templateCatalog.isPresent()) {
            // Only first template uri will be honored for Identity
            jobSpecBuilder = jobSpecBuilder.withTemplate(flowSpec.getTemplateURIs().get().iterator().next());
            try {
                jobSpec = new ResolvedJobSpec(jobSpecBuilder.build(), templateCatalog.get());
                log.info("Resolved JobSpec properties are: " + jobSpec.getConfigAsProperties());
            } catch (SpecNotFoundException | JobTemplate.TemplateException e) {
                throw new RuntimeException("Could not resolve template in JobSpec from TemplateCatalog", e);
            }
        } else {
            jobSpec = jobSpecBuilder.build();
            log.info("Unresolved JobSpec properties are: " + jobSpec.getConfigAsProperties());
        }

        // Remove schedule
        jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY));

        // Add job.name and job.group
        if (flowSpec.getConfig().hasPath(ConfigurationKeys.FLOW_NAME_KEY)) {
            jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_NAME_KEY,
                    flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY)));
        }
        if (flowSpec.getConfig().hasPath(ConfigurationKeys.FLOW_GROUP_KEY)) {
            jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_GROUP_KEY,
                    flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY)));
        }

        // Add flow execution id for this compilation
        long flowExecutionId = System.currentTimeMillis();
        jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY,
                ConfigValueFactory.fromAnyRef(flowExecutionId)));

        // Reset properties in Spec from Config
        jobSpec.setConfigAsProperties(ConfigUtils.configToProperties(jobSpec.getConfig()));

        for (TopologySpec topologySpec : topologySpecMap.values()) {
            try {
                Map<String, String> capabilities = (Map<String, String>) topologySpec
                        .getSpecExecutorInstanceProducer().getCapabilities().get();
                for (Map.Entry<String, String> capability : capabilities.entrySet()) {
                    log.info(String.format(
                            "Evaluating current JobSpec: %s against TopologySpec: %s with "
                                    + "capability of source: %s and destination: %s ",
                            jobSpec.getUri(), topologySpec.getUri(), capability.getKey(), capability.getValue()));
                    if (source.equals(capability.getKey()) && destination.equals(capability.getValue())) {
                        specExecutorInstanceMap.put(jobSpec, topologySpec.getSpecExecutorInstanceProducer());
                        log.info(String.format(
                                "Current JobSpec: %s is executable on TopologySpec: %s. Added TopologySpec as candidate.",
                                jobSpec.getUri(), topologySpec.getUri()));

                        log.info("Since we found a candidate executor, we will not try to compute more. "
                                + "(Intended limitation for IdentityFlowToJobSpecCompiler)");
                        return specExecutorInstanceMap;
                    }
                }
            } catch (InterruptedException | ExecutionException e) {
                Instrumented.markMeter(this.flowCompilationFailedMeter);
                throw new RuntimeException("Cannot determine topology capabilities", e);
            }
        }
        Instrumented.markMeter(this.flowCompilationSuccessFulMeter);
        Instrumented.updateTimer(this.flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);

        return specExecutorInstanceMap;
    }

    @Override
    public Map<URI, TopologySpec> getTopologySpecMap() {
        return this.topologySpecMap;
    }

    @Override
    public void onAddSpec(Spec addedSpec) {
        topologySpecMap.put(addedSpec.getUri(), (TopologySpec) addedSpec);
    }

    @Override
    public void onDeleteSpec(URI deletedSpecURI, String deletedSpecVersion) {
        topologySpecMap.remove(deletedSpecURI);
    }

    @Override
    public void onUpdateSpec(Spec updatedSpec) {
        topologySpecMap.put(updatedSpec.getUri(), (TopologySpec) updatedSpec);
    }

    @Nonnull
    @Override
    public MetricContext getMetricContext() {
        return this.metricContext;
    }

    @Override
    public boolean isInstrumentationEnabled() {
        return null != this.metricContext;
    }

    @Override
    public List<Tag<?>> generateTags(gobblin.configuration.State state) {
        return Collections.emptyList();
    }

    @Override
    public void switchMetricContext(List<Tag<?>> tags) {
        throw new UnsupportedOperationException();
    }

    @Override
    public void switchMetricContext(MetricContext context) {
        throw new UnsupportedOperationException();
    }
}