Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.gobblin.service.modules.scheduler; import java.net.URI; import java.util.Collection; import java.util.Map; import java.util.Properties; import java.util.concurrent.Callable; import java.util.concurrent.Future; import org.apache.commons.lang.StringUtils; import org.apache.helix.HelixManager; import org.quartz.DisallowConcurrentExecution; import org.quartz.InterruptableJob; import org.quartz.JobDataMap; import org.quartz.JobExecutionContext; import org.quartz.JobExecutionException; import org.quartz.UnableToInterruptJobException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; import com.google.common.collect.Maps; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.annotation.Alpha; import org.apache.gobblin.configuration.ConfigurationKeys; import org.apache.gobblin.runtime.JobException; import org.apache.gobblin.runtime.api.FlowSpec; import org.apache.gobblin.runtime.api.Spec; import org.apache.gobblin.runtime.api.SpecCatalogListener; import org.apache.gobblin.runtime.listeners.JobListener; import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; import org.apache.gobblin.runtime.spec_catalog.TopologyCatalog; import org.apache.gobblin.scheduler.BaseGobblinJob; import org.apache.gobblin.scheduler.JobScheduler; import org.apache.gobblin.scheduler.SchedulerService; import org.apache.gobblin.service.ServiceConfigKeys; import org.apache.gobblin.service.modules.orchestration.Orchestrator; import org.apache.gobblin.util.ConfigUtils; import org.apache.gobblin.util.PropertiesUtils; /** * An extension to {@link JobScheduler} that is also a {@link SpecCatalogListener}. * {@link GobblinServiceJobScheduler} listens for new / updated {@link FlowSpec} and schedules * and runs them via {@link Orchestrator}. */ @Alpha public class GobblinServiceJobScheduler extends JobScheduler implements SpecCatalogListener { protected final Logger _log; protected final Optional<FlowCatalog> flowCatalog; protected final Optional<HelixManager> helixManager; protected final Orchestrator orchestrator; @Getter protected final Map<String, Spec> scheduledFlowSpecs; @Getter private volatile boolean isActive; private String serviceName; public GobblinServiceJobScheduler(String serviceName, Config config, Optional<HelixManager> helixManager, Optional<FlowCatalog> flowCatalog, Optional<TopologyCatalog> topologyCatalog, Orchestrator orchestrator, SchedulerService schedulerService, Optional<Logger> log) throws Exception { super(ConfigUtils.configToProperties(config), schedulerService); _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); this.serviceName = serviceName; this.flowCatalog = flowCatalog; this.helixManager = helixManager; this.orchestrator = orchestrator; this.scheduledFlowSpecs = Maps.newHashMap(); } public GobblinServiceJobScheduler(String serviceName, Config config, Optional<HelixManager> helixManager, Optional<FlowCatalog> flowCatalog, Optional<TopologyCatalog> topologyCatalog, SchedulerService schedulerService, Optional<Logger> log) throws Exception { this(serviceName, config, helixManager, flowCatalog, topologyCatalog, new Orchestrator(config, topologyCatalog, log), schedulerService, log); } public synchronized void setActive(boolean isActive) { if (this.isActive == isActive) { // No-op if already in correct state return; } // Since we are going to change status to isActive=true, schedule all flows if (isActive) { // Need to set active=true first; otherwise in the onAddSpec(), node will forward specs to active node, which is itself. this.isActive = isActive; if (this.flowCatalog.isPresent()) { Collection<Spec> specs = this.flowCatalog.get().getSpecsWithTimeUpdate(); for (Spec spec : specs) { //Disable FLOW_RUN_IMMEDIATELY on service startup or leadership change if (spec instanceof FlowSpec) { Spec modifiedSpec = disableFlowRunImmediatelyOnStart((FlowSpec) spec); onAddSpec(modifiedSpec); } else { onAddSpec(spec); } } } } // Since we are going to change status to isActive=false, unschedule all flows else { for (Spec spec : this.scheduledFlowSpecs.values()) { onDeleteSpec(spec.getUri(), spec.getVersion()); } // Need to set active=false at the end; otherwise in the onDeleteSpec(), node will forward specs to active node, which is itself. this.isActive = isActive; } } @VisibleForTesting protected static Spec disableFlowRunImmediatelyOnStart(FlowSpec spec) { Properties properties = spec.getConfigAsProperties(); properties.setProperty(ConfigurationKeys.FLOW_RUN_IMMEDIATELY, "false"); Config config = ConfigFactory.parseProperties(properties); FlowSpec flowSpec = new FlowSpec(spec.getUri(), spec.getVersion(), spec.getDescription(), config, properties, spec.getTemplateURIs(), spec.getChildSpecs()); return flowSpec; } @Override protected void startUp() throws Exception { super.startUp(); } /** * Synchronize the job scheduling because the same flowSpec can be scheduled by different threads. */ @Override public synchronized void scheduleJob(Properties jobProps, JobListener jobListener) throws JobException { Map<String, Object> additionalJobDataMap = Maps.newHashMap(); additionalJobDataMap.put(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWSPEC, this.scheduledFlowSpecs.get(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY))); try { scheduleJob(jobProps, jobListener, additionalJobDataMap, GobblinServiceJob.class); } catch (Exception e) { throw new JobException("Failed to schedule job " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); } } @Override public void runJob(Properties jobProps, JobListener jobListener) throws JobException { try { Spec flowSpec = this.scheduledFlowSpecs.get(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY)); this.orchestrator.orchestrate(flowSpec); } catch (Exception e) { throw new JobException("Failed to run Spec: " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); } } /** {@inheritDoc} */ @Override public void onAddSpec(Spec addedSpec) { if (this.helixManager.isPresent() && !this.helixManager.get().isConnected()) { // Specs in store will be notified when Scheduler is added as listener to FlowCatalog, so ignore // .. Specs if in cluster mode and Helix is not yet initialized _log.info("System not yet initialized. Skipping Spec Addition: " + addedSpec); return; } _log.info("New Flow Spec detected: " + addedSpec); if (addedSpec instanceof FlowSpec) { try { FlowSpec flowSpec = (FlowSpec) addedSpec; Properties jobConfig = new Properties(); Properties flowSpecProperties = ((FlowSpec) addedSpec).getConfigAsProperties(); jobConfig.putAll(this.properties); jobConfig.setProperty(ConfigurationKeys.JOB_NAME_KEY, addedSpec.getUri().toString()); jobConfig.setProperty(ConfigurationKeys.JOB_GROUP_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY).toString()); jobConfig.setProperty(ConfigurationKeys.FLOW_RUN_IMMEDIATELY, ConfigUtils .getString((flowSpec).getConfig(), ConfigurationKeys.FLOW_RUN_IMMEDIATELY, "false")); if (flowSpecProperties.containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY) && StringUtils .isNotBlank(flowSpecProperties.getProperty(ConfigurationKeys.JOB_SCHEDULE_KEY))) { jobConfig.setProperty(ConfigurationKeys.JOB_SCHEDULE_KEY, flowSpecProperties.getProperty(ConfigurationKeys.JOB_SCHEDULE_KEY)); } this.scheduledFlowSpecs.put(addedSpec.getUri().toString(), addedSpec); if (jobConfig.containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY)) { _log.info("{} Scheduling flow spec: {} ", this.serviceName, addedSpec); scheduleJob(jobConfig, null); if (PropertiesUtils.getPropAsBoolean(jobConfig, ConfigurationKeys.FLOW_RUN_IMMEDIATELY, "false")) { _log.info("RunImmediately requested, hence executing FlowSpec: " + addedSpec); this.jobExecutor .execute(new NonScheduledJobRunner(flowSpec.getUri(), false, jobConfig, null)); } } else { _log.info("No FlowSpec schedule found, so running FlowSpec: " + addedSpec); this.jobExecutor.execute(new NonScheduledJobRunner(flowSpec.getUri(), true, jobConfig, null)); } } catch (JobException je) { _log.error("{} Failed to schedule or run FlowSpec {}", serviceName, addedSpec, je); } } } public void onDeleteSpec(URI deletedSpecURI, String deletedSpecVersion) { onDeleteSpec(deletedSpecURI, deletedSpecVersion, new Properties()); } /** {@inheritDoc} */ @Override public void onDeleteSpec(URI deletedSpecURI, String deletedSpecVersion, Properties headers) { if (this.helixManager.isPresent() && !this.helixManager.get().isConnected()) { // Specs in store will be notified when Scheduler is added as listener to FlowCatalog, so ignore // .. Specs if in cluster mode and Helix is not yet initialized _log.info("System not yet initialized. Skipping Spec Deletion: " + deletedSpecURI); return; } _log.info("Spec deletion detected: " + deletedSpecURI + "/" + deletedSpecVersion); try { Spec deletedSpec = this.scheduledFlowSpecs.get(deletedSpecURI.toString()); if (null != deletedSpec) { this.orchestrator.remove(deletedSpec, headers); this.scheduledFlowSpecs.remove(deletedSpecURI.toString()); unscheduleJob(deletedSpecURI.toString()); } else { _log.warn(String .format("Spec with URI: %s was not found in cache. May be it was cleaned, if not please " + "clean it manually", deletedSpecURI)); } } catch (JobException e) { _log.warn(String.format("Spec with URI: %s was not unscheduled cleaning", deletedSpecURI), e); } } /** {@inheritDoc} */ @Override public void onUpdateSpec(Spec updatedSpec) { if (this.helixManager.isPresent() && !this.helixManager.get().isConnected()) { // Specs in store will be notified when Scheduler is added as listener to FlowCatalog, so ignore // .. Specs if in cluster mode and Helix is not yet initialized _log.info("System not yet initialized. Skipping Spec Update: " + updatedSpec); return; } _log.info("Spec changed: " + updatedSpec); if (!(updatedSpec instanceof FlowSpec)) { return; } try { onDeleteSpec(updatedSpec.getUri(), updatedSpec.getVersion()); } catch (Exception e) { _log.error("Failed to update Spec: " + updatedSpec, e); } try { onAddSpec(updatedSpec); } catch (Exception e) { _log.error("Failed to update Spec: " + updatedSpec, e); } } /** * A Gobblin job to be scheduled. */ @DisallowConcurrentExecution @Slf4j public static class GobblinServiceJob extends BaseGobblinJob implements InterruptableJob { private static final Logger _log = LoggerFactory.getLogger(GobblinServiceJob.class); @Override public void executeImpl(JobExecutionContext context) throws JobExecutionException { _log.info("Starting FlowSpec " + context.getJobDetail().getKey()); JobDataMap dataMap = context.getJobDetail().getJobDataMap(); JobScheduler jobScheduler = (JobScheduler) dataMap.get(JOB_SCHEDULER_KEY); Properties jobProps = (Properties) dataMap.get(PROPERTIES_KEY); JobListener jobListener = (JobListener) dataMap.get(JOB_LISTENER_KEY); try { jobScheduler.runJob(jobProps, jobListener); } catch (Throwable t) { throw new JobExecutionException(t); } } @Override public void interrupt() throws UnableToInterruptJobException { log.info("Job was interrupted"); } } /** * This class is responsible for running non-scheduled jobs. */ class NonScheduledJobRunner implements Runnable { private final URI specUri; private final Properties jobConfig; private final JobListener jobListener; private final boolean removeSpec; public NonScheduledJobRunner(URI uri, boolean removeSpec, Properties jobConfig, JobListener jobListener) { this.specUri = uri; this.jobConfig = jobConfig; this.jobListener = jobListener; this.removeSpec = removeSpec; } @Override public void run() { try { GobblinServiceJobScheduler.this.runJob(this.jobConfig, this.jobListener); if (flowCatalog.isPresent() && removeSpec) { GobblinServiceJobScheduler.this.flowCatalog.get().remove(specUri, new Properties(), false); } } catch (JobException je) { _log.error("Failed to run job " + this.jobConfig.getProperty(ConfigurationKeys.JOB_NAME_KEY), je); } } } }