org.apache.samza.processor.StreamProcessor.java Source code

Introduction

Here is the source code for org.apache.samza.processor.StreamProcessor.java, from Apache Samza. A StreamProcessor can be embedded in any application or executed in a distributed environment (a cluster) as an independent process.
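
Before the full listing, here is a minimal sketch of how a StreamProcessor might be embedded in an application, using the public constructor, start(), and stop() shown in the class below. The class name, processor id, and configuration values (job name, ZooKeeper job coordinator factory, ZooKeeper connect string) are illustrative assumptions; a real job also needs system, stream, and serde configuration, and the task body is left empty.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import org.apache.samza.config.Config;
import org.apache.samza.config.MapConfig;
import org.apache.samza.processor.StreamProcessor;
import org.apache.samza.runtime.ProcessorLifecycleListener;
import org.apache.samza.task.StreamTaskFactory;

public class EmbeddedStreamProcessorExample {
    public static void main(String[] args) {
        // Hypothetical configuration for illustration only; a real job also needs
        // system, stream, and serde settings for your environment.
        Map<String, String> props = new HashMap<>();
        props.put("job.name", "embedded-example");
        props.put("job.coordinator.factory", "org.apache.samza.zk.ZkJobCoordinatorFactory");
        props.put("job.coordinator.zk.connect", "localhost:2181");
        Config config = new MapConfig(props);

        // A StreamTaskFactory whose task ignores messages; replace with real processing logic.
        StreamTaskFactory taskFactory = () -> (envelope, collector, coordinator) -> {
            // process envelope here
        };

        // No-op lifecycle callbacks.
        ProcessorLifecycleListener listener = new ProcessorLifecycleListener() {
            @Override public void beforeStart() { }
            @Override public void afterStart() { }
            @Override public void afterStop() { }
            @Override public void afterFailure(Throwable t) { }
        };

        // Full constructor: no application-defined contexts or external context, a listener
        // factory that returns the listener above, and a null JobCoordinator so the
        // StreamProcessor creates one from the config.
        StreamProcessor processor = new StreamProcessor("processor-1", config,
                Collections.emptyMap(), taskFactory, Optional.empty(), Optional.empty(),
                Optional.empty(), sp -> listener, null);

        processor.start();  // asynchronous: starts the JobCoordinator, which later starts the container
        Runtime.getRuntime().addShutdownHook(new Thread(processor::stop));
    }
}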

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.samza.processor;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.lang3.StringUtils;
import org.apache.samza.annotation.InterfaceStability;
import org.apache.samza.config.Config;
import org.apache.samza.config.JobCoordinatorConfig;
import org.apache.samza.config.TaskConfigJava;
import org.apache.samza.container.SamzaContainer;
import org.apache.samza.container.SamzaContainerListener;
import org.apache.samza.context.ApplicationContainerContext;
import org.apache.samza.context.ApplicationContainerContextFactory;
import org.apache.samza.context.ApplicationTaskContext;
import org.apache.samza.context.ApplicationTaskContextFactory;
import org.apache.samza.context.ExternalContext;
import org.apache.samza.context.JobContextImpl;
import org.apache.samza.coordinator.JobCoordinator;
import org.apache.samza.coordinator.JobCoordinatorFactory;
import org.apache.samza.coordinator.JobCoordinatorListener;
import org.apache.samza.job.model.JobModel;
import org.apache.samza.metrics.MetricsRegistryMap;
import org.apache.samza.metrics.MetricsReporter;
import org.apache.samza.runtime.ProcessorLifecycleListener;
import org.apache.samza.task.TaskFactory;
import org.apache.samza.util.ScalaJavaUtil;
import org.apache.samza.util.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

/**
 * StreamProcessor can be embedded in any application or executed in a distributed environment (aka cluster) as an
 * independent process.
 *
 * <p>
 *
 * <b>Note</b>: A single JVM can create multiple StreamProcessor instances. It is safe to create StreamProcessor instances in
 * multiple threads. This class is thread safe.
 *
 * </p>
 *
 * <pre>
 * A StreamProcessor could be in any one of the following states:
 * NEW, STARTED, IN_REBALANCE, RUNNING, STOPPING, STOPPED.
 *
 * Describes the valid state transitions of the {@link StreamProcessor}.
 *
 *   StreamProcessor creation                                       -> NEW
 *   NEW          -- StreamProcessor.start()                        -> STARTED
 *   STARTED      -- rebalance triggered by the group leader        -> IN_REBALANCE
 *   IN_REBALANCE -- receives the JobModel and starts the container -> RUNNING
 *   NEW / STARTED / IN_REBALANCE / RUNNING
 *                -- StreamProcessor.stop()                         -> STOPPING
 *   STOPPING     -- after the JobCoordinator and SamzaContainer
 *                   have shut down                                 -> STOPPED
 *
 * </pre>
 */
@InterfaceStability.Evolving
public class StreamProcessor {
    private static final Logger LOGGER = LoggerFactory.getLogger(StreamProcessor.class);
    private static final String CONTAINER_THREAD_NAME_FORMAT = "Samza StreamProcessor Container Thread-%d";

    private final JobCoordinator jobCoordinator;
    private final ProcessorLifecycleListener processorListener;
    private final TaskFactory taskFactory;
    /**
     * Type parameter needs to be {@link ApplicationContainerContext} so that we can eventually call the base methods of
     * the context object.
     */
    private final Optional<ApplicationContainerContextFactory<ApplicationContainerContext>> applicationDefinedContainerContextFactoryOptional;
    /**
     * Type parameter needs to be {@link ApplicationTaskContext} so that we can eventually call the base methods of the
     * context object.
     */
    private final Optional<ApplicationTaskContextFactory<ApplicationTaskContext>> applicationDefinedTaskContextFactoryOptional;
    private final Optional<ExternalContext> externalContextOptional;
    private final Map<String, MetricsReporter> customMetricsReporter;
    private final Config config;
    private final long taskShutdownMs;
    private final String processorId;
    private final ExecutorService containerExcecutorService;
    private final Object lock = new Object();
    private final MetricsRegistryMap metricsRegistry;

    private volatile Throwable containerException = null;

    volatile CountDownLatch containerShutdownLatch = new CountDownLatch(1);

    /**
     * Indicates the current status of a {@link StreamProcessor}.
     */
    public enum State {
        STARTED("STARTED"), RUNNING("RUNNING"), STOPPING("STOPPING"), STOPPED("STOPPED"), NEW("NEW"), IN_REBALANCE(
                "IN_REBALANCE");

        private String strVal;

        State(String strVal) {
            this.strVal = strVal;
        }

        @Override
        public String toString() {
            return strVal;
        }
    }

    /**
     * @return the current state of StreamProcessor.
     */
    public State getState() {
        return state;
    }

    @VisibleForTesting
    State state = State.NEW;

    @VisibleForTesting
    SamzaContainer container = null;

    @VisibleForTesting
    JobCoordinatorListener jobCoordinatorListener = null;

    /**
     * Same as {@link #StreamProcessor(String, Config, Map, TaskFactory, ProcessorLifecycleListener, JobCoordinator)}, except
     * it creates a {@link JobCoordinator} instead of accepting it as an argument.
     *
     * @param processorId a unique logical identifier assigned to the stream processor.
     * @param config configuration required to launch {@link JobCoordinator} and {@link SamzaContainer}.
     * @param customMetricsReporters registered with the metrics system to report metrics.
     * @param taskFactory the task factory to instantiate the Task.
     * @param processorListener listener to the StreamProcessor life cycle.
     *
     * Deprecated: Use {@link #StreamProcessor(String, Config, Map, TaskFactory, Optional, Optional, Optional,
     * StreamProcessorLifecycleListenerFactory, JobCoordinator)} instead.
     */
    @Deprecated
    public StreamProcessor(String processorId, Config config, Map<String, MetricsReporter> customMetricsReporters,
            TaskFactory taskFactory, ProcessorLifecycleListener processorListener) {
        this(processorId, config, customMetricsReporters, taskFactory, processorListener, null);
    }

    /**
     * Same as {@link #StreamProcessor(String, Config, Map, TaskFactory, Optional, Optional, Optional,
     * StreamProcessorLifecycleListenerFactory, JobCoordinator)}, with the following differences:
     * <ol>
     *   <li>Passes null for application-defined context factories</li>
     *   <li>Accepts a {@link ProcessorLifecycleListener} directly instead of a
     *   {@link StreamProcessorLifecycleListenerFactory}</li>
     * </ol>
     *
     * @param processorId a unique logical identifier assigned to the stream processor.
     * @param config configuration required to launch {@link JobCoordinator} and {@link SamzaContainer}
     * @param customMetricsReporters registered with the metrics system to report metrics
     * @param taskFactory task factory to instantiate the Task
     * @param processorListener listener to the StreamProcessor life cycle
     * @param jobCoordinator the instance of {@link JobCoordinator}
     *
     * Deprecated: Use {@link #StreamProcessor(String, Config, Map, TaskFactory, Optional, Optional, Optional,
     * StreamProcessorLifecycleListenerFactory, JobCoordinator)} instead.
     */
    @Deprecated
    public StreamProcessor(String processorId, Config config, Map<String, MetricsReporter> customMetricsReporters,
            TaskFactory taskFactory, ProcessorLifecycleListener processorListener, JobCoordinator jobCoordinator) {
        this(processorId, config, customMetricsReporters, taskFactory, Optional.empty(), Optional.empty(),
                Optional.empty(), sp -> processorListener, jobCoordinator);
    }

    /**
     * Builds a {@link StreamProcessor} with full specification of processing components.
     *
     * @param processorId a unique logical identifier assigned to the stream processor.
     * @param config configuration required to launch {@link JobCoordinator} and {@link SamzaContainer}
     * @param customMetricsReporters registered with the metrics system to report metrics
     * @param taskFactory task factory to instantiate the Task
     * @param applicationDefinedContainerContextFactoryOptional optional factory for application-defined container context
     * @param applicationDefinedTaskContextFactoryOptional optional factory for application-defined task context
     * @param externalContextOptional optional {@link ExternalContext} to pass through to the application
     * @param listenerFactory factory for creating a listener to the StreamProcessor life cycle
     * @param jobCoordinator the instance of {@link JobCoordinator}
     */
    public StreamProcessor(String processorId, Config config, Map<String, MetricsReporter> customMetricsReporters,
            TaskFactory taskFactory,
            Optional<ApplicationContainerContextFactory<ApplicationContainerContext>> applicationDefinedContainerContextFactoryOptional,
            Optional<ApplicationTaskContextFactory<ApplicationTaskContext>> applicationDefinedTaskContextFactoryOptional,
            Optional<ExternalContext> externalContextOptional,
            StreamProcessorLifecycleListenerFactory listenerFactory, JobCoordinator jobCoordinator) {
        Preconditions.checkNotNull(listenerFactory, "StreamProcessorListenerFactory cannot be null.");
        Preconditions.checkArgument(StringUtils.isNotBlank(processorId), "ProcessorId cannot be null or blank.");
        this.config = config;
        this.processorId = processorId;
        this.metricsRegistry = new MetricsRegistryMap();
        this.customMetricsReporter = customMetricsReporters;
        for (MetricsReporter metricsReporter : customMetricsReporter.values()) {
            metricsReporter.register("StreamProcessor", metricsRegistry);
        }
        this.taskFactory = taskFactory;
        this.applicationDefinedContainerContextFactoryOptional = applicationDefinedContainerContextFactoryOptional;
        this.applicationDefinedTaskContextFactoryOptional = applicationDefinedTaskContextFactoryOptional;
        this.externalContextOptional = externalContextOptional;
        this.taskShutdownMs = new TaskConfigJava(config).getShutdownMs();
        this.jobCoordinator = (jobCoordinator != null) ? jobCoordinator : createJobCoordinator();
        this.jobCoordinatorListener = createJobCoordinatorListener();
        this.jobCoordinator.setListener(jobCoordinatorListener);
        ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat(CONTAINER_THREAD_NAME_FORMAT)
                .setDaemon(true).build();
        this.containerExcecutorService = Executors.newSingleThreadExecutor(threadFactory);
        this.processorListener = listenerFactory.createInstance(this);
    }

    /**
     * Asynchronously starts this {@link StreamProcessor}.
     * <p>
     *   <b>Implementation</b>:
     *   Starts the {@link JobCoordinator}, which will eventually start the {@link SamzaContainer} when a new
     *   {@link JobModel} is available.
     * </p>
     */
    public void start() {
        synchronized (lock) {
            if (state == State.NEW) {
                processorListener.beforeStart();
                state = State.STARTED;
                jobCoordinator.start();
            } else {
                LOGGER.info("Start is no-op, since the current state is {} and not {}.", state, State.NEW);
            }
        }
    }

    /**
     * <p>
     * Asynchronously stops the {@link StreamProcessor}'s running components - {@link SamzaContainer}
     * and {@link JobCoordinator}
     * </p>
     * Here are the ways in which the StreamProcessor can be stopped:
     * <ol>
     *   <li>Caller of StreamProcessor invokes stop()</li>
     *   <li>Samza Container completes processing (e.g., bounded input) and shuts down</li>
     *   <li>Samza Container fails</li>
     *   <li>Job Coordinator fails</li>
     * </ol>
     * When either the container or the coordinator stops (cleanly or due to an exception), it will try to shut down
     * the StreamProcessor. This needs to be synchronized so that only one code path gets triggered for shutdown.
     * <br>
     * If the container is running, then either:
     * <ol>
     *   <li>the container shuts down cleanly and {@link SamzaContainerListener#afterStop()} will trigger
     *   {@link JobCoordinator#stop()}</li>
     *   <li>the container fails to shut down cleanly and {@link SamzaContainerListener#afterFailure(Throwable)} will
     *   trigger {@link JobCoordinator#stop()}</li>
     * </ol>
     * If the container is not running, then this method simply shuts down the {@link JobCoordinator}.
     *
     */
    public void stop() {
        synchronized (lock) {
            LOGGER.info("Stopping the stream processor: {}.", processorId);
            if (state != State.STOPPING && state != State.STOPPED) {
                state = State.STOPPING;
                try {
                    LOGGER.info("Shutting down the container: {} of stream processor: {}.", container, processorId);
                    boolean hasContainerShutdown = stopSamzaContainer();
                    if (!hasContainerShutdown) {
                        LOGGER.info("Interrupting the container: {} thread to die.", container);
                        containerExcecutorService.shutdownNow();
                    }
                } catch (Throwable throwable) {
                    LOGGER.error(
                            String.format("Exception occurred on container: %s shutdown of stream processor: %s.",
                                    container, processorId),
                            throwable);
                }
                LOGGER.info("Shutting down JobCoordinator of stream processor: {}.", processorId);
                jobCoordinator.stop();
            } else {
                LOGGER.info("StreamProcessor state is: {}. Ignoring the stop.", state);
            }
        }
    }

    @VisibleForTesting
    JobCoordinator getCurrentJobCoordinator() {
        return jobCoordinator;
    }

    @VisibleForTesting
    SamzaContainer getContainer() {
        return container;
    }

    @VisibleForTesting
    SamzaContainer createSamzaContainer(String processorId, JobModel jobModel) {
        return SamzaContainer.apply(processorId, jobModel, ScalaJavaUtil.toScalaMap(this.customMetricsReporter),
                this.taskFactory, JobContextImpl.fromConfigWithDefaults(this.config),
                Option.apply(this.applicationDefinedContainerContextFactoryOptional.orElse(null)),
                Option.apply(this.applicationDefinedTaskContextFactoryOptional.orElse(null)),
                Option.apply(this.externalContextOptional.orElse(null)), null, null);
    }

    private JobCoordinator createJobCoordinator() {
        String jobCoordinatorFactoryClassName = new JobCoordinatorConfig(config)
                .getJobCoordinatorFactoryClassName();
        return Util.getObj(jobCoordinatorFactoryClassName, JobCoordinatorFactory.class)
                .getJobCoordinator(processorId, config, metricsRegistry);
    }

    /**
     * Stops the {@link SamzaContainer}.
     * @return true if the {@link SamzaContainer} shut down within task.shutdown.ms; false otherwise.
     */
    private boolean stopSamzaContainer() {
        boolean hasContainerShutdown = true;
        if (container != null) {
            try {
                container.shutdown();
                LOGGER.info("Waiting {} ms for the container: {} to shutdown.", taskShutdownMs, container);
                hasContainerShutdown = containerShutdownLatch.await(taskShutdownMs, TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                LOGGER.error("Exception occurred when shutting down the container: {}.", container, e);
                hasContainerShutdown = false;
                if (containerException != null) {
                    containerException = e;
                }
            }
            LOGGER.info(String.format("Shutdown status of container: %s for stream processor: %s is: %b.",
                    container, processorId, hasContainerShutdown));
        }

        // We want to propagate a TimeoutException when the container shutdown times out. The timeout exception we
        // propagate to the application runner may be overwritten by the container failure cause in case of interleaved
        // execution. That is acceptable, since the container exception is much more useful than the timeout exception.
        // Whether the container shutdown timed out can still be inferred from the logs.
        if (!hasContainerShutdown && containerException == null) {
            containerException = new TimeoutException(
                    "Container shutdown timed out after " + taskShutdownMs + " ms.");
        }

        return hasContainerShutdown;
    }

    private JobCoordinatorListener createJobCoordinatorListener() {
        return new JobCoordinatorListener() {

            @Override
            public void onJobModelExpired() {
                synchronized (lock) {
                    if (state == State.STARTED || state == State.RUNNING) {
                        state = State.IN_REBALANCE;
                        LOGGER.info("Job model expired. Shutting down the container: {} of stream processor: {}.",
                                container, processorId);
                        boolean hasContainerShutdown = stopSamzaContainer();
                        if (!hasContainerShutdown) {
                            LOGGER.warn(
                                    "Container: {} shutdown was unsuccessful. Stopping the stream processor: {}.",
                                    container, processorId);
                            state = State.STOPPING;
                            jobCoordinator.stop();
                        } else {
                            LOGGER.info("Container: {} shutdown completed for stream processor: {}.", container,
                                    processorId);
                        }
                    } else {
                        LOGGER.info(
                                "Ignoring onJobModelExpired invocation since the current state is {} and not in {}.",
                                state, ImmutableList.of(State.RUNNING, State.STARTED));
                    }
                }
            }

            @Override
            public void onNewJobModel(String processorId, JobModel jobModel) {
                synchronized (lock) {
                    if (state == State.IN_REBALANCE) {
                        containerShutdownLatch = new CountDownLatch(1);
                        container = createSamzaContainer(processorId, jobModel);
                        container.setContainerListener(new ContainerListener());
                        LOGGER.info("Starting the container: {} for the stream processor: {}.", container,
                                processorId);
                        containerExcecutorService.submit(container);
                    } else {
                        LOGGER.info("Ignoring onNewJobModel invocation since the current state is {} and not {}.",
                                state, State.IN_REBALANCE);
                    }
                }
            }

            @Override
            public void onCoordinatorStop() {
                synchronized (lock) {
                    LOGGER.info("Shutting down the executor service of the stream processor: {}.", processorId);
                    boolean hasContainerShutdown = stopSamzaContainer();

                    // we only want to interrupt when container shutdown times out.
                    if (!hasContainerShutdown) {
                        containerExcecutorService.shutdownNow();
                    }
                    state = State.STOPPED;
                }
                if (containerException != null)
                    processorListener.afterFailure(containerException);
                else
                    processorListener.afterStop();

            }

            @Override
            public void onCoordinatorFailure(Throwable throwable) {
                synchronized (lock) {
                    LOGGER.info(String.format(
                            "Coordinator: %s failed with an exception. Stopping the stream processor: %s. Original exception:",
                            jobCoordinator, processorId), throwable);
                    boolean hasContainerShutdown = stopSamzaContainer();

                    // we only want to interrupt when container shutdown times out.
                    if (!hasContainerShutdown) {
                        containerExcecutorService.shutdownNow();
                    }
                    state = State.STOPPED;
                }
                processorListener.afterFailure(throwable);
            }
        };
    }

    /**
     * Interface to create a {@link ProcessorLifecycleListener}
     */
    @FunctionalInterface
    public interface StreamProcessorLifecycleListenerFactory {
        ProcessorLifecycleListener createInstance(StreamProcessor processor);
    }

    class ContainerListener implements SamzaContainerListener {

        private boolean processorOnStartCalled = false;

        @Override
        public void beforeStart() {
            // processorListener.beforeStart() is invoked in StreamProcessor.start()
        }

        @Override
        public void afterStart() {
            LOGGER.warn("Received container start notification for container: {} in stream processor: {}.",
                    container, processorId);
            if (!processorOnStartCalled) {
                processorListener.afterStart();
                processorOnStartCalled = true;
            }
            state = State.RUNNING;
        }

        @Override
        public void afterStop() {
            containerShutdownLatch.countDown();

            synchronized (lock) {
                if (state == State.IN_REBALANCE) {
                    LOGGER.info("Container: {} of the stream processor: {} was stopped by the JobCoordinator.",
                            container, processorId);
                } else {
                    LOGGER.info("Container: {} stopped. Stopping the stream processor: {}.", container,
                            processorId);
                    state = State.STOPPING;
                    jobCoordinator.stop();
                }
            }
        }

        @Override
        public void afterFailure(Throwable t) {
            containerException = t;
            containerShutdownLatch.countDown();

            synchronized (lock) {
                LOGGER.error(String.format(
                        "Container: %s failed with an exception. Stopping the stream processor: %s. Original exception:",
                        container, processorId), t);
                state = State.STOPPING;
                jobCoordinator.stop();
            }
        }
    }
}
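
For completeness, here is a small hypothetical helper (not part of Samza) built on the public getState() accessor and the State enum from the listing above: it polls until the processor settles in RUNNING or STOPPED, which can be useful after the asynchronous start() call in the embedding application.

import org.apache.samza.processor.StreamProcessor;

public final class ProcessorStateUtil {
    // Polls getState() until the processor leaves its transient states
    // (NEW, STARTED, IN_REBALANCE, STOPPING) and settles in RUNNING or STOPPED.
    public static StreamProcessor.State awaitRunningOrStopped(StreamProcessor processor)
            throws InterruptedException {
        StreamProcessor.State state = processor.getState();
        while (state != StreamProcessor.State.RUNNING && state != StreamProcessor.State.STOPPED) {
            Thread.sleep(100);
            state = processor.getState();
        }
        return state;
    }
}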