Java tutorial: the Apache Storm worker daemon (org.apache.storm.daemon.worker.Worker)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.storm.daemon.worker;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.security.auth.Subject;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.storm.Config;
import org.apache.storm.cluster.ClusterStateContext;
import org.apache.storm.cluster.ClusterUtils;
import org.apache.storm.cluster.DaemonType;
import org.apache.storm.cluster.IStateStorage;
import org.apache.storm.cluster.IStormClusterState;
import org.apache.storm.daemon.DaemonCommon;
import org.apache.storm.daemon.Shutdownable;
import org.apache.storm.daemon.StormCommon;
import org.apache.storm.executor.Executor;
import org.apache.storm.executor.ExecutorShutdown;
import org.apache.storm.executor.IRunningExecutor;
import org.apache.storm.executor.LocalExecutor;
import org.apache.storm.generated.Credentials;
import org.apache.storm.generated.ExecutorInfo;
import org.apache.storm.generated.ExecutorStats;
import org.apache.storm.generated.LSWorkerHeartbeat;
import org.apache.storm.generated.LogConfig;
import org.apache.storm.messaging.IConnection;
import org.apache.storm.messaging.IContext;
import org.apache.storm.messaging.TaskMessage;
import org.apache.storm.security.auth.AuthUtils;
import org.apache.storm.security.auth.IAutoCredentials;
import org.apache.storm.stats.StatsUtil;
import org.apache.storm.utils.ConfigUtils;
import org.apache.storm.utils.Utils;
import org.apache.storm.utils.DisruptorBackpressureCallback;
import org.apache.storm.utils.LocalState;
import org.apache.storm.utils.ObjectReader;
import org.apache.storm.utils.Time;
import org.apache.storm.utils.WorkerBackpressureCallback;
import org.apache.storm.utils.WorkerBackpressureThread;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.lmax.disruptor.EventHandler;

import uk.org.lidalia.sysoutslf4j.context.SysOutOverSLF4J;

public class Worker implements Shutdownable, DaemonCommon {

    private static final Logger LOG = LoggerFactory.getLogger(Worker.class);
    private static final Pattern BLOB_VERSION_EXTRACTION = Pattern.compile(".*\\.([0-9]+)$");

    private final Map<String, Object> conf;
    private final IContext context;
    private final String topologyId;
    private final String assignmentId;
    private final int port;
    private final String workerId;
    private final LogConfigManager logConfigManager;

    private WorkerState workerState;
    private AtomicReference<List<IRunningExecutor>> executorsAtom;
    private Thread transferThread;
    private WorkerBackpressureThread backpressureThread;
    // How long until the backpressure znode is invalid.
    private long backpressureZnodeTimeoutMs;
    private AtomicReference<Credentials> credentialsAtom;
    private Subject subject;
    private Collection<IAutoCredentials> autoCreds;

    /**
     * TODO: should worker even take the topologyId as input? this should be
     * deducible from cluster state (by searching through assignments)
     * what about if there's inconsistency in assignments? -> but nimbus should guarantee this consistency
     *
     * @param conf         - Storm configuration
     * @param context      -
     * @param topologyId   - topology id
     * @param assignmentId - assignment id
     * @param port         - port on which the worker runs
     * @param workerId     - worker id
     */
    public Worker(Map<String, Object> conf, IContext context, String topologyId, String assignmentId,
                  int port, String workerId) {
        this.conf = conf;
        this.context = context;
        this.topologyId = topologyId;
        this.assignmentId = assignmentId;
        this.port = port;
        this.workerId = workerId;
        this.logConfigManager = new LogConfigManager();
    }

    public void start() throws Exception {
        LOG.info("Launching worker for {} on {}:{} with id {} and conf {}", topologyId, assignmentId, port,
            workerId, conf);
        // because in local mode, its not a separate
        // process. supervisor will register it in this case
        // if ConfigUtils.isLocalMode(conf) returns false then it is in distributed mode.
        if (!ConfigUtils.isLocalMode(conf)) {
            // Distributed mode
            SysOutOverSLF4J.sendSystemOutAndErrToSLF4J();
            String pid = Utils.processPid();
            FileUtils.touch(new File(ConfigUtils.workerPidPath(conf, workerId, pid)));
            FileUtils.writeStringToFile(new File(ConfigUtils.workerArtifactsPidPath(conf, topologyId, port)),
                pid, Charset.forName("UTF-8"));
        }
        final Map<String, Object> topologyConf =
            ConfigUtils.overrideLoginConfigWithSystemProperty(ConfigUtils.readSupervisorStormConf(conf, topologyId));
        List<ACL> acls = Utils.getWorkerACL(topologyConf);
        IStateStorage stateStorage =
            ClusterUtils.mkStateStorage(conf, topologyConf, acls, new ClusterStateContext(DaemonType.WORKER));
        IStormClusterState stormClusterState =
            ClusterUtils.mkStormClusterState(stateStorage, acls, new ClusterStateContext());
        Credentials initialCredentials = stormClusterState.credentials(topologyId, null);
        Map<String, String> initCreds = new HashMap<>();
        if (initialCredentials != null) {
            initCreds.putAll(initialCredentials.get_creds());
        }
        autoCreds = AuthUtils.GetAutoCredentials(topologyConf);
        subject = AuthUtils.populateSubject(null, autoCreds, initCreds);
        backpressureZnodeTimeoutMs = ObjectReader.getInt(topologyConf.get(Config.BACKPRESSURE_ZNODE_TIMEOUT_SECS)) * 1000;

        Subject.doAs(subject, new PrivilegedExceptionAction<Object>() {
            @Override
            public Object run() throws Exception {
                workerState = new WorkerState(conf, context, topologyId, assignmentId, port, workerId,
                    topologyConf, stateStorage, stormClusterState);

                // Heartbeat here so that worker process dies if this fails
                // it's important that worker heartbeat to supervisor ASAP so that supervisor knows
                // that worker is running and moves on
                doHeartBeat();

                executorsAtom = new AtomicReference<>(null);

                // launch heartbeat threads immediately so that slow-loading tasks don't cause the worker to timeout
                // to the supervisor
                workerState.heartbeatTimer.scheduleRecurring(0, (Integer) conf.get(Config.WORKER_HEARTBEAT_FREQUENCY_SECS),
                    () -> {
                        try {
                            doHeartBeat();
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });

                workerState.executorHeartbeatTimer.scheduleRecurring(0,
                    (Integer) conf.get(Config.WORKER_HEARTBEAT_FREQUENCY_SECS), Worker.this::doExecutorHeartbeats);

                workerState.registerCallbacks();
                workerState.refreshConnections(null);
                workerState.activateWorkerWhenAllConnectionsReady();
                workerState.refreshStormActive(null);
                workerState.runWorkerStartHooks();

                List<IRunningExecutor> newExecutors = new ArrayList<IRunningExecutor>();
                for (List<Long> e : workerState.getExecutors()) {
                    if (ConfigUtils.isLocalMode(topologyConf)) {
                        newExecutors.add(LocalExecutor.mkExecutor(workerState, e, initCreds).execute());
                    } else {
                        newExecutors.add(Executor.mkExecutor(workerState, e, initCreds).execute());
                    }
                }
                executorsAtom.set(newExecutors);

                EventHandler<Object> tupleHandler = (packets, seqId, batchEnd) -> workerState
                    .sendTuplesToRemoteWorker((HashMap<Integer, ArrayList<TaskMessage>>) packets, seqId, batchEnd);

                // This thread will publish the messages destined for remote tasks to remote connections
                transferThread = Utils.asyncLoop(() -> {
                    workerState.transferQueue.consumeBatchWhenAvailable(tupleHandler);
                    return 0L;
                });

                DisruptorBackpressureCallback disruptorBackpressureHandler = mkDisruptorBackpressureHandler(workerState);
                workerState.transferQueue.registerBackpressureCallback(disruptorBackpressureHandler);
                workerState.transferQueue
                    .setEnableBackpressure((Boolean) topologyConf.get(Config.TOPOLOGY_BACKPRESSURE_ENABLE));
                workerState.transferQueue.setHighWaterMark(
                    ObjectReader.getDouble(topologyConf.get(Config.BACKPRESSURE_DISRUPTOR_HIGH_WATERMARK)));
                workerState.transferQueue.setLowWaterMark(
                    ObjectReader.getDouble(topologyConf.get(Config.BACKPRESSURE_DISRUPTOR_LOW_WATERMARK)));

                WorkerBackpressureCallback backpressureCallback = mkBackpressureHandler(topologyConf);
                backpressureThread = new WorkerBackpressureThread(workerState.backpressureTrigger, workerState,
                    backpressureCallback);
                if ((Boolean) topologyConf.get(Config.TOPOLOGY_BACKPRESSURE_ENABLE)) {
                    backpressureThread.start();
                    stormClusterState.topologyBackpressure(topologyId, backpressureZnodeTimeoutMs,
                        workerState::refreshThrottle);
                    int pollingSecs = ObjectReader.getInt(topologyConf.get(Config.TASK_BACKPRESSURE_POLL_SECS));
                    workerState.refreshBackpressureTimer.scheduleRecurring(0, pollingSecs, workerState::refreshThrottle);
                }

                credentialsAtom = new AtomicReference<Credentials>(initialCredentials);

                establishLogSettingCallback();

                workerState.stormClusterState.credentials(topologyId, Worker.this::checkCredentialsChanged);

                workerState.refreshCredentialsTimer.scheduleRecurring(0,
                    (Integer) conf.get(Config.TASK_CREDENTIALS_POLL_SECS), new Runnable() {
                        @Override
                        public void run() {
                            checkCredentialsChanged();
                            if ((Boolean) topologyConf.get(Config.TOPOLOGY_BACKPRESSURE_ENABLE)) {
                                checkThrottleChanged();
                            }
                        }
                    });

                workerState.checkForUpdatedBlobsTimer.scheduleRecurring(0,
                    (Integer) conf.getOrDefault(Config.WORKER_BLOB_UPDATE_POLL_INTERVAL_SECS, 10), new Runnable() {
                        @Override
                        public void run() {
                            try {
                                LOG.debug("Checking if blobs have updated");
                                updateBlobUpdates();
                            } catch (IOException e) {
                                // IOException from reading the version files to be ignored
                                LOG.error(e.getStackTrace().toString());
                            }
                        }
                    });

                // The jitter allows the clients to get the data at different times, and avoids thundering herd
                if (!(Boolean) topologyConf.get(Config.TOPOLOGY_DISABLE_LOADAWARE_MESSAGING)) {
                    workerState.refreshLoadTimer.scheduleRecurringWithJitter(0, 1, 500, Worker.this::doRefreshLoad);
                }

                workerState.refreshConnectionsTimer.scheduleRecurring(0,
                    (Integer) conf.get(Config.TASK_REFRESH_POLL_SECS), workerState::refreshConnections);

                workerState.resetLogLevelsTimer.scheduleRecurring(0,
                    (Integer) conf.get(Config.WORKER_LOG_LEVEL_RESET_POLL_SECS), logConfigManager::resetLogLevels);

                workerState.refreshActiveTimer.scheduleRecurring(0,
                    (Integer) conf.get(Config.TASK_REFRESH_POLL_SECS), workerState::refreshStormActive);

                LOG.info("Worker has topology config {}",
                    Utils.redactValue(topologyConf, Config.STORM_ZOOKEEPER_TOPOLOGY_AUTH_PAYLOAD));
                LOG.info("Worker {} for storm {} on {}:{} has finished loading", workerId, topologyId, assignmentId, port);
                return this;
            }
        });
    }

    public void doRefreshLoad() {
        workerState.refreshLoad(executorsAtom.get());
        final List<IRunningExecutor> executors = executorsAtom.get();
        for (IRunningExecutor executor : executors) {
            executor.loadChanged(workerState.getLoadMapping());
        }
    }

    public void doHeartBeat() throws IOException {
        LocalState state = ConfigUtils.workerState(workerState.conf, workerState.workerId);
        state.setWorkerHeartBeat(new LSWorkerHeartbeat(Time.currentTimeSecs(), workerState.topologyId,
            workerState.executors.stream()
                .map(executor -> new ExecutorInfo(executor.get(0).intValue(), executor.get(1).intValue()))
                .collect(Collectors.toList()), workerState.port));
        state.cleanup(60); // this is just in case supervisor is down so that disk doesn't fill up.
        // it shouldn't take supervisor 120 seconds between listing dir and reading it
    }

    public void doExecutorHeartbeats() {
        Map<List<Integer>, ExecutorStats> stats;
        List<IRunningExecutor> executors = this.executorsAtom.get();
        if (null == executors) {
            stats = StatsUtil.mkEmptyExecutorZkHbs(workerState.executors);
        } else {
            stats = StatsUtil.convertExecutorZkHbs(executors.stream()
                .collect(Collectors.toMap(
                    (Function<IRunningExecutor, List<Long>>) IRunningExecutor::getExecutorId,
                    (Function<IRunningExecutor, ExecutorStats>) IRunningExecutor::renderStats)));
        }
        Map<String, Object> zkHB = StatsUtil.mkZkWorkerHb(workerState.topologyId, stats, workerState.uptime.upTime());
        try {
            workerState.stormClusterState.workerHeartbeat(workerState.topologyId, workerState.assignmentId,
                (long) workerState.port, StatsUtil.thriftifyZkWorkerHb(zkHB));
        } catch (Exception ex) {
            LOG.error("Worker failed to write heartbeats to ZK or Pacemaker...will retry", ex);
        }
    }

    public Map<String, Long> getCurrentBlobVersions() throws IOException {
        Map<String, Long> results = new HashMap<>();
        Map<String, Map<String, Object>> blobstoreMap =
            (Map<String, Map<String, Object>>) workerState.getTopologyConf().get(Config.TOPOLOGY_BLOBSTORE_MAP);
        if (blobstoreMap != null) {
            String stormRoot =
                ConfigUtils.supervisorStormDistRoot(workerState.getTopologyConf(), workerState.getTopologyId());
            for (Map.Entry<String, Map<String, Object>> entry : blobstoreMap.entrySet()) {
                String localFileName = entry.getKey();
                Map<String, Object> blobInfo = entry.getValue();
                if (blobInfo != null && blobInfo.containsKey("localname")) {
                    localFileName = (String) blobInfo.get("localname");
                }
                String blobWithVersion = new File(stormRoot, localFileName).getCanonicalFile().getName();
                Matcher m = BLOB_VERSION_EXTRACTION.matcher(blobWithVersion);
                if (m.matches()) {
                    results.put(localFileName, Long.valueOf(m.group(1)));
                }
            }
        }
        return results;
    }

    public void updateBlobUpdates() throws IOException {
        Map<String, Long> latestBlobVersions = getCurrentBlobVersions();
        workerState.blobToLastKnownVersion.putAll(latestBlobVersions);
        LOG.debug("Latest versions for blobs {}", latestBlobVersions);
    }

    public void checkCredentialsChanged() {
        Credentials newCreds = workerState.stormClusterState.credentials(topologyId, null);
        if (!ObjectUtils.equals(newCreds, credentialsAtom.get())) {
            // This does not have to be atomic, worst case we update when one is not needed
            AuthUtils.updateSubject(subject, autoCreds, (null == newCreds) ? null : newCreds.get_creds());
            for (IRunningExecutor executor : executorsAtom.get()) {
                executor.credentialsChanged(newCreds);
            }
            credentialsAtom.set(newCreds);
        }
    }

    public void checkThrottleChanged() {
        boolean throttleOn = workerState.stormClusterState.topologyBackpressure(topologyId,
            backpressureZnodeTimeoutMs, this::checkThrottleChanged);
        workerState.throttleOn.set(throttleOn);
    }

    public void checkLogConfigChanged() {
        LogConfig logConfig = workerState.stormClusterState.topologyLogConfig(topologyId, null);
        logConfigManager.processLogConfigChange(logConfig);
        establishLogSettingCallback();
    }

    public void establishLogSettingCallback() {
        workerState.stormClusterState.topologyLogConfig(topologyId, this::checkLogConfigChanged);
    }

    /**
     * make a handler for the worker's send disruptor queue to
     * check highWaterMark and lowWaterMark for backpressure
     */
    private DisruptorBackpressureCallback mkDisruptorBackpressureHandler(WorkerState workerState) {
        return new DisruptorBackpressureCallback() {
            @Override
            public void highWaterMark() throws Exception {
                LOG.debug("worker {} transfer-queue is congested, checking backpressure state",
                    workerState.workerId);
                WorkerBackpressureThread.notifyBackpressureChecker(workerState.backpressureTrigger);
            }

            @Override
            public void lowWaterMark() throws Exception {
                LOG.debug("worker {} transfer-queue is not congested, checking backpressure state",
                    workerState.workerId);
                WorkerBackpressureThread.notifyBackpressureChecker(workerState.backpressureTrigger);
            }
        };
    }

    /**
     * make a handler that checks and updates worker's backpressure flag
     */
    private WorkerBackpressureCallback mkBackpressureHandler(Map<String, Object> topologyConf) {
        final List<IRunningExecutor> executors = executorsAtom.get();
        final long updateFreqMs =
            ObjectReader.getInt(topologyConf.get(Config.BACKPRESSURE_ZNODE_UPDATE_FREQ_SECS)) * 1000;
        return new WorkerBackpressureCallback() {
            @Override
            public void onEvent(Object obj) {
                if (null != executors) {
                    String topologyId = workerState.topologyId;
                    String assignmentId = workerState.assignmentId;
                    int port = workerState.port;
                    IStormClusterState stormClusterState = workerState.stormClusterState;
                    long prevBackpressureTimestamp = workerState.backpressure.get();
                    long currTimestamp = System.currentTimeMillis();
                    long currBackpressureTimestamp = 0;
                    // the backpressure flag is true if at least one of the disruptor queues has throttle-on
                    boolean backpressureFlag = workerState.transferQueue.getThrottleOn() || (executors.stream()
                        .map(IRunningExecutor::getBackPressureFlag).reduce((op1, op2) -> (op1 || op2)).get());

                    if (backpressureFlag) {
                        // update the backpressure timestamp every updateFreqMs ms
                        if ((currTimestamp - prevBackpressureTimestamp) > updateFreqMs) {
                            currBackpressureTimestamp = currTimestamp;
                        } else {
                            currBackpressureTimestamp = prevBackpressureTimestamp;
                        }
                    }

                    if (currBackpressureTimestamp != prevBackpressureTimestamp) {
                        try {
                            LOG.debug("worker backpressure timestamp changing from {} to {}",
                                prevBackpressureTimestamp, currBackpressureTimestamp);
                            stormClusterState.workerBackpressure(topologyId, assignmentId, (long) port,
                                currBackpressureTimestamp);
                            // doing the local reset after the zk update succeeds is very important to avoid
                            // a bad state upon zk exception
                            workerState.backpressure.set(currBackpressureTimestamp);
                        } catch (Exception ex) {
                            LOG.error("workerBackpressure update failed when connecting to ZK ... will retry", ex);
                        }
                    }
                }
            }
        };
    }

    @Override
    public void shutdown() {
        try {
            LOG.info("Shutting down worker {} {} {}", topologyId, assignmentId, port);

            for (IConnection socket : workerState.cachedNodeToPortSocket.get().values()) {
                //this will do best effort flushing since the linger period
                // was set on creation
                socket.close();
            }
            LOG.info("Terminating messaging context");
            LOG.info("Shutting down executors");
            for (IRunningExecutor executor : executorsAtom.get()) {
                ((ExecutorShutdown) executor).shutdown();
            }
            LOG.info("Shut down executors");

            // this is fine because the only time this is shared is when it's a local context,
            // in which case it's a noop
            workerState.mqContext.term();
            LOG.info("Shutting down transfer thread");
            workerState.transferQueue.haltWithInterrupt();
            transferThread.interrupt();
            transferThread.join();
            LOG.info("Shut down transfer thread");

            backpressureThread.terminate();
            LOG.info("Shut down backpressure thread");

            workerState.heartbeatTimer.close();
            workerState.refreshConnectionsTimer.close();
            workerState.refreshCredentialsTimer.close();
            workerState.checkForUpdatedBlobsTimer.close();
            workerState.refreshBackpressureTimer.close();
            workerState.refreshActiveTimer.close();
            workerState.executorHeartbeatTimer.close();
            workerState.userTimer.close();
            workerState.refreshLoadTimer.close();
            workerState.resetLogLevelsTimer.close();
            workerState.closeResources();

            LOG.info("Trigger any worker shutdown hooks");
            workerState.runWorkerShutdownHooks();

            workerState.stormClusterState.removeWorkerHeartbeat(topologyId, assignmentId, (long) port);
            workerState.stormClusterState.removeWorkerBackpressure(topologyId, assignmentId, (long) port);
            LOG.info("Disconnecting from storm cluster state context");
            workerState.stormClusterState.disconnect();
            workerState.stateStorage.close();
            LOG.info("Shut down worker {} {} {}", topologyId, assignmentId, port);
        } catch (Exception ex) {
            throw Utils.wrapInRuntime(ex);
        }
    }

    @Override
    public boolean isWaiting() {
        return workerState.heartbeatTimer.isTimerWaiting()
            && workerState.refreshConnectionsTimer.isTimerWaiting()
            && workerState.refreshLoadTimer.isTimerWaiting()
            && workerState.refreshCredentialsTimer.isTimerWaiting()
            && workerState.checkForUpdatedBlobsTimer.isTimerWaiting()
            && workerState.refreshBackpressureTimer.isTimerWaiting()
            && workerState.refreshActiveTimer.isTimerWaiting()
            && workerState.executorHeartbeatTimer.isTimerWaiting()
            && workerState.userTimer.isTimerWaiting();
    }

    public static void main(String[] args) throws Exception {
        Preconditions.checkArgument(args.length == 4,
            "Illegal number of arguments. Expected: 4, Actual: " + args.length);
        String stormId = args[0];
        String assignmentId = args[1];
        String portStr = args[2];
        String workerId = args[3];
        Map<String, Object> conf = Utils.readStormConfig();
        Utils.setupDefaultUncaughtExceptionHandler();
        StormCommon.validateDistributedMode(conf);
        Worker worker = new Worker(conf, null, stormId, assignmentId, Integer.parseInt(portStr), workerId);
        worker.start();
        Utils.addShutdownHookWithForceKillIn1Sec(worker::shutdown);
    }
}
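For readers following along with getCurrentBlobVersions(), here is a small standalone sketch, separate from Worker.java, of what the BLOB_VERSION_EXTRACTION pattern matches: the code above assumes localized blob files carry a trailing numeric version suffix, and the regex ".*\\.([0-9]+)$" captures those trailing digits as the version. The file names in the sketch are made up for illustration only.

// BlobVersionDemo.java - standalone illustration of the BLOB_VERSION_EXTRACTION pattern.
// Not part of the Storm codebase; file names are hypothetical.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BlobVersionDemo {
    private static final Pattern BLOB_VERSION_EXTRACTION = Pattern.compile(".*\\.([0-9]+)$");

    public static void main(String[] args) {
        String[] sampleNames = {"models.zip.7", "lookup-table.txt.12", "no-version.jar"};
        for (String name : sampleNames) {
            Matcher m = BLOB_VERSION_EXTRACTION.matcher(name);
            if (m.matches()) {
                // group(1) is the trailing numeric suffix, e.g. 7 for "models.zip.7"
                System.out.println(name + " -> version " + Long.valueOf(m.group(1)));
            } else {
                // names without a numeric suffix are skipped, just as in getCurrentBlobVersions()
                System.out.println(name + " -> no version suffix");
            }
        }
    }
}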