Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.servicemonitor.reporting; import org.apache.ambari.servicemonitor.MonitorKeys; import org.apache.ambari.servicemonitor.probes.Probe; import org.apache.ambari.servicemonitor.utils.MonitorUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import java.io.Closeable; import java.io.IOException; import java.util.List; /** * This is the monitor service */ public final class ReportingLoop implements Runnable, ProbeReportHandler, MonitorKeys, Closeable { private static final Log LOG = LogFactory.getLog(ReportingLoop.class); private final ProbeWorker worker; private final Thread workerThread; private final int reportInterval; private final int probeTimeout; private final int bootstrapTimeout; private ProbeReportHandler reporter; private final String name; private volatile boolean mustExit; public ReportingLoop(String name, ProbeReportHandler reporter, List<Probe> probes, List<Probe> dependencyProbes, int probeInterval, int reportInterval, int probeTimeout, int bootstrapTimeout) throws IOException { this(name, reporter, new ProbeWorker(probes, dependencyProbes, probeInterval, bootstrapTimeout), reportInterval, probeTimeout); } /** * Create a new reporting loop -and bond the worker's ProbeReportHandler * to us * @param name * @param reporter * @param worker * @param reportInterval * @param probeTimeout */ public ReportingLoop(String name, ProbeReportHandler reporter, ProbeWorker worker, int reportInterval, int probeTimeout) throws IOException { this.name = name; this.reporter = reporter; this.reportInterval = reportInterval; this.probeTimeout = probeTimeout; this.worker = worker; this.bootstrapTimeout = worker.getBootstrapTimeout(); worker.setReportHandler(this); workerThread = new Thread(worker, "probe thread - " + name); worker.init(); } public int getBootstrapTimeout() { return bootstrapTimeout; } public void setReporter(Reporter reporter) { assert this.reporter == null : "attempting to reassign reporter "; assert reporter != null : "new reporter is null"; this.reporter = reporter; } /** * Start the monitoring. * * @return false if the monitoring did not start and that the worker threads * should be run up. */ public boolean startReporting() { String description = "Service Monitor for " + name + ", probe-interval= " + MonitorUtils.millisToHumanTime(worker.interval) + ", report-interval=" + MonitorUtils.millisToHumanTime(reportInterval) + ", probe-timeout=" + timeoutToStr(probeTimeout) + ", bootstrap-timeout=" + timeoutToStr(bootstrapTimeout); LOG.info("Starting reporting" + " to " + reporter + description); return reporter.start(name, description); } private String timeoutToStr(int timeout) { return timeout >= 0 ? MonitorUtils.millisToHumanTime(timeout) : "not set"; } private void startWorker() { LOG.info("Starting reporting worker thread "); workerThread.setDaemon(true); workerThread.start(); } /** * This exits the process cleanly */ @Override public void close() { LOG.info("Stopping reporting"); mustExit = true; if (worker != null) { worker.setMustExit(); workerThread.interrupt(); } if (reporter != null) { reporter.unregister(); } } @Override public void probeFailure(ProbeFailedException exception) { reporter.probeFailure(exception); } @Override public void probeProcessStateChange(ProbePhase probePhase) { reporter.probeProcessStateChange(probePhase); } @Override public void probeBooted(ProbeStatus status) { reporter.probeBooted(status); } private long now() { return System.currentTimeMillis(); } @Override public void probeResult(ProbePhase phase, ProbeStatus status) { reporter.probeResult(phase, status); } @Override public boolean start(String n, String description) { return true; } @Override public void unregister() { } @Override public boolean isIntegratedWithHAMonitoringSystem() { return false; } @Override public void heartbeat(ProbeStatus status) { } @Override public void probeTimedOut(ProbePhase currentPhase, Probe probe, ProbeStatus lastStatus, long currentTime) { } @Override public void liveProbeCycleCompleted() { //delegate to the reporter reporter.liveProbeCycleCompleted(); } /** * The reporting loop */ void reportingLoop() { while (!mustExit) { try { ProbeStatus workerStatus = worker.getLastStatus(); long now = now(); long lastStatusIssued = workerStatus.getTimestamp(); long timeSinceLastStatusIssued = now - lastStatusIssued; //two actions can occur here: a heartbeat is issued or a timeout reported. //this flag decides which boolean heartbeat; //based on phase, decide whether to heartbeat or timeout ProbePhase probePhase = worker.getProbePhase(); switch (probePhase) { case DEPENDENCY_CHECKING: //no timeouts in dependency phase heartbeat = true; break; case BOOTSTRAPPING: //the timeout here is fairly straightforward: heartbeats are //raised while the worker hasn't timed out heartbeat = bootstrapTimeout < 0 || timeSinceLastStatusIssued < bootstrapTimeout; break; case LIVE: //use the probe timeout interval between the current time //and the time the last status event was received. heartbeat = timeSinceLastStatusIssued < probeTimeout; break; case INIT: case TERMINATING: default: //send a heartbeat, because this isn't the time to be failing heartbeat = true; } if (heartbeat) { //a heartbeat is sent to the reporter reporter.heartbeat(workerStatus); } else { //no response from the worker -it is hung. reporter.probeTimedOut(probePhase, worker.getCurrentProbe(), workerStatus, now); } //now sleep Thread.sleep(reportInterval); } catch (InterruptedException e) { //interrupted -always exit the loop. break; } } //this point is reached if and only if a clean exit was requested or something failed. } /** * This can be run in a separate thread, or it can be run directly from the caller. * Test runs do the latter, HAM runs multiple reporting threads. */ @Override public void run() { try { startWorker(); reportingLoop(); } catch (RuntimeException e) { LOG.warn("Failure in the reporting loop: " + e, e); //rethrow so that inline code can pick it up (e.g. test runs) throw e; } } }