Java tutorial
/*
 * RHQ Management Platform
 * Copyright (C) 2005-2008 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.rhq.enterprise.server.cloud.instance;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.List;

import javax.annotation.Resource;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.ejb.Timeout;
import javax.ejb.Timer;
import javax.ejb.TimerService;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.rhq.core.domain.cloud.PartitionEventType;
import org.rhq.core.domain.cloud.Server;
import org.rhq.core.domain.resource.Agent;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.enterprise.communications.GlobalSuspendCommandListener;
import org.rhq.enterprise.server.RHQConstants;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
import org.rhq.enterprise.server.cloud.CloudManagerLocal;
import org.rhq.enterprise.server.cloud.PartitionEventManagerLocal;
import org.rhq.enterprise.server.cloud.StatusManagerLocal;
import org.rhq.enterprise.server.core.comm.ServerCommunicationsServiceUtil;

/**
 * If you want to manipulate or report on the {@link Server} instance that
 * some piece of code is currently executing on, use the {@link ServerManagerBean}.
 *
 * This session bean determines the identity of the server it's running on by
 * reading the <code>rhq.server.high-availability.name</code> property from the
 * rhq-server.properties file.
 *
 * The functionality provided here is useful when you need to execute something
 * on every server in the cloud, such as partitioned services and data.
 *
 * @author Joseph Marques
 */
@Stateless
public class ServerManagerBean implements ServerManagerLocal {
    private final Log log = LogFactory.getLog(ServerManagerBean.class);

    /** Name of the system property that holds this server's cloud identity. */
    private static final String RHQ_SERVER_NAME_PROPERTY = "rhq.server.high-availability.name";

    /** Info object attached to every heartbeat timer created by this bean. */
    private static final String TIMER_DATA = "ServerManagerBean.beat";

    /** Delay, in milliseconds, before each single-action heartbeat timer fires. */
    private static final long HEARTBEAT_INTERVAL_MILLIS = 30000L;

    /**
     * Operation mode seen by the last successful {@link #establishCurrentServerMode()} call;
     * <code>null</code> until the first call completes in this JVM/classloader.
     */
    private static Server.OperationMode lastEstablishedServerMode = null;

    @Resource
    private TimerService timerService;

    @PersistenceContext(unitName = RHQConstants.PERSISTENCE_UNIT_NAME)
    private EntityManager entityManager;

    @EJB
    private CloudManagerLocal cloudManager;

    @EJB
    private StatusManagerLocal agentStatusManager;

    @EJB
    private PartitionEventManagerLocal partitionEventManager;

    @EJB
    private SubjectManagerLocal subjectManager;

    // reference to this bean's own local interface; used by the timer callback to invoke beat()
    @EJB
    private ServerManagerLocal serverManager;

    /**
     * Cancels every timer currently registered with this bean's timer service and then
     * creates a fresh single-action heartbeat timer.
     */
    @SuppressWarnings("unchecked")
    public void scheduleServerHeartbeat() {
        /* each time the webapp is reloaded, it would create
         * duplicate events if we don't cancel the existing ones */
        Collection<Timer> timers = timerService.getTimers();
        for (Timer existingTimer : timers) {
            log.debug("Found timer - attempting to cancel: " + existingTimer.toString());
            try {
                existingTimer.cancel();
            } catch (Exception e) {
                // best effort: keep going with the remaining timers, but do not lose the cause
                log.warn("Failed in attempting to cancel timer: " + existingTimer.toString(), e);
            }
        }

        // single-action timer that will trigger in 30 seconds
        timerService.createTimer(HEARTBEAT_INTERVAL_MILLIS, TIMER_DATA);
    }

    /**
     * Timer callback: performs one heartbeat and then re-arms the timer, whether or not the
     * heartbeat succeeded.
     *
     * @param timer the timer that fired (not otherwise used)
     */
    @Timeout
    public void handleHeartbeatTimer(Timer timer) {
        try {
            // goes through the injected local interface rather than calling this.beat() directly
            // (presumably so the container applies beat()'s transaction attribute - TODO confirm)
            serverManager.beat();
        } catch (Throwable t) {
            // logged as a single line; this callback recurs every interval
            log.error("Failed to handle cloud heartbeat timer - will try again later. Cause: " + t);
        } finally {
            // reschedule ourself to trigger in another 30 seconds
            try {
                timerService.createTimer(HEARTBEAT_INTERVAL_MILLIS, TIMER_DATA);
            } catch (Throwable t) {
                log.error(
                    "Failed to reschedule cloud heartbeat timer! Server status handling will not work from this point. A server restart may be needed after issue is resolved:"
                        + t, t);
            }
        }
    }

    /**
     * Persists the given server entity.
     *
     * @param server the server to persist
     * @return the value of {@link Server#getId()} after the persist call
     */
    public int create(Server server) {
        entityManager.persist(server);
        return server.getId();
    }

    /**
     * Returns the name this server identifies itself with.
     *
     * @return the value of the <code>rhq.server.high-availability.name</code> system property,
     *         or <code>"localhost"</code> when that property is unset or empty
     */
    public String getIdentity() {
        String identity = System.getProperty(RHQ_SERVER_NAME_PROPERTY, "");
        if (identity.equals("")) {
            return "localhost";
        }
        return identity;
    }

    /**
     * Looks up the agents associated with this server's identity.
     *
     * @return whatever {@link CloudManagerLocal#getAgentsByServerName(String)} returns for
     *         {@link #getIdentity()}
     */
    public List<Agent> getAgents() {
        String identity = getIdentity();
        List<Agent> results = cloudManager.getAgentsByServerName(identity);
        return results;
    }

    /**
     * Delegates to the status manager to fetch and clear the agents with status for this server.
     *
     * @return the list returned by the status manager for {@link #getIdentity()}
     */
    public List<Integer> getAndClearAgentsWithStatus() {
        List<Integer> results = agentStatusManager.getAndClearAgentsWithStatusForServer(getIdentity());
        return results;
    }

    /**
     * Reports whether this server had a non-zero status and clears that status.
     *
     * @return <code>true</code> if the server was found and its status was non-zero;
     *         <code>false</code> if the status was zero or no server matches {@link #getIdentity()}
     */
    public boolean getAndClearServerStatus() {
        String identity = getIdentity();
        Server server = cloudManager.getServerByName(identity);
        if (server == null) {
            return false; // don't reload caches if we don't know who we are
        }
        boolean hadStatus = (server.getStatus() != 0);
        server.clearStatus();
        return hadStatus;
    }

    /**
     * Returns the {@link Server} whose name matches {@link #getIdentity()}.
     *
     * @return the matching server, never <code>null</code>
     * @throws ServerNotFoundException if no server with that name is known to the cloud manager
     */
    public Server getServer() throws ServerNotFoundException {
        String identity = getIdentity();
        Server result = cloudManager.getServerByName(identity);
        if (result == null) {
            throw new ServerNotFoundException("Could not find server; is the " + RHQ_SERVER_NAME_PROPERTY
                + " property set in rhq-server.properties?");
        }
        return result;
    }

    /**
     * Logs the given message together with the current call stack, at FATAL level, to the
     * <code>HighAvailabilityLogic</code> log.
     *
     * @param message text to put at the head of the logged stack trace
     */
    public void printWithTrace(String message) {
        // The exception is only a carrier for the current stack; it is never thrown.
        // (Previously it was constructed inside a try block without being thrown, so the
        // catch block that did the logging could never run.)
        IllegalArgumentException stackCarrier = new IllegalArgumentException(message);
        String stackTrace = ThrowableUtil.getStackAsString(stackCarrier);
        LogFactory.getLog("HighAvailabilityLogic").fatal(stackTrace);
    }

    /**
     * Compares this server's current operation mode with the last one established and, when it
     * has changed, performs the work for the transition: clearing agent references, adding or
     * removing the maintenance-mode command listener, promoting INSTALLED/DOWN to NORMAL, and
     * recording a partition event. Failures during the transition are logged, not rethrown.
     */
    public void establishCurrentServerMode() {
        Server server = getServer();
        Server.OperationMode serverMode = server.getOperationMode();

        // no state change means no work
        if (serverMode == lastEstablishedServerMode) {
            return;
        }

        // whenever starting up clear the agent references to this server. Agent references will exist
        // for previously connected agents that did not fail-over while this server was unavailable. This
        // is done to avoid unnecessary cache re/load and moreover provides a logically initialized environment.
        if (null == lastEstablishedServerMode) {
            printWithTrace("establishCurrentServerMode: NULL->" + serverMode + ", clearing agent references");
            clearAgentReferences(server);
        }

        try {
            if (Server.OperationMode.NORMAL == serverMode) {
                // If moving into normal operating mode from Maintenance Mode then:
                // 1) Ensure lingering agent references are cleared
                //    - this may have been done at startup already, this covers the case when we go in and
                //    - out of MM without ever taking down the server
                // 2) Re-establish server communication by taking away the MM listener
                if (Server.OperationMode.MAINTENANCE == lastEstablishedServerMode) {
                    printWithTrace("establishCurrentServerMode: MAINTENANCE->NORMAL, clearing agent references");
                    clearAgentReferences(server);
                    ServerCommunicationsServiceUtil.getService().safeGetServiceContainer()
                        .removeCommandListener(getMaintenanceModeListener());
                    log.info("Notified communication layer of server operation mode " + serverMode);
                }
            } else if (Server.OperationMode.MAINTENANCE == serverMode) {
                // If moving into Maintenance Mode from any other mode then stop processing agent commands
                ServerCommunicationsServiceUtil.getService().safeGetServiceContainer()
                    .addCommandListener(getMaintenanceModeListener());
                log.info("Notified communication layer of server operation mode " + serverMode);
            } else if ((Server.OperationMode.INSTALLED == serverMode) || (Server.OperationMode.DOWN == serverMode)) {
                // INSTALLED: the server must have just been installed and must be coming up for the
                // first time as of this call.
                // DOWN: the server can't be DOWN if this code is executing, it means the server must be
                // coming up as of this call.
                // In both cases update the mode to NORMAL and update mtime as an initial heart beat.
                // This will prevent a running CloudManagerJob from resetting to DOWN before the real
                // ServerManagerJob starts updating the heart beat regularly.
                lastEstablishedServerMode = serverMode;
                serverMode = Server.OperationMode.NORMAL;
                server.setOperationMode(serverMode);
                server.setMtime(System.currentTimeMillis());
            }

            // If this server just transitioned from INSTALLED to NORMAL operation mode then it
            // has just been added to the cloud. Changing the number of servers in the cloud requires agent
            // distribution work, even if this is a 1-Server cloud. Generate a request for a repartitioning
            // of agent load, it will be executed on the next invocation of the cluster manager job.
            // Otherwise, audit the operation mode change as a partition event of interest.
            String audit = server.getName() + ": "
                + ((null != lastEstablishedServerMode) ? lastEstablishedServerMode : Server.OperationMode.DOWN)
                + " --> " + serverMode;

            if ((Server.OperationMode.NORMAL == serverMode)
                && (Server.OperationMode.INSTALLED == lastEstablishedServerMode)) {
                partitionEventManager.cloudPartitionEventRequest(subjectManager.getOverlord(),
                    PartitionEventType.OPERATION_MODE_CHANGE, audit);
            } else {
                partitionEventManager.auditPartitionEvent(subjectManager.getOverlord(),
                    PartitionEventType.OPERATION_MODE_CHANGE, audit);
            }

            lastEstablishedServerMode = serverMode;
        } catch (Exception e) {
            // keep the original one-line message, but also hand the exception to the logger so
            // the stack trace of the failed transition is not lost
            log.error("Unable to change HA Server Mode from " + lastEstablishedServerMode + " to " + serverMode
                + ": " + e, e);
        }
    }

    /**
     * Executes the {@link Agent#QUERY_REMOVE_SERVER_REFERENCE} named update for the given
     * server and logs how many rows were affected when that number is positive.
     *
     * @param server the server whose id is bound to the query's <code>serverId</code> parameter
     */
    private void clearAgentReferences(Server server) {
        Query query = entityManager.createNamedQuery(Agent.QUERY_REMOVE_SERVER_REFERENCE);
        query.setParameter("serverId", server.getId());
        int numRows = query.executeUpdate();
        if (numRows > 0) {
            log.info("Removed " + numRows + " obsolete agent reference(s) to server " + server.getName());
        }
    }

    // use this to ensure a listener of the same name. not using static singleton in case of class reload by different
    // classloaders (in case an exception bubbles up to the slsb layer)
    /**
     * Builds a new maintenance-mode listener; both constructor arguments are the name of
     * {@link Server.OperationMode#MAINTENANCE}.
     *
     * @return a freshly constructed listener
     */
    private GlobalSuspendCommandListener getMaintenanceModeListener() {
        return new GlobalSuspendCommandListener(Server.OperationMode.MAINTENANCE.name(),
            Server.OperationMode.MAINTENANCE.name());
    }

    /**
     * Sets this server's address to the local host name when the two differ.
     *
     * @throws SyncEndpointAddressException if the local host name cannot be resolved
     */
    public void syncEndpointAddress() throws SyncEndpointAddressException {
        Server server = getServer();
        try {
            String hostName = InetAddress.getLocalHost().getHostName();
            if (!hostName.equals(server.getAddress())) {
                server.setAddress(hostName);
            }
        } catch (UnknownHostException e) {
            throw new SyncEndpointAddressException("Failed to sync endpoint address for " + server, e);
        }
    }

    /**
     * Heartbeat: stamps this server's mtime with the current time and then re-evaluates its
     * operation mode. Declared to run in a new transaction.
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void beat() {
        Server server = getServer();
        server.setMtime(System.currentTimeMillis());

        // Handles server mode state changes
        // note: this call should be fast. if not we need to break the heart beat into its own job
        establishCurrentServerMode();
    }
}