Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.llama.am.impl; import com.cloudera.llama.am.api.LlamaAM; import com.cloudera.llama.am.api.NodeInfo; import com.cloudera.llama.am.cache.CacheRMConnector; import com.cloudera.llama.util.ErrorCode; import com.cloudera.llama.util.LlamaException; import com.cloudera.llama.am.api.PlacedReservation; import com.cloudera.llama.am.api.PlacedResource; import com.cloudera.llama.am.spi.RMResource; import com.cloudera.llama.am.api.Reservation; import com.cloudera.llama.am.spi.RMEvent; import com.cloudera.llama.am.spi.RMListener; import com.cloudera.llama.am.spi.RMConnector; import com.cloudera.llama.am.yarn.YarnRMConnector; import com.cloudera.llama.server.MetricUtil; import com.cloudera.llama.util.FastFormat; import com.cloudera.llama.util.UUID; import com.codahale.metrics.Gauge; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ScheduledExecutorService; /** * The <code>SingleQueueLlamaAM</code> handles the core logic to do gang * reservations and interacts with a {@link RMConnector} to request resources * from the Resource Manager. * There are three configuration properties that drive the logic of this class: * <ul> * <li>{@link #RM_CONNECTOR_CLASS_KEY}</li> * <li>{@link #CACHING_ENABLED_KEY}</li> * <li>{@link #NORMALIZING_ENABLED_KEY}</li> * </ul> */ public class SingleQueueLlamaAM extends LlamaAMImpl implements RMListener { private static final Logger LOG = LoggerFactory.getLogger(SingleQueueLlamaAM.class); private static final String METRIC_PREFIX = LlamaAM.METRIC_PREFIX + "queue."; private static final String RESERVATIONS_GAUGE_TEMPLATE = METRIC_PREFIX + "reservations[{}].gauge"; private static final String RESOURCES_GAUGE_TEMPLATE = METRIC_PREFIX + "resources[{}].gauge"; private static final String RESERVATIONS_ALLOCATION_TIMER_TEMPLATE = METRIC_PREFIX + "reservations-allocation-latency[{}].timer"; private static final String RESOURCES_ALLOCATION_TIMER_TEMPLATE = METRIC_PREFIX + "resources-allocation-latency[{}].timer"; public static final List<String> METRIC_TEMPLATE_KEYS = Arrays.asList(RESERVATIONS_GAUGE_TEMPLATE, RESOURCES_GAUGE_TEMPLATE, RESERVATIONS_ALLOCATION_TIMER_TEMPLATE, RESOURCES_ALLOCATION_TIMER_TEMPLATE); private final String queue; private final Map<UUID, PlacedReservationImpl> reservationsMap; private final Map<UUID, PlacedResourceImpl> resourcesMap; private final ScheduledExecutorService stp; private IntraLlamaAMsCallback callback; private String reservationsAllocationTimerKey; private String resourcesAllocationTimerKey; private RMConnector rmConnector; private boolean running; public static Class<? extends RMConnector> getRMConnectorClass(Configuration conf) { return conf.getClass(LlamaAM.RM_CONNECTOR_CLASS_KEY, YarnRMConnector.class, RMConnector.class); } public SingleQueueLlamaAM(Configuration conf, String queue, ScheduledExecutorService stp) { super(conf); this.queue = queue; reservationsMap = new HashMap<UUID, PlacedReservationImpl>(); resourcesMap = new HashMap<UUID, PlacedResourceImpl>(); this.stp = stp; } public void setCallback(IntraLlamaAMsCallback callback) { this.callback = callback; } private RMConnector createRMConnector() { Class<? extends RMConnector> klass = getRMConnectorClass(getConf()); RMConnector connector = ReflectionUtils.newInstance(klass, getConf()); // queue is null only for the AM used to report getNodes(), // we don't need caching for it TODO and no normalization either when done if (queue != null) { boolean caching = getConf().getBoolean(CACHING_ENABLED_KEY, CACHING_ENABLED_DEFAULT); boolean normalizing = getConf().getBoolean(NORMALIZING_ENABLED_KEY, NORMALIZING_ENABLED_DEFAULT); caching = getConf().getBoolean(CACHING_ENABLED_KEY + "." + queue, caching); normalizing = getConf().getBoolean(NORMALIZING_ENABLED_KEY + "." + queue, normalizing); LOG.info("Caching for queue '{}' enabled '{}'", queue, caching); if (caching && normalizing) { CacheRMConnector connectorCache = new CacheRMConnector(getConf(), connector); connector = connectorCache; } else if (caching) { LOG.warn("Caching not allowed without normalization. To enable caching," + "set '{}' to true.", LlamaAM.NORMALIZING_ENABLED_KEY); } if (normalizing) { NormalizerRMConnector normalizer = new NormalizerRMConnector(getConf(), connector); connector = normalizer; } } return connector; } // LlamaAM API @Override public void start() throws LlamaException { rmConnector = new PhasingOutRMConnector(getConf(), stp, new PhasingOutRMConnector.RmConnectorCreator() { @Override public RMConnector create() { return createRMConnector(); } }); rmConnector.setMetricRegistry(getMetricRegistry()); rmConnector.setRMListener(this); rmConnector.start(); if (queue != null) { rmConnector.register(queue); } running = true; if (getMetricRegistry() != null) { String key = FastFormat.format(RESERVATIONS_GAUGE_TEMPLATE, queue); MetricUtil.registerGauge(getMetricRegistry(), key, new Gauge<Integer>() { @Override public Integer getValue() { synchronized (this) { return reservationsMap.size(); } } }); key = FastFormat.format(RESOURCES_GAUGE_TEMPLATE, queue); MetricUtil.registerGauge(getMetricRegistry(), key, new Gauge<Integer>() { @Override public Integer getValue() { synchronized (this) { return resourcesMap.size(); } } }); key = FastFormat.format(RESERVATIONS_ALLOCATION_TIMER_TEMPLATE, queue); MetricUtil.registerTimer(getMetricRegistry(), key); reservationsAllocationTimerKey = key; key = FastFormat.format(RESOURCES_ALLOCATION_TIMER_TEMPLATE, queue); MetricUtil.registerTimer(getMetricRegistry(), key); resourcesAllocationTimerKey = key; } } public RMConnector getRMConnector() { return rmConnector; } @Override public boolean isRunning() { return running; } @Override public synchronized void stop() { running = false; if (rmConnector != null) { if (queue != null) { rmConnector.unregister(); } rmConnector.stop(); } } @Override public List<NodeInfo> getNodes() throws LlamaException { return rmConnector.getNodes(); } private void _addReservation(PlacedReservationImpl reservation) { UUID reservationId = reservation.getReservationId(); reservationsMap.put(reservationId, reservation); for (PlacedResourceImpl resource : reservation.getPlacedResourceImpls()) { resource.setStatus(PlacedResource.Status.PENDING); resourcesMap.put(resource.getResourceId(), resource); } } PlacedReservationImpl _getReservation(UUID reservationId) { return reservationsMap.get(reservationId); } private PlacedReservationImpl _deleteReservation(UUID reservationId, PlacedReservation.Status status) { PlacedReservationImpl reservation = reservationsMap.remove(reservationId); if (reservation != null) { for (PlacedResource resource : reservation.getPlacedResources()) { resourcesMap.remove(resource.getResourceId()); } } IntraLlamaAMsCallback localReference = this.callback; if (localReference != null) { localReference.discardReservation(reservationId); } if (reservation != null) { reservation.setStatus(status); } return reservation; } public void deleteAllYarnApplications() throws LlamaException { rmConnector.deleteAllReservations(); } @Override @SuppressWarnings("unchecked") public void reserve(UUID reservationId, final Reservation reservation) throws LlamaException { final PlacedReservationImpl impl = new PlacedReservationImpl(reservationId, reservation); LlamaAMEventImpl event = LlamaAMEventImpl.createEvent(true, impl); synchronized (this) { _addReservation(impl); } try { rmConnector.reserve((List) impl.getPlacedResourceImpls()); } catch (LlamaException ex) { synchronized (this) { _deleteReservation(impl.getReservationId(), PlacedReservation.Status.REJECTED); } throw ex; } dispatch(event); } @Override public PlacedReservation getReservation(final UUID reservationId) throws LlamaException { synchronized (this) { return _getReservation(reservationId); } } @Override public PlacedReservation releaseReservation(UUID handle, final UUID reservationId, boolean doNotCache) throws LlamaException { return releaseReservation(handle, reservationId, doNotCache, false); } @SuppressWarnings("unchecked") public PlacedReservation releaseReservation(UUID handle, final UUID reservationId, boolean doNotCache, boolean doNotDispatch) throws LlamaException { PlacedReservationImpl reservation; LlamaAMEventImpl event = null; synchronized (this) { reservation = _getReservation(reservationId); if (reservation != null) { if (!reservation.getHandle().equals(handle) && !isAdminCall()) { throw new LlamaException(ErrorCode.CLIENT_DOES_NOT_OWN_RESERVATION, handle, reservation.getReservationId()); } reservation = _deleteReservation(reservationId, PlacedReservation.Status.RELEASED); event = LlamaAMEventImpl.createEvent(isCallProducingEchoEvent(handle), reservation); } } if (reservation != null) { rmConnector.release((List<RMResource>) (List) reservation.getResources(), doNotCache); if (!doNotDispatch) { dispatch(event); } } else { LOG.warn("Unknown reservationId '{}'", reservationId); } return reservation; } @Override @SuppressWarnings("unchecked") public List<PlacedReservation> releaseReservationsForHandle(UUID handle, boolean doNotCache) throws LlamaException { List<PlacedReservation> reservations = new ArrayList<PlacedReservation>(); synchronized (this) { for (PlacedReservation reservation : new ArrayList<PlacedReservation>(reservationsMap.values())) { if (reservation.getHandle().equals(handle)) { reservation = _deleteReservation(reservation.getReservationId(), PlacedReservation.Status.RELEASED); reservations.add(reservation); LOG.debug("Releasing all reservations for handle '{}', reservationId '{}'", handle, reservation.getReservationId()); } } } for (PlacedReservation reservation : reservations) { rmConnector.release((List<RMResource>) (List) reservation.getResources(), doNotCache); } if (!reservations.isEmpty()) { dispatch(LlamaAMEventImpl.createEvent(isCallProducingEchoEvent(handle), reservations)); } return reservations; } @Override public List<PlacedReservation> releaseReservationsForQueue(String queue, boolean doNotCache) throws LlamaException { List<PlacedReservation> reservations; synchronized (this) { reservations = new ArrayList<PlacedReservation>(reservationsMap.values()); for (PlacedReservation res : reservations) { releaseReservation(res.getHandle(), res.getReservationId(), doNotCache, true); LOG.debug("Releasing all reservations for queue '{}', reservationId '{}'", queue, res.getReservationId()); } } if (!reservations.isEmpty()) { dispatch(LlamaAMEventImpl.createEvent(isCallProducingEchoEvent(WILDCARD_HANDLE), reservations)); } return reservations; } @Override public void emptyCacheForQueue(String queue) throws LlamaException { rmConnector.emptyCache(); } // PRIVATE METHODS private List<PlacedResourceImpl> _resourceRejected(PlacedResourceImpl resource, LlamaAMEventImpl event) { List<PlacedResourceImpl> toRelease = null; resource.setStatus(PlacedResource.Status.REJECTED); UUID reservationId = resource.getReservationId(); PlacedReservationImpl reservation = reservationsMap.get(reservationId); if (reservation == null) { LOG.warn("Unknown Reservation '{}' during resource '{}' rejection " + "handling", reservationId, resource.getResourceId()); } else { // if reservation is ALLOCATED, or it is PARTIAL and not GANG we let it be // and in the ELSE we notify the resource rejection switch (reservation.getStatus()) { case PENDING: case PARTIAL: if (reservation.isGang()) { reservation = _deleteReservation(reservationId, PlacedReservation.Status.REJECTED); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); } event.addResource(resource); break; case ALLOCATED: LOG.warn( "Illegal internal state, reservation '{}' is " + "ALLOCATED, resource cannot be rejected '{}'", reservationId, resource.getResourceId()); break; } } return toRelease; } private void _resourceAllocated(PlacedResourceImpl resource, RMEvent change, LlamaAMEventImpl event) { resource.setAllocationInfo(change.getLocation(), change.getCpuVCores(), change.getMemoryMbs()); resource.setRmResourceId(change.getRmResourceId()); UUID reservationId = resource.getReservationId(); PlacedReservationImpl reservation = reservationsMap.get(reservationId); if (reservation == null) { LOG.warn("Reservation '{}' during resource allocation handling " + "for" + " '{}'", reservationId, resource.getResourceId()); } else { MetricUtil.time(getMetricRegistry(), resourcesAllocationTimerKey, System.currentTimeMillis() - reservation.getPlacedOn(), new ReservationResourceLogContext(resource)); List<PlacedResourceImpl> resources = reservation.getPlacedResourceImpls(); boolean fulfilled = true; for (int i = 0; fulfilled && i < resources.size(); i++) { fulfilled = resources.get(i).getStatus() == PlacedResource.Status.ALLOCATED; } if (fulfilled) { reservation.setStatus(PlacedReservation.Status.ALLOCATED); MetricUtil.time(getMetricRegistry(), reservationsAllocationTimerKey, System.currentTimeMillis() - reservation.getPlacedOn(), new ReservationResourceLogContext(reservation)); } else { reservation.setStatus(PlacedReservation.Status.PARTIAL); } event.addReservation(reservation); event.addResource(resource); } } private List<PlacedResourceImpl> _resourcePreempted(PlacedResourceImpl resource, LlamaAMEventImpl event) { List<PlacedResourceImpl> toRelease = null; resource.setStatus(PlacedResource.Status.PREEMPTED); UUID reservationId = resource.getReservationId(); PlacedReservationImpl reservation = reservationsMap.get(reservationId); if (reservation == null) { LOG.warn("Unknown Reservation '{}' during resource preemption " + "handling for" + " '{}'", reservationId, resource.getResourceId()); } else { switch (reservation.getStatus()) { case ALLOCATED: event.addResource(resource); break; case PARTIAL: if (reservation.isGang()) { _deleteReservation(reservationId, PlacedReservation.Status.PREEMPTED); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); } else { event.addResource(resource); } break; case PENDING: LOG.warn( "Illegal internal state, reservation '{}' is PENDING, " + "resource '{}' cannot be preempted, releasing reservation ", reservationId, resource.getResourceId()); reservation = _deleteReservation(reservationId, PlacedReservation.Status.PREEMPTED); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); break; } } return toRelease; } private List<PlacedResourceImpl> _resourceLost(PlacedResourceImpl resource, LlamaAMEventImpl event) { List<PlacedResourceImpl> toRelease = null; resource.setStatus(PlacedResource.Status.LOST); UUID reservationId = resource.getReservationId(); PlacedReservationImpl reservation = reservationsMap.get(reservationId); if (reservation == null) { LOG.warn("Unknown Reservation '{}' during resource lost handling " + "for '{}'", reservationId, resource.getResourceId()); } else { switch (reservation.getStatus()) { case ALLOCATED: // Check if there are any existing resources, which are not lost. boolean allResourcesLost = true; for (PlacedResource r : reservation.getPlacedResources()) { if (r.getStatus() != PlacedResource.Status.LOST) { allResourcesLost = false; break; } } if (allResourcesLost) { reservation = _deleteReservation(reservationId, PlacedReservation.Status.LOST); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); } else { event.addResource(resource); } break; case PARTIAL: if (reservation.isGang()) { reservation = _deleteReservation(reservationId, PlacedReservation.Status.LOST); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); } else { event.addResource(resource); } break; case PENDING: LOG.warn("RM lost reservation '{}' with resource '{}', " + "rejecting reservation", reservationId, resource.getResourceId()); reservation = _deleteReservation(reservationId, PlacedReservation.Status.LOST); toRelease = reservation.getPlacedResourceImpls(); event.addReservation(reservation); break; } } return toRelease; } // RMListener API @Override @SuppressWarnings("unchecked") public void onEvent(final List<RMEvent> rmEvents) { if (rmEvents == null) { throw new IllegalArgumentException("changes cannot be NULL"); } LOG.trace("onEvent({})", rmEvents); LlamaAMEventImpl llamaAMEvent = new LlamaAMEventImpl(); List<PlacedResourceImpl> toRelease = new ArrayList<PlacedResourceImpl>(); synchronized (this) { for (RMEvent change : rmEvents) { PlacedResourceImpl resource = resourcesMap.get(change.getResourceId()); if (resource == null) { LOG.warn("Unknown resource '{}'", change.getResourceId()); } else { List<PlacedResourceImpl> release = null; switch (change.getStatus()) { case REJECTED: release = _resourceRejected(resource, llamaAMEvent); break; case ALLOCATED: _resourceAllocated(resource, change, llamaAMEvent); break; case PREEMPTED: release = _resourcePreempted(resource, llamaAMEvent); break; case LOST: release = _resourceLost(resource, llamaAMEvent); break; } if (release != null) { toRelease.addAll(release); } } } } if (!toRelease.isEmpty()) { try { rmConnector.release((List<RMResource>) (List) toRelease, false); } catch (LlamaException ex) { LOG.warn("release() error: {}", ex.toString(), ex); } } dispatch(llamaAMEvent); } //visible for testing only void loseAllReservations() { synchronized (this) { List<UUID> clientResourceIds = new ArrayList<UUID>(resourcesMap.keySet()); List<RMEvent> changes = new ArrayList<RMEvent>(); for (UUID clientResourceId : clientResourceIds) { changes.add(RMEvent.createStatusChangeEvent(clientResourceId, PlacedResource.Status.LOST)); } onEvent(changes); } } @Override public void stoppedByRM() { LOG.warn("Stopped by '{}'", rmConnector.getClass().getSimpleName()); loseAllReservations(); IntraLlamaAMsCallback localReference = this.callback; if (localReference != null) { localReference.discardAM(queue); } } }