Java tutorial
/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.async.preemptor; import java.lang.annotation.Retention; import java.lang.annotation.Target; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import javax.inject.Inject; import javax.inject.Qualifier; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.common.base.Optional; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.FluentIterable; import com.google.common.collect.HashMultiset; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Multiset; import com.google.common.collect.Sets; import com.twitter.common.quantity.Amount; import com.twitter.common.quantity.Time; import com.twitter.common.util.Clock; import org.apache.aurora.scheduler.HostOffer; import org.apache.aurora.scheduler.async.OfferManager; import org.apache.aurora.scheduler.base.Query; import org.apache.aurora.scheduler.base.TaskGroupKey; import org.apache.aurora.scheduler.base.Tasks; import org.apache.aurora.scheduler.filter.AttributeAggregate; import org.apache.aurora.scheduler.storage.Storage; import org.apache.aurora.scheduler.storage.Storage.StoreProvider; import org.apache.aurora.scheduler.storage.entities.IAssignedTask; import org.apache.aurora.scheduler.storage.entities.IJobKey; import org.apache.aurora.scheduler.storage.entities.IScheduledTask; import org.apache.aurora.scheduler.storage.entities.ITaskConfig; import static java.lang.annotation.ElementType.FIELD; import static java.lang.annotation.ElementType.METHOD; import static java.lang.annotation.ElementType.PARAMETER; import static java.lang.annotation.RetentionPolicy.RUNTIME; import static java.util.Objects.requireNonNull; import static org.apache.aurora.gen.ScheduleStatus.PENDING; import static org.apache.aurora.scheduler.base.Tasks.SCHEDULED_TO_ASSIGNED; /** * Attempts to find preemption slots for all PENDING tasks eligible for preemption. */ @VisibleForTesting public class PendingTaskProcessor implements Runnable { private final Storage storage; private final OfferManager offerManager; private final PreemptionVictimFilter preemptionVictimFilter; private final PreemptorMetrics metrics; private final Amount<Long, Time> preemptionCandidacyDelay; private final BiCache<PreemptionProposal, TaskGroupKey> slotCache; private final ClusterState clusterState; private final Clock clock; /** * Binding annotation for the time interval after which a pending task becomes eligible to * preempt other tasks. To avoid excessive churn, the preemptor requires that a task is PENDING * for a duration (dictated by {@link #preemptionCandidacyDelay}) before it becomes eligible * to preempt other tasks. */ @VisibleForTesting @Qualifier @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME) public @interface PreemptionDelay { } @Inject PendingTaskProcessor(Storage storage, OfferManager offerManager, PreemptionVictimFilter preemptionVictimFilter, PreemptorMetrics metrics, @PreemptionDelay Amount<Long, Time> preemptionCandidacyDelay, BiCache<PreemptionProposal, TaskGroupKey> slotCache, ClusterState clusterState, Clock clock) { this.storage = requireNonNull(storage); this.offerManager = requireNonNull(offerManager); this.preemptionVictimFilter = requireNonNull(preemptionVictimFilter); this.metrics = requireNonNull(metrics); this.preemptionCandidacyDelay = requireNonNull(preemptionCandidacyDelay); this.slotCache = requireNonNull(slotCache); this.clusterState = requireNonNull(clusterState); this.clock = requireNonNull(clock); } @Override public void run() { metrics.recordTaskProcessorRun(); storage.read(new Storage.Work.Quiet<Void>() { @Override public Void apply(StoreProvider store) { Multimap<String, PreemptionVictim> slavesToActiveTasks = clusterState.getSlavesToActiveTasks(); if (slavesToActiveTasks.isEmpty()) { // No preemption victims to consider. return null; } // Group the offers by slave id so they can be paired with active tasks from the same slave. Map<String, HostOffer> slavesToOffers = Maps.uniqueIndex(offerManager.getOffers(), OFFER_TO_SLAVE_ID); Set<String> allSlaves = Sets .newHashSet(Iterables.concat(slavesToOffers.keySet(), slavesToActiveTasks.keySet())); // The algorithm below attempts to find a reservation for every task group by matching // it against all available slaves until a preemption slot is found. Groups are evaluated // in a round-robin fashion to ensure fairness (e.g.: G1, G2, G3, G1, G2). // A slave is removed from further matching once a reservation is made. Similarly, all // identical task group instances are removed from further iteration if none of the // available slaves could yield a preemption proposal. A consuming iterator is used for // task groups to ensure iteration order is preserved after a task group is removed. LoadingCache<IJobKey, AttributeAggregate> jobStates = attributeCache(store); List<TaskGroupKey> pendingGroups = fetchIdlePendingGroups(store); Iterator<TaskGroupKey> groups = Iterators.consumingIterator(pendingGroups.iterator()); while (!pendingGroups.isEmpty()) { boolean matched = false; TaskGroupKey group = groups.next(); ITaskConfig task = group.getTask(); metrics.recordPreemptionAttemptFor(task); Iterator<String> slaveIterator = allSlaves.iterator(); while (slaveIterator.hasNext()) { String slaveId = slaveIterator.next(); Optional<ImmutableSet<PreemptionVictim>> candidates = preemptionVictimFilter .filterPreemptionVictims(task, slavesToActiveTasks.get(slaveId), jobStates.getUnchecked(task.getJob()), Optional.fromNullable(slavesToOffers.get(slaveId)), store); metrics.recordSlotSearchResult(candidates, task); if (candidates.isPresent()) { // Slot found -> remove slave to avoid multiple task reservations. slaveIterator.remove(); slotCache.put(new PreemptionProposal(candidates.get(), slaveId), group); matched = true; break; } } if (!matched) { // No slot found for the group -> remove group and reset group iterator. pendingGroups.removeAll(ImmutableSet.of(group)); groups = Iterators.consumingIterator(pendingGroups.iterator()); } } return null; } }); } private List<TaskGroupKey> fetchIdlePendingGroups(StoreProvider store) { Multiset<TaskGroupKey> taskGroupCounts = HashMultiset .create(FluentIterable.from(store.getTaskStore().fetchTasks(Query.statusScoped(PENDING))) .filter(Predicates.and(isIdleTask, Predicates.not(hasCachedSlot))) .transform(Functions.compose(ASSIGNED_TO_GROUP_KEY, SCHEDULED_TO_ASSIGNED))); return getPreemptionSequence(taskGroupCounts); } /** * Creates execution sequence for pending task groups by interleaving their unique occurrences. * For example: {G1, G1, G1, G2, G2} will be converted into {G1, G2, G1, G2, G1}. * * @param groups Multiset of task groups. * @return A task group execution sequence. */ private static List<TaskGroupKey> getPreemptionSequence(Multiset<TaskGroupKey> groups) { Multiset<TaskGroupKey> mutableGroups = HashMultiset.create(groups); List<TaskGroupKey> instructions = Lists.newLinkedList(); Set<TaskGroupKey> keys = ImmutableSet.copyOf(groups.elementSet()); while (!mutableGroups.isEmpty()) { for (TaskGroupKey key : keys) { if (mutableGroups.contains(key)) { instructions.add(key); mutableGroups.remove(key); } } } return instructions; } private LoadingCache<IJobKey, AttributeAggregate> attributeCache(final StoreProvider store) { return CacheBuilder.newBuilder().build(CacheLoader.from(new Function<IJobKey, AttributeAggregate>() { @Override public AttributeAggregate apply(IJobKey job) { return AttributeAggregate.getJobActiveState(store, job); } })); } private static final Function<IAssignedTask, TaskGroupKey> ASSIGNED_TO_GROUP_KEY = new Function<IAssignedTask, TaskGroupKey>() { @Override public TaskGroupKey apply(IAssignedTask task) { return TaskGroupKey.from(task.getTask()); } }; private final Predicate<IScheduledTask> hasCachedSlot = new Predicate<IScheduledTask>() { @Override public boolean apply(IScheduledTask task) { return !slotCache.getByValue(TaskGroupKey.from(task.getAssignedTask().getTask())).isEmpty(); } }; private final Predicate<IScheduledTask> isIdleTask = new Predicate<IScheduledTask>() { @Override public boolean apply(IScheduledTask task) { return (clock.nowMillis() - Tasks.getLatestEvent(task).getTimestamp()) >= preemptionCandidacyDelay .as(Time.MILLISECONDS); } }; private static final Function<HostOffer, String> OFFER_TO_SLAVE_ID = new Function<HostOffer, String>() { @Override public String apply(HostOffer offer) { return offer.getOffer().getSlaveId().getValue(); } }; }