Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.Vector; import java.net.UnknownHostException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.CleanupQueue.PathDeletionContext; import org.apache.hadoop.mapred.Counters.CountersExceededException; import org.apache.hadoop.mapred.Counters.Group; import org.apache.hadoop.mapred.JobHistory.Values; import org.apache.hadoop.mapreduce.JobContext; import 
org.apache.hadoop.mapreduce.JobSubmissionFiles; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.DelegationTokenRenewal; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker; import org.apache.hadoop.mapreduce.split.JobSplit; import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.StringUtils; /************************************************************* * JobInProgress maintains all the info for keeping * a Job on the straight and narrow. It keeps its JobProfile * and its latest JobStatus, plus a set of tables for * doing bookkeeping of its Tasks. * *********************************************************** */ public class JobInProgress { /** * Used when the a kill is issued to a job which is initializing. 
*/
  @SuppressWarnings("serial")
  static class KillInterruptedException extends InterruptedException {
    /** @param msg detail message handed to {@link InterruptedException} */
    public KillInterruptedException(String msg) {
      super(msg);
    }
  }

  static final Log LOG = LogFactory.getLog(JobInProgress.class);

  JobProfile profile;
  JobStatus status;
  String jobFile = null;
  Path localJobFile = null;
  final QueueMetrics queueMetrics;

  // Task tables. They start out empty; initTasks() replaces them.
  TaskInProgress maps[] = new TaskInProgress[0];
  TaskInProgress reduces[] = new TaskInProgress[0];
  TaskInProgress cleanup[] = new TaskInProgress[0];
  TaskInProgress setup[] = new TaskInProgress[0];
  int numMapTasks = 0;
  int numReduceTasks = 0;
  final long memoryPerMap;
  final long memoryPerReduce;
  // Slots needed per task-attempt; changed through setNumSlotsPerMap/Reduce.
  volatile int numSlotsPerMap = 1;
  volatile int numSlotsPerReduce = 1;
  final int maxTaskFailuresPerTracker;

  // Counters to track currently running/finished/failed Map/Reduce task-attempts
  int runningMapTasks = 0;
  int runningReduceTasks = 0;
  int finishedMapTasks = 0;
  int finishedReduceTasks = 0;
  int failedMapTasks = 0;
  int failedReduceTasks = 0;

  // -1 is used as "no limit" (see the constructor comment on reduce_input_limit).
  private static long DEFAULT_REDUCE_INPUT_LIMIT = -1L;
  long reduce_input_limit = -1L;
  // Default fraction of maps that must finish before reduces are scheduled;
  // used by initTasks() when "mapred.reduce.slowstart.completed.maps" is unset.
  private static float DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART = 0.05f;
  int completedMapsForReduceSlowstart = 0;

  // runningMapTasks include speculative tasks, so we need to capture
  // speculative tasks separately
  int speculativeMapTasks = 0;
  int speculativeReduceTasks = 0;

  final int mapFailuresPercent;
  final int reduceFailuresPercent;
  int failedMapTIPs = 0;
  int failedReduceTIPs = 0;
  private volatile boolean launchedCleanup = false;
  private volatile boolean launchedSetup = false;
  private volatile boolean jobKilled = false;
  private volatile boolean jobFailed = false;

  JobPriority priority = JobPriority.NORMAL;
  final JobTracker jobtracker;

  protected Credentials tokenStorage;

  // NetworkTopology Node to the set of TIPs
  Map<Node, List<TaskInProgress>> nonRunningMapCache;

  // Map of NetworkTopology Node to set of running TIPs
  Map<Node, Set<TaskInProgress>> runningMapCache;

  // A list of non-local, non-running maps
final List<TaskInProgress> nonLocalMaps; // Set of failed, non-running maps sorted by #failures final SortedSet<TaskInProgress> failedMaps; // A set of non-local running maps Set<TaskInProgress> nonLocalRunningMaps; // A list of non-running reduce TIPs Set<TaskInProgress> nonRunningReduces; // A set of running reduce TIPs Set<TaskInProgress> runningReduces; // A list of cleanup tasks for the map task attempts, to be launched List<TaskAttemptID> mapCleanupTasks = new LinkedList<TaskAttemptID>(); // A list of cleanup tasks for the reduce task attempts, to be launched List<TaskAttemptID> reduceCleanupTasks = new LinkedList<TaskAttemptID>(); // keep failedMaps, nonRunningReduces ordered by failure count to bias // scheduling toward failing tasks private static final Comparator<TaskInProgress> failComparator = new Comparator<TaskInProgress>() { @Override public int compare(TaskInProgress t1, TaskInProgress t2) { if (t1 == null) return -1; if (t2 == null) return 1; int failures = t2.numTaskFailures() - t1.numTaskFailures(); return (failures == 0) ? (t1.getTIPId().getId() - t2.getTIPId().getId()) : failures; } }; private final int maxLevel; /** * A special value indicating that * {@link #findNewMapTask(TaskTrackerStatus, int, int, int, double)} should * schedule any available map tasks for this job, including speculative tasks. */ private final int anyCacheLevel; /** * Number of scheduling opportunities (heartbeats) given to this Job */ private volatile long numSchedulingOpportunities; static String LOCALITY_WAIT_FACTOR = "mapreduce.job.locality.wait.factor"; static final float DEFAULT_LOCALITY_WAIT_FACTOR = 1.0f; /** * Percentage of the cluster the job is willing to wait to get better locality */ private float localityWaitFactor = 1.0f; /** * A special value indicating that * {@link #findNewMapTask(TaskTrackerStatus, int, int, int, double)} should * schedule any only off-switch and speculative map tasks for this job. 
*/
  private static final int NON_LOCAL_CACHE_LEVEL = -1;

  private int taskCompletionEventTracker = 0;
  List<TaskCompletionEvent> taskCompletionEvents;

  // The maximum percentage of trackers in cluster added to the 'blacklist'.
  private static final double CLUSTER_BLACKLIST_PERCENT = 0.25;

  // The maximum percentage of fetch failures allowed for a map
  private static final double MAX_ALLOWED_FETCH_FAILURES_PERCENT = 0.5;

  // No. of tasktrackers in the cluster
  private volatile int clusterSize = 0;

  // The no. of tasktrackers where >= conf.getMaxTaskFailuresPerTracker()
  // tasks have failed
  private volatile int flakyTaskTrackers = 0;
  // Map of trackerHostName -> no. of task failures
  private Map<String, Integer> trackerToFailuresMap =
    new TreeMap<String, Integer>();

  //Confine estimation algorithms to an "oracle" class that JIP queries.
  private ResourceEstimator resourceEstimator;

  // startTime is set in the constructor, launchTime in initTasks();
  // both can be overwritten by updateJobInfo().
  long startTime;
  long launchTime;
  long finishTime;

  // First *task launch time
  final Map<TaskType, Long> firstTaskLaunchTimes =
    new EnumMap<TaskType, Long>(TaskType.class);

  // Indicates how many times the job got restarted
  private final int restartCount;

  private JobConf conf;
  // Set to true at the end of initTasks(); read by inited().
  volatile boolean tasksInited = false;
  // Lock object and flags coordinating initTasks() with a concurrent kill.
  private JobInitKillStatus jobInitKillStatus = new JobInitKillStatus();

  private LocalFileSystem localFs;
  private FileSystem fs;
  private JobID jobId;
  volatile private boolean hasSpeculativeMaps;
  volatile private boolean hasSpeculativeReduces;
  // Sum of the input data lengths of all splits; accumulated in initTasks().
  private long inputLength = 0;
  private String submitHostName;
  private String submitHostAddress;
  private String user;
  private String historyFile = "";
  private boolean historyFileCopied;

  // Per-job counters
  public static enum Counter {
    NUM_FAILED_MAPS,
    NUM_FAILED_REDUCES,
    TOTAL_LAUNCHED_MAPS,
    TOTAL_LAUNCHED_REDUCES,
    OTHER_LOCAL_MAPS,
    DATA_LOCAL_MAPS,
    RACK_LOCAL_MAPS,
    SLOTS_MILLIS_MAPS,
    SLOTS_MILLIS_REDUCES,
    FALLOW_SLOTS_MILLIS_MAPS,
    FALLOW_SLOTS_MILLIS_REDUCES
  }
  private Counters jobCounters = new Counters();

  // Maximum no.
// of fetch-failure notifications after which
  // the map task is killed
  private static final int MAX_FETCH_FAILURES_NOTIFICATIONS = 3;

  // Map of mapTaskId -> no. of fetch failures
  private Map<TaskAttemptID, Integer> mapTaskIdToFetchFailuresMap =
    new TreeMap<TaskAttemptID, Integer>();

  private Object schedulingInfo;

  /**
   * Mutable (timestamp, slot count) pair; used as the value type of the
   * trackersReservedForMaps / trackersReservedForReduces maps below.
   */
  private static class FallowSlotInfo {
    long timestamp;
    int numSlots;

    public FallowSlotInfo(long timestamp, int numSlots) {
      this.timestamp = timestamp;
      this.numSlots = numSlots;
    }

    public long getTimestamp() {
      return timestamp;
    }

    public void setTimestamp(long timestamp) {
      this.timestamp = timestamp;
    }

    public int getNumSlots() {
      return numSlots;
    }

    public void setNumSlots(int numSlots) {
      this.numSlots = numSlots;
    }
  }

  private Map<TaskTracker, FallowSlotInfo> trackersReservedForMaps =
    new HashMap<TaskTracker, FallowSlotInfo>();
  private Map<TaskTracker, FallowSlotInfo> trackersReservedForReduces =
    new HashMap<TaskTracker, FallowSlotInfo>();
  private Path jobSubmitDir = null;

  final private UserGroupInformation userUGI;

  /**
   * Create an almost empty JobInProgress, which can be used only for tests
   *
   * @param jobid id of the job
   * @param conf job configuration; task counts, memory settings, failure
   *             limits, user, job name and queue name are read from it
   * @param tracker the owning JobTracker
   * @throws IOException if the configured queue does not exist or the task
   *                     limit check fails
   */
  protected JobInProgress(JobID jobid, JobConf conf, JobTracker tracker)
  throws IOException {
    this.conf = conf;
    this.jobId = jobid;
    this.numMapTasks = conf.getNumMapTasks();
    this.numReduceTasks = conf.getNumReduceTasks();
    this.maxLevel = NetworkTopology.DEFAULT_HOST_LEVEL;
    this.anyCacheLevel = this.maxLevel + 1;
    this.jobtracker = tracker;
    this.restartCount = 0;

    hasSpeculativeMaps = conf.getMapSpeculativeExecution();
    hasSpeculativeReduces = conf.getReduceSpeculativeExecution();
    this.nonLocalMaps = new LinkedList<TaskInProgress>();
    this.failedMaps = new TreeSet<TaskInProgress>(failComparator);
    this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
    this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
    this.nonRunningReduces = new TreeSet<TaskInProgress>(failComparator);
    this.runningReduces = new LinkedHashSet<TaskInProgress>();
    this.resourceEstimator = new ResourceEstimator(this);
    this.status = new JobStatus(jobid, 0.0f, 0.0f, JobStatus.PREP);
    this.status.setUsername(conf.getUser());
    String queueName = conf.getQueueName();
    this.profile = new JobProfile(conf.getUser(), jobid, "", "",
                                  conf.getJobName(), queueName);
    this.memoryPerMap = conf.getMemoryForMapTask();
    this.memoryPerReduce = conf.getMemoryForReduceTask();
    this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
    this.mapFailuresPercent = conf.getMaxMapTaskFailuresPercent();
    this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent();

    Queue queue = this.jobtracker.getQueueManager().getQueue(queueName);
    if (queue == null) {
      throw new IOException("Queue \"" + queueName + "\" does not exist");
    }
    this.queueMetrics = queue.getMetrics();

    // Check task limits
    checkTaskLimits();

    this.taskCompletionEvents = new ArrayList<TaskCompletionEvent>
      (numMapTasks + numReduceTasks + 10);
    try {
      this.userUGI = UserGroupInformation.getCurrentUser();
    } catch (IOException ie) {
      throw new RuntimeException(ie);
    }
  }

  /**
   * Create a JobInProgress from a submitted job: copies the job conf from the
   * submit directory to a local file, loads it, validates the user and the
   * queue, and initialises the bookkeeping structures.
   *
   * @param jobtracker the owning JobTracker
   * @param default_conf configuration used to resolve the submit-dir file
   *                     system and the local path of the job file
   * @param jobInfo supplies the job id, user and job submit directory
   * @param rCount number of times the job has been restarted
   * @param ts credentials whose tokens are added to the user's UGI; may be null
   * @throws IOException if the job conf exceeds the size limit, the conf user
   *                     does not match the authenticated user, the queue does
   *                     not exist, or the task limit check fails
   * @throws InterruptedException declared because of the doAs call below
   */
  JobInProgress(JobTracker jobtracker, final JobConf default_conf,
      JobInfo jobInfo, int rCount, Credentials ts)
  throws IOException, InterruptedException {
    try {
      this.restartCount = rCount;
      this.jobId = JobID.downgrade(jobInfo.getJobID());
      String url = "http://" + jobtracker.getJobTrackerMachine() + ":"
      + jobtracker.getInfoPort() + "/jobdetails.jsp?jobid=" + jobId;
      this.jobtracker = jobtracker;
      this.status = new JobStatus(jobId, 0.0f, 0.0f, JobStatus.PREP);
      this.status.setUsername(jobInfo.getUser().toString());
      // NOTE(review): the conf field has not been assigned yet at this point
      // in this constructor (it is loaded from the local job file further
      // down), so this call passes the field's current value - confirm that
      // addPrepJob tolerates that.
      this.jobtracker.getInstrumentation().addPrepJob(conf, jobId);
      // Add the queue-level metric below (after the profile has been initialized)
      this.startTime = jobtracker.getClock().getTime();
      status.setStartTime(startTime);
      this.localFs = jobtracker.getLocalFileSystem();

      this.tokenStorage = ts;
      // use the user supplied token to add user credentials to the conf
      jobSubmitDir = jobInfo.getJobSubmitDir();
      user = jobInfo.getUser().toString();
      userUGI = UserGroupInformation.createRemoteUser(user);
      if (ts != null) {
        for (Token<? extends TokenIdentifier> token : ts.getAllTokens()) {
          userUGI.addToken(token);
        }
      }

      // Obtain the submit-dir file system as the submitting user.
      fs = userUGI.doAs(new PrivilegedExceptionAction<FileSystem>() {
        public FileSystem run() throws IOException {
          return jobSubmitDir.getFileSystem(default_conf);
        }
      });

      /** check for the size of jobconf **/
      Path submitJobFile = JobSubmissionFiles.getJobConfPath(jobSubmitDir);
      FileStatus fstatus = fs.getFileStatus(submitJobFile);
      if (fstatus.getLen() > jobtracker.MAX_JOBCONF_SIZE) {
        throw new IOException("Exceeded max jobconf size: "
            + fstatus.getLen() + " limit: " + jobtracker.MAX_JOBCONF_SIZE);
      }
      this.localJobFile = default_conf.getLocalPath(JobTracker.SUBDIR
          + "/" + jobId + ".xml");
      Path jobFilePath = JobSubmissionFiles.getJobConfPath(jobSubmitDir);
      jobFile = jobFilePath.toString();
      fs.copyToLocalFile(jobFilePath, localJobFile);
      conf = new JobConf(localJobFile);
      if (conf.getUser() == null) {
        this.conf.setUser(user);
      }
      // Reject the job when the conf names a different user than the one
      // taken from jobInfo.
      if (!conf.getUser().equals(user)) {
        String desc = "The username " + conf.getUser() + " obtained from the "
            + "conf doesn't match the username " + user + " the user "
            + "authenticated as";
        AuditLogger.logFailure(user, Operation.SUBMIT_JOB.name(),
            conf.getUser(), jobId.toString(), desc);
        throw new IOException(desc);
      }
      this.priority = conf.getJobPriority();
      this.status.setJobPriority(this.priority);
      String queueName = conf.getQueueName();
      this.profile = new JobProfile(user, jobId,
          jobFile, url, conf.getJobName(), queueName);

      Queue queue = this.jobtracker.getQueueManager().getQueue(queueName);
      if (queue == null) {
        throw new IOException("Queue \"" + queueName + "\" does not exist");
      }
      this.queueMetrics = queue.getMetrics();
      this.queueMetrics.addPrepJob(conf, jobId);

      this.submitHostName = conf.getJobSubmitHostName();
      this.submitHostAddress = conf.getJobSubmitHostAddress();
      this.numMapTasks = conf.getNumMapTasks();
      this.numReduceTasks = conf.getNumReduceTasks();

      this.memoryPerMap = conf.getMemoryForMapTask();
      this.memoryPerReduce = conf.getMemoryForReduceTask();

      this.taskCompletionEvents = new ArrayList<TaskCompletionEvent>
      (numMapTasks + numReduceTasks + 10);

      // Construct the jobACLs
      status.setJobACLs(jobtracker.getJobACLsManager().constructJobACLs(conf));

      this.mapFailuresPercent = conf.getMaxMapTaskFailuresPercent();
      this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent();

      this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();

      hasSpeculativeMaps = conf.getMapSpeculativeExecution();
      hasSpeculativeReduces = conf.getReduceSpeculativeExecution();
      // a limit on the input size of the reduce.
      // we check to see if the estimated input size of
      // of each reduce is less than this value. If not
      // we fail the job. A value of -1 just means there is no
      // limit set.
      reduce_input_limit = -1L;
      this.maxLevel = jobtracker.getNumTaskCacheLevels();
      this.anyCacheLevel = this.maxLevel + 1;
      this.nonLocalMaps = new LinkedList<TaskInProgress>();
      this.failedMaps = new TreeSet<TaskInProgress>(failComparator);
      this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
      this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
      this.nonRunningReduces = new TreeSet<TaskInProgress>(failComparator);
      this.runningReduces = new LinkedHashSet<TaskInProgress>();
      this.resourceEstimator = new ResourceEstimator(this);
      // Overrides the -1L assigned just above with the configured value.
      this.reduce_input_limit = conf.getLong("mapreduce.reduce.input.limit",
          DEFAULT_REDUCE_INPUT_LIMIT);
      // register job's tokens for renewal
      DelegationTokenRenewal.registerDelegationTokensForRenewal(
          jobInfo.getJobID(), ts, jobtracker.getConf());

      // Check task limits
      checkTaskLimits();
    } finally {
      //close all FileSystems that was created above for the current user
      //At this point, this constructor is called in the context of an RPC, and
      //hence the "current user" is actually referring to the kerberos
      //authenticated user (if security is ON).
FileSystem.closeAllForUGI(UserGroupInformation.getCurrentUser()); } } /** * Get the QueueMetrics object associated with this job * @return QueueMetrics */ public QueueMetrics getQueueMetrics() { return this.queueMetrics; } private void checkTaskLimits() throws IOException { // if the number of tasks is larger than a configured value // then fail the job. int maxTasks = jobtracker.getMaxTasksPerJob(); LOG.info(jobId + ": nMaps=" + numMapTasks + " nReduces=" + numReduceTasks + " max=" + maxTasks); if (maxTasks > 0 && (numMapTasks + numReduceTasks) > maxTasks) { throw new IOException("The number of tasks for this job " + (numMapTasks + numReduceTasks) + " exceeds the configured limit " + maxTasks); } } /** * Called when the job is complete */ public void cleanUpMetrics() { // per job metrics is disabled for now. } private void printCache(Map<Node, List<TaskInProgress>> cache) { LOG.info("The taskcache info:"); for (Map.Entry<Node, List<TaskInProgress>> n : cache.entrySet()) { List<TaskInProgress> tips = n.getValue(); LOG.info("Cached TIPs on node: " + n.getKey()); for (TaskInProgress tip : tips) { LOG.info("tip : " + tip.getTIPId()); } } } private Map<Node, List<TaskInProgress>> createCache(TaskSplitMetaInfo[] splits, int maxLevel) throws UnknownHostException { Map<Node, List<TaskInProgress>> cache = new IdentityHashMap<Node, List<TaskInProgress>>(maxLevel); Set<String> uniqueHosts = new TreeSet<String>(); for (int i = 0; i < splits.length; i++) { String[] splitLocations = splits[i].getLocations(); if (splitLocations == null || splitLocations.length == 0) { nonLocalMaps.add(maps[i]); continue; } for (String host : splitLocations) { Node node = jobtracker.resolveAndAddToTopology(host); uniqueHosts.add(host); LOG.info("tip:" + maps[i].getTIPId() + " has split on node:" + node); for (int j = 0; j < maxLevel; j++) { List<TaskInProgress> hostMaps = cache.get(node); if (hostMaps == null) { hostMaps = new ArrayList<TaskInProgress>(); cache.put(node, hostMaps); 
hostMaps.add(maps[i]); } //check whether the hostMaps already contains an entry for a TIP //This will be true for nodes that are racks and multiple nodes in //the rack contain the input for a tip. Note that if it already //exists in the hostMaps, it must be the last element there since //we process one TIP at a time sequentially in the split-size order if (hostMaps.get(hostMaps.size() - 1) != maps[i]) { hostMaps.add(maps[i]); } node = node.getParent(); } } } // Calibrate the localityWaitFactor - Do not override user intent! if (localityWaitFactor == DEFAULT_LOCALITY_WAIT_FACTOR) { int jobNodes = uniqueHosts.size(); int clusterNodes = jobtracker.getNumberOfUniqueHosts(); if (clusterNodes > 0) { localityWaitFactor = Math.min((float) jobNodes / clusterNodes, localityWaitFactor); } LOG.info(jobId + " LOCALITY_WAIT_FACTOR=" + localityWaitFactor); } return cache; } /** * Check if the job has been initialized. * @return <code>true</code> if the job has been initialized, * <code>false</code> otherwise */ public boolean inited() { return tasksInited; } /** * Get the user for the job */ public String getUser() { return user; } boolean hasRestarted() { return restartCount > 0; } boolean getMapSpeculativeExecution() { return hasSpeculativeMaps; } boolean getReduceSpeculativeExecution() { return hasSpeculativeReduces; } long getMemoryForMapTask() { return memoryPerMap; } long getMemoryForReduceTask() { return memoryPerReduce; } /** * Get the number of slots required to run a single map task-attempt. * @return the number of slots required to run a single map task-attempt */ int getNumSlotsPerMap() { return numSlotsPerMap; } /** * Set the number of slots required to run a single map task-attempt. * This is typically set by schedulers which support high-ram jobs. 
* @param numSlotsPerMap the number of slots required to run a single map
   *                       task-attempt
   */
  void setNumSlotsPerMap(int numSlotsPerMap) {
    this.numSlotsPerMap = numSlotsPerMap;
  }

  /**
   * Get the number of slots required to run a single reduce task-attempt.
   * @return the number of slots required to run a single reduce task-attempt
   */
  int getNumSlotsPerReduce() {
    return numSlotsPerReduce;
  }

  /**
   * Set the number of slots required to run a single reduce task-attempt.
   * This is typically set by schedulers which support high-ram jobs.
   * @param numSlotsPerReduce the number of slots required to run a single
   *                          reduce task-attempt
   */
  void setNumSlotsPerReduce(int numSlotsPerReduce) {
    this.numSlotsPerReduce = numSlotsPerReduce;
  }

  /**
   * Construct the splits, etc.  This is invoked from an async
   * thread so that split-computation doesn't block anyone.
   *
   * Returns without doing anything when the tasks are already initialised,
   * the job is complete, or the job was killed / initialisation was already
   * started according to jobInitKillStatus.
   *
   * @throws IOException if the number of splits differs from the configured
   *         number of maps, or if logging the submission is interrupted
   * @throws KillInterruptedException if the job was killed while it was
   *         being initialised
   * @throws UnknownHostException declared for the locality cache construction
   */
  public synchronized void initTasks()
  throws IOException, KillInterruptedException, UnknownHostException {
    if (tasksInited || isComplete()) {
      return;
    }
    // Claim the initialisation under the kill-status lock so that a
    // concurrent kill and this method see a consistent state.
    synchronized (jobInitKillStatus) {
      if (jobInitKillStatus.killed || jobInitKillStatus.initStarted) {
        return;
      }
      jobInitKillStatus.initStarted = true;
    }

    LOG.info("Initializing " + jobId);
    final long startTimeFinal = this.startTime;
    // log job info as the user running the job
    try {
      userUGI.doAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          JobHistory.JobInfo.logSubmitted(getJobID(), conf, jobFile,
              startTimeFinal, hasRestarted());
          return null;
        }
      });
    } catch (InterruptedException ie) {
      // Surfaced to the caller as an IOException with the interrupt as cause.
      throw new IOException(ie);
    }

    // log the job priority
    setPriority(this.priority);

    //
    // generate security keys needed by Tasks
    //
    generateAndStoreTokens();

    //
    // read input splits and create a map per a split
    //
    TaskSplitMetaInfo[] splits = createSplits(jobId);
    if (numMapTasks != splits.length) {
      throw new IOException("Number of maps in JobConf doesn't match number of " +
          "recieved splits for job " + jobId + "! " +
          "numMapTasks=" + numMapTasks + ", #splits=" + splits.length);
    }
    numMapTasks = splits.length;

    // Sanity check the locations so we don't create/initialize unnecessary tasks
    for (TaskSplitMetaInfo split : splits) {
      NetUtils.verifyHostnames(split.getLocations());
    }

    jobtracker.getInstrumentation().addWaitingMaps(getJobID(), numMapTasks);
    jobtracker.getInstrumentation().addWaitingReduces(getJobID(), numReduceTasks);
    this.queueMetrics.addWaitingMaps(getJobID(), numMapTasks);
    this.queueMetrics.addWaitingReduces(getJobID(), numReduceTasks);

    // One map TIP per split; inputLength accumulates the split sizes.
    maps = new TaskInProgress[numMapTasks];
    for (int i = 0; i < numMapTasks; ++i) {
      inputLength += splits[i].getInputDataLength();
      maps[i] = new TaskInProgress(jobId, jobFile,
                                   splits[i],
                                   jobtracker, conf, this, i, numSlotsPerMap);
    }
    LOG.info("Input size for job " + jobId + " = " + inputLength
        + ". Number of splits = " + splits.length);

    // Set localityWaitFactor before creating cache
    localityWaitFactor =
      conf.getFloat(LOCALITY_WAIT_FACTOR, DEFAULT_LOCALITY_WAIT_FACTOR);
    if (numMapTasks > 0) {
      nonRunningMapCache = createCache(splits, maxLevel);
    }

    // set the launch time
    this.launchTime = jobtracker.getClock().getTime();

    //
    // Create reduce tasks
    //
    this.reduces = new TaskInProgress[numReduceTasks];
    for (int i = 0; i < numReduceTasks; i++) {
      reduces[i] = new TaskInProgress(jobId, jobFile,
                                      numMapTasks, i,
                                      jobtracker, conf, this, numSlotsPerReduce);
      nonRunningReduces.add(reduces[i]);
    }

    // Calculate the minimum number of maps to be complete before
    // we should start scheduling reduces
    completedMapsForReduceSlowstart =
      (int) Math.ceil(
          (conf.getFloat("mapred.reduce.slowstart.completed.maps",
                         DEFAULT_COMPLETED_MAPS_PERCENT_FOR_REDUCE_SLOWSTART) *
           numMapTasks));

    // ... use the same for estimating the total output of all maps
    resourceEstimator.setThreshhold(completedMapsForReduceSlowstart);

    // create cleanup two cleanup tips, one map and one reduce.
    cleanup = new TaskInProgress[2];

    // cleanup map tip. This map doesn't use any splits. Just assign an empty
    // split.
    TaskSplitMetaInfo emptySplit = JobSplit.EMPTY_TASK_SPLIT;
    cleanup[0] = new TaskInProgress(jobId, jobFile, emptySplit,
            jobtracker, conf, this, numMapTasks, 1);
    cleanup[0].setJobCleanupTask();

    // cleanup reduce tip.
    cleanup[1] = new TaskInProgress(jobId, jobFile, numMapTasks,
                       numReduceTasks, jobtracker, conf, this, 1);
    cleanup[1].setJobCleanupTask();

    // create two setup tips, one map and one reduce.
    setup = new TaskInProgress[2];

    // setup map tip. This map doesn't use any split. Just assign an empty
    // split.
    setup[0] = new TaskInProgress(jobId, jobFile, emptySplit,
            jobtracker, conf, this, numMapTasks + 1, 1);
    setup[0].setJobSetupTask();

    // setup reduce tip.
    setup[1] = new TaskInProgress(jobId, jobFile, numMapTasks,
                       numReduceTasks + 1, jobtracker, conf, this, 1);
    setup[1].setJobSetupTask();

    synchronized (jobInitKillStatus) {
      jobInitKillStatus.initDone = true;

      // set this before the throw to make sure cleanup works properly
      tasksInited = true;

      if (jobInitKillStatus.killed) {
        throw new KillInterruptedException("Job " + jobId + " killed in init");
      }
    }

    JobHistory.JobInfo.logInited(profile.getJobID(), this.launchTime,
                                 numMapTasks, numReduceTasks);

    // Log the number of map and reduce tasks
    LOG.info("Job " + jobId + " initialized successfully with " + numMapTasks
        + " map tasks and " + numReduceTasks + " reduce tasks.");
  }

  /**
   * Read the split meta info for the given job from the job submit directory.
   *
   * @param jobId id of the job whose splits are read
   * @return the split meta info returned by the reader
   * @throws IOException propagated from the split meta info reader
   */
  TaskSplitMetaInfo[] createSplits(org.apache.hadoop.mapreduce.JobID jobId)
  throws IOException {
    TaskSplitMetaInfo[] allTaskSplitMetaInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, jobtracker.getConf(),
          jobSubmitDir);
    return allTaskSplitMetaInfo;
  }

  /////////////////////////////////////////////////////
  // Accessors for the JobInProgress
  /////////////////////////////////////////////////////
  public JobProfile getProfile() {
    return profile;
  }

  public JobStatus getStatus() {
    return status;
  }

  public synchronized long getLaunchTime() {
    return launchTime;
  }

  Map<TaskType, Long> getFirstTaskLaunchTimes() {
    return
firstTaskLaunchTimes;
  }

  public long getStartTime() {
    return startTime;
  }

  public long getFinishTime() {
    return finishTime;
  }

  public int desiredMaps() {
    return numMapTasks;
  }

  public synchronized int finishedMaps() {
    return finishedMapTasks;
  }

  public int desiredReduces() {
    return numReduceTasks;
  }

  public synchronized int runningMaps() {
    return runningMapTasks;
  }

  public synchronized int runningReduces() {
    return runningReduceTasks;
  }

  public synchronized int finishedReduces() {
    return finishedReduceTasks;
  }

  /**
   * @return desired maps minus running, failed-TIP and finished maps, plus
   *         the speculative maps (which are already counted as running)
   */
  public synchronized int pendingMaps() {
    return numMapTasks - runningMapTasks - failedMapTIPs -
    finishedMapTasks + speculativeMapTasks;
  }

  /**
   * @return desired reduces minus running, failed-TIP and finished reduces,
   *         plus the speculative reduces (already counted as running)
   */
  public synchronized int pendingReduces() {
    return numReduceTasks - runningReduceTasks - failedReduceTIPs -
    finishedReduceTasks + speculativeReduceTasks;
  }

  /**
   * Return total number of map and reduce tasks desired by the job.
   * @return total number of map and reduce tasks desired by the job
   */
  public int desiredTasks() {
    return desiredMaps() + desiredReduces();
  }

  /**
   * @param taskType the kind of task
   * @return the slots per map or per reduce attempt for MAP / REDUCE,
   *         and 1 for every other task type
   */
  public int getNumSlotsPerTask(TaskType taskType) {
    if (taskType == TaskType.MAP) {
      return numSlotsPerMap;
    } else if (taskType == TaskType.REDUCE) {
      return numSlotsPerReduce;
    } else {
      return 1;
    }
  }

  public JobPriority getPriority() {
    return this.priority;
  }

  /**
   * Set the job's priority, update the job status and log the change to the
   * job history.
   *
   * @param priority the new priority; <code>null</code> is treated as
   *                 {@link JobPriority#NORMAL}
   */
  public void setPriority(JobPriority priority) {
    // Resolve the null default once, so the field, the status and the
    // history log all receive the same value. Previously only the field got
    // the NORMAL default while the raw null was passed on to the other two.
    JobPriority effectivePriority =
      (priority == null) ? JobPriority.NORMAL : priority;
    this.priority = effectivePriority;
    synchronized (this) {
      status.setJobPriority(effectivePriority);
    }
    // log and change to the job's priority
    JobHistory.JobInfo.logJobPriority(jobId, effectivePriority);
  }

  // Update the job start/launch time (upon restart) and log to history
  synchronized void updateJobInfo(long startTime, long launchTime) {
    // log and change to the job's start/launch time
    this.startTime = startTime;
    this.launchTime = launchTime;
    JobHistory.JobInfo.logJobInfo(jobId, startTime, launchTime);
  }

  /**
   * Get the number of times the job has restarted
   */
  int getNumRestarts() {
    return restartCount;
  }

  long getInputLength() {
return inputLength; } boolean isCleanupLaunched() { return launchedCleanup; } boolean isSetupLaunched() { return launchedSetup; } /** * Get all the tasks of the desired type in this job. * @param type {@link TaskType} of the tasks required * @return An array of {@link TaskInProgress} matching the given type. * Returns an empty array if no tasks are found for the given type. */ TaskInProgress[] getTasks(TaskType type) { TaskInProgress[] tasks = null; switch (type) { case MAP: { tasks = maps; } break; case REDUCE: { tasks = reduces; } break; case JOB_SETUP: { tasks = setup; } break; case JOB_CLEANUP: { tasks = cleanup; } break; default: { tasks = new TaskInProgress[0]; } break; } return tasks; } /** * Return the nonLocalRunningMaps * @return */ Set<TaskInProgress> getNonLocalRunningMaps() { return nonLocalRunningMaps; } /** * Return the runningMapCache * @return */ Map<Node, Set<TaskInProgress>> getRunningMapCache() { return runningMapCache; } /** * Return runningReduces * @return */ Set<TaskInProgress> getRunningReduces() { return runningReduces; } /** * Get the job configuration * @return the job's configuration */ JobConf getJobConf() { return conf; } /** * Return a vector of completed TaskInProgress objects */ public synchronized Vector<TaskInProgress> reportTasksInProgress(boolean shouldBeMap, boolean shouldBeComplete) { Vector<TaskInProgress> results = new Vector<TaskInProgress>(); TaskInProgress tips[] = null; if (shouldBeMap) { tips = maps; } else { tips = reduces; } for (int i = 0; i < tips.length; i++) { if (tips[i].isComplete() == shouldBeComplete) { results.add(tips[i]); } } return results; } /** * Return a vector of cleanup TaskInProgress objects */ public synchronized Vector<TaskInProgress> reportCleanupTIPs(boolean shouldBeComplete) { Vector<TaskInProgress> results = new Vector<TaskInProgress>(); for (int i = 0; i < cleanup.length; i++) { if (cleanup[i].isComplete() == shouldBeComplete) { results.add(cleanup[i]); } } return results; } /** * Return a 
vector of setup TaskInProgress objects */ public synchronized Vector<TaskInProgress> reportSetupTIPs(boolean shouldBeComplete) { Vector<TaskInProgress> results = new Vector<TaskInProgress>(); for (int i = 0; i < setup.length; i++) { if (setup[i].isComplete() == shouldBeComplete) { results.add(setup[i]); } } return results; } //////////////////////////////////////////////////// // Status update methods //////////////////////////////////////////////////// /** * Assuming {@link JobTracker} is locked on entry. */ public synchronized void updateTaskStatus(TaskInProgress tip, TaskStatus status) { double oldProgress = tip.getProgress(); // save old progress boolean wasRunning = tip.isRunning(); boolean wasComplete = tip.isComplete(); boolean wasPending = tip.isOnlyCommitPending(); TaskAttemptID taskid = status.getTaskID(); boolean wasAttemptRunning = tip.isAttemptRunning(taskid); // If the TIP is already completed and the task reports as SUCCEEDED then // mark the task as KILLED. // In case of task with no promotion the task tracker will mark the task // as SUCCEEDED. // User has requested to kill the task, but TT reported SUCCEEDED, // mark the task KILLED. if ((wasComplete || tip.wasKilled(taskid)) && (status.getRunState() == TaskStatus.State.SUCCEEDED)) { status.setRunState(TaskStatus.State.KILLED); } // If the job is complete and a task has just reported its // state as FAILED_UNCLEAN/KILLED_UNCLEAN, // make the task's state FAILED/KILLED without launching cleanup attempt. 
// Note that if task is already a cleanup attempt, // we don't change the state to make sure the task gets a killTaskAction if ((this.isComplete() || jobFailed || jobKilled) && !tip.isCleanupAttempt(taskid)) { if (status.getRunState() == TaskStatus.State.FAILED_UNCLEAN) { status.setRunState(TaskStatus.State.FAILED); } else if (status.getRunState() == TaskStatus.State.KILLED_UNCLEAN) { status.setRunState(TaskStatus.State.KILLED); } } boolean change = tip.updateStatus(status); if (change) { TaskStatus.State state = status.getRunState(); // get the TaskTrackerStatus where the task ran TaskTracker taskTracker = this.jobtracker.getTaskTracker(tip.machineWhereTaskRan(taskid)); TaskTrackerStatus ttStatus = (taskTracker == null) ? null : taskTracker.getStatus(); String httpTaskLogLocation = null; if (null != ttStatus) { String host; if (NetUtils.getStaticResolution(ttStatus.getHost()) != null) { host = NetUtils.getStaticResolution(ttStatus.getHost()); } else { host = ttStatus.getHost(); } httpTaskLogLocation = "http://" + host + ":" + ttStatus.getHttpPort(); //+ "/tasklog?plaintext=true&attemptid=" + status.getTaskID(); } TaskCompletionEvent taskEvent = null; if (state == TaskStatus.State.SUCCEEDED) { taskEvent = new TaskCompletionEvent(taskCompletionEventTracker, taskid, tip.idWithinJob(), status.getIsMap() && !tip.isJobCleanupTask() && !tip.isJobSetupTask(), TaskCompletionEvent.Status.SUCCEEDED, httpTaskLogLocation); taskEvent.setTaskRunTime((int) (status.getFinishTime() - status.getStartTime())); tip.setSuccessEventNumber(taskCompletionEventTracker); } else if (state == TaskStatus.State.COMMIT_PENDING) { // If it is the first attempt reporting COMMIT_PENDING // ask the task to commit. 
if (!wasComplete && !wasPending) { tip.doCommit(taskid); } return; } else if (state == TaskStatus.State.FAILED_UNCLEAN || state == TaskStatus.State.KILLED_UNCLEAN) { tip.incompleteSubTask(taskid, this.status); // add this task, to be rescheduled as cleanup attempt if (tip.isMapTask()) { mapCleanupTasks.add(taskid); } else { reduceCleanupTasks.add(taskid); } // Remove the task entry from jobtracker jobtracker.removeTaskEntry(taskid); } //For a failed task update the JT datastructures. else if (state == TaskStatus.State.FAILED || state == TaskStatus.State.KILLED) { // Get the event number for the (possibly) previously successful // task. If there exists one, then set that status to OBSOLETE int eventNumber; if ((eventNumber = tip.getSuccessEventNumber()) != -1) { TaskCompletionEvent t = this.taskCompletionEvents.get(eventNumber); if (t.getTaskAttemptId().equals(taskid)) t.setTaskStatus(TaskCompletionEvent.Status.OBSOLETE); } // Tell the job to fail the relevant task failedTask(tip, taskid, status, taskTracker, wasRunning, wasComplete, wasAttemptRunning); // Did the task failure lead to tip failure? TaskCompletionEvent.Status taskCompletionStatus = (state == TaskStatus.State.FAILED) ? TaskCompletionEvent.Status.FAILED : TaskCompletionEvent.Status.KILLED; if (tip.isFailed()) { taskCompletionStatus = TaskCompletionEvent.Status.TIPFAILED; } taskEvent = new TaskCompletionEvent(taskCompletionEventTracker, taskid, tip.idWithinJob(), status.getIsMap() && !tip.isJobCleanupTask() && !tip.isJobSetupTask(), taskCompletionStatus, httpTaskLogLocation); } // Add the 'complete' task i.e. successful/failed // It _is_ safe to add the TaskCompletionEvent.Status.SUCCEEDED // *before* calling TIP.completedTask since: // a. One and only one task of a TIP is declared as a SUCCESS, the // other (speculative tasks) are marked KILLED by the TaskCommitThread // b. TIP.completedTask *does not* throw _any_ exception at all. 
if (taskEvent != null) { this.taskCompletionEvents.add(taskEvent); taskCompletionEventTracker++; JobTrackerStatistics.TaskTrackerStat ttStat = jobtracker.getStatistics() .getTaskTrackerStat(tip.machineWhereTaskRan(taskid)); if (ttStat != null) { // ttStat can be null in case of lost tracker ttStat.incrTotalTasks(); } if (state == TaskStatus.State.SUCCEEDED) { completedTask(tip, status); if (ttStat != null) { ttStat.incrSucceededTasks(); } } } } // // Update JobInProgress status // if (LOG.isDebugEnabled()) { LOG.debug( "Taking progress for " + tip.getTIPId() + " from " + oldProgress + " to " + tip.getProgress()); } if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) { double progressDelta = tip.getProgress() - oldProgress; if (tip.isMapTask()) { this.status.setMapProgress((float) (this.status.mapProgress() + progressDelta / maps.length)); } else { this.status.setReduceProgress( (float) (this.status.reduceProgress() + (progressDelta / reduces.length))); } } } String getHistoryFile() { return historyFile; } synchronized void setHistoryFile(String file) { this.historyFile = file; } /** * Returns the job-level counters. * * @return the job-level counters. */ public synchronized Counters getJobCounters() { return jobCounters; } /** * Returns map phase counters by summing over all map tasks in progress. * This method returns true if counters are within limit or false. */ public synchronized boolean getMapCounters(Counters counters) { try { counters = incrementTaskCounters(counters, maps); } catch (CountersExceededException ce) { LOG.info("Counters Exceeded for Job: " + jobId, ce); return false; } return true; } /** * Returns map phase counters by summing over all map tasks in progress. * This method returns true if counters are within limits and false otherwise. 
*/ public synchronized boolean getReduceCounters(Counters counters) { try { counters = incrementTaskCounters(counters, reduces); } catch (CountersExceededException ce) { LOG.info("Counters Exceeded for Job: " + jobId, ce); return false; } return true; } /** * Returns the total job counters, by adding together the job, * the map and the reduce counters. This method returns true if * counters are within limits and false otherwise. */ public synchronized boolean getCounters(Counters result) { try { result.incrAllCounters(getJobCounters()); incrementTaskCounters(result, maps); incrementTaskCounters(result, reduces); } catch (CountersExceededException ce) { LOG.info("Counters Exceeded for Job: " + jobId, ce); return false; } return true; } /** * Increments the counters with the counters from each task. * @param counters the counters to increment * @param tips the tasks to add in to counters * @return counters the same object passed in as counters */ private Counters incrementTaskCounters(Counters counters, TaskInProgress[] tips) { for (TaskInProgress tip : tips) { counters.incrAllCounters(tip.getCounters()); } return counters; } ///////////////////////////////////////////////////// // Create/manage tasks ///////////////////////////////////////////////////// /** * Return a MapTask, if appropriate, to run on the given tasktracker */ public synchronized Task obtainNewMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts) throws IOException { if (status.getRunState() != JobStatus.RUNNING) { LOG.info("Cannot create task split for " + profile.getJobID()); try { throw new IOException("state = " + status.getRunState()); } catch (IOException ioe) { ioe.printStackTrace(); } return null; } int target = findNewMapTask(tts, clusterSize, numUniqueHosts, anyCacheLevel, status.mapProgress()); if (target == -1) { return null; } Task result = maps[target].getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(maps[target], result.getTaskID(), tts, 
true); resetSchedulingOpportunities(); } return result; } /* * Return task cleanup attempt if any, to run on a given tracker */ public Task obtainTaskCleanupTask(TaskTrackerStatus tts, boolean isMapSlot) throws IOException { if (!tasksInited) { return null; } synchronized (this) { if (this.status.getRunState() != JobStatus.RUNNING || jobFailed || jobKilled) { return null; } String taskTracker = tts.getTrackerName(); if (!shouldRunOnTaskTracker(taskTracker)) { return null; } TaskAttemptID taskid = null; TaskInProgress tip = null; if (isMapSlot) { if (!mapCleanupTasks.isEmpty()) { taskid = mapCleanupTasks.remove(0); tip = maps[taskid.getTaskID().getId()]; } } else { if (!reduceCleanupTasks.isEmpty()) { taskid = reduceCleanupTasks.remove(0); tip = reduces[taskid.getTaskID().getId()]; } } if (tip != null) { return tip.addRunningTask(taskid, taskTracker, true); } return null; } } public synchronized Task obtainNewNodeLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts) throws IOException { if (!tasksInited) { LOG.info("Cannot create task split for " + profile.getJobID()); try { throw new IOException("state = " + status.getRunState()); } catch (IOException ioe) { ioe.printStackTrace(); } return null; } int target = findNewMapTask(tts, clusterSize, numUniqueHosts, 1, status.mapProgress()); if (target == -1) { return null; } Task result = maps[target].getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true); resetSchedulingOpportunities(); } return result; } public synchronized Task obtainNewNodeOrRackLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts) throws IOException { if (!tasksInited) { LOG.info("Cannot create task split for " + profile.getJobID()); try { throw new IOException("state = " + status.getRunState()); } catch (IOException ioe) { ioe.printStackTrace(); } return null; } int target = findNewMapTask(tts, clusterSize, numUniqueHosts, maxLevel, 
status.mapProgress()); if (target == -1) { return null; } Task result = maps[target].getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true); resetSchedulingOpportunities(); } return result; } public synchronized Task obtainNewNonLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts) throws IOException { if (!tasksInited) { LOG.info("Cannot create task split for " + profile.getJobID()); try { throw new IOException("state = " + status.getRunState()); } catch (IOException ioe) { ioe.printStackTrace(); } return null; } int target = findNewMapTask(tts, clusterSize, numUniqueHosts, NON_LOCAL_CACHE_LEVEL, status.mapProgress()); if (target == -1) { return null; } Task result = maps[target].getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(maps[target], result.getTaskID(), tts, true); // DO NOT reset for off-switch! } return result; } public void schedulingOpportunity() { ++numSchedulingOpportunities; } public void resetSchedulingOpportunities() { numSchedulingOpportunities = 0; } public long getNumSchedulingOpportunities() { return numSchedulingOpportunities; } private static final long OVERRIDE = 1000000; public void overrideSchedulingOpportunities() { numSchedulingOpportunities = OVERRIDE; } /** * Check if we can schedule an off-switch task for this job. * * @param numTaskTrackers number of tasktrackers * @return <code>true</code> if we can schedule off-switch, * <code>false</code> otherwise * We check the number of missed opportunities for the job. * If it has 'waited' long enough we go ahead and schedule. 
*/ public boolean scheduleOffSwitch(int numTaskTrackers) { long missedTaskTrackers = getNumSchedulingOpportunities(); long requiredSlots = Math.min((desiredMaps() - finishedMaps()), numTaskTrackers); return (requiredSlots * localityWaitFactor) < missedTaskTrackers; } /** * Return a CleanupTask, if appropriate, to run on the given tasktracker * */ public Task obtainJobCleanupTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, boolean isMapSlot) throws IOException { if (!tasksInited) { return null; } synchronized (this) { if (!canLaunchJobCleanupTask()) { return null; } String taskTracker = tts.getTrackerName(); // Update the last-known clusterSize this.clusterSize = clusterSize; if (!shouldRunOnTaskTracker(taskTracker)) { return null; } List<TaskInProgress> cleanupTaskList = new ArrayList<TaskInProgress>(); if (isMapSlot) { cleanupTaskList.add(cleanup[0]); } else { cleanupTaskList.add(cleanup[1]); } TaskInProgress tip = findTaskFromList(cleanupTaskList, tts, numUniqueHosts, false); if (tip == null) { return null; } // Now launch the cleanupTask Task result = tip.getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(tip, result.getTaskID(), tts, true); if (jobFailed) { result.setJobCleanupTaskState(org.apache.hadoop.mapreduce.JobStatus.State.FAILED); } else if (jobKilled) { result.setJobCleanupTaskState(org.apache.hadoop.mapreduce.JobStatus.State.KILLED); } else { result.setJobCleanupTaskState(org.apache.hadoop.mapreduce.JobStatus.State.SUCCEEDED); } } return result; } } /** * Check whether cleanup task can be launched for the job. 
* * Cleanup task can be launched if it is not already launched * or job is Killed * or all maps and reduces are complete * @return true/false */ private synchronized boolean canLaunchJobCleanupTask() { // check if the job is running if (status.getRunState() != JobStatus.RUNNING && status.getRunState() != JobStatus.PREP) { return false; } // check if cleanup task has been launched already or if setup isn't // launched already. The later check is useful when number of maps is // zero. if (launchedCleanup || !isSetupFinished()) { return false; } // check if job has failed or killed if (jobKilled || jobFailed) { return true; } // Check if all maps and reducers have finished. boolean launchCleanupTask = ((finishedMapTasks + failedMapTIPs) == (numMapTasks)); if (launchCleanupTask) { launchCleanupTask = ((finishedReduceTasks + failedReduceTIPs) == numReduceTasks); } return launchCleanupTask; } /** * Return a SetupTask, if appropriate, to run on the given tasktracker * */ public Task obtainJobSetupTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, boolean isMapSlot) throws IOException { if (!tasksInited) { return null; } synchronized (this) { if (!canLaunchSetupTask()) { return null; } String taskTracker = tts.getTrackerName(); // Update the last-known clusterSize this.clusterSize = clusterSize; if (!shouldRunOnTaskTracker(taskTracker)) { return null; } List<TaskInProgress> setupTaskList = new ArrayList<TaskInProgress>(); if (isMapSlot) { setupTaskList.add(setup[0]); } else { setupTaskList.add(setup[1]); } TaskInProgress tip = findTaskFromList(setupTaskList, tts, numUniqueHosts, false); if (tip == null) { return null; } // Now launch the setupTask Task result = tip.getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(tip, result.getTaskID(), tts, true); } return result; } } public synchronized boolean scheduleReduces() { return finishedMapTasks >= completedMapsForReduceSlowstart; } /** * Check whether setup task can be launched for 
the job. * * Setup task can be launched after the tasks are inited * and Job is in PREP state * and if it is not already launched * or job is not Killed/Failed * @return true/false */ private synchronized boolean canLaunchSetupTask() { return (tasksInited && status.getRunState() == JobStatus.PREP && !launchedSetup && !jobKilled && !jobFailed); } /** * Return a ReduceTask, if appropriate, to run on the given tasktracker. * We don't have cache-sensitivity for reduce tasks, as they * work on temporary MapRed files. */ public synchronized Task obtainNewReduceTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts) throws IOException { if (status.getRunState() != JobStatus.RUNNING) { LOG.info("Cannot create task split for " + profile.getJobID()); return null; } /** check to see if we have any misbehaving reducers. If the expected output * for reducers is huge then we just fail the job and error out. The estimated * size is divided by 2 since the resource estimator returns the amount of disk * space the that the reduce will use (which is 2 times the input, space for merge + reduce * input). 
**/ long estimatedReduceInputSize = resourceEstimator.getEstimatedReduceInputSize() / 2; if (((estimatedReduceInputSize) > reduce_input_limit) && (reduce_input_limit > 0L)) { // make sure jobtracker lock is held LOG.info("Exceeded limit for reduce input size: Estimated:" + estimatedReduceInputSize + " Limit: " + reduce_input_limit + " Failing Job " + jobId); status.setFailureInfo("Job exceeded Reduce Input limit " + " Limit: " + reduce_input_limit + " Estimated: " + estimatedReduceInputSize); jobtracker.failJob(this); return null; } // Ensure we have sufficient map outputs ready to shuffle before // scheduling reduces if (!scheduleReduces()) { return null; } int target = findNewReduceTask(tts, clusterSize, numUniqueHosts, status.reduceProgress()); if (target == -1) { return null; } Task result = reduces[target].getTaskToRun(tts.getTrackerName()); if (result != null) { addRunningTaskToTIP(reduces[target], result.getTaskID(), tts, true); } return result; } // returns the (cache)level at which the nodes matches private int getMatchingLevelForNodes(Node n1, Node n2) { int count = 0; do { if (n1.equals(n2)) { return count; } ++count; n1 = n1.getParent(); n2 = n2.getParent(); } while (n1 != null); return this.maxLevel; } /** * Populate the data structures as a task is scheduled. * * Assuming {@link JobTracker} is locked on entry. 
* * @param tip The tip for which the task is added * @param id The attempt-id for the task * @param tts task-tracker status * @param isScheduled Whether this task is scheduled from the JT or has * joined back upon restart */ synchronized void addRunningTaskToTIP(TaskInProgress tip, TaskAttemptID id, TaskTrackerStatus tts, boolean isScheduled) { // Make an entry in the tip if the attempt is not scheduled i.e externally // added if (!isScheduled) { tip.addRunningTask(id, tts.getTrackerName()); } final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation(); // keeping the earlier ordering intact String name; String splits = ""; Enum counter = null; if (tip.isJobSetupTask()) { launchedSetup = true; name = Values.SETUP.name(); } else if (tip.isJobCleanupTask()) { launchedCleanup = true; name = Values.CLEANUP.name(); } else if (tip.isMapTask()) { ++runningMapTasks; name = Values.MAP.name(); counter = Counter.TOTAL_LAUNCHED_MAPS; splits = tip.getSplitNodes(); if (tip.getActiveTasks().size() > 1) speculativeMapTasks++; metrics.launchMap(id); this.queueMetrics.launchMap(id); } else { ++runningReduceTasks; name = Values.REDUCE.name(); counter = Counter.TOTAL_LAUNCHED_REDUCES; if (tip.getActiveTasks().size() > 1) speculativeReduceTasks++; metrics.launchReduce(id); this.queueMetrics.launchReduce(id); } // Note that the logs are for the scheduled tasks only. Tasks that join on // restart has already their logs in place. if (tip.isFirstAttempt(id)) { JobHistory.Task.logStarted(tip.getTIPId(), name, tip.getExecStartTime(), splits); setFirstTaskLaunchTime(tip); } if (!tip.isJobSetupTask() && !tip.isJobCleanupTask()) { jobCounters.incrCounter(counter, 1); } //TODO The only problem with these counters would be on restart. // The jobtracker updates the counter only when the task that is scheduled // if from a non-running tip and is local (data, rack ...). 
But upon restart // as the reports come from the task tracker, there is no good way to infer // when exactly to increment the locality counters. The only solution is to // increment the counters for all the tasks irrespective of // - whether the tip is running or not // - whether its a speculative task or not // // So to simplify, increment the data locality counter whenever there is // data locality. if (tip.isMapTask() && !tip.isJobSetupTask() && !tip.isJobCleanupTask()) { // increment the data locality counter for maps Node tracker = jobtracker.getNode(tts.getHost()); int level = this.maxLevel; // find the right level across split locations for (String local : maps[tip.getIdWithinJob()].getSplitLocations()) { Node datanode = jobtracker.getNode(local); int newLevel = this.maxLevel; if (tracker != null && datanode != null) { newLevel = getMatchingLevelForNodes(tracker, datanode); } if (newLevel < level) { level = newLevel; // an optimization if (level == 0) { break; } } } switch (level) { case 0: LOG.info("Choosing data-local task " + tip.getTIPId()); jobCounters.incrCounter(Counter.DATA_LOCAL_MAPS, 1); break; case 1: LOG.info("Choosing rack-local task " + tip.getTIPId()); jobCounters.incrCounter(Counter.RACK_LOCAL_MAPS, 1); break; default: // check if there is any locality if (level != this.maxLevel) { LOG.info("Choosing cached task at level " + level + tip.getTIPId()); jobCounters.incrCounter(Counter.OTHER_LOCAL_MAPS, 1); } break; } } } void setFirstTaskLaunchTime(TaskInProgress tip) { TaskType key = tip.getFirstTaskType(); synchronized (firstTaskLaunchTimes) { // Could be optimized to do only one lookup with a little more code if (!firstTaskLaunchTimes.containsKey(key)) { firstTaskLaunchTimes.put(key, tip.getExecStartTime()); } } } static String convertTrackerNameToHostName(String trackerName) { // Ugly! // Convert the trackerName to it's host name int indexOfColon = trackerName.indexOf(":"); String trackerHostName = (indexOfColon == -1) ? 
trackerName : trackerName.substring(0, indexOfColon); return trackerHostName.substring("tracker_".length()); } /** * Note that a task has failed on a given tracker and add the tracker * to the blacklist iff too many trackers in the cluster i.e. * (clusterSize * CLUSTER_BLACKLIST_PERCENT) haven't turned 'flaky' already. * * @param taskTracker task-tracker on which a task failed */ synchronized void addTrackerTaskFailure(String trackerName, TaskTracker taskTracker) { if (flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) { String trackerHostName = convertTrackerNameToHostName(trackerName); Integer trackerFailures = trackerToFailuresMap.get(trackerHostName); if (trackerFailures == null) { trackerFailures = 0; } trackerToFailuresMap.put(trackerHostName, ++trackerFailures); // Check if this tasktracker has turned 'flaky' if (trackerFailures.intValue() == maxTaskFailuresPerTracker) { ++flakyTaskTrackers; // Cancel reservations if appropriate if (taskTracker != null) { if (trackersReservedForMaps.containsKey(taskTracker)) { taskTracker.unreserveSlots(TaskType.MAP, this); } if (trackersReservedForReduces.containsKey(taskTracker)) { taskTracker.unreserveSlots(TaskType.REDUCE, this); } } LOG.info("TaskTracker at '" + trackerHostName + "' turned 'flaky'"); } } } public synchronized void reserveTaskTracker(TaskTracker taskTracker, TaskType type, int numSlots) { Map<TaskTracker, FallowSlotInfo> map = (type == TaskType.MAP) ? trackersReservedForMaps : trackersReservedForReduces; long now = jobtracker.getClock().getTime(); FallowSlotInfo info = map.get(taskTracker); int reservedSlots = 0; if (info == null) { info = new FallowSlotInfo(now, numSlots); reservedSlots = numSlots; } else { // Increment metering info if the reservation is changing if (info.getNumSlots() != numSlots) { Enum<Counter> counter = (type == TaskType.MAP) ? 
Counter.FALLOW_SLOTS_MILLIS_MAPS : Counter.FALLOW_SLOTS_MILLIS_REDUCES; long fallowSlotMillis = (now - info.getTimestamp()) * info.getNumSlots(); jobCounters.incrCounter(counter, fallowSlotMillis); // Update reservedSlots = numSlots - info.getNumSlots(); info.setTimestamp(now); info.setNumSlots(numSlots); } } map.put(taskTracker, info); if (type == TaskType.MAP) { jobtracker.getInstrumentation().addReservedMapSlots(reservedSlots); this.queueMetrics.addReservedMapSlots(reservedSlots); } else { jobtracker.getInstrumentation().addReservedReduceSlots(reservedSlots); this.queueMetrics.addReservedReduceSlots(reservedSlots); } jobtracker.incrementReservations(type, reservedSlots); } public synchronized void unreserveTaskTracker(TaskTracker taskTracker, TaskType type) { Map<TaskTracker, FallowSlotInfo> map = (type == TaskType.MAP) ? trackersReservedForMaps : trackersReservedForReduces; FallowSlotInfo info = map.get(taskTracker); if (info == null) { LOG.warn("Cannot find information about fallow slots for " + taskTracker.getTrackerName()); return; } long now = jobtracker.getClock().getTime(); Enum<Counter> counter = (type == TaskType.MAP) ? 
Counter.FALLOW_SLOTS_MILLIS_MAPS : Counter.FALLOW_SLOTS_MILLIS_REDUCES; long fallowSlotMillis = (now - info.getTimestamp()) * info.getNumSlots(); jobCounters.incrCounter(counter, fallowSlotMillis); map.remove(taskTracker); if (type == TaskType.MAP) { jobtracker.getInstrumentation().decReservedMapSlots(info.getNumSlots()); this.queueMetrics.decReservedMapSlots(info.getNumSlots()); } else { jobtracker.getInstrumentation().decReservedReduceSlots(info.getNumSlots()); this.queueMetrics.decReservedReduceSlots(info.getNumSlots()); } jobtracker.decrementReservations(type, info.getNumSlots()); } public int getNumReservedTaskTrackersForMaps() { return trackersReservedForMaps.size(); } public int getNumReservedTaskTrackersForReduces() { return trackersReservedForReduces.size(); } private int getTrackerTaskFailures(String trackerName) { String trackerHostName = convertTrackerNameToHostName(trackerName); Integer failedTasks = trackerToFailuresMap.get(trackerHostName); return (failedTasks != null) ? failedTasks.intValue() : 0; } /** * Get the black listed trackers for the job * * @return List of blacklisted tracker names */ List<String> getBlackListedTrackers() { List<String> blackListedTrackers = new ArrayList<String>(); for (Map.Entry<String, Integer> e : trackerToFailuresMap.entrySet()) { if (e.getValue().intValue() >= maxTaskFailuresPerTracker) { blackListedTrackers.add(e.getKey()); } } return blackListedTrackers; } /** * Get the no. of 'flaky' tasktrackers for a given job. * * @return the no. of 'flaky' tasktrackers for a given job. */ int getNoOfBlackListedTrackers() { return flakyTaskTrackers; } /** * Get the information on tasktrackers and no. of errors which occurred * on them for a given job. * * @return the map of tasktrackers and no. of errors which occurred * on them for a given job. 
*/ synchronized Map<String, Integer> getTaskTrackerErrors() { // Clone the 'trackerToFailuresMap' and return the copy Map<String, Integer> trackerErrors = new TreeMap<String, Integer>(trackerToFailuresMap); return trackerErrors; } /** * Remove a map TIP from the lists for running maps. * Called when a map fails/completes (note if a map is killed, * it won't be present in the list since it was completed earlier) * @param tip the tip that needs to be retired */ private synchronized void retireMap(TaskInProgress tip) { if (runningMapCache == null) { LOG.warn("Running cache for maps missing!! " + "Job details are missing."); return; } String[] splitLocations = tip.getSplitLocations(); // Remove the TIP from the list for running non-local maps if (splitLocations == null || splitLocations.length == 0) { nonLocalRunningMaps.remove(tip); return; } // Remove from the running map caches for (String host : splitLocations) { Node node = jobtracker.getNode(host); for (int j = 0; j < maxLevel; ++j) { Set<TaskInProgress> hostMaps = runningMapCache.get(node); if (hostMaps != null) { hostMaps.remove(tip); if (hostMaps.size() == 0) { runningMapCache.remove(node); } } node = node.getParent(); } } } /** * Remove a reduce TIP from the list for running-reduces * Called when a reduce fails/completes * @param tip the tip that needs to be retired */ private synchronized void retireReduce(TaskInProgress tip) { if (runningReduces == null) { LOG.warn("Running list for reducers missing!! " + "Job details are missing."); return; } runningReduces.remove(tip); } /** * Adds a map tip to the list of running maps. * @param tip the tip that needs to be scheduled as running */ protected synchronized void scheduleMap(TaskInProgress tip) { if (runningMapCache == null) { LOG.warn("Running cache for maps is missing!! 
" + "Job details are missing."); return; } String[] splitLocations = tip.getSplitLocations(); // Add the TIP to the list of non-local running TIPs if (splitLocations == null || splitLocations.length == 0) { nonLocalRunningMaps.add(tip); return; } for (String host : splitLocations) { Node node = jobtracker.getNode(host); for (int j = 0; j < maxLevel; ++j) { Set<TaskInProgress> hostMaps = runningMapCache.get(node); if (hostMaps == null) { // create a cache if needed hostMaps = new LinkedHashSet<TaskInProgress>(); runningMapCache.put(node, hostMaps); } hostMaps.add(tip); node = node.getParent(); } } } /** * Adds a reduce tip to the list of running reduces * @param tip the tip that needs to be scheduled as running */ protected synchronized void scheduleReduce(TaskInProgress tip) { if (runningReduces == null) { LOG.warn("Running cache for reducers missing!! " + "Job details are missing."); return; } runningReduces.add(tip); } /** * Adds the failed TIP in the front of the list for non-running maps * @param tip the tip that needs to be failed */ private synchronized void failMap(TaskInProgress tip) { if (failedMaps == null) { LOG.warn("Failed cache for maps is missing! Job details are missing."); return; } // Ignore locality for subsequent scheduling on this TIP. Always schedule // it ahead of other tasks. failedMaps.add(tip); } /** * Adds a failed TIP in the front of the list for non-running reduces * @param tip the tip that needs to be failed */ private synchronized void failReduce(TaskInProgress tip) { if (nonRunningReduces == null) { LOG.warn("Failed cache for reducers missing!! 
" + "Job details are missing."); return; } nonRunningReduces.add(tip); } /** * Find a non-running task in the passed list of TIPs * @param tips a collection of TIPs * @param ttStatus the status of tracker that has requested a task to run * @param numUniqueHosts number of unique hosts that run trask trackers * @param removeFailedTip whether to remove the failed tips */ private synchronized TaskInProgress findTaskFromList(Collection<TaskInProgress> tips, TaskTrackerStatus ttStatus, int numUniqueHosts, boolean removeFailedTip) { Iterator<TaskInProgress> iter = tips.iterator(); while (iter.hasNext()) { TaskInProgress tip = iter.next(); // Select a tip if // 1. runnable : still needs to be run and is not completed // 2. ~running : no other node is running it // 3. earlier attempt failed : has not failed on this host // and has failed on all the other hosts // A TIP is removed from the list if // (1) this tip is scheduled // (2) if the passed list is a level 0 (host) cache // (3) when the TIP is non-schedulable (running, killed, complete) if (tip.isRunnable() && !tip.isRunning()) { // check if the tip has failed on this host if (!tip.hasFailedOnMachine(ttStatus.getHost()) || tip.getNumberOfFailedMachines() >= numUniqueHosts) { // check if the tip has failed on all the nodes iter.remove(); return tip; } else if (removeFailedTip) { // the case where we want to remove a failed tip from the host cache // point#3 in the TIP removal logic above iter.remove(); } } else { // see point#3 in the comment above for TIP removal logic iter.remove(); } } return null; } /** * Find a speculative task * @param list a list of tips * @param ttStatus status of the tracker that has requested a tip * @param avgProgress the average progress for speculation * @param currentTime current time in milliseconds * @param shouldRemove whether to remove the tips * @return a tip that can be speculated on the tracker */ protected synchronized TaskInProgress findSpeculativeTask(Collection<TaskInProgress> 
list, TaskTrackerStatus ttStatus, double avgProgress, long currentTime, boolean shouldRemove) { Iterator<TaskInProgress> iter = list.iterator(); while (iter.hasNext()) { TaskInProgress tip = iter.next(); // should never be true! (since we delete completed/failed tasks) if (!tip.isRunning() || !tip.isRunnable()) { iter.remove(); continue; } if (!tip.hasRunOnMachine(ttStatus.getHost(), ttStatus.getTrackerName())) { if (tip.hasSpeculativeTask(currentTime, avgProgress)) { // In case of shared list we don't remove it. Since the TIP failed // on this tracker can be scheduled on some other tracker. if (shouldRemove) { iter.remove(); //this tracker is never going to run it again } return tip; } } else { // Check if this tip can be removed from the list. // If the list is shared then we should not remove. if (shouldRemove) { // This tracker will never speculate this tip iter.remove(); } } } return null; } /** * Find new map task * @param tts The task tracker that is asking for a task * @param clusterSize The number of task trackers in the cluster * @param numUniqueHosts The number of hosts that run task trackers * @param avgProgress The average progress of this kind of task in this job * @param maxCacheLevel The maximum topology level until which to schedule * maps. * A value of {@link #anyCacheLevel} implies any * available task (node-local, rack-local, off-switch and * speculative tasks). * A value of {@link #NON_LOCAL_CACHE_LEVEL} implies only * off-switch/speculative tasks should be scheduled. 
* @return the index in tasks of the selected task (or -1 for no task) */ private synchronized int findNewMapTask(final TaskTrackerStatus tts, final int clusterSize, final int numUniqueHosts, final int maxCacheLevel, final double avgProgress) { if (numMapTasks == 0) { if (LOG.isDebugEnabled()) { LOG.debug("No maps to schedule for " + profile.getJobID()); } return -1; } String taskTracker = tts.getTrackerName(); TaskInProgress tip = null; // // Update the last-known clusterSize // this.clusterSize = clusterSize; if (!shouldRunOnTaskTracker(taskTracker)) { return -1; } // Check to ensure this TaskTracker has enough resources to // run tasks from this job long outSize = resourceEstimator.getEstimatedMapOutputSize(); long availSpace = tts.getResourceStatus().getAvailableSpace(); if (availSpace < outSize) { LOG.warn("No room for map task. Node " + tts.getHost() + " has " + availSpace + " bytes free; but we expect map to take " + outSize); return -1; //see if a different TIP might work better. } // When scheduling a map task: // 0) Schedule a failed task without considering locality // 1) Schedule non-running tasks // 2) Schedule speculative tasks // 3) Schedule tasks with no location information // First a look up is done on the non-running cache and on a miss, a look // up is done on the running cache. The order for lookup within the cache: // 1. from local node to root [bottom up] // 2. breadth wise for all the parent nodes at max level // We fall to linear scan of the list ((3) above) if we have misses in the // above caches // 0) Schedule the task with the most failures, unless failure was on this // machine tip = findTaskFromList(failedMaps, tts, numUniqueHosts, false); if (tip != null) { // Add to the running list scheduleMap(tip); LOG.info("Choosing a failed task " + tip.getTIPId()); return tip.getIdWithinJob(); } Node node = jobtracker.getNode(tts.getHost()); // // 1) Non-running TIP : // // 1. 
check from local node to the root [bottom up cache lookup] // i.e if the cache is available and the host has been resolved // (node!=null) if (node != null) { Node key = node; int level = 0; // maxCacheLevel might be greater than this.maxLevel if findNewMapTask is // called to schedule any task (local, rack-local, off-switch or // speculative) tasks or it might be NON_LOCAL_CACHE_LEVEL (i.e. -1) if // findNewMapTask is (i.e. -1) if findNewMapTask is to only schedule // off-switch/speculative tasks int maxLevelToSchedule = Math.min(maxCacheLevel, maxLevel); for (level = 0; level < maxLevelToSchedule; ++level) { List<TaskInProgress> cacheForLevel = nonRunningMapCache.get(key); if (cacheForLevel != null) { tip = findTaskFromList(cacheForLevel, tts, numUniqueHosts, level == 0); if (tip != null) { // Add to running cache scheduleMap(tip); // remove the cache if its empty if (cacheForLevel.size() == 0) { nonRunningMapCache.remove(key); } return tip.getIdWithinJob(); } } key = key.getParent(); } // Check if we need to only schedule a local task (node-local/rack-local) if (level == maxCacheLevel) { return -1; } } //2. Search breadth-wise across parents at max level for non-running // TIP if // - cache exists and there is a cache miss // - node information for the tracker is missing (tracker's topology // info not obtained yet) // collection of node at max level in the cache structure Collection<Node> nodesAtMaxLevel = jobtracker.getNodesAtMaxLevel(); // get the node parent at max level Node nodeParentAtMaxLevel = (node == null) ? 
null : JobTracker.getParentNode(node, maxLevel - 1); for (Node parent : nodesAtMaxLevel) { // skip the parent that has already been scanned if (parent == nodeParentAtMaxLevel) { continue; } List<TaskInProgress> cache = nonRunningMapCache.get(parent); if (cache != null) { tip = findTaskFromList(cache, tts, numUniqueHosts, false); if (tip != null) { // Add to the running cache scheduleMap(tip); // remove the cache if empty if (cache.size() == 0) { nonRunningMapCache.remove(parent); } LOG.info("Choosing a non-local task " + tip.getTIPId()); return tip.getIdWithinJob(); } } } // 3. Search non-local tips for a new task tip = findTaskFromList(nonLocalMaps, tts, numUniqueHosts, false); if (tip != null) { // Add to the running list scheduleMap(tip); LOG.info("Choosing a non-local task " + tip.getTIPId()); return tip.getIdWithinJob(); } // // 2) Running TIP : // if (hasSpeculativeMaps) { long currentTime = jobtracker.getClock().getTime(); // 1. Check bottom up for speculative tasks from the running cache if (node != null) { Node key = node; for (int level = 0; level < maxLevel; ++level) { Set<TaskInProgress> cacheForLevel = runningMapCache.get(key); if (cacheForLevel != null) { tip = findSpeculativeTask(cacheForLevel, tts, avgProgress, currentTime, level == 0); if (tip != null) { if (cacheForLevel.size() == 0) { runningMapCache.remove(key); } return tip.getIdWithinJob(); } } key = key.getParent(); } } // 2. Check breadth-wise for speculative tasks for (Node parent : nodesAtMaxLevel) { // ignore the parent which is already scanned if (parent == nodeParentAtMaxLevel) { continue; } Set<TaskInProgress> cache = runningMapCache.get(parent); if (cache != null) { tip = findSpeculativeTask(cache, tts, avgProgress, currentTime, false); if (tip != null) { // remove empty cache entries if (cache.size() == 0) { runningMapCache.remove(parent); } LOG.info("Choosing a non-local task " + tip.getTIPId() + " for speculation"); return tip.getIdWithinJob(); } } } // 3. 
Check non-local tips for speculation tip = findSpeculativeTask(nonLocalRunningMaps, tts, avgProgress, currentTime, false); if (tip != null) { LOG.info("Choosing a non-local task " + tip.getTIPId() + " for speculation"); return tip.getIdWithinJob(); } } return -1; } /** * Find new reduce task * @param tts The task tracker that is asking for a task * @param clusterSize The number of task trackers in the cluster * @param numUniqueHosts The number of hosts that run task trackers * @param avgProgress The average progress of this kind of task in this job * @return the index in tasks of the selected task (or -1 for no task) */ private synchronized int findNewReduceTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, double avgProgress) { if (numReduceTasks == 0) { if (LOG.isDebugEnabled()) { LOG.debug("No reduces to schedule for " + profile.getJobID()); } return -1; } String taskTracker = tts.getTrackerName(); TaskInProgress tip = null; // Update the last-known clusterSize this.clusterSize = clusterSize; if (!shouldRunOnTaskTracker(taskTracker)) { return -1; } // 1. check for a never-executed reduce tip // reducers don't have a cache and so pass -1 to explicitly call that out tip = findTaskFromList(nonRunningReduces, tts, numUniqueHosts, false); if (tip != null) { scheduleReduce(tip); return tip.getIdWithinJob(); } // 2. check for a reduce tip to be speculated if (hasSpeculativeReduces) { tip = findSpeculativeTask(runningReduces, tts, avgProgress, jobtracker.getClock().getTime(), false); if (tip != null) { scheduleReduce(tip); return tip.getIdWithinJob(); } } return -1; } private boolean shouldRunOnTaskTracker(String taskTracker) { // // Check if too many tasks of this job have failed on this // tasktracker prior to assigning it a new one. 
// int taskTrackerFailedTasks = getTrackerTaskFailures(taskTracker); if ((flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) && taskTrackerFailedTasks >= maxTaskFailuresPerTracker) { if (LOG.isDebugEnabled()) { String flakyTracker = convertTrackerNameToHostName(taskTracker); LOG.debug("Ignoring the black-listed tasktracker: '" + flakyTracker + "' for assigning a new task"); } return false; } return true; } /** * Metering: Occupied Slots * (Finish - Start) * @param tip {@link TaskInProgress} to be metered which just completed, * cannot be <code>null</code> * @param status {@link TaskStatus} of the completed task, cannot be * <code>null</code> */ private void meterTaskAttempt(TaskInProgress tip, TaskStatus status) { Counter slotCounter = (tip.isMapTask()) ? Counter.SLOTS_MILLIS_MAPS : Counter.SLOTS_MILLIS_REDUCES; jobCounters.incrCounter(slotCounter, tip.getNumSlotsRequired() * (status.getFinishTime() - status.getStartTime())); } /** * A taskid assigned to this JobInProgress has reported in successfully. */ public synchronized boolean completedTask(TaskInProgress tip, TaskStatus status) { TaskAttemptID taskid = status.getTaskID(); int oldNumAttempts = tip.getActiveTasks().size(); final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation(); // Metering meterTaskAttempt(tip, status); // Sanity check: is the TIP already complete? // It _is_ safe to not decrement running{Map|Reduce}Tasks and // finished{Map|Reduce}Tasks variables here because one and only // one task-attempt of a TIP gets to completedTask. This is because // the TaskCommitThread in the JobTracker marks other, completed, // speculative tasks as _complete_. 
if (tip.isComplete()) { // Mark this task as KILLED tip.alreadyCompletedTask(taskid); // Let the JobTracker cleanup this taskid if the job isn't running if (this.status.getRunState() != JobStatus.RUNNING) { jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid); } return false; } LOG.info("Task '" + taskid + "' has completed " + tip.getTIPId() + " successfully."); // Mark the TIP as complete tip.completed(taskid); resourceEstimator.updateWithCompletedTask(status, tip); // Update jobhistory TaskTrackerStatus ttStatus = this.jobtracker.getTaskTrackerStatus(status.getTaskTracker()); String trackerHostname = jobtracker.getNode(ttStatus.getHost()).toString(); String taskType = getTaskType(tip); if (status.getIsMap()) { JobHistory.MapAttempt.logStarted(status.getTaskID(), status.getStartTime(), status.getTaskTracker(), ttStatus.getHttpPort(), taskType); JobHistory.MapAttempt.logFinished(status.getTaskID(), status.getFinishTime(), trackerHostname, taskType, status.getStateString(), status.getCounters()); } else { JobHistory.ReduceAttempt.logStarted(status.getTaskID(), status.getStartTime(), status.getTaskTracker(), ttStatus.getHttpPort(), taskType); JobHistory.ReduceAttempt.logFinished(status.getTaskID(), status.getShuffleFinishTime(), status.getSortFinishTime(), status.getFinishTime(), trackerHostname, taskType, status.getStateString(), status.getCounters()); } JobHistory.Task.logFinished(tip.getTIPId(), taskType, tip.getExecFinishTime(), status.getCounters()); int newNumAttempts = tip.getActiveTasks().size(); if (tip.isJobSetupTask()) { // setup task has finished. kill the extra setup tip killSetupTip(!tip.isMapTask()); // Job can start running now. this.status.setSetupProgress(1.0f); // move the job to running state if the job is in prep state if (this.status.getRunState() == JobStatus.PREP) { changeStateTo(JobStatus.RUNNING); JobHistory.JobInfo.logStarted(profile.getJobID()); } } else if (tip.isJobCleanupTask()) { // cleanup task has finished. 
Kill the extra cleanup tip if (tip.isMapTask()) { // kill the reduce tip cleanup[1].kill(); } else { cleanup[0].kill(); } // // The Job is done // if the job is failed, then mark the job failed. if (jobFailed) { terminateJob(JobStatus.FAILED); } // if the job is killed, then mark the job killed. if (jobKilled) { terminateJob(JobStatus.KILLED); } else { jobComplete(); } // The job has been killed/failed/successful // JobTracker should cleanup this task jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid); } else if (tip.isMapTask()) { runningMapTasks -= 1; // check if this was a sepculative task if (oldNumAttempts > 1) { speculativeMapTasks -= (oldNumAttempts - newNumAttempts); } finishedMapTasks += 1; metrics.completeMap(taskid); this.queueMetrics.completeMap(taskid); // remove the completed map from the resp running caches retireMap(tip); if ((finishedMapTasks + failedMapTIPs) == (numMapTasks)) { this.status.setMapProgress(1.0f); if (canLaunchJobCleanupTask()) { checkCounterLimitsAndFail(); } } } else { runningReduceTasks -= 1; if (oldNumAttempts > 1) { speculativeReduceTasks -= (oldNumAttempts - newNumAttempts); } finishedReduceTasks += 1; metrics.completeReduce(taskid); this.queueMetrics.completeReduce(taskid); // remove the completed reduces from the running reducers set retireReduce(tip); if ((finishedReduceTasks + failedReduceTIPs) == (numReduceTasks)) { this.status.setReduceProgress(1.0f); if (canLaunchJobCleanupTask()) { checkCounterLimitsAndFail(); } } } return true; } /** * add up the counters and fail the job * if it exceeds the counters. Make sure we do not * recalculate the coutners after we fail the job. Currently * this is taken care by terminateJob() since it does not * calculate the counters. 
*/ private void checkCounterLimitsAndFail() { boolean mapIsFine, reduceIsFine, jobIsFine = true; mapIsFine = getMapCounters(new Counters()); reduceIsFine = getReduceCounters(new Counters()); jobIsFine = getCounters(new Counters()); if (!(mapIsFine && reduceIsFine && jobIsFine)) { status.setFailureInfo("Counters Exceeded: Limit: " + Counters.MAX_COUNTER_LIMIT); jobtracker.failJob(this); } } /** * Job state change must happen thru this call */ private void changeStateTo(int newState) { int oldState = this.status.getRunState(); if (oldState == newState) { return; //old and new states are same } this.status.setRunState(newState); //update the metrics if (oldState == JobStatus.PREP) { this.jobtracker.getInstrumentation().decPrepJob(conf, jobId); this.queueMetrics.decPrepJob(conf, jobId); } else if (oldState == JobStatus.RUNNING) { this.jobtracker.getInstrumentation().decRunningJob(conf, jobId); this.queueMetrics.decRunningJob(conf, jobId); } if (newState == JobStatus.PREP) { this.jobtracker.getInstrumentation().addPrepJob(conf, jobId); this.queueMetrics.addPrepJob(conf, jobId); } else if (newState == JobStatus.RUNNING) { this.jobtracker.getInstrumentation().addRunningJob(conf, jobId); this.queueMetrics.addRunningJob(conf, jobId); } } /** * The job is done since all it's component tasks are either * successful or have failed. */ private void jobComplete() { final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation(); // // All tasks are complete, then the job is done! 
// if (this.status.getRunState() == JobStatus.RUNNING) { changeStateTo(JobStatus.SUCCEEDED); this.status.setCleanupProgress(1.0f); if (maps.length == 0) { this.status.setMapProgress(1.0f); } if (reduces.length == 0) { this.status.setReduceProgress(1.0f); } this.finishTime = jobtracker.getClock().getTime(); LOG.info("Job " + this.status.getJobID() + " has completed successfully."); // Log the job summary (this should be done prior to logging to // job-history to ensure job-counters are in-sync JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false)); Counters mapCounters = new Counters(); boolean isFine = getMapCounters(mapCounters); mapCounters = (isFine ? mapCounters : new Counters()); Counters reduceCounters = new Counters(); isFine = getReduceCounters(reduceCounters); ; reduceCounters = (isFine ? reduceCounters : new Counters()); Counters jobCounters = new Counters(); isFine = getCounters(jobCounters); jobCounters = (isFine ? jobCounters : new Counters()); // Log job-history JobHistory.JobInfo.logFinished(this.status.getJobID(), finishTime, this.finishedMapTasks, this.finishedReduceTasks, failedMapTasks, failedReduceTasks, mapCounters, reduceCounters, jobCounters); // Note that finalize will close the job history handles which garbage collect // might try to finalize garbageCollect(); metrics.completeJob(this.conf, this.status.getJobID()); this.queueMetrics.completeJob(this.conf, this.status.getJobID()); } } private synchronized void terminateJob(int jobTerminationState) { if ((status.getRunState() == JobStatus.RUNNING) || (status.getRunState() == JobStatus.PREP)) { this.finishTime = jobtracker.getClock().getTime(); this.status.setMapProgress(1.0f); this.status.setReduceProgress(1.0f); this.status.setCleanupProgress(1.0f); if (jobTerminationState == JobStatus.FAILED) { changeStateTo(JobStatus.FAILED); // Log the job summary JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false)); // Log to job-history 
JobHistory.JobInfo.logFailed(this.status.getJobID(), finishTime, this.finishedMapTasks, this.finishedReduceTasks, this.status.getFailureInfo()); } else { changeStateTo(JobStatus.KILLED); // Log the job summary JobSummary.logJobSummary(this, jobtracker.getClusterStatus(false)); // Log to job-history JobHistory.JobInfo.logKilled(this.status.getJobID(), finishTime, this.finishedMapTasks, this.finishedReduceTasks); } garbageCollect(); jobtracker.getInstrumentation().terminateJob(this.conf, this.status.getJobID()); if (jobTerminationState == JobStatus.FAILED) { jobtracker.getInstrumentation().failedJob(this.conf, this.status.getJobID()); this.queueMetrics.failedJob(this.conf, this.status.getJobID()); } else { jobtracker.getInstrumentation().killedJob(this.conf, this.status.getJobID()); this.queueMetrics.killedJob(this.conf, this.status.getJobID()); } } } /** * Terminate the job and all its component tasks. * Calling this will lead to marking the job as failed/killed. Cleanup * tip will be launched. If the job has not inited, it will directly call * terminateJob as there is no need to launch cleanup tip. * This method is reentrant. * @param jobTerminationState job termination state */ private synchronized void terminate(int jobTerminationState) { if (!tasksInited) { //init could not be done, we just terminate directly. terminateJob(jobTerminationState); return; } if ((status.getRunState() == JobStatus.RUNNING) || (status.getRunState() == JobStatus.PREP)) { LOG.info("Killing job '" + this.status.getJobID() + "'"); if (jobTerminationState == JobStatus.FAILED) { if (jobFailed) {//reentrant return; } jobFailed = true; } else if (jobTerminationState == JobStatus.KILLED) { if (jobKilled) {//reentrant return; } jobKilled = true; } // clear all unclean tasks clearUncleanTasks(); // // kill all TIPs. 
// for (int i = 0; i < setup.length; i++) { setup[i].kill(); } for (int i = 0; i < maps.length; i++) { maps[i].kill(); } for (int i = 0; i < reduces.length; i++) { reduces[i].kill(); } } } private void cancelReservedSlots() { // Make a copy of the set of TaskTrackers to prevent a // ConcurrentModificationException ... Set<TaskTracker> tm = new HashSet<TaskTracker>(trackersReservedForMaps.keySet()); for (TaskTracker tt : tm) { tt.unreserveSlots(TaskType.MAP, this); } Set<TaskTracker> tr = new HashSet<TaskTracker>(trackersReservedForReduces.keySet()); for (TaskTracker tt : tr) { tt.unreserveSlots(TaskType.REDUCE, this); } } private void clearUncleanTasks() { TaskAttemptID taskid = null; TaskInProgress tip = null; while (!mapCleanupTasks.isEmpty()) { taskid = mapCleanupTasks.remove(0); tip = maps[taskid.getTaskID().getId()]; updateTaskStatus(tip, tip.getTaskStatus(taskid)); } while (!reduceCleanupTasks.isEmpty()) { taskid = reduceCleanupTasks.remove(0); tip = reduces[taskid.getTaskID().getId()]; updateTaskStatus(tip, tip.getTaskStatus(taskid)); } } /** * Kill the job and all its component tasks. This method should be called from * jobtracker and should return fast as it locks the jobtracker. */ public void kill() { boolean killNow = false; synchronized (jobInitKillStatus) { jobInitKillStatus.killed = true; //if not in middle of init, terminate it now if (!jobInitKillStatus.initStarted || jobInitKillStatus.initDone) { //avoiding nested locking by setting flag killNow = true; } } if (killNow) { terminate(JobStatus.KILLED); } } /** * Fails the job and all its component tasks. This should be called only from * {@link JobInProgress} or {@link JobTracker}. Look at * {@link JobTracker#failJob(JobInProgress)} for more details. */ synchronized void fail() { terminate(JobStatus.FAILED); } /** * A task assigned to this JobInProgress has reported in as failed. * Most of the time, we'll just reschedule execution. 
However, after * many repeated failures we may instead decide to allow the entire * job to fail or succeed if the user doesn't care about a few tasks failing. * * Even if a task has reported as completed in the past, it might later * be reported as failed. That's because the TaskTracker that hosts a map * task might die before the entire job can complete. If that happens, * we need to schedule reexecution so that downstream reduce tasks can * obtain the map task's output. */ private void failedTask(TaskInProgress tip, TaskAttemptID taskid, TaskStatus status, TaskTracker taskTracker, boolean wasRunning, boolean wasComplete, boolean wasAttemptRunning) { final JobTrackerInstrumentation metrics = jobtracker.getInstrumentation(); // check if the TIP is already failed boolean wasFailed = tip.isFailed(); // Mark the taskid as FAILED or KILLED tip.incompleteSubTask(taskid, this.status); boolean isRunning = tip.isRunning(); boolean isComplete = tip.isComplete(); boolean metricsDone = isComplete(); // job metrics garbage collected if (wasAttemptRunning) { // We are decrementing counters without looking for isRunning , // because we increment the counters when we obtain // new map task attempt or reduce task attempt.We do not really check // for tip being running. // Whenever we obtain new task attempt following counters are incremented. // ++runningMapTasks; //......... // metrics.launchMap(id); // hence we are decrementing the same set. // Except after garbageCollect in a different thread. if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) { if (tip.isMapTask() && !metricsDone) { runningMapTasks -= 1; metrics.failedMap(taskid); this.queueMetrics.failedMap(taskid); } else if (!metricsDone) { runningReduceTasks -= 1; metrics.failedReduce(taskid); this.queueMetrics.failedReduce(taskid); } } // Metering meterTaskAttempt(tip, status); } //update running count on task failure. 
if (wasRunning && !isRunning) { if (tip.isJobCleanupTask()) { launchedCleanup = false; } else if (tip.isJobSetupTask()) { launchedSetup = false; } else if (tip.isMapTask()) { // remove from the running queue and put it in the non-running cache // if the tip is not complete i.e if the tip still needs to be run if (!isComplete) { retireMap(tip); failMap(tip); } } else { // remove from the running queue and put in the failed queue if the tip // is not complete if (!isComplete) { retireReduce(tip); failReduce(tip); } } } // The case when the map was complete but the task tracker went down. // However, we don't need to do any metering here... if (wasComplete && !isComplete) { if (tip.isMapTask()) { // Put the task back in the cache. This will help locality for cases // where we have a different TaskTracker from the same rack/switch // asking for a task. // We bother about only those TIPs that were successful // earlier (wasComplete and !isComplete) // (since they might have been removed from the cache of other // racks/switches, if the input split blocks were present there too) failMap(tip); finishedMapTasks -= 1; } } // update job history // get taskStatus from tip TaskStatus taskStatus = tip.getTaskStatus(taskid); String taskTrackerName = taskStatus.getTaskTracker(); String taskTrackerHostName = convertTrackerNameToHostName(taskTrackerName); int taskTrackerPort = -1; TaskTrackerStatus taskTrackerStatus = (taskTracker == null) ? null : taskTracker.getStatus(); if (taskTrackerStatus != null) { taskTrackerPort = taskTrackerStatus.getHttpPort(); } long startTime = taskStatus.getStartTime(); long finishTime = taskStatus.getFinishTime(); List<String> taskDiagnosticInfo = tip.getDiagnosticInfo(taskid); String diagInfo = taskDiagnosticInfo == null ? 
"" : StringUtils.arrayToString(taskDiagnosticInfo.toArray(new String[0])); String taskType = getTaskType(tip); if (taskStatus.getIsMap()) { JobHistory.MapAttempt.logStarted(taskid, startTime, taskTrackerName, taskTrackerPort, taskType); if (taskStatus.getRunState() == TaskStatus.State.FAILED) { JobHistory.MapAttempt.logFailed(taskid, finishTime, taskTrackerHostName, diagInfo, taskType); } else { JobHistory.MapAttempt.logKilled(taskid, finishTime, taskTrackerHostName, diagInfo, taskType); } } else { JobHistory.ReduceAttempt.logStarted(taskid, startTime, taskTrackerName, taskTrackerPort, taskType); if (taskStatus.getRunState() == TaskStatus.State.FAILED) { JobHistory.ReduceAttempt.logFailed(taskid, finishTime, taskTrackerHostName, diagInfo, taskType); } else { JobHistory.ReduceAttempt.logKilled(taskid, finishTime, taskTrackerHostName, diagInfo, taskType); } } // After this, try to assign tasks with the one after this, so that // the failed task goes to the end of the list. if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) { if (tip.isMapTask()) { failedMapTasks++; } else { failedReduceTasks++; } } // // Note down that a task has failed on this tasktracker // if (status.getRunState() == TaskStatus.State.FAILED) { addTrackerTaskFailure(taskTrackerName, taskTracker); } // // Let the JobTracker know that this task has failed // jobtracker.markCompletedTaskAttempt(status.getTaskTracker(), taskid); // // Check if we need to kill the job because of too many failures or // if the job is complete since all component tasks have completed // We do it once per TIP and that too for the task that fails the TIP if (!wasFailed && tip.isFailed()) { // // Allow upto 'mapFailuresPercent' of map tasks to fail or // 'reduceFailuresPercent' of reduce tasks to fail // boolean killJob = tip.isJobCleanupTask() || tip.isJobSetupTask() ? true : tip.isMapTask() ? 
((++failedMapTIPs * 100) > (mapFailuresPercent * numMapTasks)) : ((++failedReduceTIPs * 100) > (reduceFailuresPercent * numReduceTasks)); if (killJob) { String failureInfo = ""; if (tip.isJobCleanupTask()) { failureInfo = "JobCleanup Task Failure, Task: " + tip.getTIPId(); } else if (tip.isJobSetupTask()) { failureInfo = "JobSetup Task Failure, Task: " + tip.getTIPId(); } else if (tip.isMapTask()) { failureInfo = "# of failed Map Tasks exceeded allowed limit. FailedCount: " + failedMapTIPs + ". LastFailedTask: " + tip.getTIPId(); } else { failureInfo = "# of failed Reduce Tasks exceeded allowed limit. FailedCount: " + failedReduceTIPs + ". LastFailedTask: " + tip.getTIPId(); } this.status.setFailureInfo(failureInfo); LOG.info("Aborting job " + profile.getJobID()); JobHistory.Task.logFailed(tip.getTIPId(), taskType, finishTime, diagInfo); if (tip.isJobCleanupTask()) { // kill the other tip if (tip.isMapTask()) { cleanup[1].kill(); } else { cleanup[0].kill(); } terminateJob(JobStatus.FAILED); } else { if (tip.isJobSetupTask()) { // kill the other tip killSetupTip(!tip.isMapTask()); } fail(); } } // // Update the counters // if (!tip.isJobCleanupTask() && !tip.isJobSetupTask()) { if (tip.isMapTask()) { jobCounters.incrCounter(Counter.NUM_FAILED_MAPS, 1); } else { jobCounters.incrCounter(Counter.NUM_FAILED_REDUCES, 1); } } } } void killSetupTip(boolean isMap) { if (isMap) { setup[0].kill(); } else { setup[1].kill(); } } boolean isSetupFinished() { if (setup[0].isComplete() || setup[0].isFailed() || setup[1].isComplete() || setup[1].isFailed()) { return true; } return false; } /** * Fail a task with a given reason, but without a status object. * * Assuming {@link JobTracker} is locked on entry. 
* * @param tip The task's tip * @param taskid The task id * @param reason The reason that the task failed * @param trackerName The task tracker the task failed on */ public void failedTask(TaskInProgress tip, TaskAttemptID taskid, String reason, TaskStatus.Phase phase, TaskStatus.State state, String trackerName) { TaskStatus status = TaskStatus.createTaskStatus(tip.isMapTask(), taskid, 0.0f, tip.isMapTask() ? numSlotsPerMap : numSlotsPerReduce, state, reason, reason, trackerName, phase, new Counters()); // update the actual start-time of the attempt TaskStatus oldStatus = tip.getTaskStatus(taskid); long startTime = oldStatus == null ? jobtracker.getClock().getTime() : oldStatus.getStartTime(); status.setStartTime(startTime); status.setFinishTime(jobtracker.getClock().getTime()); boolean wasComplete = tip.isComplete(); updateTaskStatus(tip, status); boolean isComplete = tip.isComplete(); if (wasComplete && !isComplete) { // mark a successful tip as failed String taskType = getTaskType(tip); JobHistory.Task.logFailed(tip.getTIPId(), taskType, tip.getExecFinishTime(), reason, taskid); } } /** * The job is dead. We're now GC'ing it, getting rid of the job * from all tables. Be sure to remove all of this job's tasks * from the various tables. */ void garbageCollect() { synchronized (this) { // Cancel task tracker reservation cancelReservedSlots(); // Waiting metrics are incremented in JobInProgress.initTasks() // If a job gets an exception before that, we do not want to // incorrectly decrement. 
if (tasksInited) { jobtracker.getInstrumentation().decWaitingMaps(getJobID(), pendingMaps()); jobtracker.getInstrumentation().decWaitingReduces(getJobID(), pendingReduces()); this.queueMetrics.decWaitingMaps(getJobID(), pendingMaps()); this.queueMetrics.decWaitingReduces(getJobID(), pendingReduces()); } // Let the JobTracker know that a job is complete jobtracker.storeCompletedJob(this); jobtracker.finalizeJob(this); try { // Definitely remove the local-disk copy of the job file if (localJobFile != null) { localFs.delete(localJobFile, true); localJobFile = null; } Path tempDir = jobtracker.getSystemDirectoryForJob(getJobID()); CleanupQueue.getInstance().addToQueue(new PathDeletionContext(tempDir, conf)); } catch (IOException e) { LOG.warn("Error cleaning up " + profile.getJobID() + ": " + e); } cleanUpMetrics(); // free up the memory used by the data structures this.failedMaps.clear(); this.nonRunningMapCache = null; this.runningMapCache = null; this.nonRunningReduces = null; this.runningReduces = null; } // remove jobs delegation tokens if (conf.getBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, true)) { DelegationTokenRenewal.removeDelegationTokenRenewalForJob(jobId); } // else don't remove it.May be used by spawned tasks //close the user's FS try { fs.close(); } catch (IOException ie) { LOG.warn("Ignoring exception " + StringUtils.stringifyException(ie) + " while closing FileSystem for " + userUGI); } } /** * Return the TaskInProgress that matches the tipid. 
*/ public synchronized TaskInProgress getTaskInProgress(TaskID tipid) { if (tipid.isMap()) { if (tipid.equals(cleanup[0].getTIPId())) { // cleanup map tip return cleanup[0]; } if (tipid.equals(setup[0].getTIPId())) { //setup map tip return setup[0]; } for (int i = 0; i < maps.length; i++) { if (tipid.equals(maps[i].getTIPId())) { return maps[i]; } } } else { if (tipid.equals(cleanup[1].getTIPId())) { // cleanup reduce tip return cleanup[1]; } if (tipid.equals(setup[1].getTIPId())) { //setup reduce tip return setup[1]; } for (int i = 0; i < reduces.length; i++) { if (tipid.equals(reduces[i].getTIPId())) { return reduces[i]; } } } return null; } /** * Find the details of someplace where a map has finished * @param mapId the id of the map * @return the task status of the completed task */ public synchronized TaskStatus findFinishedMap(int mapId) { TaskInProgress tip = maps[mapId]; if (tip.isComplete()) { TaskStatus[] statuses = tip.getTaskStatuses(); for (int i = 0; i < statuses.length; i++) { if (statuses[i].getRunState() == TaskStatus.State.SUCCEEDED) { return statuses[i]; } } } return null; } synchronized int getNumTaskCompletionEvents() { return taskCompletionEvents.size(); } synchronized public TaskCompletionEvent[] getTaskCompletionEvents(int fromEventId, int maxEvents) { TaskCompletionEvent[] events = TaskCompletionEvent.EMPTY_ARRAY; if (taskCompletionEvents.size() > fromEventId) { int actualMax = Math.min(maxEvents, (taskCompletionEvents.size() - fromEventId)); events = taskCompletionEvents.subList(fromEventId, actualMax + fromEventId).toArray(events); } return events; } synchronized void fetchFailureNotification(TaskInProgress tip, TaskAttemptID mapTaskId, String mapTrackerName, TaskAttemptID reduceTaskId, String reduceTrackerName) { Integer fetchFailures = mapTaskIdToFetchFailuresMap.get(mapTaskId); fetchFailures = (fetchFailures == null) ? 
1 : (fetchFailures + 1); mapTaskIdToFetchFailuresMap.put(mapTaskId, fetchFailures); LOG.info("Failed fetch notification #" + fetchFailures + " for map task: " + mapTaskId + " running on tracker: " + mapTrackerName + " and reduce task: " + reduceTaskId + " running on tracker: " + reduceTrackerName); float failureRate = (float) fetchFailures / runningReduceTasks; // declare faulty if fetch-failures >= max-allowed-failures boolean isMapFaulty = failureRate >= MAX_ALLOWED_FETCH_FAILURES_PERCENT; if (fetchFailures >= MAX_FETCH_FAILURES_NOTIFICATIONS && isMapFaulty) { LOG.info("Too many fetch-failures for output of task: " + mapTaskId + " ... killing it"); failedTask(tip, mapTaskId, "Too many fetch-failures", (tip.isMapTask() ? TaskStatus.Phase.MAP : TaskStatus.Phase.REDUCE), TaskStatus.State.FAILED, mapTrackerName); mapTaskIdToFetchFailuresMap.remove(mapTaskId); } } /** * @return The JobID of this JobInProgress. */ public JobID getJobID() { return jobId; } /** * @return submitHostName of this JobInProgress. */ public String getJobSubmitHostName() { return this.submitHostName; } /** * @return submitHostAddress of this JobInProgress. */ public String getJobSubmitHostAddress() { return this.submitHostAddress; } public synchronized Object getSchedulingInfo() { return this.schedulingInfo; } public synchronized void setSchedulingInfo(Object schedulingInfo) { this.schedulingInfo = schedulingInfo; this.status.setSchedulingInfo(schedulingInfo.toString()); } /** * To keep track of kill and initTasks status of this job. initTasks() take * a lock on JobInProgress object. kill should avoid waiting on * JobInProgress lock since it may take a while to do initTasks(). */ private static class JobInitKillStatus { //flag to be set if kill is called boolean killed; boolean initStarted; boolean initDone; } boolean isComplete() { return status.isJobComplete(); } /** * Get the task type for logging it to {@link JobHistory}. 
*/ private String getTaskType(TaskInProgress tip) { if (tip.isJobCleanupTask()) { return Values.CLEANUP.name(); } else if (tip.isJobSetupTask()) { return Values.SETUP.name(); } else if (tip.isMapTask()) { return Values.MAP.name(); } else { return Values.REDUCE.name(); } } /** * Test method to set the cluster sizes */ void setClusterSize(int clusterSize) { this.clusterSize = clusterSize; } static class JobSummary { static final Log LOG = LogFactory.getLog(JobSummary.class); // Escape sequences static final char EQUALS = '='; static final char[] charsToEscape = { StringUtils.COMMA, EQUALS, StringUtils.ESCAPE_CHAR }; static class SummaryBuilder { final StringBuilder buffer = new StringBuilder(); // A little optimization for a very common case SummaryBuilder add(String key, long value) { return _add(key, Long.toString(value)); } <T> SummaryBuilder add(String key, T value) { return _add(key, StringUtils.escapeString(String.valueOf(value), StringUtils.ESCAPE_CHAR, charsToEscape)); } SummaryBuilder add(SummaryBuilder summary) { if (buffer.length() > 0) buffer.append(StringUtils.COMMA); buffer.append(summary.buffer); return this; } SummaryBuilder _add(String key, String value) { if (buffer.length() > 0) buffer.append(StringUtils.COMMA); buffer.append(key).append(EQUALS).append(value); return this; } @Override public String toString() { return buffer.toString(); } } static SummaryBuilder getTaskLaunchTimesSummary(JobInProgress job) { SummaryBuilder summary = new SummaryBuilder(); Map<TaskType, Long> timeMap = job.getFirstTaskLaunchTimes(); synchronized (timeMap) { for (Map.Entry<TaskType, Long> e : timeMap.entrySet()) { summary.add("first" + StringUtils.camelize(e.getKey().name()) + "TaskLaunchTime", e.getValue().longValue()); } } return summary; } /** * Log a summary of the job's runtime. * * @param job {@link JobInProgress} whose summary is to be logged, cannot * be <code>null</code>. 
* @param cluster {@link ClusterStatus} of the cluster on which the job was * run, cannot be <code>null</code> */ public static void logJobSummary(JobInProgress job, ClusterStatus cluster) { JobStatus status = job.getStatus(); JobProfile profile = job.getProfile(); Counters jobCounters = job.getJobCounters(); long mapSlotSeconds = (jobCounters.getCounter(Counter.SLOTS_MILLIS_MAPS) + jobCounters.getCounter(Counter.FALLOW_SLOTS_MILLIS_MAPS)) / 1000; long reduceSlotSeconds = (jobCounters.getCounter(Counter.SLOTS_MILLIS_REDUCES) + jobCounters.getCounter(Counter.FALLOW_SLOTS_MILLIS_REDUCES)) / 1000; SummaryBuilder summary = new SummaryBuilder().add("jobId", job.getJobID()) .add("submitTime", job.getStartTime()).add("launchTime", job.getLaunchTime()) .add(getTaskLaunchTimesSummary(job)).add("finishTime", job.getFinishTime()) .add("numMaps", job.getTasks(TaskType.MAP).length) .add("numSlotsPerMap", job.getNumSlotsPerMap()) .add("numReduces", job.getTasks(TaskType.REDUCE).length) .add("numSlotsPerReduce", job.getNumSlotsPerReduce()).add("user", profile.getUser()) .add("queue", profile.getQueueName()) .add("status", JobStatus.getJobRunState(status.getRunState())) .add("mapSlotSeconds", mapSlotSeconds).add("reduceSlotsSeconds", reduceSlotSeconds) .add("clusterMapCapacity", cluster.getMaxMapTasks()) .add("clusterReduceCapacity", cluster.getMaxReduceTasks()).add("jobName", profile.getJobName()); LOG.info(summary); } } /** * generate job token and save it into the file * @throws IOException */ private void generateAndStoreTokens() throws IOException { Path jobDir = jobtracker.getSystemDirectoryForJob(jobId); Path keysFile = new Path(jobDir, TokenCache.JOB_TOKEN_HDFS_FILE); if (tokenStorage == null) { tokenStorage = new Credentials(); } //create JobToken file and write token to it JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(jobId.toString())); Token<JobTokenIdentifier> token = new Token<JobTokenIdentifier>(identifier, jobtracker.getJobTokenSecretManager()); 
token.setService(identifier.getJobId()); TokenCache.setJobToken(token, tokenStorage); // write TokenStorage out tokenStorage.writeTokenStorageFile(keysFile, jobtracker.getConf()); LOG.info("jobToken generated and stored with users keys in " + keysFile.toUri().getPath()); } /** * Get the level of locality that a given task would have if launched on * a particular TaskTracker. Returns 0 if the task has data on that machine, * 1 if it has data on the same rack, etc (depending on number of levels in * the network hierarchy). */ int getLocalityLevel(TaskInProgress tip, TaskTrackerStatus tts) { Node tracker = jobtracker.getNode(tts.getHost()); int level = this.maxLevel; // find the right level across split locations for (String local : maps[tip.getIdWithinJob()].getSplitLocations()) { Node datanode = jobtracker.getNode(local); int newLevel = this.maxLevel; if (tracker != null && datanode != null) { newLevel = getMatchingLevelForNodes(tracker, datanode); } if (newLevel < level) { level = newLevel; // an optimization if (level == 0) { break; } } } return level; } }