/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.gridmix;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskStatus.State;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.tools.rumen.JobStory;
import org.apache.hadoop.tools.rumen.JobStoryProducer;
import org.apache.hadoop.tools.rumen.MapTaskAttemptInfo;
import org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values;
import org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo;
import org.apache.hadoop.tools.rumen.TaskAttemptInfo;
import org.apache.hadoop.tools.rumen.TaskInfo;

public class DebugJobProducer implements JobStoryProducer {
  public static final Log LOG = LogFactory.getLog(DebugJobProducer.class);

  final ArrayList<JobStory> submitted;
  private final Configuration conf;
  private final AtomicInteger numJobs;

  public DebugJobProducer(int numJobs, Configuration conf) {
    super();
    MockJob.reset();
    this.conf = conf;
    this.numJobs = new AtomicInteger(numJobs);
    this.submitted = new ArrayList<JobStory>();
  }

  @Override
  public JobStory getNextJob() throws IOException {
    if (numJobs.getAndDecrement() > 0) {
      final MockJob ret = new MockJob(conf);
      submitted.add(ret);
      return ret;
    }
    return null;
  }

  @Override
  public void close() {
  }
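  // A note for readers on getDistr below: it returns "size" random fractions
  // that sum to 1.0, each at least mindist / size. It draws size - 1 uniform
  // points in [0, rem), where rem = 1.0 - mindist, sorts them together with
  // rem itself, and takes the gaps between consecutive points plus the
  // per-slot floor "min". The gaps sum to rem and the floors sum to mindist,
  // so the whole vector sums to 1.0. For example, getDistr(r, 0.5, 4) yields
  // four fractions, each >= 0.125, summing to 1.0.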
  static double[] getDistr(Random r, double mindist, int size) {
    assert 0.0 <= mindist && mindist <= 1.0;
    final double min = mindist / size;
    final double rem = 1.0 - min * size;
    final double[] tmp = new double[size];
    for (int i = 0; i < tmp.length - 1; ++i) {
      tmp[i] = r.nextDouble() * rem;
    }
    tmp[tmp.length - 1] = rem;
    Arrays.sort(tmp);
    final double[] ret = new double[size];
    ret[0] = tmp[0] + min;
    for (int i = 1; i < size; ++i) {
      ret[i] = tmp[i] - tmp[i - 1] + min;
    }
    return ret;
  }

  /**
   * Generate random task data for a synthetic job.
   */
  static class MockJob implements JobStory {

    static final int MIN_REC = 1 << 14;
    static final int MIN_BYTES = 1 << 20;
    static final int VAR_REC = 1 << 14;
    static final int VAR_BYTES = 4 << 20;
    static final int MAX_MAP = 5;
    static final int MAX_RED = 3;

    final Configuration conf;

    static void initDist(Random r, double min, int[] recs, long[] bytes,
        long tot_recs, long tot_bytes) {
      final double[] recs_dist = getDistr(r, min, recs.length);
      final double[] bytes_dist = getDistr(r, min, recs.length);
      long totalbytes = 0L;
      int totalrecs = 0;
      for (int i = 0; i < recs.length; ++i) {
        recs[i] = (int) Math.round(tot_recs * recs_dist[i]);
        bytes[i] = Math.round(tot_bytes * bytes_dist[i]);
        totalrecs += recs[i];
        totalbytes += bytes[i];
      }
      // Fold the rounding error into the first slot so the totals match the
      // requested targets exactly. (The adjustment must be target minus
      // actual; the original had the operands reversed, which widened the
      // error instead of removing it.)
      recs[0] += tot_recs - totalrecs;
      bytes[0] += tot_bytes - totalbytes;
      if (LOG.isInfoEnabled()) {
        LOG.info("DIST: " + Arrays.toString(recs) + " "
            + tot_recs + "/" + totalrecs + " "
            + Arrays.toString(bytes) + " " + tot_bytes + "/" + totalbytes);
      }
    }

    private static final AtomicInteger seq = new AtomicInteger(0);
    // Set the base timestamp 60 days in the past so mock jobs look like they
    // come from an existing trace.
    private static final AtomicLong timestamp = new AtomicLong(
        System.currentTimeMillis()
            - TimeUnit.MILLISECONDS.convert(60, TimeUnit.DAYS));

    private final int id;
    private final String name;
    private final int[] m_recsIn, m_recsOut, r_recsIn, r_recsOut;
    private final long[] m_bytesIn, m_bytesOut, r_bytesIn, r_bytesOut;
    private final long submitTime;

    public MockJob(Configuration conf) {
      final Random r = new Random();
      final long seed = r.nextLong();
      r.setSeed(seed);
      id = seq.getAndIncrement();
      name = String.format("MOCKJOB%05d", id);
      this.conf = conf;
      LOG.info(name + " (" + seed + ")");
      submitTime = timestamp.addAndGet(
          TimeUnit.MILLISECONDS.convert(r.nextInt(10), TimeUnit.SECONDS));

      m_recsIn = new int[r.nextInt(MAX_MAP) + 1];
      m_bytesIn = new long[m_recsIn.length];
      m_recsOut = new int[m_recsIn.length];
      m_bytesOut = new long[m_recsIn.length];

      r_recsIn = new int[r.nextInt(MAX_RED) + 1];
      r_bytesIn = new long[r_recsIn.length];
      r_recsOut = new int[r_recsIn.length];
      r_bytesOut = new long[r_recsIn.length];

      // map input
      final long map_recs = r.nextInt(VAR_REC) + MIN_REC;
      final long map_bytes = r.nextInt(VAR_BYTES) + MIN_BYTES;
      initDist(r, 0.5, m_recsIn, m_bytesIn, map_recs, map_bytes);

      // shuffle
      final long shuffle_recs = r.nextInt(VAR_REC) + MIN_REC;
      final long shuffle_bytes = r.nextInt(VAR_BYTES) + MIN_BYTES;
      initDist(r, 0.5, m_recsOut, m_bytesOut, shuffle_recs, shuffle_bytes);
      initDist(r, 0.8, r_recsIn, r_bytesIn, shuffle_recs, shuffle_bytes);

      // reduce output
      final long red_recs = r.nextInt(VAR_REC) + MIN_REC;
      final long red_bytes = r.nextInt(VAR_BYTES) + MIN_BYTES;
      initDist(r, 0.5, r_recsOut, r_bytesOut, red_recs, red_bytes);

      if (LOG.isDebugEnabled()) {
        // Byte totals are kept as longs here; the original accumulated them
        // in ints, which silently truncated on overflow.
        long iMapBTotal = 0, oMapBTotal = 0, iRedBTotal = 0, oRedBTotal = 0;
        int iMapRTotal = 0, oMapRTotal = 0, iRedRTotal = 0, oRedRTotal = 0;
        for (int i = 0; i < m_recsIn.length; ++i) {
          iMapRTotal += m_recsIn[i];
          iMapBTotal += m_bytesIn[i];
          oMapRTotal += m_recsOut[i];
          oMapBTotal += m_bytesOut[i];
        }
        for (int i = 0; i < r_recsIn.length; ++i) {
          iRedRTotal += r_recsIn[i];
          iRedBTotal += r_bytesIn[i];
          oRedRTotal += r_recsOut[i];
          oRedBTotal += r_bytesOut[i];
        }
        LOG.debug(String.format(
            "%s: M (%03d) %6d/%10d -> %6d/%10d"
                + " R (%03d) %6d/%10d -> %6d/%10d @%d",
            name, m_bytesIn.length, iMapRTotal, iMapBTotal, oMapRTotal,
            oMapBTotal, r_bytesIn.length, iRedRTotal, iRedBTotal, oRedRTotal,
            oRedBTotal, submitTime));
      }
    }
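    // The JobStory accessors below are what a Gridmix run actually consumes
    // from a trace: job identity, task counts, per-task record and byte
    // figures, and the submission time used for scheduling.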
    @Override
    public String getName() {
      return name;
    }

    @Override
    public String getUser() {
      String s = String.format("foobar%d", id);
      GridmixTestUtils.createHomeAndStagingDirectory(s, (JobConf) conf);
      return s;
    }

    @Override
    public JobID getJobID() {
      return new JobID("job_mock_" + name, id);
    }

    @Override
    public Values getOutcome() {
      return Values.SUCCESS;
    }

    @Override
    public long getSubmissionTime() {
      return submitTime;
    }

    @Override
    public int getNumberMaps() {
      return m_bytesIn.length;
    }

    @Override
    public int getNumberReduces() {
      return r_bytesIn.length;
    }

    @Override
    public TaskInfo getTaskInfo(TaskType taskType, int taskNumber) {
      switch (taskType) {
        case MAP:
          return new TaskInfo(m_bytesIn[taskNumber], m_recsIn[taskNumber],
              m_bytesOut[taskNumber], m_recsOut[taskNumber], -1);
        case REDUCE:
          return new TaskInfo(r_bytesIn[taskNumber], r_recsIn[taskNumber],
              r_bytesOut[taskNumber], r_recsOut[taskNumber], -1);
        default:
          throw new IllegalArgumentException("Not interested");
      }
    }

    @Override
    public InputSplit[] getInputSplits() {
      throw new UnsupportedOperationException();
    }

    @Override
    public TaskAttemptInfo getTaskAttemptInfo(TaskType taskType,
        int taskNumber, int taskAttemptNumber) {
      // Every attempt succeeds with a fixed nominal runtime of 100.
      switch (taskType) {
        case MAP:
          return new MapTaskAttemptInfo(State.SUCCEEDED,
              new TaskInfo(m_bytesIn[taskNumber], m_recsIn[taskNumber],
                  m_bytesOut[taskNumber], m_recsOut[taskNumber], -1), 100);
        case REDUCE:
          return new ReduceTaskAttemptInfo(State.SUCCEEDED,
              new TaskInfo(r_bytesIn[taskNumber], r_recsIn[taskNumber],
                  r_bytesOut[taskNumber], r_recsOut[taskNumber], -1),
              100, 100, 100);
      }
      throw new UnsupportedOperationException();
    }

    @Override
    public TaskAttemptInfo getMapTaskAttemptInfoAdjusted(int taskNumber,
        int taskAttemptNumber, int locality) {
      throw new UnsupportedOperationException();
    }

    @Override
    public org.apache.hadoop.mapred.JobConf getJobConf() {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getQueueName() {
      // Alternate mock jobs between queues "q1" and "q2".
      return "q" + ((id % 2) + 1);
    }

    public static void reset() {
      seq.set(0);
      timestamp.set(System.currentTimeMillis()
          - TimeUnit.MILLISECONDS.convert(60, TimeUnit.DAYS));
    }
  }
}
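// A minimal, illustrative sketch of driving DebugJobProducer directly: it
// requests three mock jobs and prints the shape of each synthetic job. The
// demo class is hypothetical, not part of Gridmix. It deliberately avoids
// getUser(), which in this class calls
// GridmixTestUtils.createHomeAndStagingDirectory and so expects the Gridmix
// test mini-cluster to be running.
class DebugJobProducerDemo {
  public static void main(String[] args) throws IOException {
    DebugJobProducer producer = new DebugJobProducer(3, new Configuration());
    JobStory job;
    while ((job = producer.getNextJob()) != null) {
      System.out.println(job.getName() + ": " + job.getNumberMaps()
          + " maps, " + job.getNumberReduces() + " reduces, submit time "
          + job.getSubmissionTime());
    }
    producer.close();
  }
}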