List of usage examples for org.apache.hadoop.conf Configuration setLong
public void setLong(String name, long value)
Set the value of the name property to a long.
From source file: org.apache.tez.dag.app.rm.TestTaskScheduler.java
License:Apache License
/**
 * End-to-end scenario test for {@code TaskSchedulerWithDrainableAppCallback} driven entirely
 * through mocks of the RM client and the app callback.
 *
 * <p>The scenario proceeds in phases, and the {@code times(n)} verification counts are cumulative
 * across them, so the statement order matters:
 * <ol>
 *   <li>init/start with the reuse locality delay and the minimum idle-release timeout set to 0;</li>
 *   <li>three task requests (priorities 1-3) matched against four offered containers, with the
 *       priority-4 container expected to be released;</li>
 *   <li>deallocation by task and by container id, followed by container-completed statuses;</li>
 *   <li>node blacklisting: a container on the blacklisted node is released and the request is
 *       re-added, then a container on another host satisfies it; finally the node is
 *       unblacklisted;</li>
 *   <li>container-affinity allocation (a request that names a specific container id);</li>
 *   <li>progress, node-update, error, shutdown-request and stop/unregister pass-throughs.</li>
 * </ol>
 *
 * @throws Exception if any scheduler call or helper wait fails
 */
@SuppressWarnings({ "unchecked" })
@Test(timeout = 10000)
public void testTaskSchedulerWithReuse() throws Exception {
  // ---- Phase 1: build the scheduler around mocked collaborators ----
  RackResolver.init(new YarnConfiguration());
  TaskSchedulerAppCallback mockApp = mock(TaskSchedulerAppCallback.class);
  AppContext mockAppContext = mock(AppContext.class);
  when(mockAppContext.getAMState()).thenReturn(DAGAppMasterState.RUNNING);
  TezAMRMClientAsync<CookieContainerRequest> mockRMClient = mock(TezAMRMClientAsync.class);

  String appHost = "host";
  int appPort = 0;
  String appUrl = "url";
  TaskSchedulerWithDrainableAppCallback scheduler = new TaskSchedulerWithDrainableAppCallback(mockApp,
      new AlwaysMatchesContainerMatcher(), appHost, appPort, appUrl, mockRMClient, mockAppContext);
  // drain() is called after every scheduler interaction below before verifying mockApp.
  // NOTE(review): presumably it blocks until queued app callbacks have been delivered - confirm.
  final TaskSchedulerAppCallbackDrainable drainableAppCallback = scheduler.getDrainableAppCallback();

  Configuration conf = new Configuration();
  // to match all in the same pass
  conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0);
  // to release immediately after deallocate
  conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0);
  scheduler.init(conf);
  drainableAppCallback.drain();

  // Stub the AM registration response and cluster resources that start() will ask for.
  RegisterApplicationMasterResponse mockRegResponse = mock(RegisterApplicationMasterResponse.class);
  Resource mockMaxResource = mock(Resource.class);
  Map<ApplicationAccessType, String> mockAcls = mock(Map.class);
  when(mockRegResponse.getMaximumResourceCapability()).thenReturn(mockMaxResource);
  when(mockRegResponse.getApplicationACLs()).thenReturn(mockAcls);
  when(mockRMClient.registerApplicationMaster(anyString(), anyInt(), anyString()))
      .thenReturn(mockRegResponse);
  Resource mockClusterResource = mock(Resource.class);
  when(mockRMClient.getAvailableResources()).thenReturn(mockClusterResource);

  scheduler.start();
  drainableAppCallback.drain();

  // ---- Phase 2: three task requests at priorities 1..3 ----
  Object mockTask1 = mock(Object.class);
  when(mockTask1.toString()).thenReturn("task1");
  Object mockCookie1 = mock(Object.class);
  Resource mockCapability = mock(Resource.class);
  String[] hosts = { "host1", "host5" };
  String[] racks = { "/default-rack", "/default-rack" };
  final Priority mockPriority1 = Priority.newInstance(1);
  final Priority mockPriority2 = Priority.newInstance(2);
  final Priority mockPriority3 = Priority.newInstance(3);
  final Priority mockPriority4 = Priority.newInstance(4);
  final Priority mockPriority5 = Priority.newInstance(5);
  Object mockTask2 = mock(Object.class);
  when(mockTask2.toString()).thenReturn("task2");
  Object mockCookie2 = mock(Object.class);
  Object mockTask3 = mock(Object.class);
  when(mockTask3.toString()).thenReturn("task3");
  Object mockCookie3 = mock(Object.class);

  // The captor is reused for every addContainerRequest verification; getValue() yields the
  // most recently captured request.
  ArgumentCaptor<CookieContainerRequest> requestCaptor = ArgumentCaptor
      .forClass(CookieContainerRequest.class);

  scheduler.allocateTask(mockTask1, mockCapability, hosts, racks, mockPriority1, null, mockCookie1);
  drainableAppCallback.drain();
  verify(mockRMClient, times(1)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request1 = requestCaptor.getValue();

  scheduler.allocateTask(mockTask2, mockCapability, hosts, racks, mockPriority2, null, mockCookie2);
  drainableAppCallback.drain();
  verify(mockRMClient, times(2)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request2 = requestCaptor.getValue();

  scheduler.allocateTask(mockTask3, mockCapability, hosts, racks, mockPriority3, null, mockCookie3);
  drainableAppCallback.drain();
  verify(mockRMClient, times(3)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request3 = requestCaptor.getValue();

  // Containers offered by the (mock) RM: container4 (priority 4) first, then containers 1..3.
  List<Container> containers = new ArrayList<Container>();
  // sending lower priority container first to make sure its not matched
  Container mockContainer4 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer4.getNodeId().getHost()).thenReturn("host4");
  when(mockContainer4.toString()).thenReturn("container4");
  when(mockContainer4.getPriority()).thenReturn(mockPriority4);
  ContainerId mockCId4 = mock(ContainerId.class);
  when(mockContainer4.getId()).thenReturn(mockCId4);
  when(mockCId4.toString()).thenReturn("container4");
  containers.add(mockContainer4);

  Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer1.getNodeId().getHost()).thenReturn("host1");
  when(mockContainer1.getPriority()).thenReturn(mockPriority1);
  when(mockContainer1.toString()).thenReturn("container1");
  ContainerId mockCId1 = mock(ContainerId.class);
  when(mockContainer1.getId()).thenReturn(mockCId1);
  when(mockCId1.toString()).thenReturn("container1");
  containers.add(mockContainer1);

  Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer2.getNodeId().getHost()).thenReturn("host2");
  when(mockContainer2.getPriority()).thenReturn(mockPriority2);
  when(mockContainer2.toString()).thenReturn("container2");
  ContainerId mockCId2 = mock(ContainerId.class);
  when(mockContainer2.getId()).thenReturn(mockCId2);
  when(mockCId2.toString()).thenReturn("container2");
  containers.add(mockContainer2);

  Container mockContainer3 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer3.getNodeId().getHost()).thenReturn("host3");
  when(mockContainer3.getPriority()).thenReturn(mockPriority3);
  when(mockContainer3.toString()).thenReturn("container3");
  ContainerId mockCId3 = mock(ContainerId.class);
  when(mockContainer3.getId()).thenReturn(mockCId3);
  when(mockCId3.toString()).thenReturn("container3");
  containers.add(mockContainer3);

  // Canned "matching requests" results, one list per locality level. The inner ArrayLists are
  // mutated later (clear()/add()) so that the same answers return different requests.
  ArrayList<CookieContainerRequest> hostContainers = new ArrayList<CookieContainerRequest>();
  hostContainers.add(request1);
  ArrayList<CookieContainerRequest> rackContainers = new ArrayList<CookieContainerRequest>();
  rackContainers.add(request2);
  ArrayList<CookieContainerRequest> anyContainers = new ArrayList<CookieContainerRequest>();
  anyContainers.add(request3);

  final List<ArrayList<CookieContainerRequest>> hostList = new LinkedList<ArrayList<CookieContainerRequest>>();
  hostList.add(hostContainers);
  final List<ArrayList<CookieContainerRequest>> rackList = new LinkedList<ArrayList<CookieContainerRequest>>();
  rackList.add(rackContainers);
  final List<ArrayList<CookieContainerRequest>> anyList = new LinkedList<ArrayList<CookieContainerRequest>>();
  anyList.add(anyContainers);
  final List<ArrayList<CookieContainerRequest>> emptyList = new LinkedList<ArrayList<CookieContainerRequest>>();

  // return pri1 requests for host1
  when(mockRMClient.getMatchingRequestsForTopPriority(eq("host1"), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return hostList;
        }
      });
  // second request matched to rack. RackResolver by default puts hosts in
  // /default-rack. We need to workaround by returning rack matches only once
  when(mockRMClient.getMatchingRequestsForTopPriority(eq("/default-rack"), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return rackList;
        }
      }).thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return emptyList;
        }
      });
  // third request matched to ANY
  when(mockRMClient.getMatchingRequestsForTopPriority(eq(ResourceRequest.ANY), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return anyList;
        }
      }).thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return emptyList;
        }
      });

  // The reported top priority follows drainableAppCallback.count: 0 -> pri1, 1 -> pri2,
  // 2 -> pri3, 3 -> pri4, anything else -> null.
  // NOTE(review): count presumably tracks app callbacks delivered so far - confirm.
  when(mockRMClient.getTopPriority()).then(new Answer<Priority>() {
    @Override
    public Priority answer(InvocationOnMock invocation) throws Throwable {
      int allocations = drainableAppCallback.count.get();
      if (allocations == 0) {
        return mockPriority1;
      }
      if (allocations == 1) {
        return mockPriority2;
      }
      if (allocations == 2) {
        return mockPriority3;
      }
      if (allocations == 3) {
        return mockPriority4;
      }
      return null;
    }
  });

  // Test hook: the helper below waits on this flag after each onContainersAllocated call.
  AtomicBoolean drainNotifier = new AtomicBoolean(false);
  scheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier;

  scheduler.onContainersAllocated(containers);
  TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
  drainableAppCallback.drain();
  // exact number allocations returned
  verify(mockApp, times(3)).taskAllocated(any(), any(), (Container) any());
  // first container allocated
  verify(mockApp).taskAllocated(mockTask1, mockCookie1, mockContainer1);
  verify(mockApp).taskAllocated(mockTask2, mockCookie2, mockContainer2);
  verify(mockApp).taskAllocated(mockTask3, mockCookie3, mockContainer3);
  verify(mockRMClient).removeContainerRequest(request1);
  verify(mockRMClient).removeContainerRequest(request2);
  verify(mockRMClient).removeContainerRequest(request3);
  // verify unwanted container released
  verify(mockRMClient).releaseAssignedContainer(mockCId4);

  // ---- Phase 3: deallocation and completion statuses ----
  // deallocate allocated task
  assertTrue(scheduler.deallocateTask(mockTask1, true));
  drainableAppCallback.drain();
  verify(mockApp).containerBeingReleased(mockCId1);
  verify(mockRMClient).releaseAssignedContainer(mockCId1);
  // deallocate allocated container
  Assert.assertEquals(mockTask2, scheduler.deallocateContainer(mockCId2));
  drainableAppCallback.drain();
  verify(mockRMClient).releaseAssignedContainer(mockCId2);
  // cumulative releases so far: container4, container1, container2
  verify(mockRMClient, times(3)).releaseAssignedContainer((ContainerId) any());

  List<ContainerStatus> statuses = new ArrayList<ContainerStatus>();
  ContainerStatus mockStatus1 = mock(ContainerStatus.class);
  when(mockStatus1.getContainerId()).thenReturn(mockCId1);
  statuses.add(mockStatus1);
  ContainerStatus mockStatus2 = mock(ContainerStatus.class);
  when(mockStatus2.getContainerId()).thenReturn(mockCId2);
  statuses.add(mockStatus2);
  ContainerStatus mockStatus3 = mock(ContainerStatus.class);
  when(mockStatus3.getContainerId()).thenReturn(mockCId3);
  statuses.add(mockStatus3);
  ContainerStatus mockStatus4 = mock(ContainerStatus.class);
  when(mockStatus4.getContainerId()).thenReturn(mockCId4);
  statuses.add(mockStatus4);

  scheduler.onContainersCompleted(statuses);
  drainableAppCallback.drain();
  // released container status returned
  verify(mockApp).containerCompleted(mockTask1, mockStatus1);
  verify(mockApp).containerCompleted(mockTask2, mockStatus2);
  // currently allocated container status returned and not released
  verify(mockApp).containerCompleted(mockTask3, mockStatus3);
  // no other statuses returned
  verify(mockApp, times(3)).containerCompleted(any(), (ContainerStatus) any());
  verify(mockRMClient, times(3)).releaseAssignedContainer((ContainerId) any());

  // ---- Phase 4: node blacklisting ----
  // verify blacklisting
  verify(mockRMClient, times(0)).addNodeToBlacklist((NodeId) any());
  String badHost = "host6";
  NodeId badNodeId = mock(NodeId.class);
  when(badNodeId.getHost()).thenReturn(badHost);
  scheduler.blacklistNode(badNodeId);
  verify(mockRMClient, times(1)).addNodeToBlacklist(badNodeId);

  // task4 is requested with no host/rack constraints (both null)
  Object mockTask4 = mock(Object.class);
  when(mockTask4.toString()).thenReturn("task4");
  Object mockCookie4 = mock(Object.class);
  scheduler.allocateTask(mockTask4, mockCapability, null, null, mockPriority4, null, mockCookie4);
  drainableAppCallback.drain();
  verify(mockRMClient, times(4)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request4 = requestCaptor.getValue();
  anyContainers.clear();
  anyContainers.add(request4);

  // container5 lives on the blacklisted node
  Container mockContainer5 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer5.getNodeId().getHost()).thenReturn(badHost);
  when(mockContainer5.getNodeId()).thenReturn(badNodeId);
  ContainerId mockCId5 = mock(ContainerId.class);
  when(mockContainer5.toString()).thenReturn("container5");
  when(mockCId5.toString()).thenReturn("container5");
  when(mockContainer5.getId()).thenReturn(mockCId5);
  when(mockContainer5.getPriority()).thenReturn(mockPriority4);
  containers.clear();
  containers.add(mockContainer5);

  // Re-stub ANY matching: the first call returns anyList (now holding request4), later calls
  // return emptyList.
  when(mockRMClient.getMatchingRequestsForTopPriority(eq(ResourceRequest.ANY), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return anyList;
        }
      }).thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return emptyList;
        }
      });
  drainNotifier.set(false);
  scheduler.onContainersAllocated(containers);
  TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
  drainableAppCallback.drain();
  // no new allocation
  verify(mockApp, times(3)).taskAllocated(any(), any(), (Container) any());
  // verify blacklisted container released
  verify(mockRMClient).releaseAssignedContainer(mockCId5);
  verify(mockRMClient, times(4)).releaseAssignedContainer((ContainerId) any());
  // verify request added back
  verify(mockRMClient, times(5)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request5 = requestCaptor.getValue();
  anyContainers.clear();
  anyContainers.add(request5);

  // container6 is on host7, which is not the blacklisted host
  Container mockContainer6 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer6.getNodeId().getHost()).thenReturn("host7");
  ContainerId mockCId6 = mock(ContainerId.class);
  when(mockContainer6.getId()).thenReturn(mockCId6);
  when(mockContainer6.toString()).thenReturn("container6");
  when(mockCId6.toString()).thenReturn("container6");
  containers.clear();
  containers.add(mockContainer6);
  when(mockRMClient.getMatchingRequestsForTopPriority(eq(ResourceRequest.ANY), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return anyList;
        }
      }).thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return emptyList;
        }
      });
  drainNotifier.set(false);
  scheduler.onContainersAllocated(containers);
  TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
  drainableAppCallback.drain();
  // new allocation
  verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any());
  verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6);
  // deallocate allocated task
  assertTrue(scheduler.deallocateTask(mockTask4, true));
  drainableAppCallback.drain();
  verify(mockApp).containerBeingReleased(mockCId6);
  verify(mockRMClient).releaseAssignedContainer(mockCId6);
  verify(mockRMClient, times(5)).releaseAssignedContainer((ContainerId) any());

  // test unblacklist
  scheduler.unblacklistNode(badNodeId);
  verify(mockRMClient, times(1)).removeNodeFromBlacklist(badNodeId);
  assertEquals(0, scheduler.blacklistedNodes.size());

  // ---- Phase 5: container-affinity allocation ----
  // verify container level matching
  // fudge the top level priority to prevent containers from being released
  // if top level priority is higher than newly allocated containers then
  // they will not be released
  final AtomicBoolean fudgePriority = new AtomicBoolean(true);
  when(mockRMClient.getTopPriority()).then(new Answer<Priority>() {
    @Override
    public Priority answer(InvocationOnMock invocation) throws Throwable {
      if (fudgePriority.get()) {
        return mockPriority4;
      }
      return mockPriority5;
    }
  });
  // add a dummy task to prevent release of allocated containers
  Object mockTask5 = mock(Object.class);
  when(mockTask5.toString()).thenReturn("task5");
  Object mockCookie5 = mock(Object.class);
  scheduler.allocateTask(mockTask5, mockCapability, hosts, racks, mockPriority5, null, mockCookie5);
  // Unlike the earlier allocations, this verification runs before drain().
  verify(mockRMClient, times(6)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request6 = requestCaptor.getValue();
  drainableAppCallback.drain();

  // add containers so that we can reference one of them for container specific
  // allocation
  containers.clear();
  Container mockContainer7 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer7.getNodeId().getHost()).thenReturn("host5");
  ContainerId mockCId7 = mock(ContainerId.class);
  when(mockContainer7.toString()).thenReturn("container7");
  when(mockCId7.toString()).thenReturn("container7");
  when(mockContainer7.getId()).thenReturn(mockCId7);
  when(mockContainer7.getPriority()).thenReturn(mockPriority5);
  containers.add(mockContainer7);
  Container mockContainer8 = mock(Container.class, RETURNS_DEEP_STUBS);
  when(mockContainer8.getNodeId().getHost()).thenReturn("host5");
  ContainerId mockCId8 = mock(ContainerId.class);
  when(mockContainer8.toString()).thenReturn("container8");
  when(mockCId8.toString()).thenReturn("container8");
  when(mockContainer8.getId()).thenReturn(mockCId8);
  when(mockContainer8.getPriority()).thenReturn(mockPriority5);
  containers.add(mockContainer8);
  drainNotifier.set(false);
  scheduler.onContainersAllocated(containers);
  drainableAppCallback.drain();
  // release count unchanged: containers 7 and 8 have not been released at this point
  verify(mockRMClient, times(5)).releaseAssignedContainer((ContainerId) any());

  Object mockTask6 = mock(Object.class);
  when(mockTask6.toString()).thenReturn("task6");
  Object mockCookie6 = mock(Object.class);
  // allocate request with container affinity
  scheduler.allocateTask(mockTask6, mockCapability, mockCId7, mockPriority5, null, mockCookie6);
  drainableAppCallback.drain();
  verify(mockRMClient, times(7)).addContainerRequest(requestCaptor.capture());
  CookieContainerRequest request7 = requestCaptor.getValue();
  // hostList wraps hostContainers, so the host5 answer below returns request6 and request7
  hostContainers.clear();
  hostContainers.add(request6);
  hostContainers.add(request7);
  when(mockRMClient.getMatchingRequestsForTopPriority(eq("host5"), (Resource) any()))
      .thenAnswer(new Answer<List<? extends Collection<CookieContainerRequest>>>() {
        @Override
        public List<? extends Collection<CookieContainerRequest>> answer(InvocationOnMock invocation)
            throws Throwable {
          return hostList;
        }
      });
  // stop fudging top priority
  fudgePriority.set(false);
  TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier);
  drainableAppCallback.drain();
  verify(mockApp, times(6)).taskAllocated(any(), any(), (Container) any());
  // container7 allocated to the task with affinity for it
  verify(mockApp).taskAllocated(mockTask6, mockCookie6, mockContainer7);
  // deallocate allocated task
  assertTrue(scheduler.deallocateTask(mockTask5, true));
  assertTrue(scheduler.deallocateTask(mockTask6, true));
  drainableAppCallback.drain();
  verify(mockApp).containerBeingReleased(mockCId7);
  verify(mockApp).containerBeingReleased(mockCId8);
  verify(mockRMClient).releaseAssignedContainer(mockCId7);
  verify(mockRMClient).releaseAssignedContainer(mockCId8);
  verify(mockRMClient, times(7)).releaseAssignedContainer((ContainerId) any());

  // ---- Phase 6: pass-through callbacks and shutdown ----
  float progress = 0.5f;
  when(mockApp.getProgress()).thenReturn(progress);
  Assert.assertEquals(progress, scheduler.getProgress(), 0);

  List<NodeReport> mockUpdatedNodes = mock(List.class);
  scheduler.onNodesUpdated(mockUpdatedNodes);
  drainableAppCallback.drain();
  verify(mockApp).nodesUpdated(mockUpdatedNodes);

  Exception mockException = mock(Exception.class);
  scheduler.onError(mockException);
  drainableAppCallback.drain();
  verify(mockApp).onError(mockException);

  scheduler.onShutdownRequest();
  drainableAppCallback.drain();
  verify(mockApp).appShutdownRequested();

  String appMsg = "success";
  AppFinalStatus finalStatus = new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl);
  when(mockApp.getFinalAppStatus()).thenReturn(finalStatus);
  scheduler.stop();
  drainableAppCallback.drain();
  verify(mockRMClient).unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl);
  verify(mockRMClient).stop();
  scheduler.close();
}
From source file:org.apache.tez.dag.app.rm.TestTaskScheduler.java
License:Apache License
@SuppressWarnings("unchecked") @Test(timeout = 5000)/*from www .j av a2 s . c o m*/ public void testTaskSchedulerRandomReuseExpireTime() throws Exception { RackResolver.init(new YarnConfiguration()); TaskSchedulerAppCallback mockApp = mock(TaskSchedulerAppCallback.class); AppContext mockAppContext = mock(AppContext.class); when(mockAppContext.getAMState()).thenReturn(DAGAppMasterState.RUNNING); TezAMRMClientAsync<CookieContainerRequest> mockRMClient = mock(TezAMRMClientAsync.class); String appHost = "host"; int appPort = 0; String appUrl = "url"; TaskSchedulerWithDrainableAppCallback scheduler1 = new TaskSchedulerWithDrainableAppCallback(mockApp, new AlwaysMatchesContainerMatcher(), appHost, appPort, appUrl, mockRMClient, mockAppContext); TaskSchedulerWithDrainableAppCallback scheduler2 = new TaskSchedulerWithDrainableAppCallback(mockApp, new AlwaysMatchesContainerMatcher(), appHost, appPort, appUrl, mockRMClient, mockAppContext); long minTime = 1000l; long maxTime = 100000l; Configuration conf1 = new Configuration(); conf1.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, minTime); conf1.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, minTime); scheduler1.init(conf1); Configuration conf2 = new Configuration(); conf2.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, minTime); conf2.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, maxTime); scheduler2.init(conf2); RegisterApplicationMasterResponse mockRegResponse = mock(RegisterApplicationMasterResponse.class); Resource mockMaxResource = mock(Resource.class); Map<ApplicationAccessType, String> mockAcls = mock(Map.class); when(mockRegResponse.getMaximumResourceCapability()).thenReturn(mockMaxResource); when(mockRegResponse.getApplicationACLs()).thenReturn(mockAcls); when(mockRMClient.registerApplicationMaster(anyString(), anyInt(), anyString())) .thenReturn(mockRegResponse); Resource mockClusterResource = 
mock(Resource.class); when(mockRMClient.getAvailableResources()).thenReturn(mockClusterResource); scheduler1.start(); scheduler2.start(); // when min == max the expire time is always min for (int i = 0; i < 10; ++i) { Assert.assertEquals(minTime, scheduler1.getHeldContainerExpireTime(0)); } long lastExpireTime = 0; // when min < max the expire time is random in between min and max for (int i = 0; i < 10; ++i) { long currExpireTime = scheduler2.getHeldContainerExpireTime(0); Assert.assertTrue("min: " + minTime + " curr: " + currExpireTime + " max: " + maxTime, (minTime <= currExpireTime && currExpireTime <= maxTime)); Assert.assertNotEquals(lastExpireTime, currExpireTime); lastExpireTime = currExpireTime; } String appMsg = "success"; AppFinalStatus finalStatus = new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler1.stop(); scheduler1.close(); scheduler2.stop(); scheduler2.close(); }
From source file:org.apache.tez.dag.library.vertexmanager.TestShuffleVertexManager.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) @Test(timeout = 5000)/*from w w w. j a v a 2 s .c om*/ public void testShuffleVertexManagerAutoParallelism() throws Exception { Configuration conf = new Configuration(); conf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true); conf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, 1000L); ShuffleVertexManager manager = null; HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>(); String mockSrcVertexId1 = "Vertex1"; EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String mockSrcVertexId2 = "Vertex2"; EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String mockSrcVertexId3 = "Vertex3"; EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); final String mockManagedVertexId = "Vertex4"; mockInputVertices.put(mockSrcVertexId1, eProp1); mockInputVertices.put(mockSrcVertexId2, eProp2); mockInputVertices.put(mockSrcVertexId3, eProp3); final VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(4); //Check via setters ShuffleVertexManager.ShuffleVertexManagerConfigBuilder configurer = ShuffleVertexManager .createConfigBuilder(null); VertexManagerPluginDescriptor pluginDesc = 
configurer.setAutoReduceParallelism(true) .setDesiredTaskInputSize(1000l).setMinTaskParallelism(10).setSlowStartMaxSrcCompletionFraction(0.5f) .build(); when(mockContext.getUserPayload()).thenReturn(pluginDesc.getUserPayload()); manager = ReflectionUtils.createClazzInstance(pluginDesc.getClassName(), new Class[] { VertexManagerPluginContext.class }, new Object[] { mockContext }); manager.initialize(); verify(mockContext, times(1)).vertexReconfigurationPlanned(); // Tez notified of reconfig Assert.assertTrue(manager.enableAutoParallelism == true); Assert.assertTrue(manager.desiredTaskInputDataSize == 1000l); Assert.assertTrue(manager.minTaskParallelism == 10); Assert.assertTrue(manager.slowStartMinSrcCompletionFraction == 0.25f); Assert.assertTrue(manager.slowStartMaxSrcCompletionFraction == 0.5f); configurer = ShuffleVertexManager.createConfigBuilder(null); pluginDesc = configurer.setAutoReduceParallelism(false).build(); when(mockContext.getUserPayload()).thenReturn(pluginDesc.getUserPayload()); manager = ReflectionUtils.createClazzInstance(pluginDesc.getClassName(), new Class[] { VertexManagerPluginContext.class }, new Object[] { mockContext }); manager.initialize(); verify(mockContext, times(1)).vertexReconfigurationPlanned(); // Tez not notified of reconfig Assert.assertTrue(manager.enableAutoParallelism == false); Assert.assertTrue( manager.desiredTaskInputDataSize == ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE_DEFAULT); Assert.assertTrue(manager.minTaskParallelism == 1); Assert.assertTrue( manager.slowStartMinSrcCompletionFraction == ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION_DEFAULT); Assert.assertTrue( manager.slowStartMaxSrcCompletionFraction == ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION_DEFAULT); final HashSet<Integer> scheduledTasks = new HashSet<Integer>(); doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); 
scheduledTasks.clear(); List<TaskWithLocationHint> tasks = (List<TaskWithLocationHint>) args[0]; for (TaskWithLocationHint task : tasks) { scheduledTasks.add(task.getTaskIndex()); } return null; } }).when(mockContext).scheduleVertexTasks(anyList()); final Map<String, EdgeManagerPlugin> newEdgeManagers = new HashMap<String, EdgeManagerPlugin>(); doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) throws Exception { when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(2); newEdgeManagers.clear(); for (Entry<String, EdgeManagerPluginDescriptor> entry : ((Map<String, EdgeManagerPluginDescriptor>) invocation .getArguments()[2]).entrySet()) { final UserPayload userPayload = entry.getValue().getUserPayload(); EdgeManagerPluginContext emContext = new EdgeManagerPluginContext() { @Override public UserPayload getUserPayload() { return userPayload == null ? null : userPayload; } @Override public String getSourceVertexName() { return null; } @Override public String getDestinationVertexName() { return null; } @Override public int getSourceVertexNumTasks() { return 2; } @Override public int getDestinationVertexNumTasks() { return 2; } }; EdgeManagerPlugin edgeManager = ReflectionUtils.createClazzInstance( entry.getValue().getClassName(), new Class[] { EdgeManagerPluginContext.class }, new Object[] { emContext }); edgeManager.initialize(); newEdgeManagers.put(entry.getKey(), edgeManager); } return null; } }).when(mockContext).setVertexParallelism(eq(2), any(VertexLocationHint.class), anyMap(), anyMap()); // check initialization manager = createManager(conf, mockContext, 0.1f, 0.1f); // Tez notified of reconfig verify(mockContext, times(2)).vertexReconfigurationPlanned(); Assert.assertTrue(manager.bipartiteSources == 2); // source vertices have 0 tasks. 
when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(0); when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(0); when(mockContext.getVertexNumTasks(mockSrcVertexId3)).thenReturn(1); // check waiting for notification before scheduling manager.onVertexStarted(null); Assert.assertFalse(manager.pendingTasks.isEmpty()); // source vertices have 0 tasks. so only 1 notification needed. triggers scheduling manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); Assert.assertTrue(manager.pendingTasks.isEmpty()); verify(mockContext, times(1)).doneReconfiguringVertex(); // reconfig done Assert.assertTrue(scheduledTasks.size() == 4); // all tasks scheduled scheduledTasks.clear(); // TODO TEZ-1714 locking verify(mockContext, times(1)).vertexManagerDone(); // notified after scheduling all tasks // check scheduling only after onVertexStarted manager = createManager(conf, mockContext, 0.1f, 0.1f); // Tez notified of reconfig verify(mockContext, times(3)).vertexReconfigurationPlanned(); Assert.assertTrue(manager.bipartiteSources == 2); // source vertices have 0 tasks. so only 1 notification needed. 
does not trigger scheduling manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); verify(mockContext, times(1)).doneReconfiguringVertex(); // reconfig done Assert.assertTrue(scheduledTasks.size() == 0); // no tasks scheduled manager.onVertexStarted(null); verify(mockContext, times(2)).doneReconfiguringVertex(); // reconfig done Assert.assertTrue(manager.pendingTasks.isEmpty()); Assert.assertTrue(scheduledTasks.size() == 4); // all tasks scheduled when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(2); when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(2); ByteBuffer payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(5000L).build().toByteString() .asReadOnlyByteBuffer(); VertexManagerEvent vmEvent = VertexManagerEvent.create("Vertex", payload); // parallelism not change due to large data size manager = createManager(conf, mockContext, 0.1f, 0.1f); verify(mockContext, times(4)).vertexReconfigurationPlanned(); // Tez notified of reconfig manager.onVertexStarted(null); Assert.assertTrue(manager.pendingTasks.size() == 4); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 4); manager.onVertexManagerEventReceived(vmEvent); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(0)); verify(mockContext, times(0)).setVertexParallelism(anyInt(), any(VertexLocationHint.class), anyMap(), anyMap()); verify(mockContext, times(2)).doneReconfiguringVertex(); // trigger scheduling manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); verify(mockContext, times(0)).setVertexParallelism(anyInt(), any(VertexLocationHint.class), anyMap(), anyMap()); verify(mockContext, times(3)).doneReconfiguringVertex(); // reconfig done 
Assert.assertEquals(0, manager.pendingTasks.size()); // all tasks scheduled Assert.assertEquals(4, scheduledTasks.size()); // TODO TEZ-1714 locking verify(mockContext, times(2)).vertexManagerDone(); // notified after scheduling all tasks Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(5000L, manager.completedSourceTasksOutputSize); /** * Test for TEZ-978 * Delay determining parallelism until enough data has been received. */ scheduledTasks.clear(); payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(1L).build().toByteString() .asReadOnlyByteBuffer(); vmEvent = VertexManagerEvent.create("Vertex", payload); //min/max fraction of 0.01/0.75 would ensure that we hit determineParallelism code path on receiving first event itself. manager = createManager(conf, mockContext, 0.01f, 0.75f); manager.onVertexStarted(null); Assert.assertEquals(4, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); //First task in src1 completed with small payload manager.onVertexManagerEventReceived(vmEvent); //small payload manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(0)); Assert.assertTrue(manager.determineParallelismAndApply() == false); Assert.assertEquals(4, manager.pendingTasks.size()); Assert.assertEquals(0, scheduledTasks.size()); // no tasks scheduled Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(1, manager.numVertexManagerEventsReceived); Assert.assertEquals(1L, manager.completedSourceTasksOutputSize); //Second task in src1 completed with small payload manager.onVertexManagerEventReceived(vmEvent); //small payload manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(0)); //Still overall data gathered has not reached threshold; So, ensure parallelism can be determined later Assert.assertTrue(manager.determineParallelismAndApply() == false); 
Assert.assertEquals(4, manager.pendingTasks.size()); Assert.assertEquals(0, scheduledTasks.size()); // no tasks scheduled Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(2, manager.numVertexManagerEventsReceived); Assert.assertEquals(2L, manager.completedSourceTasksOutputSize); //First task in src2 completed (with larger payload) to trigger determining parallelism payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(1200L).build().toByteString() .asReadOnlyByteBuffer(); vmEvent = VertexManagerEvent.create("Vertex", payload); manager.onVertexManagerEventReceived(vmEvent); Assert.assertTrue(manager.determineParallelismAndApply()); //ensure parallelism is determined verify(mockContext, times(1)).setVertexParallelism(eq(2), any(VertexLocationHint.class), anyMap(), anyMap()); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); manager.onSourceTaskCompleted(mockSrcVertexId2, new Integer(0)); Assert.assertEquals(1, manager.pendingTasks.size()); Assert.assertEquals(1, scheduledTasks.size()); Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(3, manager.numVertexManagerEventsReceived); Assert.assertEquals(1202L, manager.completedSourceTasksOutputSize); //Test for max fraction. 
Min fraction is just instruction to framework, but honor max fraction when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(20); when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(20); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(40); scheduledTasks.clear(); payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(100L).build().toByteString() .asReadOnlyByteBuffer(); vmEvent = VertexManagerEvent.create("Vertex", payload); //min/max fraction of 0.0/0.2 manager = createManager(conf, mockContext, 0.0f, 0.2f); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); Assert.assertEquals(40, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(40, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); //send 7 events with payload size as 100 for (int i = 0; i < 7; i++) { manager.onVertexManagerEventReceived(vmEvent); //small payload manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(i)); //should not change parallelism verify(mockContext, times(0)).setVertexParallelism(eq(4), any(VertexLocationHint.class), anyMap(), anyMap()); } //send 8th event with payload size as 100 manager.onVertexManagerEventReceived(vmEvent); manager.onSourceTaskCompleted(mockSrcVertexId2, new Integer(8)); //Since max threshold (40 * 0.2 = 8) is met, vertex manager should determine parallelism verify(mockContext, times(1)).setVertexParallelism(eq(4), any(VertexLocationHint.class), anyMap(), anyMap()); //reset context for next test when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(2); when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(2); 
when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(4); // parallelism changed due to small data size scheduledTasks.clear(); payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(500L).build().toByteString() .asReadOnlyByteBuffer(); vmEvent = VertexManagerEvent.create("Vertex", payload); manager = createManager(conf, mockContext, 0.5f, 0.5f); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); Assert.assertEquals(4, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks); // task completion from non-bipartite stage does nothing manager.onSourceTaskCompleted(mockSrcVertexId3, new Integer(0)); Assert.assertEquals(4, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(4, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); manager.onVertexManagerEventReceived(vmEvent); manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(0)); Assert.assertEquals(4, manager.pendingTasks.size()); Assert.assertEquals(0, scheduledTasks.size()); // no tasks scheduled Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(1, manager.numVertexManagerEventsReceived); Assert.assertEquals(500L, manager.completedSourceTasksOutputSize); // ignore duplicate completion manager.onSourceTaskCompleted(mockSrcVertexId1, new Integer(0)); Assert.assertEquals(4, manager.pendingTasks.size()); Assert.assertEquals(0, scheduledTasks.size()); // no tasks scheduled Assert.assertEquals(1, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(500L, manager.completedSourceTasksOutputSize); manager.onVertexManagerEventReceived(vmEvent); 
manager.onSourceTaskCompleted(mockSrcVertexId2, new Integer(1)); // managedVertex tasks reduced verify(mockContext, times(2)).setVertexParallelism(eq(2), any(VertexLocationHint.class), anyMap(), anyMap()); Assert.assertEquals(2, newEdgeManagers.size()); // TODO improve tests for parallelism Assert.assertEquals(0, manager.pendingTasks.size()); // all tasks scheduled Assert.assertEquals(2, scheduledTasks.size()); Assert.assertTrue(scheduledTasks.contains(new Integer(0))); Assert.assertTrue(scheduledTasks.contains(new Integer(1))); Assert.assertEquals(2, manager.numBipartiteSourceTasksCompleted); Assert.assertEquals(2, manager.numVertexManagerEventsReceived); Assert.assertEquals(1000L, manager.completedSourceTasksOutputSize); // more completions dont cause recalculation of parallelism manager.onSourceTaskCompleted(mockSrcVertexId2, new Integer(0)); verify(mockContext, times(2)).setVertexParallelism(eq(2), any(VertexLocationHint.class), anyMap(), anyMap()); Assert.assertEquals(2, newEdgeManagers.size()); EdgeManagerPlugin edgeManager = newEdgeManagers.values().iterator().next(); Map<Integer, List<Integer>> targets = Maps.newHashMap(); DataMovementEvent dmEvent = DataMovementEvent.create(1, ByteBuffer.wrap(new byte[0])); // 4 source task outputs - same as original number of partitions Assert.assertEquals(4, edgeManager.getNumSourceTaskPhysicalOutputs(0)); // 4 destination task inputs - 2 source tasks + 2 merged partitions Assert.assertEquals(4, edgeManager.getNumDestinationTaskPhysicalInputs(0)); edgeManager.routeDataMovementEventToDestination(dmEvent, 1, dmEvent.getSourceIndex(), targets); Assert.assertEquals(1, targets.size()); Map.Entry<Integer, List<Integer>> e = targets.entrySet().iterator().next(); Assert.assertEquals(0, e.getKey().intValue()); Assert.assertEquals(1, e.getValue().size()); Assert.assertEquals(3, e.getValue().get(0).intValue()); targets.clear(); dmEvent = DataMovementEvent.create(2, ByteBuffer.wrap(new byte[0])); 
edgeManager.routeDataMovementEventToDestination(dmEvent, 0, dmEvent.getSourceIndex(), targets); Assert.assertEquals(1, targets.size()); e = targets.entrySet().iterator().next(); Assert.assertEquals(1, e.getKey().intValue()); Assert.assertEquals(1, e.getValue().size()); Assert.assertEquals(0, e.getValue().get(0).intValue()); targets.clear(); edgeManager.routeInputSourceTaskFailedEventToDestination(2, targets); Assert.assertEquals(2, targets.size()); for (Map.Entry<Integer, List<Integer>> entry : targets.entrySet()) { Assert.assertTrue(entry.getKey().intValue() == 0 || entry.getKey().intValue() == 1); Assert.assertEquals(2, entry.getValue().size()); Assert.assertEquals(4, entry.getValue().get(0).intValue()); Assert.assertEquals(5, entry.getValue().get(1).intValue()); } }
From source file:org.apache.tez.dag.library.vertexmanager.TestShuffleVertexManager.java
License:Apache License
/** * Tasks should be scheduled only when all source vertices are configured completely */// ww w . j av a 2 s. co m @Test(timeout = 5000) public void test_Tez1649_with_scatter_gather_edges() { Configuration conf = new Configuration(); conf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true); conf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, 1000L); ShuffleVertexManager manager = null; HashMap<String, EdgeProperty> mockInputVertices_R2 = new HashMap<String, EdgeProperty>(); String r1 = "R1"; EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String m2 = "M2"; EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String m3 = "M3"; EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); final String mockManagedVertexId_R2 = "R2"; mockInputVertices_R2.put(r1, eProp1); mockInputVertices_R2.put(m2, eProp2); mockInputVertices_R2.put(m3, eProp3); final VertexManagerPluginContext mockContext_R2 = mock(VertexManagerPluginContext.class); when(mockContext_R2.getInputVertexEdgeProperties()).thenReturn(mockInputVertices_R2); when(mockContext_R2.getVertexName()).thenReturn(mockManagedVertexId_R2); when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(3); when(mockContext_R2.getVertexNumTasks(r1)).thenReturn(3); when(mockContext_R2.getVertexNumTasks(m2)).thenReturn(3); when(mockContext_R2.getVertexNumTasks(m3)).thenReturn(3); final Map<String, EdgeManagerPlugin> edgeManagerR2 = new HashMap<String, 
EdgeManagerPlugin>(); doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) throws Exception { when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(2); edgeManagerR2.clear(); for (Entry<String, EdgeManagerPluginDescriptor> entry : ((Map<String, EdgeManagerPluginDescriptor>) invocation .getArguments()[2]).entrySet()) { final UserPayload userPayload = entry.getValue().getUserPayload(); EdgeManagerPluginContext emContext = new EdgeManagerPluginContext() { @Override public UserPayload getUserPayload() { return userPayload == null ? null : userPayload; } @Override public String getSourceVertexName() { return null; } @Override public String getDestinationVertexName() { return null; } @Override public int getSourceVertexNumTasks() { return 2; } @Override public int getDestinationVertexNumTasks() { return 2; } }; EdgeManagerPlugin edgeManager = ReflectionUtils.createClazzInstance( entry.getValue().getClassName(), new Class[] { EdgeManagerPluginContext.class }, new Object[] { emContext }); edgeManager.initialize(); edgeManagerR2.put(entry.getKey(), edgeManager); } return null; } }).when(mockContext_R2).setVertexParallelism(eq(2), any(VertexLocationHint.class), anyMap(), anyMap()); ByteBuffer payload = VertexManagerEventPayloadProto.newBuilder().setOutputSize(50L).build().toByteString() .asReadOnlyByteBuffer(); VertexManagerEvent vmEvent = VertexManagerEvent.create("Vertex", payload); // check initialization manager = createManager(conf, mockContext_R2, 0.001f, 0.001f); Assert.assertTrue(manager.bipartiteSources == 3); final HashSet<Integer> scheduledTasks = new HashSet<Integer>(); doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); scheduledTasks.clear(); List<TaskWithLocationHint> tasks = (List<TaskWithLocationHint>) args[0]; for (TaskWithLocationHint task : tasks) { scheduledTasks.add(task.getTaskIndex()); } return null; } 
}).when(mockContext_R2).scheduleVertexTasks(anyList()); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED)); manager.onVertexManagerEventReceived(vmEvent); Assert.assertEquals(3, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(9, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 9); //Send events for all tasks of m3. manager.onSourceTaskCompleted(m3, new Integer(0)); manager.onSourceTaskCompleted(m3, new Integer(1)); manager.onSourceTaskCompleted(m3, new Integer(2)); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 9); //Send an event for m2. But still we need to wait for at least 1 event from r1. manager.onSourceTaskCompleted(m2, new Integer(0)); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 9); //Ensure that setVertexParallelism is not called for R2. 
verify(mockContext_R2, times(0)).setVertexParallelism(anyInt(), any(VertexLocationHint.class), anyMap(), anyMap()); // complete configuration of r1 triggers the scheduling manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); verify(mockContext_R2, times(1)).setVertexParallelism(eq(1), any(VertexLocationHint.class), anyMap(), anyMap()); Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); //try with zero task vertices scheduledTasks.clear(); when(mockContext_R2.getInputVertexEdgeProperties()).thenReturn(mockInputVertices_R2); when(mockContext_R2.getVertexName()).thenReturn(mockManagedVertexId_R2); when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(3); when(mockContext_R2.getVertexNumTasks(r1)).thenReturn(0); when(mockContext_R2.getVertexNumTasks(m2)).thenReturn(0); when(mockContext_R2.getVertexNumTasks(m3)).thenReturn(3); manager = createManager(conf, mockContext_R2, 0.001f, 0.001f); manager.onVertexStarted(null); Assert.assertEquals(3, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 3); // Only need completed configuration notification from m3 manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED)); manager.onSourceTaskCompleted(m3, new Integer(0)); Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); }
From source file:org.apache.tez.dag.library.vertexmanager.TestShuffleVertexManager.java
License:Apache License
@Test(timeout = 5000) public void test_Tez1649_with_mixed_edges() { Configuration conf = new Configuration(); conf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true); conf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, 1000L); ShuffleVertexManager manager = null; HashMap<String, EdgeProperty> mockInputVertices = new HashMap<String, EdgeProperty>(); String r1 = "R1"; EdgeProperty eProp1 = EdgeProperty.create(EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String m2 = "M2"; EdgeProperty eProp2 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); String m3 = "M3"; EdgeProperty eProp3 = EdgeProperty.create(EdgeProperty.DataMovementType.BROADCAST, EdgeProperty.DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("out"), InputDescriptor.create("in")); final String mockManagedVertexId = "R2"; mockInputVertices.put(r1, eProp1);/*from w w w . j ava2s. 
c o m*/ mockInputVertices.put(m2, eProp2); mockInputVertices.put(m3, eProp3); VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3); when(mockContext.getVertexNumTasks(r1)).thenReturn(3); when(mockContext.getVertexNumTasks(m2)).thenReturn(3); when(mockContext.getVertexNumTasks(m3)).thenReturn(3); // check initialization manager = createManager(conf, mockContext, 0.001f, 0.001f); Assert.assertTrue(manager.bipartiteSources == 1); final HashSet<Integer> scheduledTasks = new HashSet<Integer>(); doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); scheduledTasks.clear(); List<TaskWithLocationHint> tasks = (List<TaskWithLocationHint>) args[0]; for (TaskWithLocationHint task : tasks) { scheduledTasks.add(task.getTaskIndex()); } return null; } }).when(mockContext).scheduleVertexTasks(anyList()); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED)); Assert.assertEquals(3, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); //Send events for 2 tasks of r1. manager.onSourceTaskCompleted(r1, new Integer(0)); manager.onSourceTaskCompleted(r1, new Integer(1)); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 3); //Send an event for m2. 
manager.onSourceTaskCompleted(m2, new Integer(0)); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 3); //Send an event for m2. manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED)); Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); //Scenario when numBipartiteSourceTasksCompleted == totalNumBipartiteSourceTasks. //Still, wait for a configuration to be completed from other edges scheduledTasks.clear(); manager = createManager(conf, mockContext, 0.001f, 0.001f); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3); when(mockContext.getVertexNumTasks(r1)).thenReturn(3); when(mockContext.getVertexNumTasks(m2)).thenReturn(3); when(mockContext.getVertexNumTasks(m3)).thenReturn(3); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 3); manager.onSourceTaskCompleted(r1, new Integer(0)); manager.onSourceTaskCompleted(r1, new Integer(1)); manager.onSourceTaskCompleted(r1, new Integer(2)); //Tasks from non-scatter edges of m2 and m3 are not complete. Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled manager.onVertexStateUpdated(new VertexStateUpdate(m2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED)); //Got an event from other edges. 
Schedule all Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); //try with a zero task vertex (with non-scatter-gather edges) scheduledTasks.clear(); manager = createManager(conf, mockContext, 0.001f, 0.001f); manager.onVertexStarted(null); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3); when(mockContext.getVertexNumTasks(r1)).thenReturn(3); //scatter gather when(mockContext.getVertexNumTasks(m2)).thenReturn(0); //broadcast when(mockContext.getVertexNumTasks(m3)).thenReturn(3); //broadcast manager = createManager(conf, mockContext, 0.001f, 0.001f); manager.onVertexStarted(null); manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); Assert.assertEquals(3, manager.pendingTasks.size()); // no tasks scheduled Assert.assertEquals(3, manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); //Send 2 events for tasks of r1. manager.onSourceTaskCompleted(r1, new Integer(0)); manager.onSourceTaskCompleted(r1, new Integer(1)); Assert.assertTrue(manager.pendingTasks.size() == 3); // no tasks scheduled Assert.assertTrue(scheduledTasks.size() == 0); // event from m3 triggers scheduling. 
no need for m2 since it has 0 tasks manager.onVertexStateUpdated(new VertexStateUpdate(m3, VertexState.CONFIGURED)); Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); //try with all zero task vertices in non-SG edges scheduledTasks.clear(); manager = createManager(conf, mockContext, 0.001f, 0.001f); manager.onVertexStarted(null); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3); when(mockContext.getVertexNumTasks(r1)).thenReturn(3); //scatter gather when(mockContext.getVertexNumTasks(m2)).thenReturn(0); //broadcast when(mockContext.getVertexNumTasks(m3)).thenReturn(0); //broadcast //Send 1 events for tasks of r1. manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); manager.onSourceTaskCompleted(r1, new Integer(0)); Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 3); }
From source file:org.apache.tez.dag.library.vertexmanager.TestShuffleVertexManagerUtils.java
License:Apache License
static ShuffleVertexManager createShuffleVertexManager(Configuration conf, VertexManagerPluginContext context, Boolean enableAutoParallelism, Long desiredTaskInputSize, Float min, Float max) { if (min != null) { conf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, min); } else {//www.j a va2 s. c o m conf.unset(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION); } if (max != null) { conf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, max); } else { conf.unset(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION); } if (enableAutoParallelism != null) { conf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, enableAutoParallelism); } if (desiredTaskInputSize != null) { conf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE, desiredTaskInputSize); } UserPayload payload; try { payload = TezUtils.createUserPayloadFromConf(conf); } catch (IOException e) { throw new RuntimeException(e); } when(context.getUserPayload()).thenReturn(payload); ShuffleVertexManager manager = new ShuffleVertexManager(context); manager.initialize(); return manager; }
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
/**
 * Assembles the "MRRSleepJob" DAG: one map vertex, optionally {@code iReduceStagesCount}
 * intermediate reduce vertices, and optionally a final reduce vertex, connected in a linear chain.
 *
 * <p>Side effects visible in this method: the jar containing this class is copied to
 * {@code remoteStagingDir} as {@code dag_job.jar} on {@code remoteFs}, and tokens for that path
 * are obtained into {@code this.credentials}.
 *
 * @param remoteFs file system that receives the job jar
 * @param conf base configuration; each stage gets its own {@code JobConf} copy of it
 * @param remoteStagingDir directory for the uploaded jar (and for splits when written to DFS)
 * @param numMapper number of map tasks (used as the vertex task count unless splits are generated
 *     in the AM)
 * @param numReducer number of final reduce tasks; {@code 0} means no final reduce vertex
 * @param iReduceStagesCount number of intermediate reduce stages
 * @param numIReducer number of tasks per intermediate reduce stage; intermediate stages are only
 *     created when both this and {@code iReduceStagesCount} are positive
 * @param mapSleepTime value stored under {@code MAP_SLEEP_TIME}
 * @param mapSleepCount value stored under {@code MAP_SLEEP_COUNT}
 * @param reduceSleepTime value stored under {@code REDUCE_SLEEP_TIME} for the final reduce stage
 * @param reduceSleepCount value stored under {@code REDUCE_SLEEP_COUNT} for the final reduce stage
 * @param iReduceSleepTime sleep time for intermediate reduce stages
 * @param iReduceSleepCount sleep count for intermediate reduce stages
 * @param writeSplitsToDFS when {@code true} and {@code generateSplitsInAM} is {@code false},
 *     splits are written via legacy split generation
 * @param generateSplitsInAM whether split generation is delegated to the AM
 * @return the assembled DAG
 * @throws IOException propagated from file-system and payload operations
 * @throws YarnException declared by the original signature
 * @throws TezUncheckedException if no jar containing this class is found on the classpath
 */
public DAG createDAG(FileSystem remoteFs, Configuration conf, Path remoteStagingDir, int numMapper,
    int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount,
    long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount,
    boolean writeSplitsToDFS, boolean generateSplitsInAM) throws IOException, YarnException {

  // ---- Map stage configuration (also carries every sleep parameter). ----
  Configuration mapStageConf = new JobConf(conf);

  mapStageConf.setInt(MRJobConfig.NUM_MAPS, numMapper);
  mapStageConf.setLong(MAP_SLEEP_TIME, mapSleepTime);
  mapStageConf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
  mapStageConf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime);
  mapStageConf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
  mapStageConf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
  mapStageConf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount);
  mapStageConf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount);
  mapStageConf.setInt(IREDUCE_TASKS_COUNT, numIReducer);
  mapStageConf.set(MRJobConfig.MAP_CLASS_ATTR, SleepMapper.class.getName());

  mapStageConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SleepInputFormat.class.getName());
  // With no reduce stage of any kind, the map stage is configured with the null output format.
  if (numIReducer == 0 && numReducer == 0) {
    mapStageConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());
  }

  // Presumably rewrites MR keys into their Tez equivalents -- confirm against MRHelpers.
  MRHelpers.translateMRConfToTez(mapStageConf);

  // ---- Intermediate reduce stage configurations (one JobConf per stage). ----
  Configuration[] intermediateReduceStageConfs = null;
  if (iReduceStagesCount > 0 && numIReducer > 0) {
    intermediateReduceStageConfs = new JobConf[iReduceStagesCount];
    // Loop index is 1-based; the array slot is i - 1.
    for (int i = 1; i <= iReduceStagesCount; ++i) {
      JobConf iReduceStageConf = new JobConf(conf);
      // Intermediate stages store their own sleep settings under the plain REDUCE_* keys.
      iReduceStageConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, iReduceSleepTime);
      iReduceStageConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, iReduceSleepCount);
      iReduceStageConf.setInt(MRJobConfig.NUM_REDUCES, numIReducer);
      iReduceStageConf.set(MRJobConfig.REDUCE_CLASS_ATTR, ISleepReducer.class.getName());
      iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
      iReduceStageConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
      iReduceStageConf.set(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());

      MRHelpers.translateMRConfToTez(iReduceStageConf);
      intermediateReduceStageConfs[i - 1] = iReduceStageConf;
    }
  }

  // ---- Final reduce stage configuration. ----
  Configuration finalReduceConf = null;
  if (numReducer > 0) {
    finalReduceConf = new JobConf(conf);
    finalReduceConf.setLong(MRRSleepJob.REDUCE_SLEEP_TIME, reduceSleepTime);
    finalReduceConf.setInt(MRRSleepJob.REDUCE_SLEEP_COUNT, reduceSleepCount);
    finalReduceConf.setInt(MRJobConfig.NUM_REDUCES, numReducer);
    finalReduceConf.set(MRJobConfig.REDUCE_CLASS_ATTR, SleepReducer.class.getName());
    finalReduceConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
    finalReduceConf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, IntWritable.class.getName());
    finalReduceConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, NullOutputFormat.class.getName());

    MRHelpers.translateMRConfToTez(finalReduceConf);
  }

  // Second pass over every stage conf, run only after all of them have been translated.
  MRHelpers.configureMRApiUsage(mapStageConf);
  if (iReduceStagesCount > 0 && numIReducer > 0) {
    for (int i = 0; i < iReduceStagesCount; ++i) {
      MRHelpers.configureMRApiUsage(intermediateReduceStageConfs[i]);
    }
  }
  if (numReducer > 0) {
    MRHelpers.configureMRApiUsage(finalReduceConf);
  }

  // ---- Input for the map vertex. ----
  DataSourceDescriptor dataSource = null;
  if (!generateSplitsInAM && writeSplitsToDFS) {

    LOG.info("Writing splits to DFS");
    dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(mapStageConf, remoteStagingDir, true);
  } else {
    dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
        .generateSplitsInAM(generateSplitsInAM).build();
  }

  // ---- Upload the jar holding this class so tasks can localize it. ----
  DAG dag = DAG.create("MRRSleepJob");
  String jarPath = ClassUtil.findContainingJar(getClass());
  if (jarPath == null) {
    throw new TezUncheckedException(
        "Could not find any jar containing" + " MRRSleepJob.class in the classpath");
  }
  Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
  remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
  // Status is read after the copy; its length and mtime go into the LocalResource below.
  FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);

  TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath }, mapStageConf);

  // Local resources shared by every vertex (only the job jar).
  Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
  LocalResource dagJarLocalRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
      LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
      jarFileStatus.getModificationTime());
  commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

  // Vertices are collected in execution order; edges are added between neighbours at the end.
  List<Vertex> vertices = new ArrayList<Vertex>();

  UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
  // -1 is passed as the task count when the AM generates splits
  // (presumably "decide at runtime" -- confirm against Vertex.create).
  int numTasks = generateSplitsInAM ? -1 : numMapper;

  // NOTE(review): both environments, including the reduce one, are derived from mapStageConf.
  Map<String, String> mapEnv = Maps.newHashMap();
  MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, mapEnv, true);
  Map<String, String> reduceEnv = Maps.newHashMap();
  MRHelpers.updateEnvBasedOnMRTaskEnv(mapStageConf, reduceEnv, false);

  Vertex mapVertex = Vertex.create("map",
      ProcessorDescriptor.create(MapProcessor.class.getName()).setUserPayload(mapUserPayload),
      numTasks, MRHelpers.getResourceForMRMapper(mapStageConf));
  mapVertex.addTaskLocalFiles(commonLocalResources).addDataSource("MRInput", dataSource)
      .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRMapper(mapStageConf)).setTaskEnvironment(mapEnv);
  vertices.add(mapVertex);

  if (iReduceStagesCount > 0 && numIReducer > 0) {
    for (int i = 0; i < iReduceStagesCount; ++i) {
      Configuration iconf = intermediateReduceStageConfs[i];
      UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
      // Vertex names are 1-based: "ireduce1", "ireduce2", ...
      Vertex ivertex = Vertex.create("ireduce" + (i + 1),
          ProcessorDescriptor.create(ReduceProcessor.class.getName())
              .setUserPayload(iReduceUserPayload),
          numIReducer, MRHelpers.getResourceForMRReducer(intermediateReduceStageConfs[i]));
      ivertex.addTaskLocalFiles(commonLocalResources)
          .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(intermediateReduceStageConfs[i]))
          .setTaskEnvironment(reduceEnv);
      vertices.add(ivertex);
    }
  }

  Vertex finalReduceVertex = null;
  if (numReducer > 0) {
    UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
    finalReduceVertex = Vertex.create("reduce",
        ProcessorDescriptor.create(ReduceProcessor.class.getName()).setUserPayload(reducePayload),
        numReducer, MRHelpers.getResourceForMRReducer(finalReduceConf));
    finalReduceVertex.addTaskLocalFiles(commonLocalResources)
        .addDataSink("MROutput",
            MROutputLegacy.createConfigBuilder(finalReduceConf, NullOutputFormat.class).build())
        .setTaskLaunchCmdOpts(MRHelpers.getJavaOptsForMRReducer(finalReduceConf))
        .setTaskEnvironment(reduceEnv);
    vertices.add(finalReduceVertex);
  } else {
    // Map only job
    // NOTE(review): this branch runs whenever numReducer == 0, including when intermediate
    // reduce vertices were added above; the sink is attached to the map vertex in either case.
    mapVertex.addDataSink("MROutput",
        MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
  }

  // One edge configuration is reused for every hop in the chain.
  Map<String, String> partitionerConf = Maps.newHashMap();
  partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), partitionerConf)
      .configureInput().useLegacyInput().done().build();

  // Register each vertex and link it to its predecessor in the list.
  for (int i = 0; i < vertices.size(); ++i) {
    dag.addVertex(vertices.get(i));
    if (i != 0) {
      dag.addEdge(
          Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
    }
  }

  return dag;
}
From source file:org.apache.tez.mapreduce.examples.MRRSleepJob.java
License:Apache License
@VisibleForTesting public Job createJob(int numMapper, int numReducer, int iReduceStagesCount, int numIReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount, long iReduceSleepTime, int iReduceSleepCount) throws IOException { Configuration conf = getConf(); conf.setLong(MAP_SLEEP_TIME, mapSleepTime); conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime); conf.setLong(IREDUCE_SLEEP_TIME, iReduceSleepTime); conf.setInt(MAP_SLEEP_COUNT, mapSleepCount); conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount); conf.setInt(IREDUCE_SLEEP_COUNT, iReduceSleepCount); conf.setInt(MRJobConfig.NUM_MAPS, numMapper); conf.setInt(IREDUCE_STAGES_COUNT, iReduceStagesCount); conf.setInt(IREDUCE_TASKS_COUNT, numIReducer); // Configure intermediate reduces conf.setInt(org.apache.tez.mapreduce.hadoop.MRJobConfig.MRR_INTERMEDIATE_STAGES, iReduceStagesCount); LOG.info("Running MRR with " + iReduceStagesCount + " IR stages"); for (int i = 1; i <= iReduceStagesCount; ++i) { // Set reducer class for intermediate reduce conf.setClass(//from w w w. 
j av a 2 s .c o m MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduce.class"), ISleepReducer.class, Reducer.class); // Set reducer output key class conf.setClass( MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.key.class"), IntWritable.class, Object.class); // Set reducer output value class conf.setClass(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.map.output.value.class"), IntWritable.class, Object.class); conf.setInt(MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(i, "mapreduce.job.reduces"), numIReducer); } Job job = Job.getInstance(conf, "sleep"); job.setNumReduceTasks(numReducer); job.setJarByClass(MRRSleepJob.class); job.setNumReduceTasks(numReducer); job.setMapperClass(SleepMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(SleepReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setInputFormatClass(SleepInputFormat.class); job.setPartitionerClass(MRRSleepJobPartitioner.class); job.setSpeculativeExecution(false); job.setJobName("Sleep job"); FileInputFormat.addInputPath(job, new Path("ignored")); return job; }
From source file:org.apache.tez.mapreduce.TestMRRJobs.java
License:Apache License
@BeforeClass public static void setup() throws IOException { try {//from w w w . j ava 2 s . c o m conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).racks(null).build(); remoteFs = dfsCluster.getFileSystem(); } catch (IOException io) { throw new RuntimeException("problem starting mini dfs cluster", io); } if (!(new File(MiniTezCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test."); return; } if (mrrTezCluster == null) { mrrTezCluster = new MiniTezCluster(TestMRRJobs.class.getName(), 1, 1, 1); Configuration conf = new Configuration(); conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir"); conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l); mrrTezCluster.init(conf); mrrTezCluster.start(); } }
From source file:org.apache.tez.test.MiniTezCluster.java
License:Apache License
@Override public void serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); // Use libs from cluster since no build is available conf.setBoolean(TezConfiguration.TEZ_USE_CLUSTER_HADOOP_LIBS, true); // blacklisting disabled to prevent scheduling issues conf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir" + Path.SEPARATOR).getAbsolutePath()); }// w ww .j av a 2s .c o m if (conf.get(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC) == null) { // nothing defined. set quick delete value conf.setLong(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, 0l); } File appJarLocalFile = new File(MiniTezCluster.APPJAR); if (!appJarLocalFile.exists()) { String message = "TezAppJar " + MiniTezCluster.APPJAR + " not found. Exiting."; LOG.info(message); throw new TezUncheckedException(message); } else { LOG.info("Using Tez AppJar: " + appJarLocalFile.getAbsolutePath()); } FileSystem fs = FileSystem.get(conf); Path testRootDir = fs.makeQualified(new Path("target", getName() + "-tmpDir")); Path appRemoteJar = new Path(testRootDir, "TezAppJar.jar"); // Copy AppJar and make it public. Path appMasterJar = new Path(MiniTezCluster.APPJAR); fs.copyFromLocalFile(appMasterJar, appRemoteJar); fs.setPermission(appRemoteJar, new FsPermission("777")); conf.set(TezConfiguration.TEZ_LIB_URIS, appRemoteJar.toUri().toString()); LOG.info("Set TEZ-LIB-URI to: " + conf.get(TezConfiguration.TEZ_LIB_URIS)); // VMEM monitoring disabled, PMEM monitoring enabled. 
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { Path stagingPath = FileContext.getFileContext(conf) .makeQualified(new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR))); /* * Re-configure the staging path on Windows if the file system is localFs. * We need to use a absolute path that contains the drive letter. The unit * test could run on a different drive than the AM. We can run into the * issue that job files are localized to the drive where the test runs on, * while the AM starts on a different drive and fails to find the job * metafiles. Using absolute path can avoid this ambiguity. */ if (Path.WINDOWS) { if (LocalFileSystem.class.isInstance(stagingPath.getFileSystem(conf))) { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(conf.get(MRJobConfig.MR_AM_STAGING_DIR)).getAbsolutePath()); } } FileContext fc = FileContext.getFileContext(stagingPath.toUri(), conf); if (fc.util().exists(stagingPath)) { LOG.info(stagingPath + " exists! deleting..."); fc.delete(stagingPath, true); } LOG.info("mkdir: " + stagingPath); fc.mkdir(stagingPath, null, true); //mkdir done directory as well String doneDir = JobHistoryUtils.getConfiguredHistoryServerDoneDirPrefix(conf); Path doneDirPath = fc.makeQualified(new Path(doneDir)); fc.mkdir(doneDirPath, null, true); } catch (IOException e) { throw new TezUncheckedException("Could not create staging directory. 
", e); } conf.set(MRConfig.MASTER_ADDRESS, "test"); //configure the shuffle service in NM conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] { ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID }); conf.setClass( String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID), ShuffleHandler.class, Service.class); // Non-standard shuffle port conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); conf.setClass(YarnConfiguration.NM_CONTAINER_EXECUTOR, DefaultContainerExecutor.class, ContainerExecutor.class); // TestMRJobs is for testing non-uberized operation only; see TestUberAM // for corresponding uberized tests. conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); super.serviceInit(conf); }