Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.giraph.aggregators.TextAggregatorWriter;
import org.apache.giraph.combiner.SimpleSumMessageCombiner;
import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.examples.GeneratedVertexReader;
import org.apache.giraph.examples.SimpleCombinerComputation;
import org.apache.giraph.examples.SimpleFailComputation;
import org.apache.giraph.examples.SimpleMasterComputeComputation;
import org.apache.giraph.examples.SimpleMsgComputation;
import org.apache.giraph.examples.SimplePageRankComputation;
import org.apache.giraph.examples.SimplePageRankComputation.SimplePageRankVertexInputFormat;
import org.apache.giraph.examples.SimpleShortestPathsComputation;
import org.apache.giraph.examples.SimpleSuperstepComputation;
import org.apache.giraph.examples.SimpleSuperstepComputation.SimpleSuperstepVertexInputFormat;
import org.apache.giraph.examples.SimpleSuperstepComputation.SimpleSuperstepVertexOutputFormat;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.io.VertexInputFormat;
import org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexOutputFormat;
import org.apache.giraph.job.GiraphJob;
import org.apache.giraph.job.HadoopUtils;
import org.apache.giraph.utils.NoOpComputation;
import org.apache.giraph.worker.InputSplitPathOrganizer;
import org.apache.giraph.zk.ZooKeeperExt;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.zookeeper.KeeperException;
import org.junit.Test;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;

/**
 * Unit test for many simple BSP applications.
 *
 * <p>Several tests behave differently (or are skipped) depending on
 * {@code runningInDistributedMode()}, which is inherited from
 * {@link BspCase}.
 */
public class TestBspBasic extends BspCase {

  /**
   * Creates the test case, passing this class' name to {@link BspCase}.
   */
  public TestBspBasic() {
    super(TestBspBasic.class.getName());
  }

  /**
   * Just instantiate the vertex (all functions are implemented) and the
   * VertexInputFormat using reflection.
   *
   * <p>The test only verifies that instantiation and split serialization do
   * not throw; the results are printed rather than asserted.
   *
   * @throws InstantiationException if propagated from job preparation or
   *         object creation
   * @throws IllegalAccessException if propagated from job preparation or
   *         object creation
   * @throws IOException if preparing the job, computing the splits or
   *         serializing a split fails
   * @throws InterruptedException if interrupted while computing the splits
   * @throws IllegalArgumentException if propagated from job preparation or
   *         object creation
   * @throws InvocationTargetException if propagated from job preparation or
   *         object creation
   * @throws SecurityException if propagated from job preparation or
   *         object creation
   * @throws NoSuchMethodException if propagated from job preparation or
   *         object creation
   */
  @Test
  public void testInstantiateVertex()
      throws InstantiationException, IllegalAccessException, IOException,
      InterruptedException, IllegalArgumentException,
      InvocationTargetException, SecurityException, NoSuchMethodException {
    System.out.println("testInstantiateVertex: java.class.path="
        + System.getProperty("java.class.path"));
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleSuperstepComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);

    // Parameterized (not raw) so that the created vertex and input format
    // are type-checked, matching testInstantiateNullVertex().
    ImmutableClassesGiraphConfiguration<LongWritable, IntWritable,
        FloatWritable> configuration =
        new ImmutableClassesGiraphConfiguration<LongWritable, IntWritable,
            FloatWritable>(job.getConfiguration());

    Vertex<LongWritable, IntWritable, FloatWritable> vertex =
        configuration.createVertex();
    vertex.initialize(new LongWritable(1), new IntWritable(1));
    System.out.println("testInstantiateVertex: Got vertex " + vertex);

    VertexInputFormat<LongWritable, IntWritable, FloatWritable> inputFormat =
        configuration.createWrappedVertexInputFormat();
    List<InputSplit> splitArray =
        inputFormat.getSplits(HadoopUtils.makeJobContext(), 1);

    // Serialize the first split to make sure it is writable.
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    DataOutputStream outputStream =
        new DataOutputStream(byteArrayOutputStream);
    ((Writable) splitArray.get(0)).write(outputStream);
    System.out.println("testInstantiateVertex: Example output split = "
        + byteArrayOutputStream.toString());
  }

  /**
   * No-op computation whose vertex id, vertex value, edge value and message
   * types are all {@link NullWritable}.
   */
  private static class NullComputation extends
      NoOpComputation<NullWritable, NullWritable, NullWritable,
          NullWritable> {
  }

  /**
   * Test whether vertices with NullWritable for vertex value type, edge value
   * type and message value type can be instantiated.
   *
   * @throws IOException declared for parity with the other tests
   */
  @Test
  public void testInstantiateNullVertex() throws IOException {
    GiraphConfiguration nullConf = new GiraphConfiguration();
    nullConf.setComputationClass(NullComputation.class);
    ImmutableClassesGiraphConfiguration<NullWritable, NullWritable,
        NullWritable> immutableClassesGiraphConfiguration =
        new ImmutableClassesGiraphConfiguration<NullWritable, NullWritable,
            NullWritable>(nullConf);

    NullWritable vertexValue =
        immutableClassesGiraphConfiguration.createVertexValue();
    NullWritable edgeValue =
        immutableClassesGiraphConfiguration.createEdgeValue();
    Writable messageValue = immutableClassesGiraphConfiguration
        .getOutgoingMessageValueFactory().newInstance();

    // JUnit's contract is assertSame(expected, actual).
    assertSame(NullWritable.class, vertexValue.getClass());
    // All three factories must hand back the very same instance.
    assertSame(vertexValue, edgeValue);
    assertSame(edgeValue, messageValue);
  }

  /**
   * Do some checks for local job runner.
   *
   * <p>Running with 5 workers must be rejected with an
   * {@link IllegalArgumentException}, both with and without
   * {@code SPLIT_MASTER_WORKER}; running with a single worker must not throw.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testLocalJobRunnerConfig()
      throws IOException, InterruptedException, ClassNotFoundException {
    if (runningInDistributedMode()) {
      System.out.println("testLocalJobRunnerConfig: Skipping for "
          + "non-local");
      return;
    }
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleSuperstepComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    // From here on, mutate the configuration the job actually uses.
    conf = job.getConfiguration();
    conf.setWorkerConfiguration(5, 5, 100.0f);

    GiraphConstants.SPLIT_MASTER_WORKER.set(conf, true);
    try {
      job.run(true);
      fail("Expected IllegalArgumentException with 5 workers and "
          + "SPLIT_MASTER_WORKER=true");
    } catch (IllegalArgumentException expected) {
      // Expected: this worker configuration must be rejected.
    }

    GiraphConstants.SPLIT_MASTER_WORKER.set(conf, false);
    try {
      job.run(true);
      fail("Expected IllegalArgumentException with 5 workers and "
          + "SPLIT_MASTER_WORKER=false");
    } catch (IllegalArgumentException expected) {
      // Expected: this worker configuration must be rejected.
    }

    // A single worker is expected to be accepted (must not throw).
    conf.setWorkerConfiguration(1, 1, 100.0f);
    job.run(true);
  }

  /**
   * Run a sample BSP job in JobTracker, kill a task, and make sure
   * the job fails (not enough attempts to restart)
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspFail()
      throws IOException, InterruptedException, ClassNotFoundException {
    // Allow this test only to be run on a real Hadoop setup
    if (!runningInDistributedMode()) {
      System.out.println("testBspFail: not executed for local setup.");
      return;
    }

    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleFailComputation.class);
    conf.setVertexInputFormatClass(SimplePageRankVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf,
        getTempPath(getCallingMethodName()));
    // A single map attempt, so a failed task cannot be retried.
    job.getConfiguration().setInt("mapred.map.max.attempts", 1);
    assertTrue(!job.run(true));
  }

  /**
   * Run a sample BSP job locally and test supersteps.
   *
   * @throws IOException if preparing or running the job, or reading the
   *         output file status, fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspSuperStep()
      throws IOException, InterruptedException, ClassNotFoundException {
    String callingMethod = getCallingMethodName();
    Path outputPath = getTempPath(callingMethod);
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleSuperstepComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    conf.setVertexOutputFormatClass(SimpleSuperstepVertexOutputFormat.class);
    GiraphJob job = prepareJob(callingMethod, conf, outputPath);
    Configuration configuration = job.getConfiguration();
    // GeneratedInputSplit will generate 10 vertices
    GeneratedVertexReader.READER_VERTICES.set(configuration, 10);
    assertTrue(job.run(true));
    if (!runningInDistributedMode()) {
      FileStatus fileStatus =
          getSinglePartFileStatus(configuration, outputPath);
      // Expected length, in bytes, of the single output part file.
      assertEquals(49L, fileStatus.getLen());
    }
  }

  /**
   * Run a sample BSP job locally and test messages.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspMsg()
      throws IOException, InterruptedException, ClassNotFoundException {
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleMsgComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    assertTrue(job.run(true));
  }

  /**
   * Run a sample BSP job locally with no vertices and make sure
   * it completes.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testEmptyVertexInputFormat()
      throws IOException, InterruptedException, ClassNotFoundException {
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleMsgComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    // Zero generated vertices: the job must still succeed.
    GeneratedVertexReader.READER_VERTICES.set(job.getConfiguration(), 0);
    assertTrue(job.run(true));
  }

  /**
   * Run a sample BSP job locally with message combiner and
   * checkout output value.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspCombiner()
      throws IOException, InterruptedException, ClassNotFoundException {
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleCombinerComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    conf.setMessageCombinerClass(SimpleSumMessageCombiner.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    assertTrue(job.run(true));
  }

  /**
   * Run a test to see if the InputSplitPathOrganizer can correctly sort
   * locality information from a mocked znode of data.
   *
   * <p>Three child znodes are mocked; only the one named {@code "local"}
   * lists the local host, so it must come first in the organized path list.
   *
   * @throws IOException if serializing the mocked host lists fails
   * @throws KeeperException declared by the mocked ZooKeeper calls
   * @throws InterruptedException declared by the mocked ZooKeeper calls
   */
  @Test
  public void testInputSplitPathOrganizer()
      throws IOException, KeeperException, InterruptedException {
    final List<String> testList = new ArrayList<String>();
    Collections.addAll(testList, "remote2", "local", "remote1");
    final String localHost = "node.LOCAL.com";
    final String testListName = "test_list_parent_znode";

    // build output just as we do to store hostlists in ZNODES
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    String last = "node.test4.com\tnode.test5.com\tnode.test6.com";
    Text.writeString(dos, last);
    byte[] remote1 = baos.toByteArray();

    baos = new ByteArrayOutputStream();
    dos = new DataOutputStream(baos);
    String middle = "node.test1.com\tnode.test2.com\tnode.test3.com";
    Text.writeString(dos, middle);
    byte[] remote2 = baos.toByteArray();

    baos = new ByteArrayOutputStream();
    dos = new DataOutputStream(baos);
    String first = "node.testx.com\tnode.LOCAL.com\tnode.testy.com";
    Text.writeString(dos, first);
    byte[] local = baos.toByteArray();

    ZooKeeperExt zk = mock(ZooKeeperExt.class);
    when(zk.getChildrenExt(testListName, false, false, true))
        .thenReturn(testList);
    when(zk.getData("remote1", false, null)).thenReturn(remote1);
    when(zk.getData("remote2", false, null)).thenReturn(remote2);
    when(zk.getData("local", false, null)).thenReturn(local);

    InputSplitPathOrganizer lis =
        new InputSplitPathOrganizer(zk, testListName, localHost, true);
    final List<String> resultList = Lists.newArrayList(lis.getPathList());
    assertEquals("local", resultList.get(0));
  }

  /**
   * Run a sample BSP job locally and test shortest paths.
   *
   * @throws IOException if preparing or running the job, or reading the
   *         results, fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspShortestPaths()
      throws IOException, InterruptedException, ClassNotFoundException {
    Path outputPath = getTempPath(getCallingMethodName());
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleShortestPathsComputation.class);
    conf.setVertexInputFormatClass(SimplePageRankVertexInputFormat.class);
    conf.setVertexOutputFormatClass(
        JsonLongDoubleFloatDoubleVertexOutputFormat.class);
    SimpleShortestPathsComputation.SOURCE_ID.set(conf, 0);
    GiraphJob job = prepareJob(getCallingMethodName(), conf, outputPath);

    assertTrue(job.run(true));

    int numResults = getNumResults(job.getConfiguration(), outputPath);
    // The expected result count differs between distributed and local runs.
    int expectedNumResults = runningInDistributedMode() ? 15 : 5;
    assertEquals(expectedNumResults, numResults);
  }

  /**
   * Run a sample BSP job locally and test PageRank with AggregatorWriter.
   *
   * <p>In local mode the aggregator values file written by
   * {@link TextAggregatorWriter} is parsed and compared against the final
   * min/max/sum reported by the PageRank worker context. The values file is
   * always deleted afterwards.
   *
   * @throws IOException if preparing or running the job, or accessing the
   *         aggregator values file, fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspPageRankWithAggregatorWriter()
      throws IOException, InterruptedException, ClassNotFoundException {
    Path outputPath = getTempPath(getCallingMethodName());

    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimplePageRankComputation.class);
    conf.setAggregatorWriterClass(TextAggregatorWriter.class);
    conf.setMasterComputeClass(
        SimplePageRankComputation.SimplePageRankMasterCompute.class);
    conf.setVertexInputFormatClass(SimplePageRankVertexInputFormat.class);
    conf.setVertexOutputFormatClass(
        SimplePageRankComputation.SimplePageRankVertexOutputFormat.class);
    conf.setWorkerContextClass(
        SimplePageRankComputation.SimplePageRankWorkerContext.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf, outputPath);
    GiraphConfiguration configuration = job.getConfiguration();
    Path aggregatorValues = getTempPath("aggregatorValues");
    configuration.setInt(TextAggregatorWriter.FREQUENCY,
        TextAggregatorWriter.ALWAYS);
    configuration.set(TextAggregatorWriter.FILENAME,
        aggregatorValues.toString());

    assertTrue(job.run(true));

    FileSystem fs = FileSystem.get(configuration);
    // The file that is read back carries a "_0" suffix on the configured
    // file name.
    Path valuesFile = new Path(aggregatorValues.toString() + "_0");

    try {
      if (!runningInDistributedMode()) {
        double maxPageRank =
            SimplePageRankComputation.SimplePageRankWorkerContext
                .getFinalMax();
        double minPageRank =
            SimplePageRankComputation.SimplePageRankWorkerContext
                .getFinalMin();
        long numVertices =
            SimplePageRankComputation.SimplePageRankWorkerContext
                .getFinalSum();
        System.out.println("testBspPageRank: maxPageRank=" + maxPageRank
            + " minPageRank=" + minPageRank + " numVertices=" + numVertices);

        FSDataInputStream in = null;
        BufferedReader reader = null;
        try {
          Map<Integer, Double> minValues = Maps.newHashMap();
          Map<Integer, Double> maxValues = Maps.newHashMap();
          Map<Integer, Long> vertexCounts = Maps.newHashMap();

          in = fs.open(valuesFile);
          reader = new BufferedReader(
              new InputStreamReader(in, Charsets.UTF_8));
          String line;
          while ((line = reader.readLine()) != null) {
            // Each line is parsed as
            // "<key>=<superstep>\t<aggregatorName>=<value>".
            String[] tokens = line.split("\t");
            int superstep = Integer.parseInt(tokens[0].split("=")[1]);
            String[] nameAndValue = tokens[1].split("=");
            String aggregatorName = nameAndValue[0];
            String value = nameAndValue[1];

            if ("min".equals(aggregatorName)) {
              minValues.put(superstep, Double.parseDouble(value));
            } else if ("max".equals(aggregatorName)) {
              maxValues.put(superstep, Double.parseDouble(value));
            } else if ("sum".equals(aggregatorName)) {
              vertexCounts.put(superstep, Long.parseLong(value));
            }
          }

          int maxSuperstep = SimplePageRankComputation.MAX_SUPERSTEPS;
          // One record per aggregator for each of MAX_SUPERSTEPS + 2
          // distinct superstep numbers is expected.
          assertEquals(maxSuperstep + 2, minValues.size());
          assertEquals(maxSuperstep + 2, maxValues.size());
          assertEquals(maxSuperstep + 2, vertexCounts.size());

          assertEquals(maxPageRank, maxValues.get(maxSuperstep), 0d);
          assertEquals(minPageRank, minValues.get(maxSuperstep), 0d);
          assertEquals(numVertices, (long) vertexCounts.get(maxSuperstep));
        } finally {
          // Close the outermost wrapper first, then the underlying stream
          // (which also covers the case where the reader was never created).
          Closeables.close(reader, true);
          Closeables.close(in, true);
        }
      }
    } finally {
      fs.delete(valuesFile, false);
    }
  }

  /**
   * Run a sample BSP job locally and test MasterCompute.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testBspMasterCompute()
      throws IOException, InterruptedException, ClassNotFoundException {
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setComputationClass(SimpleMasterComputeComputation.class);
    conf.setVertexInputFormatClass(SimplePageRankVertexInputFormat.class);
    conf.setMasterComputeClass(
        SimpleMasterComputeComputation.SimpleMasterCompute.class);
    conf.setWorkerContextClass(
        SimpleMasterComputeComputation.SimpleMasterComputeWorkerContext.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    assertTrue(job.run(true));
    if (!runningInDistributedMode()) {
      double finalSum =
          SimpleMasterComputeComputation.SimpleMasterComputeWorkerContext
              .getFinalSum();
      System.out.println("testBspMasterCompute: finalSum=" + finalSum);
      assertEquals(32.5, finalSum, 0d);
    }
  }

  /**
   * Test halting at superstep 0
   *
   * <p>The maximum number of supersteps is set to 0 and the job must still
   * report success.
   *
   * @throws IOException if preparing or running the job fails
   * @throws InterruptedException if interrupted while running the job
   * @throws ClassNotFoundException if propagated from running the job
   */
  @Test
  public void testHaltSuperstep0()
      throws IOException, InterruptedException, ClassNotFoundException {
    GiraphConfiguration conf = new GiraphConfiguration();
    GiraphConstants.MAX_NUMBER_OF_SUPERSTEPS.set(conf, 0);
    conf.setComputationClass(SimpleMsgComputation.class);
    conf.setVertexInputFormatClass(SimpleSuperstepVertexInputFormat.class);
    GiraphJob job = prepareJob(getCallingMethodName(), conf);
    assertTrue(job.run(true));
  }
}