Java tutorial
/* * Cloud9: A Hadoop toolkit for working with big data * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package edu.umd.shrawanraina; import java.io.IOException; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; import tl.lin.data.array.ArrayListOfFloatsWritable; import tl.lin.data.array.ArrayListOfIntsWritable; import tl.lin.data.map.HMapIF; import tl.lin.data.map.MapIF; import com.google.common.base.Preconditions; import edu.umd.cloud9.mapreduce.lib.input.NonSplitableSequenceFileInputFormat; /** * <p> * Main driver program for running the basic (non-Schimmy) implementation of * PageRank. * </p> * * <p> * The starting and ending iterations will correspond to paths * <code>/base/path/iterXXXX</code> and <code>/base/path/iterYYYY</code>. As a * example, if you specify 0 and 10 as the starting and ending iterations, the * driver program will start with the graph structure stored at * <code>/base/path/iter0000</code>; final results will be stored at * <code>/base/path/iter0010</code>. * </p> * * @see RunPageRankSchimmy * @author Jimmy Lin * @author Michael Schatz */ public class RunPersonalizedPageRankBasic extends Configured implements Tool { private static final Logger LOG = Logger.getLogger(RunPersonalizedPageRankBasic.class); private static enum PageRank { nodes, edges, massMessages, massMessagesSaved, massMessagesReceived, missingStructure }; // Mapper, no in-mapper combining. private static class MapClass extends Mapper<IntWritable, PageRankNodeUpd, IntWritable, PageRankNodeUpd> { // The neighbor to which we're sending messages. private static final IntWritable neighbor = new IntWritable(); // Contents of the messages: partial PageRank mass. private static final PageRankNodeUpd intermediateMass = new PageRankNodeUpd(); // For passing along node structure. private static final PageRankNodeUpd intermediateStructure = new PageRankNodeUpd(); private static Map<Integer, Integer> SOURCE_MAP = new HashMap<Integer, Integer>(); @Override public void setup(Context context) { List<String> sourceList = Arrays.asList(context.getConfiguration().getStrings("sources")); for (String src : sourceList) SOURCE_MAP.put(Integer.parseInt(src), sourceList.indexOf(src)); if (SOURCE_MAP.size() == 0) { throw new RuntimeException(SOURCE_MAP + " cannot be empty"); } } @Override public void map(IntWritable nid, PageRankNodeUpd node, Context context) throws IOException, InterruptedException { // Pass along node structure. intermediateStructure.setNodeId(node.getNodeId()); intermediateStructure.setType(PageRankNodeUpd.Type.Structure); intermediateStructure.setAdjacencyList(node.getAdjacenyList()); //System.out.println("Id: 1 <<<<<<< "+nid); //System.out.println("pagerank: 1 <<<<<<< "+intermediateStructure.getPageRank()); //System.out.println("pagerankList: 1 <<<<<<< "+intermediateStructure.getPageRankList()); context.write(nid, intermediateStructure); int massMessages = 0; // Distribute PageRank mass to neighbors (along outgoing edges). if (node.getAdjacenyList().size() > 0) { // Each neighbor gets an equal share of PageRank mass. ArrayListOfIntsWritable list = node.getAdjacenyList(); ArrayListOfFloatsWritable pagerank = node.getPageRankList(); for (int i = 0; i < pagerank.size(); ++i) { pagerank.set(i, pagerank.get(i) - (float) StrictMath.log(list.size())); } //float mass = node.getPageRank() - (float) StrictMath.log(list.size()); context.getCounter(PageRank.edges).increment(list.size()); // Iterate over neighbors. for (int i = 0; i < list.size(); i++) { neighbor.set(list.get(i)); intermediateMass.setNodeId(list.get(i)); intermediateMass.setType(PageRankNodeUpd.Type.Mass); //intermediateMass.setPageRank(mass); intermediateMass.setPageRankList(pagerank); //System.out.println("Id: 2 <<<<<<< "+neighbor); //System.out.println("pagerank: 2 <<<<<<< "+node.getPageRank()); //System.out.println("pagerankList: 2 <<<<<<< "+node.getPageRankList()); // Emit messages with PageRank mass to neighbors. context.write(neighbor, intermediateMass); massMessages++; } } // Bookkeeping. context.getCounter(PageRank.nodes).increment(1); context.getCounter(PageRank.massMessages).increment(massMessages); } } // Mapper with in-mapper combiner optimization. private static class MapWithInMapperCombiningClass extends Mapper<IntWritable, PageRankNodeUpd, IntWritable, PageRankNodeUpd> { // For buffering PageRank mass contributes keyed by destination node. private static final Map<Integer, ArrayListOfFloatsWritable> map1 = new HashMap<Integer, ArrayListOfFloatsWritable>(); private static final HMapIF map2 = new HMapIF(); private static Map<Integer, Integer> SOURCE_MAP = new HashMap<Integer, Integer>(); // For passing along node structure. private static final PageRankNodeUpd intermediateStructure = new PageRankNodeUpd(); @Override public void setup(Context context) { List<String> sourceList = Arrays.asList(context.getConfiguration().getStrings("sources")); for (String src : sourceList) SOURCE_MAP.put(Integer.parseInt(src), sourceList.indexOf(src)); if (SOURCE_MAP.size() == 0) { throw new RuntimeException(SOURCE_MAP + " cannot be empty"); } } @Override public void map(IntWritable nid, PageRankNodeUpd node, Context context) throws IOException, InterruptedException { // Pass along node structure. intermediateStructure.setNodeId(node.getNodeId()); intermediateStructure.setType(PageRankNodeUpd.Type.Structure); intermediateStructure.setAdjacencyList(node.getAdjacenyList()); context.write(nid, intermediateStructure); int massMessages = 0; int massMessagesSaved = 0; // Distribute PageRank mass to neighbors (along outgoing edges). if (node.getAdjacenyList().size() > 0) { // Each neighbor gets an equal share of PageRank mass. ArrayListOfIntsWritable list = node.getAdjacenyList(); ArrayListOfFloatsWritable pagerank = node.getPageRankList(); for (int i = 0; i < pagerank.size(); ++i) { pagerank.set(i, pagerank.get(i) - (float) StrictMath.log(list.size())); } //float mass = node.getPageRank() - (float) StrictMath.log(list.size()); context.getCounter(PageRank.edges).increment(list.size()); // Iterate over neighbors. for (int i = 0; i < list.size(); i++) { int neighbor = list.get(i); if (map1.containsKey(neighbor)) { // Already message destined for that node; add PageRank // mass contribution. massMessagesSaved++; ArrayListOfFloatsWritable temp = map1.get(neighbor); for (int j = 0; j < temp.size(); j++) { pagerank.set(i, sumLogProbs(temp.get(j), pagerank.get(j))); } map1.put(neighbor, pagerank); //map2.put(neighbor, sumLogProbs(map2.get(neighbor), mass)); } else { // New destination node; add new entry in map. massMessages++; map1.put(neighbor, pagerank); //map2.put(neighbor, mass); } } } // Bookkeeping. context.getCounter(PageRank.nodes).increment(1); context.getCounter(PageRank.massMessages).increment(massMessages); context.getCounter(PageRank.massMessagesSaved).increment(massMessagesSaved); } @Override public void cleanup(Context context) throws IOException, InterruptedException { // Now emit the messages all at once. IntWritable k = new IntWritable(); PageRankNodeUpd mass = new PageRankNodeUpd(); for (Map.Entry<Integer, ArrayListOfFloatsWritable> e : map1.entrySet()) { k.set(e.getKey()); mass.setNodeId(e.getKey()); mass.setType(PageRankNodeUpd.Type.Mass); mass.setPageRankList(e.getValue()); //mass.setPageRank(map2.get(e.getKey())); //System.out.println("Id: 2 <<<<<<<"+mass.getNodeId()); //System.out.println("pagerank: 2 <<<<<<<"+mass.getPageRank()); //System.out.println("pagerankList: 2 <<<<<<<"+mass.getPageRankList()); context.write(k, mass); } } } // Reduce: sums incoming PageRank contributions, rewrite graph structure. private static class ReduceClass extends Reducer<IntWritable, PageRankNodeUpd, IntWritable, PageRankNodeUpd> { // For keeping track of PageRank mass encountered, so we can compute // missing PageRank mass lost // through dangling nodes. private float totalMass = Float.NEGATIVE_INFINITY; private static Map<Integer, Integer> SOURCE_MAP = new HashMap<Integer, Integer>(); @Override public void setup(Context context) { List<String> sourceList = Arrays.asList(context.getConfiguration().getStrings("sources")); //System.out.println("sourceList:<<<<<<< "+sourceList); for (String src : sourceList) SOURCE_MAP.put(Integer.parseInt(src), sourceList.indexOf(src)); if (SOURCE_MAP.size() == 0) { throw new RuntimeException(SOURCE_MAP + " cannot be empty"); } } @Override public void reduce(IntWritable nid, Iterable<PageRankNodeUpd> iterable, Context context) throws IOException, InterruptedException { Iterator<PageRankNodeUpd> values = iterable.iterator(); // Create the node structure that we're going to assemble back // together from shuffled pieces. PageRankNodeUpd node = new PageRankNodeUpd(); node.setType(PageRankNodeUpd.Type.Complete); node.setNodeId(nid.get()); int massMessagesReceived = 0; int structureReceived = 0; //float mass = Float.NEGATIVE_INFINITY; float[] massList = new float[SOURCE_MAP.size()]; //System.out.println("source size:<<<<<<< "+SOURCE_MAP); for (int i = 0; i < massList.length; ++i) { massList[i] = Float.NEGATIVE_INFINITY; } while (values.hasNext()) { PageRankNodeUpd n = values.next(); if (n.getType().equals(PageRankNodeUpd.Type.Structure)) { // This is the structure; update accordingly. ArrayListOfIntsWritable list = n.getAdjacenyList(); structureReceived++; node.setAdjacencyList(list); } else { // This is a message that contains PageRank mass; // accumulate. ArrayListOfFloatsWritable pagerank = n.getPageRankList(); for (int i = 0; i < massList.length; i++) { //System.out.println("mass size:<<<<<<< "+massList.length); //System.out.println("pagerank size:<<<<<<< "+pagerank.size()); massList[i] = sumLogProbs(massList[i], pagerank.get(i)); } //mass = sumLogProbs(mass, n.getPageRank()); massMessagesReceived++; } } // Update the final accumulated PageRank mass. node.setPageRankList(new ArrayListOfFloatsWritable(massList)); //node.setPageRank(mass); context.getCounter(PageRank.massMessagesReceived).increment(massMessagesReceived); // Error checking. if (structureReceived == 1) { // Everything checks out, emit final node structure with updated // PageRank value. //System.out.println("Id: 3 <<<<<<<"+nid); //System.out.println("pagerank: 3 <<<<<<<"+node.getPageRank()); //System.out.println("pagerankList: 3 <<<<<<<"+node.getPageRankList()); context.write(nid, node); // Keep track of total PageRank mass. totalMass = sumLogProbs(totalMass, massList[0]); //System.out.println("TotalMass: 4 <<<<<<<"+totalMass); } else if (structureReceived == 0) { // We get into this situation if there exists an edge pointing // to a node which has no // corresponding node structure (i.e., PageRank mass was passed // to a non-existent node)... // log and count but move on. context.getCounter(PageRank.missingStructure).increment(1); LOG.warn("No structure received for nodeid: " + nid.get() + " mass: " + massMessagesReceived); // It's important to note that we don't add the PageRank mass to // total... if PageRank mass // was sent to a non-existent node, it should simply vanish. } else { // This shouldn't happen! throw new RuntimeException("Multiple structure received for nodeid: " + nid.get() + " mass: " + massMessagesReceived + " struct: " + structureReceived); } } @Override public void cleanup(Context context) throws IOException { Configuration conf = context.getConfiguration(); String taskId = conf.get("mapred.task.id"); String path = conf.get("PageRankMassPath"); Preconditions.checkNotNull(taskId); Preconditions.checkNotNull(path); // Write to a file the amount of PageRank mass we've seen in this // reducer. FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataOutputStream out = fs.create(new Path(path + "/" + taskId), false); //System.out.println("Path: <<<<<<< "+path + "/" + taskId); out.writeFloat(totalMass); out.close(); } } // Mapper that distributes the missing PageRank mass (lost at the dangling // nodes) and takes care // of the random jump factor. private static class MapPageRankMassDistributionClass extends Mapper<IntWritable, PageRankNodeUpd, IntWritable, PageRankNodeUpd> { private static Map<Integer, Integer> SOURCE_MAP = new HashMap<Integer, Integer>(); private float missingMass = 0.0f; private int nodeCnt = 0; @Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); List<String> sourceList = Arrays.asList(conf.getStrings("sources")); for (String src : sourceList) SOURCE_MAP.put(Integer.parseInt(src), sourceList.indexOf(src)); if (SOURCE_MAP.size() == 0) { throw new RuntimeException(SOURCE_MAP + " cannot be empty"); } missingMass = conf.getFloat("MissingMass", 0.0f); nodeCnt = conf.getInt("NodeCount", 0); } @Override public void map(IntWritable nid, PageRankNodeUpd node, Context context) throws IOException, InterruptedException { ArrayListOfFloatsWritable pList = node.getPageRankList(); //float p = node.getPageRank(); //float jump = (float) (Math.log(ALPHA) - Math.log(nodeCnt)); //float link = (float) Math.log(1.0f - ALPHA) + sumLogProbs(p,(float) (Math.log(missingMass) - Math.log(nodeCnt))); //p = sumLogProbs(jump, link); //node.setPageRank(p); for (int i = 0; i < SOURCE_MAP.size(); i++) { float jump = Float.NEGATIVE_INFINITY; float link = Float.NEGATIVE_INFINITY; if (SOURCE_MAP.containsKey(nid.get())) { int pos = SOURCE_MAP.get(nid.get()); if (pos == i) { jump = (float) Math.log(ALPHA); link = (float) Math.log(1.0f - ALPHA) + sumLogProbs(pList.get(i), (float) Math.log(missingMass)); } else { link = (float) Math.log(1.0f - ALPHA) + pList.get(i); } } else { link = (float) Math.log(1.0f - ALPHA) + pList.get(i); } pList.set(i, sumLogProbs(jump, link)); node.setPageRankList(new ArrayListOfFloatsWritable(pList)); } //System.out.println("Id: 4 <<<<<<< "+nid); //System.out.println("pagerank: 4 <<<<<<< "+node.getPageRank()); //System.out.println("pagerankList: 4 <<<<<<< "+node.getPageRankList()); context.write(nid, node); } } // Random jump factor. private static float ALPHA = 0.15f; private static NumberFormat formatter = new DecimalFormat("0000"); /** * Dispatches command-line arguments to the tool via the {@code ToolRunner}. */ public static void main(String[] args) throws Exception { ToolRunner.run(new RunPersonalizedPageRankBasic(), args); } public RunPersonalizedPageRankBasic() { } private static final String BASE = "base"; private static final String NUM_NODES = "numNodes"; private static final String START = "start"; private static final String END = "end"; private static final String COMBINER = "useCombiner"; private static final String INMAPPER_COMBINER = "useInMapperCombiner"; private static final String RANGE = "range"; private static final String SOURCES = "sources"; /** * Runs this tool. */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(COMBINER, "use combiner")); options.addOption(new Option(INMAPPER_COMBINER, "user in-mapper combiner")); options.addOption(new Option(RANGE, "use range partitioner")); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("base path").create(BASE)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("start iteration").create(START)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("end iteration").create(END)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); options.addOption( OptionBuilder.withArgName("node").hasArg().withDescription("source nodes").create(SOURCES)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(BASE) || !cmdline.hasOption(START) || !cmdline.hasOption(END) || !cmdline.hasOption(NUM_NODES) || !cmdline.hasOption(SOURCES)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String basePath = cmdline.getOptionValue(BASE); int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); int s = Integer.parseInt(cmdline.getOptionValue(START)); int e = Integer.parseInt(cmdline.getOptionValue(END)); String sources = cmdline.getOptionValue(SOURCES); boolean useCombiner = cmdline.hasOption(COMBINER); boolean useInmapCombiner = cmdline.hasOption(INMAPPER_COMBINER); boolean useRange = cmdline.hasOption(RANGE); LOG.info("Tool name: RunPageRank"); LOG.info(" - base path: " + basePath); LOG.info(" - num nodes: " + n); LOG.info(" - start iteration: " + s); LOG.info(" - end iteration: " + e); LOG.info(" - sources: " + Arrays.asList(sources.split("\\s*(,)\\s*"))); LOG.info(" - use combiner: " + useCombiner); LOG.info(" - use in-mapper combiner: " + useInmapCombiner); LOG.info(" - user range partitioner: " + useRange); Configuration conf = getConf(); conf.setStrings("sources", sources); // Iterate PageRank. for (int i = s; i < e; i++) { iteratePageRank(sources, i, i + 1, basePath, n, useCombiner, useInmapCombiner); } return 0; } // Run each iteration. private void iteratePageRank(String sources, int i, int j, String basePath, int numNodes, boolean useCombiner, boolean useInMapperCombiner) throws Exception { // Each iteration consists of two phases (two MapReduce jobs). // Job 1: distribute PageRank mass along outgoing edges. float mass = phase1(sources, i, j, basePath, numNodes, useCombiner, useInMapperCombiner); // Find out how much PageRank mass got lost at the dangling nodes. float missing = 1.0f - (float) StrictMath.exp(mass); // Job 2: distribute missing mass, take care of random jump factor. phase2(sources, i, j, missing, basePath, numNodes); } private float phase1(String sources, int i, int j, String basePath, int numNodes, boolean useCombiner, boolean useInMapperCombiner) throws Exception { Job job = Job.getInstance(getConf()); job.setJobName("PageRank:Basic:iteration" + j + ":Phase1"); job.setJarByClass(RunPersonalizedPageRankBasic.class); String in = basePath + "/iter" + formatter.format(i); String out = basePath + "/iter" + formatter.format(j) + "t"; String outm = out + "-mass"; // We need to actually count the number of part files to get the number // of partitions (because // the directory might contain _log). int numPartitions = 0; for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) { if (s.getPath().getName().contains("part-")) numPartitions++; } //System.out.println("Output <<<<<<<: "+outm); LOG.info("PageRank: iteration " + j + ": Phase1"); LOG.info(" - input: " + in); LOG.info(" - output: " + out); LOG.info(" - nodeCnt: " + numNodes); LOG.info(" - useCombiner: " + useCombiner); LOG.info(" - useInmapCombiner: " + useInMapperCombiner); LOG.info("computed number of partitions: " + numPartitions); LOG.info(" - sources: " + Arrays.asList(sources.split("\\s*(,)\\s*"))); int numReduceTasks = numPartitions; job.getConfiguration().setInt("NodeCount", numNodes); job.getConfiguration().setInt("NodeCount", numNodes); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); // job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m"); job.getConfiguration().set("PageRankMassPath", outm); job.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(job, new Path(in)); FileOutputFormat.setOutputPath(job, new Path(out)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNodeUpd.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNodeUpd.class); //job.setMapperClass(useInMapperCombiner ? MapWithInMapperCombiningClass.class: MapClass.class); //job.setMapperClass(MapWithInMapperCombiningClass.class); job.setMapperClass(MapClass.class); if (useCombiner) { //job.setCombinerClass(CombineClass.class); } job.setReducerClass(ReduceClass.class); FileSystem.get(getConf()).delete(new Path(out), true); FileSystem.get(getConf()).delete(new Path(outm), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); float mass = Float.NEGATIVE_INFINITY; FileSystem fs = FileSystem.get(getConf()); for (FileStatus f : fs.listStatus(new Path(outm))) { FSDataInputStream fin = fs.open(f.getPath()); mass = sumLogProbs(mass, fin.readFloat()); fin.close(); } return mass; } private void phase2(String sources, int i, int j, float missing, String basePath, int numNodes) throws Exception { Job job = Job.getInstance(getConf()); job.setJobName("PageRank:Basic:iteration" + j + ":Phase2"); job.setJarByClass(RunPersonalizedPageRankBasic.class); LOG.info("missing PageRank mass: " + missing); LOG.info("number of nodes: " + numNodes); String in = basePath + "/iter" + formatter.format(j) + "t"; String out = basePath + "/iter" + formatter.format(j); LOG.info("PageRank: iteration " + j + ": Phase2"); LOG.info(" - input: " + in); LOG.info(" - output: " + out); LOG.info(" - sources: " + Arrays.asList(sources.split("\\s*(,)\\s*"))); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().setFloat("MissingMass", (float) missing); job.getConfiguration().setInt("NodeCount", numNodes); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(in)); FileOutputFormat.setOutputPath(job, new Path(out)); job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNodeUpd.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNodeUpd.class); job.setMapperClass(MapPageRankMassDistributionClass.class); FileSystem.get(getConf()).delete(new Path(out), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); } // Adds two log probs. private static float sumLogProbs(float a, float b) { if (a == Float.NEGATIVE_INFINITY) return b; if (b == Float.NEGATIVE_INFINITY) return a; if (a < b) { return (float) (b + StrictMath.log1p(StrictMath.exp(a - b))); } return (float) (a + StrictMath.log1p(StrictMath.exp(b - a))); } }