at.illecker.hama.rootbeer.examples.pagerank.cpu.PageRankCpu.java Source code

Java tutorial

Introduction

Here is the source code for at.illecker.hama.rootbeer.examples.pagerank.cpu.PageRankCpu.java

Source

package at.illecker.hama.rootbeer.examples.pagerank.cpu;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.HashPartitioner;
import org.apache.hama.bsp.SequenceFileInputFormat;
import org.apache.hama.bsp.TextOutputFormat;
import org.apache.hama.commons.io.TextArrayWritable;
import org.apache.hama.graph.AverageAggregator;
import org.apache.hama.graph.Edge;
import org.apache.hama.graph.GraphJob;
import org.apache.hama.graph.Vertex;
import org.apache.hama.graph.VertexInputReader;

/**
 * PageRank implemented as an Apache Hama graph job running on the CPU.
 *
 * <p>Adapted from the Apache Hama example, which the original author describes
 * as "Real pagerank with dangling node contribution":
 * https://github.com/apache/hama/blob/trunk/examples/src/main/java/org/apache/hama/examples/PageRank.java
 *
 * <p>NOTE(review): {@link #createJob(String[], HamaConfiguration)} enables
 * {@code hama.graph.self.ref} with a comment stating that there is <em>no</em>
 * dangling node contribution here, which contradicts the description above -
 * confirm which statement is current.
 *
 * <p>Timings recorded by the original author for 100000 nodes / 1000000 edges
 * (8 cores, 8GB mem) using 8 bsp tasks: Hama 0.6.2: 98.794 secs,
 * Hama 0.5.0: 84.925 secs.
 */
public class PageRankCpu {
    private static final Log LOG = LogFactory.getLog(PageRankCpu.class);

    /** Configuration key holding the damping factor. */
    private static final String CONF_DAMPING_FACTOR = "hama.pagerank.alpha";

    /** Configuration key holding the convergence threshold. */
    private static final String CONF_MAX_CONVERGENCE_ERROR = "hama.graph.max.convergence.error";

    /**
     * Vertex implementation holding the rank of a single page as its value.
     */
    public static class PageRankVertexCpu extends Vertex<Text, NullWritable, DoubleWritable> {

        // DAMPING_FACTOR: the probability, at any step,
        // that the person will continue
        // Both fields are static and are overwritten from the job configuration
        // by setup(); every vertex instance therefore shares the same values.
        static double DAMPING_FACTOR = 0.85;
        static double MAXIMUM_CONVERGENCE_ERROR = 0.001;

        /**
         * Reads the damping factor and the convergence threshold from the
         * configuration. A key that is absent leaves the current value untouched.
         *
         * @param conf job configuration to read the two settings from
         * @throws NumberFormatException if a configured value is not a parsable double
         */
        @Override
        public void setup(HamaConfiguration conf) {
            String val = conf.get(CONF_DAMPING_FACTOR);
            if (val != null) {
                DAMPING_FACTOR = Double.parseDouble(val);
            }
            val = conf.get(CONF_MAX_CONVERGENCE_ERROR);
            if (val != null) {
                MAXIMUM_CONVERGENCE_ERROR = Double.parseDouble(val);
            }
        }

        /**
         * Runs one superstep for this vertex.
         *
         * <p>In superstep 0 the value is initialised to {@code 1 / numVertices}.
         * In every later superstep the incoming messages are summed and the value
         * becomes {@code (1 - d) / numVertices + d * sum}, where {@code d} is
         * {@link #DAMPING_FACTOR}; the new value is then handed to aggregator 0.
         * Afterwards the vertex either votes to halt or sends
         * {@code value / edgeCount} to all of its neighbours.
         *
         * @param messages values received from neighbouring vertices
         * @throws IOException declared by the overridden method; propagated from
         *         the framework calls made here
         */
        @Override
        public void compute(Iterable<DoubleWritable> messages) throws IOException {
            final long superstep = this.getSuperstepCount();

            // initialize this vertex to 1 / count of global vertices in this graph
            if (superstep == 0) {
                this.setValue(new DoubleWritable(1.0 / this.getNumVertices()));

            } else if (superstep >= 1) {

                double sum = 0;
                for (DoubleWritable msg : messages) {
                    sum += msg.get();
                }
                double alpha = (1.0d - DAMPING_FACTOR) / this.getNumVertices();
                setValue(new DoubleWritable(alpha + (sum * DAMPING_FACTOR)));
                aggregate(0, this.getValue());
            }

            // if we have not reached our global error yet, then proceed.
            DoubleWritable globalError = getAggregatedValue(0);

            boolean converged = globalError != null && superstep > 2
                    && MAXIMUM_CONVERGENCE_ERROR > globalError.get();

            if (converged) {
                // Logged at debug level instead of printed to stdout: this branch
                // is executed by every vertex that halts.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Halting, aggregated value: " + globalError);
                }
                voteToHalt();
            } else {
                // in each superstep we are going to send a new rank to our neighbours
                sendMessageToNeighbors(new DoubleWritable(this.getValue().get() / this.getEdges().size()));
            }
        }
    }

    /**
     * Builds vertices from sequence-file records: the key becomes the vertex
     * id and every element of the value array becomes an outgoing edge.
     */
    public static class PagerankSeqReader
            extends VertexInputReader<Text, TextArrayWritable, Text, NullWritable, DoubleWritable> {

        /**
         * Populates {@code vertex} from a single input record.
         *
         * @param key    id of the vertex
         * @param value  ids of the vertices this vertex links to; each element is
         *               cast to {@link Text}
         * @param vertex vertex instance to fill
         * @return always {@code true}
         * @throws Exception declared by the overridden method
         */
        @Override
        public boolean parseVertex(Text key, TextArrayWritable value,
                Vertex<Text, NullWritable, DoubleWritable> vertex) throws Exception {
            vertex.setVertexID(key);

            for (Writable v : value.get()) {
                // edges carry no value, hence the null edge cost
                vertex.addEdge(new Edge<Text, NullWritable>((Text) v, null));
            }

            return true;
        }
    }

    /**
     * Creates and configures the PageRank graph job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output
     *             path; when exactly three arguments are given, {@code args[2]}
     *             is parsed as the number of BSP tasks
     * @param conf configuration the job is created from
     * @return the configured job, not yet submitted
     * @throws IOException declared for the job construction and setup calls
     * @throws NumberFormatException if {@code args[2]} is used and is not an integer
     */
    public static GraphJob createJob(String[] args, HamaConfiguration conf) throws IOException {
        GraphJob job = new GraphJob(conf, PageRankCpu.class);
        job.setJobName("Pagerank CPU");

        job.setVertexClass(PageRankVertexCpu.class);
        job.setInputPath(new Path(args[0]));
        job.setOutputPath(new Path(args[1]));

        // set the defaults
        job.setMaxIteration(30);
        job.set(CONF_DAMPING_FACTOR, "0.85");
        // reference vertices to itself, because we don't have a dangling node
        // contribution here
        job.set("hama.graph.self.ref", "true");
        job.set(CONF_MAX_CONVERGENCE_ERROR, "0.001");

        if (args.length == 3) {
            job.setNumBspTask(Integer.parseInt(args[2]));
        }

        LOG.info("DEBUG: NumBspTask: " + job.getNumBspTask());
        LOG.info("DEBUG: bsp.job.split.file: " + job.get("bsp.job.split.file"));
        LOG.info("DEBUG: bsp.peers.num: " + job.get("bsp.peers.num"));
        LOG.info("DEBUG: bsp.tasks.maximum: " + job.get("bsp.tasks.maximum"));
        LOG.info("DEBUG: bsp.input.dir: " + job.get("bsp.input.dir"));

        // error
        job.setAggregatorClass(AverageAggregator.class);

        // Vertex reader
        job.setVertexInputReaderClass(PagerankSeqReader.class);

        job.setVertexIDClass(Text.class);
        job.setVertexValueClass(DoubleWritable.class);
        job.setEdgeValueClass(NullWritable.class);

        job.setInputFormat(SequenceFileInputFormat.class);

        job.setPartitioner(HashPartitioner.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        return job;
    }

    /**
     * Prints the command line usage and terminates the JVM with status -1.
     */
    private static void printUsage() {
        System.out.println("Usage: <input> <output> [tasks]");
        System.exit(-1);
    }

    /**
     * Command line entry point: {@code <input> <output> [tasks]}.
     *
     * <p>Prints the elapsed wall-clock time when the job succeeds. When the job
     * reports failure, a message is written to stderr and the JVM exits with
     * status 1 so that calling scripts can detect the failure.
     *
     * @param args command line arguments, see {@link #createJob(String[], HamaConfiguration)}
     * @throws IOException            if creating or running the job fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException declared by the job submission call
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        if (args.length < 2) {
            // does not return: printUsage() exits the JVM
            printUsage();
        }

        HamaConfiguration conf = new HamaConfiguration();
        GraphJob pageJob = createJob(args, conf);

        long startTime = System.currentTimeMillis();
        if (pageJob.waitForCompletion(true)) {
            System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        } else {
            System.err.println("Job failed");
            System.exit(1);
        }
    }
}