com.bark.hadoop.lab3.PageRankMapper.java Source code

Java tutorial

Introduction

Here is the source code for com.bark.hadoop.lab3.PageRankMapper.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.bark.hadoop.lab3;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for one PageRank iteration over an adjacency-list graph.
 *
 * <p>Each input line is split into a node (the first tab/space separated
 * field) and a space-separated value part. If the first value token has the
 * form {@code _!<digits>.<rest>} it is taken as the node's current PageRank
 * (presumably written by the previous iteration's reducer); otherwise the line
 * is treated as a first-iteration line and the node starts with rank
 * {@code 1/N}. All remaining value tokens are outlinks.
 *
 * <p>For every accepted line the mapper emits:
 * <ul>
 *   <li>{@code (node, "_!" + (1 - d)/N + " " + outlinks...)} &mdash; the base
 *       rank together with the node's adjacency list, and</li>
 *   <li>{@code (outlink, "_!" + d * rank / |outlinks|)} for each outlink.</li>
 * </ul>
 *
 * <p>{@code N} is read from the job configuration key {@code "N"} and
 * {@code d} is the damping factor {@value #DAMPING_FACTOR}.
 */
public class PageRankMapper extends Mapper<LongWritable, Text, Text, Text> {

    /** Configuration key that holds the total number of nodes in the graph. */
    private static final String NODE_COUNT_KEY = "N";

    /** PageRank damping factor {@code d}. */
    private static final double DAMPING_FACTOR = 0.85;

    /** Marker that distinguishes inserted rank values from numbers occurring in titles. */
    private static final String RANK_PREFIX = "_!";

    /**
     * A rank token: {@code _!} followed by digits, a literal dot and the rest
     * of the number (the rest is left open so exponent forms such as
     * {@code 1.0E-4} are accepted). Compiled once instead of once per record.
     */
    private static final Pattern RANK_TOKEN = Pattern.compile("_!\\d+\\.\\S+");

    /**
     * Processes one adjacency-list line and emits the records described in the
     * class documentation. Lines with an empty node field are skipped.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of the adjacency graph
     * @param context job context used to read {@code N} and to emit records
     * @throws IOException          if {@code N} is missing or not positive, or
     *                              if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Floating-point division by zero yields Infinity/NaN instead of throwing,
        // so an unset N has to be rejected explicitly before any rank is computed.
        final int nodeCount = context.getConfiguration().getInt(NODE_COUNT_KEY, 0);
        if (nodeCount <= 0) {
            throw new IOException("Configuration value '" + NODE_COUNT_KEY
                    + "' (number of nodes) must be a positive integer but was " + nodeCount);
        }

        // Normalise separators: node <TAB> token token ...
        final String line = value.toString().replaceAll("\t", " ").replaceFirst(" ", "\t");
        final String[] fields = line.split("\t");
        if (fields.length == 0 || fields[0].isEmpty()) {
            // Blank or separator-only line: there is no node to emit for.
            return;
        }
        final String node = fields[0];
        final String[] tokens = fields.length > 1 ? fields[1].split(" ") : new String[0];

        // The rank, when present, is always the first value token; matching only
        // that token keeps a look-alike elsewhere on the line from being mistaken
        // for the rank (which would drop a real outlink and skew the divisor).
        final boolean hasPageRank = tokens.length > 0 && RANK_TOKEN.matcher(tokens[0]).matches();
        final int firstOutlink = hasPageRank ? 1 : 0;
        final int outlinkCount = tokens.length - firstOutlink;

        // First iteration (no rank on the line): start every node at 1/N.
        final double pageRank = hasPageRank
                ? Double.parseDouble(tokens[0].substring(RANK_PREFIX.length()))
                : 1d / nodeCount;

        // Emit this node's (1 - d)/N together with its adjacency list, if any.
        final StringBuilder structure = new StringBuilder(RANK_PREFIX)
                .append((1 - DAMPING_FACTOR) / nodeCount);
        for (int i = firstOutlink; i < tokens.length; i++) {
            structure.append(' ').append(tokens[i]);
        }
        context.write(new Text(node), new Text(structure.toString().trim()));

        // No outlinks: the base-rank record above is all there is to emit.
        if (outlinkCount == 0) {
            return;
        }

        // Distribute d * rank evenly over the outlinks.
        final Text contribution = new Text(RANK_PREFIX + (pageRank * DAMPING_FACTOR / outlinkCount));
        for (int i = firstOutlink; i < tokens.length; i++) {
            context.write(new Text(tokens[i]), contribution);
        }
    }
}