org.sf.xrime.algorithms.pagerank.PageRankStep.java Source code

Java tutorial

Introduction

Here is the source code for org.sf.xrime.algorithms.pagerank.PageRankStep.java

Source

/*
 * Copyright (C) IBM Corp. 2009.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.sf.xrime.algorithms.pagerank;

import java.io.IOException;
import java.net.InetAddress;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.sf.xrime.ProcessorExecutionException;
import org.sf.xrime.algorithms.GraphAlgorithm;
import org.sf.xrime.algorithms.pagerank.normal.PageRankAlgorithm;
import org.sf.xrime.model.vertex.LabeledAdjSetVertex;
import org.sf.xrime.utils.NetUtils;
import org.sf.xrime.utils.SequenceTempDirMgr;

/**
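 * PageRank is an iterative algorithm; a PageRankStep performs one iteration.
 * PageRankStep also implements ZeroOutDegreeVertexRankCollector, through which
 * it accumulates the rank and count reported for zero-out-degree vertexes.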
 * @author Cai Bin 
 */
public class PageRankStep extends GraphAlgorithm implements ZeroOutDegreeVertexRankCollector {
    /**
     * Continue indicator file name in destination working directory
     */
    private String continueFileName = "continue";

    /**
     * Key in JobConf for continue indicator file name
     */
    static final public String continueFileKey = "xrime.algorithm.PageRank.continue.flag";

    /**
     * Key in JobConf for JobTracker host name, which used to open a ZeroOutDegreeVertexRankCollector server
     */
    static final public String rankCollectorHost = "xrime.algorithm.PageRank.collector.host";

    /**
     * Key in JobConf for JobTracker port, which used to open a ZeroOutDegreeVertexRankCollector server
     */
    static final public String rankCollectorPort = "xrime.algorithm.PageRank.collector.port";

    /**
     * Whether more iteration is needed.
     */
    private boolean end = false;

    private JobConf jobConf;
    private FileSystem client = null;

    private double stopThreshold = 0.01;
    private double dampingFactor = 1;
    //  private long   vertexNumber=0;

    private long zeroOutDegreeVertexCount = 0L;
    private double zeroOutDegreeVertexRank = 0.0;
    private double zeroOutDegreeVertexRankPrevious = 0.0;
    private Set<String> reported = new HashSet<String>();

    static final public long protocolVersion = 5473L;
    private Server server;
    private InetAddress ipAddress = null;
    private int ipPort = 0;

    public String getContinueFlag() {
        return continueFileName;
    }

    public void setContinueFlag(String continueFlag) {
        this.continueFileName = continueFlag;
    }

    private String continueFlagFile() throws IllegalAccessException {
        Path filePath = new Path(context.getDestination().getPath().toString() + "/" + continueFileName);
        return filePath.toString();
    }

    public boolean isEnd() {
        return end;
    }

    public FileSystem getClient() {
        return client;
    }

    public void setClient(FileSystem client) {
        this.client = client;
    }

    public void setStopThreshold(double stopThreshold) {
        this.stopThreshold = stopThreshold;
    }

    public void setDampingFactor(double dampingFactor) {
        this.dampingFactor = dampingFactor;
    }
    //  
    //  public void setVertexNumber(long vertexNumber) {
    //    this.vertexNumber = vertexNumber;
    //  }

    public long getZeroOutDegreeVertexCount() {
        return zeroOutDegreeVertexCount;
    }

    public double getZeroOutDegreeVertexRank() {
        return zeroOutDegreeVertexRank;
    }

    public void resetZeroOutDegreeVertexInfo() {
        synchronized (reported) {
            reported.clear();
            zeroOutDegreeVertexRank = 0.0;
            zeroOutDegreeVertexCount = 0L;
        }
    }

    public double getZeroOutDegreeVertexRankPrevious() {
        return zeroOutDegreeVertexRankPrevious;
    }

    public void setZeroOutDegreeVertexRankPrevious(double zeroOutDegreeVertexRankPrevious) {
        this.zeroOutDegreeVertexRankPrevious = zeroOutDegreeVertexRankPrevious;
    }

    @Override
    public void execute() throws ProcessorExecutionException {
        try {
            if (server == null) {
                startServer(); // start the server
            }
            resetZeroOutDegreeVertexInfo(); // reset zeroOutDegreeVertexRank &
                                            // zeroOutDegreeVertexCount

            // {{ algorithm properties
            context.setParameter(continueFileKey, continueFlagFile());
            context.setParameter(rankCollectorHost, ipAddress.getHostName());
            context.setParameter(rankCollectorPort, Integer.toString(ipPort));
            context.setParameter(PageRankAlgorithm.pageRankDampingFactorKey, Double.toString(dampingFactor));
            context.setParameter(PageRankAlgorithm.pageRankStopThresholdKey, Double.toString(stopThreshold));
            // }} algorithm properties

            // {{ Mapper, Reducer configuration
            jobConf = new JobConf(context, PageRankStep.class);
            jobConf.setJobName("BFS");

            FileInputFormat.setInputPaths(jobConf, context.getSource().getPath());

            jobConf.setInputFormat(SequenceFileInputFormat.class);
            jobConf.setMapperClass(PageRankMapper.class);
            jobConf.setNumMapTasks(getMapperNum());
            jobConf.setMapOutputValueClass(ObjectWritable.class);

            jobConf.setReducerClass(PageRankReducer.class);
            jobConf.setNumReduceTasks(getReducerNum());

            jobConf.setOutputKeyClass(Text.class);
            jobConf.setOutputValueClass(LabeledAdjSetVertex.class);

            FileOutputFormat.setOutputPath(jobConf, context.getDestination().getPath());
            jobConf.setOutputFormat(SequenceFileOutputFormat.class);
            // }} Mapper, Reducer configuration

            this.runningJob = JobClient.runJob(jobConf);

            // {{ end indicator
            if (client == null) {
                client = FileSystem.get(jobConf);
            }

            if (client.exists(new Path(continueFlagFile()))) {
                end = false;
                client.delete(new Path(continueFlagFile()), true);
            } else {
                end = true;
            }
            // }} end indicator

            // {{ correction
            if (zeroOutDegreeVertexRank > 0) {
                SequenceTempDirMgr dirmgr = new SequenceTempDirMgr(context.getDestination().getPath().toString());
                Path tmpFile = dirmgr.getTempDir();

                // prop.setProperty(PageRankAlgorithm.pageRankVertexAdjKey,
                // Double.toString(dampingFactor*zeroOutDegreeVertexRank/vertexNumber));
                context.setParameter(PageRankAlgorithm.pageRankVertexAdjKey,
                        Double.toString(dampingFactor * zeroOutDegreeVertexRank));
                System.out.println(
                        "dampingFactor*zeroOutDegreeVertexRank: " + dampingFactor * zeroOutDegreeVertexRank);

                // {{ Mapper, Reducer configuration
                jobConf = new JobConf(context, PageRankStep.class);
                jobConf.setJobName("BFS");

                FileInputFormat.setInputPaths(jobConf, context.getDestination().getPath());

                jobConf.setInputFormat(SequenceFileInputFormat.class);
                jobConf.setMapperClass(PageRankCorrectionMapper.class);
                jobConf.setNumMapTasks(getMapperNum());
                jobConf.setNumReduceTasks(0); // no reducer here

                jobConf.setOutputKeyClass(Text.class);
                jobConf.setOutputValueClass(LabeledAdjSetVertex.class);

                FileOutputFormat.setOutputPath(jobConf, tmpFile);
                jobConf.setOutputFormat(SequenceFileOutputFormat.class);
                // }} Mapper, Reducer configuration

                this.runningJob = JobClient.runJob(jobConf);

                // {{End? We need additional check here since PageRankCorrectionMapper
                // will generate new PR value.
                if (client.exists(new Path(continueFlagFile()))) {
                    end = false;
                    client.delete(new Path(continueFlagFile()), true);
                } else {
                    end = true;
                }
                // }}End? We need additional check here since PageRankCorrectionMapper
                // will generate new PR value.

                client.delete(context.getDestination().getPath(), true);
                client.rename(tmpFile, context.getDestination().getPath());
            }
            // }} correction
        } catch (IOException e) {
            end = true;
            throw new ProcessorExecutionException(e);
        } catch (IllegalAccessException e) {
            throw new ProcessorExecutionException(e);
        }
    }

    public synchronized void startServer() throws IOException {
        if (server != null) {
            return;
        }

        ipAddress = NetUtils.getLocalIP();
        ipPort = NetUtils.getFreePort();
        server = RPC.getServer(this, ipAddress.getHostName(), ipPort, context);
        server.start();
    }

    public void stopServer() {
        server.stop();
    }

    @Override
    public void postRank(String tanskID, long count, double rank) {
        synchronized (reported) {
            if (!reported.contains(tanskID)) {
                zeroOutDegreeVertexRank += rank;
                zeroOutDegreeVertexCount += count;
                reported.add(tanskID);
            }
        }
    }

    @Override
    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
        return protocolVersion;
    }
}