me.tingri.graphs.cc.ConnectedComponents.java Source code

Java tutorial

Introduction

Here is the source code for me.tingri.graphs.cc.ConnectedComponents.java

Source

/***********************************************************************
 * PEGASUS: Peta-Scale Graph Mining System
 * Authors: U Kang, Duen Horng Chau, and Christos Faloutsos
 * <p/>
 * This software is licensed under Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -------------------------------------------------------------------------
 * File: ConCmpt.java
 * - HCC: Find Connected Components of graph
 * Version: 2.0
 ***********************************************************************/
package me.tingri.graphs.cc;

import me.tingri.graphs.gimv.JoinMapper;
import me.tingri.graphs.gimv.JoinReducer;
import me.tingri.graphs.gimv.MergeMapper;
import me.tingri.graphs.gimv.MergeReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import static me.tingri.util.CONSTANTS.*;

/**
 * Heavy rewrite of original source code
 * Created by Sandeep Kunkunuru on 12/23/15.
 */
public class ConnectedComponents extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new Configuration(), new ConnectedComponents(), args);

        System.exit(result);
    }

    protected static int printUsage() {
        System.out.println(
                "ConnectedComponents <edge_path> <vector_path> <# of nodes> <# of reducers> <makesym or nosym> <restart> <state check cadence>");

        ToolRunner.printGenericCommandUsage(System.out);

        return -1;
    }

    public int run(String[] args) throws Exception {
        if (args.length < 5 || args.length > 7)
            return printUsage();

        Path edgePath = new Path(args[0]);
        Path vecPath = new Path(args[1]);
        Path curVectorPath = new Path(vecPath.toString() + "_WIP");
        Path tempVectorPath = new Path(curVectorPath.toString() + "_temp");
        Path nextVectorPath = new Path(curVectorPath.toString() + "_next");
        Path stateCheckTempPath = new Path(curVectorPath.toString() + "_STATE_CHECK_TEMP");
        long numOfNodes = Long.parseLong(args[2]);
        int numOfReducers = Integer.parseInt(args[3]);
        String makeSymmetric = (MAKE_SYMMETRIC.equalsIgnoreCase(args[4])) ? "1" : "0";
        int stateCheckCadence = (args.length == 7) ? Integer.parseInt(args[6]) : STATE_CHECK_CADENCE;

        FileSystem fs = FileSystem.get(getConf());

        //start from where we stopped i.e. if a vector_path exists and restart is requested jump straight to loop
        if (!fs.exists(vecPath) || args.length < 6 || !RESTART.equalsIgnoreCase(args[5]))
            Utility.generateVector(new JobConf(getConf(), ConnectedComponents.class), fs, edgePath, curVectorPath,
                    makeSymmetric, numOfReducers);
        else
            Utility.rename(fs, vecPath, curVectorPath);

        //TODO for debugging retain all intermediate vectors

        long changed = -1;

        // Iteratively calculate neighbor with minimum id.
        for (int i = 1; i <= MAX_ITERATIONS; i++) {
            join(fs, edgePath, curVectorPath, tempVectorPath, makeSymmetric, numOfReducers);

            merge(fs, tempVectorPath, nextVectorPath, numOfReducers);

            if (i % stateCheckCadence == 0) {
                changed = Utility.stateCheck(new JobConf(getConf(), ConnectedComponents.class), fs, curVectorPath,
                        nextVectorPath, stateCheckTempPath, numOfReducers);
                System.out.println(
                        "Iteration " + i + " : changed = " + changed + ", unchanged = " + (numOfNodes - changed));
            }

            Utility.rename(fs, nextVectorPath, curVectorPath);

            // Stop when there are no more changes to vector
            if (changed == 0)
                break;
        }

        Utility.rename(fs, curVectorPath, vecPath);

        if (changed != 0)
            System.out.println("Convergence has not been achieved in " + MAX_ITERATIONS
                    + " iterations. Final Results are in" + vecPath);
        else
            System.out.println("Convergence has been achieved. Final Results are in " + vecPath);

        return 0;
    }

    protected RunningJob join(FileSystem fs, Path edgePath, Path vecPath, Path tempVectorPath, String makeSymmetric,
            int numOfReducers) throws Exception {
        Utility.deleteIfExists(fs, tempVectorPath);

        JobConf conf = new JobConf(getConf(), ConnectedComponents.class);
        conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
        conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
        conf.set(MAKE_SYMMETRIC, makeSymmetric);

        conf.setJobName("ConnectedComponents_Join");

        conf.setMapperClass(JoinMapper.class);
        conf.setReducerClass(JoinReducer.class);

        FileInputFormat.setInputPaths(conf, edgePath, vecPath);
        FileOutputFormat.setOutputPath(conf, tempVectorPath);

        conf.setNumReduceTasks(numOfReducers);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        return JobClient.runJob(conf);
    }

    protected RunningJob merge(FileSystem fs, Path tempVectorPath, Path nextVectorPath, int numOfReducers)
            throws Exception {
        Utility.deleteIfExists(fs, nextVectorPath);

        JobConf conf = new JobConf(getConf(), ConnectedComponents.class);
        conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

        conf.setJobName("ConnectedComponents_Merge");

        conf.setMapperClass(MergeMapper.class);
        conf.setReducerClass(MergeReducer.class);

        FileInputFormat.setInputPaths(conf, tempVectorPath);
        FileOutputFormat.setOutputPath(conf, nextVectorPath);

        conf.setNumReduceTasks(numOfReducers);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        return JobClient.runJob(conf);
    }

}