me.tingri.graphs.cc.block.ConnectedComponentsBlock.java Source code

Java tutorial

Introduction

Here is the source code for me.tingri.graphs.cc.block.ConnectedComponentsBlock.java

Source

package me.tingri.graphs.cc.block;
/***********************************************************************
 * PEGASUS: Peta-Scale Graph Mining System
 * Authors: U Kang, Duen Horng Chau, and Christos Faloutsos
 * <p/>
 * This software is licensed under Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -------------------------------------------------------------------------
 * File: CCBlock.java
 * - HCC: Find Connected Components of graph using block multiplication. This is a block-based version of HCC.
 * Version: 2.0
 ***********************************************************************/

import me.tingri.graphs.cc.Utility;
import me.tingri.graphs.gimv.block.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import static me.tingri.util.CONSTANTS.*;

/**
 * Heavy rewrite of original source code
 * Created by Sandeep Kunkunuru on 02/02/16.
 */
public class ConnectedComponentsBlock extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new Configuration(), new ConnectedComponentsBlock(), args);

        System.exit(result);
    }

    protected static int printUsage() {
        System.out.println(
                "CCBlock <edgePath> <curVectorPath> <# of nodes> <# of reducers> <fast or normal> <blockWidth> <makesym> <restart>");
        System.out.println("Block width has to be <= 32,767");

        ToolRunner.printGenericCommandUsage(System.out);

        return -1;
    }

    public int run(String[] args) throws Exception {
        if (args.length != 7 && args.length != 8)
            return printUsage();

        Path edgePath = new Path(args[0]);
        Path vecPath = new Path(args[1]);
        Path curVectorPath = new Path(vecPath.toString() + "_WIP");
        Path tempVectorPath = new Path(curVectorPath.toString() + "_temp");
        Path nextVectorPath = new Path(curVectorPath.toString() + "_next");
        Path stateCheckTempPath = new Path(curVectorPath.toString() + "_STATE_CHECK_TEMP");
        long numOfNodes = Long.parseLong(args[2]);
        int numOfReducers = Integer.parseInt(args[3]);
        int recurDiagonalMult = (FAST.equalsIgnoreCase(args[4])) ? 1 : 0;
        short blockWidth = Short.parseShort(args[5]);
        String makeSymmetric = (MAKE_SYMMETRIC.equalsIgnoreCase(args[6])) ? "1" : "0";

        System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");
        System.out.println("[PEGASUS] Computing connected component using block method. Reducers = " + numOfReducers
                + ", blockWidth = " + blockWidth);

        long changed = -1;
        FileSystem fs = FileSystem.get(getConf());

        //start from where we stopped i.e. if a vector_path exists and restart is requested jump straight to loop
        if (!fs.exists(vecPath) || args.length != 8 || !RESTART.equalsIgnoreCase(args[7]))
            Utility.generateVector(new JobConf(getConf(), ConnectedComponentsBlock.class), fs, edgePath,
                    curVectorPath, makeSymmetric, numOfReducers);
        else
            Utility.rename(fs, vecPath, curVectorPath);

        // Iteratively calculate neighborhood function.
        for (int i = 0; i < MAX_ITERATIONS; i++) {
            join(blockWidth, recurDiagonalMult, edgePath, curVectorPath, tempVectorPath, numOfReducers,
                    makeSymmetric, fs);

            merge(blockWidth, tempVectorPath, nextVectorPath, numOfReducers, fs);

            changed = Utility.stateCheck(new JobConf(getConf(), ConnectedComponentsBlock.class), fs, curVectorPath,
                    nextVectorPath, stateCheckTempPath, numOfReducers);

            System.out.println(
                    "Iteration " + i + " : changed = " + changed + ", unchanged = " + (numOfNodes - changed));

            Utility.rename(fs, nextVectorPath, curVectorPath);

            // Stop when the minimum neighborhood doesn't change
            if (changed == 0)
                break;
        }
        Utility.rename(fs, curVectorPath, vecPath);

        if (changed != 0)
            System.out.println("Convergence has not been achieved in " + MAX_ITERATIONS
                    + " iterations. Final Results are in" + vecPath);
        else
            System.out.println("Convergence has been achieved. Final Results are in " + vecPath);

        return 0;
    }

    protected RunningJob join(short blockWidth, int recurDiagonalMult, Path edgePath, Path curVectorPath,
            Path tempVectorPath, int numOfReducers, String makeSymmetric, FileSystem fs) throws Exception {
        Utility.deleteIfExists(fs, tempVectorPath);

        JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class);

        conf.set(BLOCK_WIDTH, "" + blockWidth);
        conf.set(RECURSIVE_DIAG_MULT, "" + recurDiagonalMult);
        conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
        conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
        conf.set(SEPARATOR_WITHIN_VALUE, SPACE);
        conf.set(MAKE_SYMMETRIC, makeSymmetric);

        conf.setJobName("CCBlock_join");

        conf.setMapperClass(JoinMapper.class);
        conf.setReducerClass(JoinReducer.class);

        FileInputFormat.setInputPaths(conf, edgePath, curVectorPath);
        FileOutputFormat.setOutputPath(conf, tempVectorPath);

        conf.setNumReduceTasks(numOfReducers);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        return JobClient.runJob(conf);
    }

    protected RunningJob merge(short blockWidth, Path tempVectorPath, Path nextVectorPath, int numOfReducers,
            FileSystem fs) throws Exception {
        Utility.deleteIfExists(fs, nextVectorPath);

        JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class);
        conf.set(BLOCK_WIDTH, "" + blockWidth);
        conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

        conf.setJobName("CCBlock_reduce");

        conf.setMapperClass(MergeMapper.class);
        conf.setReducerClass(MergeReducer.class);

        FileInputFormat.setInputPaths(conf, tempVectorPath);
        FileOutputFormat.setOutputPath(conf, nextVectorPath);

        conf.setNumReduceTasks(numOfReducers);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        return JobClient.runJob(conf);
    }
}