com.placeiq.piqconnect.BlocksBuilder.java Source code

Introduction

Here is the source code for com.placeiq.piqconnect.BlocksBuilder.java

Source

/**
 * PIQConnect: Connected-component analysis for Big Graph
 *
 * __________.___________  _________                                     __
 * \______   \   \_____  \ \_   ___ \  ____   ____   ____   ____   _____/  |_
 *  |     ___/   |/  / \  \/    \  \/ /  _ \ /    \ /    \_/ __ \_/ ___\   __\
 *  |    |   |   /   \_/.  \     \___(  <_> )   |  \   |  \  ___/\  \___|  |
 *  |____|   |___\_____\ \_/\______  /\____/|___|  /___|  /\___  >\___  >__|
 *                      \__>       \/            \/     \/     \/     \/
 *
 * Copyright (c) 2014 PlaceIQ, Inc
 *
 * This software is licensed under Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ----------------------------------------------------------------------------
 * Author: Jerome Serrano <jerome.serrano@placeiq.com>
 * Date: 2015-01-09
 * ---------------------------------------------------------------------------*/

package com.placeiq.piqconnect;

import com.google.common.base.Objects;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

public class BlocksBuilder extends Configured implements Tool {

    private static final Logger LOG = LogManager.getLogger(BlocksBuilder.class);

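    /**
     * Compact Writable used as the intermediate map output value. A vector entry
     * (in-block row index plus a long value) is written with a negated VInt marker;
     * a matrix entry (in-block row and column indexes, no value) uses a positive
     * marker, so readFields() can tell the two layouts apart from the sign.
     */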
    public static class LightBlockWritable implements Writable {

        private int index1 = -1;
        private int index2 = -1;
        private long value = -1;

        public LightBlockWritable() {
        }

        @Override
        public void write(DataOutput out) throws IOException {
            if (value != -1) { // vector element: negated index marker, then the value
                WritableUtils.writeVInt(out, -(index1 + 1));
                WritableUtils.writeVLong(out, value);
            } else { // matrix element: positive index marker, then the column index (no value)
                WritableUtils.writeVInt(out, index1 + 1);
                WritableUtils.writeVInt(out, index2);
            }
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            int v = WritableUtils.readVInt(in);
            if (v < 0) { // vector
                this.index1 = -v - 1;
                this.index2 = -1;
                this.value = WritableUtils.readVLong(in);
            } else { // matrix
                this.index1 = v - 1;
                this.index2 = WritableUtils.readVInt(in);
                this.value = -1;
            }
        }

        public void setVector(int idx, long value) {
            this.index1 = idx;
            this.index2 = -1;
            this.value = value;
        }

        public void setMatrix(int idxRow, int idxColumn) {
            this.index1 = idxRow;
            this.index2 = idxColumn;
            this.value = -1;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o)
                return true;
            if (o == null || getClass() != o.getClass())
                return false;

            LightBlockWritable that = (LightBlockWritable) o;

            if (index1 != that.index1)
                return false;
            if (index2 != that.index2)
                return false;
            if (value != that.value)
                return false;

            return true;
        }

        @Override
        public int hashCode() {
            int result = index1;
            result = 31 * result + index2;
            result = 31 * result + (int) (value ^ (value >>> 32));
            return result;
        }

        @Override
        public String toString() {
            return Objects.toStringHelper(this).add("index1", index1).add("index2", index2).add("value", value)
                    .toString();
        }
    }

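    /**
     * Stage-1 mapper: parses one tab-separated input line per call. In vector mode the
     * line is "&lt;row index&gt;\t&lt;value&gt;"; in matrix mode it is "&lt;source&gt;\t&lt;destination&gt;".
     * Each index is split into a block index (emitted in the key) and an in-block offset
     * (emitted in the value). Self-loops are counted and dropped, and every matrix edge is
     * emitted in both directions so the resulting block matrix is symmetric.
     */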
    public static class MapStage1 extends Mapper<LongWritable, Text, BlockIndexWritable, LightBlockWritable> {

        private final BlockIndexWritable KEY = new BlockIndexWritable();
        private final LightBlockWritable VALUE = new LightBlockWritable();

        private int blockSize;
        private boolean isVector;

        @Override
        public void setup(Context ctx) {
            Configuration conf = ctx.getConfiguration();
            blockSize = conf.getInt(Constants.PROP_BLOCK_SIZE, 32);
            isVector = conf.getBoolean(Constants.PROP_IS_VECTOR, false);
        }

        @Override
        public void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
            String[] line = value.toString().split("\t");

            if (isVector) {
                long rowIdx = Long.parseLong(line[0]);
                long blockIdx = rowIdx / blockSize;
                int inBlockIdx = (int) (rowIdx % blockSize);

                VALUE.setVector(inBlockIdx, Long.parseLong(line[1]));
                KEY.setVectorIndex(blockIdx);
                ctx.write(KEY, VALUE);
            } else {
                long rowIdx = Long.parseLong(line[0]);
                long colIdx = Long.parseLong(line[1]);

                if (colIdx == rowIdx) {
                    ctx.getCounter(PiqConnectCounter.NUMBER_SELF_LOOP).increment(1);
                    return;
                }

                long blockRowIdx = rowIdx / blockSize;
                long blockColIdx = colIdx / blockSize;
                int inBlockRowIdx = (int) (rowIdx % blockSize);
                int inBlockColIdx = (int) (colIdx % blockSize);

                VALUE.setMatrix(inBlockRowIdx, inBlockColIdx);
                KEY.setMatrixIndex(blockRowIdx, blockColIdx);
                ctx.write(KEY, VALUE);

                VALUE.setMatrix(inBlockColIdx, inBlockRowIdx);
                KEY.setMatrixIndex(blockColIdx, blockRowIdx);
                ctx.write(KEY, VALUE);
            }
        }
    }

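    /**
     * Stage-1 reducer: gathers all in-block elements that share a block index and packs
     * them into a single BlockWritable, either an initial vector block or a matrix block
     * depending on the job configuration.
     */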
    public static class RedStage1
            extends Reducer<BlockIndexWritable, LightBlockWritable, BlockIndexWritable, BlockWritable> {
        private BlockWritable VALUE = null;
        private int blockSize = 32;
        private boolean isVector = false;

        @Override
        public void setup(Context ctx) {
            Configuration conf = ctx.getConfiguration();
            blockSize = conf.getInt(Constants.PROP_BLOCK_SIZE, 32);
            isVector = conf.getBoolean(Constants.PROP_IS_VECTOR, false);
            VALUE = new BlockWritable(blockSize,
                    isVector ? BlockWritable.TYPE.VECTOR_INITIAL : BlockWritable.TYPE.MATRIX);
        }

        @Override
        public void reduce(BlockIndexWritable key, Iterable<LightBlockWritable> values, Context ctx)
                throws IOException, InterruptedException {
            if (isVector) {
                VALUE.resetVector();
                boolean initVector = false;
                for (LightBlockWritable block : values) {
                    if (!initVector) {
                        VALUE.setVectorInitialValue(blockSize);
                        initVector = true;
                    }
                    VALUE.setVectorElem(block.index1, block.value);
                }
                ctx.write(key, VALUE);
            } else {
                VALUE.resetMatrix();
                for (LightBlockWritable block : values) {
                    VALUE.addMatrixElem(block.index1, block.index2);
                }
                ctx.write(key, VALUE);
            }
        }
    }

    private Path pathEdges = null;
    private Path pathOutput = null;
    private int blockSize = 1;
    private int numberOfReducers = 1;
    private boolean isVector = false;

    public static void main(final String[] args) throws Exception {
        final int result = ToolRunner.run(new Configuration(), new BlocksBuilder(), args);
        System.exit(result);
    }

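    // args: <input path> <output path> <block size> <number of reducers>
    //       <"vector" for a vector input, anything else for a matrix>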
    @Override
    public int run(final String[] args) throws Exception {
        pathEdges = new Path(args[0]);
        pathOutput = new Path(args[1]);
        blockSize = Integer.parseInt(args[2]);
        numberOfReducers = Integer.parseInt(args[3]);
        isVector = args[4].equals("vector");

        if (!configStage1().waitForCompletion(true)) {
            LOG.error("Failed to execute BlocksBuilder");
            return -1;
        }
        return 0;
    }

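    // Builds the single MapReduce job: block size and input type are handed to the tasks
    // through the Configuration, and the output is written as a compressed SequenceFile.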
    protected Job configStage1() throws Exception {
        FileSystem fs = FileSystem.get(getConf());
        fs.delete(pathOutput, true); // useful ?

        Configuration conf = getConf();
        conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
        conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
        conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

        Job job = Job.getInstance(conf, "data-piqid.piqconnect.BlocksBuilder");
        job.setJarByClass(BlocksBuilder.class);
        job.setMapperClass(MapStage1.class);
        job.setReducerClass(RedStage1.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(numberOfReducers);
        job.setMapOutputKeyClass(BlockIndexWritable.class);
        job.setMapOutputValueClass(LightBlockWritable.class);
        job.setOutputKeyClass(BlockIndexWritable.class);
        job.setOutputValueClass(BlockWritable.class);

        FileInputFormat.setInputPaths(job, pathEdges);
        SequenceFileOutputFormat.setOutputPath(job, pathOutput);
        SequenceFileOutputFormat.setCompressOutput(job, true);

        Runner.setCompression(job);

        return job;
    }
}
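
Usage example

For illustration, a minimal launcher is sketched below. It assumes the argument order read in run() above (input path, output path, block size, number of reducers, and "vector" or "matrix"); the class name BlocksBuilderLauncher, the HDFS paths, and the numeric values are placeholders for this sketch, not part of the original project.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

import com.placeiq.piqconnect.BlocksBuilder;

public class BlocksBuilderLauncher {

    public static void main(String[] args) throws Exception {
        // Placeholder arguments: <input path> <output path> <block size> <number of reducers> <"vector"|"matrix">
        String[] toolArgs = {"/tmp/piqconnect/edges", "/tmp/piqconnect/blocks", "64", "4", "matrix"};

        // ToolRunner injects the Configuration into BlocksBuilder (a Configured Tool) before run() is invoked.
        int exitCode = ToolRunner.run(new Configuration(), new BlocksBuilder(), toolArgs);
        System.exit(exitCode);
    }
}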