org.apache.mahout.graph.common.EnumerateTrianglesJob.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.graph.common.EnumerateTrianglesJob.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.graph.common;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.graph.model.Triangle;
import org.apache.mahout.graph.model.UndirectedEdge;
import org.apache.mahout.graph.model.Vertex;

/**
 * Enumerates all triangles of an undirected graph.
 *
 * <p>The work is split into five consecutive MapReduce jobs (see {@link #run(String[])}):
 * <ol>
 *   <li>scatter every edge to both of its vertices and compute each vertex' degree,</li>
 *   <li>join the two partial degree records of every edge into one record,</li>
 *   <li>bin every edge under its lower degree vertex and build open triads there,</li>
 *   <li>convert the edges into the same key/value format as the open triads,</li>
 *   <li>join open triads with the edges that close them, yielding the triangles.</li>
 * </ol>
 * If any of these jobs fails, the remaining ones are not started.
 */
public class EnumerateTrianglesJob extends AbstractJob {

    /** Name of the temp directory holding edges augmented with the degree of one of their vertices. */
    public static final String TMP_AUGMENTED_EDGES = "augmentedEdges";
    /** Name of the temp directory holding edges augmented with the degrees of both vertices. */
    public static final String TMP_EDGES_WITH_DEGREES = "edgesWithDegrees";
    /** Name of the temp directory holding the edges in joinable form (candidates for closing a triad). */
    public static final String TMP_CLOSING_EDGES = "closingEdges";
    /** Name of the temp directory holding the open triads. */
    public static final String TMP_OPEN_TRIADS = "openTriads";

    /**
     * Command line entry point, delegates to {@link ToolRunner}.
     *
     * @param args command line arguments, see {@link #run(String[])}
     * @throws Exception if the job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new EnumerateTrianglesJob(), args);
    }

    /**
     * Parses the arguments and runs the chain of jobs that enumerates the triangles.
     *
     * @param args the input and output options plus an optional {@code --text} flag that switches
     *             the final output from sequence files to text files
     * @return {@code 0} if all jobs succeeded, {@code -1} if the arguments could not be parsed or
     *         one of the jobs failed
     * @throws Exception if a job cannot be prepared or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        addInputOption();
        addOutputOption();
        addOption("text", "t", "output in textformat?", String.valueOf(Boolean.FALSE));

        Map<String, String> parsedArgs = parseArguments(args);
        if (parsedArgs == null) {
            return -1;
        }

        Class<? extends FileOutputFormat> outputFormat = Boolean.parseBoolean(parsedArgs.get("--text"))
                ? TextOutputFormat.class
                : SequenceFileOutputFormat.class;

        /*
         * Every stage consumes the output of an earlier one, so the chain is aborted as soon as a
         * stage reports failure instead of running the later stages on missing or partial data.
         */

        /* scatter the edges to each of the vertices and count degree */
        Job scatter = prepareJob(getInputPath(), getTempPath(TMP_AUGMENTED_EDGES), ScatterEdgesMapper.class,
                Vertex.class, Vertex.class, SumDegreesReducer.class, UndirectedEdge.class, VertexWithDegree.class);
        if (!scatter.waitForCompletion(true)) {
            return -1;
        }

        /* join augmented edges with partial degree information to complete records */
        Job join = prepareJob(getTempPath(TMP_AUGMENTED_EDGES), getTempPath(TMP_EDGES_WITH_DEGREES), Mapper.class,
                UndirectedEdge.class, VertexWithDegree.class, JoinDegreesReducer.class,
                UndirectedEdgeWithDegrees.class, NullWritable.class);
        if (!join.waitForCompletion(true)) {
            return -1;
        }

        /* scatter the edges to lower degree vertex and build open triads */
        Job scatterToLower = prepareJob(getTempPath(TMP_EDGES_WITH_DEGREES), getTempPath(TMP_OPEN_TRIADS),
                ScatterEdgesToLowerDegreeVertexMapper.class, Vertex.class, Vertex.class,
                BuildOpenTriadsReducer.class, JoinableUndirectedEdge.class, VertexOrMarker.class);
        if (!scatterToLower.waitForCompletion(true)) {
            return -1;
        }

        /* necessary as long as we don't have access to an undeprecated MultipleInputs  */
        Job prepareInput = prepareJob(getTempPath(TMP_EDGES_WITH_DEGREES), getTempPath(TMP_CLOSING_EDGES),
                PrepareInputMapper.class, JoinableUndirectedEdge.class, VertexOrMarker.class, Reducer.class,
                JoinableUndirectedEdge.class, VertexOrMarker.class);
        prepareInput.setGroupingComparatorClass(JoinableUndirectedEdge.GroupingComparator.class);
        if (!prepareInput.waitForCompletion(true)) {
            return -1;
        }

        /* join opentriads and edges pairwise to get all triangles */
        Job joinTriads = prepareJob(getCombinedTempPath(TMP_OPEN_TRIADS, TMP_CLOSING_EDGES), getOutputPath(),
                SequenceFileInputFormat.class, Mapper.class, JoinableUndirectedEdge.class, VertexOrMarker.class,
                JoinTrianglesReducer.class, Triangle.class, NullWritable.class, outputFormat);
        joinTriads.setGroupingComparatorClass(JoinableUndirectedEdge.GroupingComparator.class);
        if (!joinTriads.waitForCompletion(true)) {
            return -1;
        }

        return 0;
    }

    /** Sends every edge to each vertex  */
    public static class ScatterEdgesMapper extends Mapper<UndirectedEdge, Object, Vertex, Vertex> {

        /**
         * Emits the edge once per endpoint: each vertex is written as key with the opposite vertex as value.
         * The input value is ignored.
         */
        @Override
        protected void map(UndirectedEdge edge, Object value, Context ctx)
                throws IOException, InterruptedException {
            ctx.write(edge.firstVertex(), edge.secondVertex());
            ctx.write(edge.secondVertex(), edge.firstVertex());
        }
    }

    /** Sums up the count of edges for each vertex and augments all edges with a degree information for the key vertex */
    public static class SumDegreesReducer extends Reducer<Vertex, Vertex, UndirectedEdge, VertexWithDegree> {

        /**
         * Collects the ids of all vertices connected to {@code vertex}, takes the number of distinct ids as
         * the degree and emits one record per connected vertex: the edge as key, {@code vertex} together
         * with its degree as value.
         */
        @Override
        protected void reduce(Vertex vertex, Iterable<Vertex> connectedVertices, Context ctx)
                throws IOException, InterruptedException {
            /* the set removes duplicates, so an edge that occurs several times in the input counts once */
            FastIDSet connectedVertexIds = new FastIDSet();
            for (Vertex connectedVertex : connectedVertices) {
                connectedVertexIds.add(connectedVertex.id());
            }

            int degree = connectedVertexIds.size();
            VertexWithDegree vertexWithDegree = new VertexWithDegree(vertex, degree);
            LongPrimitiveIterator connectedVertexIdsIterator = connectedVertexIds.iterator();
            while (connectedVertexIdsIterator.hasNext()) {
                Vertex connectedVertex = new Vertex(connectedVertexIdsIterator.nextLong());
                ctx.write(new UndirectedEdge(vertex, connectedVertex), vertexWithDegree);
            }
        }
    }

    /** Joins identical edges assuring degree augmentations for both nodes */
    public static class JoinDegreesReducer
            extends Reducer<UndirectedEdge, VertexWithDegree, UndirectedEdgeWithDegrees, NullWritable> {

        /**
         * Combines the two degree records of an edge (one per endpoint) into a single
         * {@link UndirectedEdgeWithDegrees}. Edges for which only one record arrived are dropped.
         */
        @Override
        protected void reduce(UndirectedEdge edge, Iterable<VertexWithDegree> verticesWithDegrees, Context ctx)
                throws IOException, InterruptedException {
            Iterator<VertexWithDegree> iterator = verticesWithDegrees.iterator();
            /* copy before advancing -- presumably needed because the value instance is reused by the iterator */
            VertexWithDegree firstVertexWithDegree = iterator.next().clone();
            if (!iterator.hasNext()) {
                /*
                 * Only one endpoint reported this edge. This happens for a self-loop: both endpoints are
                 * the same vertex and SumDegreesReducer de-duplicates the neighbour ids, so the edge is
                 * emitted just once. Such an edge can never be part of a triangle, so it is skipped
                 * instead of failing the task with a NoSuchElementException.
                 */
                return;
            }
            VertexWithDegree secondVertexWithDegree = iterator.next().clone();
            ctx.write(new UndirectedEdgeWithDegrees(firstVertexWithDegree, secondVertexWithDegree),
                    NullWritable.get());
        }
    }

    /** Finds the lower degree vertex of an edge and emits key-value-pairs to bin under this lower degree vertex. */
    public static class ScatterEdgesToLowerDegreeVertexMapper
            extends Mapper<UndirectedEdgeWithDegrees, Writable, Vertex, Vertex> {

        /**
         * Writes the vertex with the lower degree as key and the other vertex as value. If both degrees
         * are equal, the second vertex of the edge becomes the key. The input value is ignored.
         */
        @Override
        protected void map(UndirectedEdgeWithDegrees edge, Writable value, Context ctx)
                throws IOException, InterruptedException {
            VertexWithDegree first = edge.getFirstVertexWithDegree();
            VertexWithDegree second = edge.getSecondVertexWithDegree();

            if (first.degree() < second.degree()) {
                ctx.write(first.vertex(), second.vertex());
            } else {
                ctx.write(second.vertex(), first.vertex());
            }
        }
    }

    /**
     * Builds open triads from edges by pairwise joining the edges on the lower degree vertex which is the apex of the triad. Emits key-value pairs
     * where the value is the triad and the key is the two outside vertices.
     */
    public static class BuildOpenTriadsReducer
            extends Reducer<Vertex, Vertex, JoinableUndirectedEdge, VertexOrMarker> {

        /**
         * Pairs every incoming vertex with all vertices seen before it in the same group. For each pair the
         * "missing" edge between the two is emitted as key (flagged {@code false}) and the apex
         * {@code vertex} as value.
         */
        @Override
        protected void reduce(Vertex vertex, Iterable<Vertex> vertices, Context ctx)
                throws IOException, InterruptedException {
            /* only the ids are buffered, so the buffer does not hold on to the iterated Vertex objects */
            FastIDSet bufferedVertexIDs = new FastIDSet();
            for (Vertex firstVertexOfMissingEdge : vertices) {
                LongPrimitiveIterator bufferedVertexIdsIterator = bufferedVertexIDs.iterator();
                while (bufferedVertexIdsIterator.hasNext()) {
                    Vertex secondVertexOfMissingEdge = new Vertex(bufferedVertexIdsIterator.nextLong());
                    UndirectedEdge missingEdge = new UndirectedEdge(firstVertexOfMissingEdge,
                            secondVertexOfMissingEdge);
                    ctx.write(new JoinableUndirectedEdge(missingEdge, false), new VertexOrMarker(vertex));
                }
                /* added after pairing so that a vertex is never paired with itself */
                bufferedVertexIDs.add(firstVertexOfMissingEdge.id());
            }
        }
    }

    /**
     * Converts the edges into the key/value format of the open triads so that both can be fed into the
     * final join: every edge is emitted as a {@link JoinableUndirectedEdge} flagged {@code true} with
     * {@link VertexOrMarker#MARKER} as value.
     */
    public static class PrepareInputMapper
            extends Mapper<UndirectedEdgeWithDegrees, Writable, JoinableUndirectedEdge, VertexOrMarker> {

        /** Strips the degree information from the edge and emits it with the marker value. The input value is ignored. */
        @Override
        protected void map(UndirectedEdgeWithDegrees edgeWithDegrees, Writable value, Context ctx)
                throws IOException, InterruptedException {
            Vertex firstVertex = edgeWithDegrees.getFirstVertexWithDegree().vertex();
            Vertex secondVertex = edgeWithDegrees.getSecondVertexWithDegree().vertex();
            ctx.write(new JoinableUndirectedEdge(firstVertex, secondVertex, true), VertexOrMarker.MARKER);
        }
    }

    /**
     * Joins the open triads with the edges closing them. A group only produces triangles if its first
     * value is the marker, i.e. if the edge between the two outer vertices of the triads exists.
     */
    public static class JoinTrianglesReducer
            extends Reducer<JoinableUndirectedEdge, VertexOrMarker, Triangle, NullWritable> {

        /**
         * Emits one {@link Triangle} per apex vertex following the marker; emits nothing if the group
         * does not start with the marker.
         *
         * NOTE(review): relies on the marker record, if present, being the first value of the group --
         * presumably ensured by the sort order of {@link JoinableUndirectedEdge}; confirm there.
         */
        @Override
        protected void reduce(JoinableUndirectedEdge joinableEdge, Iterable<VertexOrMarker> verticesAndMarker,
                Context ctx) throws IOException, InterruptedException {
            Iterator<VertexOrMarker> verticesAndMarkerIterator = verticesAndMarker.iterator();
            VertexOrMarker possibleMarker = verticesAndMarkerIterator.next();
            if (possibleMarker.isMarker()) {
                UndirectedEdge edge = joinableEdge.edge();
                while (verticesAndMarkerIterator.hasNext()) {
                    Vertex connectingVertex = verticesAndMarkerIterator.next().vertex();
                    ctx.write(new Triangle(connectingVertex, edge.firstVertex(), edge.secondVertex()),
                            NullWritable.get());
                }
            }
        }
    }
}