org.apache.mahout.graph.components.FindComponentsJob.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.graph.components.FindComponentsJob.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.graph.components;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.graph.components.FlaggedVertex.PayloadType;
import org.apache.mahout.graph.model.UndirectedEdge;
import org.apache.mahout.graph.model.Vertex;
import org.apache.mahout.graph.triangles.EnumerateTrianglesJob;

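/**
 * Finds the connected components of an undirected graph with an iterative
 * series of map-reduce jobs: each vertex is first assigned to a zone named
 * after a representative vertex, and zones joined by an edge are merged
 * until no interzone edge remains.
 */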
public class FindComponentsJob extends AbstractJob {

    /**
     * Hadoop counters published by this job.
     */
    public enum Counter { /** incremented once per interzone edge that is emitted */ ZONES_CONNECTED }

    /**
     * Command-line entry point: runs this job through {@link ToolRunner}.
     *
     * @param args command-line arguments, forwarded unchanged to {@link #run(String[])}
     * @throws Exception whatever the job run throws
     */
    public static void main(String[] args) throws Exception {
        // must launch this job; the previous code started EnumerateTrianglesJob by mistake
        ToolRunner.run(new FindComponentsJob(), args);
    }

    /**
     * Runs the component-finding pipeline.
     * <p>
     * Phase one builds the initial zone assignments from the edge file. Phase
     * two repeats three jobs (join edges with assignments, find interzone
     * edges, assign new zones) until the interzone job reports a
     * {@link Counter#ZONES_CONNECTED} count of zero. The last assignments file
     * is then copied to the output path.
     *
     * @param args command-line arguments
     * @return 0 on success, -1 if the arguments could not be parsed or any of
     *         the map-reduce jobs did not complete successfully
     * @throws Exception if job setup, job execution or the final copy fails
     */
    @Override
    public int run(String[] args) throws Exception {

        addInputOption();
        addOutputOption();

        Map<String, String> parsedArgs = parseArguments(args);
        if (parsedArgs == null) {
            return -1;
        }

        // NOTE(review): "--tempDir" is not registered in this method; presumably the
        // inherited argument handling provides it - confirm against AbstractJob
        Path tempDirPath = new Path(parsedArgs.get("--tempDir"));

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();

        AtomicInteger currentPhase = new AtomicInteger();

        Path edgesPath = inputPath;
        // intermediate outputs are placed in the temp dir under timestamp-based names
        Path zoneAssignmentsPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            /*
             * Prepare Input
             */
            Job prepareAssignments = prepareJob(edgesPath, zoneAssignmentsPath, SequenceFileInputFormat.class,
                    PrepareAssignmentsFileMapper.class, Vertex.class, Vertex.class,
                    PrepareAssignmentsFileReducer.class, Vertex.class, FlaggedVertex.class,
                    SequenceFileOutputFormat.class);

            // stop immediately on failure instead of iterating over missing data
            if (!prepareAssignments.waitForCompletion(true)) {
                return -1;
            }
        }

        if (shouldRunNextPhase(parsedArgs, currentPhase)) {

            /*
             * As long as there may be zones connected
             */
            while (true) {

                Path scatterEdgesAndAssignZoneOutputPath = new Path(tempDirPath,
                        String.valueOf(System.currentTimeMillis()));

                /*
                 * Scatter edges and forward zone assignments,
                 * assign one zone to edges
                 */
                Job scatterEdgesAndAssignZone = prepareJob(
                        new Path(zoneAssignmentsPath.toString() + "," + edgesPath.toString()),
                        scatterEdgesAndAssignZoneOutputPath, SequenceFileInputFormat.class,
                        ScatterEdgesAndForwardZoneAssignmentsMapper.class, JoinableVertex.class,
                        FlaggedVertex.class, AssignOneZoneToEdgesReducer.class, UndirectedEdge.class, Vertex.class,
                        SequenceFileOutputFormat.class);
                scatterEdgesAndAssignZone.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
                if (!scatterEdgesAndAssignZone.waitForCompletion(true)) {
                    return -1;
                }

                Path findInterzoneEdgesOutputPath = new Path(tempDirPath,
                        String.valueOf(System.currentTimeMillis()));

                /*
                 * Find interzone edges
                 */
                Job findInterzoneEdges = prepareJob(scatterEdgesAndAssignZoneOutputPath,
                        findInterzoneEdgesOutputPath, SequenceFileInputFormat.class, Mapper.class,
                        UndirectedEdge.class, Vertex.class, FindInterzoneEdgesReducer.class, Vertex.class,
                        FlaggedVertex.class, SequenceFileOutputFormat.class);

                // a failed job must not be mistaken for "no interzone edges left"
                if (!findInterzoneEdges.waitForCompletion(true)) {
                    return -1;
                }

                /*
                 * Break if there are no new interzone edges
                 */
                if (findInterzoneEdges.getCounters().findCounter(Counter.ZONES_CONNECTED).getValue() == 0L) {
                    break;
                }

                Path assignNewZonesOutputPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

                /*
                 * Assign new zones
                 */
                Job assignNewZones = prepareJob(
                        new Path(zoneAssignmentsPath.toString() + "," + findInterzoneEdgesOutputPath.toString()),
                        assignNewZonesOutputPath, SequenceFileInputFormat.class,
                        BinZoneAssignmentsAndInterzoneEdgesMapper.class, JoinableVertex.class, FlaggedVertex.class,
                        AssignNewZonesToVerticesReducer.class, Vertex.class, FlaggedVertex.class,
                        SequenceFileOutputFormat.class);

                assignNewZones.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
                if (!assignNewZones.waitForCompletion(true)) {
                    return -1;
                }

                // the next iteration joins the edges against the refreshed assignments
                zoneAssignmentsPath = assignNewZonesOutputPath;
            }
        }
        // publish the most recent assignments file as the job result (source is kept)
        FileSystem system = FileSystem.get(getConf());
        FileUtil.copy(system, zoneAssignmentsPath, system, outputPath, false, getConf());
        return 0;
    }

    /**
     * Mapper of the preparation job. For every input edge it proposes the
     * edge's first vertex as a zone representative, both for that vertex
     * itself and for the vertex at the other end of the edge.
     */
    public static class PrepareAssignmentsFileMapper extends Mapper<Vertex, FlaggedVertex, Vertex, Vertex> {

        @Override
        public void map(Vertex from, FlaggedVertex to, Context ctx) throws IOException, InterruptedException {
            // the key vertex is a candidate representative for itself ...
            ctx.write(from, from);

            // ... and for the vertex carried in the value
            Vertex otherEnd = to.getVertex();
            ctx.write(otherEnd, from);
        }
    }

    /**
     * Reducer of the preparation job. Receives all representative candidates
     * proposed for one vertex and writes a single zone assignment for it: the
     * candidate with the smallest id.
     */
    public static class PrepareAssignmentsFileReducer extends Reducer<Vertex, Vertex, Vertex, FlaggedVertex> {

        @Override
        public void reduce(Vertex from, Iterable<Vertex> representatives, Context ctx)
                throws IOException, InterruptedException {
            // Only the smallest id is needed, so keep a running minimum instead of
            // buffering every candidate id in a sorted set.
            Iterator<Vertex> candidates = representatives.iterator();
            long smallestId = candidates.next().getId();
            while (candidates.hasNext()) {
                smallestId = Math.min(smallestId, candidates.next().getId());
            }
            // output just one representative
            ctx.write(from, FlaggedVertex.createZoneAssignment(smallestId));
        }
    }

    /**
     * Mapper of the join job. It reads two kinds of records:
     *
     * <ul>
     * <li>edges from the edge file</li>
     * <li>zone assignments from the assignments file</li>
     * </ul>
     *
     * A zone assignment is forwarded unchanged under its vertex.<br />
     * An edge is emitted twice, once under each of its two vertices.
     */
    public static class ScatterEdgesAndForwardZoneAssignmentsMapper
            extends Mapper<Vertex, FlaggedVertex, JoinableVertex, FlaggedVertex> {

        @Override
        public void map(Vertex first, FlaggedVertex secondOrRepresentative, Context ctx)
                throws IOException, InterruptedException {
            switch (secondOrRepresentative.getType()) {
            case ZoneAssignment:
                // pass the assignment through, keyed with the flag set to true
                // (presumably this is what sorts it ahead of the edges - confirm in JoinableVertex)
                ctx.write(new JoinableVertex(first, true), secondOrRepresentative);
                break;
            case UndirectedEdge:
                scatterEdge(first, secondOrRepresentative.getVertex(), ctx);
                break;
            default:
                throw new IllegalArgumentException();
            }
        }

        /**
         * Emits the edge under both of its end points so each vertex sees it.
         */
        private void scatterEdge(Vertex first, Vertex second, Context ctx)
                throws IOException, InterruptedException {
            ctx.write(new JoinableVertex(first, false), FlaggedVertex.createUndirectedEdge(second));
            ctx.write(new JoinableVertex(second, false), FlaggedVertex.createUndirectedEdge(first));
        }
    }

    /**
     * Joins a zone assignment with edges. The input for one vertex is its zone
     * assignment followed by the edges incident to that vertex; each edge is
     * written out together with the vertex's zone.
     * <p>
     * The first value must be the zone assignment and every later value must
     * be an edge, otherwise an {@link IllegalArgumentException} is thrown.
     */
    public static class AssignOneZoneToEdgesReducer
            extends Reducer<JoinableVertex, FlaggedVertex, UndirectedEdge, Vertex> {
        @Override
        public void reduce(JoinableVertex first, Iterable<FlaggedVertex> verticesAndZone, Context ctx)
                throws IOException, InterruptedException {
            Iterator<FlaggedVertex> payloads = verticesAndZone.iterator();

            // the assignment is expected up front, exactly once (prepended due to JoinableVertex)
            FlaggedVertex head = payloads.next();
            boolean headIsAssignment = head.getType().equals(PayloadType.ZoneAssignment);
            if (!headIsAssignment) {
                throw new IllegalArgumentException();
            }
            // NOTE(review): Hadoop reuses value objects while iterating; this assumes the
            // Vertex obtained here is not overwritten by later next() calls - confirm
            Vertex zone = head.getVertex();

            while (payloads.hasNext()) {
                FlaggedVertex payload = payloads.next();
                if (payload.getType().equals(PayloadType.UndirectedEdge)) {
                    Vertex neighbour = payload.getVertex();
                    ctx.write(new UndirectedEdge(first.getVertex(), neighbour), zone);
                } else {
                    throw new IllegalArgumentException();
                }
            }
        }
    }

    /**
     * Determines the minimum zone among the zones reported for one edge.
     * <p>
     * For every other distinct zone an interzone edge is written: a record
     * keyed by that other zone whose value is the minimum zone that is to be
     * assigned. {@link Counter#ZONES_CONNECTED} is incremented once per record.
     */
    public static class FindInterzoneEdgesReducer extends Reducer<UndirectedEdge, Vertex, Vertex, FlaggedVertex> {
        @Override
        public void reduce(UndirectedEdge edge, Iterable<Vertex> assignments, Context ctx)
                throws IOException, InterruptedException {
            // distinct zone ids in ascending order
            TreeSet<Long> zones = new TreeSet<Long>();
            Iterator<Vertex> incoming = assignments.iterator();
            while (incoming.hasNext()) {
                zones.add(incoming.next().getId());
            }

            // the first id is the minimum; everything after it is an "other" zone
            Iterator<Long> ascending = zones.iterator();
            long minZone = ascending.next();
            while (ascending.hasNext()) {
                Long other = ascending.next();
                ctx.getCounter(Counter.ZONES_CONNECTED).increment(1L);
                ctx.write(new Vertex(other), FlaggedVertex.createInterzoneEdge(minZone));
            }
        }
    }

    /**
     * Mapper of the zone re-assignment job. It reads two kinds of records:
     * <ul>
     * <li>interzone edges from the interzone edges file</li>
     * <li>zone assignments from the assignments file</li>
     * </ul>
     *
     * A zone assignment is re-keyed under its zone representative, with the
     * assigned vertex as a zone entry.<br />
     * An interzone edge is forwarded unchanged under its key vertex.
     */
    public static class BinZoneAssignmentsAndInterzoneEdgesMapper
            extends Mapper<Vertex, FlaggedVertex, JoinableVertex, FlaggedVertex> {

        @Override
        public void map(Vertex vertex, FlaggedVertex vertexOrRepresentative, Context ctx)
                throws IOException, InterruptedException {
            switch (vertexOrRepresentative.getType()) {
            case ZoneAssignment: {
                // bin the assigned vertex under the zone it currently belongs to
                Vertex zone = vertexOrRepresentative.getVertex();
                JoinableVertex zoneKey = new JoinableVertex(zone, false);
                ctx.write(zoneKey, FlaggedVertex.createZoneEntry(vertex));
                break;
            }
            case InterzoneEdge: {
                // pass the interzone edge through, keyed with the flag set to true
                JoinableVertex edgeKey = new JoinableVertex(vertex, true);
                ctx.write(edgeKey, vertexOrRepresentative);
                break;
            }
            default:
                throw new IllegalArgumentException();
            }
        }
    }

    /**
     * Assigns new zone representatives to the vertices of one zone.
     * <p>
     * Interzone edges contribute candidate representatives. Each zone entry is
     * written with the smallest candidate seen so far, or with the old
     * representative when no candidate has been seen.
     */
    public static class AssignNewZonesToVerticesReducer
            extends Reducer<JoinableVertex, FlaggedVertex, Vertex, FlaggedVertex> {
        @Override
        public void reduce(JoinableVertex oldRepresentative,
                Iterable<FlaggedVertex> betterRepresentativesAndVertices, Context ctx)
                throws IOException, InterruptedException {
            // candidate representative ids, smallest first
            TreeSet<Long> betterZones = new TreeSet<Long>();
            for (FlaggedVertex payload : betterRepresentativesAndVertices) {
                switch (payload.getType()) {
                case ZoneEntry: {
                    Vertex member = payload.getVertex();
                    FlaggedVertex assignment;
                    if (betterZones.isEmpty()) {
                        // no interzone edge seen: the zone keeps its representative
                        assignment = FlaggedVertex.createZoneAssignment(oldRepresentative.getVertex());
                    } else {
                        // interzone edges exist: move the vertex to the best candidate
                        assignment = FlaggedVertex.createZoneAssignment(betterZones.first());
                    }
                    ctx.write(member, assignment);
                    break;
                }
                case InterzoneEdge:
                    // expected ahead of the zone entries (prepended due to JoinableVertex)
                    betterZones.add(payload.getVertex().getId());
                    break;
                default:
                    throw new IllegalArgumentException();
                }
            }
        }
    }

}