org.sf.xrime.algorithms.clique.maximal.InducedNeighborhoodGenerate.java Source code

Introduction

Here is the source code for org.sf.xrime.algorithms.clique.maximal.InducedNeighborhoodGenerate.java. This class is a single MapReduce pass of xrime's maximal clique algorithm: the mapper sends each vertex's adjacency list to all of its neighbors, and the reducer assembles, for each surviving vertex, the edges among its neighbors (its induced neighborhood), dropping vertices whose neighborhood is already covered by a neighbor's.

Source

/*
 * Copyright (C) IBM Corp. 2009.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.sf.xrime.algorithms.clique.maximal;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.sf.xrime.ProcessorExecutionException;
import org.sf.xrime.algorithms.GraphAlgorithm;
import org.sf.xrime.algorithms.utils.GraphAlgorithmMapReduceBase;
import org.sf.xrime.model.edge.AdjVertexEdge;
import org.sf.xrime.model.edge.Edge;
import org.sf.xrime.model.edge.EdgeSet;
import org.sf.xrime.model.vertex.LabeledAdjSetVertex;

/**
 * This algorithm is used to generate the induced neighborhood of each vertex from its neighborhood.
 * @author xue
 */
public class InducedNeighborhoodGenerate extends GraphAlgorithm {
    /**
     * Default constructor.
     */
    public InducedNeighborhoodGenerate() {
        super();
    }

    /**
     * Mapper. Tell each of my neighbors about my neighborhood.
     * @author xue
     */
    public static class MapClass extends GraphAlgorithmMapReduceBase
            implements Mapper<Text, LabeledAdjSetVertex, Text, LabeledAdjSetVertex> {

        @Override
        public void map(Text key, LabeledAdjSetVertex value, OutputCollector<Text, LabeledAdjSetVertex> output,
                Reporter reporter) throws IOException {
            // Tell each neighbor my neighborhood.
            for (AdjVertexEdge oppo : value.getOpposites()) {
                // Generate a notifier.
                LabeledAdjSetVertex notifier = new LabeledAdjSetVertex();
                notifier.setId(oppo.getOpposite());
                notifier.setLabel(ConstantLabels.NEIGHBOR_NEIGHBORS, value);
                // Notify the neighbor.
                output.collect(new Text(oppo.getOpposite()), notifier);
            }

            // Make myself shown in reducer.
            output.collect(key, value);
        }
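        // Illustration (toy input assumed for this page, not from the original code): for a
        // triangle a-b-c, running map() on vertex a with opposites {b, c} emits
        //   (b, notifier{id=b, NEIGHBOR_NEIGHBORS -> a's full vertex record}),
        //   (c, notifier{id=c, NEIGHBOR_NEIGHBORS -> a's full vertex record}),
        //   (a, a's own vertex record),
        // so each reduce key receives its own adjacency plus the adjacency of every neighbor.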
    }

    /**
     * Reducer. Summarize the neighborhoods of all my neighbors and generate my induced
     * neighborhood. Also, filter out some vertices from the maximal clique calculation.
     * 
     * @author xue
     */
    public static class ReduceClass extends GraphAlgorithmMapReduceBase
            implements Reducer<Text, LabeledAdjSetVertex, Text, LabeledAdjSetVertex> {

        @Override
        public void reduce(Text key, Iterator<LabeledAdjSetVertex> values,
                OutputCollector<Text, LabeledAdjSetVertex> output, Reporter reporter) throws IOException {
            // Ids of neighbors of this vertex.
            HashSet<String> my_neighbors = new HashSet<String>();
            // Records the mapping from a neighbor's id to the id set consisting of that neighbor
            // and its neighbors.
            HashMap<String, HashSet<String>> neighbor_nhs = new HashMap<String, HashSet<String>>();
            // Indicates whether we have already checked the neighbor's (neighborhood + neighbor itself)
            // for containment.
            HashMap<String, String> neighbor_nhs_checked = new HashMap<String, String>();
            // Edge set which represents the induced neighborhood of this vertex.
            HashSet<Edge> induced_neighborhood = new HashSet<Edge>();

            while (values.hasNext()) {
                LabeledAdjSetVertex curr_vertex = values.next();
                if (curr_vertex.getLabel(ConstantLabels.NEIGHBOR_NEIGHBORS) == null) {
                    // This is myself. Collect the ids of my neighbors.
                    for (AdjVertexEdge oppo : curr_vertex.getOpposites()) {
                        my_neighbors.add(oppo.getOpposite());
                    }
                } else {
                    // Get the neighborhood of this neighbor.
                    LabeledAdjSetVertex neighbor_nh = (LabeledAdjSetVertex) curr_vertex
                            .getLabel(ConstantLabels.NEIGHBOR_NEIGHBORS);
                    // Generate a set of vertex ids which includes the neighbor and all its neighbors.
                    HashSet<String> temp_idset = new HashSet<String>();
                    // Add the neighbor's id first.
                    temp_idset.add(neighbor_nh.getId());
                    // For each neighbor of the neighbor.
                    for (AdjVertexEdge oppo : neighbor_nh.getOpposites()) {
                        // Then add all ids of the neighbors of the neighbor.
                        temp_idset.add(oppo.getOpposite());
                    }
                    if (my_neighbors.size() != 0) {
                        if (temp_idset.size() > my_neighbors.size() && temp_idset.containsAll(my_neighbors)) {
                            // All my neighbors are already known, and they are all included in one
                            // of my neighbors' (neighborhood + the neighbor itself) sets.

                            // Distinguish strict containment ("<") from equal closed neighborhoods ("<=").
                            if (temp_idset.size() > (my_neighbors.size() + 1)) {
                                // I (this vertex) should not participate in the following calculation.
                                return;
                            } else if (neighbor_nh.getId().compareTo(key.toString()) <= 0) {
                                // When the sets are the same, filter out the vertex with the lexically larger id.
                                return;
                            }
                        }
                        // Indicate that this neighbor's (neighborhood + id) has already been checked.
                        neighbor_nhs_checked.put(neighbor_nh.getId(), "checked");
                    }
                    // Record the mapping from the id of this vertex's neighbor to temp_idset.
                    neighbor_nhs.put(neighbor_nh.getId(), temp_idset);
                }
            }

            // Determine whether this vertex (I, me, my) should be removed from the following maximal
            // clique calculation. If my neighbors are all contained in one of my neighbors'
            // corresponding sets, I should not participate in the calculation of maximal cliques.
            // A speculative check has already been done in the while loop above, but we still need
            // to check the remaining neighbors here.
            for (String neighbor_id : neighbor_nhs.keySet()) {
                // Check whether we have already examined this neighbor's (neighborhood + id).
                if (neighbor_nhs_checked.get(neighbor_id) != null)
                    continue;
                // Get this neighbor's (neighborhood + id).
                HashSet<String> temp_set = neighbor_nhs.get(neighbor_id);
                if (temp_set.size() > my_neighbors.size() && temp_set.containsAll(my_neighbors)) {
                    // Distinguish strict containment ("<") from equal closed neighborhoods ("<=").
                    if (temp_set.size() > (my_neighbors.size() + 1)) {
                        // This vertex should not participate in the following calculation.
                        return;
                    } else if (neighbor_id.compareTo(key.toString()) <= 0) {
                        // When the sets are the same, filter out the vertex with the lexically larger id.
                        return;
                    }
                }
            }

            // Find out the potential edges in the induced neighborhood of this vertex, which
            // come from this neighbor and its neighbors.
            for (String neighbor_id : neighbor_nhs.keySet()) {
                // A neighbor of mine and its neighbors.
                HashSet<String> temp_set = neighbor_nhs.get(neighbor_id);
                temp_set.remove(neighbor_id);
                for (String neighbor_nb : temp_set) {
                    if (neighbor_nb.compareTo(key.toString()) == 0 || // (neighbor_id, I) does not belong to my induced
                                                                      // neighborhood. 
                            (!my_neighbors.contains(neighbor_nb))) { // neighbor_nb does not belong to my neighborhood.
                        // Do nothing.
                    } else {
                        if (neighbor_id.compareTo(neighbor_nb) <= 0) {
                            // One edge may be added twice, since neighbor_id and neighbor_nb are mutual neighbors.
                            // The set can deal with this.
                            Edge new_edge = new Edge(neighbor_id, neighbor_nb);
                            induced_neighborhood.add(new_edge);
                        } else {
                            Edge new_edge = new Edge(neighbor_nb, neighbor_id);
                            induced_neighborhood.add(new_edge);
                        }
                    }
                }
            }

            // Generate and emit the final result.
            LabeledAdjSetVertex result = new LabeledAdjSetVertex();
            result.setId(key.toString());
            // Neighbors.
            HashSet<AdjVertexEdge> result_vertex_set = new HashSet<AdjVertexEdge>();
            for (String id : my_neighbors) {
                result_vertex_set.add(new AdjVertexEdge(id));
            }
            result.setOpposites(result_vertex_set);
            // Edges in induced neighborhood.
            EdgeSet result_edge_set = new EdgeSet();
            result_edge_set.setEdges(induced_neighborhood);
            result.setLabel(ConstantLabels.INDUCED_NEIGHBORHOOD, result_edge_set);
            output.collect(key, result);
        }
    }
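    // Worked example (toy triangle a-b-c, assumed for illustration): vertices "b" and "c"
    // are filtered out above, because "a"'s (neighborhood + itself) = {a, b, c} contains
    // their neighborhoods and "a" is lexically smaller. Vertex "a" is emitted with
    // opposites {b, c} and an INDUCED_NEIGHBORHOOD label holding the single edge (b, c).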

    @Override
    public void execute() throws ProcessorExecutionException {
        JobConf conf = new JobConf(context, InducedNeighborhoodGenerate.class);
        conf.setJobName("InducedNeighborhoodGenerate");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(LabeledAdjSetVertex.class);
        conf.setMapperClass(MapClass.class);
        // No combiner is permitted, since the logic of the reducer depends on the completeness
        // of information.
        conf.setReducerClass(ReduceClass.class);
        // Sequence files make the input/output format suitable for machine processing.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        // Enable compression.
        conf.setCompressMapOutput(true);
        conf.setMapOutputCompressorClass(GzipCodec.class);
        try {
            FileInputFormat.setInputPaths(conf, getSource().getPath());
            FileOutputFormat.setOutputPath(conf, getDestination().getPath());
        } catch (IllegalAccessException e1) {
            throw new ProcessorExecutionException(e1);
        }
        conf.setNumMapTasks(getMapperNum());
        conf.setNumReduceTasks(getReducerNum());

        try {
            this.runningJob = JobClient.runJob(conf);
        } catch (IOException e) {
            throw new ProcessorExecutionException(e);
        }
    }
}
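
To make the reducer's filtering rule concrete, the following is a minimal, self-contained sketch that reproduces only the containment test and the lexical tie-break from ReduceClass, using plain JDK collections on the triangle example above. The class and method names are illustrative and are not part of xrime.

import java.util.Arrays;
import java.util.HashSet;

public class FilterRuleSketch {
    // Returns true when a vertex should skip the maximal clique calculation:
    // a neighbor's (neighborhood + the neighbor itself) strictly contains this
    // vertex's neighborhood, or the two closed neighborhoods coincide and the
    // neighbor has the lexically smaller id.
    static boolean filteredOut(String myId, HashSet<String> myNeighbors,
            String neighborId, HashSet<String> neighborClosedSet) {
        if (neighborClosedSet.size() > myNeighbors.size()
                && neighborClosedSet.containsAll(myNeighbors)) {
            if (neighborClosedSet.size() > myNeighbors.size() + 1) {
                return true; // Strict containment: the neighbor dominates me.
            } else if (neighborId.compareTo(myId) <= 0) {
                return true; // Same closed neighborhood: keep only the lexically smallest vertex.
            }
        }
        return false;
    }

    public static void main(String[] args) {
        // Triangle a-b-c: every vertex is adjacent to the other two.
        HashSet<String> aNeighbors = new HashSet<String>(Arrays.asList("b", "c"));
        HashSet<String> bNeighbors = new HashSet<String>(Arrays.asList("a", "c"));
        HashSet<String> closedAbc = new HashSet<String>(Arrays.asList("a", "b", "c"));

        // Vertex "a" survives: its neighbor "b" is not lexically smaller than "a".
        System.out.println(filteredOut("a", aNeighbors, "b", closedAbc)); // false
        // Vertex "b" is dropped: "a"'s closed neighborhood covers b's and "a" < "b".
        System.out.println(filteredOut("b", bNeighbors, "a", closedAbc)); // true
    }
}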