// Java tutorial example: EdgeSetJoin — a Hadoop MapReduce join over edge sets.
/*
 * Copyright (C) IBM Corp. 2009.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.sf.xrime.algorithms.partitions.connected.bi;

import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.sf.xrime.ProcessorExecutionException;
import org.sf.xrime.algorithms.GraphAlgorithm;
import org.sf.xrime.algorithms.utils.GraphAlgorithmMapReduceBase;
import org.sf.xrime.model.edge.Edge;
import org.sf.xrime.model.edge.EdgeComparator;
import org.sf.xrime.model.edge.EdgeSet;

/**
 * Just like weakly.alg_1.VertexSetJoin, used to calculate the transitive
 * closure of the share edge relationship between circles (edge sets).
 *
 * @author xue
 */
public class EdgeSetJoin extends GraphAlgorithm {
  /**
   * Default constructor.
   */
  public EdgeSetJoin() {
    super();
  }

  /**
   * Emit the edge at lower layer as k2, the edge at higher layer as v2.
   *
   * @author xue
   */
  public static class MapClass extends GraphAlgorithmMapReduceBase implements
      Mapper<Text, EdgeSet, Text, EdgeSet> {

    @Override
    public void map(Text key, EdgeSet value,
        OutputCollector<Text, EdgeSet> output, Reporter reporter)
        throws IOException {
      // The input key encodes a higher-layer edge as "from<NON_ID_CHAR>to".
      // Convert it to String once instead of re-materializing it per use.
      String key_str = key.toString();
      int index_of_sharp = key_str.indexOf(ConstantLabels.NON_ID_CHAR);
      String from = key_str.substring(0, index_of_sharp);
      String to = key_str.substring(index_of_sharp + 1);
      Edge higher_edge = new Edge(from, to);
      // Singleton edge set carrying the higher-layer edge. Reusing one object
      // for every collect() below is safe because the framework serializes the
      // value at collect time.
      EdgeSet result_set = new EdgeSet();
      result_set.addEdge(higher_edge);
      // Emit each lower-layer edge as k2, with the higher-layer edge set as v2.
      for (Edge edge : value.getEdges()) {
        String k2 = edge.getFrom() + ConstantLabels.NON_ID_CHAR + edge.getTo();
        output.collect(new Text(k2), result_set);
      }
    }
  }

  /**
   * Merge higher layer edge sets with the same lower edge.
   *
   * @author xue
   */
  public static class ReduceClass extends GraphAlgorithmMapReduceBase implements
      Reducer<Text, EdgeSet, Text, EdgeSet> {

    @Override
    public void reduce(Text key, Iterator<EdgeSet> values,
        OutputCollector<Text, EdgeSet> output, Reporter reporter)
        throws IOException {
      // Collect and sort all higher-layer edges sharing this lower edge.
      TreeSet<Edge> edge_set = new TreeSet<Edge>(new EdgeComparator());
      while (values.hasNext()) {
        EdgeSet curr_set = values.next();
        // No need to do deep clone, since the edges are generated by readFields.
        edge_set.addAll(curr_set.getEdges());
      }
      // k3 is the lexically smallest edge in the set (first element of the
      // TreeSet under EdgeComparator ordering).
      String k3 = edge_set.first().getFrom() + ConstantLabels.NON_ID_CHAR
          + edge_set.first().getTo();
      EdgeSet v3 = new EdgeSet();
      v3.setEdges(edge_set);
      // Emit.
      output.collect(new Text(k3), v3);
    }
  }

  /**
   * Configure and run the MapReduce job for this join.
   *
   * @throws ProcessorExecutionException if paths cannot be resolved or the job
   *         fails with an I/O error.
   */
  @Override
  public void execute() throws ProcessorExecutionException {
    JobConf conf = new JobConf(context, EdgeSetJoin.class);
    conf.setJobName("EdgeSetJoin");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(EdgeSet.class);
    conf.setMapperClass(MapClass.class);
    // Since this is a join operation, combiner is not permitted here.
    conf.setReducerClass(ReduceClass.class);
    // Makes the file format suitable for machine processing.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    try {
      FileInputFormat.setInputPaths(conf, getSource().getPath());
      FileOutputFormat.setOutputPath(conf, getDestination().getPath());
    } catch (IllegalAccessException e1) {
      throw new ProcessorExecutionException(e1);
    }
    conf.setNumMapTasks(getMapperNum());
    conf.setNumReduceTasks(getReducerNum());
    conf.setMapOutputCompressorClass(GzipCodec.class);
    conf.setCompressMapOutput(true);

    try {
      this.runningJob = JobClient.runJob(conf);
    } catch (IOException e) {
      throw new ProcessorExecutionException(e);
    }
  }
}