Java tutorial
/* Copyright (C) 2012 Intel Corporation. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For more about this software visit: * http://www.01.org/GraphBuilder */ package com.intel.hadoop.graphbuilder.partition.mapreduce.edge; import java.io.IOException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.log4j.Logger; import com.intel.hadoop.graphbuilder.graph.GraphOutput; import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraphOutput; import com.intel.hadoop.graphbuilder.parser.FieldParser; import com.intel.hadoop.graphbuilder.parser.GraphParser; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType; import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType; /** * The MapRedue class takes from input directory a list of edges and vertices, * and output 2 parts: partitioned graphs and a list of distributed vertex * records. * <p> * Input directory: Can take multiple input directories containing list of * edges. Output directory structure: * <ul> * <li>$outputdir/partition{$i}/subpart{$j}/edata for edge data.</li> * <li>Metafile: $outputdir/partition{$i}/subpart{$j} for meta info.</li> * <li>Graph structure: $outputdir/partition{$i}/subpart{$j}/edgelist for * adjacency structure.</li> * <li>VertexRecords: $outputdir/vrecord list of vertex records.</li> * </ul> * </p> * */ public class EdgeIngressMR { private static final Logger LOG = Logger.getLogger(EdgeIngressMR.class); /** MapReduce Job Counters. */ public static enum COUNTER { NUM_VERTICES, NUM_EDGES }; /** * Default constructor, initialize with parsers. * * @param graphparser * @param vidparser * @param vdataparser * @param edataparser */ public EdgeIngressMR(Class graphparser, Class vidparser, Class vdataparser, Class edataparser) { gzip = false; jobName = "Ingress Mapreduce Driver"; setParser(graphparser, vidparser, vdataparser, edataparser); conf = new JobConf(EdgeIngressMR.class); } /** * Set the parser class. * * @param parser */ public void setParser(Class graphparser, Class vidparser, Class vdataparser, Class edataparser) { try { this.graphparser = (GraphParser) graphparser.newInstance(); this.vidparser = (FieldParser) vidparser.newInstance(); this.vdataparser = (FieldParser) vdataparser.newInstance(); this.edataparser = (FieldParser) edataparser.newInstance(); } catch (InstantiationException e) { e.printStackTrace(); LOG.fatal("Parser classes: \n" + graphparser + "\n" + vidparser + "\n" + vdataparser + "\n" + edataparser + " do not exist."); } catch (IllegalAccessException e) { e.printStackTrace(); LOG.fatal("Parser classes: \n" + graphparser + "\n" + vidparser + "\n" + vdataparser + "\n" + edataparser + " do not exist."); } } /** * Set the job name. * * @param name */ public void setJobName(String name) { this.jobName = name; } /** * Set option for using gzip compression in output. * * @param gzip */ public void useGzip(boolean gzip) { this.gzip = gzip; } /** * Set the ingress strategy {random, oblivious}. * * @see {ObliviousIngress} * @see {RandomIngress} * @param ingress */ public void setIngress(String ingress) { if (ingress.equals("random") || ingress.equals("greedy")) this.ingress = ingress; else { LOG.error("Unknown ingress method: " + ingress + "\n Supported ingress methods: oblivious, random"); LOG.error("Use the default oblivious ingress"); this.ingress = "greedy"; } } /** * Set the intermediate key value class. * * @param keyClass * @param valClass */ public void setKeyValueClass(Class keyClass, Class valClass) { try { this.mapkeytype = (IngressKeyType) keyClass.newInstance(); this.mapvaltype = (IngressValueType) valClass.newInstance(); } catch (InstantiationException e) { e.printStackTrace(); } catch (IllegalAccessException e) { e.printStackTrace(); } } /** * @return JobConf of the current job. */ public JobConf getConf() { return conf; } /** * @param inputpath * @param outputpath * @param numProcs * @param ingress * @throws IOException */ public void run(String[] inputpaths, String outputpath, int numProcs, String ingress) throws IOException { this.setIngress(ingress); conf.setJobName(jobName); if (this.subpartPerPartition <= 0) this.subpartPerPartition = 8; LOG.info("===== Job: Partition edges and create vertex records ========="); LOG.info("input: " + StringUtils.join(inputpaths, ",")); LOG.info("output: " + outputpath); LOG.info("numProc = " + numProcs); LOG.info("subpartPerPartition = " + subpartPerPartition); LOG.info("keyclass = " + this.mapkeytype.getClass().getName()); LOG.info("valclass = " + this.mapvaltype.getClass().getName()); LOG.debug("graphparser = " + this.graphparser.getClass().getName()); LOG.debug("vidparser = " + this.vidparser.getClass().getName()); LOG.debug("vdataparser = " + this.vdataparser.getClass().getName()); LOG.debug("edataparser = " + this.edataparser.getClass().getName()); LOG.info("ingress = " + this.ingress); LOG.info("gzip = " + Boolean.toString(gzip)); LOG.info("==============================================================="); conf.set("ingress", this.ingress); conf.setInt("numProcs", numProcs); conf.set("GraphParser", graphparser.getClass().getName()); conf.set("VidParser", vidparser.getClass().getName()); conf.set("VdataParser", vdataparser.getClass().getName()); conf.set("EdataParser", edataparser.getClass().getName()); conf.setInt("subpartPerPartition", subpartPerPartition); conf.setMapOutputKeyClass(this.mapkeytype.getClass()); conf.setMapOutputValueClass(this.mapvaltype.getClass()); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(EdgeIngressMapper.class); conf.setCombinerClass(EdgeIngressCombiner.class); conf.setReducerClass(EdgeIngressReducer.class); // GraphOutput output = new GLGraphOutput(numProcs); GraphOutput output = new SimpleGraphOutput(); output.init(conf); conf.setInputFormat(TextInputFormat.class); // conf.setOutputFormat(PartitionedGraphOutputFormat.class); if (gzip) { TextOutputFormat.setCompressOutput(conf, true); TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class); } for (String path : inputpaths) FileInputFormat.addInputPath(conf, new Path(path)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); if (!checkTypes()) { LOG.fatal("Type check failed." + "Please check the parsers are consistent with key/val types."); return; } JobClient.runJob(conf); LOG.info("================== Done ====================================\n"); } /** * Ensure the keytype, valuetype are consistent with the parser type. * @return true if type check. */ private boolean checkTypes() { boolean check = true; if (!(mapkeytype.createVid().getClass()).equals(mapvaltype.getGraphTypeFactory().createVid().getClass())) { LOG.fatal("VidType is not consistant between MapKeyType: " + mapkeytype.createVid().getClass().getName() + " and MapValueType: " + mapvaltype.getGraphTypeFactory().createVid().getClass().getName()); check = false; } if (!(vidparser.getType()).equals(mapkeytype.createVid().getClass())) { LOG.fatal("VidType is not consistant between MapKeyType: " + mapkeytype.createVid().getClass().getName() + " and Parser: " + vidparser.getType().getName()); check = false; } if (!(vdataparser.getType().equals(mapvaltype.getGraphTypeFactory().createVdata().getClass()))) { LOG.fatal("VertexDataType is not consistant between MapValueType: " + mapvaltype.getGraphTypeFactory().createVdata().getClass().getName() + " and Parser: " + vdataparser.getType().getName()); check = false; } if (!(edataparser.getType().equals(mapvaltype.getGraphTypeFactory().createEdata().getClass()))) { LOG.fatal("EdgeDataType is not consistant between MapValueType: " + mapvaltype.getGraphTypeFactory().createEdata().getClass().getName() + " and Parser: " + edataparser.getType().getName()); check = false; } return check; } /** * Set the number of subpartitions per real partition. * @param n number of subpartitions per real partition. */ public void setTotalSubPartition(int n) { this.subpartPerPartition = n; } private JobConf conf; private GraphParser graphparser; private FieldParser vidparser; private FieldParser vdataparser; private FieldParser edataparser; private boolean gzip; private String jobName; private String ingress; private int subpartPerPartition; private IngressKeyType mapkeytype; private IngressValueType mapvaltype; }