com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMR.java Source code

Java tutorial

Introduction

Here is the source code for com.intel.hadoop.graphbuilder.partition.mapreduce.edge.EdgeIngressMR.java

Source

/* Copyright (C) 2012 Intel Corporation.
 *     All rights reserved.
 *           
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * For more about this software visit:
 *      http://www.01.org/GraphBuilder 
 */
package com.intel.hadoop.graphbuilder.partition.mapreduce.edge;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.log4j.Logger;

import com.intel.hadoop.graphbuilder.graph.GraphOutput;
import com.intel.hadoop.graphbuilder.graph.simplegraph.SimpleGraphOutput;
import com.intel.hadoop.graphbuilder.parser.FieldParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressKeyType;
import com.intel.hadoop.graphbuilder.partition.mapreduce.keyvalue.IngressValueType;

/**
 * The MapReduce driver class that takes a list of edges and vertices from the
 * input directories and outputs 2 parts: partitioned graphs and a list of
 * distributed vertex records.
 * <p>
 * Input directory: Can take multiple input directories containing list of
 * edges. Output directory structure:
 * <ul>
 * <li>$outputdir/partition{$i}/subpart{$j}/edata for edge data.</li>
 * <li>Metafile: $outputdir/partition{$i}/subpart{$j} for meta info.</li>
 * <li>Graph structure: $outputdir/partition{$i}/subpart{$j}/edgelist for
 * adjacency structure.</li>
 * <li>VertexRecords: $outputdir/vrecord list of vertex records.</li>
 * </ul>
 * </p>
 * <p>
 * Typical usage: construct with the parser classes, call
 * {@link #setKeyValueClass(Class, Class)}, optionally tune the job with the
 * other setters, then call {@link #run(String[], String, int, String)}.
 * </p>
 */
public class EdgeIngressMR {

    private static final Logger LOG = Logger.getLogger(EdgeIngressMR.class);

    /** Ingress strategy name written to the job configuration for random placement. */
    private static final String INGRESS_RANDOM = "random";

    /** Ingress strategy name written to the job configuration for the default strategy. */
    private static final String INGRESS_GREEDY = "greedy";

    /**
     * Alias accepted by {@link #setIngress(String)}; this class calls its
     * default strategy "oblivious" in messages but stores it as "greedy".
     */
    private static final String INGRESS_OBLIVIOUS = "oblivious";

    /** Number of subpartitions used when none (or a non-positive value) was set. */
    private static final int DEFAULT_SUBPART_PER_PARTITION = 8;

    /** MapReduce Job Counters. */
    public static enum COUNTER {
        NUM_VERTICES, NUM_EDGES
    }

    /**
     * Default constructor, initialize with parsers.
     * <p>
     * The parser classes are instantiated through their no-argument
     * constructors. If instantiation fails the failure is logged and the
     * parsers stay unset; {@link #run(String[], String, int, String)} will then
     * refuse to start.
     * </p>
     *
     * @param graphparser class implementing {@link GraphParser}
     * @param vidparser   class implementing {@link FieldParser} for vertex ids
     * @param vdataparser class implementing {@link FieldParser} for vertex data
     * @param edataparser class implementing {@link FieldParser} for edge data
     */
    public EdgeIngressMR(Class<?> graphparser, Class<?> vidparser, Class<?> vdataparser, Class<?> edataparser) {
        gzip = false;
        jobName = "Ingress Mapreduce Driver";
        setParser(graphparser, vidparser, vdataparser, edataparser);
        conf = new JobConf(EdgeIngressMR.class);
    }

    /**
     * Set the parser classes.
     * <p>
     * All four parsers are instantiated before any field is assigned, so a
     * failure never leaves this object with a mix of old and new parsers.
     * Instantiation failures are logged (with their cause) and the previously
     * set parsers, if any, are kept.
     * </p>
     *
     * @param graphparser class implementing {@link GraphParser}
     * @param vidparser   class implementing {@link FieldParser} for vertex ids
     * @param vdataparser class implementing {@link FieldParser} for vertex data
     * @param edataparser class implementing {@link FieldParser} for edge data
     */
    public void setParser(Class<?> graphparser, Class<?> vidparser, Class<?> vdataparser, Class<?> edataparser) {
        try {
            GraphParser newGraphParser = (GraphParser) graphparser.newInstance();
            FieldParser newVidParser = (FieldParser) vidparser.newInstance();
            FieldParser newVdataParser = (FieldParser) vdataparser.newInstance();
            FieldParser newEdataParser = (FieldParser) edataparser.newInstance();

            // Commit only after every instantiation succeeded.
            this.graphparser = newGraphParser;
            this.vidparser = newVidParser;
            this.vdataparser = newVdataParser;
            this.edataparser = newEdataParser;
        } catch (InstantiationException e) {
            logParserFailure(e, graphparser, vidparser, vdataparser, edataparser);
        } catch (IllegalAccessException e) {
            logParserFailure(e, graphparser, vidparser, vdataparser, edataparser);
        }
    }

    /** Log a parser instantiation failure together with its cause. */
    private static void logParserFailure(Exception cause, Class<?> graphparser, Class<?> vidparser,
            Class<?> vdataparser, Class<?> edataparser) {
        LOG.fatal("Parser classes: \n" + graphparser + "\n" + vidparser + "\n" + vdataparser + "\n"
                + edataparser + " could not be instantiated.", cause);
    }

    /**
     * Set the job name.
     *
     * @param name name passed to the job configuration when the job is run
     */
    public void setJobName(String name) {
        this.jobName = name;
    }

    /**
     * Set option for using gzip compression in output.
     *
     * @param gzip true to compress the job output with gzip
     */
    public void useGzip(boolean gzip) {
        this.gzip = gzip;
    }

    /**
     * Set the ingress strategy {random, oblivious}.
     * <p>
     * Accepted values are "random", "greedy" and "oblivious"; "oblivious" is
     * stored as "greedy", which is the name this class uses for its default
     * strategy. Any other value (including {@code null}) is reported as an
     * error and replaced by the default.
     * </p>
     *
     * @param ingress name of the ingress strategy
     */
    public void setIngress(String ingress) {
        if (INGRESS_RANDOM.equals(ingress) || INGRESS_GREEDY.equals(ingress)) {
            this.ingress = ingress;
        } else if (INGRESS_OBLIVIOUS.equals(ingress)) {
            // The error message below advertises "oblivious" as supported, so honor it.
            this.ingress = INGRESS_GREEDY;
        } else {
            LOG.error("Unknown ingress method: " + ingress + "\n Supported ingress methods: oblivious, random");
            LOG.error("Use the default oblivious ingress");
            this.ingress = INGRESS_GREEDY;
        }
    }

    /**
     * Set the intermediate key value class.
     * <p>
     * Both classes are instantiated through their no-argument constructors
     * before either field is assigned. Instantiation failures are logged and
     * the previously set types, if any, are kept.
     * </p>
     *
     * @param keyClass class implementing {@link IngressKeyType}
     * @param valClass class implementing {@link IngressValueType}
     */
    public void setKeyValueClass(Class<?> keyClass, Class<?> valClass) {
        try {
            IngressKeyType newKeyType = (IngressKeyType) keyClass.newInstance();
            IngressValueType newValType = (IngressValueType) valClass.newInstance();
            this.mapkeytype = newKeyType;
            this.mapvaltype = newValType;
        } catch (InstantiationException e) {
            LOG.fatal("Key/value classes: " + keyClass + ", " + valClass + " could not be instantiated.", e);
        } catch (IllegalAccessException e) {
            LOG.fatal("Key/value classes: " + keyClass + ", " + valClass + " could not be instantiated.", e);
        }
    }

    /**
     * @return JobConf of the current job.
     */
    public JobConf getConf() {
        return conf;
    }

    /**
     * Configure and run the job.
     * <p>
     * If the type check between the parsers and the key/value types fails, the
     * failure is logged and the method returns without submitting the job.
     * </p>
     *
     * @param inputpaths input directories; each one is added as an input path
     * @param outputpath output directory of the job
     * @param numProcs   value stored in the job configuration under "numProcs"
     * @param ingress    ingress strategy, see {@link #setIngress(String)}
     * @throws IOException           if running the job fails
     * @throws IllegalStateException if the parsers or the intermediate
     *                               key/value types have not been set
     */
    public void run(String[] inputpaths, String outputpath, int numProcs, String ingress) throws IOException {
        // Fail early with a clear message instead of a NullPointerException while logging.
        if (graphparser == null || vidparser == null || vdataparser == null || edataparser == null) {
            throw new IllegalStateException(
                    "Parsers are not initialized; check the earlier log output of setParser.");
        }
        if (mapkeytype == null || mapvaltype == null) {
            throw new IllegalStateException(
                    "Intermediate key/value types are not set; call setKeyValueClass before run.");
        }

        this.setIngress(ingress);
        conf.setJobName(jobName);
        if (this.subpartPerPartition <= 0) {
            this.subpartPerPartition = DEFAULT_SUBPART_PER_PARTITION;
        }

        LOG.info("===== Job: Partition edges and create vertex records =========");
        LOG.info("input: " + StringUtils.join(inputpaths, ","));
        LOG.info("output: " + outputpath);
        LOG.info("numProc = " + numProcs);
        LOG.info("subpartPerPartition = " + subpartPerPartition);
        LOG.info("keyclass = " + this.mapkeytype.getClass().getName());
        LOG.info("valclass = " + this.mapvaltype.getClass().getName());
        LOG.debug("graphparser = " + this.graphparser.getClass().getName());
        LOG.debug("vidparser = " + this.vidparser.getClass().getName());
        LOG.debug("vdataparser = " + this.vdataparser.getClass().getName());
        LOG.debug("edataparser = " + this.edataparser.getClass().getName());
        LOG.info("ingress = " + this.ingress);
        LOG.info("gzip = " + Boolean.toString(gzip));
        LOG.info("===============================================================");

        // Job parameters stored in the configuration under these keys.
        conf.set("ingress", this.ingress);
        conf.setInt("numProcs", numProcs);
        conf.set("GraphParser", graphparser.getClass().getName());
        conf.set("VidParser", vidparser.getClass().getName());
        conf.set("VdataParser", vdataparser.getClass().getName());
        conf.set("EdataParser", edataparser.getClass().getName());
        conf.setInt("subpartPerPartition", subpartPerPartition);

        conf.setMapOutputKeyClass(this.mapkeytype.getClass());
        conf.setMapOutputValueClass(this.mapvaltype.getClass());

        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(EdgeIngressMapper.class);
        conf.setCombinerClass(EdgeIngressCombiner.class);
        conf.setReducerClass(EdgeIngressReducer.class);

        // GraphOutput output = new GLGraphOutput(numProcs);
        GraphOutput output = new SimpleGraphOutput();
        output.init(conf);

        conf.setInputFormat(TextInputFormat.class);
        // conf.setOutputFormat(PartitionedGraphOutputFormat.class);

        if (gzip) {
            TextOutputFormat.setCompressOutput(conf, true);
            TextOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
        }

        for (String path : inputpaths) {
            FileInputFormat.addInputPath(conf, new Path(path));
        }
        FileOutputFormat.setOutputPath(conf, new Path(outputpath));

        if (!checkTypes()) {
            LOG.fatal("Type check failed. " + "Please check the parsers are consistent with key/val types.");
            return;
        }

        JobClient.runJob(conf);
        LOG.info("================== Done ====================================\n");
    }

    /**
     * Ensure the keytype, valuetype are consistent with the parser type.
     * <p>
     * Every mismatch is logged, so a single call reports all inconsistencies.
     * </p>
     *
     * @return true if all type checks pass.
     */
    private boolean checkTypes() {
        boolean check = true;

        // Runtime classes produced by the key type and by the value type's factory.
        Class<?> keyVidClass = mapkeytype.createVid().getClass();
        Class<?> valVidClass = mapvaltype.getGraphTypeFactory().createVid().getClass();
        Class<?> valVdataClass = mapvaltype.getGraphTypeFactory().createVdata().getClass();
        Class<?> valEdataClass = mapvaltype.getGraphTypeFactory().createEdata().getClass();

        if (!keyVidClass.equals(valVidClass)) {
            LOG.fatal("VidType is not consistant between MapKeyType: " + keyVidClass.getName()
                    + " and MapValueType: " + valVidClass.getName());
            check = false;
        }

        if (!(vidparser.getType()).equals(keyVidClass)) {
            LOG.fatal("VidType is not consistant between MapKeyType: " + keyVidClass.getName()
                    + " and Parser: " + vidparser.getType().getName());
            check = false;
        }

        if (!(vdataparser.getType().equals(valVdataClass))) {
            LOG.fatal("VertexDataType is not consistant between MapValueType: "
                    + valVdataClass.getName() + " and Parser: "
                    + vdataparser.getType().getName());
            check = false;
        }

        if (!(edataparser.getType().equals(valEdataClass))) {
            LOG.fatal("EdgeDataType is not consistant between MapValueType: "
                    + valEdataClass.getName() + " and Parser: "
                    + edataparser.getType().getName());
            check = false;
        }

        return check;
    }

    /**
     * Set the number of subpartitions per real partition.
     * <p>
     * A non-positive value is replaced by the default of 8 when the job is run.
     * </p>
     *
     * @param n number of subpartitions per real partition.
     */
    public void setTotalSubPartition(int n) {
        this.subpartPerPartition = n;
    }

    private JobConf conf;
    private GraphParser graphparser;
    private FieldParser vidparser;
    private FieldParser vdataparser;
    private FieldParser edataparser;

    private boolean gzip;
    private String jobName;
    private String ingress;
    private int subpartPerPartition;
    private IngressKeyType mapkeytype;
    private IngressValueType mapvaltype;
}