org.lab41.mapreduce.IdUsingBulkLoaderDriver.java Source code

Java tutorial

Introduction

Here is the source code for org.lab41.mapreduce.IdUsingBulkLoaderDriver.java

Source

/*
 * Copyright 2014 In-Q-Tel Inc.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.lab41.mapreduce;

import com.thinkaurelius.faunus.FaunusGraph;
import com.thinkaurelius.faunus.FaunusVertex;
import com.thinkaurelius.faunus.Holder;
import com.thinkaurelius.titan.diskstorage.StorageException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.lab41.HdfsUtil;
import org.lab41.hbase.HbaseConfigurator;
import org.lab41.hbase.TitanHbaseThreePartSplitter;
import org.lab41.mapreduce.blueprints.BlueprintsGraphOutputMapReduce;
import org.lab41.schema.GraphSchemaWriter;
import org.lab41.schema.KroneckerGraphSchemaWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.lab41.Settings.*;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;

/**
 * Two-phase "id using" bulk loader driver.
 *
 * <p>Job 1 reads the Faunus input graph and writes vertices (keyed by id) to an
 * intermediate sequence file; between the jobs the target HBase table is
 * major-compacted, optionally split, and rebalanced; job 2 then replays the
 * intermediate data to write the edges through the configured graph output format.
 */
public class IdUsingBulkLoaderDriver extends BaseBullkLoaderDriver {

    private static final Logger logger = LoggerFactory.getLogger(IdUsingBulkLoaderDriver.class);

    /**
     * Configures and runs both MapReduce jobs sequentially, performing HBase
     * maintenance (major compact, optional split, balance) between them.
     *
     * <p>NOTE(review): the {@code conf} parameter is only consulted for the
     * between-job split flag; everything else is driven off {@link #getConf()}.
     * Presumably both refer to the same configuration — confirm against the
     * base driver.
     *
     * @param conf configuration consulted for {@code BETWEEN_SPLIT_KEY}
     * @return 0 when both jobs complete successfully, 1 otherwise
     * @throws IOException on HDFS/HBase/job-submission failures
     */
    public int configureAndRunJobs(Configuration conf) throws IOException, ClassNotFoundException,
            InterruptedException, StorageException, InstantiationException, IllegalAccessException {

        logger.info("IdUsingBulkLoaderDriver");

        Configuration baseConfiguration = getConf();

        // Log the effective configuration; previously the XML was serialized
        // into a StringWriter and the result silently discarded.
        StringWriter stringWriter = new StringWriter();
        baseConfiguration.writeXml(stringWriter);
        logger.debug("Effective configuration: {}", stringWriter);

        getAdditionalProperties(baseConfiguration, propsPath);
        getAdditionalProperties(baseConfiguration, sysPath);

        String hbaseSiteXmlPath = hbaseSiteXml;
        InputStream hbaseSiteXmlIS = getInputStreamForPath(hbaseSiteXmlPath, baseConfiguration);

        configureHbase(baseConfiguration, hbaseSiteXmlIS);

        // Configure the first and second jobs.
        FaunusGraph faunusGraph = new FaunusGraph(baseConfiguration);

        String job1OutputPath = faunusGraph.getOutputLocation().toString();
        Path intermediatePath = new Path(job1OutputPath + "/job1");

        FileSystem fs = FileSystem.get(baseConfiguration);
        Job job1 = configureJob1(faunusGraph, intermediatePath, baseConfiguration, fs);
        Job job2 = configureJob2(baseConfiguration, faunusGraph, fs);

        // The FaunusGraph is only needed while building the job configurations.
        faunusGraph.shutdown();

        if (!job1.waitForCompletion(true)) {
            return 1;
        }

        logger.info("SUCCESS 1: Cleaning up HBASE ");
        String tableName = baseConfiguration.get("faunus.graph.output.titan.storage.tablename");
        HBaseAdmin hBaseAdmin = new HBaseAdmin(baseConfiguration);
        try {
            hBaseAdmin.majorCompact(tableName);

            boolean betweenSplit = conf.getBoolean(BETWEEN_SPLIT_KEY, BETWEEN_SPLIT_DEFUALT);
            if (betweenSplit) {
                hBaseAdmin.split(tableName);
            }
            hBaseAdmin.balancer();
        } finally {
            // HBaseAdmin holds a connection; it was previously leaked.
            hBaseAdmin.close();
        }

        logger.info("HBASE Clean up complete- starting next job");

        if (job2.waitForCompletion(true)) {
            logger.info("SUCCESS 2");
            // Tool contract: 0 signals success to ToolRunner/System.exit.
            // Previously this driver returned 1 unconditionally.
            return 0;
        }
        return 1;
    }

    /**
     * Builds the map-only edge-writing job: reads the intermediate sequence
     * files produced by job 1 and emits vertices through the configured graph
     * output format. Any pre-existing job2 output directory is removed first.
     *
     * @param baseConfiguration configuration cloned for this job
     * @param faunusGraph       supplies the input/output locations and formats
     * @param fs                filesystem used to clear stale output
     * @return the fully configured (but not yet submitted) job
     * @throws IOException on filesystem access failure
     */
    private Job configureJob2(Configuration baseConfiguration, FaunusGraph faunusGraph, FileSystem fs)
            throws IOException {
        Configuration job2Config = new Configuration(baseConfiguration);
        /** Job  2 Configuration **/
        Job job2 = new Job(job2Config);
        job2.setInputFormatClass(SequenceFileInputFormat.class);
        job2.setOutputFormatClass(faunusGraph.getGraphOutputFormat());
        job2.setJobName("IdUsingBulkLoader Job2: " + faunusGraph.getInputLocation());
        job2.setJarByClass(IdUsingBulkLoaderDriver.class);
        job2.setMapperClass(IdUsingBulkLoaderMapReduce.EdgeMapper.class);
        job2.setMapOutputKeyClass(NullWritable.class);
        job2.setMapOutputValueClass(FaunusVertex.class);

        FileInputFormat.setInputPaths(job2, faunusGraph.getInputLocation());
        // Map-only: vertices/edges are written directly by the output format.
        job2.setNumReduceTasks(0);

        String strJob2OutputPath = faunusGraph.getOutputLocation().toString();
        Path job2Path = new Path(strJob2OutputPath + "/job2");

        if (fs.isDirectory(job2Path)) {
            logger.info("{} exists -- deleting", job2Path);
            fs.delete(job2Path, true);
        }

        FileOutputFormat.setOutputPath(job2, job2Path);

        return job2;
    }

    /**
     * Builds the map-only vertex-writing job: reads the Faunus input graph and
     * writes {@code LongWritable}/{@code Holder} pairs to an intermediate
     * sequence-file directory. Any pre-existing intermediate directory is
     * removed first.
     *
     * @param faunusGraph       supplies the input location and input format
     * @param intermediatePath  output directory for the intermediate data
     * @param baseConfiguration configuration cloned for this job
     * @param fs                filesystem used to clear stale output
     * @return the fully configured (but not yet submitted) job
     * @throws IOException on filesystem access failure
     */
    private Job configureJob1(FaunusGraph faunusGraph, Path intermediatePath, Configuration baseConfiguration,
            FileSystem fs) throws IOException {

        Configuration job1Config = new Configuration(baseConfiguration);
        /** Job 1 Configuration **/

        Job job1 = new Job(job1Config);
        job1.setJobName("IdUsingBulkLoader Job1" + faunusGraph.getInputLocation());
        job1.setJarByClass(IdUsingBulkLoaderDriver.class);
        job1.setMapperClass(IdUsingBulkLoaderMapReduce.VertexMapper.class);
        job1.setMapOutputKeyClass(LongWritable.class);
        job1.setMapOutputValueClass(Holder.class);

        // Map-only job; the sequence-file output is consumed by job 2.
        job1.setNumReduceTasks(0);

        job1.setInputFormatClass(faunusGraph.getGraphInputFormat());
        job1.setOutputFormatClass(SequenceFileOutputFormat.class);

        if (fs.isDirectory(intermediatePath)) {
            logger.info("{} exists -- deleting", intermediatePath);
            fs.delete(intermediatePath, true);
        }

        FileOutputFormat.setOutputPath(job1, intermediatePath);
        Path inputPath = faunusGraph.getInputLocation();
        FileInputFormat.setInputPaths(job1, inputPath);
        return job1;
    }

    /**
     * Entry point: delegates to {@link ToolRunner} so generic Hadoop options
     * (-D, -conf, …) are parsed, and exits with the driver's return code.
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new IdUsingBulkLoaderDriver(), args);

        System.exit(exitCode);
    }

}