org.trend.hgraph.mapreduce.pagerank.ImportPageRanks.java Source code

Introduction

Here is the source code for org.trend.hgraph.mapreduce.pagerank.ImportPageRanks.java
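It is a map-only MapReduce Tool: each (rowKey, pageRank) record read from the input SequenceFile is written directly into the configured HBase vertex table, together with an update flag.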

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.trend.hgraph.mapreduce.pagerank;

import java.io.IOException;

import org.apache.commons.lang.Validate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.trend.hgraph.HBaseGraphConstants;

/**
 * Import pageRanks from a <code>SequenceFile</code> into HBase,
 * where each record's key is the rowKey and its value is the pageRank.
 * @author scott_miao
 */
public class ImportPageRanks extends Configured implements Tool {

    private static final Logger LOGGER = LoggerFactory.getLogger(ImportPageRanks.class);

    private static class ImportPageRanksMapper extends Mapper<Text, DoubleWritable, Text, DoubleWritable> {
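        // Each input record is written straight to HBase in map(); the mapper emits
        // no output, so the job runs map-only with NullOutputFormat (see createSubmittableJob).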

        private HTable vertexTable;

        /*
         * (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object,
         * org.apache.hadoop.mapreduce.Mapper.Context)
         */
        @Override
        protected void map(Text key, DoubleWritable value, Context context)
                throws IOException, InterruptedException {
            Put put = null;
            // use toString() so only getLength() bytes of the Text backing array are decoded
            String rowKey = key.toString().trim();
            try {
                put = new Put(Bytes.toBytes(rowKey));
                // set rank value
                put.add(Bytes.toBytes(HBaseGraphConstants.HBASE_GRAPH_TABLE_COLFAM_PROPERTY_NAME),
                        Bytes.toBytes(Constants.PAGE_RANK_CQ_NAME
                                + HBaseGraphConstants.HBASE_GRAPH_TABLE_COLFAM_PROPERTY_NAME_DELIMITER + "String"),
                        Bytes.toBytes("" + value.get()));
                // set update flag to 1 (true)
                put.add(Bytes.toBytes(HBaseGraphConstants.HBASE_GRAPH_TABLE_COLFAM_PROPERTY_NAME),
                        Bytes.toBytes(Constants.PAGE_RANK_CQ_UPDATED_NAME
                                + HBaseGraphConstants.HBASE_GRAPH_TABLE_COLFAM_PROPERTY_NAME_DELIMITER + "String"),
                        Bytes.toBytes("1"));
                vertexTable.put(put);
            } catch (IOException e) {
                LOGGER.error("import pageRank for rowKey:" + rowKey + " failed", e);
                throw e;
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            vertexTable.close();
        }

        /*
         * (non-Javadoc)
         * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            String vertexTableName = context.getConfiguration()
                    .get(HBaseGraphConstants.HBASE_GRAPH_TABLE_VERTEX_NAME_KEY);
            Validate.notEmpty(vertexTableName, "vertexTableName must not be empty");
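            // one HTable handle per mapper task, closed again in cleanup()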
            vertexTable = new HTable(context.getConfiguration(), vertexTableName);
        }

    }

    /**
     * Default constructor.
     */
    public ImportPageRanks() {
        super();
    }

    /**
     * Constructor for tests.
     * @param conf the configuration to use
     */
    protected ImportPageRanks(Configuration conf) {
        super(conf);
    }

    /* (non-Javadoc)
     * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
     */
    @Override
    public int run(String[] args) throws Exception {
        if (null == args || args.length != 2) {
            System.err.println("shall pass only 2 options");
            printUsage();
            return 1;
        }

        String inputPath = args[0];
        String vertexTableName = args[1];
        LOGGER.info("pass two options:" + inputPath + ", " + vertexTableName);

        Configuration conf = getConf();
        conf.set(HBaseGraphConstants.HBASE_GRAPH_TABLE_VERTEX_NAME_KEY, vertexTableName);

        Job job = createSubmittableJob(conf, inputPath);
        String jobName = job.getJobName();
        LOGGER.info("start to run job:" + jobName);
        boolean succeed = job.waitForCompletion(true);
        if (!succeed)
            return 1;
        LOGGER.info("run job:" + jobName + " finished");
        return 0;
    }

    public static Job createSubmittableJob(Configuration conf, String inputPath) throws IOException {
        String vertexTableName = conf.get(HBaseGraphConstants.HBASE_GRAPH_TABLE_VERTEX_NAME_KEY);
        Validate.notEmpty(vertexTableName, "vertexTableName must not be empty");
        Validate.notEmpty(inputPath, "inputPath must not be empty");

        long timestamp = System.currentTimeMillis();
        Job job = null;
        String jobName = null;
        try {
            jobName = "ImportPageRanks_" + timestamp;
            LOGGER.info("start to run job:" + jobName);
            job = new Job(conf, jobName);
            job.setJarByClass(ImportPageRanks.class);

            LOGGER.info("inputPath=" + inputPath);
            LOGGER.info("vertexTableName=" + vertexTableName);

            FileInputFormat.setInputPaths(job, new Path(inputPath));
            job.setMapperClass(ImportPageRanksMapper.class);
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(DoubleWritable.class);

            // only mapper
            job.setOutputFormatClass(NullOutputFormat.class);
            job.setNumReduceTasks(0);

            Utils.setAuthenticationToken(job, LOGGER);
        } catch (IOException e) {
            LOGGER.error("run " + jobName + " failed", e);
            throw e;
        }
        return job;
    }

    private static void printUsage() {
        System.err.println("Usage: " + ImportPageRanks.class.getName() + " <input-path> <hbase-vertex-table>");
        System.err.println("Imports the pageRank intermediate data into <hbase-vertex-table>");
    }

    /**
     * Entry point.
     * @param args command line arguments
     * @throws Exception
     * @see #printUsage()
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int retCode = ToolRunner.run(conf, new ImportPageRanks(), args);
        System.exit(retCode);
    }

}
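
For reference, the job consumes a SequenceFile whose keys are Text rowKeys and whose values are DoubleWritable pageRanks. Below is a minimal, hypothetical sketch of producing such an input file by hand with Hadoop's classic SequenceFile.Writer API; the output path, class name, and vertex row keys are made-up examples, not part of hgraph.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

/** Hypothetical helper: writes (rowKey, pageRank) pairs in the format ImportPageRanks expects. */
public class WritePageRanksSketch {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // made-up path; pass this file (or its directory) as <input-path>
        Path path = new Path("/tmp/pageRanks/part-00000");
        SequenceFile.Writer writer =
            SequenceFile.createWriter(fs, conf, path, Text.class, DoubleWritable.class);
        try {
            // key => rowKey, value => pageRank
            writer.append(new Text("vertex-0001"), new DoubleWritable(0.15d));
            writer.append(new Text("vertex-0002"), new DoubleWritable(0.85d));
        } finally {
            writer.close();
        }
    }
}

With such a file in place, the tool would typically be launched through its main(), for example via hadoop jar with the two arguments <input-path> <hbase-vertex-table> (the jar name depends on how the project is packaged).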