com.liveramp.hank.hadoop.HadoopDomainBuilder.java Source code

Java tutorial

Introduction

Here is the source code for com.liveramp.hank.hadoop.HadoopDomainBuilder.java

Source

/**
 *  Copyright 2011 LiveRamp
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.liveramp.hank.hadoop;

import java.io.IOException;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.liveramp.hank.config.CoordinatorConfigurator;
import com.liveramp.hank.config.InvalidConfigurationException;
import com.liveramp.hank.config.yaml.YamlCoordinatorConfigurator;
import com.liveramp.hank.util.CommandLineChecker;

/**
 * Domain builder whose job reads from a file input path using a caller-supplied
 * input format and mapper class.
 *
 * <p>{@link #configureJob(JobConf)} wires those caller-supplied pieces into the job
 * together with the fixed reducer, partitioner and key/value classes used by this
 * builder.
 */
public class HadoopDomainBuilder extends AbstractHadoopDomainBuilder {

    private static final Logger LOG = LoggerFactory.getLogger(HadoopDomainBuilder.class);

    /** Input path handed to {@link FileInputFormat#setInputPaths} when the job is configured. */
    private final String sourcePath;
    /** Input format class installed on the job. */
    private final Class<? extends InputFormat> inputFormatType;
    /** Mapper class installed on the job. */
    private final Class<? extends Mapper> mapperType;

    /**
     * Creates a builder that relies on the superclass's no-argument constructor.
     *
     * @param inputPath        input path for the job
     * @param inputFormatClass input format class used to read the input
     * @param mapperClass      mapper class run over the input
     */
    public HadoopDomainBuilder(final String inputPath, final Class<? extends InputFormat> inputFormatClass,
            final Class<? extends Mapper> mapperClass) {
        super();
        this.sourcePath = inputPath;
        this.inputFormatType = inputFormatClass;
        this.mapperType = mapperClass;
    }

    /**
     * Creates a builder, forwarding the given job configuration to the superclass constructor.
     *
     * @param conf             job configuration passed to the superclass
     * @param inputPath        input path for the job
     * @param inputFormatClass input format class used to read the input
     * @param mapperClass      mapper class run over the input
     */
    public HadoopDomainBuilder(JobConf conf, final String inputPath,
            final Class<? extends InputFormat> inputFormatClass, final Class<? extends Mapper> mapperClass) {
        super(conf);
        this.sourcePath = inputPath;
        this.inputFormatType = inputFormatClass;
        this.mapperType = mapperClass;
    }

    /**
     * Applies this builder's input, map-side and reduce-side settings to the job.
     *
     * @param conf job configuration to populate
     */
    @Override
    protected void configureJob(JobConf conf) {
        wireInputSide(conf);
        wireMapSide(conf);
        wireReduceSide(conf);
    }

    /** Sets the input format and the input path on the job. */
    private void wireInputSide(final JobConf job) {
        job.setInputFormat(inputFormatType);
        FileInputFormat.setInputPaths(job, sourcePath);
    }

    /** Sets the mapper class and the map output key/value classes on the job. */
    private void wireMapSide(final JobConf job) {
        job.setMapperClass(mapperType);
        job.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
        job.setMapOutputValueClass(ValueWritable.class);
    }

    /** Sets the reducer class, the job output key/value classes and the partitioner on the job. */
    private void wireReduceSide(final JobConf job) {
        job.setReducerClass(DomainBuilderReducer.class);
        job.setOutputKeyClass(KeyAndPartitionWritable.class);
        job.setOutputValueClass(ValueWritable.class);
        job.setPartitionerClass(DomainBuilderPartitioner.class);
    }

    /**
     * Command-line entry point. Expects five arguments, in order: domain name, config path,
     * job jar, input path and output path.
     *
     * <p>The arguments are handed to {@link CommandLineChecker#check} first. The method then
     * prepares the domain builder properties, the job configuration and a builder reading
     * sequence files with the default mapper, logs what it is about to build, and finally
     * always throws {@link NotImplementedException}: launching the build is still a TODO.
     *
     * @param args command-line arguments as described above
     * @throws IOException                   declared by this method; NOTE(review): presumably
     *                                       raised while loading the configuration - confirm
     * @throws InvalidConfigurationException declared by this method; NOTE(review): presumably
     *                                       raised for a bad coordinator configuration - confirm
     */
    public static void main(String[] args) throws IOException, InvalidConfigurationException {
        final String[] expectedArguments = { "domain name", "config path", "jobjar", "input path", "output_path" };
        CommandLineChecker.check(args, expectedArguments, HadoopDomainBuilder.class);

        // Positional arguments, read in the order they are declared above.
        final String domain = args[0];
        final CoordinatorConfigurator coordinatorConfig = new YamlCoordinatorConfigurator(args[1]);
        final String jarLocation = args[2];
        final String source = args[3];
        final String destination = args[4];

        final DomainBuilderProperties properties = new DomainBuilderProperties(domain, coordinatorConfig)
                .setOutputPath(destination);

        final JobConf conf = new JobConf();
        conf.setJar(jarLocation);
        final String jobName = HadoopDomainBuilder.class.getSimpleName() + " Domain " + domain
                + ", Output path: " + destination;
        conf.setJobName(jobName);

        final HadoopDomainBuilder builder = new HadoopDomainBuilder(conf, source,
                SequenceFileInputFormat.class, DomainBuilderMapperDefault.class);

        final String announcement = "Building Hank domain " + domain + " from input " + source
                + " and coordinator configuration " + coordinatorConfig;
        LOG.info(announcement);

        // TODO: Create DomainVersionProperties. Until that exists, 'properties' and 'builder'
        // are prepared but never used; the intended follow-up call is
        // builder.buildHankDomain(properties, <domain version properties>).
        throw new NotImplementedException("TODO: Create DomainVersionProperties");
    }
}