gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java Source code

Here is the source code for gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java

Source

/*
 * Copyright 2016 Crown Copyright
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gaffer.operation.simple.hdfs.handler.job.factory;

import gaffer.commonutil.CommonConstants;
import gaffer.operation.simple.hdfs.operation.AddElementsFromHdfs;
import gaffer.store.Store;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

public abstract class AbstractAddElementsFromHdfsJobFactory implements AddElementsFromHdfsJobFactory {

    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractAddElementsFromHdfsJobFactory.class);

    /**
     * Creates a job with the store specific job initialisation and then applies the operation specific
     * {@link gaffer.operation.simple.hdfs.handler.job.initialiser.JobInitialiser}.
     *
     * @param operation the add elements from hdfs operation
     * @param store     the store executing the operation
     * @return the created job
     * @throws IOException for IO issues
     */
    public Job createJob(final AddElementsFromHdfs operation, final Store store) throws IOException {
        final JobConf jobConf = createJobConf(operation, store);
        setupJobConf(jobConf, operation, store);

        final Job job = Job.getInstance(jobConf);
        setupJob(job, operation, store);

        // Apply Operation Specific Job Configuration
        if (null != operation.getJobInitialiser()) {
            operation.getJobInitialiser().initialiseJob(job, operation, store);
        }

        return job;
    }

    /**
     * Creates the {@link JobConf} for the job. Subclasses may override this to supply
     * store-specific Hadoop configuration.
     */
    protected JobConf createJobConf(final AddElementsFromHdfs operation, final Store store) throws IOException {
        return new JobConf(new Configuration());
    }

    /**
     * Copies the schema from the store, plus the mapper generator class name, validation
     * flag and any map/reduce task counts from the operation, onto the {@link JobConf}.
     */
    protected void setupJobConf(final JobConf jobConf, final AddElementsFromHdfs operation, final Store store)
            throws IOException {
        LOGGER.info("Setting up job conf");
        final String schemaJson = new String(store.getSchema().toJson(false), CommonConstants.UTF_8);
        jobConf.set(SCHEMA, schemaJson);
        LOGGER.info("Added {} {} to job conf", SCHEMA, schemaJson);
        jobConf.set(MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
        LOGGER.info("Added {} of {} to job conf", MAPPER_GENERATOR, operation.getMapperGeneratorClassName());
        jobConf.set(VALIDATE, String.valueOf(operation.isValidate()));
        LOGGER.info("Added {} option of {} to job conf", VALIDATE, operation.isValidate());
        Integer numTasks = operation.getNumMapTasks();
        if (null != numTasks) {
            jobConf.setNumMapTasks(numTasks);
            LOGGER.info("Set number of map tasks to {} on job conf", numTasks);
        }
        numTasks = operation.getNumReduceTasks();
        if (null != numTasks) {
            jobConf.setNumReduceTasks(numTasks);
            LOGGER.info("Set number of reduce tasks to {} on job conf", numTasks);
        }
    }

    /**
     * Sets the job jar and a descriptive job name. Subclasses can extend this to apply
     * further store-specific job configuration.
     */
    protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
            throws IOException {
        job.setJarByClass(getClass());
        job.setJobName(getJobName(operation.getMapperGeneratorClassName(), operation.getOutputPath()));
    }

    protected String getJobName(final String mapperGenerator, final String outputPath) {
        return "Ingest HDFS data: Generator=" + mapperGenerator + ", output=" + outputPath;
    }

}
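
For comparison, a store-specific factory would typically extend this class and override setupJob, calling super.setupJob first and then wiring in its own mapper, reducer, key/value types and output format; the JobInitialiser supplied on the operation is applied afterwards by createJob, typically to configure the input side. The sketch below is illustrative only: ExampleAddElementsFromHdfsJobFactory and the Text/NullWritable placeholder types are assumptions rather than part of Gaffer, and the class is left abstract so that any further methods declared on the AddElementsFromHdfsJobFactory interface need not be shown.

package gaffer.operation.simple.hdfs.handler.job.factory;

import gaffer.operation.simple.hdfs.operation.AddElementsFromHdfs;
import gaffer.store.Store;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import java.io.IOException;

// Illustrative sketch only - the class name and the placeholder key/value types are
// hypothetical; a real factory would substitute its own store-specific classes.
public abstract class ExampleAddElementsFromHdfsJobFactory extends AbstractAddElementsFromHdfsJobFactory {

    @Override
    protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
            throws IOException {
        // Keep the jar and job name configured by the abstract base class.
        super.setupJob(job, operation, store);

        // Store-specific wiring: a real factory would register its own mapper, reducer
        // and key/value types here. Text/NullWritable are stand-ins.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Write the ingest output to the path carried on the operation.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    }
}

A handler would then obtain the configured job from createJob and run it with the usual Hadoop call, for example job.waitForCompletion(true), once the operation and store have been supplied.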