org.apache.sqoop.mapreduce.PGBulkloadExportJob.java Source code

Introduction

Here is the source code for org.apache.sqoop.mapreduce.PGBulkloadExportJob.java, the Apache Sqoop job class that exports data to PostgreSQL by running the pg_bulkload utility inside each map task. A short usage sketch follows the listing.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.mapreduce;

import java.io.IOException;
import com.cloudera.sqoop.manager.ExportJobContext;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.SqoopOptions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.sqoop.config.ConfigurationHelper;
import org.apache.sqoop.lib.DelimiterSet;
import org.apache.sqoop.manager.ConnManager;
import org.apache.sqoop.mapreduce.db.DBConfiguration;
import org.apache.sqoop.orm.TableClassName;

/**
 * Class that runs an export job using pg_bulkload in the mapper.
 */
public class PGBulkloadExportJob extends ExportJobBase {

    public static final Log LOG = LogFactory.getLog(PGBulkloadExportJob.class.getName());

    public PGBulkloadExportJob(final ExportJobContext context) {
        super(context);
    }

    public PGBulkloadExportJob(final ExportJobContext ctxt, final Class<? extends Mapper> mapperClass,
            final Class<? extends InputFormat> inputFormatClass,
            final Class<? extends OutputFormat> outputFormatClass) {
        super(ctxt, mapperClass, inputFormatClass, outputFormatClass);
    }

    /**
     * Configure the input format and copy the JDBC connection settings
     * (driver class, connect string, and optional credentials) into the
     * job configuration.
     */
    @Override
    protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
            throws ClassNotFoundException, IOException {
        super.configureInputFormat(job, tableName, tableClassName, splitByCol);
        ConnManager mgr = context.getConnManager();
        String username = options.getUsername();
        if (null == username || username.length() == 0) {
            DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                    options.getFetchSize(), options.getConnectionParams());
        } else {
            DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                    username, options.getPassword(), options.getFetchSize(), options.getConnectionParams());
        }
    }

    @Override
    protected Class<? extends Mapper> getMapperClass() {
        return PGBulkloadExportMapper.class;
    }

    protected Class<? extends Reducer> getReducerClass() {
        return PGBulkloadExportReducer.class;
    }

    /**
     * Set a delimiter property on the configuration unless the value is
     * DelimiterSet.NULL_CHAR, which marks the delimiter as unset.
     */
    private void setDelimiter(String prop, char val, Configuration conf) {
        switch (val) {
        case DelimiterSet.NULL_CHAR:
            break;
        case '\t':
        default:
            conf.set(prop, String.valueOf(val));
        }
    }

    /**
     * Propagate pg_bulkload-specific settings (binary path, null string,
     * delimiters, and error-handling policies) into the job configuration.
     */
    @Override
    protected void propagateOptionsToJob(Job job) {
        super.propagateOptionsToJob(job);
        SqoopOptions opts = context.getOptions();
        Configuration conf = job.getConfiguration();
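        // pg_bulkload is looked up on each task node; the default assumes
        // the binary is on the PATH and can be overridden by setting
        // pgbulkload.bin.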
        conf.setIfUnset("pgbulkload.bin", "pg_bulkload");
        if (opts.getNullStringValue() != null) {
            conf.set("pgbulkload.null.string", opts.getNullStringValue());
        }
        setDelimiter("pgbulkload.input.field.delim", opts.getInputFieldDelim(), conf);
        setDelimiter("pgbulkload.input.record.delim", opts.getInputRecordDelim(), conf);
        setDelimiter("pgbulkload.input.enclosedby", opts.getInputEnclosedBy(), conf);
        setDelimiter("pgbulkload.input.escapedby", opts.getInputEscapedBy(), conf);
        conf.setBoolean("pgbulkload.input.encloserequired", opts.isInputEncloseRequired());
        conf.setIfUnset("pgbulkload.check.constraints", "YES");
        conf.setIfUnset("pgbulkload.parse.errors", "INFINITE");
        conf.setIfUnset("pgbulkload.duplicate.errors", "INFINITE");
        conf.set("mapred.jar", context.getJarFile());
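        // Tasks load rows directly into the database, so speculative
        // execution and task retries are disabled to keep the same data
        // from being loaded twice.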
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        conf.setInt("mapred.map.max.attempts", 1);
        conf.setInt("mapred.reduce.max.attempts", 1);
        conf.setIfUnset("mapred.reduce.tasks", "1");
        if (context.getOptions().doClearStagingTable()) {
            conf.setBoolean("pgbulkload.clear.staging.table", true);
        }
    }

    /**
     * Configure and submit the export job, failing with an ExportException
     * if the job does not complete successfully.
     */
    @Override
    public void runExport() throws ExportException, IOException {
        ConnManager cmgr = context.getConnManager();
        SqoopOptions options = context.getOptions();
        Configuration conf = options.getConf();
        DBConfiguration dbConf = null;
        String outputTableName = context.getTableName();
        String tableName = outputTableName;
        String tableClassName = new TableClassName(options).getClassForTable(outputTableName);

        LOG.info("Beginning export of " + outputTableName);
        loadJars(conf, context.getJarFile(), tableClassName);

        try {
            Job job = new Job(conf);
            dbConf = new DBConfiguration(job.getConfiguration());
            dbConf.setOutputTableName(tableName);
            configureInputFormat(job, tableName, tableClassName, null);
            configureOutputFormat(job, tableName, tableClassName);
            configureNumTasks(job);
            propagateOptionsToJob(job);
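            // Mappers run pg_bulkload to load each input split into its
            // own staging table; the reducer then migrates those staging
            // tables into the destination table.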
            job.setMapperClass(getMapperClass());
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);
            job.setReducerClass(getReducerClass());
            cacheJars(job, context.getConnManager());
            setJob(job);

            boolean success = runJob(job);
            if (!success) {
                throw new ExportException("Export job failed!");
            }
        } catch (InterruptedException ie) {
            throw new IOException(ie);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        } finally {
            unloadJars();
        }
    }

    /**
     * Set the number of map tasks for the job, falling back to the Sqoop
     * default when the requested mapper count is less than one.
     */
    @Override
    protected int configureNumTasks(Job job) throws IOException {
        SqoopOptions options = context.getOptions();
        int numMapTasks = options.getNumMappers();
        if (numMapTasks < 1) {
            numMapTasks = SqoopOptions.DEFAULT_NUM_MAPPERS;
            LOG.warn("Invalid mapper count; using " + numMapTasks + " mappers.");
        }

        ConfigurationHelper.setJobNumMaps(job, numMapTasks);
        return numMapTasks;
    }

    private void clearStagingTable(DBConfiguration dbConf, String tableName) throws IOException {
        // Intentionally empty: clearing the staging table is done by each
        // mapper task.
    }
}
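
Usage

The class above is normally driven by the sqoop export command line with --connection-manager org.apache.sqoop.manager.PGBulkloadManager rather than instantiated directly. The following is a minimal sketch of a programmatic invocation. The connect string, credentials, table name, paths, and the pre-built ORM jar are placeholder assumptions (Sqoop's code-generation step normally produces that jar), so treat it as an illustration rather than a drop-in program.

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ExportJobContext;
import org.apache.sqoop.manager.PGBulkloadManager;

public class PGBulkloadExportExample {
    public static void main(String[] args) throws Exception {
        SqoopOptions opts = new SqoopOptions();
        opts.setConnectString("jdbc:postgresql://db.example.com/testdb"); // placeholder
        opts.setUsername("sqoop");                                        // placeholder
        opts.setTableName("employees");              // destination table (placeholder)
        opts.setExportDir("/user/sqoop/employees");  // HDFS input dir (placeholder)
        opts.setNumMappers(4);
        // Point pgbulkload.bin at the pg_bulkload binary on the task nodes.
        opts.getConf().set("pgbulkload.bin", "/usr/local/bin/pg_bulkload");

        // Jar holding the generated record class; normally produced by
        // Sqoop's codegen step (placeholder path).
        String ormJar = "/tmp/sqoop-codegen/employees.jar";

        // PGBulkloadManager.exportTable() builds and runs a
        // PGBulkloadExportJob such as the one listed above.
        PGBulkloadManager manager = new PGBulkloadManager(opts);
        manager.exportTable(new ExportJobContext(opts.getTableName(), ormJar, opts));
    }
}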