org.apache.sqoop.mapreduce.odps.HdfsOdpsImportJob.java Source code

Introduction

Here is the source code for org.apache.sqoop.mapreduce.odps.HdfsOdpsImportJob.java, a Sqoop MapReduce job that imports data already stored on HDFS (as text, SequenceFile, Avro, or Parquet files) into an Aliyun ODPS (MaxCompute) table.
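
Before the full listing, here is a minimal, hypothetical sketch of how this class might be driven programmatically. Sqoop's tool layer normally builds the SqoopOptions and ImportJobContext from the command line; the table name, paths, and the ImportJobContext constructor used below are illustrative assumptions, and the ODPS connection settings (table, project, access id/key, endpoint) must also be populated on the options, since jobSetup() checks for them.

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ImportJobContext;
import org.apache.hadoop.fs.Path;
import org.apache.sqoop.mapreduce.odps.HdfsOdpsImportJob;

public class HdfsOdpsImportExample {
    public static void main(String[] args) throws Exception {
        // Illustrative values only; in practice these come from the Sqoop command line.
        String table = "orders";
        String ormJar = "/tmp/sqoop-codegen/orders.jar";   // jar produced by Sqoop's code generator
        String exportDir = "/user/demo/orders";            // HDFS directory holding the source files

        SqoopOptions opts = new SqoopOptions();
        opts.setExportDir(exportDir);
        // The ODPS settings (--odps-* options: table, project, access id/key, endpoint)
        // must also be set on opts; the exact setters depend on this Sqoop build.

        ImportJobContext ctx = new ImportJobContext(table, ormJar, opts, new Path(exportDir));
        new HdfsOdpsImportJob(opts, ctx).runImport(table, ormJar);
    }
}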

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.odps;

import com.aliyun.odps.*;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.tunnel.TableTunnel;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.lib.SqoopRecord;
import com.cloudera.sqoop.mapreduce.JobBase;
import com.cloudera.sqoop.orm.TableClassName;
import com.cloudera.sqoop.util.ImportException;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.sqoop.lib.FieldMapProcessor;
import org.apache.sqoop.manager.ConnManager;
import org.apache.sqoop.manager.ImportJobContext;
import org.apache.sqoop.mapreduce.AvroExportMapper;
import org.apache.sqoop.mapreduce.AvroInputFormat;
import org.apache.sqoop.mapreduce.DelegatingOutputFormat;
import org.apache.sqoop.mapreduce.ExportInputFormat;
import org.apache.sqoop.mapreduce.ExportJobBase;
import org.apache.sqoop.mapreduce.ExportJobBase.FileType;
//import org.apache.sqoop.mapreduce.ParquetExportMapper;
import org.apache.sqoop.mapreduce.SequenceFileExportMapper;
import org.apache.sqoop.mapreduce.TextExportMapper;
import org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities;
import org.apache.sqoop.odps.OdpsConstants;
import org.apache.sqoop.odps.OdpsUploadProcessor;
import org.apache.sqoop.odps.OdpsUtil;
import org.apache.sqoop.util.PerfCounters;
import org.mortbay.log.Log;
import org.apache.sqoop.mapreduce.ParquetJob;

import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.Tables;
import org.kitesdk.data.mapreduce.DatasetKeyInputFormat;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Formats;

import parquet.avro.AvroSchemaConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;

import java.util.ArrayList;
import java.util.Arrays;

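/**
 * MapReduce job that imports data already stored on HDFS (text, SequenceFile,
 * Avro or Parquet) into an Aliyun ODPS (MaxCompute) table. Input is read with
 * Sqoop's export input formats and written through DelegatingOutputFormat
 * using OdpsUploadProcessor.
 */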
public class HdfsOdpsImportJob extends JobBase {
    public static final String SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS = "sqoop.export.mapred.map.max.attempts";

    private static final String HADOOP_MAP_TASK_MAX_ATTEMTPS = "mapred.map.max.attempts";

    /** Number of map tasks to use for an export. */
    public static final String EXPORT_MAP_TASKS_KEY = "sqoop.mapreduce.export.map.tasks";

    /**
     * The (inferred) type of a file or group of files.
     */
    public enum FileType {
        SEQUENCE_FILE, AVRO_DATA_FILE, HCATALOG_MANAGED_FILE, PARQUET_FILE, UNKNOWN
    }

    private FileType fileType;

    protected ImportJobContext context;

    //  protected TableTunnel.UploadSession uploadSession;

    private boolean autoCreatePartition = true;
    private boolean disableDynamicPartitions = false;

    public HdfsOdpsImportJob(SqoopOptions opts, final ImportJobContext context) {
        super(opts, null, null, null);
        this.context = context;
    }

    public Job createJob(Configuration configuration) throws IOException {
        // Put the SqoopOptions into job if requested
        if (configuration.getBoolean(SERIALIZE_SQOOPOPTIONS, SERIALIZE_SQOOPOPTIONS_DEFAULT)) {
            putSqoopOptionsToConfiguration(options, configuration);
        }

        return new Job(configuration);
    }

    private boolean isDynamicPartitions(String partitionValues) {
        if (StringUtils.isEmpty(partitionValues)) {
            return false;
        }
        return partitionValues.contains("%");
    }

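    /**
     * Configures and runs the HDFS-to-ODPS job for the given table, using the
     * ORM jar produced by Sqoop's code generator. Wraps job failures in
     * IOException and unloads the loaded jars when done.
     */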
    public void runImport(String tableName, String ormJarFile) throws IOException {
        String tableClassName = new TableClassName(options).getClassForTable(tableName);
        Configuration conf = options.getConf();
        loadJars(conf, ormJarFile, tableClassName);
        SqoopOptions options = context.getOptions();

        if (!isDynamicPartitions(options.getOdpsPartitionValue())) {
            options.setOdpsDisableDynamicPartitions(true);
            LOG.info("Detected a static ODPS partition specification; disabling dynamic partitions.");
        }
        this.disableDynamicPartitions = options.isOdpsDisableDynamicPartitions();

        Job job = createJob(conf);
        job.getConfiguration().set("mapred.jar", ormJarFile);

        if (options.getMapreduceJobName() != null) {
            job.setJobName(options.getMapreduceJobName());
        }

        propagateOptionsToJob(job);

        try {
            configureInputFormat(job, tableName, tableClassName, null);
            configureOutputFormat(job, tableName, tableClassName);
            configureMapper(job, tableName, tableClassName);
            configureNumTasks(job);
            cacheJars(job, null);
            jobSetup(job);
            setJob(job);

            boolean success = runJob(job);
            if (!success) {
                throw new ImportException("Import job failed!");
            }

            if (options.isValidationEnabled()) {
                // TODO  validateImport
            }

        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        } catch (ImportException ie) {
            throw new IOException(ie);
        } catch (InterruptedException ire) {
            throw new IOException(ire);
        } finally {
            unloadJars();
        }

    }

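    /**
     * Submits the job, waits for completion and logs transfer statistics
     * (bytes read from HDFS and number of records processed).
     */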
    @Override
    protected boolean runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {

        PerfCounters perfCounters = new PerfCounters();
        perfCounters.startClock();

        boolean success = doSubmitJob(job);
        perfCounters.stopClock();

        Counters jobCounters = job.getCounters();
        // If the job has been retired, these may be unavailable.
        if (null == jobCounters) {
            displayRetiredJobNotice(LOG);
        } else {
            perfCounters
                    .addBytes(jobCounters.getGroup("FileSystemCounters").findCounter("HDFS_BYTES_READ").getValue());
            LOG.info("Transferred " + perfCounters.toString());
            long numRecords = ConfigurationHelper.getNumMapInputRecords(job);
            LOG.info("Exported " + numRecords + " records.");
        }

        return success;
    }

    protected boolean doSubmitJob(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        return job.waitForCompletion(true);
    }

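    /**
     * Picks the input configuration based on the detected file type. For
     * Parquet input the Kite dataset is configured from the dataset URI,
     * falling back to the schema stored in the Parquet footer when no dataset
     * metadata is found.
     */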
    @Override
    protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
            throws ClassNotFoundException, IOException {
        fileType = getInputFileType();

        super.configureInputFormat(job, tableName, tableClassName, splitByCol);

        if (isHCatJob) {
            SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
                    job.getConfiguration());
            return;
        } else if (fileType == FileType.AVRO_DATA_FILE) {
            LOG.debug("Configuring for Avro export");
            configureGenericRecordExportInputFormat(job, tableName);
        } else if (fileType == FileType.PARQUET_FILE) {
            LOG.debug("Configuring for Parquet export");
            configureGenericRecordExportInputFormat(job, tableName);
            FileSystem fs = FileSystem.get(job.getConfiguration());
            String uri = "dataset:" + fs.makeQualified(getInputPath());
            try {
                DatasetKeyInputFormat.configure(job).readFrom(uri);
            } catch (DatasetNotFoundException e) {
                LOG.warn(e.getMessage(), e);
                LOG.warn("Trying to read the data schema from the Parquet files directly");
                DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri);
                Dataset dataset = Datasets.create(uri, descriptor, GenericRecord.class);
                DatasetKeyInputFormat.configure(job).readFrom(dataset);
            }
        }

        FileInputFormat.addInputPath(job, getInputPath());
    }

    /**
     * Resolve an ODPS column type to the Java type that should contain it.
     * @param odpsType the ODPS column type
     * @return the name of a Java type to hold the value, or null if no mapping is known.
     */
    public String toJavaType(OdpsType odpsType) {
        if (odpsType == OdpsType.VARCHAR) {
            return "String";
        } else if (odpsType == OdpsType.STRING) {
            return "String";
        } else if (odpsType == OdpsType.CHAR) {
            return "String";
        } else if (odpsType == OdpsType.DECIMAL) {
            return "java.math.BigDecimal";
        } else if (odpsType == OdpsType.BOOLEAN) {
            return "Boolean";
        } else if (odpsType == OdpsType.TINYINT) {
            return "Integer";
        } else if (odpsType == OdpsType.SMALLINT) {
            return "Integer";
        } else if (odpsType == OdpsType.BIGINT) {
            return "Long";
        } else if (odpsType == OdpsType.FLOAT) {
            return "Double";
        } else if (odpsType == OdpsType.DOUBLE) {
            return "Double";
        } else if (odpsType == OdpsType.DATE) {
            return "java.sql.Date";
        } else {
            // TODO(aaron): Support DISTINCT, ARRAY, STRUCT, REF, JAVA_OBJECT.
            // Return null indicating database-specific manager should return a
            // java data type if it can find one for any nonstandard type.
            return null;
        }
    }

    public static final PathFilter HIDDEN_FILES_PATH_FILTER = new PathFilter() {

        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

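    /**
     * Builds a Kite DatasetDescriptor by converting the Parquet schema found in
     * the footer of the first visible data file under the input path.
     */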
    private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
            throws IOException {

        ArrayList<FileStatus> files = new ArrayList<FileStatus>();
        FileStatus[] dirs;
        dirs = fs.globStatus(fs.makeQualified(getInputPath()));
        for (int i = 0; (dirs != null && i < dirs.length); i++) {
            files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
            // We only check one file, so exit the loop when we have at least
            // one.
            if (files.size() > 0) {
                break;
            }
        }

        if (files.isEmpty()) {
            throw new IOException("Could not find any data file under " + getInputPath()
                    + " to read the Parquet schema from.");
        }
        ParquetMetadata parquetMetadata;
        try {
            parquetMetadata = ParquetFileReader.readFooter(job.getConfiguration(),
                    fs.makeQualified(files.get(0).getPath()));
        } catch (IOException e) {
            LOG.error("Wrong file format. Please check the export file's format.", e);
            throw e;
        }
        MessageType schema = parquetMetadata.getFileMetaData().getSchema();
        Schema avroSchema = new AvroSchemaConverter().convert(schema);
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
                .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
        return descriptor;
    }

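    /**
     * Stores the column-name to Java-type map in the job configuration so the
     * Avro/Parquet export mappers can convert generic records. When an ODPS
     * table is specified the types come from the user's ODPS column mapping
     * (defaulting to STRING); otherwise they are looked up via the ConnManager.
     */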
    private void configureGenericRecordExportInputFormat(Job job, String tableName) throws IOException {
        if (options.getOdpsTable() != null) {
            MapWritable columnTypes = new MapWritable();
            Map<String, OdpsType> colTypeMap = getColTypeMap();
            for (Map.Entry<String, OdpsType> e : colTypeMap.entrySet()) {
                String column = e.getKey();
                if (column != null) {
                    Text columnName = new Text(column);
                    Text columnType = new Text(toJavaType(e.getValue()));
                    columnTypes.put(columnName, columnType);
                }
            }
            DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
            return;
        }
        ConnManager connManager = context.getConnManager();
        Map<String, Integer> columnTypeInts;
        if (options.getCall() == null) {
            columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
        } else {
            columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
        }
        String[] specifiedColumns = options.getColumns();
        MapWritable columnTypes = new MapWritable();
        for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
            String column = e.getKey();
            column = (specifiedColumns == null) ? column : options.getColumnNameCaseInsensitive(column);
            if (column != null) {
                Text columnName = new Text(column);
                Text columnType = new Text(connManager.toJavaType(tableName, column, e.getValue()));
                columnTypes.put(columnName, columnType);
            }
        }
        DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
    }

    @Override
    protected void configureMapper(Job job, String tableName, String tableClassName) {
        job.setOutputKeyClass(SqoopRecord.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapperClass(getMapperClass());
    }

    @Override
    protected Class<? extends Mapper> getMapperClass() {
        if (isHCatJob) {
            return SqoopHCatUtilities.getExportOdpsMapperClass();
        }
        switch (fileType) {
        case SEQUENCE_FILE:
            return SequenceFileExportMapper.class;
        case AVRO_DATA_FILE:
            return AvroExportMapper.class;
        case PARQUET_FILE:
            return ParquetExportMapper.class;
        case UNKNOWN:
        default:
            return TextExportMapper.class;
        }
    }

    @Override
    protected Class<? extends OutputFormat> getOutputFormatClass() throws ClassNotFoundException {
        return DelegatingOutputFormat.class;
    }

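    /**
     * Routes output through DelegatingOutputFormat with OdpsUploadProcessor and
     * copies the ODPS connection, table, partition, tunnel and upload settings
     * into the job configuration.
     */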
    @Override
    protected void configureOutputFormat(Job job, String tableName, String tableClassName)
            throws ClassNotFoundException {
        job.setOutputFormatClass(getOutputFormatClass());
        Configuration conf = job.getConfiguration();
        conf.setClass("sqoop.output.delegate.field.map.processor.class", OdpsUploadProcessor.class,
                FieldMapProcessor.class);

        conf.setStrings(OdpsConstants.INPUT_COL_NAMES, options.getColumns());

        String odpsTableName = options.getOdpsTable();
        if (odpsTableName == null) {
            odpsTableName = tableName;
        }
        conf.set(OdpsConstants.TABLE_NAME, odpsTableName);
        conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
        conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
        conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());

        String tunnelEndPoint = options.getOdpsTunnelEndPoint();
        if (tunnelEndPoint != null) {
            conf.set(OdpsConstants.TUNNEL_ENDPOINT, options.getOdpsTunnelEndPoint());
        }

        conf.set(OdpsConstants.PROJECT, options.getOdpsProject());

        String partitionKey = options.getOdpsPartitionKey();
        String partitionValue = options.getOdpsPartitionValue();
        if (partitionKey != null && partitionValue != null) {
            conf.set(OdpsConstants.PARTITION_KEY, partitionKey);
            conf.set(OdpsConstants.PARTITION_VALUE, partitionValue);
        }
        conf.setBoolean(OdpsConstants.CREATE_TABLE, options.isOdpsCreateTable());
        String dateFormat = options.getOdpsInputDateFormat();
        if (dateFormat != null) {
            conf.set(OdpsConstants.DATE_FORMAT, dateFormat);
        }
        conf.setInt(OdpsConstants.RETRY_COUNT, options.getOdpsRetryCount());
        conf.setInt(OdpsConstants.BATCH_SIZE, options.getOdpsBatchSize());
        conf.setBoolean(OdpsConstants.USE_COMPRESS_IN_UPLOAD, options.isOdpsUseCompressInUpload());

        job.getConfiguration().set(ExportJobBase.SQOOP_EXPORT_TABLE_CLASS_KEY, tableClassName);
    }

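    /**
     * Validates the ODPS connection settings and prepares the target table:
     * creates it when --odps-create-table is set and it does not yet exist,
     * and for static partitions optionally truncates the table (or drops the
     * partition) and pre-creates the target partition.
     */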
    protected void jobSetup(Job job) throws IOException, ImportException {
        boolean isCreateTable = options.isOdpsCreateTable();

        String tableName = Preconditions.checkNotNull(options.getOdpsTable(),
                "Import to ODPS error: Table name not specified");
        String accessID = Preconditions.checkNotNull(options.getOdpsAccessID(),
                "Error: ODPS access ID not specified");
        String accessKey = Preconditions.checkNotNull(options.getOdpsAccessKey(),
                "Error: ODPS access key not specified");
        String project = Preconditions.checkNotNull(options.getOdpsProject(), "Error: ODPS project not specified");
        String endpoint = Preconditions.checkNotNull(options.getOdpsEndPoint(),
                "Error: ODPS endpoint not specified");

        Odps odps = new Odps(new AliyunAccount(accessID, accessKey));
        odps.setUserAgent(OdpsUtil.getUserAgent());
        odps.setDefaultProject(project);
        odps.setEndpoint(endpoint);

        Tables tables = odps.tables();
        boolean existsTable;
        try {
            existsTable = tables.exists(tableName);
        } catch (OdpsException e) {
            throw new ImportException("ODPS exception", e);
        }

        if (!existsTable) {
            Map<String, OdpsType> colTypeMap = getColTypeMap();
            if (!isCreateTable) {
                LOG.warn("Could not find ODPS table " + tableName);
                LOG.warn("This job may fail. Either explicitly create the table,");
                LOG.warn("or re-run with --odps-create-table.");
            } else {
                TableSchema schema = new TableSchema();
                for (Map.Entry<String, OdpsType> colTypeEntry : colTypeMap.entrySet()) {
                    schema.addColumn(new Column(colTypeEntry.getKey(), colTypeEntry.getValue()));
                }
                String partitionColumns = options.getOdpsPartitionKey();
                if (StringUtils.isNotEmpty(partitionColumns)) {
                    String[] partitionCols = partitionColumns.split(",");
                    for (int i = 0; i < partitionCols.length; i++) {
                        schema.addPartitionColumn(new Column(partitionCols[i].trim(), OdpsType.STRING));
                    }
                }
                try {
                    tables.create(tableName, schema);
                } catch (OdpsException e) {
                    throw new ImportException("Create table failed", e);
                }
            }
        }

        if (options.isOdpsDisableDynamicPartitions()) {
            Table odpsTable = odps.tables().get(tableName);
            String partitionSpec = getPartitionSpec(odpsTable, strToArray(options.getOdpsPartitionKey()),
                    strToArray(options.getOdpsPartitionValue()), Maps.newHashMap());
            if (options.isOverwriteOdpsTable()) {
                try {
                    if (!odpsTable.isPartitioned()) {
                        odpsTable.truncate();
                    } else {
                        odpsTable.deletePartition(new PartitionSpec(partitionSpec), true);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(String.format("Truncate:%s failed.", tableName), e);
                }
            }

            if (autoCreatePartition) {
                try {
                    if (odpsTable.isPartitioned()) {
                        odpsTable.createPartition(new PartitionSpec(partitionSpec), true);
                    }
                } catch (OdpsException e) {
                    throw new RuntimeException("Create partition failed. ", e);
                }
            }

        }
    }

    private Map buildPartitionMap(Table odpsTable) {
        Map partMap = Maps.newHashMap();
        for (Partition partition : odpsTable.getPartitions()) {
            partMap.put(partition.getPartitionSpec().toString(), true);
            //      TableSchema schema = tables.get(tableName).getSchema();
            //      for(Map.Entry<String, OdpsType> colTypeEntry: colTypeMap.entrySet()) {
            //        Column column = schema.getColumn(colTypeEntry.getKey().toLowerCase());
            //        if (column.getType() != colTypeEntry.getValue()) {
            //          throw new ImportException("Column type of ODPS table not"
            //              + " consistent with user specification. Column name: "
            //              + colTypeEntry.getKey());
            //        }
            //      }
        }
        return partMap;
    }

    private String[] strToArray(String s) {
        if (s == null) {
            return null;
        }
        return s.split(",");
    }

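    /**
     * Builds a static partition spec string of the form key1='value1',key2='value2'
     * from parallel arrays of partition keys and values.
     */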
    private String getPartitionSpec(Table odpsTable, String[] partKeys, String[] partValues, Map partitionMap) {
        if (partKeys == null || partValues == null) {
            return null;
        }
        if (partKeys.length != partValues.length) {
            throw new RuntimeException("Numbers of partition key and " + "partition value are not equal.");
        }
        StringBuilder sb = new StringBuilder();
        String sep = "";
        for (int i = 0; i < partKeys.length; i++) {
            String realPartVal = partValues[i];
            sb.append(sep).append(partKeys[i]).append("='").append(realPartVal).append("'");
            sep = ",";
        }
        String partitionSpec = sb.toString();
        return partitionSpec;
    }

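    /**
     * Builds the ODPS column-type map for the imported columns. Types come from
     * the user-supplied column mapping where present; unmapped columns default
     * to STRING, and an unknown column name in the mapping is rejected.
     */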
    private Map<String, OdpsType> getColTypeMap() {
        Map<String, OdpsType> colTypeMap = Maps.newLinkedHashMap();
        Properties userMapping = options.getMapColumnOdps();
        String[] columnNames = options.getColumns();

        for (Object column : userMapping.keySet()) {
            boolean found = false;
            for (String c : columnNames) {
                if (c.equals(column)) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                throw new IllegalArgumentException(
                        "No column by the name " + column + "found while importing data");
            }
        }
        for (String col : columnNames) {
            String userType = userMapping.getProperty(col);
            if (userType != null) {
                colTypeMap.put(col, OdpsType.valueOf(userType.toUpperCase()));
            } else {
                LOG.warn("Column " + col + " type not specified, defaulting to STRING.");
                colTypeMap.put(col, OdpsType.STRING);
            }
        }
        return colTypeMap;
    }

    @Override
    protected Class<? extends InputFormat> getInputFormatClass() throws ClassNotFoundException {
        if (isHCatJob) {
            return SqoopHCatUtilities.getInputFormatClass();
        }
        switch (fileType) {
        case AVRO_DATA_FILE:
            return AvroInputFormat.class;
        case PARQUET_FILE:
            return DatasetKeyInputFormat.class;
        default:
            Class<? extends InputFormat> configuredIF = super.getInputFormatClass();
            if (null == configuredIF) {
                return ExportInputFormat.class;
            } else {
                return configuredIF;
            }
        }
    }

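    /** @return the export directory from the options, qualified against the default FileSystem. */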
    protected Path getInputPath() throws IOException {
        Path inputPath = new Path(context.getOptions().getExportDir());
        Configuration conf = options.getConf();
        inputPath = inputPath.makeQualified(FileSystem.get(conf));
        return inputPath;
    }

    @Override
    protected int configureNumMapTasks(Job job) throws IOException {
        int numMaps = super.configureNumMapTasks(job);
        job.getConfiguration().setInt(EXPORT_MAP_TASKS_KEY, numMaps);
        return numMaps;
    }

    @Override
    protected void propagateOptionsToJob(Job job) {
        super.propagateOptionsToJob(job);
        Configuration conf = job.getConfiguration();

        // This is effectively an export job, where re-trying a failed mapper mostly
        // doesn't make sense. By default we force MR to run only one attempt per mapper.
        // A user or connector developer can override this behavior by setting
        // SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS:
        //
        // * Positive number - we will allow the specified number of attempts
        // * Negative number - we will fall back to Hadoop's default number of attempts
        //
        // This is important for most connectors, as they commit data directly to the
        // final table, so re-running a mapper would lead to misleading errors about
        // inserted duplicate rows.
        int sqoopMaxAttempts = conf.getInt(SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS, 1);
        System.out.println("sqoopMaxAttempts:" + sqoopMaxAttempts);
        if (sqoopMaxAttempts > 1) {
            conf.setInt(HADOOP_MAP_TASK_MAX_ATTEMTPS, sqoopMaxAttempts);
            conf.setInt("mapreduce.map.maxattempts", sqoopMaxAttempts);
        }

        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        conf.setBoolean(OdpsConstants.ODPS_DISABLE_DYNAMIC_PARTITIONS, this.disableDynamicPartitions);
        //    conf.set(OdpsConstants.ODPS_TUNNEL_UPLOAD_SESSION_ID, uploadSession.getId());
    }

    /**
     * @return true if p is a SequenceFile, or a directory containing
     * SequenceFiles.
     */
    public static boolean isSequenceFiles(Configuration conf, Path p) throws IOException {
        return getFileType(conf, p) == FileType.SEQUENCE_FILE;
    }

    /**
     * @return the type of the file represented by p (or the files in p, if a
     * directory)
     */
    public static FileType getFileType(Configuration conf, Path p) throws IOException {
        FileSystem fs = p.getFileSystem(conf);

        try {
            FileStatus stat = fs.getFileStatus(p);

            if (null == stat) {
                // Couldn't get the item.
                LOG.warn("Input path " + p + " does not exist");
                return FileType.UNKNOWN;
            }

            if (stat.isDir()) {
                FileStatus[] subitems = fs.listStatus(p);
                if (subitems == null || subitems.length == 0) {
                    LOG.warn("Input path " + p + " contains no files");
                    return FileType.UNKNOWN; // empty dir.
                }

                // Pick a child entry to examine instead.
                boolean foundChild = false;
                for (int i = 0; i < subitems.length; i++) {
                    stat = subitems[i];
                    if (!stat.isDir() && !stat.getPath().getName().startsWith("_")) {
                        foundChild = true;
                        break; // This item is a visible file. Check it.
                    }
                }

                if (!foundChild) {
                    stat = null; // Couldn't find a reasonable candidate.
                }
            }

            if (null == stat) {
                LOG.warn("null FileStatus object in isSequenceFiles(); " + "assuming false.");
                return FileType.UNKNOWN;
            }

            Path target = stat.getPath();
            return fromMagicNumber(target, conf);
        } catch (FileNotFoundException fnfe) {
            LOG.warn("Input path " + p + " does not exist");
            return FileType.UNKNOWN; // doesn't exist!
        }
    }

    /**
     * @param file a file to test.
     * @param conf the configuration used to open the file.
     * @return the FileType indicated by the file's magic number ("SEQ" for
     * SequenceFile, "Obj" for Avro, "PAR" for Parquet), or UNKNOWN if the
     * header cannot be read or is not recognized.
     */
    private static FileType fromMagicNumber(Path file, Configuration conf) {
        // Test target's header to see if it contains magic numbers indicating its
        // file type
        byte[] header = new byte[3];
        FSDataInputStream is = null;
        try {
            FileSystem fs = file.getFileSystem(conf);
            is = fs.open(file);
            is.readFully(header);
        } catch (IOException ioe) {
            // Error reading header or EOF; assume unknown
            LOG.warn("IOException checking input file header: " + ioe);
            return FileType.UNKNOWN;
        } finally {
            try {
                if (null != is) {
                    is.close();
                }
            } catch (IOException ioe) {
                // ignore; closing.
                LOG.warn("IOException closing input stream: " + ioe + "; ignoring.");
            }
        }

        if (header[0] == 'S' && header[1] == 'E' && header[2] == 'Q') {
            return FileType.SEQUENCE_FILE;
        }
        if (header[0] == 'O' && header[1] == 'b' && header[2] == 'j') {
            return FileType.AVRO_DATA_FILE;
        }
        if (header[0] == 'P' && header[1] == 'A' && header[2] == 'R') {
            return FileType.PARQUET_FILE;
        }
        return FileType.UNKNOWN;
    }

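    /**
     * @return HCATALOG_MANAGED_FILE for HCatalog jobs, otherwise the file type
     * inferred from the contents of the export directory.
     */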
    protected FileType getInputFileType() {
        if (isHCatJob) {
            return FileType.HCATALOG_MANAGED_FILE;
        }
        try {
            return getFileType(context.getOptions().getConf(), getInputPath());
        } catch (IOException ioe) {
            return FileType.UNKNOWN;
        }
    }
}