org.apache.sqoop.job.mr.SqoopInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.sqoop.job.mr.SqoopInputFormat.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.job.mr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Logger;
import org.apache.sqoop.common.Direction;
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.job.MRJobConstants;
import org.apache.sqoop.job.MRExecutionError;
import org.apache.sqoop.common.PrefixContext;
import org.apache.sqoop.job.etl.Partition;
import org.apache.sqoop.job.etl.Partitioner;
import org.apache.sqoop.job.etl.PartitionerContext;
import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.utils.ClassUtils;

/**
 * An InputFormat for MapReduce job.
 *
 * <p>Splits are produced by delegating to the {@link Partitioner} whose class
 * name is stored in the job configuration under
 * {@link MRJobConstants#JOB_ETL_PARTITIONER}; every {@link Partition} it
 * returns is wrapped in its own {@link SqoopSplit}. The accompanying
 * {@link SqoopRecordReader} hands that split back exactly once as the key of
 * a single record.
 */
public class SqoopInputFormat extends InputFormat<SqoopSplit, NullWritable> {

    public static final Logger LOG = Logger.getLogger(SqoopInputFormat.class);

    /**
     * Creates the reader for a split.
     *
     * @param split   unused here; the reader receives it later through
     *                {@link SqoopRecordReader#initialize}
     * @param context unused
     * @return a fresh {@link SqoopRecordReader}
     */
    @Override
    public RecordReader<SqoopSplit, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) {
        return new SqoopRecordReader();
    }

    /**
     * Asks the configured partitioner for partitions and wraps each one in a
     * {@link SqoopSplit}.
     *
     * <p>The upper bound on the number of partitions is read from
     * {@link MRJobConstants#JOB_ETL_EXTRACTOR_NUM} (10 when the key is not
     * set) and is passed to the partitioner through its
     * {@link PartitionerContext}. The bound is then enforced on the result.
     *
     * @param context job context supplying the configuration
     * @return one split per partition, in the order the partitioner returned them
     * @throws SqoopException with {@link MRExecutionError#MAPRED_EXEC_0025}
     *         when the partitioner returned more partitions than allowed
     */
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        String partitionerName = conf.get(MRJobConstants.JOB_ETL_PARTITIONER);
        Partitioner partitioner = (Partitioner) ClassUtils.instantiate(partitionerName);

        // Everything handed to the partitioner describes the FROM side of the job.
        PrefixContext connectorContext = new PrefixContext(conf, MRJobConstants.PREFIX_CONNECTOR_FROM_CONTEXT);
        Object connectorConnection = MRConfigurationUtils.getConnectorLinkConfig(Direction.FROM, conf);
        Object connectorJob = MRConfigurationUtils.getConnectorJobConfig(Direction.FROM, conf);
        Schema schema = MRConfigurationUtils.getConnectorSchema(Direction.FROM, conf);

        long maxPartitions = conf.getLong(MRJobConstants.JOB_ETL_EXTRACTOR_NUM, 10);
        PartitionerContext partitionerContext = new PartitionerContext(connectorContext, maxPartitions, schema);

        List<Partition> partitions = partitioner.getPartitions(partitionerContext, connectorConnection,
                connectorJob);

        // The final size is known up front, so a presized ArrayList avoids both
        // per-node allocation and incremental growth.
        List<InputSplit> splits = new ArrayList<InputSplit>(partitions.size());

        // Evaluate the log level once so that partition.toString() and the string
        // concatenation are skipped entirely when debug logging is disabled.
        final boolean debugEnabled = LOG.isDebugEnabled();
        for (Partition partition : partitions) {
            if (debugEnabled) {
                LOG.debug("Partition: " + partition);
            }
            SqoopSplit split = new SqoopSplit();
            split.setPartition(partition);
            splits.add(split);
        }

        // The partitioner was told the limit; reject a result that ignores it.
        if (splits.size() > maxPartitions) {
            throw new SqoopException(MRExecutionError.MAPRED_EXEC_0025,
                    String.format("Got %d, max was %d", splits.size(), maxPartitions));
        }

        return splits;
    }

    /**
     * Record reader that yields exactly one record: the split it was
     * initialized with as the key and {@link NullWritable} as the value.
     */
    public static class SqoopRecordReader extends RecordReader<SqoopSplit, NullWritable> {

        /** Set once the single record has been handed out. */
        private boolean delivered = false;

        /** Split received in {@link #initialize}; {@code null} until then. */
        private SqoopSplit split = null;

        /**
         * Advances to the single record.
         *
         * @return {@code true} on the first call, {@code false} on every later call
         */
        @Override
        public boolean nextKeyValue() {
            if (delivered) {
                return false;
            }
            delivered = true;
            return true;
        }

        /**
         * @return the split passed to {@link #initialize}, or {@code null} if
         *         the reader has not been initialized
         */
        @Override
        public SqoopSplit getCurrentKey() {
            return split;
        }

        /** @return the {@link NullWritable} singleton */
        @Override
        public NullWritable getCurrentValue() {
            return NullWritable.get();
        }

        /** Nothing to release. */
        @Override
        public void close() {
        }

        /** @return {@code 1.0f} once the record has been delivered, {@code 0.0f} before */
        @Override
        public float getProgress() {
            return delivered ? 1.0f : 0.0f;
        }

        /**
         * Stores the split so that it can be returned as the record key.
         *
         * @param split   must be a {@link SqoopSplit}; any other type fails the cast
         * @param context unused
         */
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) {
            this.split = (SqoopSplit) split;
        }
    }

}