Java tutorial: DBInputFormat (org.apache.hadoop.mapred.lib.db)

The class below is the old-API (mapred) DBInputFormat, a thin shim over the new-API org.apache.hadoop.mapreduce.lib.db.DBInputFormat. It lets jobs written against the M/R 1.x interfaces read their input from a SQL table over JDBC: keys are record numbers, values are user-supplied DBWritable objects.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.lib.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;

@InterfaceAudience.Public
@InterfaceStability.Stable
@SuppressWarnings("deprecation")
public class DBInputFormat<T extends DBWritable>
    extends org.apache.hadoop.mapreduce.lib.db.DBInputFormat<T>
    implements InputFormat<LongWritable, T>, JobConfigurable {

  /**
   * A RecordReader that reads records from a SQL table.
   * Emits LongWritables containing the record number as
   * key and DBWritables as value.
   */
  protected class DBRecordReader extends
      org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>
      implements RecordReader<LongWritable, T> {
    /**
     * This constructor is kept for compatibility with M/R 1.x.
     *
     * @param split The InputSplit to read data for
     * @param inputClass the DBWritable class holding the tuple fields
     * @param job the current JobConf
     * @throws SQLException if a database access error occurs
     */
    protected DBRecordReader(DBInputSplit split, Class<T> inputClass,
        JobConf job) throws SQLException {
      super(split, inputClass, job, connection, dbConf, conditions,
          fieldNames, tableName);
    }

    /**
     * @param split The InputSplit to read data for
     * @throws SQLException if a database access error occurs
     */
    protected DBRecordReader(DBInputSplit split, Class<T> inputClass,
        JobConf job, Connection conn, DBConfiguration dbConfig, String cond,
        String[] fields, String table) throws SQLException {
      super(split, inputClass, job, conn, dbConfig, cond, fields, table);
    }

    /** {@inheritDoc} */
    public LongWritable createKey() {
      return new LongWritable();
    }

    /** {@inheritDoc} */
    public T createValue() {
      return super.createValue();
    }

    public long getPos() throws IOException {
      return super.getPos();
    }

    /** {@inheritDoc} */
    public boolean next(LongWritable key, T value) throws IOException {
      return super.next(key, value);
    }
  }

  /**
   * A RecordReader implementation that just passes through to a wrapped
   * RecordReader built with the new API.
   */
  private static class DBRecordReaderWrapper<T extends DBWritable>
      implements RecordReader<LongWritable, T> {

    private org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> rr;

    public DBRecordReaderWrapper(
        org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T> inner) {
      this.rr = inner;
    }

    public void close() throws IOException {
      rr.close();
    }

    public LongWritable createKey() {
      return new LongWritable();
    }

    public T createValue() {
      return rr.createValue();
    }

    public float getProgress() throws IOException {
      return rr.getProgress();
    }

    public long getPos() throws IOException {
      return rr.getPos();
    }

    public boolean next(LongWritable key, T value) throws IOException {
      return rr.next(key, value);
    }
  }

  /**
   * A class that does nothing, implementing DBWritable.
   */
  public static class NullDBWritable extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable
      implements DBWritable, Writable {
  }

  /**
   * An InputSplit that spans a set of rows.
   */
  protected static class DBInputSplit extends
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit
      implements InputSplit {
    /**
     * Default constructor.
     */
    public DBInputSplit() {
    }

    /**
     * Convenience constructor.
     * @param start the index of the first row to select
     * @param end the index of the last row to select
     */
    public DBInputSplit(long start, long end) {
      super(start, end);
    }
  }

  /** {@inheritDoc} */
  public void configure(JobConf job) {
    super.setConf(job);
  }

  /** {@inheritDoc} */
  public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {
    // Wrap the new-API DBRecordReader in a shim class to bridge the
    // differences between the mapred and mapreduce RecordReader APIs.
    return new DBRecordReaderWrapper<T>(
        (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>)
        createDBRecordReader(
            (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit)
            split, job));
  }

  /** {@inheritDoc} */
  public InputSplit[] getSplits(JobConf job, int chunks) throws IOException {
    // Delegate split computation to the new-API implementation, then
    // convert each new-API split into an old-API DBInputSplit.
    List<org.apache.hadoop.mapreduce.InputSplit> newSplits =
        super.getSplits(Job.getInstance(job));
    InputSplit[] ret = new InputSplit[newSplits.size()];
    int i = 0;
    for (org.apache.hadoop.mapreduce.InputSplit s : newSplits) {
      org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit split =
          (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) s;
      ret[i++] = new DBInputSplit(split.getStart(), split.getEnd());
    }
    return ret;
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param tableName The table to read data from
   * @param conditions The condition used to select data,
   * e.g. '(updated > 20070101 AND length > 0)'
   * @param orderBy the field names to use in the ORDER BY clause
   * @param fieldNames The field names in the table
   * @see #setInput(JobConf, Class, String, String)
   */
  public static void setInput(JobConf job,
      Class<? extends DBWritable> inputClass,
      String tableName, String conditions,
      String orderBy, String... fieldNames) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputFieldNames(fieldNames);
    dbConf.setInputConditions(conditions);
    dbConf.setInputOrderBy(orderBy);
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param inputQuery the input query to select fields. Example:
   * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
   * @param inputCountQuery the input query that returns the number of records
   * in the table. Example: "SELECT COUNT(f1) FROM Mytable"
   * @see #setInput(JobConf, Class, String, String, String, String...)
   */
  public static void setInput(JobConf job,
      Class<? extends DBWritable> inputClass,
      String inputQuery, String inputCountQuery) {
    job.setInputFormat(DBInputFormat.class);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setInputClass(inputClass);
    dbConf.setInputQuery(inputQuery);
    dbConf.setInputCountQuery(inputCountQuery);
  }
}
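To show how this InputFormat is typically wired up, here is a minimal sketch of a mapred-API driver. DBConfiguration.configureDB and the table-based setInput overload are the real entry points from the package above; everything else (the DBInputExample and EmployeeRecord classes, the "employees" table with its "id" and "name" columns, the JDBC driver, URL, credentials, and output path) is a hypothetical assumption for illustration.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;

public class DBInputExample {

  // One row of the hypothetical "employees" table.
  public static class EmployeeRecord implements Writable, DBWritable {
    long id;
    String name;

    // Writable: how Hadoop serializes the value between tasks.
    public void write(DataOutput out) throws IOException {
      out.writeLong(id);
      Text.writeString(out, name);
    }

    public void readFields(DataInput in) throws IOException {
      id = in.readLong();
      name = Text.readString(in);
    }

    // DBWritable: how a JDBC row is mapped to this object (and back).
    public void write(PreparedStatement statement) throws SQLException {
      statement.setLong(1, id);
      statement.setString(2, name);
    }

    public void readFields(ResultSet resultSet) throws SQLException {
      id = resultSet.getLong(1);
      name = resultSet.getString(2);
    }

    @Override
    public String toString() {
      return id + "\t" + name;
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DBInputExample.class);

    // Point the job at the database; the driver class, URL, and
    // credentials here are placeholders for your environment.
    DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/mydb", "user", "password");

    // Keys will be record numbers (LongWritable); values are
    // EmployeeRecord instances filled in via readFields(ResultSet).
    DBInputFormat.setInput(job, EmployeeRecord.class,
        "employees", null /* conditions */, "id" /* orderBy */,
        "id", "name");

    // A map-only identity job that dumps the rows as text.
    job.setMapperClass(IdentityMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(EmployeeRecord.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/db-input-example"));

    JobClient.runJob(job);
  }
}

For free-form SQL, the query-based setInput overload would be used instead, passing an inputQuery such as "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" together with an inputCountQuery such as "SELECT COUNT(f1) FROM Mytable", which the format uses to compute split boundaries.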