com.mongodb.hadoop.input.MongoInputSplit.java Source code

Java tutorial

Introduction

Here is the source code for com.mongodb.hadoop.input.MongoInputSplit.java

Source

// MongoInputSplit.java
/*
 * Copyright 2010 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.mongodb.hadoop.input;

import com.mongodb.*;
import com.mongodb.hadoop.util.*;
import org.apache.commons.logging.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.bson.*;

import java.io.*;
import java.util.*;

/**
 * An input split describing one slice of a MongoDB collection: the connection URI, the query / field / sort
 * specifications, optional {@code $min} / {@code $max} index bounds, and limit / skip / no-timeout settings.
 *
 * <p>The split is serialized as a single BSON document (see {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)}) and lazily creates a {@link DBCursor} over the described data
 * (see {@link #getCursor()}).
 */
public class MongoInputSplit extends InputSplit implements Writable, org.apache.hadoop.mapred.InputSplit {

    /**
     * Creates a fully specified split and eagerly initializes its cursor and BSON encoder/decoder.
     *
     * @param inputURI   URI of the MongoDB collection to read from
     * @param keyField   name of the document field to use as the mapper key
     * @param query      query specification passed to {@code find}
     * @param fields     field (projection) specification passed to {@code find}
     * @param sort       sort specification applied to the cursor
     * @param specialMin value for the cursor's {@code $min} special, or {@code null} for none
     * @param specialMax value for the cursor's {@code $max} special, or {@code null} for none
     * @param limit      stored and serialized, but not yet applied to the cursor
     * @param skip       stored and serialized, but not yet applied to the cursor
     * @param noTimeout  whether to set the no-timeout query option on the cursor
     */
    public MongoInputSplit(MongoURI inputURI, String keyField, DBObject query, DBObject fields, DBObject sort,
            Object specialMin, Object specialMax, int limit, int skip, boolean noTimeout) {
        // Guarded so the (fairly large) message is only built when it will actually be emitted.
        if (log.isDebugEnabled()) {
            log.debug("Creating a new MongoInputSplit for MongoURI '" + inputURI + "', keyField: " + keyField
                    + ", query: '" + query + "', fieldSpec: '" + fields + "', sort: '" + sort + "', limit: " + limit
                    + ", skip: " + skip + " noTimeout? " + noTimeout + ".");
        }

        _mongoURI = inputURI;
        _keyField = keyField;
        _querySpec = query;
        _fieldSpec = fields;
        _sortSpec = sort;
        _limit = limit;
        _skip = skip;
        _notimeout = noTimeout;
        _specialMin = specialMin;
        _specialMax = specialMax;
        getCursor();
        getBSONDecoder();
        getBSONEncoder();
    }

    /**
     * Creates an empty split whose state is expected to be populated afterwards via
     * {@link #readFields(DataInput)}.
     */
    public MongoInputSplit() {
    }

    /**
     * This is supposed to return the size of the split in bytes. The real size is not computed; a constant
     * placeholder is returned instead.
     *
     * @return always {@link Long#MAX_VALUE}
     */
    @Override
    public long getLength() {
        return Long.MAX_VALUE;
    }

    /**
     * @return the hosts named in the split's MongoURI, or an empty array if no URI has been set yet
     */
    @Override
    public String[] getLocations() {
        if (_mongoURI == null) {
            return new String[0];
        }
        return _mongoURI.getHosts().toArray(new String[_mongoURI.getHosts().size()]);
    }

    /**
     * Serialize the Split instance as a single BSON document.
     *
     * @param out the sink the encoded document is written to
     * @throws IOException if writing to {@code out} fails
     */
    public void write(final DataOutput out) throws IOException {
        BSONEncoder enc = getBSONEncoder();

        BSONObject spec = BasicDBObjectBuilder.start().add("uri", _mongoURI.toString()).add("key", _keyField)
                .add("query", _querySpec).add("field", _fieldSpec).add("sort", _sortSpec).add("limit", _limit)
                .add("skip", _skip).add("specialMin", _specialMin).add("specialMax", _specialMax)
                .add("notimeout", _notimeout).get();

        byte[] buf = enc.encode(spec);

        out.write(buf);
    }

    /**
     * Deserialize the Split instance from the BSON document produced by {@link #write(DataOutput)} and
     * rebuild the cursor from the restored specification.
     *
     * @param in the source to read the encoded document from
     * @throws IOException if the stream ends early, the length header is invalid, the document cannot be
     *                     decoded, or the mandatory {@code uri} field is missing
     */
    public void readFields(DataInput in) throws IOException {
        BSONDecoder dec = getBSONDecoder();
        BSONCallback cb = new BasicBSONCallback();

        // Read the BSON length from the start of the record
        byte[] header = new byte[4];
        in.readFully(header);
        int dataLen = org.bson.io.Bits.readInt(header);
        if (log.isDebugEnabled())
            log.debug("*** Expected DataLen: " + dataLen);
        // The smallest BSON document is 5 bytes: the 4-byte length plus the terminating 0x00.
        if (dataLen < 5)
            throw new IOException("Invalid BSON document length in serialized MongoInputSplit: " + dataLen);

        // The BSON length prefix counts its own 4 bytes, so the whole document fits in exactly dataLen bytes.
        byte[] data = new byte[dataLen];
        System.arraycopy(header, 0, data, 0, 4);
        in.readFully(data, 4, dataLen - 4);

        BSONObject spec;
        try {
            dec.decode(data, cb);
            spec = (BSONObject) cb.get();
        } catch (RuntimeException e) {
            // Surface decode problems as an I/O failure with the cause attached, instead of continuing
            // with an empty spec and failing later with an unrelated NullPointerException.
            IOException ioe = new IOException("Unable to decode serialized MongoInputSplit: " + e);
            ioe.initCause(e);
            throw ioe;
        }
        if (spec == null)
            throw new IOException("Serialized MongoInputSplit decoded to an empty result.");
        if (log.isTraceEnabled())
            log.trace("Decoded a BSON Object: " + spec);

        Object uri = spec.get("uri");
        if (!(uri instanceof String))
            throw new IOException("Serialized MongoInputSplit has no usable 'uri' field: " + uri);

        _mongoURI = new MongoURI((String) uri);
        _keyField = (String) spec.get("key");
        // write() serializes null specs as BSON null; restore them as null rather than dereferencing.
        _querySpec = toDBObject(spec.get("query"));
        _fieldSpec = toDBObject(spec.get("field"));
        _sortSpec = toDBObject(spec.get("sort"));
        _specialMin = spec.get("specialMin");
        _specialMax = spec.get("specialMax");
        _limit = toInt(spec.get("limit"));
        _skip = toInt(spec.get("skip"));
        _notimeout = Boolean.TRUE.equals(spec.get("notimeout"));

        // Drop any cursor cached for a previous specification so getCursor() builds one for the new state.
        _cursor = null;
        getCursor();
        log.info("Deserialized MongoInputSplit ... { length = " + getLength() + ", locations = "
                + Arrays.toString(getLocations()) + ", keyField = " + _keyField + ", query = " + _querySpec
                + ", fields = " + _fieldSpec + ", sort = " + _sortSpec + ", limit = " + _limit + ", skip = " + _skip
                + ", noTimeout = " + _notimeout + ", specialMin = " + _specialMin + ", specialMax = " + _specialMax
                + "}");
    }

    /**
     * Returns the cursor for this split, creating and caching it on first use. The cursor is built from the
     * split's query, field and sort specifications, with the no-timeout option and {@code $min} /
     * {@code $max} specials applied when configured.
     *
     * @return the cached cursor for this split
     */
    public DBCursor getCursor() {
        // todo - support limit/skip
        if (_cursor == null) {
            log.debug("reading data from " + _mongoURI);
            _cursor = MongoConfigUtil.getCollection(_mongoURI).find(_querySpec, _fieldSpec).sort(_sortSpec);
            if (_notimeout)
                _cursor.setOptions(Bytes.QUERYOPTION_NOTIMEOUT);
            if (_specialMin != null)
                _cursor.addSpecial("$min", this._specialMin);
            if (_specialMax != null)
                _cursor.addSpecial("$max", this._specialMax);
            _cursor.slaveOk();
        }

        return _cursor;
    }

    /** @return the lazily created BSON encoder used by {@link #write(DataOutput)} */
    BSONEncoder getBSONEncoder() {
        if (_bsonEncoder == null)
            _bsonEncoder = new BasicBSONEncoder();
        return _bsonEncoder;
    }

    /** @return the lazily created BSON decoder used by {@link #readFields(DataInput)} */
    BSONDecoder getBSONDecoder() {
        if (_bsonDecoder == null)
            _bsonDecoder = new BasicBSONDecoder();
        return _bsonDecoder;
    }

    @Override
    public String toString() {
        // Concatenating the URI reference (rather than calling toString() on it) keeps this null-safe.
        return "MongoInputSplit{URI=" + _mongoURI + ", keyField=" + _keyField + ", min=" + _specialMin
                + ", max=" + _specialMax + ", query=" + _querySpec + ", sort=" + _sortSpec + ", fields="
                + _fieldSpec + ", limit=" + _limit + ", skip=" + _skip + ", notimeout=" + _notimeout + '}';
    }

    public MongoURI getMongoURI() {
        return _mongoURI;
    }

    public DBObject getQuerySpec() {
        return _querySpec;
    }

    public DBObject getFieldSpec() {
        return _fieldSpec;
    }

    public DBObject getSortSpec() {
        return _sortSpec;
    }

    public int getLimit() {
        return _limit;
    }

    public int getSkip() {
        return _skip;
    }

    /**
     * The field to use as the Mapper Key
     */
    public String getKeyField() {
        return _keyField;
    }

    /**
     * Two splits are equal when they have the same URI (compared by its string form), key field, query,
     * field and sort specifications, {@code $min} / {@code $max} bounds, limit, skip and no-timeout flag.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;

        MongoInputSplit that = (MongoInputSplit) o;

        if (_limit != that._limit)
            return false;
        if (_notimeout != that._notimeout)
            return false;
        if (_skip != that._skip)
            return false;

        return nullSafeEquals(_fieldSpec, that._fieldSpec)
                && nullSafeEquals(_keyField, that._keyField)
                && nullSafeEquals(uriString(_mongoURI), uriString(that._mongoURI))
                && nullSafeEquals(_querySpec, that._querySpec)
                && nullSafeEquals(_sortSpec, that._sortSpec)
                // Splits of the same collection differ only by their bounds, so the bounds must take part.
                && nullSafeEquals(_specialMin, that._specialMin)
                && nullSafeEquals(_specialMax, that._specialMax);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: built from the same fields, using the URI's string
     * form exactly as {@code equals} does.
     */
    @Override
    public int hashCode() {
        int result = nullSafeHash(uriString(_mongoURI));
        result = 31 * result + nullSafeHash(_keyField);
        result = 31 * result + nullSafeHash(_querySpec);
        result = 31 * result + nullSafeHash(_fieldSpec);
        result = 31 * result + nullSafeHash(_sortSpec);
        result = 31 * result + nullSafeHash(_specialMin);
        result = 31 * result + nullSafeHash(_specialMax);
        result = 31 * result + (_notimeout ? 1 : 0);
        result = 31 * result + _limit;
        result = 31 * result + _skip;
        return result;
    }

    /** Converts a decoded BSON sub-document into a {@link DBObject}; {@code null} stays {@code null}. */
    private static DBObject toDBObject(Object value) {
        if (value == null)
            return null;
        return new BasicDBObject(((BSONObject) value).toMap());
    }

    /** Reads a decoded numeric field as an int; a missing or non-numeric value yields 0. */
    private static int toInt(Object value) {
        return value instanceof Number ? ((Number) value).intValue() : 0;
    }

    /** Returns the string form of the URI, or {@code null} when the URI itself is {@code null}. */
    private static String uriString(MongoURI uri) {
        return uri == null ? null : uri.toString();
    }

    /** Null-tolerant equality check. */
    private static boolean nullSafeEquals(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }

    /** Null-tolerant hash; {@code null} hashes to 0. */
    private static int nullSafeHash(Object o) {
        return o == null ? 0 : o.hashCode();
    }

    private MongoURI _mongoURI;
    private String _keyField = "_id";
    private Object _specialMin = null;
    private Object _specialMax = null;
    private DBObject _querySpec;
    private DBObject _fieldSpec;
    private DBObject _sortSpec;
    private boolean _notimeout;
    private int _limit = 0;
    private int _skip = 0;
    private transient DBCursor _cursor;
    private transient BSONEncoder _bsonEncoder;
    private transient BSONDecoder _bsonDecoder;

    private static final Log log = LogFactory.getLog(MongoInputSplit.class);

}