com.zjy.mongo.output.BSONFileRecordWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.zjy.mongo.output.BSONFileRecordWriter.java

Source

/*
 * Copyright 2011-2013 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.zjy.mongo.output;

import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBObject;

import com.zjy.mongo.io.BSONWritable;
import com.zjy.mongo.io.MongoUpdateWritable;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.bson.BSONEncoder;
import org.bson.BSONObject;
import org.bson.BasicBSONEncoder;

import java.io.IOException;

public class BSONFileRecordWriter<K, V> extends RecordWriter<K, V> {

    private BSONEncoder bsonEnc = new BasicBSONEncoder();
    private FSDataOutputStream outFile = null;
    private FSDataOutputStream splitsFile = null;
    private long bytesWritten = 0L;
    private long currentSplitLen = 0;
    private long currentSplitStart = 0;
    private long splitSize;

    public BSONFileRecordWriter(final FSDataOutputStream outFile, final FSDataOutputStream splitsFile,
            final long splitSize) {
        this.outFile = outFile;
        this.splitsFile = splitsFile;
        this.splitSize = splitSize;

    }

    public BSONFileRecordWriter(final FSDataOutputStream outFile) {
        this(outFile, null, 0);
    }

    public void close(final TaskAttemptContext context) throws IOException {
        if (this.outFile != null) {
            this.outFile.close();
        }
        writeSplitData(0, true);
        if (this.splitsFile != null) {
            this.splitsFile.close();
        }
    }

    public void write(final K key, final V value) throws IOException {
        final FSDataOutputStream destination = this.outFile;

        if (value instanceof MongoUpdateWritable) {
            throw new IllegalArgumentException(
                    "MongoUpdateWriteable can only be used to output to a mongo collection, "
                            + "not a static BSON file.");
        }

        Object keyBSON = null;
        BSONObject toEncode = null;
        byte[] outputByteBuf;
        if (key != null) {
            keyBSON = BSONWritable.toBSON(key);
            if (keyBSON != null) {
                toEncode = new BasicDBObject();
            }
        }

        if (value instanceof BSONWritable) {
            if (toEncode != null) {
                toEncode.putAll(((BSONWritable) value).getDoc());
            } else {
                toEncode = ((BSONWritable) value).getDoc();
            }
        } else if (value instanceof BSONObject) {
            if (toEncode != null) {
                toEncode.putAll((BSONObject) value);
            } else {
                toEncode = (BSONObject) value;
            }
        } else {
            if (toEncode != null) {
                toEncode.put("value", BSONWritable.toBSON(value));
            } else {
                final DBObject o = new BasicDBObject();
                o.put("value", BSONWritable.toBSON(value));
                toEncode = o;
            }
        }

        if (keyBSON != null) {
            toEncode.put("_id", keyBSON);
        }

        outputByteBuf = bsonEnc.encode(toEncode);
        destination.write(outputByteBuf, 0, outputByteBuf.length);
        bytesWritten += outputByteBuf.length;
        writeSplitData(outputByteBuf.length, false);
    }

    private void writeSplitData(final int docSize, final boolean force) throws IOException {
        //If no split file is being written, bail out now
        if (this.splitsFile == null) {
            return;
        }

        // hit the threshold of a split, write it to the metadata file
        if (force || currentSplitLen + docSize >= this.splitSize) {
            BSONObject splitObj = BasicDBObjectBuilder.start().add("s", currentSplitStart).add("l", currentSplitLen)
                    .get();
            byte[] encodedObj = this.bsonEnc.encode(splitObj);
            this.splitsFile.write(encodedObj, 0, encodedObj.length);

            //reset the split len and start
            this.currentSplitLen = 0;
            this.currentSplitStart = bytesWritten - docSize;
        } else {
            // Split hasn't hit threshold yet, just add size
            this.currentSplitLen += docSize;
        }
    }

}