com.zjy.mongo.output.MongoRecordWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.zjy.mongo.output.MongoRecordWriter.java, a Hadoop RecordWriter that serializes MongoDB output records to a temporary file.

Source

/*
 * Copyright 2011-2013 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.zjy.mongo.output;

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;

import com.zjy.mongo.MongoOutput;
import com.zjy.mongo.io.BSONWritable;
import com.zjy.mongo.io.MongoUpdateWritable;
import com.zjy.mongo.io.MongoWritableTypes;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.bson.BSONObject;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * A Hadoop {@link RecordWriter} that serializes each output record to a
 * temporary file on the task's {@link FileSystem} instead of sending it to
 * MongoDB directly. The file location is obtained from
 * {@link MongoOutputCommitter#getTaskAttemptPath(TaskAttemptContext)}.
 *
 * <p>Each record is written as an int type tag (one of the
 * {@link MongoWritableTypes} constants) followed by the serialized writable.
 *
 * @param <K> the type of the output key
 * @param <V> the type of the output value
 */
public class MongoRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final Log LOG = LogFactory.getLog(MongoRecordWriter.class);
    private final List<DBCollection> collections;
    private final TaskAttemptContext context;
    // Reused for every plain (non-update) record to avoid a per-record allocation.
    private final BSONWritable bsonWritable;
    // Stays null when the temporary file could not be opened in the constructor.
    private FSDataOutputStream outputStream;
    // The failure that prevented the temporary file from being opened, if any.
    // Kept so that write() can report the real cause instead of failing with
    // a NullPointerException.
    private IOException openFailure;

    /**
     * Create a MongoRecordWriter targeting a single DBCollection.
     * @param c a DBCollection
     * @param ctx the TaskAttemptContext
     */
    public MongoRecordWriter(final DBCollection c, final TaskAttemptContext ctx) {
        this(Arrays.asList(c), ctx);
    }

    /**
     * Create a MongoRecordWriter that targets multiple DBCollections.
     *
     * <p>The temporary output file is opened here. If opening it fails, the
     * error is logged and construction still succeeds; the failure is then
     * reported as an {@link IOException} by the first call to
     * {@link #write(Object, Object)}.
     *
     * @param c a list of DBCollections; the list is copied
     * @param ctx the TaskAttemptContext
     */
    public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
        collections = new ArrayList<DBCollection>(c);
        context = ctx;
        bsonWritable = new BSONWritable();

        // Initialize output stream.
        try {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
            LOG.info("Writing to temporary file: " + outputPath.toString());
            // "true" overwrites any file already present at this path.
            outputStream = fs.create(outputPath, true);
        } catch (IOException e) {
            LOG.error("Could not open temporary file for buffering Mongo output", e);
            // Remember the cause; constructors keep their original signature
            // (no checked exception), so the error surfaces from write().
            openFailure = e;
        }
    }

    /**
     * Close the temporary output stream, if one was opened. A failure to
     * close is logged and not rethrown.
     *
     * @param context the TaskAttemptContext (unused)
     */
    @Override
    public void close(final TaskAttemptContext context) {
        if (outputStream != null) {
            try {
                outputStream.close();
            } catch (IOException e) {
                LOG.error("Could not close output stream", e);
            }
        }
    }

    /**
     * Serialize one record to the temporary output file.
     *
     * <p>A {@link MongoUpdateWritable} value is written as-is (the key is
     * ignored). Any other value is merged into a new document whose
     * {@code _id} is derived from the key, and that document is written as a
     * {@link BSONWritable}.
     *
     * @param key the output key, used as the {@code _id} of plain documents
     * @param value the output value
     * @throws IOException if the temporary file could not be opened when this
     *                     writer was created, or if writing to it fails
     */
    @Override
    public void write(final K key, final V value) throws IOException {
        if (outputStream == null) {
            // Fail with a descriptive, checked error rather than a
            // NullPointerException when the constructor could not open the file.
            throw new IOException("Temporary file for buffering Mongo output is not open", openFailure);
        }

        if (value instanceof MongoUpdateWritable) {
            outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
            ((MongoUpdateWritable) value).write(outputStream);
        } else {
            DBObject o = new BasicDBObject();
            if (key instanceof BSONWritable) {
                o.put("_id", ((BSONWritable) key).getDoc());
            } else if (key instanceof BSONObject) {
                o.put("_id", key);
            } else {
                o.put("_id", BSONWritable.toBSON(key));
            }

            if (value instanceof BSONWritable) {
                o.putAll(((BSONWritable) value).getDoc());
            } else if (value instanceof MongoOutput) {
                ((MongoOutput) value).appendAsValue(o);
            } else if (value instanceof BSONObject) {
                o.putAll((BSONObject) value);
            } else {
                // Anything else is stored under a single "value" field.
                o.put("value", BSONWritable.toBSON(value));
            }
            outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
            bsonWritable.setDoc(o);
            bsonWritable.write(outputStream);
        }
    }

    /**
     * Create an index on the first of this writer's target collections. The
     * call is made immediately and only against that one collection.
     *
     * @param index a DBObject describing the keys of the index.
     * @param options a DBObject describing the options to apply when creating
     *                the index.
     * @throws IndexOutOfBoundsException if this writer was created with an
     *                                   empty list of collections
     */
    public void ensureIndex(final DBObject index, final DBObject options) {
        // just do it on one mongod
        collections.get(0).createIndex(index, options);
    }

    /**
     * Get the TaskAttemptContext associated with this MongoRecordWriter.
     * @return the TaskAttemptContext
     */
    public TaskAttemptContext getContext() {
        return context;
    }
}