com.datatorrent.contrib.mongodb.MongoDBOutputOperator.java Source code

Java tutorial

Introduction

Here is the source code for com.datatorrent.contrib.mongodb.MongoDBOutputOperator.java

Source

/*
 * Copyright (c) 2013 DataTorrent, Inc. ALL Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datatorrent.contrib.mongodb;

import com.datatorrent.api.annotation.InputPortFieldAnnotation;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.Operator;
import com.mongodb.*;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.validation.constraints.Min;

import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * MongoDB output adapter operator, which sends inserted data to a non-transactional database.<p><br>
 *
 * <br>
 * Ports:<br>
 * <b>Input</b>: Can have one input port <br>
 * <b>Output</b>: no output port<br>
 * <br>
 * Properties:<br>
 * <b>maxWindowTable</b>:the table to save the most recent inserted windowId, operatorId information for recovery use<br>
 * <b>maxWindowCollection</b>:mongoDB collection of the maxWindowTable<br>
 * <b>windowId</b>:Id of current window<br>
 * <b>operatorId</b>:Id of the operator<br>
 * <b>batchSize</b>:size for each batch insert, default value is 1000<br>
 * <b>lastWindowId</b>:last inserted windowId, is obtained at setup from maxWindowTable with specific operatorId<br>
 * <b>ignoreWindow</b>:the flag to indicate ignoring out of date window <br>
 * <b>tupleId</b>:the Id of the tuple, incrementing at each tuple process, start from 1 at beginWindow()<br>
 * <b>windowIdColumnName</b>:the name of the windowId column in maxWindowTable, should be set by the user<br>
 * <b>operatorIdColumnName</b>:the name of the operatorId column in maxWindowTable, should be set by the user<br>
 * <b>tableList</b>: the list of all the tables of the mapping<br>
 * <b>tableToDocument</b>:each tuple corresponds to one document for one collection to be inserted<br>
 * <b>tableToDocumentList</b>:for bulk insert, each table has a document list to insert. This is table and document list map <br>
 * <b>tupleId</b>:the Id of the tuple, incrementing at each tuple process, start from 1 at beginWindow()<br>
 * <b>queryFunction</b>:corresponding to the option for the ObjectId of 12 bytes format saving. The windowId, tupleId, operatorId of each tuple are saved in each collection as the column ObjectId for recovery<br>
 * It currently has 3 formats for the ObjectId. When the operator recovers, it removes from the collections the documents which have the same windowId and operatorId as recorded in maxWindowTable, and inserts the documents again<br>
 * <br>
 * Compile time checks:<br>
 * None<br>
 * <br>
 * Run time checks:<br>
 * hostName<br>
 * batchSize<br>
 * <b>data type</b>: the inserted data can be any Object that mongoDB supports<br>
 *
 * <b>Benchmarks</b>:
 * <br>
 *
 * @since 0.3.2
 */
public abstract class MongoDBOutputOperator<T> extends MongoDBConnectable implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(MongoDBOutputOperator.class);
    protected static final int DEFAULT_BATCH_SIZE = 1000;
    /** Number of bytes in the ObjectId buffers built by this operator. */
    private static final int OBJECT_ID_LENGTH = 12;
    /** Number of trailing ObjectId bytes that carry the tuple id in every layout. */
    private static final int TUPLE_ID_LENGTH = 3;

    /** Number of tuples after which the pending documents are flushed; must be at least 1. */
    @Min(1)
    protected long batchSize = DEFAULT_BATCH_SIZE;
    /** All the tables (collections) of the mapping. */
    protected transient ArrayList<String> tableList = new ArrayList<String>();
    /** Per table, the single document built for the tuple currently being processed. */
    protected transient HashMap<String, BasicDBObject> tableToDocument = new HashMap<String, BasicDBObject>();
    /** Per table, the documents collected since the last bulk insert. */
    protected transient HashMap<String, List<DBObject>> tableToDocumentList = new HashMap<String, List<DBObject>>();
    /** Name of the collection that stores the last written windowId per operator. */
    protected String maxWindowTable;
    protected transient DBCollection maxWindowCollection;
    protected transient long windowId;
    protected transient int operatorId;
    protected transient long lastWindowId;
    protected transient boolean ignoreWindow;
    protected String windowIdColumnName;
    protected String operatorIdColumnName;
    /** Id of the next tuple within the current window; reset to 1 in {@link #beginWindow(long)}. */
    protected transient int tupleId;
    /** Selects the ObjectId layout (1, 2 or 3) used both for inserting and for the recovery query. */
    protected int queryFunction;

    /**
     * Implement how to process a tuple in the derived class.
     * Every exception thrown from here is wrapped into a RuntimeException by the input port.
     *
     * @param tuple the tuple received on the input port
     */
    public abstract void processTuple(T tuple);

    /**
     * The input port. Tuples are dropped while the current window is being ignored,
     * otherwise they are handed to {@link #processTuple(Object)}.
     */
    @InputPortFieldAnnotation(name = "inputPort")
    public final transient DefaultInputPort<T> inputPort = new DefaultInputPort<T>() {
        @Override
        public void process(T tuple) {
            if (ignoreWindow) {
                return; // window was already written before recovery
            }

            try {
                processTuple(tuple);
            } catch (Exception ex) {
                throw new RuntimeException("Exception during process tuple", ex);
            }
        }
    };

    /**
     * Init last completed windowId information with operatorId, read from maxWindowTable.
     * If there is no document for this operatorId, insert a default document with windowId 0.
     *
     * @throws IllegalStateException if the stored windowId is missing or not numeric
     */
    public void initLastWindowInfo() {
        maxWindowCollection = db.getCollection(maxWindowTable);
        BasicDBObject query = new BasicDBObject();
        query.put(operatorIdColumnName, operatorId);
        DBCursor cursor = maxWindowCollection.find(query);
        try {
            if (cursor.hasNext()) {
                Object stored = cursor.next().get(windowIdColumnName);
                if (!(stored instanceof Number)) {
                    throw new IllegalStateException("windowId column '" + windowIdColumnName + "' in "
                            + maxWindowTable + " does not hold a number: " + stored);
                }
                // Accept any numeric type, the value may have been written as int or long.
                lastWindowId = ((Number) stored).longValue();
            } else {
                BasicDBObject doc = new BasicDBObject();
                doc.put(windowIdColumnName, (long) 0);
                doc.put(operatorIdColumnName, operatorId);
                maxWindowCollection.save(doc);
            }
        } finally {
            cursor.close(); // release the server side cursor
        }

        logger.info("last windowid: {}", lastWindowId);
    }

    /**
     * Implement Operator Interface.
     * If windowId is less than the last completed windowId, then ignore the window.
     * If windowId is equal to the last completed windowId, then remove the documents with same windowId of the operatorId, and insert the documents later
     * If windowId is greater than the last completed windowId, then process the window
     *
     * @param windowId id of the window that starts
     * @throws RuntimeException if the window has to be replayed and queryFunction is not 1, 2 or 3
     */
    @Override
    public void beginWindow(long windowId) {
        this.windowId = windowId;
        tupleId = 1;
        ignoreWindow = windowId < lastWindowId;
        if (windowId == lastWindowId) {
            removeDocumentsOfCurrentWindow();
        }
    }

    /**
     * Removes, from every table of the mapping, the documents whose ObjectId lies between the
     * lowest and the highest id the current window/operator pair can produce.
     */
    private void removeDocumentsOfCurrentWindow() {
        ByteBuffer bb = ByteBuffer.allocate(OBJECT_ID_LENGTH);
        bb.order(ByteOrder.BIG_ENDIAN);
        StringBuilder low = new StringBuilder();
        StringBuilder high = new StringBuilder();
        if (queryFunction == 1) {
            queryFunction1(bb, high, low);
        } else if (queryFunction == 2) {
            queryFunction2(bb, high, low);
        } else if (queryFunction == 3) {
            queryFunction3(bb, high, low);
        } else {
            throw new RuntimeException("unknown queryFunction type:" + queryFunction);
        }

        BasicDBObject idRange = new BasicDBObject("$gte", new ObjectId(low.toString()))
                .append("$lte", new ObjectId(high.toString()));
        BasicDBObject query = new BasicDBObject("_id", idRange);
        for (String table : tableList) {
            db.getCollection(table).remove(query);
        }
    }

    /**
     * At endWindow, if not ignoring window, record the windowId in maxWindowTable and bulk insert
     * the documents that are still pending. Each pending list is replaced by a fresh one after it
     * has been inserted, so that no document is sent again at the end of a later window.
     */
    @Override
    public void endWindow() {
        if (ignoreWindow) {
            return;
        }

        // The window id is recorded before the documents are written, same order as the batch path.
        recordCurrentWindow();

        for (Map.Entry<String, List<DBObject>> pending : tableToDocumentList.entrySet()) {
            List<DBObject> docList = pending.getValue();
            if (docList == null || docList.isEmpty()) {
                continue; // nothing collected for this table since the last flush
            }
            db.getCollection(pending.getKey()).insert(docList);
            pending.setValue(new ArrayList<DBObject>());
        }
    }

    /**
     * Writes the current windowId for this operatorId into maxWindowCollection.
     */
    private void recordCurrentWindow() {
        BasicDBObject where = new BasicDBObject();
        where.put(operatorIdColumnName, operatorId);
        BasicDBObject value = new BasicDBObject();
        value.put(operatorIdColumnName, operatorId);
        value.put(windowIdColumnName, windowId);
        maxWindowCollection.update(where, value);
    }

    /**
     * At setup time, connect to the database, init last completed windowId from maxWindowTable
     * and create the per table document holders.
     *
     * @param context operator context, provides the operatorId
     * @throws RuntimeException if the host name cannot be resolved
     */
    @Override
    public void setup(OperatorContext context) {
        operatorId = context.getId();
        try {
            mongoClient = new MongoClient(hostName);
            db = mongoClient.getDB(dataBase);
            if (userName != null && passWord != null) {
                db.authenticate(userName, passWord.toCharArray());
            }
            initLastWindowInfo();
            for (String table : tableList) {
                tableToDocumentList.put(table, new ArrayList<DBObject>());
                tableToDocument.put(table, new BasicDBObject());
            }
        } catch (UnknownHostException ex) {
            // Without a connection every later call would fail with an unrelated
            // NullPointerException, so fail here with the real cause.
            throw new RuntimeException("Unknown MongoDB host: " + hostName, ex);
        }
    }

    abstract public void setColumnMapping(String[] mapping);

    /**
     * Closes the client that was opened in {@link #setup(OperatorContext)}, if any.
     */
    @Override
    public void teardown() {
        if (mongoClient != null) {
            mongoClient.close();
            mongoClient = null;
        }
    }

    /**
     * Shared processTuple for HashMap and ArrayList output Operator.
     * Builds the ObjectId of the current tuple with the layout selected by queryFunction, stores it
     * as "_id" in every document of tableToDocument and appends the documents to the pending lists.
     * When tupleId is a multiple of batchSize the windowId is recorded and the pending lists are
     * bulk inserted.
     *
     * @throws RuntimeException if queryFunction is not 1, 2 or 3
     */
    public void processTupleCommon() {
        ByteBuffer bb = ByteBuffer.allocate(OBJECT_ID_LENGTH);
        bb.order(ByteOrder.BIG_ENDIAN);
        if (queryFunction == 1) {
            insertFunction1(bb);
        } else if (queryFunction == 2) {
            insertFunction2(bb);
        } else if (queryFunction == 3) {
            insertFunction3(bb);
        } else {
            throw new RuntimeException("unknown insertFunction type:" + queryFunction);
        }
        String idHex = toHex(bb.array());

        boolean flushBatch = tupleId % batchSize == 0;
        if (flushBatch && !tableToDocument.isEmpty()) {
            recordCurrentWindow(); // once per batch is enough, the written value is the same for all tables
        }

        for (Map.Entry<String, BasicDBObject> entry : tableToDocument.entrySet()) {
            String table = entry.getKey();
            BasicDBObject doc = entry.getValue();
            doc.put("_id", new ObjectId(idHex));

            List<DBObject> docList = tableToDocumentList.get(table);
            if (docList == null) {
                // table was not known at setup time
                docList = new ArrayList<DBObject>();
                tableToDocumentList.put(table, docList);
            }
            docList.add(doc);

            if (flushBatch) {
                db.getCollection(table).insert(docList);
                tableToDocumentList.put(table, new ArrayList<DBObject>());
            }
        }
        ++tupleId;
    }

    /**
     * Computes the id range for layout 1: 8B windowId | 1B operatorId | 3B tupleId
     *
     * @param bb   buffer to write the id prefix into; expected to be an empty 12 byte array-backed buffer
     * @param high receives the hex form of the highest id (tupleId bytes all 0xff)
     * @param low  receives the hex form of the lowest id (tupleId bytes all 0x00)
     */
    public void queryFunction1(ByteBuffer bb, StringBuilder high, StringBuilder low) {
        bb.putLong(windowId);
        bb.put((byte) operatorId);
        appendIdRange(bb, high, low);
    }

    /**
     * Computes the id range for layout 2: 4B baseSec | 3B operatorId | 2B windowId | 3B tupleId.
     * This is the layout written by {@link #insertFunction2(ByteBuffer)}.
     *
     * @param bb   buffer to write the id prefix into; expected to be an empty 12 byte array-backed buffer
     * @param high receives the hex form of the highest id (tupleId bytes all 0xff)
     * @param low  receives the hex form of the lowest id (tupleId bytes all 0x00)
     */
    public void queryFunction2(ByteBuffer bb, StringBuilder high, StringBuilder low) {
        bb.putInt((int) (windowId >> 32));
        putThreeBytes(bb, operatorId);
        bb.putShort((short) (windowId & 0xffff));
        appendIdRange(bb, high, low);
    }

    /**
     * Computes the id range for layout 3: 4B baseSec | 2B windowId | 3B operatorId | 3B tupleId.
     * This is the layout written by {@link #insertFunction3(ByteBuffer)}.
     *
     * @param bb   buffer to write the id prefix into; expected to be an empty 12 byte array-backed buffer
     * @param high receives the hex form of the highest id (tupleId bytes all 0xff)
     * @param low  receives the hex form of the lowest id (tupleId bytes all 0x00)
     */
    public void queryFunction3(ByteBuffer bb, StringBuilder high, StringBuilder low) {
        bb.putInt((int) (windowId >> 32));
        bb.putShort((short) (windowId & 0xffff));
        putThreeBytes(bb, operatorId);
        appendIdRange(bb, high, low);
    }

    /**
     * Writes layout 1 into the buffer: 8B windowId | 1B operatorId | 3B tupleId
     */
    void insertFunction1(ByteBuffer bb) {
        bb.putLong(windowId);
        bb.put((byte) operatorId);
        putThreeBytes(bb, tupleId);
    }

    /**
     * Writes layout 2 into the buffer: 4B baseSec | 3B operatorId | 2B windowId | 3B tupleId
     */
    void insertFunction2(ByteBuffer bb) {
        bb.putInt((int) (windowId >> 32));
        putThreeBytes(bb, operatorId);
        bb.putShort((short) (windowId & 0xffff));
        putThreeBytes(bb, tupleId);
    }

    /**
     * Writes layout 3 into the buffer: 4B baseSec | 2B windowId | 3B operatorId | 3B tupleId
     */
    void insertFunction3(ByteBuffer bb) {
        bb.putInt((int) (windowId >> 32));
        bb.putShort((short) (windowId & 0xffff));
        putThreeBytes(bb, operatorId);
        putThreeBytes(bb, tupleId);
    }

    /**
     * Completes the id prefix held in the buffer with the smallest and the largest tupleId and
     * appends the hex form of the two resulting ids. The position of the given buffer is not
     * moved; the tupleId bytes of its backing array are overwritten.
     */
    private static void appendIdRange(ByteBuffer prefix, StringBuilder high, StringBuilder low) {
        // duplicate() shares the bytes but has its own position, so both bounds start
        // right behind the prefix instead of running past the end of the buffer.
        ByteBuffer lowest = prefix.duplicate();
        for (int i = 0; i < TUPLE_ID_LENGTH; i++) {
            lowest.put((byte) 0);
        }
        low.append(toHex(lowest.array()));

        ByteBuffer highest = prefix.duplicate();
        for (int i = 0; i < TUPLE_ID_LENGTH; i++) {
            highest.put((byte) 0xff);
        }
        high.append(toHex(highest.array()));
    }

    /**
     * Puts the three low order bytes of the value into the buffer, most significant byte first.
     */
    private static void putThreeBytes(ByteBuffer bb, int value) {
        for (int shift = 16; shift >= 0; shift -= 8) {
            bb.put((byte) (value >> shift));
        }
    }

    /**
     * Returns the lower case hex form of the bytes, two characters per byte.
     */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(String.format("%02x", b & 0xff));
        }
        return hex.toString();
    }

    /**
     * Adds a table (collection) to the mapping.
     */
    public void addTable(String table) {
        tableList.add(table);
    }

    public ArrayList<String> getTableList() {
        return tableList;
    }

    /**
     * Selects the ObjectId layout; 1, 2 and 3 are the supported values.
     */
    public void setQueryFunction(int queryFunction) {
        this.queryFunction = queryFunction;
    }

    public long getBatchSize() {
        return batchSize;
    }

    public void setBatchSize(long batchSize) {
        this.batchSize = batchSize;
    }

    public String getMaxWindowTable() {
        return maxWindowTable;
    }

    public void setMaxWindowTable(String maxWindowTable) {
        this.maxWindowTable = maxWindowTable;
    }

    public String getWindowIdColumnName() {
        return windowIdColumnName;
    }

    public void setWindowIdColumnName(String windowIdColumnName) {
        this.windowIdColumnName = windowIdColumnName;
    }

    public String getOperatorIdColumnName() {
        return operatorIdColumnName;
    }

    public void setOperatorIdColumnName(String operatorIdColumnName) {
        this.operatorIdColumnName = operatorIdColumnName;
    }

    public long getLastWindowId() {
        return lastWindowId;
    }

    public void setLastWindowId(long lastWindowId) {
        this.lastWindowId = lastWindowId;
    }
}