Java tutorial: MongoSink, a Flume sink that reads events from a channel and writes them to MongoDB
/**
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.cnic.bigdatalab.flume.sink.mongodb;

import com.mongodb.*;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Reads events from a channel and writes them to MongoDB. It can read fields
 * from both body and headers.
 *
 * Configuration parameters are:
 *
 * <p><ul>
 * <li><tt>dynamic</tt> <em>(boolean)</em>: If true, dynamic mode is enabled
 *     and the database and collection to use are selected from the event
 *     headers. Defaults to <tt>false</tt>.</li>
 * <li><tt>dynamicDB</tt> <em>(string)</em>: Name of the event header that is
 *     looked up for the database name. Only used when dynamic mode is
 *     enabled. Defaults to <tt>db</tt>.</li>
 * <li><tt>dynamicCollection</tt> <em>(string)</em>: Name of the event header
 *     that is looked up for the collection name. Only used when dynamic mode
 *     is enabled. Defaults to <tt>collection</tt>.</li>
 * <li><tt>mongoUri</tt> <em>(string, required)</em>: A
 *     <a href="http://api.mongodb.org/java/current/com/mongodb/MongoClientURI.html">Mongo client URI</a>
 *     defining the MongoDB server address and, optionally, a default database
 *     and collection. When dynamic mode is enabled, the collection defined
 *     here is used as a fallback.</li>
 * <li><tt>mappingFile</tt> <em>(string)</em>: Path to a
 *     <a href="http://json-schema.org/">JSON Schema</a> used for type mapping
 *     purposes.</li>
 * <li><tt>batchSize</tt> <em>(integer)</em>: Maximum number of events taken
 *     from the channel per transaction. Defaults to <tt>25</tt>.</li>
 * <li><tt>updateInsteadReplace</tt> <em>(boolean)</em>: If true, documents
 *     carrying an <tt>_id</tt> field are upserted with a <tt>$set</tt> update
 *     instead of being saved (replaced). Defaults to <tt>false</tt>.</li>
 * </ul></p>
 */
public class MongoSink extends AbstractSink implements Configurable {

    private static final Logger log = LoggerFactory.getLogger(MongoSink.class);

    private static final String CONF_URI = "mongoUri";
    private static final String CONF_MAPPING_FILE = "mappingFile";
    private static final String CONF_BATCH_SIZE = "batchSize";
    private static final String CONF_DYNAMIC = "dynamic";
    private static final String CONF_DYNAMIC_DB_FIELD = "dynamicDB";
    private static final String CONF_DYNAMIC_COLLECTION_FIELD = "dynamicCollection";
    private static final String CONF_UPDATE_INSTEAD_REPLACE = "updateInsteadReplace";

    private static final int DEFAULT_BATCH_SIZE = 25;
    private static final boolean DEFAULT_DYNAMIC = false;
    private static final String DEFAULT_DYNAMIC_DB_FIELD = "db";
    private static final String DEFAULT_DYNAMIC_COLLECTION_FIELD = "collection";
    private static final boolean DEFAULT_UPDATE_INSTEAD_REPLACE = false;

    private SinkCounter sinkCounter;
    private int batchSize;
    private MongoClient mongoClient;
    private MongoClientURI mongoClientURI;
    private DB mongoDefaultDb;
    private DBCollection mongoDefaultCollection;
    private boolean isDynamicMode;
    private String dynamicDBField;
    private String dynamicCollectionField;
    private EventParser eventParser;
    private boolean updateInsteadReplace;
    private final CounterGroup counterGroup = new CounterGroup();

    public MongoSink() {
        super();
    }

    /**
     * {@inheritDoc}
     *
     * @param context
     */
    @Override
    public void configure(Context context) {
        try {
            if (!"INJECTED".equals(context.getString(CONF_URI))) {
                this.mongoClientURI = new MongoClientURI(
                        context.getString(CONF_URI),
                        MongoClientOptions.builder().writeConcern(WriteConcern.SAFE));
                this.mongoClient = new MongoClient(mongoClientURI);
                if (mongoClientURI.getDatabase() != null) {
                    this.mongoDefaultDb = mongoClient.getDB(mongoClientURI.getDatabase());
                }
                if (mongoClientURI.getCollection() != null) {
                    this.mongoDefaultCollection = mongoDefaultDb.getCollection(mongoClientURI.getCollection());
                }
            }

            final String mappingFilename = context.getString(CONF_MAPPING_FILE);
            this.eventParser = (mappingFilename == null)
                    ? new EventParser()
                    : new EventParser(MappingDefinition.load(mappingFilename));

            this.isDynamicMode = context.getBoolean(CONF_DYNAMIC, DEFAULT_DYNAMIC);
            if (!isDynamicMode && mongoDefaultCollection == null) {
                throw new MongoSinkException(
                        "Default MongoDB collection must be specified unless dynamic mode is enabled");
            }
            this.dynamicDBField = context.getString(CONF_DYNAMIC_DB_FIELD, DEFAULT_DYNAMIC_DB_FIELD);
            this.dynamicCollectionField = context.getString(CONF_DYNAMIC_COLLECTION_FIELD,
                    DEFAULT_DYNAMIC_COLLECTION_FIELD);

            this.sinkCounter = new SinkCounter(this.getName());
            this.batchSize = context.getInteger(CONF_BATCH_SIZE, DEFAULT_BATCH_SIZE);
            this.updateInsteadReplace = context.getBoolean(CONF_UPDATE_INSTEAD_REPLACE,
                    DEFAULT_UPDATE_INSTEAD_REPLACE);
        } catch (IOException ex) {
            throw new MongoSinkException(ex);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Status process() throws EventDeliveryException {
        log.debug("Executing MongoSink.process()...");
        Status status = Status.READY;
        Channel channel = getChannel();
        Transaction txn = channel.getTransaction();
        try {
            txn.begin();
            int count;
            List<Event> eventList = new ArrayList<Event>();
            for (count = 0; count < batchSize; ++count) {
                Event event = channel.take();
                if (event == null) {
                    break;
                }
                eventList.add(event);
            }
            if (count <= 0) {
                sinkCounter.incrementBatchEmptyCount();
                counterGroup.incrementAndGet("channel.underflow");
                status = Status.BACKOFF;
            } else {
                if (count < batchSize) {
                    sinkCounter.incrementBatchUnderflowCount();
                    status = Status.BACKOFF;
                } else {
                    sinkCounter.incrementBatchCompleteCount();
                }
                for (Event event : eventList) {
                    final DBObject document = this.eventParser.parse(event);
                    if (this.updateInsteadReplace && document.get("_id") != null) {
                        // update requires the '_id' field to match the document
                        BasicDBObject searchQuery = new BasicDBObject().append("_id", document.get("_id"));
                        // upsert by _id using $set
                        BasicDBObject updatedDocument = new BasicDBObject().append("$set", document);
                        getDBCollection(event).update(searchQuery, updatedDocument, true, false);
                    } else {
                        getDBCollection(event).save(document);
                    }
                }
                sinkCounter.addToEventDrainAttemptCount(eventList.size());
            }
            txn.commit();
            sinkCounter.addToEventDrainSuccessCount(count);
            counterGroup.incrementAndGet("transaction.success");
        } catch (ChannelException e) {
            log.error("Unexpected error while executing MongoSink.process", e);
            txn.rollback();
            status = Status.BACKOFF;
            this.sinkCounter.incrementConnectionFailedCount();
        } catch (Throwable t) {
            log.error("Unexpected error while executing MongoSink.process", t);
            txn.rollback();
            status = Status.BACKOFF;
            if (t instanceof Error) {
                throw new MongoSinkException(t);
            }
        } finally {
            txn.close();
        }
        return status;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized void start() {
        this.sinkCounter.start();
        super.start();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized void stop() {
        this.mongoClient.close();
        this.sinkCounter.stop();
        super.stop();
    }

    private DBCollection getDBCollection(Event event) {
        if (!isDynamicMode) {
            return mongoDefaultCollection;
        }
        final Map<String, String> headers = event.getHeaders();
        final String dbName = headers.get(dynamicDBField);
        final String collectionName = headers.get(dynamicCollectionField);
        if (collectionName == null) {
            if (mongoDefaultCollection == null) {
                throw new MongoSinkException("No collection specified and no default set");
            }
            return mongoDefaultCollection;
        }
        DB db;
        if (dbName == null) {
            if (mongoDefaultDb == null) {
                throw new MongoSinkException("No DB specified and no default set");
            }
            db = mongoDefaultDb;
        } else {
            db = mongoClient.getDB(dbName);
        }
        return db.getCollection(collectionName);
    }

    private List<Event> takeEventsFromChannel(Channel channel, int eventsToTake) {
        List<Event> events = new ArrayList<Event>();
        for (int i = 0; i < eventsToTake; i++) {
            this.sinkCounter.incrementEventDrainAttemptCount();
            events.add(channel.take());
        }
        events.removeAll(Collections.singleton(null));
        return events;
    }
}
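
In a running agent, Flume instantiates and wires this sink from the agent's properties file, but a small stand-alone sketch can make the configure()/process() lifecycle easier to follow. The snippet below is a minimal sketch, not part of the original source: the class name MongoSinkSketch, the URI mongodb://localhost:27017/flume.events, and the sample headers and body are illustrative assumptions, a MongoDB instance is assumed to be reachable at that address, and how the event body is mapped to a document is left to EventParser's default behavior.

package cn.cnic.bigdatalab.flume.sink.mongodb;

import org.apache.flume.Context;
import org.apache.flume.Transaction;
import org.apache.flume.channel.MemoryChannel;
import org.apache.flume.conf.Configurables;
import org.apache.flume.event.EventBuilder;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

/**
 * Illustrative driver for MongoSink (placed in the sink's package for brevity).
 * All concrete values below are examples, not defaults mandated by the sink.
 */
public class MongoSinkSketch {

    public static void main(String[] args) throws Exception {
        // Sink settings, equivalent to agent properties such as
        // a1.sinks.k1.mongoUri, a1.sinks.k1.dynamic and a1.sinks.k1.batchSize.
        Context sinkContext = new Context();
        sinkContext.put("mongoUri", "mongodb://localhost:27017/flume.events"); // example address
        sinkContext.put("dynamic", "true");   // route per event via headers
        sinkContext.put("batchSize", "10");

        MongoSink sink = new MongoSink();
        sink.setName("mongo-sink-sketch");
        Configurables.configure(sink, sinkContext);

        // Back the sink with an in-memory channel, as a Flume agent would.
        MemoryChannel channel = new MemoryChannel();
        Configurables.configure(channel, new Context());
        sink.setChannel(channel);

        channel.start();
        sink.start();

        // In dynamic mode the "db" and "collection" headers pick the target.
        Map<String, String> headers = new HashMap<String, String>();
        headers.put("db", "logs");
        headers.put("collection", "events_2014_11");

        // Events must be put inside a channel transaction.
        Transaction tx = channel.getTransaction();
        tx.begin();
        channel.put(EventBuilder.withBody("sample event body",
                StandardCharsets.UTF_8, headers));
        tx.commit();
        tx.close();

        // One process() call drains up to batchSize events and writes them to MongoDB.
        sink.process();

        sink.stop();
        channel.stop();
    }
}

In a real deployment the same settings would typically appear as sink properties in the agent configuration (for example a1.sinks.k1.type = cn.cnic.bigdatalab.flume.sink.mongodb.MongoSink together with mongoUri, dynamic, batchSize and the other parameters listed in the class javadoc), and the channel wiring shown above is handled by Flume itself.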