org.elasticsearch.river.kafka.KafkaRiver.java Source code

Introduction

Here is the source code for org.elasticsearch.river.kafka.KafkaRiver.java, a river implementation that consumes messages from a Kafka topic on a background thread and indexes them into Elasticsearch as bulk requests.

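The river is configured entirely through the settings document parsed in the constructor below (the "kafka", "index" and "custom" blocks). A minimal registration sketch is shown here, assuming the plugin registers this river under the type name "kafka" and that a Client is already available; neither assumption is visible in this file, and the field values are only illustrative defaults taken from the constructor.

import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;

public class KafkaRiverRegistration {

    // Index a _meta document into the _river index to start the river.
    public static void register(Client client) throws Exception {
        client.prepareIndex("_river", "my_kafka_river", "_meta")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                            .field("type", "kafka")                   // assumed river type name
                            .startObject("kafka")
                                .field("zkaddress", "localhost:2181") // ZooKeeper connect string
                                .field("topic", "info")               // Kafka topic to consume
                                .field("groupid", "default")          // consumer group id
                            .endObject()
                            .startObject("index")
                                .field("name", "info")                // target index, defaults to the topic
                                .field("bulk_size", 100)              // actions per bulk request
                                .field("ordered", false)              // synchronous (true) or async (false) bulks
                            .endObject()
                        .endObject())
                .execute().actionGet();
    }
}
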
Source

/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.river.kafka;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.jackson.core.JsonFactory;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.json.JsonXContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptService;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Properties;
import java.net.InetAddress;

import org.json.*;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.Message;

import org.elasticsearch.util.StatsdClient;

import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * A river that consumes JSON messages from a Kafka topic (via the
 * ZooKeeper-based high-level consumer) and indexes them into Elasticsearch
 * in bulk requests. Failed bulk requests are queued and retried, and
 * optional statsd metrics are reported for indexed documents and errors.
 */
public class KafkaRiver extends AbstractRiverComponent implements River {

    private final Client client;

    private final String zkAddress;
    private final String kafkaTopic;
    private final String kafkaGroupId;
    private final boolean kafkaMsgPack;
    private final int zkSessionTimeout;
    private final int zkSyncTime;
    private final int zkAutocommitInterval;
    private final long fetchSize;

    private final String indexName;
    private final int bulkSize;
    private final TimeValue bulkTimeout;
    private final boolean ordered;
    private final TimeValue ttl;

    private final String typeField;
    private final String uidField;
    private final boolean dailyIndex;
    private final String dailyIndexField;
    private final String ttlField;

    private volatile boolean closed = false;
    private volatile Thread thread;
    private volatile ConsumerConnector consumerConnector;

    private volatile StatsdClient statsd;
    private final String statsdServer;
    private final int statsdPort;
    private final String statsdPrefix;

    private volatile ConcurrentLinkedQueue<BulkRequestBuilder> delayed;

    @SuppressWarnings({ "unchecked" })
    @Inject
    public KafkaRiver(RiverName riverName, RiverSettings settings, Client client, ScriptService scriptService) {
        super(riverName, settings);
        this.client = client;

        if (settings.settings().containsKey("kafka")) {
            Map<String, Object> kafkaSettings = (Map<String, Object>) settings.settings().get("kafka");

            zkAddress = XContentMapValues.nodeStringValue(kafkaSettings.get("zkaddress"), "localhost:2181");

            kafkaTopic = XContentMapValues.nodeStringValue(kafkaSettings.get("topic"), "info");
            kafkaGroupId = XContentMapValues.nodeStringValue(kafkaSettings.get("groupid"), "default");
            zkSessionTimeout = XContentMapValues.nodeIntegerValue(kafkaSettings.get("zk_session_timeout"), 400);
            zkSyncTime = XContentMapValues.nodeIntegerValue(kafkaSettings.get("zk_sync_time"), 200);
            zkAutocommitInterval = XContentMapValues.nodeIntegerValue(kafkaSettings.get("zk_autocommit_interval"),
                    1000);
            fetchSize = XContentMapValues.nodeLongValue(kafkaSettings.get("fetch_size"), 307200);

            kafkaMsgPack = XContentMapValues.nodeBooleanValue(kafkaSettings.get("msgpack"), false);
        } else {
            logger.warn("using localhost zookeeper");
            zkAddress = "localhost:2181";
            kafkaTopic = "helloworld";
            kafkaGroupId = "helloworld";
            zkSessionTimeout = 5000;
            zkSyncTime = 200;
            zkAutocommitInterval = 1000;
            fetchSize = 307200;

            kafkaMsgPack = false;
        }

        if (settings.settings().containsKey("index")) {
            Map<String, Object> indexSettings = (Map<String, Object>) settings.settings().get("index");
            indexName = XContentMapValues.nodeStringValue(indexSettings.get("name"), kafkaTopic);
            bulkSize = XContentMapValues.nodeIntegerValue(indexSettings.get("bulk_size"), 100);
            if (indexSettings.containsKey("bulk_timeout")) {
                bulkTimeout = TimeValue.parseTimeValue(
                        XContentMapValues.nodeStringValue(indexSettings.get("bulk_timeout"), "1000ms"),
                        TimeValue.timeValueMillis(1000));
            } else {
                bulkTimeout = TimeValue.timeValueMillis(1000);
            }
            ordered = XContentMapValues.nodeBooleanValue(indexSettings.get("ordered"), false);
            if (indexSettings.containsKey("ttl")) {
                ttl = TimeValue.parseTimeValue(XContentMapValues.nodeStringValue(indexSettings.get("ttl"), "0ms"),
                        TimeValue.timeValueMillis(0));
            } else {
                ttl = TimeValue.timeValueMillis(0);
            }
        } else {
            indexName = kafkaTopic;
            bulkSize = 100;
            bulkTimeout = TimeValue.timeValueMillis(1000);
            ordered = false;
            ttl = TimeValue.timeValueMillis(0);
        }

        if (settings.settings().containsKey("custom")) {
            Map<String, Object> customSettings = (Map<String, Object>) settings.settings().get("custom");
            String statsdSetting = XContentMapValues.nodeStringValue(customSettings.get("statsd"), null);
            String[] statsdString = statsdSetting != null ? statsdSetting.split(":") : new String[0];
            if (statsdString.length > 1) {
                statsdServer = statsdString[0];
                statsdPort = Integer.parseInt(statsdString[1]);
            } else {
                statsdServer = null;
                statsdPort = 0;
            }

            typeField = XContentMapValues.nodeStringValue(customSettings.get("type_field"), null);
            uidField = XContentMapValues.nodeStringValue(customSettings.get("uid_field"), null);
            dailyIndex = XContentMapValues.nodeBooleanValue(customSettings.get("daily_index"), false);
            if (dailyIndex) {
                dailyIndexField = XContentMapValues.nodeStringValue(customSettings.get("daily_index_field"), null);
            } else {
                dailyIndexField = null;
            }
            ttlField = XContentMapValues.nodeStringValue(customSettings.get("ttl_field"), null);
        } else {
            typeField = null;
            uidField = null;
            ttlField = null;
            statsdServer = null;
            statsdPort = 0;
            dailyIndex = false;
            dailyIndexField = null;
        }

        String hostname;
        try {
            InetAddress addr = InetAddress.getLocalHost();
            hostname = addr.getHostName().replace('.', '-');
            logger.info("Hostname: " + hostname);
        } catch (java.net.UnknownHostException e) {
            logger.warn("could not resolve local hostname", e);
            hostname = "unknown-host";
        }

        statsdPrefix = "system.elastic-river." + hostname + ".river." + kafkaTopic;
    }

    @Override
    public void start() {
        if (statsdServer != null && statsdPort > 0) {
            try {
                statsd = new StatsdClient(statsdServer, statsdPort);
            } catch (Exception e) {
                logger.warn("Exception: " + e);
            }
        } else {
            statsd = null;
        }

        Properties props = new Properties();
        props.put("zk.connect", zkAddress);
        props.put("groupid", kafkaGroupId);
        props.put("zk.sessiontimeout.ms", Integer.toString(zkSessionTimeout));
        props.put("zk.synctime.ms", Integer.toString(zkSyncTime));
        props.put("fetch.size", Long.toString(fetchSize));
        props.put("autocommit.interval.ms", Integer.toString(zkAutocommitInterval));
        props.put("consumer.timeout.ms", Long.toString(bulkTimeout.millis()));

        consumerConnector = kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        thread = EsExecutors.daemonThreadFactory(settings.globalSettings(), "kafka_river")
                .newThread(new Consumer());
        thread.start();
    }

    @Override
    public void close() {
        if (closed) {
            return;
        }

        logger.info("closing kafka river");
        closed = true;
        thread.interrupt();
    }

    private class Consumer implements Runnable {

        @Override
        public void run() {
            Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
            topicCountMap.put(kafkaTopic, 1);
            Map<String, List<KafkaStream<Message>>> consumerMap = consumerConnector
                    .createMessageStreams(topicCountMap);

            KafkaStream<Message> stream = consumerMap.get(kafkaTopic).get(0);
            ConsumerIterator<Message> it = stream.iterator();

            delayed = new ConcurrentLinkedQueue<BulkRequestBuilder>();

            while (true) {
                if (closed) {
                    // river is closing: flush any delayed bulk requests before exiting
                    while (!delayed.isEmpty()) {
                        processRequest(delayed.poll());
                    }
                    break;
                }

                // retry bulk requests that previously failed, pacing retries a second apart
                while (!delayed.isEmpty()) {
                    processRequest(delayed.poll());
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException ex) {
                        Thread.currentThread().interrupt();
                    }
                }

                BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
                try {
                    // fill the bulk request up to bulkSize messages; the iterator throws
                    // ConsumerTimeoutException (caught below) once bulkTimeout elapses with no data
                    while (it.hasNext()) {
                        if (bulkRequestBuilder.numberOfActions() >= bulkSize) {
                            break;
                        }
                        byte[] body = getMessage(it.next().message());
                        try {
                            processBody(body, bulkRequestBuilder);
                        } catch (Exception e) {
                            logger.warn("failed to parse request. [{}]", e);
                            if (statsd != null) {
                                statsd.increment(statsdPrefix + ".error.consume");
                            }
                        }
                    }
                } catch (kafka.consumer.ConsumerTimeoutException e) {
                    logger.debug("consumer timeout [{}]", e);
                } catch (Exception e) {
                    logger.warn("consumer failed [{}]", e);
                    if (statsd != null)
                        statsd.increment(statsdPrefix + ".error.consume");
                } finally {
                    // finally all messages in queue should be sent
                    processRequest(bulkRequestBuilder);
                }
            }
            cleanup(0, "closing river");
        }

        private byte[] getMessage(Message message) {
            ByteBuffer buffer = message.payload();
            byte[] bytes = new byte[buffer.remaining()];
            buffer.get(bytes);
            return bytes;
        }

        private void cleanup(int code, String message) {
            try {
                consumerConnector.shutdown();
                logger.info("cleaned up kafka consumer, code [{}], message [{}]", code, message);
            } catch (Exception e) {
                logger.info("failed to clean up [{}]", e, message);
            }
        }

        private void processRequest(final BulkRequestBuilder bulkRequestBuilder) {
            if (logger.isTraceEnabled()) {
                logger.trace("executing bulk with [{}] actions", bulkRequestBuilder.numberOfActions());
            }
            if (bulkRequestBuilder.numberOfActions() > 0 && statsd != null) {
                statsd.increment(statsdPrefix + ".sent", bulkRequestBuilder.numberOfActions());
            }
            if (ordered) {
                try {
                    if (bulkRequestBuilder.numberOfActions() > 0) {
                        BulkResponse response = bulkRequestBuilder.execute().actionGet();
                        if (response.hasFailures()) {
                            logger.warn("failed to execute: " + response.buildFailureMessage());
                            if (statsd != null)
                                statsd.increment(statsdPrefix + ".error.bulk");
                            delayed.offer(bulkRequestBuilder);
                        }

                        long latency = response.getTookInMillis();
                        if (statsd != null)
                            statsd.timing(statsdPrefix + ".indexing", (int) latency);
                    }
                } catch (Exception e) {
                    logger.warn("failed to execute bulk", e);
                    if (statsd != null)
                        statsd.increment(statsdPrefix + ".error.bulk");
                    delayed.offer(bulkRequestBuilder);
                }
            } else {
                if (bulkRequestBuilder.numberOfActions() > 0) {
                    bulkRequestBuilder.execute(new ActionListener<BulkResponse>() {
                        @Override
                        public void onResponse(BulkResponse response) {
                            if (response.hasFailures()) {
                                logger.warn("failed to execute: " + response.buildFailureMessage());
                                if (statsd != null)
                                    statsd.increment(statsdPrefix + ".error.bulk");
                                delayed.offer(bulkRequestBuilder);
                            }
                        }

                        @Override
                        public void onFailure(Throwable e) {
                            logger.warn("failed to execute bulk [{}]", e);
                            if (statsd != null)
                                statsd.increment(statsdPrefix + ".error.bulk");
                            delayed.offer(bulkRequestBuilder);
                        }
                    });
                }
            }
        }

        private void processBody(byte[] body, BulkRequestBuilder bulkRequestBuilder) throws Exception {
            if (body == null)
                return;
            String message = new String(body);
            JSONObject jsonObject = new JSONObject(message);

            String index = indexName != null ? indexName : kafkaTopic;
            if (dailyIndex) {
                long time = dailyIndexField != null ? jsonObject.getLong(dailyIndexField)
                        : System.currentTimeMillis();
                int day = (int) (time / 86400000);   // days since the epoch
                index = Integer.toString(day);
            }

            if (ttl.millis() > 0) {
                long now = System.currentTimeMillis();
                long time = ttlField != null ? jsonObject.getLong(ttlField) : now;
                long newTtl = time + ttl.millis() - now;

                if (newTtl < 0) {
                    logger.debug("skipped expired doc");
                    return;
                } else {
                    message = jsonObject.put("_ttl", newTtl).toString();
                }
            }

            String type = null;
            try {
                type = typeField != null ? jsonObject.getString(typeField) : kafkaTopic;
            } catch (JSONException e) {
                logger.debug("Exception: " + e);
                type = kafkaTopic;
            }

            String uid = null;
            try {
                uid = uidField != null ? jsonObject.getString(uidField) : null;
            } catch (JSONException e) {
                logger.debug("Exception: " + e);
            }

            if (uid != null) {
                bulkRequestBuilder.add(client.prepareIndex(index, type, uid).setSource(message));
            } else {
                bulkRequestBuilder.add(client.prepareIndex(index, type).setSource(message));
            }
        }
    }
}
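
For reference, processBody() expects each Kafka message payload to be a standalone JSON document. A hypothetical message built with the same org.json API the river uses is sketched below; the field names (type, uid, timestamp, message) are illustrative and only become meaningful when the "custom" settings point type_field, uid_field, ttl_field or daily_index_field at them.

import org.json.JSONObject;

public class SampleKafkaMessage {

    public static String build() {
        // Field names here are hypothetical; the river only reads the ones
        // named by type_field, uid_field, ttl_field and daily_index_field.
        JSONObject sample = new JSONObject()
                .put("type", "logline")                        // document type when type_field = "type"
                .put("uid", "event-42")                        // document id when uid_field = "uid"
                .put("timestamp", System.currentTimeMillis())  // base time for _ttl / daily-index calculations
                .put("message", "hello from kafka");
        return sample.toString();                              // this string is the byte payload the consumer decodes
    }
}

If none of the custom fields are configured, the message is indexed as-is into the configured index, with the Kafka topic as its type and an auto-generated document id.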