com.sina.app.spout.ClkKafkaSpout.java Source code

Introduction

Here is the source code for com.sina.app.spout.ClkKafkaSpout.java, a Storm spout that reads click logs from Kafka, looks up the matching page views in HBase and emits the results on the GETK, GETH and NOGET streams.

Source

/**
 * Copyright 2013 Netherlands Forensic Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sina.app.spout;

import static com.sina.app.spout.util.ConfigUtils.CONFIG_FAIL_HANDLER;
import static com.sina.app.spout.util.ConfigUtils.DEFAULT_FAIL_HANDLER;
import static com.sina.app.spout.util.ConfigUtils.createFailHandlerFromString;
import static com.sina.app.spout.util.ConfigUtils.createKafkaConfig;
import static com.sina.app.spout.util.ConfigUtils.getMaxBufSize;
import static com.sina.app.spout.util.ConfigUtils.getTopic;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.SortedMap;
import java.util.TreeMap;

import com.sina.app.bolt.util.AskFromHbase;
import com.sina.app.bolt.util.ClickLog;
import com.sina.app.bolt.util.FormatLog;
import com.sina.app.bolt.util.TimeSign;
import org.apache.commons.exec.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import com.sina.app.spout.fail.FailHandler;
import com.sina.app.spout.util.ConfigUtils;
import com.sina.app.spout.util.KafkaMessageId;

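/**
 * Storm spout that consumes click-log messages from a Kafka topic, matches each
 * click against the page-view records stored in HBase and emits the results on
 * the GETK, GETH and NOGET streams. Failed tuples are handled by a pluggable
 * {@link FailHandler} policy.
 */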
public class ClkKafkaSpout implements IRichSpout {
    private static final long serialVersionUID = -1L;
    private static final Logger LOG = LoggerFactory.getLogger(ClkKafkaSpout.class);
    protected final SortedMap<KafkaMessageId, byte[]> _inProgress = new TreeMap<KafkaMessageId, byte[]>();
    protected final Queue<KafkaMessageId> _queue = new LinkedList<KafkaMessageId>();

    protected String _topic;
    protected String _consumer_group_id;

    protected int _bufSize;
    protected FailHandler _failHandler;
    protected ConsumerIterator<byte[], byte[]> _iterator;
    protected transient SpoutOutputCollector _collector;
    protected transient ConsumerConnector _consumer;

    public ClkKafkaSpout(String topicName, String consumer_group) {
        this._topic = topicName;
        this._consumer_group_id = consumer_group;
    }

    protected void createFailHandler(final String failHandler) {
        if (failHandler == null) {
            _failHandler = DEFAULT_FAIL_HANDLER;
        } else {
            _failHandler = createFailHandlerFromString(failHandler);
        }
    }

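    /**
     * Builds the Kafka high-level consumer from the topology configuration. The
     * ZooKeeper connect string comes from {@link FormatLog} and the consumer group
     * id from the value passed to the constructor.
     */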
    protected void createConsumer(final Map<String, Object> config) {
        final Properties consumerConfig = createKafkaConfig(config);
        FormatLog formatLog = new FormatLog();
        consumerConfig.put("zookeeper.connect", formatLog.FaiKafkaSpoutZooKeeperList);

        consumerConfig.put("group.id", this._consumer_group_id);

        LOG.info("connecting kafka client to zookeeper at {} as client group {}",
                consumerConfig.getProperty("zookeeper.connect"), consumerConfig.getProperty("group.id"));
        _consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerConfig));
    }

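    /**
     * Refills the internal buffer from the Kafka consumer iterator, reading at most
     * {@code _bufSize} messages. The ids of all buffered messages are queued for
     * emission; returns true when at least one message was buffered.
     */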
    protected boolean fillBuffer() {
        if (!_inProgress.isEmpty() || !_queue.isEmpty()) {
            throw new IllegalStateException("cannot fill buffer when buffer or pending messages are non-empty");
        }

        if (_iterator == null) {
            final Map<String, List<KafkaStream<byte[], byte[]>>> streams = _consumer
                    .createMessageStreams(Collections.singletonMap(_topic, 1));
            _iterator = streams.get(_topic).get(0).iterator();
        }

        try {
            int size = 0;
            while (size < _bufSize && _iterator.hasNext()) {
                final MessageAndMetadata<byte[], byte[]> message = _iterator.next();
                final KafkaMessageId id = new KafkaMessageId(message.partition(), message.offset());
                _inProgress.put(id, message.message());
                size++;
            }
        } catch (final ConsumerTimeoutException e) {
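            // consumer.timeout.ms elapsed without another message; emit whatever has been buffered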
        }

        if (_inProgress.size() > 0) {
            _queue.addAll(_inProgress.keySet());
            LOG.debug("buffer now has {} messages to be emitted", _queue.size());
            return true;
        } else {
            return false;
        }
    }

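    /**
     * Declares three output streams: GETK and GETH for click logs whose page view was
     * found in HBase (forwarded to Kafka and to HBase respectively), and NOGET for
     * click logs whose page view could not be found within the retry window.
     */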
    @Override
    public void declareOutputFields(final OutputFieldsDeclarer declarer) {
        declarer.declareStream("GETK", new Fields("toKafka"));
        declarer.declareStream("GETH", new Fields("toHbase"));
        declarer.declareStream("NOGET", new Fields("toKafka"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

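    /**
     * Reads the buffer size and failure policy from the topology configuration,
     * creates the Kafka consumer and opens the configured {@link FailHandler}.
     */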
    @Override
    public void open(final Map config, final TopologyContext topology, final SpoutOutputCollector collector) {
        _collector = collector;
        _bufSize = getMaxBufSize((Map<String, Object>) config);
        createFailHandler((String) config.get(CONFIG_FAIL_HANDLER));
        createConsumer((Map<String, Object>) config);
        _failHandler.open(config, topology, collector);

        LOG.info("kafka spout opened, reading from topic {}, using failure policy {}", _topic,
                _failHandler.getIdentifier());
    }

    @Override
    public void close() {
        _collector = null;
        _iterator = null;
        if (_consumer != null) {
            try {
                _consumer.shutdown();
            } finally {
                _consumer = null;
            }
        }

        _failHandler.close();
    }

    @Override
    public void activate() {
        _failHandler.activate();
    }

    @Override
    public void deactivate() {
        _failHandler.deactivate();
    }

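    /**
     * Splits a Kafka message into individual click logs. For each log the spout asks
     * HBase whether the corresponding page view has already been stored; if not, it
     * keeps re-asking (sleeping between attempts, bounded by the time limits carried
     * on the {@link ClickLog}) until the page view appears or the window expires.
     * Matched logs are emitted on GETK and GETH, unmatched ones on NOGET, all
     * anchored to the given Kafka message id.
     */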
    public void Delay(byte[] message, final KafkaMessageId nextId) {
        String entry = new String(message);
        AskFromHbase askFromHbase;
        String[] clickLogs = StringUtils.split(entry, "\n");
        for (String oneLog : clickLogs) {
            ClickLog log = new ClickLog(oneLog);
            if (!log.isValid) {
                continue;
            }
            TimeSign timeSign = new TimeSign();
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            Date logDate;
            boolean Inserted = false;
            try {
                logDate = simpleDateFormat.parse(log.timeSign);
            } catch (ParseException e) {
                LOG.error("ClickLog error{}", e);
                continue;
            }
            askFromHbase = new AskFromHbase(log.uuid, log.tableColumnPv);
            boolean askPvExist = askFromHbase.askExist();
            if (askPvExist) {
                _collector.emit("GETK",
                        new Values(askFromHbase.clkWriteToHbase.pvFromHbase + "\t$\t" + log.logclkVal), nextId);
                _collector.emit("GETH", new Values(oneLog), nextId);
                continue;
            }
            Date last = new Date();
            while (true) {
                Date now = new Date();
                if (now.getTime() - last.getTime() > log.secondAskHbaseTimeMax)
                    break;
                String redisTime = "";
                try {
                    redisTime = timeSign.getTime();
                } catch (Exception e) {
                    LOG.error("error get redis Time{}", e);
                    break;
                }
                Date redisDate;
                try {
                    redisDate = simpleDateFormat.parse(redisTime);
                } catch (ParseException e) {
                    LOG.error("get RedisTime error{}", e);
                    break;
                }
                final long delta = redisDate.getTime() - logDate.getTime();
                if (delta > log.secondAskHbaseTimeDlt)
                    break;
                try {
                    Thread.sleep(log.secondAskHbaseSleepTime);
                    askPvExist = askFromHbase.askExist();
                    if (askPvExist) {
                        Inserted = true;
                        _collector.emit("GETK",
                                new Values(askFromHbase.clkWriteToHbase.pvFromHbase + "\t$\t" + log.logclkVal),
                                nextId);
                        _collector.emit("GETH", new Values(oneLog), nextId);
                        break;
                    }
                } catch (InterruptedException e) {
                    LOG.error("clklog sleep error{}", e);
                    break;
                }

            }
            if (Inserted)
                continue;
            _collector.emit("NOGET", new Values(oneLog), nextId);
        }
    }

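    /**
     * Emits the next buffered message, refilling the buffer from Kafka when both the
     * emit queue and the in-progress map are empty.
     */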
    @Override
    public void nextTuple() {
        if (!_queue.isEmpty() || (_inProgress.isEmpty() && fillBuffer())) {
            final KafkaMessageId nextId = _queue.poll();
            if (nextId != null) {
                final byte[] message = _inProgress.get(nextId);
                if (message == null) {
                    throw new IllegalStateException("no pending message for next id " + nextId);
                }
                Delay(message, nextId);
                LOG.debug("emitted kafka message id {} ({} bytes payload)", nextId, message.length);
            }
        }
    }

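    /**
     * Removes an acknowledged message from the in-progress map; once every buffered
     * message has been acknowledged the consumer offsets are committed. The
     * acknowledgement is also forwarded to the failure policy.
     */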
    @Override
    public void ack(final Object o) {
        if (o instanceof KafkaMessageId) {
            final KafkaMessageId id = (KafkaMessageId) o;
            _inProgress.remove(id);
            LOG.debug("kafka message {} acknowledged", id);
            if (_inProgress.isEmpty()) {
                LOG.debug("all pending messages acknowledged, committing client offsets");
                _consumer.commitOffsets();
            }
            _failHandler.ack(id);
        }
    }

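    /**
     * Either requeues a failed message for replay or hands it off to the configured
     * failure policy, depending on {@link FailHandler#shouldReplay}.
     */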
    @Override
    public void fail(final Object o) {
        if (o instanceof KafkaMessageId) {
            final KafkaMessageId id = (KafkaMessageId) o;
            if (_failHandler.shouldReplay(id)) {
                LOG.debug("kafka message id {} failed in topology, adding to buffer again", id);
                _queue.add(id);
            } else {
                LOG.debug("kafka message id {} failed in topology, delegating failure to policy", id);
                _failHandler.fail(id, _inProgress.remove(id));
            }
        }
    }
}
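
For context, here is a minimal sketch of how a spout like this might be wired into a Storm topology. The topology name, component id, topic and consumer-group strings, and the commented-out bolt are illustrative placeholders; only the ClkKafkaSpout constructor and the GETK/GETH/NOGET stream names come from the class above.

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;

import com.sina.app.spout.ClkKafkaSpout;

public class ClkTopologySketch {
    public static void main(final String[] args) {
        final TopologyBuilder builder = new TopologyBuilder();
        // topic name and consumer group id are placeholders
        builder.setSpout("clk-spout", new ClkKafkaSpout("clk_topic", "clk_consumer_group"), 1);
        // downstream bolts would subscribe to the streams declared by the spout, e.g.
        // builder.setBolt("to-kafka", new WriteToKafkaBolt()).shuffleGrouping("clk-spout", "GETK");

        final Config conf = new Config();
        // the buffer size and fail-handler settings read in open() would be added to this map,
        // using the keys defined in com.sina.app.spout.util.ConfigUtils
        final LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("clk-topology", conf, builder.createTopology());
    }
}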