com.linkedin.pinot.tools.admin.command.StreamAvroIntoKafkaCommand.java Source code

Introduction

Here is the source code for com.linkedin.pinot.tools.admin.command.StreamAvroIntoKafkaCommand.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.tools.admin.command;

import com.google.common.primitives.Longs;
import com.google.common.util.concurrent.Uninterruptibles;
import com.linkedin.pinot.common.utils.HashUtil;
import com.linkedin.pinot.common.utils.KafkaStarterUtils;
import com.linkedin.pinot.core.indexsegment.utils.AvroUtils;
import com.linkedin.pinot.tools.Command;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericRecord;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

/**
 * Class for command to stream Avro data into Kafka.
 */
public class StreamAvroIntoKafkaCommand extends AbstractBaseAdminCommand implements Command {
    private static final Logger LOGGER = LoggerFactory.getLogger(StreamAvroIntoKafkaCommand.class);

    @Option(name = "-avroFile", required = true, metaVar = "<String>", usage = "Avro file to stream.")
    private String _avroFile = null;

    @Option(name = "-help", required = false, help = true, aliases = { "-h", "--h",
            "--help" }, usage = "Print this message.")
    private boolean _help = false;

    @Option(name = "-kafkaBrokerList", required = false, metaVar = "<String>", usage = "Kafka broker list.")
    private String _kafkaBrokerList = KafkaStarterUtils.DEFAULT_KAFKA_BROKER;

    @Option(name = "-kafkaTopic", required = true, metaVar = "<String>", usage = "Kafka topic to stream into.")
    private String _kafkaTopic = null;

    @Option(name = "-zkAddress", required = false, metaVar = "<string>", usage = "Address of Zookeeper.")
    private String _zkAddress = "localhost:2181";

    @Option(name = "-millisBetweenMessages", required = false, metaVar = "<int>", usage = "Delay in milliseconds between messages (default 1000 ms)")
    private String _millisBetweenMessages = "1000";

    @Override
    public boolean getHelp() {
        return _help;
    }

    @Override
    public String getName() {
        return "StreamAvroToKafka";
    }

    @Override
    public String toString() {
        return "StreamAvroInfoKafka -avroFile " + _avroFile + " -kafkaBrokerList " + _kafkaBrokerList
                + " -kafkaTopic " + _kafkaTopic + " -millisBetweenMessages " + _millisBetweenMessages;
    }

    @Override
    public String description() {
        return "Stream the specified Avro file into a Kafka topic, which can be read by Pinot\n"
                + "by using com.linkedin.pinot.core.realtime.impl.kafka.KafkaJSONMessageDecoder as the\n"
                + "message decoder class name (stream.kafka.decoder.class.name).";
    }

    @Override
    public boolean execute() throws IOException {
        int messageDelayMillis = Integer.parseInt(_millisBetweenMessages);
        final boolean sleepRequired = 0 < messageDelayMillis;

        if (sleepRequired) {
            LOGGER.info("Streaming Avro file into Kafka topic {} with {} ms between messages", _kafkaTopic,
                    _millisBetweenMessages);
        } else {
            LOGGER.info("Streaming Avro file into Kafka topic {} with no delay between messages", _kafkaTopic);
        }

        // Create Kafka producer
        Properties properties = new Properties();
        properties.put("metadata.broker.list", _kafkaBrokerList);
        properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
        properties.put("request.required.acks", "1");

        ProducerConfig producerConfig = new ProducerConfig(properties);
        Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
        try {
            // Open the Avro file
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(new File(_avroFile));

            // Iterate over every record
            for (GenericRecord genericRecord : reader) {
                // Write the message to Kafka
                String recordJson = genericRecord.toString();
                byte[] bytes = recordJson.getBytes(StandardCharsets.UTF_8);
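                // Use a 64-bit hash of the serialized record as the message key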
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(_kafkaTopic,
                        Longs.toByteArray(HashUtil.hash64(bytes, bytes.length)), bytes);

                producer.send(data);

                // Sleep between messages
                if (sleepRequired) {
                    Uninterruptibles.sleepUninterruptibly(messageDelayMillis, TimeUnit.MILLISECONDS);
                }
            }

            reader.close();
        } catch (Exception e) {
            LOGGER.error("Caught exception while streaming Avro data into Kafka", e);
            throw new RuntimeException(e);
        } finally {
            // Release the producer's resources whether or not streaming succeeded
            producer.close();
        }

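        // Persist this process' PID under the system temp directory so the running streamer can be located and stopped later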
        savePID(System.getProperty("java.io.tmpdir") + File.separator + ".streamAvro.pid");
        return true;
    }
}
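
Example

The loop in execute() serializes each Avro record with GenericRecord.toString(), which Avro renders as JSON; that is why the description above names KafkaJSONMessageDecoder as the matching decoder class. Here is a minimal, self-contained sketch of that serialization step, assuming a made-up two-field schema:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class AvroToJsonSketch {
    public static void main(String[] args) {
        // Hypothetical record schema, defined inline purely for illustration
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Example\",\"fields\":["
                        + "{\"name\":\"name\",\"type\":\"string\"},"
                        + "{\"name\":\"count\",\"type\":\"long\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("name", "pinot");
        record.put("count", 42L);

        // toString() renders the record as JSON, the same string form the
        // command above writes into the Kafka topic
        System.out.println(record.toString()); // {"name": "pinot", "count": 42}
    }
}

When the command runs through the Pinot admin tool it is registered under the name returned by getName(), and takes the options listed above: -avroFile and -kafkaTopic are required, while -kafkaBrokerList and -millisBetweenMessages fall back to their defaults.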