com.linkedin.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder.java Source code

Introduction

Here is the source code for com.linkedin.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder.java. The class decodes Avro-encoded Kafka messages into Pinot GenericRow objects, resolving each message's writer schema from a schema registry via an MD5 hash embedded in the message header.

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.realtime.impl.kafka;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.core.data.GenericRow;

public class KafkaAvroMessageDecoder implements KafkaMessageDecoder {
    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaAvroMessageDecoder.class);

    public static final String SCHEMA_REGISTRY_REST_URL = "schema.registry.rest.url";
    public static final String SCHEMA_REGISTRY_SCHEMA_NAME = "schema.registry.schema.name";
    private org.apache.avro.Schema defaultAvroSchema;
    private Map<String, org.apache.avro.Schema> md5ToAvroSchemaMap;

    private String schemaRegistryBaseUrl;
    private String kafkaTopicName;
    private DecoderFactory decoderFactory;
    private AvroRecordToPinotRowGenerator avroRecordConverter;

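    // Expected message layout: [1-byte magic][16-byte MD5 of the writer schema][Avro binary payload].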
    private static final int MAGIC_BYTE_LENGTH = 1;
    private static final int SCHEMA_HASH_LENGTH = 16;
    private static final int HEADER_LENGTH = MAGIC_BYTE_LENGTH + SCHEMA_HASH_LENGTH;

    private static final int SCHEMA_HASH_START_OFFSET = MAGIC_BYTE_LENGTH;
    private static final int SCHEMA_HASH_END_OFFSET = SCHEMA_HASH_START_OFFSET + SCHEMA_HASH_LENGTH;

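    // Reads the registry base URL and optional schema name from the decoder
    // properties, then eagerly fetches the latest schema as the fallback default.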
    @Override
    public void init(Map<String, String> props, Schema indexingSchema, String topicName) throws Exception {
        schemaRegistryBaseUrl = props.get(SCHEMA_REGISTRY_REST_URL);
        // StringUtils.chomp returns a new string; the result must be assigned,
        // otherwise a trailing slash on the configured URL survives.
        schemaRegistryBaseUrl = StringUtils.chomp(schemaRegistryBaseUrl, "/");
        kafkaTopicName = topicName;

        String avroSchemaName = kafkaTopicName;
        if (props.containsKey(SCHEMA_REGISTRY_SCHEMA_NAME) && props.get(SCHEMA_REGISTRY_SCHEMA_NAME) != null
                && !props.get(SCHEMA_REGISTRY_SCHEMA_NAME).isEmpty()) {
            avroSchemaName = props.get(SCHEMA_REGISTRY_SCHEMA_NAME);
        }

        defaultAvroSchema = fetchSchema(new URL(schemaRegistryBaseUrl + "/latest_with_type=" + avroSchemaName));
        this.avroRecordConverter = new AvroRecordToPinotRowGenerator(indexingSchema);
        // DecoderFactory's public constructor is deprecated; use the shared instance.
        this.decoderFactory = DecoderFactory.get();
        md5ToAvroSchemaMap = new HashMap<String, org.apache.avro.Schema>();
    }

    @Override
    public GenericRow decode(byte[] payload) {
        return decode(payload, 0, payload.length);
    }

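    // Skips the magic byte, extracts the 16-byte schema hash, resolves the writer
    // schema (local cache, then registry, then the default schema on failure),
    // and converts the decoded Avro record into a Pinot GenericRow.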
    @Override
    public GenericRow decode(byte[] payload, int offset, int length) {
        // Reject payloads too short to contain the magic byte and schema hash header.
        if (payload == null || length < HEADER_LENGTH) {
            return null;
        }

        byte[] md5 = Arrays.copyOfRange(payload, SCHEMA_HASH_START_OFFSET + offset,
                SCHEMA_HASH_END_OFFSET + offset);

        String md5String = hex(md5);
        org.apache.avro.Schema schema = null;
        if (md5ToAvroSchemaMap.containsKey(md5String)) {
            schema = md5ToAvroSchemaMap.get(md5String);
        } else {
            try {
                schema = fetchSchema(new URL(schemaRegistryBaseUrl + "/id=" + md5String));
                md5ToAvroSchemaMap.put(md5String, schema);
            } catch (Exception e) {
                schema = defaultAvroSchema;
                LOGGER.error("error fetching schema from md5 String", e);
            }
        }
        DatumReader<Record> reader = new GenericDatumReader<Record>(schema);
        try {
            GenericData.Record avroRecord = reader.read(null, decoderFactory.createBinaryDecoder(payload,
                    HEADER_LENGTH + offset, length - HEADER_LENGTH, null));
            return avroRecordConverter.transform(avroRecord, schema);
        } catch (IOException e) {
            LOGGER.error("Caught exception while reading message", e);
            return null;
        }
    }

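    // Renders a byte array as a lowercase hex string, two characters per byte.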
    public static String hex(byte[] bytes) {
        StringBuilder builder = new StringBuilder(2 * bytes.length);
        for (int i = 0; i < bytes.length; i++) {
            String hexString = Integer.toHexString(0xFF & bytes[i]);
            if (hexString.length() < 2) {
                hexString = "0" + hexString;
            }
            builder.append(hexString);
        }
        return builder.toString();
    }

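    // Downloads the schema definition (JSON) from the given registry URL and parses it.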
    public static org.apache.avro.Schema fetchSchema(URL url) throws Exception {
        // try-with-resources closes the connection stream even if a read fails;
        // the original left the reader open, leaking the connection.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"))) {
            StringBuilder queryResp = new StringBuilder();
            for (String respLine; (respLine = reader.readLine()) != null;) {
                queryResp.append(respLine);
            }
            // Schema.parse(String) is deprecated; Schema.Parser is the supported API.
            return new org.apache.avro.Schema.Parser().parse(queryResp.toString());
        }
    }

}
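
For reference, below is a minimal producer-side sketch, not part of the Pinot source, of how a message could be framed in the layout this decoder expects: one magic byte, the 16-byte MD5 of the writer schema's JSON, then the Avro binary record. The class name KafkaAvroMessageEncoderSketch, the magic byte value, and the hash-the-schema-JSON convention are illustrative assumptions; the decoder above skips the magic byte and uses the 16-byte hash only as a registry lookup key.

import java.io.ByteArrayOutputStream;
import java.security.MessageDigest;

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class KafkaAvroMessageEncoderSketch {
    // Assumed value; the decoder above never inspects the magic byte, it only skips it.
    private static final byte MAGIC_BYTE = 0x0;

    public static byte[] encode(org.apache.avro.Schema schema, GenericData.Record record) throws Exception {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Header: 1 magic byte, then the 16-byte MD5 of the schema JSON
        // (matching MAGIC_BYTE_LENGTH and SCHEMA_HASH_LENGTH in the decoder).
        out.write(MAGIC_BYTE);
        out.write(MessageDigest.getInstance("MD5").digest(schema.toString().getBytes("UTF-8")));
        // Body: the Avro binary encoding of the record.
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericData.Record>(schema).write(record, encoder);
        encoder.flush();
        return out.toByteArray();
    }
}

A payload framed this way can be passed directly to decode(byte[]); the registry lookup will only succeed if the registry actually keys schemas by this same MD5, and otherwise the decoder falls back to the default schema fetched in init.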