com.linkedin.pinot.core.data.manager.realtime.LLRealtimeSegmentDataManager.java Source code

Introduction

Here is the source code for com.linkedin.pinot.core.data.manager.realtime.LLRealtimeSegmentDataManager.java
Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.linkedin.pinot.core.data.manager.realtime;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Uninterruptibles;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.config.IndexingConfig;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata;
import com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.metrics.ServerGauge;
import com.linkedin.pinot.common.metrics.ServerMeter;
import com.linkedin.pinot.common.metrics.ServerMetrics;
import com.linkedin.pinot.common.protocols.SegmentCompletionProtocol;
import com.linkedin.pinot.common.utils.CommonConstants;
import com.linkedin.pinot.common.utils.LLCSegmentName;
import com.linkedin.pinot.common.utils.NetUtil;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.extractors.FieldExtractorFactory;
import com.linkedin.pinot.core.data.extractors.PlainFieldExtractor;
import com.linkedin.pinot.core.data.manager.offline.SegmentDataManager;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.realtime.converter.RealtimeSegmentConverter;
import com.linkedin.pinot.core.realtime.impl.RealtimeSegmentImpl;
import com.linkedin.pinot.core.realtime.impl.kafka.KafkaHighLevelStreamProviderConfig;
import com.linkedin.pinot.core.realtime.impl.kafka.KafkaMessageDecoder;
import com.linkedin.pinot.core.realtime.impl.kafka.KafkaSimpleConsumerFactoryImpl;
import com.linkedin.pinot.core.realtime.impl.kafka.SimpleConsumerWrapper;
import com.linkedin.pinot.server.realtime.ServerSegmentCompletionProtocolHandler;
import kafka.message.MessageAndOffset;

/**
 * Segment data manager for low level consumer realtime segments, which manages consumption and segment completion.
 */
public class LLRealtimeSegmentDataManager extends SegmentDataManager {
    protected enum State {
        // The state machine starts off with this state. While in this state we consume kafka events
        // and index them in memory. We continue to be in this state until the end criteria is satisfied
        // (time or number of rows)
        INITIAL_CONSUMING,

        // In this state, we consume from kafka until we reach the _finalOffset (exclusive)
        CATCHING_UP,

        // In this state, we sleep for MAX_HOLDING_TIME_MS, and the make a segmentConsumed() call to the
        // controller.
        HOLDING,

        // We have been asked to go Online from Consuming state, and are trying a last attempt to catch up to the
        // target offset. In this state, we have a time constraint as well as a final offset constraint to look into
        // before we stop consuming.
        CONSUMING_TO_ONLINE,

        // We have been asked by the controller to retain the segment we have in memory at the current offset.
        // We should build the segment, and replace it with the in-memory segment.
        RETAINING,

        // We have been asked by the controller to commit the segment at the current offset. Build the segment
        // and make a segmentCommit() call to the controller.
        COMMITTING,

        // We have been asked to discard the in-memory segment we have. We will be serving queries, but not consuming
        // anymore rows from kafka. We wait for a helix transition to go ONLINE, at which point, we can download the
        // segment from the controller and replace it with the in-memory segment.
        DISCARDED,

        // We have replaced our in-memory segment with the segment that has been built locally.
        RETAINED,

        // We have committed our segment to the controller, and also replaced it locally with the constructed segment.
        COMMITTED,

        // Something went wrong, we need to download the segment when we get the ONLINE transition.
        ERROR;

        public boolean shouldConsume() {
            if (this.equals(INITIAL_CONSUMING) || this.equals(CATCHING_UP) || this.equals(CONSUMING_TO_ONLINE)) {
                return true;
            }
            return false;
        }

        public boolean isFinal() {
            if (this.equals(ERROR) || this.equals(COMMITTED) || this.equals(RETAINED) || this.equals(DISCARDED)) {
                return true;
            }
            return false;
        }
    }

    private static final Logger LOGGER = LoggerFactory.getLogger(LLRealtimeSegmentDataManager.class);
    private static final int KAFKA_MAX_FETCH_TIME_MILLIS = 1000;
    private static final long TIME_THRESHOLD_FOR_LOG_MINUTES = 1;
    private static final long TIME_EXTENSION_ON_EMPTY_SEGMENT_HOURS = 1;
    private static final int MSG_COUNT_THRESHOLD_FOR_LOG = 100000;

    private final LLCRealtimeSegmentZKMetadata _segmentZKMetadata;
    private final AbstractTableConfig _tableConfig;
    private final RealtimeTableDataManager _realtimeTableDataManager;
    private final KafkaMessageDecoder _messageDecoder;
    private final int _segmentMaxRowCount;
    private final String _resourceDataDir;
    private final Schema _schema;
    private final ServerMetrics _serverMetrics;
    private final RealtimeSegmentImpl _realtimeSegment;
    private volatile long _currentOffset;
    private volatile State _state;
    private volatile int _numRowsConsumed = 0;
    private long _startTimeMs = 0;
    private final String _segmentNameStr;
    private final SegmentVersion _segmentVersion;

    // Segment end criteria
    private volatile long _consumeEndTime = 0;
    private volatile long _finalOffset = -1;
    private volatile boolean _receivedStop = false;

    // It takes 30s to locate controller leader, and more if there are multiple controller failures.
    // For now, we let 31s pass for this state transition.
    private final int _maxTimeForConsumingToOnlineSec = 31;

    private Thread _consumerThread;
    private final String _kafkaTopic;
    private final int _kafkaPartitionId;
    final String _clientId;
    private final LLCSegmentName _segmentName;
    private final PlainFieldExtractor _fieldExtractor;
    private final SimpleConsumerWrapper _consumerWrapper;
    private final File _resourceTmpDir;
    private final String _tableName;
    private final List<String> _invertedIndexColumns;
    private final String _sortedColumn;
    private Logger segmentLogger = LOGGER;
    private final String _tableStreamName;
    private AtomicLong _lastUpdatedRawDocuments = new AtomicLong(0);
    private final String _instance;
    private final ServerSegmentCompletionProtocolHandler _protocolHandler;
    private final long _consumeStartTime;
    private final long _startOffset;

    private long _lastLogTime = 0;
    private int _lastConsumedCount = 0;

    // TODO each time this method is called, we print reason for stop. Good to print only once.
    private boolean endCriteriaReached() {
        Preconditions.checkState(_state.shouldConsume(), "Incorrect state %s", _state);
        long now = now();
        switch (_state) {
        case INITIAL_CONSUMING:
            // The segment has been created, and we have not posted a segmentConsumed() message on the controller yet.
            // We need to consume as much data as available, until we have either reached the max number of rows or
            // the max time we are allowed to consume.
            if (now >= _consumeEndTime) {
                if (_realtimeSegment.getRawDocumentCount() == 0) {
                    segmentLogger.info("No events came in, extending time by {} hours",
                            TIME_EXTENSION_ON_EMPTY_SEGMENT_HOURS);
                    _consumeEndTime += TimeUnit.HOURS.toMillis(TIME_EXTENSION_ON_EMPTY_SEGMENT_HOURS);
                    return false;
                }
                segmentLogger.info("Stopping consumption due to time limit start={} now={} numRows={}",
                        _startTimeMs, now, _numRowsConsumed);
                return true;
            } else if (_numRowsConsumed >= _segmentMaxRowCount) {
                segmentLogger.info("Stopping consumption due to row limit nRows={} maxNRows={}", _numRowsConsumed,
                        _segmentMaxRowCount);
                return true;
            }
            return false;

        case CATCHING_UP:
            // We have posted segmentConsumed() at least once, and the controller is asking us to catch up to a certain offset.
            // There is no time limit here, so just check to see that we are still within the offset we need to reach.
            // Going past the offset is an exception.
            if (_currentOffset == _finalOffset) {
                segmentLogger.info("Caught up to offset={}, state={}", _finalOffset, _state.toString());
                return true;
            }
            if (_currentOffset > _finalOffset) {
                segmentLogger.error("Offset higher in state={}, current={}, final={}", _state.toString(),
                        _currentOffset, _finalOffset);
                throw new RuntimeException("Past max offset");
            }
            return false;

        case CONSUMING_TO_ONLINE:
            // We are attempting to go from CONSUMING to ONLINE state. We are making a last attempt to catch up to the
            // target offset. We have a time constraint, and need to stop consuming if we cannot get to the target offset
            // within that time.
            if (_currentOffset == _finalOffset) {
                segmentLogger.info("Caught up to offset={}, state={}", _finalOffset, _state.toString());
                return true;
            } else if (now >= _consumeEndTime) {
                segmentLogger.info("Past max time budget: offset={}, state={}", _currentOffset, _state.toString());
                return true;
            }
            if (_currentOffset > _finalOffset) {
                segmentLogger.error("Offset higher in state={}, current={}, final={}", _state.toString(),
                        _currentOffset, _finalOffset);
                throw new RuntimeException("Past max offset");
            }
            return false;
        default:
            segmentLogger.error("Illegal state {}" + _state.toString());
            throw new RuntimeException("Illegal state to consume");
        }
    }

    protected void consumeLoop() {
        final long _endOffset = Long.MAX_VALUE; // No upper limit on Kafka offset
        segmentLogger.info("Starting consumption loop start offset {}, finalOffset {}", _currentOffset,
                _finalOffset);
        while (!_receivedStop && !endCriteriaReached()) {
            // Consume for the next _kafkaReadTime ms, or we get to final offset, whichever happens earlier,
            // Update _currentOffset upon return from this method
            Iterable<MessageAndOffset> messagesAndOffsets = null;
            try {
                messagesAndOffsets = _consumerWrapper.fetchMessages(_currentOffset, _endOffset,
                        KAFKA_MAX_FETCH_TIME_MILLIS);
            } catch (TimeoutException e) {
                segmentLogger.warn("Timed out when fetching messages from Kafka, retrying");
                continue;
            }

            Iterator<MessageAndOffset> msgIterator = messagesAndOffsets.iterator();

            int batchSize = 0;
            while (!_receivedStop && !endCriteriaReached() && msgIterator.hasNext()) {
                // Get a batch of messages from Kafka
                // Index each message
                MessageAndOffset messageAndOffset = msgIterator.next();
                byte[] array = messageAndOffset.message().payload().array();
                int offset = messageAndOffset.message().payload().arrayOffset();
                int length = messageAndOffset.message().payloadSize();
                GenericRow row = _messageDecoder.decode(array, offset, length);

                if (row != null) {
                    row = _fieldExtractor.transform(row);
                    boolean canTakeMore = _realtimeSegment.index(row); // Ignore the boolean return
                    if (!canTakeMore) {
                        //TODO
                        // This condition can happen when we are catching up, (due to certain failure scenarios in kafka where
                        // offsets get changed with higher generation numbers for some pinot servers but not others).
                        // Also, it may be that we push in a row into the realtime segment, but it fails to index that row
                        // for some reason., so we may end up with less number of rows in the real segment. Actually, even 0 rows.
                        // In that case, we will see an exception when generating the segment.
                        // TODO We need to come up with how the system behaves in these cases and document/handle them
                        segmentLogger.warn("We got full during indexing");
                    }
                    batchSize++;
                }
                _currentOffset = messageAndOffset.nextOffset();
                _numRowsConsumed++;
            }
            updateCurrentDocumentCountMetrics();
            if (batchSize != 0) {
                segmentLogger.debug("Indexed {} messages current offset {}", batchSize, _currentOffset);
            } else {
                // If there were no messages to be fetched from Kafka, wait for a little bit as to avoid hammering the
                // Kafka broker
                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            }
        }
    }

    public class PartitionConsumer implements Runnable {
        public void run() {
            _startTimeMs = now();
            try {
                while (!_state.isFinal()) {
                    if (_state.shouldConsume()) {
                        consumeLoop(); // Consume until we reached the end criteria, or we are stopped.
                    }
                    if (_receivedStop) {
                        break;
                    }
                    // If we are sending segmentConsumed() to the controller, we are in HOLDING state.
                    _state = State.HOLDING;
                    SegmentCompletionProtocol.Response response = postSegmentConsumedMsg();
                    SegmentCompletionProtocol.ControllerResponseStatus status = response.getStatus();
                    long rspOffset = response.getOffset();
                    boolean success;
                    switch (status) {
                    case NOT_LEADER:
                        // Retain the same state
                        segmentLogger.warn("Got not leader response");
                        hold();
                        break;
                    case CATCH_UP:
                        if (rspOffset <= _currentOffset) {
                            // Something wrong with the controller. Back off and try again.
                            segmentLogger.error(
                                    "Invalid catchup offset {} in controller response, current offset {}",
                                    rspOffset, _currentOffset);
                            hold();
                        } else {
                            _state = State.CATCHING_UP;
                            _finalOffset = rspOffset;
                            // We will restart consumption when we loop back above.
                        }
                        break;
                    case HOLD:
                        hold();
                        break;
                    case DISCARD:
                        // Keep this in memory, but wait for the online transition, and download when it comes in.
                        _state = State.DISCARDED;
                        break;
                    case KEEP:
                        _state = State.RETAINING;
                        success = buildSegmentAndReplace();
                        if (success) {
                            _state = State.RETAINED;
                        } else {
                            // Could not build segment for some reason. We can only download it.
                            _state = State.ERROR;
                        }
                        break;
                    case COMMIT:
                        _state = State.COMMITTING;
                        success = buildSegment(true);
                        if (!success) {
                            // We could not build the segment. Go into error state.
                            _state = State.ERROR;
                        } else {
                            success = commitSegment();
                            if (success) {
                                _state = State.COMMITTED;
                            } else {
                                segmentLogger.info("Could not commit segment. Retrying after hold");
                                hold();
                            }
                        }
                        break;
                    default:
                        segmentLogger.error("Holding after response from Controller: {}", response.toJsonString());
                        hold();
                        break;
                    }
                }
            } catch (Exception e) {
                segmentLogger.error("Exception while in work", e);
                _state = State.ERROR;
            }
        }
    }

    private File makeSegmentDirPath() {
        return new File(_resourceDataDir, _segmentZKMetadata.getSegmentName());
    }

    /**
     *
     * @param buildTgz true if you want the method to also build tgz file
     * @return true if all succeeds.
     */
    protected boolean buildSegment(boolean buildTgz) {
        // Build a segment from in-memory rows.If buildTgz is true, then build the tar.gz file as well
        // TODO Use an auto-closeable object to delete temp resources.
        File tempSegmentFolder = new File(_resourceTmpDir, "tmp-" + _segmentNameStr + "-" + String.valueOf(now()));

        // lets convert the segment now
        RealtimeSegmentConverter converter = new RealtimeSegmentConverter(_realtimeSegment,
                tempSegmentFolder.getAbsolutePath(), _schema, _segmentZKMetadata.getTableName(),
                _segmentZKMetadata.getSegmentName(), _sortedColumn, _invertedIndexColumns);

        logStatistics();
        segmentLogger.info("Trying to build segment");
        final long buildStartTime = now();
        try {
            converter.build(_segmentVersion);
        } catch (Exception e) {
            segmentLogger.error("Could not build segment", e);
            FileUtils.deleteQuietly(tempSegmentFolder);
            return false;
        }
        final long buildEndTime = now();
        segmentLogger.info("Successfully built segment in {} ms", (buildEndTime - buildStartTime));
        File destDir = makeSegmentDirPath();
        FileUtils.deleteQuietly(destDir);
        try {
            FileUtils.moveDirectory(tempSegmentFolder.listFiles()[0], destDir);
            if (buildTgz) {
                TarGzCompressionUtils.createTarGzOfDirectory(destDir.getAbsolutePath());
            }
        } catch (IOException e) {
            segmentLogger.error("Exception during move/tar segment", e);
            FileUtils.deleteQuietly(tempSegmentFolder);
            return false;
        }
        FileUtils.deleteQuietly(tempSegmentFolder);
        return true;
    }

    protected boolean commitSegment() {
        // Send segmentCommit() to the controller
        // if that succeeds, swap in-memory segment with the one built.
        File destSeg = makeSegmentDirPath();
        final String segTarFileName = destSeg.getAbsolutePath() + TarGzCompressionUtils.TAR_GZ_FILE_EXTENTION;
        try {
            SegmentCompletionProtocol.Response response = _protocolHandler.segmentCommit(_currentOffset,
                    _segmentNameStr, new File(segTarFileName));
            if (!response.getStatus().equals(SegmentCompletionProtocol.ControllerResponseStatus.COMMIT_SUCCESS)) {
                segmentLogger.warn("Received controller response {}", response);
                return false;
            }
            FileUtils.deleteQuietly(new File(segTarFileName));
        } catch (FileNotFoundException e) {
            segmentLogger.error("Tar file {} not found", segTarFileName, e);
            return false;
        }
        return true;
    }

    protected boolean buildSegmentAndReplace() {
        boolean success = buildSegment(false);
        if (!success) {
            return success;
        }
        _realtimeTableDataManager.replaceLLSegment(_segmentZKMetadata.getSegmentName());
        return true;
    }

    protected void hold() {
        try {
            Thread.sleep(SegmentCompletionProtocol.MAX_HOLD_TIME_MS);
        } catch (InterruptedException e) {
            segmentLogger.warn("Interrupted while holding");
        }
    }

    protected SegmentCompletionProtocol.Response postSegmentConsumedMsg() {
        // Post segmentConsumed to current leader.
        // Retry maybe once if leader is not found.
        return _protocolHandler.segmentConsumed(_segmentNameStr, _currentOffset);
    }

    public void goOnlineFromConsuming(RealtimeSegmentZKMetadata metadata) throws InterruptedException {
        LLCRealtimeSegmentZKMetadata llcMetadata = (LLCRealtimeSegmentZKMetadata) metadata;
        final long endOffset = llcMetadata.getEndOffset();
        segmentLogger.info("State: {}, transitioning from CONSUMING to ONLINE (startOffset: {}, endOffset: {})",
                _state.toString(), _startOffset, endOffset);
        stop();
        segmentLogger.info("Consumer thread stopped in state {}", _state.toString());

        switch (_state) {
        case COMMITTED:
        case RETAINED:
            // Nothing to do. we already built local segment and swapped it with in-memory data.
            segmentLogger.info("State {}. Nothing to do", _state.toString());
            break;
        case DISCARDED:
        case ERROR:
            segmentLogger.info("State {}. Downloading to replace", _state.toString());
            downloadSegmentAndReplace(llcMetadata);
            break;
        case CATCHING_UP:
        case HOLDING:
        case INITIAL_CONSUMING:
            // Allow to catch up upto final offset, and then replace.
            if (_currentOffset > endOffset) {
                // We moved ahead of the offset that is committed in ZK.
                segmentLogger.warn("Current offset {} ahead of the offset in zk {}. Downloading to replace",
                        _currentOffset, endOffset);
                downloadSegmentAndReplace(llcMetadata);
            } else if (_currentOffset == endOffset) {
                segmentLogger.info("Current offset {} matches offset in zk {}. Replacing segment", _currentOffset,
                        endOffset);
                buildSegmentAndReplace();
            } else {
                segmentLogger.info("Attempting to catch up from offset {} to {} ", _currentOffset, endOffset);
                boolean success = catchupToFinalOffset(endOffset,
                        TimeUnit.MILLISECONDS.convert(_maxTimeForConsumingToOnlineSec, TimeUnit.SECONDS));
                if (success) {
                    segmentLogger.info("Caught up to offset {}", _currentOffset);
                    buildSegmentAndReplace();
                } else {
                    segmentLogger.info("Could not catch up to offset (current = {}). Downloading to replace",
                            _currentOffset);
                    downloadSegmentAndReplace(llcMetadata);
                }
            }
            break;
        default:
            segmentLogger.info("Downloading to replace segment while in state {}", _state.toString());
            downloadSegmentAndReplace(llcMetadata);
            break;
        }
    }

    protected void downloadSegmentAndReplace(LLCRealtimeSegmentZKMetadata metadata) {
        _realtimeTableDataManager.downloadAndReplaceSegment(_segmentNameStr, metadata);
    }

    protected long now() {
        return System.currentTimeMillis();
    }

    private boolean catchupToFinalOffset(long endOffset, long timeoutMs) {
        _finalOffset = endOffset;
        _consumeEndTime = now() + timeoutMs;
        _state = State.CONSUMING_TO_ONLINE;
        _receivedStop = false;
        consumeLoop();
        if (_currentOffset != endOffset) {
            // Timeout?
            segmentLogger.error("Could not consume up to {} (current offset {})", endOffset, _currentOffset);
            return false;
        }

        return true;
    }

    public void destroy() {
        try {
            stop();
        } catch (InterruptedException e) {
            segmentLogger.error("Could not stop consumer thread");
        }
        _realtimeSegment.destroy();
        try {
            _consumerWrapper.close();
        } catch (Exception e) {
            segmentLogger.warn("Could not close consumer wrapper", e);
        }
    }

    protected void start() {
        _consumerThread = new Thread(new PartitionConsumer(), _segmentNameStr);
        segmentLogger.info("Created new consumer thread {} for {}", _consumerThread, this.toString());
        _consumerThread.start();
    }

    /**
     * Stop the consuming thread.
     */
    public void stop() throws InterruptedException {
        _receivedStop = true;
        _consumerThread.join();
    }

    // TODO Make this a factory class.
    protected KafkaHighLevelStreamProviderConfig createStreamProviderConfig() {
        return new KafkaHighLevelStreamProviderConfig();
    }

    // Assume that this is called only on OFFLINE to CONSUMING transition.
    // If the transition is OFFLINE to ONLINE, the caller should have downloaded the segment and we don't reach here.
    public LLRealtimeSegmentDataManager(RealtimeSegmentZKMetadata segmentZKMetadata,
            AbstractTableConfig tableConfig, InstanceZKMetadata instanceZKMetadata,
            RealtimeTableDataManager realtimeTableDataManager, String resourceDataDir, Schema schema,
            ServerMetrics serverMetrics) throws Exception {
        _segmentZKMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
        _tableConfig = tableConfig;
        _realtimeTableDataManager = realtimeTableDataManager;
        _resourceDataDir = resourceDataDir;
        _schema = schema;
        _serverMetrics = serverMetrics;
        _segmentVersion = SegmentVersion
                .fromStringOrDefault(tableConfig.getIndexingConfig().getSegmentFormatVersion());
        _instance = _realtimeTableDataManager.getServerInstance();
        _protocolHandler = new ServerSegmentCompletionProtocolHandler(_instance);

        // TODO Validate configs
        IndexingConfig indexingConfig = _tableConfig.getIndexingConfig();
        KafkaHighLevelStreamProviderConfig kafkaStreamProviderConfig = createStreamProviderConfig();
        kafkaStreamProviderConfig.init(tableConfig, instanceZKMetadata, schema);
        final String bootstrapNodes = indexingConfig.getStreamConfigs()
                .get(CommonConstants.Helix.DataSource.STREAM_PREFIX + "."
                        + CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_BROKER_LIST);
        _kafkaTopic = kafkaStreamProviderConfig.getTopicName();
        _segmentNameStr = _segmentZKMetadata.getSegmentName();
        _segmentName = new LLCSegmentName(_segmentNameStr);
        _kafkaPartitionId = _segmentName.getPartitionId();
        _segmentMaxRowCount = kafkaStreamProviderConfig.getSizeThresholdToFlushSegment();
        _tableName = _tableConfig.getTableName();
        segmentLogger = LoggerFactory
                .getLogger(LLRealtimeSegmentDataManager.class.getName() + "_" + _segmentNameStr);
        if (indexingConfig.getSortedColumn().isEmpty()) {
            segmentLogger.info(
                    "RealtimeDataResourceZKMetadata contains no information about sorted column for segment {}",
                    _segmentName);
            _sortedColumn = null;
        } else {
            String firstSortedColumn = indexingConfig.getSortedColumn().get(0);
            if (_schema.hasColumn(firstSortedColumn)) {
                segmentLogger.info(
                        "Setting sorted column name: {} from RealtimeDataResourceZKMetadata for segment {}",
                        firstSortedColumn, _segmentName);
                _sortedColumn = firstSortedColumn;
            } else {
                segmentLogger.warn(
                        "Sorted column name: {} from RealtimeDataResourceZKMetadata is not existed in schema for segment {}.",
                        firstSortedColumn, _segmentName);
                _sortedColumn = null;
            }
        }
        //inverted index columns
        _invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
        _tableStreamName = _tableName + "_" + kafkaStreamProviderConfig.getStreamName();

        List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
        if (_sortedColumn != null && !invertedIndexColumns.contains(_sortedColumn)) {
            invertedIndexColumns.add(_sortedColumn);
        }
        // Start new realtime segment
        _realtimeSegment = new RealtimeSegmentImpl(schema, _segmentMaxRowCount, tableConfig.getTableName(),
                segmentZKMetadata.getSegmentName(), _kafkaTopic, _serverMetrics, invertedIndexColumns);
        _realtimeSegment.setSegmentMetadata(segmentZKMetadata, schema);

        // Create message decoder
        _messageDecoder = kafkaStreamProviderConfig.getDecoder();
        _clientId = _kafkaPartitionId + "-" + NetUtil.getHostnameOrAddress();

        // Create field extractor
        _fieldExtractor = (PlainFieldExtractor) FieldExtractorFactory.getPlainFieldExtractor(schema);
        _consumerWrapper = SimpleConsumerWrapper.forPartitionConsumption(new KafkaSimpleConsumerFactoryImpl(),
                bootstrapNodes, _clientId, _kafkaTopic, _kafkaPartitionId);
        _startOffset = _segmentZKMetadata.getStartOffset();
        _currentOffset = _startOffset;
        _resourceTmpDir = new File(resourceDataDir, "_tmp");
        if (!_resourceTmpDir.exists()) {
            _resourceTmpDir.mkdirs();
        }
        _state = State.INITIAL_CONSUMING;
        long now = now();
        _consumeStartTime = now;
        _consumeEndTime = now + kafkaStreamProviderConfig.getTimeThresholdToFlushSegment();
        start();
    }

    private void logStatistics() {
        int numErrors, numConversions, numNulls, numNullCols;
        if ((numErrors = _fieldExtractor.getTotalErrors()) > 0) {
            _serverMetrics.addMeteredTableValue(_tableStreamName, ServerMeter.ROWS_WITH_ERRORS, (long) numErrors);
        }
        Map<String, Integer> errorCount = _fieldExtractor.getErrorCount();
        for (String column : errorCount.keySet()) {
            if ((numErrors = errorCount.get(column)) > 0) {
                segmentLogger.warn("Column {} had {} rows with errors", column, numErrors);
            }
        }
        if ((numConversions = _fieldExtractor.getTotalConversions()) > 0) {
            _serverMetrics.addMeteredTableValue(_tableStreamName, ServerMeter.ROWS_NEEDING_CONVERSIONS,
                    (long) numConversions);
            segmentLogger.info("{} rows needed conversions ", numConversions);
        }
        if ((numNulls = _fieldExtractor.getTotalNulls()) > 0) {
            _serverMetrics.addMeteredTableValue(_tableStreamName, ServerMeter.ROWS_WITH_NULL_VALUES,
                    (long) numNulls);
            segmentLogger.info("{} rows had null columns", numNulls);
        }
        if ((numNullCols = _fieldExtractor.getTotalNullCols()) > 0) {
            _serverMetrics.addMeteredTableValue(_tableStreamName, ServerMeter.COLUMNS_WITH_NULL_VALUES,
                    (long) numNullCols);
            segmentLogger.info("{} columns had null values", numNullCols);
        }
    }

    // This should be done during commit? We may not always commit when we build a segment....
    // TODO Call this method when we are loading the segment, which we do from table datamanager afaik
    private void updateCurrentDocumentCountMetrics() {
        int currentRawDocs = _realtimeSegment.getRawDocumentCount();
        _serverMetrics.addValueToTableGauge(_tableName, ServerGauge.DOCUMENT_COUNT,
                (currentRawDocs - _lastUpdatedRawDocuments.get()));
        _lastUpdatedRawDocuments.set(currentRawDocs);
        final long now = now();
        final int rowsConsumed = _numRowsConsumed - _lastConsumedCount;
        final long prevTime = _lastConsumedCount == 0 ? _consumeStartTime : _lastLogTime;
        // Log every minute or 100k events
        if (now - prevTime > TimeUnit.MINUTES.toMillis(TIME_THRESHOLD_FOR_LOG_MINUTES)
                || rowsConsumed >= MSG_COUNT_THRESHOLD_FOR_LOG) {
            segmentLogger.info("Consumed {} events from (rate:{}/s), currentOffset={}", rowsConsumed,
                    (float) (rowsConsumed) * 1000 / (now - prevTime), _currentOffset);
            _lastConsumedCount = _numRowsConsumed;
            _lastLogTime = now;
        }
    }

    @Override
    public IndexSegment getSegment() {
        return _realtimeSegment;
    }

    @Override
    public String getSegmentName() {
        return _segmentNameStr;
    }

    public int getMaxTimeForConsumingToOnlineSec() {
        return _maxTimeForConsumingToOnlineSec;
    }
}