org.apache.hadoop.hive.kafka.TransactionalKafkaWriter.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.kafka.TransactionalKafkaWriter.java, a transactional Kafka record writer that Hive's Kafka storage handler uses to write records to a Kafka topic with exactly-once semantics. Depending on its configuration, the writer either commits the Kafka transaction at the task level (optimistic commit) or persists the transaction state to a working directory so that HiveServer2 (HS2) can commit it later.
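As a quick orientation before the listing, the constructor requires two producer properties. Below is a minimal, hypothetical fragment; the broker address and transactional id are placeholders, not values taken from this file, and a complete self-contained sketch follows the listing.

Properties props = new Properties();
// bootstrap.servers is mandatory; the constructor fails fast without it.
props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
// The writer asserts that the producer has a transactional id, so one must be configured.
props.setProperty(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "hive-query-id_task-0");
// Key and value serializers are overridden to ByteArraySerializer by the constructor itself.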

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.kafka;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.AuthenticationException;
import org.apache.kafka.common.errors.OutOfOrderSequenceException;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
 * Transactional Kafka record writer used to achieve exactly-once semantics.
 */
class TransactionalKafkaWriter
        implements FileSinkOperator.RecordWriter, RecordWriter<BytesWritable, KafkaWritable> {

    private static final Logger LOG = LoggerFactory.getLogger(TransactionalKafkaWriter.class);
    private static final String TRANSACTION_DIR = "transaction_states";

    private final String topic;
    private final HiveKafkaProducer<byte[], byte[]> producer;
    private final Callback callback;
    private final AtomicReference<Exception> sendExceptionRef = new AtomicReference<>();
    private final Path openTxFileName;
    private final boolean optimisticCommit;
    private final FileSystem fileSystem;
    private final Map<TopicPartition, Long> offsets = new HashMap<>();
    private final String writerIdTopicId;
    private final long producerId;
    private final short producerEpoch;
    private long sentRecords = 0L;

    /**
     * @param topic Kafka topic.
     * @param producerProperties Kafka producer properties.
     * @param queryWorkingPath the query working directory, i.e. table_directory/hive_query_id.
     *                         Used to store the state of the transaction and/or log sent records and partitions.
     *                         For more information see:
     *                         {@link KafkaStorageHandler#getQueryWorkingDir(org.apache.hadoop.hive.metastore.api.Table)}
     * @param fileSystem file system handler.
     * @param optimisticCommit if true the commit happens at the task level, otherwise it is delegated to HS2.
     */
    TransactionalKafkaWriter(String topic, Properties producerProperties, Path queryWorkingPath,
            FileSystem fileSystem, @Nullable Boolean optimisticCommit) {
        this.fileSystem = fileSystem;
        this.topic = Preconditions.checkNotNull(topic, "NULL topic !!");

        Preconditions.checkState(producerProperties.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) != null,
                "set [" + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG + "] property");
        producerProperties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                ByteArraySerializer.class.getName());
        producerProperties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                ByteArraySerializer.class.getName());
        this.producer = new HiveKafkaProducer<>(producerProperties);
        this.optimisticCommit = optimisticCommit == null ? true : optimisticCommit;
        this.callback = (metadata, exception) -> {
            if (exception != null) {
                sendExceptionRef.compareAndSet(null, exception);
            } else {
                //According to https://kafka.apache.org/0110/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html
                //Callbacks from the same TopicPartition return in order, so this keeps track of the most recent offset.
                final TopicPartition tp = new TopicPartition(metadata.topic(), metadata.partition());
                offsets.put(tp, metadata.offset());
            }
        };
        // Start Tx
        assert producer.getTransactionalId() != null;
        try {
            producer.initTransactions();
            producer.beginTransaction();
        } catch (Exception exception) {
            logHints(exception);
            if (tryToAbortTx(exception)) {
                LOG.error("Aborting Transaction [{}] cause by ERROR [{}]", producer.getTransactionalId(),
                        exception.getMessage());
                producer.abortTransaction();
            }
            LOG.error("Closing writer [{}] caused by ERROR [{}]", producer.getTransactionalId(),
                    exception.getMessage());
            producer.close(0, TimeUnit.MILLISECONDS);
            throw exception;
        }
        writerIdTopicId = String.format("WriterId [%s], Kafka Topic [%s]", producer.getTransactionalId(), topic);
        producerEpoch = this.optimisticCommit ? -1 : producer.getEpoch();
        producerId = this.optimisticCommit ? -1 : producer.getProducerId();
        LOG.info("DONE with Initialization of {}, Epoch[{}], internal ID[{}]", writerIdTopicId, producerEpoch,
                producerId);
        //Writer base working directory
        openTxFileName = this.optimisticCommit ? null
                : new Path(new Path(new Path(queryWorkingPath, TRANSACTION_DIR), producer.getTransactionalId()),
                        String.valueOf(producerEpoch));
    }

    @Override
    public void write(Writable w) throws IOException {
        checkExceptions();
        try {
            sentRecords++;
            producer.send(KafkaUtils.toProducerRecord(topic, (KafkaWritable) w), callback);
        } catch (Exception e) {
            if (tryToAbortTx(e)) {
                // producer.send() may throw a KafkaException which wraps a ProducerFencedException; re-throw its wrapped inner cause.
                producer.abortTransaction();
            }
            producer.close(0, TimeUnit.MILLISECONDS);
            sendExceptionRef.compareAndSet(null, e);
            checkExceptions();
        }
    }

    private void logHints(Exception e) {
        if (e instanceof TimeoutException) {
            LOG.error("Maybe Try to increase [`retry.backoff.ms`] to avoid this error [{}].", e.getMessage());
        }
    }

    /**
     * The non-abort close can be split into two parts.
     * Part one flushes all buffered records to Kafka, then logs the (topic-partition, offset) pairs.
     * Part two either commits the transaction or saves the transaction state to the WAL and lets HS2 do the commit.
     *
     * @param abort if set to true, abort the flush and exit
     * @throws IOException exception causing the failure
     */
    @Override
    public void close(boolean abort) throws IOException {
        if (abort) {
            // Abort case: try to abort the transaction -> close the producer ASAP -> exit.
            LOG.warn("Aborting Transaction and Sending from {}", writerIdTopicId);
            try {
                producer.abortTransaction();
            } catch (Exception e) {
                LOG.error("Aborting Transaction {} failed due to [{}]", writerIdTopicId, e.getMessage());
            }
            producer.close(0, TimeUnit.MILLISECONDS);
            return;
        }

        // Normal case -> log and commit, then close.
        LOG.info("Flushing Kafka buffer of writerId {}", writerIdTopicId);
        producer.flush();

        // No exception so far; log what was flushed as (topic, partition, last offset).
        String formattedMsg = "Topic[%s] Partition [%s] -> Last offset [%s]";
        String flushedOffsetMsg = offsets.entrySet().stream()
                .map(topicPartitionLongEntry -> String.format(formattedMsg,
                        topicPartitionLongEntry.getKey().topic(), topicPartitionLongEntry.getKey().partition(),
                        topicPartitionLongEntry.getValue()))
                .collect(Collectors.joining(","));

        LOG.info("WriterId {} flushed the following [{}] ", writerIdTopicId, flushedOffsetMsg);
        // OPTIMISTIC COMMIT OR PERSIST STATE OF THE TX_WAL
        checkExceptions();
        if (optimisticCommit) {
            // Case Commit at the task level
            commitTransaction();
        } else {
            // Case delegate TX commit to HS2
            persistTxState();
        }
        checkExceptions();
        producer.close();
        LOG.info("Closed writerId [{}], Sent [{}] records to Topic [{}]", producer.getTransactionalId(),
                sentRecords, topic);
    }

    private void commitTransaction() {
        LOG.info("Attempting Optimistic commit by {}", writerIdTopicId);
        try {
            producer.commitTransaction();
        } catch (Exception e) {
            sendExceptionRef.compareAndSet(null, e);
        }
    }

    /**
     * Write the Kafka producer's internal id (PID) and epoch to the checkpoint file {@link TransactionalKafkaWriter#openTxFileName}.
     */
    private void persistTxState() {
        LOG.info("Committing state to path [{}] by [{}]", openTxFileName.toString(), writerIdTopicId);
        try (FSDataOutputStream outStream = fileSystem.create(openTxFileName)) {
            outStream.writeLong(producerId);
            outStream.writeShort(producerEpoch);
        } catch (Exception e) {
            sendExceptionRef.compareAndSet(null, e);
        }
    }

    @Override
    public void write(BytesWritable bytesWritable, KafkaWritable kafkaWritable) throws IOException {
        write(kafkaWritable);
    }

    @Override
    public void close(Reporter reporter) throws IOException {
        close(false);
    }

    @VisibleForTesting
    long getSentRecords() {
        return sentRecords;
    }

    @VisibleForTesting
    short getProducerEpoch() {
        return producerEpoch;
    }

    @VisibleForTesting
    long getProducerId() {
        return producerId;
    }

    /**
     * Checks for a pending exception. If one exists, aborts the transaction when possible, closes the producer
     * and re-throws the exception as an IOException.
     * @throws IOException the pending exception, wrapped as an IOException.
     */
    private void checkExceptions() throws IOException {
        if (sendExceptionRef.get() != null && sendExceptionRef.get() instanceof KafkaException
                && sendExceptionRef.get().getCause() instanceof ProducerFencedException) {
            // producer.send() may throw a KafkaException which wraps a ProducerFencedException; unwrap it and keep the inner cause.
            sendExceptionRef.updateAndGet(e -> (KafkaException) e.getCause());
        }
        if (sendExceptionRef.get() != null) {
            final Exception exception = sendExceptionRef.get();
            logHints(exception);
            if (tryToAbortTx(exception)) {
                LOG.error("Aborting Transaction [{}] cause by ERROR [{}]", writerIdTopicId, exception.getMessage());
                producer.abortTransaction();
            }
            LOG.error("Closing writer [{}] caused by ERROR [{}]", writerIdTopicId, exception.getMessage());
            producer.close(0, TimeUnit.MILLISECONDS);
            throw new IOException(exception);
        }
    }

    private boolean tryToAbortTx(Throwable e) {
        // According to https://kafka.apache.org/0110/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html
        // We can't recover from these exceptions, so our only option is to close the producer and exit.
        boolean isNotFencedOut = !(e instanceof ProducerFencedException)
                && !(e instanceof OutOfOrderSequenceException) && !(e instanceof AuthenticationException);
        // producer.send() may throw a KafkaException which wraps a FencedException therefore check inner cause.
        boolean causeIsNotFencedOut = !(e.getCause() != null && e.getCause() instanceof ProducerFencedException);
        return isNotFencedOut && causeIsNotFencedOut;
    }

    /**
     * Given a query working directory (table_directory/hive_query_id/), fetches the open transaction states.
     * Table directory is {@link org.apache.hadoop.hive.metastore.api.Table#getSd()#getLocation()}.
     * Hive Query ID is inferred from the JobConf see {@link KafkaStorageHandler#getQueryId()}.
     *
     * The path to a transaction state is as follows:
     * .../{@code queryWorkingDir}/{@code TRANSACTION_DIR}/{@code writerId}/{@code producerEpoch}
     *
     * The actual state is stored in the file {@code producerEpoch}.
     * The file contains a {@link Long} for the internal producer id and a {@link Short} for the producer epoch.
     * According to the Kafka API, the highest epoch corresponds to the active producer, therefore if there are
     * multiple {@code producerEpoch} files the maximum is picked based on {@link Short#compareTo}.
     *
     * @param fs File system handler.
     * @param queryWorkingDir Query working Directory, see:
     *                        {@link KafkaStorageHandler#getQueryWorkingDir(org.apache.hadoop.hive.metastore.api.Table)}.
     * @return Map of Transaction Ids to Pair of Kafka Producer internal ID (Long) and producer epoch (short)
     * @throws IOException if any of the IO operations fail.
     */
    static Map<String, Pair<Long, Short>> getTransactionsState(FileSystem fs, Path queryWorkingDir)
            throws IOException {
        // List all the transaction directories under the transaction working dir.
        final Path transactionWorkingDir = new Path(queryWorkingDir, TRANSACTION_DIR);
        final FileStatus[] files = fs.listStatus(transactionWorkingDir);
        final Set<FileStatus> transactionSet = Arrays.stream(files).filter(FileStatus::isDirectory)
                .collect(Collectors.toSet());
        Set<Path> setOfTxPath = transactionSet.stream().map(FileStatus::getPath).collect(Collectors.toSet());
        ImmutableMap.Builder<String, Pair<Long, Short>> builder = ImmutableMap.builder();
        setOfTxPath.forEach(path -> {
            final String txId = path.getName();
            try {
                FileStatus[] epochFiles = fs.listStatus(path);
                // List all the epoch files, if any, and select the max.
                // According to the Kafka API, a more recent version of a producer with the same transactional id has a greater epoch and the same PID.
                Optional<Short> maxEpoch = Arrays.stream(epochFiles).filter(FileStatus::isFile)
                        .map(fileStatus -> Short.valueOf(fileStatus.getPath().getName())).max(Short::compareTo);
                short epoch = maxEpoch.orElseThrow(() -> new RuntimeException(
                        "Missing epoch file under transaction directory [" + path.toString() + "]"));
                Path openTxFileName = new Path(path, String.valueOf(epoch));
                long internalId;
                try (FSDataInputStream inStream = fs.open(openTxFileName)) {
                    internalId = inStream.readLong();
                    short fileEpoch = inStream.readShort();
                    if (epoch != fileEpoch) {
                        throw new RuntimeException(String.format("Was expecting [%s] but got [%s] from path [%s]",
                                epoch, fileEpoch, path.toString()));
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
                builder.put(txId, Pair.of(internalId, epoch));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        return builder.build();
    }
}
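
For orientation, here is a hypothetical, self-contained usage sketch; it is not part of the Hive source. It assumes a driver class placed in the org.apache.hadoop.hive.kafka package (the writer and getTransactionsState are package-private), a placeholder broker address, topic name and working directory, and that building KafkaWritable rows happens elsewhere. In Hive itself the writer is created by the Kafka storage handler, and the non-optimistic commit is finished by HS2.

package org.apache.hadoop.hive.kafka; // assumed, so the package-private writer is visible

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.clients.producer.ProducerConfig;

import java.util.Map;
import java.util.Properties;

public final class TransactionalKafkaWriterSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder broker
        props.setProperty(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "hive-query-id_task-0"); // placeholder id

        FileSystem fs = FileSystem.get(new Configuration());
        // Query working directory, laid out as table_directory/hive_query_id (placeholder path).
        Path queryWorkingDir = new Path("/warehouse/kafka_table/hive-query-id");

        // optimisticCommit = false delegates the final commit to HS2: close(false) flushes,
        // logs the flushed offsets and persists (producerId, epoch) under
        // .../transaction_states/<transactional.id>/<epoch> instead of committing.
        TransactionalKafkaWriter writer =
                new TransactionalKafkaWriter("my_topic", props, queryWorkingDir, fs, false);

        // writer.write(kafkaWritable);  // one call per row; building a KafkaWritable is outside this file
        writer.close(false);

        // Later, the coordinator can list the persisted transaction states:
        Map<String, Pair<Long, Short>> txStates =
                TransactionalKafkaWriter.getTransactionsState(fs, queryWorkingDir);
        txStates.forEach((txId, pidAndEpoch) ->
                System.out.printf("transactional.id=%s producerId=%d epoch=%d%n",
                        txId, pidAndEpoch.getLeft(), pidAndEpoch.getRight()));
    }
}

With optimisticCommit left at its default (true), close(false) would instead call commitTransaction() directly at the task level and no transaction_states files would be written.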