org.apache.pulsar.io.hbase.sink.HbaseAbstractSink.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pulsar.io.hbase.sink.HbaseAbstractSink.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.io.hbase.sink;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.pulsar.functions.api.Record;
import org.apache.pulsar.io.core.Sink;
import org.apache.pulsar.io.core.SinkContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * A Simple abstract class for Hbase sink
 * Users need to implement extractKeyValue function to use this sink
 */
@Slf4j
public abstract class HbaseAbstractSink<T> implements Sink<T> {

    @Data(staticConstructor = "of")
    @Setter
    @Getter
    @EqualsAndHashCode
    @ToString
    public static class TableDefinition {
        private final String rowKeyName;
        private final String familyName;
        private final List<String> qualifierNames;
    }

    private HbaseSinkConfig hbaseSinkConfig;
    private Configuration configuration;
    private Connection connection;
    private Admin admin;
    private TableName tableName;
    private Table table;

    protected TableDefinition tableDefinition;

    // for flush
    private long batchTimeMs;
    private int batchSize;
    private List<Record<T>> incomingList;
    private ScheduledExecutorService flushExecutor;

    @Override
    public void open(Map<String, Object> config, SinkContext sinkContext) throws Exception {
        hbaseSinkConfig = HbaseSinkConfig.load(config);
        Preconditions.checkNotNull(hbaseSinkConfig.getZookeeperQuorum(), "zookeeperQuorum property not set.");
        Preconditions.checkNotNull(hbaseSinkConfig.getZookeeperClientPort(),
                "zookeeperClientPort property not set.");
        Preconditions.checkNotNull(hbaseSinkConfig.getZookeeperZnodeParent(),
                "zookeeperZnodeParent property not set.");
        Preconditions.checkNotNull(hbaseSinkConfig.getTableName(), "hbase tableName property not set.");

        getTable(hbaseSinkConfig);
        tableDefinition = getTableDefinition(hbaseSinkConfig);

        batchTimeMs = hbaseSinkConfig.getBatchTimeMs();
        batchSize = hbaseSinkConfig.getBatchSize();
        incomingList = Lists.newArrayList();
        flushExecutor = Executors.newScheduledThreadPool(1);
        flushExecutor.scheduleAtFixedRate(() -> flush(), batchTimeMs, batchTimeMs, TimeUnit.MILLISECONDS);
    }

    @Override
    public void close() throws Exception {
        if (null != admin) {
            admin.close();
        }

        if (null != connection) {
            connection.close();
        }

        if (null != flushExecutor) {
            flushExecutor.shutdown();
        }
    }

    @Override
    public void write(Record<T> record) throws Exception {
        int number;
        synchronized (this) {
            if (null != record) {
                incomingList.add(record);
            }
            number = incomingList.size();
        }

        if (number == batchSize) {
            flushExecutor.submit(() -> flush());
        }
    }

    private void flush() {
        List<Put> puts = new ArrayList<>();
        List<Record<T>> toFlushList;
        synchronized (this) {
            if (incomingList.isEmpty()) {
                return;
            }
            toFlushList = incomingList;
            incomingList = Lists.newArrayList();
        }

        if (CollectionUtils.isNotEmpty(toFlushList)) {
            for (Record<T> record : toFlushList) {
                try {
                    bindValue(record, puts);
                } catch (Exception e) {
                    record.fail();
                    toFlushList.remove(record);
                    log.warn("Record flush thread was exception ", e);
                }
            }
        }

        try {
            if (CollectionUtils.isNotEmpty(puts)) {
                table.put(puts);
                admin.flush(tableName);
            }

            toFlushList.forEach(tRecord -> tRecord.ack());
            puts.clear();
            toFlushList.clear();
        } catch (Exception e) {
            toFlushList.forEach(tRecord -> tRecord.fail());
            log.error("Hbase table put data exception ", e);
        }
    }

    // bind value with a Hbase put
    public abstract void bindValue(Record<T> message, List<Put> puts) throws Exception;

    private void getTable(HbaseSinkConfig hbaseSinkConfig) throws IOException {
        configuration = HBaseConfiguration.create();
        String hbaseConfigResources = hbaseSinkConfig.getHbaseConfigResources();
        if (StringUtils.isNotBlank(hbaseConfigResources)) {
            configuration.addResource(hbaseConfigResources);
        }

        configuration.set("hbase.zookeeper.quorum", hbaseSinkConfig.getZookeeperQuorum());
        configuration.set("hbase.zookeeper.property.clientPort", hbaseSinkConfig.getZookeeperClientPort());
        configuration.set("zookeeper.znode.parent", hbaseSinkConfig.getZookeeperZnodeParent());

        connection = ConnectionFactory.createConnection(configuration);
        admin = connection.getAdmin();
        tableName = TableName.valueOf(hbaseSinkConfig.getTableName());
        if (!admin.tableExists(this.tableName)) {
            throw new IllegalArgumentException(this.tableName + " table does not exist.");
        }

        table = connection.getTable(this.tableName);
    }

    /**
     * Get the {@link TableDefinition} for the given table.
     */
    private TableDefinition getTableDefinition(HbaseSinkConfig hbaseSinkConfig) throws Exception {
        Preconditions.checkNotNull(hbaseSinkConfig.getRowKeyName(), "rowKeyName property not set.");
        Preconditions.checkNotNull(hbaseSinkConfig.getFamilyName(), "familyName property not set.");
        Preconditions.checkNotNull(hbaseSinkConfig.getQualifierNames(), "qualifierNames property not set.");

        return TableDefinition.of(hbaseSinkConfig.getRowKeyName(), hbaseSinkConfig.getFamilyName(),
                hbaseSinkConfig.getQualifierNames());
    }
}