org.apache.nifi.hbase.HBase_1_1_2_ClientService.java Source code

Introduction

Here is the source code for org.apache.nifi.hbase.HBase_1_1_2_ClientService.java, an Apache NiFi controller service that implements HBaseClientService against the HBase 1.1.2 client API.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.hbase;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.ParseFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnDisabled;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.ControllerServiceInitializationContext;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.hadoop.SecurityUtil;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.reporting.InitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

@RequiresInstanceClassLoading
@Tags({ "hbase", "client" })
@CapabilityDescription("Implementation of HBaseClientService for HBase 1.1.2. This service can be configured by providing "
        + "a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files "
        + "are provided, they will be loaded first, and the values of the additional properties will override the values from "
        + "the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase "
        + "configuration.")
@DynamicProperty(name = "The name of an HBase configuration property.",
        value = "The value of the given HBase configuration property.",
        description = "These properties will be set on the HBase configuration after loading any provided configuration files.")
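// For illustration (hypothetical values): if the configuration-files property points at
// "/etc/hbase/conf/hbase-site.xml" and a dynamic property named "hbase.client.scanner.caching"
// is set to "500", the file is loaded first and the dynamic property is then applied on top of it.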
public class HBase_1_1_2_ClientService extends AbstractControllerService implements HBaseClientService {

    private static final Logger logger = LoggerFactory.getLogger(HBase_1_1_2_ClientService.class);

    static final String HBASE_CONF_ZK_QUORUM = "hbase.zookeeper.quorum";
    static final String HBASE_CONF_ZK_PORT = "hbase.zookeeper.property.clientPort";
    static final String HBASE_CONF_ZNODE_PARENT = "zookeeper.znode.parent";
    static final String HBASE_CONF_CLIENT_RETRIES = "hbase.client.retries.number";

    private volatile Connection connection;
    private volatile UserGroupInformation ugi;

    private List<PropertyDescriptor> properties;
    private KerberosProperties kerberosProperties;
    private volatile File kerberosConfigFile = null;

    // Holder of cached Configuration information so validation does not reload the same config over and over
    private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();

    protected Connection getConnection() {
        return connection;
    }

    @Override
    protected void init(ControllerServiceInitializationContext config) throws InitializationException {
        kerberosConfigFile = config.getKerberosConfigurationFile();
        kerberosProperties = getKerberosProperties(kerberosConfigFile);

        List<PropertyDescriptor> props = new ArrayList<>();
        props.add(HADOOP_CONF_FILES);
        props.add(kerberosProperties.getKerberosPrincipal());
        props.add(kerberosProperties.getKerberosKeytab());
        props.add(ZOOKEEPER_QUORUM);
        props.add(ZOOKEEPER_CLIENT_PORT);
        props.add(ZOOKEEPER_ZNODE_PARENT);
        props.add(HBASE_CLIENT_RETRIES);
        props.add(PHOENIX_CLIENT_JAR_LOCATION);
        props.addAll(getAdditionalProperties());
        this.properties = Collections.unmodifiableList(props);
    }

    protected List<PropertyDescriptor> getAdditionalProperties() {
        return new ArrayList<>();
    }

    protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
        return new KerberosProperties(kerberosConfigFile);
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    @Override
    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
        return new PropertyDescriptor.Builder()
                .description("Specifies the value for '" + propertyDescriptorName + "' in the HBase configuration.")
                .name(propertyDescriptorName)
                .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
                .dynamic(true)
                .build();

    @Override
    protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
        boolean confFileProvided = validationContext.getProperty(HADOOP_CONF_FILES).isSet();
        boolean zkQuorumProvided = validationContext.getProperty(ZOOKEEPER_QUORUM).isSet();
        boolean zkPortProvided = validationContext.getProperty(ZOOKEEPER_CLIENT_PORT).isSet();
        boolean znodeParentProvided = validationContext.getProperty(ZOOKEEPER_ZNODE_PARENT).isSet();
        boolean retriesProvided = validationContext.getProperty(HBASE_CLIENT_RETRIES).isSet();

        final List<ValidationResult> problems = new ArrayList<>();

        if (!confFileProvided
                && (!zkQuorumProvided || !zkPortProvided || !znodeParentProvided || !retriesProvided)) {
            problems.add(new ValidationResult.Builder().valid(false).subject(this.getClass().getSimpleName())
                    .explanation(
                            "ZooKeeper Quorum, ZooKeeper Client Port, ZooKeeper ZNode Parent, and HBase Client Retries are required "
                                    + "when Hadoop Configuration Files are not provided.")
                    .build());
        }

        if (confFileProvided) {
            final String configFiles = validationContext.getProperty(HADOOP_CONF_FILES).getValue();
            ValidationResources resources = validationResourceHolder.get();

            // if no resources in the holder, or if the holder has different resources loaded,
            // then load the Configuration and set the new resources in the holder
            if (resources == null || !configFiles.equals(resources.getConfigResources())) {
                getLogger().debug("Reloading validation resources");
                resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
                validationResourceHolder.set(resources);
            }

            final Configuration hbaseConfig = resources.getConfiguration();
            final String principal = validationContext.getProperty(kerberosProperties.getKerberosPrincipal())
                    .evaluateAttributeExpressions().getValue();
            final String keytab = validationContext.getProperty(kerberosProperties.getKerberosKeytab())
                    .evaluateAttributeExpressions().getValue();

            problems.addAll(KerberosProperties.validatePrincipalAndKeytab(this.getClass().getSimpleName(),
                    hbaseConfig, principal, keytab, getLogger()));
        }

        return problems;
    }

    /**
     * As of Apache NiFi 1.5.0, due to changes made to
     * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
     * class to authenticate a principal with Kerberos, HBase controller services no longer
     * attempt relogins explicitly.  For more information, please read the documentation for
     * {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
     * <p/>
     * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started
     * when the HBase controller service was enabled.  The use of a separate thread to explicitly relogin could cause
     * race conditions with the implicit relogin attempts made by hadoop/HBase code on a thread that references the same
     * {@link UserGroupInformation} instance.  One of these threads could leave the
     * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
     * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
     * authentication attempts that would leave the HBase controller service in an unrecoverable state.
     *
     * @see SecurityUtil#loginKerberos(Configuration, String, String)
     */
    @OnEnabled
    public void onEnabled(final ConfigurationContext context)
            throws InitializationException, IOException, InterruptedException {
        this.connection = createConnection(context);

        // verify connectivity eagerly; listing table names fails fast if the cluster
        // is unreachable or authentication failed
        if (this.connection != null) {
            try (final Admin admin = this.connection.getAdmin()) {
                admin.listTableNames();
            }
        }
    }

    protected Connection createConnection(final ConfigurationContext context)
            throws IOException, InterruptedException {
        final String configFiles = context.getProperty(HADOOP_CONF_FILES).getValue();
        final Configuration hbaseConfig = getConfigurationFromFiles(configFiles);

        // override with any properties that are provided
        if (context.getProperty(ZOOKEEPER_QUORUM).isSet()) {
            hbaseConfig.set(HBASE_CONF_ZK_QUORUM, context.getProperty(ZOOKEEPER_QUORUM).getValue());
        }
        if (context.getProperty(ZOOKEEPER_CLIENT_PORT).isSet()) {
            hbaseConfig.set(HBASE_CONF_ZK_PORT, context.getProperty(ZOOKEEPER_CLIENT_PORT).getValue());
        }
        if (context.getProperty(ZOOKEEPER_ZNODE_PARENT).isSet()) {
            hbaseConfig.set(HBASE_CONF_ZNODE_PARENT, context.getProperty(ZOOKEEPER_ZNODE_PARENT).getValue());
        }
        if (context.getProperty(HBASE_CLIENT_RETRIES).isSet()) {
            hbaseConfig.set(HBASE_CONF_CLIENT_RETRIES, context.getProperty(HBASE_CLIENT_RETRIES).getValue());
        }

        // add any dynamic properties to the HBase configuration
        for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
            final PropertyDescriptor descriptor = entry.getKey();
            if (descriptor.isDynamic()) {
                hbaseConfig.set(descriptor.getName(), entry.getValue());
            }
        }

        if (SecurityUtil.isSecurityEnabled(hbaseConfig)) {
            final String principal = context.getProperty(kerberosProperties.getKerberosPrincipal())
                    .evaluateAttributeExpressions().getValue();
            final String keyTab = context.getProperty(kerberosProperties.getKerberosKeytab())
                    .evaluateAttributeExpressions().getValue();

            getLogger().info("HBase Security Enabled, logging in as principal {} with keytab {}",
                    new Object[] { principal, keyTab });
            ugi = SecurityUtil.loginKerberos(hbaseConfig, principal, keyTab);
            getLogger().info("Successfully logged in as principal {} with keytab {}",
                    new Object[] { principal, keyTab });

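            // create the connection inside doAs(...) so the Kerberos-authenticated UGI
            // is the active user when the HBase client opens its RPC connections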
            return ugi.doAs(new PrivilegedExceptionAction<Connection>() {
                @Override
                public Connection run() throws Exception {
                    return ConnectionFactory.createConnection(hbaseConfig);
                }
            });

        } else {
            getLogger().info("Simple Authentication");
            return ConnectionFactory.createConnection(hbaseConfig);
        }

    }

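    // Builds an HBase Configuration, layering each comma-separated file on top of the
    // defaults, e.g. "/etc/hadoop/conf/core-site.xml,/etc/hbase/conf/hbase-site.xml"
    // (hypothetical paths). A blank list yields the stock HBaseConfiguration.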
    protected Configuration getConfigurationFromFiles(final String configFiles) {
        final Configuration hbaseConfig = HBaseConfiguration.create();
        if (StringUtils.isNotBlank(configFiles)) {
            for (final String configFile : configFiles.split(",")) {
                hbaseConfig.addResource(new Path(configFile.trim()));
            }
        }
        return hbaseConfig;
    }

    @OnDisabled
    public void shutdown() {
        if (connection != null) {
            try {
                connection.close();
            } catch (final IOException ioe) {
                getLogger().warn("Failed to close connection to HBase due to {}", new Object[] { ioe });
            }
        }
    }

    @Override
    public void put(final String tableName, final Collection<PutFlowFile> puts) throws IOException {
        try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
            // Group the incoming cells into one Put per row key
            final Map<String, Put> rowPuts = new HashMap<>();
            for (final PutFlowFile putFlowFile : puts) {
                // the row key is wrapped in a String because byte[] lacks value-based
                // equals/hashCode and therefore cannot serve as a map key
                final String rowKeyString = new String(putFlowFile.getRow(), StandardCharsets.UTF_8);
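                // reuse the row's Put if one was already created; e.g. two PutFlowFiles
                // sharing row key "user-1" (hypothetical) collapse into a single Put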
                Put put = rowPuts.get(rowKeyString);
                if (put == null) {
                    put = new Put(putFlowFile.getRow());
                    rowPuts.put(rowKeyString, put);
                }

                for (final PutColumn column : putFlowFile.getColumns()) {
                    if (column.getTimestamp() != null) {
                        put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getTimestamp(),
                                column.getBuffer());
                    } else {
                        put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getBuffer());
                    }
                }
            }

            table.put(new ArrayList<>(rowPuts.values()));
        }
    }

    @Override
    public void put(final String tableName, final byte[] rowId, final Collection<PutColumn> columns)
            throws IOException {
        try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
            Put put = new Put(rowId);
            for (final PutColumn column : columns) {
                put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getBuffer());
            }
            table.put(put);
        }
    }

    @Override
    public boolean checkAndPut(final String tableName, final byte[] rowId, final byte[] family,
            final byte[] qualifier, final byte[] value, final PutColumn column) throws IOException {
        try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
            Put put = new Put(rowId);
            put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getBuffer());
            return table.checkAndPut(rowId, family, qualifier, value, put);
        }
    }

    @Override
    public void delete(final String tableName, final byte[] rowId) throws IOException {
        try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
            Delete delete = new Delete(rowId);
            table.delete(delete);
        }
    }

    @Override
    public void scan(final String tableName, final Collection<Column> columns, final String filterExpression,
            final long minTime, final ResultHandler handler) throws IOException {

        Filter filter = null;
        if (StringUtils.isNotBlank(filterExpression)) {
            ParseFilter parseFilter = new ParseFilter();
            filter = parseFilter.parseFilterString(filterExpression);
        }

        try (final Table table = connection.getTable(TableName.valueOf(tableName));
                final ResultScanner scanner = getResults(table, columns, filter, minTime)) {

            for (final Result result : scanner) {
                final byte[] rowKey = result.getRow();
                final Cell[] cells = result.rawCells();

                if (cells == null) {
                    continue;
                }

                // convert HBase cells to NiFi cells
                final ResultCell[] resultCells = new ResultCell[cells.length];
                for (int i = 0; i < cells.length; i++) {
                    final Cell cell = cells[i];
                    final ResultCell resultCell = getResultCell(cell);
                    resultCells[i] = resultCell;
                }

                // delegate to the handler
                handler.handle(rowKey, resultCells);
            }
        }
    }

    @Override
    public void scan(final String tableName, final byte[] startRow, final byte[] endRow,
            final Collection<Column> columns, final ResultHandler handler) throws IOException {

        try (final Table table = connection.getTable(TableName.valueOf(tableName));
                final ResultScanner scanner = getResults(table, startRow, endRow, columns)) {

            for (final Result result : scanner) {
                final byte[] rowKey = result.getRow();
                final Cell[] cells = result.rawCells();

                if (cells == null) {
                    continue;
                }

                // convert HBase cells to NiFi cells
                final ResultCell[] resultCells = new ResultCell[cells.length];
                for (int i = 0; i < cells.length; i++) {
                    final Cell cell = cells[i];
                    final ResultCell resultCell = getResultCell(cell);
                    resultCells[i] = resultCell;
                }

                // delegate to the handler
                handler.handle(rowKey, resultCells);
            }
        }
    }

    // protected and extracted into separate method for testing
    protected ResultScanner getResults(final Table table, final byte[] startRow, final byte[] endRow,
            final Collection<Column> columns) throws IOException {
        final Scan scan = new Scan();
        scan.setStartRow(startRow);
        scan.setStopRow(endRow);
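        // note: HBase treats the stop row as exclusive, so endRow itself is not returned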

        if (columns != null) {
            for (Column col : columns) {
                if (col.getQualifier() == null) {
                    scan.addFamily(col.getFamily());
                } else {
                    scan.addColumn(col.getFamily(), col.getQualifier());
                }
            }
        }

        return table.getScanner(scan);
    }

    // protected and extracted into separate method for testing
    protected ResultScanner getResults(final Table table, final Collection<Column> columns, final Filter filter,
            final long minTime) throws IOException {
        // Create a new scan. We will set the min timerange as the latest timestamp that
        // we have seen so far. The minimum timestamp is inclusive, so we will get duplicates.
        // We will record any cells that have the latest timestamp, so that when we scan again,
        // we know to throw away those duplicates.
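        // Example: if the previous scan's newest timestamp was 1000, passing minTime = 1000
        // returns those cells again (the lower bound is inclusive); the caller uses its
        // record of them to discard the duplicates.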
        final Scan scan = new Scan();
        scan.setTimeRange(minTime, Long.MAX_VALUE);

        if (filter != null) {
            scan.setFilter(filter);
        }

        if (columns != null) {
            for (Column col : columns) {
                if (col.getQualifier() == null) {
                    scan.addFamily(col.getFamily());
                } else {
                    scan.addColumn(col.getFamily(), col.getQualifier());
                }
            }
        }

        return table.getScanner(scan);
    }

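    // Copies the row/family/qualifier/value/timestamp/tags coordinates of an HBase Cell
    // into NiFi's HBase-agnostic ResultCell so handlers need not depend on HBase classes.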
    private ResultCell getResultCell(Cell cell) {
        final ResultCell resultCell = new ResultCell();
        resultCell.setRowArray(cell.getRowArray());
        resultCell.setRowOffset(cell.getRowOffset());
        resultCell.setRowLength(cell.getRowLength());

        resultCell.setFamilyArray(cell.getFamilyArray());
        resultCell.setFamilyOffset(cell.getFamilyOffset());
        resultCell.setFamilyLength(cell.getFamilyLength());

        resultCell.setQualifierArray(cell.getQualifierArray());
        resultCell.setQualifierOffset(cell.getQualifierOffset());
        resultCell.setQualifierLength(cell.getQualifierLength());

        resultCell.setTimestamp(cell.getTimestamp());
        resultCell.setTypeByte(cell.getTypeByte());
        resultCell.setSequenceId(cell.getSequenceId());

        resultCell.setValueArray(cell.getValueArray());
        resultCell.setValueOffset(cell.getValueOffset());
        resultCell.setValueLength(cell.getValueLength());

        resultCell.setTagsArray(cell.getTagsArray());
        resultCell.setTagsOffset(cell.getTagsOffset());
        resultCell.setTagsLength(cell.getTagsLength());
        return resultCell;
    }

    protected static class ValidationResources {
        private final String configResources;
        private final Configuration configuration;

        public ValidationResources(String configResources, Configuration configuration) {
            this.configResources = configResources;
            this.configuration = configuration;
        }

        public String getConfigResources() {
            return configResources;
        }

        public Configuration getConfiguration() {
            return configuration;
        }
    }

    @Override
    public byte[] toBytes(boolean b) {
        return Bytes.toBytes(b);
    }

    @Override
    public byte[] toBytes(float f) {
        return Bytes.toBytes(f);
    }

    @Override
    public byte[] toBytes(int i) {
        return Bytes.toBytes(i);
    }

    @Override
    public byte[] toBytes(long l) {
        return Bytes.toBytes(l);
    }

    @Override
    public byte[] toBytes(double d) {
        return Bytes.toBytes(d);
    }

    @Override
    public byte[] toBytes(String s) {
        return Bytes.toBytes(s);
    }

    @Override
    public byte[] toBytesBinary(String s) {
        return Bytes.toBytesBinary(s);
    }

    @Override
    public String toTransitUri(String tableName, String rowKey) {
        if (connection == null) {
            logger.warn("Connection has not been established, could not create a transit URI. Returning null.");
            return null;
        }
        try (final Admin admin = connection.getAdmin()) {
            final String masterAddress = admin.getClusterStatus().getMaster().getHostAndPort();
            return "hbase://" + masterAddress + "/" + tableName
                    + (rowKey != null && !rowKey.isEmpty() ? "/" + rowKey : "");
        } catch (IOException e) {
            throw new RuntimeException("Failed to determine the HBase master address", e);
        }
    }
}
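
Usage example

For orientation, here is a minimal sketch of how a caller might exercise this service once it has been enabled. It is hypothetical: the table, row, and column names are made up, the Column and PutColumn constructors are assumed from the NiFi HBase client API, and in a real flow the service would be obtained from the NiFi context rather than passed in directly.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;

import org.apache.nifi.hbase.HBaseClientService;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.scan.Column;

public class HBaseClientServiceExample {

    // Writes a single cell, then scans it back and prints each row key.
    static void roundTrip(final HBaseClientService hbase) throws IOException {
        final byte[] family = hbase.toBytes("cf");
        final byte[] qualifier = hbase.toBytes("greeting");

        // hypothetical table and row key
        hbase.put("demo-table", hbase.toBytes("row-1"),
                Collections.singletonList(new PutColumn(family, qualifier, hbase.toBytes("hello"))));

        // a null filter expression and minTime = 0 scan everything for the column;
        // the handler receives each row key with its cells
        hbase.scan("demo-table", Collections.singletonList(new Column(family, qualifier)), null, 0L,
                (rowKey, cells) -> System.out.println("row: " + new String(rowKey, StandardCharsets.UTF_8)));
    }
}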