hrider.hbase.Connection.java Source code

Java tutorial

Introduction

Here is the source code for hrider.hbase.Connection.java

Source

package hrider.hbase;

import hrider.config.ConnectionDetails;
import hrider.config.GlobalConfig;
import hrider.data.ColumnFamily;
import hrider.data.DataCell;
import hrider.data.DataRow;
import hrider.data.TableDescriptor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileScanner;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.*;

/**
 * Copyright (C) 2012 NICE Systems ltd.
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author Igor Cher
 * @version %I%, %G%
 *          <p/>
 *          This class provides data access to the hbase tables.
 */
public class Connection {

    //region Variables
    /**
     * The name of the master server where hbase is located.
     * Initialised in the constructor from the zookeeper host of the connection details.
     */
    private String serverName;
    /**
     * A reference to the hbase administration class; every table-level operation
     * (exists/enable/disable/create/modify/delete/flush) goes through it.
     */
    private HBaseAdmin hbaseAdmin;
    /**
     * A reference to the tables factory class; tables are looked up from it by name.
     */
    private TableFactory factory;
    /**
     * The connection details this connection was created from.
     */
    private ConnectionDetails connectionDetails;
    /**
     * The listeners that are notified about table, column, row, copy, save and load operations.
     */
    private List<HbaseActionListener> listeners;
    //endregion

    //region Constructor

    /**
     * Creates a connection to the hbase cluster described by the given details.
     * <p/>
     * The server name is read from the zookeeper host, and both the table factory and the
     * administration client are built from the same freshly created configuration.
     *
     * @param connectionDetails The details of the cluster to connect to.
     * @throws IOException Any failure while creating the configuration, the table factory or the
     *                     hbase administration; the original exception is attached as the cause.
     */
    public Connection(ConnectionDetails connectionDetails) throws IOException {
        this.listeners = new ArrayList<HbaseActionListener>();
        this.connectionDetails = connectionDetails;
        this.serverName = connectionDetails.getZookeeper().getHost();

        try {
            Configuration hbaseConfig = connectionDetails.createConfig();

            this.factory = new TableFactory(hbaseConfig);
            this.hbaseAdmin = new HBaseAdmin(hbaseConfig);
        } catch (Exception e) {
            throw new IOException("Failed to access hbase administration.", e);
        }
    }
    //endregion

    //region Public Properties

    /**
     * Returns the {@link TableFactory} this connection obtains its tables from.
     *
     * @return The {@link TableFactory} instance created together with this connection.
     */
    public TableFactory getTableFactory() {
        return factory;
    }

    /**
     * Returns the {@link Configuration} held by the hbase administration of this connection.
     *
     * @return The {@link Configuration} instance reported by the hbase administration.
     */
    public Configuration getConfiguration() {
        return hbaseAdmin.getConfiguration();
    }

    /**
     * Returns the connection details this connection was created from.
     *
     * @return The {@link ConnectionDetails} instance passed to the constructor.
     */
    public ConnectionDetails getConnectionDetails() {
        return connectionDetails;
    }

    /**
     * Returns the name of the hbase master server.
     *
     * @return The server name, as read from the zookeeper host of the connection details.
     */
    public String getServerName() {
        return serverName;
    }
    //endregion

    //region Public Methods

    /**
     * Retrieves the descriptor of the specified table from the hbase administration and wraps it
     * in a {@link TableDescriptor}.
     *
     * @param tableName The name of the table.
     * @return A new {@link TableDescriptor} wrapping the descriptor reported by hbase.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public TableDescriptor getTableDescriptor(String tableName) throws IOException, TableNotFoundException {
        byte[] rawName = Bytes.toBytes(tableName);

        return new TableDescriptor(this.hbaseAdmin.getTableDescriptor(rawName));
    }

    /**
     * Registers a listener that will be notified about hbase related operations.
     *
     * @param listener The listener to register.
     */
    public void addListener(HbaseActionListener listener) {
        listeners.add(listener);
    }

    /**
     * Unregisters a previously added listener.
     *
     * @param listener The listener to unregister.
     */
    public void removeListener(HbaseActionListener listener) {
        listeners.remove(listener);
    }

    /**
     * Resets the caching by clearing the table factory.
     */
    public void reset() {
        factory.clear();
    }

    /**
     * Tells whether a table with the specified name exists on the hbase cluster.
     * A {@code null} name is answered with {@code false} without contacting the cluster.
     *
     * @param tableName The name of the table to check.
     * @return True if the table exists on the hbase cluster or False otherwise.
     * @throws IOException Error accessing hbase.
     */
    @SuppressWarnings("BooleanMethodNameMustStartWithQuestion")
    public boolean tableExists(String tableName) throws IOException {
        if (tableName == null) {
            return false;
        }

        return this.hbaseAdmin.tableExists(tableName);
    }

    /**
     * Checks whether the specified table is enabled.
     * <p/>
     * A table recognised by {@code TableUtil.isMetaTable} is reported as enabled without querying
     * the cluster. A {@code null} name is reported as not enabled.
     *
     * @param tableName The name of the table to check.
     * @return True if the table is enabled or False otherwise.
     * @throws IOException Error accessing hbase.
     */
    public boolean tableEnabled(String tableName) throws IOException {
        // Reject null up front, the same way tableExists does, so that no helper ever receives a
        // null name; previously the null check only ran after the meta-table test had used the name.
        if (tableName == null) {
            return false;
        }

        return TableUtil.isMetaTable(tableName) || this.hbaseAdmin.isTableEnabled(tableName);
    }

    /**
     * Creates a new table or modifies an existing one in the hbase cluster, using a
     * {@link TableDescriptor} built from nothing but the table name.
     *
     * @param tableName The name of the table to create.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException Propagated from the descriptor based overload.
     */
    public void createOrModifyTable(String tableName) throws IOException, TableNotFoundException {
        TableDescriptor descriptor = new TableDescriptor(tableName);

        createOrModifyTable(descriptor);
    }

    /**
     * Creates a new table or modifies an existing one in the hbase cluster.
     * <p/>
     * A table that does not exist yet is created from the descriptor. An existing table is disabled
     * (if it is enabled), modified to match the descriptor and then enabled. Listeners are notified
     * with "created" or "modified" once the operation succeeded.
     * <p/>
     * If the modification fails after this method disabled the table, an attempt is made to enable
     * the table again so that a failed modification does not leave it disabled; the modification
     * failure is the exception reported to the caller.
     *
     * @param tableDescriptor The descriptor of the table to create.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException Propagated from the hbase administration calls.
     */
    public void createOrModifyTable(TableDescriptor tableDescriptor) throws IOException, TableNotFoundException {
        String name = tableDescriptor.getName();

        if (!this.hbaseAdmin.tableExists(name)) {
            this.hbaseAdmin.createTable(tableDescriptor.toDescriptor());

            for (HbaseActionListener listener : this.listeners) {
                listener.tableOperation(name, "created");
            }
            return;
        }

        // Remember whether the table was taken offline here, so only that case is rolled back.
        boolean disabledHere = false;
        if (this.hbaseAdmin.isTableEnabled(name)) {
            this.hbaseAdmin.disableTable(name);
            disabledHere = true;
        }

        boolean modified = false;
        try {
            this.hbaseAdmin.modifyTable(Bytes.toBytes(name), tableDescriptor.toDescriptor());
            modified = true;
        } finally {
            if (!modified && disabledHere) {
                try {
                    this.hbaseAdmin.enableTable(name);
                } catch (IOException ignored) {
                    // Best effort only: the modification failure is the error the caller must see.
                }
            }
        }

        this.hbaseAdmin.enableTable(name);

        for (HbaseActionListener listener : this.listeners) {
            listener.tableOperation(name, "modified");
        }
    }

    /**
     * Removes a table from the hbase cluster. Nothing happens when the table does not exist.
     * An enabled table is disabled first; listeners are notified with "deleted" afterwards.
     *
     * @param tableName The name of the table to delete.
     * @throws IOException Error accessing hbase.
     */
    public void deleteTable(String tableName) throws IOException {
        if (!this.hbaseAdmin.tableExists(tableName)) {
            return;
        }

        boolean enabled = this.hbaseAdmin.isTableEnabled(tableName);
        if (enabled) {
            this.hbaseAdmin.disableTable(tableName);
        }
        this.hbaseAdmin.deleteTable(tableName);

        for (HbaseActionListener listener : this.listeners) {
            listener.tableOperation(tableName, "deleted");
        }
    }

    /**
     * Truncates a table. All the data will be removed.
     * <p/>
     * The table is dropped and then created again from the descriptor it had before the drop.
     * Listeners are notified with "truncated" afterwards.
     *
     * @param tableName The name of the table to be truncated.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public void truncateTable(String tableName) throws IOException, TableNotFoundException {
        // Capture the schema first; it is the only thing that survives the drop.
        HTableDescriptor schema = this.hbaseAdmin.getTableDescriptor(Bytes.toBytes(tableName));

        // Drop the table; it is disabled first when it is still enabled.
        boolean enabled = this.hbaseAdmin.isTableEnabled(tableName);
        if (enabled) {
            this.hbaseAdmin.disableTable(tableName);
        }
        this.hbaseAdmin.deleteTable(tableName);

        // Bring the table back, empty, with the captured schema.
        this.hbaseAdmin.createTable(schema);

        for (HbaseActionListener listener : this.listeners) {
            listener.tableOperation(tableName, "truncated");
        }
    }

    /**
     * Copies all the data from one table to another. The tables can be on different clusters.
     * <p/>
     * The target table is created or modified on this connection first. The source table is then
     * scanned and every row is turned into a {@link Put} carrying all of its key values. Puts are
     * written to the target whenever the configured write batch size is reached, and once more
     * at the end for the remainder. Listeners receive a copy notification for every scanned row.
     *
     * @param targetTable   The descriptor of the target table (on this connection).
     * @param sourceTable   The descriptor of the source table.
     * @param sourceCluster The source cluster where the source table is located.
     * @throws IOException            Error accessing hbase on one of the clusters or on both clusters.
     * @throws TableNotFoundException Propagated from creating or modifying the target table.
     */
    public void copyTable(TableDescriptor targetTable, TableDescriptor sourceTable, Connection sourceCluster)
            throws IOException, TableNotFoundException {
        createOrModifyTable(targetTable);

        HTable source = sourceCluster.factory.get(sourceTable.getName());
        HTable target = this.factory.get(targetTable.getName());

        Scan fullScan = new Scan();
        fullScan.setCaching(GlobalConfig.instance().getBatchSizeForRead());

        ResultScanner scanner = source.getScanner(fullScan);
        try {
            List<Put> pending = new ArrayList<Put>();
            int writeBatchSize = GlobalConfig.instance().getBatchSizeForWrite();

            // A null result marks the end of the scan.
            for (Result row = scanner.next(); row != null; row = scanner.next()) {
                Put copy = new Put(row.getRow());
                for (KeyValue keyValue : row.list()) {
                    copy.add(keyValue);
                }
                pending.add(copy);

                if (pending.size() == writeBatchSize) {
                    target.put(pending);
                    pending.clear();
                }

                for (HbaseActionListener listener : this.listeners) {
                    listener.copyOperation(sourceCluster.serverName, sourceTable.getName(), this.serverName,
                            targetTable.getName(), row);
                }
            }

            // Write whatever did not fill a complete batch.
            if (!pending.isEmpty()) {
                target.put(pending);
            }
        } finally {
            scanner.close();
        }
    }

    /**
     * Saves a table locally to an HFile.
     * <p/>
     * The whole table is scanned and every key value of every row is appended to a store file
     * writer created on the local file system. The writer is given a cache configuration built
     * from a copy of the connection configuration whose block cache size is set to zero.
     * Listeners receive a save notification for every row after its key values were appended.
     * <p/>
     * Both the scanner and the writer are closed when the method returns or fails; the writer is
     * closed even if closing the scanner fails.
     *
     * @param tableName The name of the table.
     * @param path      The path to the file.
     * @throws IOException Error accessing hbase.
     */
    public void saveTable(String tableName, String path) throws IOException {
        FileSystem fs = FileSystem.getLocal(this.getConfiguration());
        HTable table = this.factory.get(tableName);

        Configuration cacheConfig = new Configuration(this.getConfiguration());
        cacheConfig.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);

        StoreFile.Writer writer = new StoreFile.WriterBuilder(this.getConfiguration(), new CacheConfig(cacheConfig),
                fs, HColumnDescriptor.DEFAULT_BLOCKSIZE).withFilePath(new Path(path)).build();

        try {
            Scan scan = new Scan();
            scan.setCaching(GlobalConfig.instance().getBatchSizeForRead());

            ResultScanner scanner = table.getScanner(scan);
            try {
                // A null result marks the end of the scan.
                for (Result result = scanner.next(); result != null; result = scanner.next()) {
                    for (KeyValue keyValue : result.list()) {
                        writer.append(keyValue);
                    }

                    for (HbaseActionListener listener : this.listeners) {
                        listener.saveOperation(tableName, path, result);
                    }
                }
            } finally {
                scanner.close();
            }
        } finally {
            // Kept in its own finally so that a failure while closing the scanner cannot skip it.
            writer.close();
        }
    }

    /**
     * Asks the hbase administration to flush the specified table, i.e. to write its
     * in-memory portion into HFiles.
     *
     * @param tableName The name of the table to flush.
     * @throws IOException          Error accessing hbase.
     * @throws InterruptedException Propagated from the hbase administration flush call.
     */
    public void flushTable(String tableName) throws IOException, InterruptedException {
        hbaseAdmin.flush(tableName);
    }

    /**
     * Asks the hbase administration to enable the specified table.
     *
     * @param tableName The name of the table to enable.
     * @throws IOException Error accessing hbase.
     */
    public void enableTable(String tableName) throws IOException {
        hbaseAdmin.enableTable(tableName);
    }

    /**
     * Loads a locally saved HFile to an existing table.
     * <p/>
     * The file is read key value by key value. Consecutive key values that share a row key are
     * gathered into a single {@link Put}. Whenever the row key changes and the number of collected
     * puts equals the configured write batch size, the collected puts are written to the table;
     * the remainder is written once the file is exhausted. Column families found in the file that
     * the table does not have are added to the table before the puts collected so far are written.
     * <p/>
     * Listeners receive a load notification for every put once all of its key values were read,
     * which is before that put is written to the table.
     *
     * @param tableName The name of the table to load to.
     * @param path      The path to the HFile.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public void loadTable(String tableName, String path) throws IOException, TableNotFoundException {
        FileSystem fs = FileSystem.getLocal(this.getConfiguration());
        HTable table = this.factory.get(tableName);

        HTableDescriptor td = this.hbaseAdmin.getTableDescriptor(Bytes.toBytes(tableName));

        // The column families the table has right now, according to its descriptor.
        Collection<ColumnFamily> families = new HashSet<ColumnFamily>();
        for (HColumnDescriptor column : td.getColumnFamilies()) {
            families.add(new ColumnFamily(column));
        }

        StoreFile.Reader reader = new StoreFile.Reader(fs, new Path(path),
                new CacheConfig(this.getConfiguration()));

        try {
            StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
            //SchemaMetrics.configureGlobally(this.getConfiguration());

            // move to the first row.
            // NOTE(review): the seek key is a KeyValue built over 16 zero bytes; presumably it sorts
            // before every real key in the file - confirm against the KeyValue/HFile documentation.
            scanner.seek(new KeyValue(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }));

            // Families seen in the file that are not in 'families' yet; created just before a write.
            Collection<ColumnFamily> familiesToCreate = new HashSet<ColumnFamily>();

            // 'put' is the row currently being assembled; it is always the last element of 'puts'.
            Put put = null;
            List<Put> puts = new ArrayList<Put>();

            boolean isValid;
            int batchSize = GlobalConfig.instance().getBatchSizeForWrite();

            do {
                KeyValue kv = scanner.next();

                // A null key value ends the loop.
                isValid = kv != null;
                if (isValid) {
                    ColumnFamily columnFamily = new ColumnFamily(Bytes.toStringBinary(kv.getFamily()));
                    if (!families.contains(columnFamily)) {
                        familiesToCreate.add(columnFamily);
                    }

                    // The very first key value starts the first row.
                    if (put == null) {
                        put = new Put(kv.getRow());
                        puts.add(put);
                    }

                    // The row key changed: the previous put is complete.
                    if (!Arrays.equals(put.getRow(), kv.getRow())) {
                        for (HbaseActionListener listener : this.listeners) {
                            listener.loadOperation(tableName, path, put);
                        }

                        // Only complete rows are ever written; the batch is checked at row boundaries.
                        if (puts.size() == batchSize) {
                            if (!familiesToCreate.isEmpty()) {
                                createFamilies(tableName, toDescriptors(familiesToCreate));

                                // Remember the new families so they are not created a second time.
                                families.addAll(familiesToCreate);
                                familiesToCreate.clear();
                            }

                            HTableUtil.bucketRsPut(table, puts);
                            puts.clear();
                        }

                        // Start assembling the next row.
                        put = new Put(kv.getRow());
                        puts.add(put);
                    }

                    put.add(kv);
                }
            } while (isValid);

            // add the last put to the table.
            if (!puts.isEmpty()) {
                // The last assembled put has not been announced yet, because no row change followed it.
                for (HbaseActionListener listener : this.listeners) {
                    listener.loadOperation(tableName, path, put);
                }

                if (!familiesToCreate.isEmpty()) {
                    createFamilies(tableName, toDescriptors(familiesToCreate));
                }

                HTableUtil.bucketRsPut(table, puts);
            }
        } finally {
            reader.close(false);
        }
    }

    /**
     * Sets or adds rows to the table.
     * <p/>
     * One {@link Put} is built per row from all of its non-key cells. Column families used by the
     * cells but missing from the table are added to the table before the puts are written.
     * Listeners are notified with "added" for every row while the puts are being built.
     *
     * @param tableName The name of the table to update.
     * @param rows      A list of rows to set/add.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public void setRows(String tableName, Iterable<DataRow> rows) throws IOException, TableNotFoundException {
        HTableDescriptor schema = this.hbaseAdmin.getTableDescriptor(Bytes.toBytes(tableName));

        // The column families the table has right now.
        Collection<ColumnFamily> knownFamilies = new ArrayList<ColumnFamily>();
        for (HColumnDescriptor descriptor : schema.getColumnFamilies()) {
            knownFamilies.add(new ColumnFamily(descriptor));
        }

        Collection<ColumnFamily> missingFamilies = new HashSet<ColumnFamily>();
        List<Put> batch = new ArrayList<Put>();

        for (DataRow row : rows) {
            Put put = new Put(row.getKey().getValue());

            for (DataCell cell : row.getCells()) {
                // The key cell is already represented by the put itself.
                if (cell.isKey()) {
                    continue;
                }

                if (!knownFamilies.contains(cell.getColumn().getColumnFamily())) {
                    missingFamilies.add(cell.getColumn().getColumnFamily());
                }

                byte[] familyBytes = Bytes.toBytesBinary(cell.getColumn().getFamily());
                byte[] qualifierBytes = Bytes.toBytesBinary(cell.getColumn().getName());
                byte[] valueBytes = cell.getValueAsByteArray();

                put.add(familyBytes, qualifierBytes, valueBytes);
            }

            batch.add(put);

            for (HbaseActionListener listener : this.listeners) {
                listener.rowOperation(tableName, row, "added");
            }
        }

        if (!missingFamilies.isEmpty()) {
            createFamilies(tableName, toDescriptors(missingFamilies));
        }

        HTableUtil.bucketRsPut(this.factory.get(tableName), batch);
    }

    /**
     * Sets or adds a single row to the table.
     * <p/>
     * A {@link Put} is built from all non-key cells of the row. Column families used by the cells
     * but missing from the table are added to the table before the put is written. Listeners are
     * notified with "added" after the put was written.
     *
     * @param tableName The name of the table to update.
     * @param row       The row to set/add.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public void setRow(String tableName, DataRow row) throws IOException, TableNotFoundException {
        HTableDescriptor schema = this.hbaseAdmin.getTableDescriptor(Bytes.toBytes(tableName));

        // The column families the table has right now.
        Collection<ColumnFamily> knownFamilies = new ArrayList<ColumnFamily>();
        for (HColumnDescriptor descriptor : schema.getColumnFamilies()) {
            knownFamilies.add(new ColumnFamily(descriptor));
        }

        Collection<ColumnFamily> missingFamilies = new HashSet<ColumnFamily>();

        Put put = new Put(row.getKey().getValue());
        for (DataCell cell : row.getCells()) {
            // The key cell is already represented by the put itself.
            if (cell.isKey()) {
                continue;
            }

            if (!knownFamilies.contains(cell.getColumn().getColumnFamily())) {
                missingFamilies.add(cell.getColumn().getColumnFamily());
            }

            byte[] familyBytes = Bytes.toBytesBinary(cell.getColumn().getFamily());
            byte[] qualifierBytes = Bytes.toBytesBinary(cell.getColumn().getName());
            byte[] valueBytes = cell.getValueAsByteArray();

            put.add(familyBytes, qualifierBytes, valueBytes);
        }

        if (!missingFamilies.isEmpty()) {
            createFamilies(tableName, toDescriptors(missingFamilies));
        }

        this.factory.get(tableName).put(put);

        for (HbaseActionListener listener : this.listeners) {
            listener.rowOperation(tableName, row, "added");
        }
    }

    /**
     * Removes a row, identified by its key, from the table and notifies the listeners
     * with "removed".
     *
     * @param tableName The name of the table.
     * @param row       The row to be deleted.
     * @throws IOException Error accessing hbase.
     */
    public void deleteRow(String tableName, DataRow row) throws IOException {
        HTable owner = this.factory.get(tableName);
        Delete removal = new Delete(row.getKey().getValue());
        owner.delete(removal);

        for (HbaseActionListener listener : this.listeners) {
            listener.rowOperation(tableName, row, "removed");
        }
    }

    /**
     * Lists the names of all tables reported by the hbase administration.
     *
     * @return A list of table names, in the order the tables were reported.
     * @throws IOException Error accessing hbase.
     */
    public Collection<String> getTables() throws IOException {
        Collection<String> names = new ArrayList<String>();

        for (HTableDescriptor descriptor : this.hbaseAdmin.listTables()) {
            String name = descriptor.getNameAsString();
            names.add(name);
        }

        return names;
    }

    /**
     * Gets all column families of the specified table, each wrapped in a {@link ColumnFamily}.
     *
     * @param tableName The name of the table which column families should be retrieved.
     * @return A list of column families.
     * @throws IOException            Error accessing hbase.
     * @throws TableNotFoundException The specified table does not exist.
     */
    public Collection<ColumnFamily> getColumnFamilies(String tableName) throws IOException, TableNotFoundException {
        HTableDescriptor schema = this.hbaseAdmin.getTableDescriptor(Bytes.toBytes(tableName));

        Collection<ColumnFamily> result = new ArrayList<ColumnFamily>();
        for (HColumnDescriptor descriptor : schema.getColumnFamilies()) {
            result.add(new ColumnFamily(descriptor));
        }

        return result;
    }

    /**
     * Creates a scanner bound to this connection for the specified table.
     *
     * @param tableName The name of the table to be scanned.
     * @return A new scanner instance.
     */
    public Scanner getScanner(String tableName) {
        Scanner tableScanner = new Scanner(this, tableName);
        return tableScanner;
    }

    /**
     * Creates a query scanner bound to this connection for the specified table.
     *
     * @param tableName The name of the table to be scanned.
     * @param query     The query the scanner is created with.
     * @return A new query scanner instance.
     */
    public QueryScanner getScanner(String tableName, Query query) {
        QueryScanner queryScanner = new QueryScanner(this, tableName, query);
        return queryScanner;
    }
    //endregion

    //region Private Methods

    /**
     * Adds column families to the specified table.
     * <p/>
     * The table is disabled (if it is enabled), every family is added, listeners are notified with
     * "added" per family, and finally the table is enabled.
     * <p/>
     * If adding a family or notifying a listener fails after this method disabled the table, an
     * attempt is made to enable the table again so that the failure does not leave it disabled;
     * the original failure is the exception reported to the caller.
     *
     * @param tableName The name of the table to add column families.
     * @param families  A list of column families to add.
     * @throws IOException Error accessing hbase.
     */
    private void createFamilies(String tableName, Iterable<HColumnDescriptor> families) throws IOException {
        // Remember whether the table was taken offline here, so only that case is rolled back.
        boolean disabledHere = false;
        if (this.hbaseAdmin.isTableEnabled(tableName)) {
            this.hbaseAdmin.disableTable(tableName);
            disabledHere = true;
        }

        boolean completed = false;
        try {
            for (HColumnDescriptor family : families) {
                this.hbaseAdmin.addColumn(tableName, family);

                for (HbaseActionListener listener : this.listeners) {
                    listener.columnOperation(tableName, family.getNameAsString(), "added");
                }
            }
            completed = true;
        } finally {
            if (!completed && disabledHere) {
                try {
                    this.hbaseAdmin.enableTable(tableName);
                } catch (IOException ignored) {
                    // Best effort only: the failure that aborted the loop is the error the caller must see.
                }
            }
        }

        this.hbaseAdmin.enableTable(tableName);
    }

    /**
     * Converts column families to column descriptors, keeping the iteration order of the input.
     *
     * @param families The column families to convert.
     * @return The column descriptors produced by calling {@code toDescriptor()} on every family.
     */
    private static Iterable<HColumnDescriptor> toDescriptors(Iterable<ColumnFamily> families) {
        Collection<HColumnDescriptor> converted = new ArrayList<HColumnDescriptor>();

        Iterator<ColumnFamily> remaining = families.iterator();
        while (remaining.hasNext()) {
            converted.add(remaining.next().toDescriptor());
        }

        return converted;
    }
    //endregion
}