org.trafodion.sql.HBaseAccess.HBulkLoadClient.java Source code

Introduction

Here is the source code for org.trafodion.sql.HBaseAccess.HBulkLoadClient.java. This class drives Trafodion's HBase bulk load: it writes HFiles directly, hands them to LoadIncrementalHFiles, and can snapshot the target table so a failed load can be rolled back.

Source

// @@@ START COPYRIGHT @@@
//
// (C) Copyright 2014-2015 Hewlett-Packard Development Company, L.P.
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//
// @@@ END COPYRIGHT @@@

package org.trafodion.sql.HBaseAccess;

import java.net.URI;
import java.net.URISyntaxException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Iterator;
import java.io.File;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.PropertyConfigurator;
import org.apache.log4j.Logger;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.security.access.AccessController;
import org.apache.hadoop.hbase.security.access.UserPermission;
import org.apache.hadoop.hbase.security.access.Permission;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.compress.*;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.trafodion.sql.HBaseAccess.HTableClient;
//import org.trafodion.sql.HBaseAccess.HBaseClient;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.hbase.TableName;

import java.nio.ByteBuffer;
import java.nio.file.Files;

import org.apache.hive.jdbc.HiveDriver;
import java.sql.Connection;
import java.sql.Statement;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.lang.ClassNotFoundException;

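/**
 * Client used by the Trafodion bulk loader to create HFiles directly and to
 * hand them to HBase via LoadIncrementalHFiles, with an optional
 * snapshot-based restore of the target table if the load fails.
 */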
public class HBulkLoadClient {

    private final static FsPermission PERM_ALL_ACCESS = FsPermission.valueOf("-rwxrwxrwx");
    private final static FsPermission PERM_HIDDEN = FsPermission.valueOf("-rwx--x--x");
    private final static String BULKLOAD_STAGING_DIR = "hbase.bulkload.staging.dir";
    private final static long MAX_HFILE_SIZE = 10737418240L; //10 GB

    public static int BLOCKSIZE = 64 * 1024;
    public static String COMPRESSION = Compression.Algorithm.NONE.getName();
    String lastError;
    static Logger logger = Logger.getLogger(HBulkLoadClient.class.getName());
    Configuration config;
    HFile.Writer writer;
    String hFileLocation;
    String hFileName;
    long maxHFileSize = MAX_HFILE_SIZE;
    FileSystem fileSys = null;
    String compression = COMPRESSION;
    int blockSize = BLOCKSIZE;
    DataBlockEncoding dataBlockEncoding = DataBlockEncoding.NONE;
    FSDataOutputStream fsOut = null;

    public HBulkLoadClient() {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.HBulkLoadClient() called.");
    }

    public HBulkLoadClient(Configuration conf) throws IOException {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.HBulkLoadClient(...) called.");
        config = conf;
    }

    public String getLastError() {
        return lastError;
    }

    void setLastError(String err) {
        lastError = err;
    }

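    /**
     * Caches the HFile location and name, reads compression, block size and
     * data block encoding from the target table's first column family, and
     * derives the maximum HFile size: userMaxSize is in MB, and 0 means use
     * the table's MAX_FILESIZE (or the 10 GB default). When sampleTblDDL is
     * non-empty, also (re)creates a Hive sample table through the Hive JDBC
     * driver.
     */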
    public boolean initHFileParams(String hFileLoc, String hFileNm, long userMaxSize /*in MBs*/, String tblName,
            String sampleTblName, String sampleTblDDL)
            throws UnsupportedOperationException, IOException, SQLException, ClassNotFoundException {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.initHFileParams() called.");

        hFileLocation = hFileLoc;
        hFileName = hFileNm;

        HTable myHTable = new HTable(config, tblName);
        HTableDescriptor hTableDesc = myHTable.getTableDescriptor();
        HColumnDescriptor[] hColDescs = hTableDesc.getColumnFamilies();
        if (hColDescs.length > 2) // 2 column families: 1 for user data, 1 for transaction metadata
        {
            myHTable.close();
            throw new UnsupportedOperationException("only two families are supported.");
        }

        compression = hColDescs[0].getCompression().getName();
        blockSize = hColDescs[0].getBlocksize();
        dataBlockEncoding = hColDescs[0].getDataBlockEncoding();

        if (userMaxSize == 0) {
            if (hTableDesc.getMaxFileSize() == -1) {
                maxHFileSize = MAX_HFILE_SIZE;
            } else {
                maxHFileSize = hTableDesc.getMaxFileSize();
            }
        } else
            maxHFileSize = userMaxSize * 1024 * 1024; //maxSize is in MBs

        myHTable.close();

        if (sampleTblDDL.length() > 0) {
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            Connection conn = DriverManager.getConnection("jdbc:hive2://", "hive", "");
            Statement stmt = conn.createStatement();
            stmt.execute("drop table if exists " + sampleTblName);
            stmt.execute(sampleTblDDL);
            stmt.close();
            conn.close();
        }

        return true;
    }

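    /**
     * Closes any open writer and opens a new HFile writer under
     * hFileLocation, suffixing the file name with the current time in
     * milliseconds so successive files do not collide.
     */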
    public boolean doCreateHFile() throws IOException, URISyntaxException {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doCreateHFile() called.");

        if (hFileLocation == null)
            throw new NullPointerException("hFileLocation is not set");
        if (hFileName == null)
            throw new NullPointerException("hFileName is not set");

        closeHFile();

        if (fileSys == null)
            fileSys = FileSystem.get(config);

        Path hfilePath = new Path(new Path(hFileLocation), hFileName + "_" + System.currentTimeMillis());
        hfilePath = hfilePath.makeQualified(hfilePath.toUri(), null);

        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.createHFile Path: " + hfilePath);

        try {
            HFileContext hfileContext = new HFileContextBuilder().withBlockSize(blockSize)
                    .withCompression(Compression.getCompressionAlgorithmByName(compression))
                    .withDataBlockEncoding(dataBlockEncoding).build();

            writer = HFile.getWriterFactory(config, new CacheConfig(config)).withPath(fileSys, hfilePath)
                    .withFileContext(hfileContext).withComparator(KeyValue.COMPARATOR).create();
            if (logger.isDebugEnabled())
                logger.debug("HBulkLoadClient.createHFile Path: " + writer.getPath() + "Created");
        } catch (IOException e) {
            if (logger.isDebugEnabled())
                logger.debug("HBulkLoadClient.doCreateHFile Exception" + e.getMessage());
            throw e;
        }
        return true;
    }

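    /** Returns true when no HFile is open yet or the current one has grown past maxHFileSize. */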
    public boolean isNewFileNeeded() throws IOException {
        if (writer == null)
            return true;

        if (fileSys == null)
            fileSys = FileSystem.get(writer.getPath().toUri(), config);

        if (fileSys.getFileStatus(writer.getPath()).getLen() > maxHFileSize)
            return true;

        return false;
    }

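    /**
     * Appends a batch of rows to the current HFile. rowIDs and rows are
     * ByteBuffers packed by the native caller: rowIDs holds a short row count
     * followed by fixed-length row keys; rows holds, per row, a short column
     * count and then (short name length, name, int value length, value) for
     * each column. Rolls over to a new HFile first if the current one is full.
     */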
    public boolean addToHFile(short rowIDLen, Object rowIDs, Object rows) throws IOException, URISyntaxException {
        if (logger.isDebugEnabled())
            logger.debug("Enter addToHFile() ");
        if (isNewFileNeeded()) {
            doCreateHFile();
        }
        ByteBuffer bbRows, bbRowIDs;
        short numCols, numRows;
        short colNameLen;
        int colValueLen;
        byte[] colName, colValue, rowID;

        bbRowIDs = (ByteBuffer) rowIDs;
        bbRows = (ByteBuffer) rows;
        numRows = bbRowIDs.getShort();
        HTableClient htc = new HTableClient();
        long now = System.currentTimeMillis();
        for (short rowNum = 0; rowNum < numRows; rowNum++) {
            rowID = new byte[rowIDLen];
            bbRowIDs.get(rowID, 0, rowIDLen);
            numCols = bbRows.getShort();
            for (short colIndex = 0; colIndex < numCols; colIndex++) {
                colNameLen = bbRows.getShort();
                colName = new byte[colNameLen];
                bbRows.get(colName, 0, colNameLen);
                colValueLen = bbRows.getInt();
                colValue = new byte[colValueLen];
                bbRows.get(colValue, 0, colValueLen);
                KeyValue kv = new KeyValue(rowID, htc.getFamily(colName), htc.getName(colName), now, colValue);
                writer.append(kv);
            }
        }
        if (logger.isDebugEnabled())
            logger.debug("End addToHFile() ");
        return true;
    }

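    /** Closes the current HFile writer; returns false if no writer is open. */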
    public boolean closeHFile() throws IOException {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.closeHFile() called." + ((writer == null) ? "NULL" : "NOT NULL"));

        if (writer == null)
            return false;

        writer.close();
        writer = null;
        return true;
    }

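    /** Creates a snapshot of tableName, first deleting any existing snapshot with the same name. */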
    private boolean createSnapshot(String tableName, String snapshotName)
            throws MasterNotRunningException, IOException, SnapshotCreationException, InterruptedException {
        HBaseAdmin admin = null;
        try {
            admin = new HBaseAdmin(config);
            List<SnapshotDescription> lstSnaps = admin.listSnapshots();
            if (!lstSnaps.isEmpty()) {
                for (SnapshotDescription snpd : lstSnaps) {
                    if (snpd.getName().compareTo(snapshotName) == 0) {
                        if (logger.isDebugEnabled())
                            logger.debug("HbulkLoadClient.createSnapshot() -- deleting: " + snapshotName + " : "
                                    + snpd.getName());
                        admin.deleteSnapshot(snapshotName);
                    }
                }
            }
            admin.snapshot(snapshotName, tableName);
        } catch (Exception e) {
            // log the exception and rethrow it to the parent
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.createSnapshot() - Exception: " + e);
            throw e;
        } finally {
            //close HBaseAdmin instance 
            if (admin != null)
                admin.close();
        }
        return true;
    }

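    /** Disables the table if needed, restores it from the snapshot, then re-enables it. */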
    private boolean restoreSnapshot(String snapshotName, String tableName)
            throws IOException, RestoreSnapshotException {
        HBaseAdmin admin = null;
        try {
            admin = new HBaseAdmin(config);
            if (!admin.isTableDisabled(tableName))
                admin.disableTable(tableName);

            admin.restoreSnapshot(snapshotName);

            admin.enableTable(tableName);
        } catch (Exception e) {
            // log the exception and rethrow it to the parent
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.restoreSnapshot() - Exception: " + e);
            throw e;
        } finally {
            //close HBaseAdmin instance 
            if (admin != null)
                admin.close();
        }

        return true;
    }

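    /** Deletes the named snapshot if it exists, re-enabling the table first when it is disabled. */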
    private boolean deleteSnapshot(String snapshotName, String tableName) throws IOException {

        HBaseAdmin admin = null;
        boolean snapshotExists = false;
        try {
            admin = new HBaseAdmin(config);
            List<SnapshotDescription> lstSnaps = admin.listSnapshots();
            if (!lstSnaps.isEmpty()) {
                for (SnapshotDescription snpd : lstSnaps) {
                    if (snpd.getName().compareTo(snapshotName) == 0) {
                        snapshotExists = true;
                        break;
                    }
                }
            }
            if (!snapshotExists)
                return true;
            if (admin.isTableDisabled(tableName))
                admin.enableTable(tableName);
            admin.deleteSnapshot(snapshotName);
        } catch (Exception e) {
            // log the exception and rethrow it to the parent
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.deleteSnapshot() - Exception: " + e);
            throw e;
        } finally {
            //close HBaseAdmin instance 
            if (admin != null)
                admin.close();
        }
        return true;
    }

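    /**
     * Optionally snapshots the table, runs LoadIncrementalHFiles against the
     * prepared HFiles, and on failure restores the table from the snapshot.
     * The snapshot, when taken, is deleted on the way out.
     */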
    private void doSnapshotNBulkLoad(Path hFilePath, String tableName, HTable table, LoadIncrementalHFiles loader,
            boolean snapshot) throws MasterNotRunningException, IOException, SnapshotCreationException,
            InterruptedException, RestoreSnapshotException {
        String snapshotName = null;
        if (snapshot) {
            snapshotName = tableName + "_SNAPSHOT";
            createSnapshot(tableName, snapshotName);
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - snapshot created: " + snapshotName);
        }
        try {
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - bulk load started ");
            loader.doBulkLoad(hFilePath, table);
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - bulk load is done ");
        } catch (IOException e) {
            if (logger.isDebugEnabled())
                logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - Exception: " + e.toString());
            if (snapshot) {
                restoreSnapshot(snapshotName, tableName);
                if (logger.isDebugEnabled())
                    logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - snapshot restored: " + snapshotName);
            }
            throw e;
        } finally {
            if (snapshot) {
                deleteSnapshot(snapshotName, tableName);
                if (logger.isDebugEnabled())
                    logger.debug("HbulkLoadClient.doSnapshotNBulkLoad() - snapshot deleted: " + snapshotName);
            }
        }

    }

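    /**
     * Entry point for the load phase: lists the column-family directories
     * under prepLocation, relaxes their permissions so the region servers can
     * move the HFiles, creates the _tmp work directory per family, and
     * delegates to doSnapshotNBulkLoad(). Secure (quasiSecure) mode is not
     * implemented.
     */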
    public boolean doBulkLoad(String prepLocation, String tableName, boolean quasiSecure, boolean snapshot)
            throws Exception {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - start");
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - Prep Location: " + prepLocation + ", Table Name: "
                    + tableName + ", quasiSecure: " + quasiSecure + ", snapshot: " + snapshot);

        HTable table = new HTable(config, tableName);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        Path prepPath = new Path(prepLocation);
        prepPath = prepPath.makeQualified(prepPath.toUri(), null);
        FileSystem prepFs = FileSystem.get(prepPath.toUri(), config);

        Path[] hFams = FileUtil.stat2Paths(prepFs.listStatus(prepPath));

        if (quasiSecure) {
            table.close();
            throw new Exception(
                    "HBulkLoadClient.doBulkLoad() - cannot perform load. Trafodion on secure HBase mode is not implemented yet");
        } else {
            if (logger.isDebugEnabled())
                logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfiles permissions");
            for (Path hfam : hFams) {
                Path[] hfiles = FileUtil.stat2Paths(prepFs.listStatus(hfam));
                prepFs.setPermission(hfam, PERM_ALL_ACCESS);
                for (Path hfile : hfiles) {
                    if (logger.isDebugEnabled())
                        logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfile permissions:" + hfile);
                    prepFs.setPermission(hfile, PERM_ALL_ACCESS);

                }
                //create _tmp dir used as temp space for Hfile processing
                FileSystem.mkdirs(prepFs, new Path(hfam, "_tmp"), PERM_ALL_ACCESS);
            }
            if (logger.isDebugEnabled())
                logger.debug(
                        "HBulkLoadClient.doBulkLoad() - bulk load started. Loading directly from preparation directory");
            doSnapshotNBulkLoad(prepPath, tableName, table, loader, snapshot);
            if (logger.isDebugEnabled())
                logger.debug("HBulkLoadClient.doBulkLoad() - bulk load is done ");
        }
        table.close();
        return true;
    }

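    /** Recursively deletes the bulk-load preparation directory. */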
    public boolean bulkLoadCleanup(String location) throws Exception {
        Path dir = new Path(location);
        dir = dir.makeQualified(dir.toUri(), null);
        FileSystem fs = FileSystem.get(dir.toUri(), config);
        fs.delete(dir, true);

        return true;

    }

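    /** Releases the HFile writer, the file system handle, and cached state. */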
    public boolean release() throws IOException {
        if (writer != null) {
            writer.close();
            writer = null;
        }
        if (fileSys != null) {
            fileSys.close();
            fileSys = null;
        }
        config = null;
        hFileLocation = null;
        hFileName = null;
        compression = null;
        return true;
    }
}
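
Usage

Below is a minimal sketch of how a caller might drive this client end to end. It is illustrative only: in Trafodion the class is invoked from native code through JNI, and the paths, column family name "#1", and table name used here are hypothetical stand-ins.

// Hypothetical driver; assumes an HBase classpath and a reachable cluster.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.trafodion.sql.HBaseAccess.HBulkLoadClient;

public class BulkLoadExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBulkLoadClient client = new HBulkLoadClient(conf);

        // userMaxSize 0 = use the table's MAX_FILESIZE (or the 10 GB default);
        // empty sample-table arguments skip the Hive sample-table step.
        client.initHFileParams("/bulkload/TRAFODION.SCH.T1/#1", "hfile", 0,
                "TRAFODION.SCH.T1", "", "");

        // ... pack row IDs and columns into ByteBuffers and call addToHFile(...) ...

        client.closeHFile();

        // Load everything under the preparation directory; take a snapshot
        // first so the table can be restored if the load fails.
        client.doBulkLoad("/bulkload/TRAFODION.SCH.T1", "TRAFODION.SCH.T1",
                false /*quasiSecure*/, true /*snapshot*/);
        client.bulkLoadCleanup("/bulkload/TRAFODION.SCH.T1");
        client.release();
    }
}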