org.apache.kylin.storage.hbase.HBaseResourceStore.java Source code

Introduction

Here is the source code for org.apache.kylin.storage.hbase.HBaseResourceStore.java. HBaseResourceStore is Apache Kylin's HBase-backed implementation of ResourceStore: each metadata resource is stored as one row in a single HBase table, under column family "f" with a content column "c" and a timestamp column "t", and values too large for an HBase cell are redirected to HDFS.
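
For orientation, here is a minimal sketch of how a caller usually reaches this class. It assumes a metadata URL with the hbase scheme, for example kylin.metadata.url=kylin_metadata@hbase; Kylin's ResourceStore factory resolves the concrete store from that scheme.

// Hedged sketch, not part of the file below: the usual Kylin entry point.
KylinConfig config = KylinConfig.getInstanceFromEnv();
ResourceStore store = ResourceStore.getStore(config); // an HBaseResourceStore for an "hbase" URL
System.out.println(store); // prints "<tableName>@hbase", see toString() in the source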

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.storage.hbase;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeSet;
import java.util.UUID;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.StorageURL;
import org.apache.kylin.common.persistence.RawResource;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.StringEntity;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.BytesUtil;
import org.apache.kylin.common.util.HadoopUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

public class HBaseResourceStore extends ResourceStore {

    private static final Logger logger = LoggerFactory.getLogger(HBaseResourceStore.class);

    private static final String FAMILY = "f";

    private static final byte[] B_FAMILY = Bytes.toBytes(FAMILY);

    private static final String COLUMN = "c";

    private static final byte[] B_COLUMN = Bytes.toBytes(COLUMN);

    private static final String COLUMN_TS = "t";

    private static final byte[] B_COLUMN_TS = Bytes.toBytes(COLUMN_TS);

    final String tableName;
    final StorageURL metadataUrl;

    Connection getConnection() throws IOException {
        return HBaseConnection.get(metadataUrl);
    }

    public HBaseResourceStore(KylinConfig kylinConfig) throws IOException {
        super(kylinConfig);
        metadataUrl = buildMetadataUrl(kylinConfig);
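        // the URL identifier (e.g. "kylin_metadata" in "kylin_metadata@hbase") names the HBase table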
        tableName = metadataUrl.getIdentifier();
        createHTableIfNeeded(tableName);
    }

    private StorageURL buildMetadataUrl(KylinConfig kylinConfig) throws IOException {
        StorageURL url = kylinConfig.getMetadataUrl();
        if (!url.getScheme().equals("hbase"))
            throw new IOException("Cannot create HBaseResourceStore. Url not match. Url: " + url);

        // control timeout for prompt error report
        Map<String, String> newParams = new LinkedHashMap<>();
        newParams.put("hbase.client.scanner.timeout.period", "10000");
        newParams.put("hbase.rpc.timeout", "5000");
        newParams.put("hbase.client.retries.number", "1");
        newParams.putAll(url.getAllParameters());

        return url.copy(newParams);
    }

    private void createHTableIfNeeded(String tableName) throws IOException {
        HBaseConnection.createHTableIfNeeded(getConnection(), tableName, FAMILY);
    }

    @Override
    protected boolean existsImpl(String resPath) throws IOException {
        Result r = getFromHTable(resPath, false, false);
        return r != null;
    }

    @Override
    protected NavigableSet<String> listResourcesImpl(String folderPath) throws IOException {
        final TreeSet<String> result = new TreeSet<>();

        visitFolder(folderPath, new KeyOnlyFilter(), new FolderVisitor() {
            @Override
            public void visit(String childPath, String fullPath, Result hbaseResult) {
                result.add(childPath);
            }
        });
        // return null to indicate not a folder
        return result.isEmpty() ? null : result;
    }

    /* Overrides meta store UUID creation for backward compatibility. */
    @Override
    public String createMetaStoreUUID() throws IOException {
        try (final Admin hbaseAdmin = HBaseConnection.get(metadataUrl).getAdmin()) {
            final String metaStoreName = metadataUrl.getIdentifier();
            final HTableDescriptor desc = hbaseAdmin.getTableDescriptor(TableName.valueOf(metaStoreName));
            String uuid = desc.getValue(HBaseConnection.HTABLE_UUID_TAG);
            if (uuid != null)
                return uuid;
            return UUID.randomUUID().toString();
        } catch (Exception e) {
            return null;
        }
    }

    @Override
    public String getMetaStoreUUID() throws IOException {
        if (!exists(ResourceStore.METASTORE_UUID_TAG)) {
            putResource(ResourceStore.METASTORE_UUID_TAG, new StringEntity(createMetaStoreUUID()), 0,
                    StringEntity.serializer);
        }
        StringEntity entity = getResource(ResourceStore.METASTORE_UUID_TAG, StringEntity.class,
                StringEntity.serializer);
        return entity.toString();
    }

    private void visitFolder(String folderPath, Filter filter, FolderVisitor visitor) throws IOException {
        assert folderPath.startsWith("/");
        String lookForPrefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";
        byte[] startRow = Bytes.toBytes(lookForPrefix);
        byte[] endRow = Bytes.toBytes(lookForPrefix);
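        // incrementing the last byte of the prefix gives the smallest row key that
        // sorts after every key sharing the prefix, so [startRow, endRow) covers
        // exactly the folder subtree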
        endRow[endRow.length - 1]++;

        Table table = getConnection().getTable(TableName.valueOf(tableName));
        Scan scan = new Scan(startRow, endRow);
        if (!(filter instanceof KeyOnlyFilter)) {
            scan.addColumn(B_FAMILY, B_COLUMN_TS);
            scan.addColumn(B_FAMILY, B_COLUMN);
        }
        if (filter != null) {
            scan.setFilter(filter);
        }

        tuneScanParameters(scan);

        try {
            ResultScanner scanner = table.getScanner(scan);
            try {
                for (Result r : scanner) {
                    String path = Bytes.toString(r.getRow());
                    assert path.startsWith(lookForPrefix);
                    int cut = path.indexOf('/', lookForPrefix.length());
                    String child = cut < 0 ? path : path.substring(0, cut);
                    visitor.visit(child, path, r);
                }
            } finally {
                // the scanner holds server-side resources and must be closed too
                IOUtils.closeQuietly(scanner);
            }
        } finally {
            IOUtils.closeQuietly(table);
        }
    }

    private void tuneScanParameters(Scan scan) {
        // an earlier variant divided the cache size by 10 because some resources,
        // such as dictionaries or snapshots, can be very large; the full configured
        // cache size is used now
        scan.setCaching(kylinConfig.getHBaseScanCacheRows());

        scan.setMaxResultSize(kylinConfig.getHBaseScanMaxResultSize());
        scan.setCacheBlocks(true);
    }

    interface FolderVisitor {
        void visit(String childPath, String fullPath, Result hbaseResult) throws IOException;
    }

    @Override
    protected List<RawResource> getAllResourcesImpl(String folderPath, long timeStart, long timeEndExclusive)
            throws IOException {
        FilterList filter = generateTimeFilterList(timeStart, timeEndExclusive);
        final List<RawResource> result = Lists.newArrayList();
        try {
            visitFolder(folderPath, filter, new FolderVisitor() {
                @Override
                public void visit(String childPath, String fullPath, Result hbaseResult) throws IOException {
                    // is a direct child (not grand child)?
                    if (childPath.equals(fullPath))
                        result.add(
                                new RawResource(getInputStream(childPath, hbaseResult), getTimestamp(hbaseResult)));
                }
            });
        } catch (IOException e) {
            for (RawResource rawResource : result) {
                IOUtils.closeQuietly(rawResource.inputStream);
            }
            throw e;
        }
        return result;
    }

    private FilterList generateTimeFilterList(long timeStart, long timeEndExclusive) {
        FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
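        // MUST_PASS_ALL combines the filters below with logical AND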
        if (timeStart != Long.MIN_VALUE) {
            SingleColumnValueFilter timeStartFilter = new SingleColumnValueFilter(B_FAMILY, B_COLUMN_TS,
                    CompareFilter.CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(timeStart));
            filterList.addFilter(timeStartFilter);
        }
        if (timeEndExclusive != Long.MAX_VALUE) {
            SingleColumnValueFilter timeEndFilter = new SingleColumnValueFilter(B_FAMILY, B_COLUMN_TS,
                    CompareFilter.CompareOp.LESS, Bytes.toBytes(timeEndExclusive));
            filterList.addFilter(timeEndFilter);
        }
        return filterList.getFilters().isEmpty() ? null : filterList;
    }

    private InputStream getInputStream(String resPath, Result r) throws IOException {
        if (r == null) {
            return null;
        }
        byte[] value = r.getValue(B_FAMILY, B_COLUMN);
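        // an empty cell is the marker left by buildPut() when the payload was
        // spilled to HDFS; follow the redirect to read it back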
        if (value.length == 0) {
            Path redirectPath = bigCellHDFSPath(resPath);
            FileSystem fileSystem = HadoopUtil.getWorkingFileSystem(HBaseConnection.getCurrentHBaseConfiguration());

            try {
                return fileSystem.open(redirectPath);
            } catch (IOException ex) {
                throw new IOException("Failed to read resource at " + resPath, ex);
            }
        } else {
            return new ByteArrayInputStream(value);
        }
    }

    private long getTimestamp(Result r) {
        if (r == null || r.getValue(B_FAMILY, B_COLUMN_TS) == null) {
            return 0;
        } else {
            return Bytes.toLong(r.getValue(B_FAMILY, B_COLUMN_TS));
        }
    }

    @Override
    protected RawResource getResourceImpl(String resPath) throws IOException {
        Result r = getFromHTable(resPath, true, true);
        if (r == null)
            return null;
        else
            return new RawResource(getInputStream(resPath, r), getTimestamp(r));
    }

    @Override
    protected long getResourceTimestampImpl(String resPath) throws IOException {
        return getTimestamp(getFromHTable(resPath, false, true));
    }

    @Override
    protected void putResourceImpl(String resPath, InputStream content, long ts) throws IOException {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        IOUtils.copy(content, bout);
        bout.close();

        Table table = getConnection().getTable(TableName.valueOf(tableName));
        try {
            byte[] row = Bytes.toBytes(resPath);
            Put put = buildPut(resPath, ts, row, bout.toByteArray(), table);

            table.put(put);
        } finally {
            IOUtils.closeQuietly(table);
        }
    }

    @Override
    protected long checkAndPutResourceImpl(String resPath, byte[] content, long oldTS, long newTS)
            throws IOException, IllegalStateException {
        Table table = getConnection().getTable(TableName.valueOf(tableName));
        try {
            byte[] row = Bytes.toBytes(resPath);
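            // checkAndPut with a null expected value succeeds only if the cell does
            // not exist yet, so oldTS == 0 models a first-time write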
            byte[] bOldTS = oldTS == 0 ? null : Bytes.toBytes(oldTS);
            Put put = buildPut(resPath, newTS, row, content, table);

            boolean ok = table.checkAndPut(row, B_FAMILY, B_COLUMN_TS, bOldTS, put);
            logger.trace("Update row {} from oldTs: {}, to newTs: {}, operation result: {}", resPath, oldTS, newTS,
                    ok);
            if (!ok) {
                long real = getResourceTimestampImpl(resPath);
                throw new IllegalStateException(
                        "Overwriting conflict " + resPath + ", expect old TS " + oldTS + ", but it is " + real);
            }

            return newTS;
        } finally {
            IOUtils.closeQuietly(table);
        }
    }

    @Override
    protected void deleteResourceImpl(String resPath) throws IOException {
        Table table = getConnection().getTable(TableName.valueOf(tableName));
        try {
            boolean hdfsResourceExist = false;
            Result result = internalGetFromHTable(table, resPath, true, false);
            if (result != null) {
                byte[] value = result.getValue(B_FAMILY, B_COLUMN);
                if (value != null && value.length == 0) {
                    hdfsResourceExist = true;
                }
            }

            Delete del = new Delete(Bytes.toBytes(resPath));
            table.delete(del);

            if (hdfsResourceExist) { // remove hdfs cell value
                Path redirectPath = bigCellHDFSPath(resPath);
                FileSystem fileSystem = HadoopUtil
                        .getWorkingFileSystem(HBaseConnection.getCurrentHBaseConfiguration());

                if (fileSystem.exists(redirectPath)) {
                    fileSystem.delete(redirectPath, true);
                }
            }
        } finally {
            IOUtils.closeQuietly(table);
        }
    }

    @Override
    protected String getReadableResourcePathImpl(String resPath) {
        return tableName + "(key='" + resPath + "')@" + kylinConfig.getMetadataUrl();
    }

    private Result getFromHTable(String path, boolean fetchContent, boolean fetchTimestamp) throws IOException {
        Table table = getConnection().getTable(TableName.valueOf(tableName));
        try {
            return internalGetFromHTable(table, path, fetchContent, fetchTimestamp);
        } finally {
            IOUtils.closeQuietly(table);
        }
    }

    private Result internalGetFromHTable(Table table, String path, boolean fetchContent, boolean fetchTimestamp)
            throws IOException {
        byte[] rowkey = Bytes.toBytes(path);

        Get get = new Get(rowkey);

        if (!fetchContent && !fetchTimestamp) {
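            // existence-only probe: the server reports whether the row exists
            // without shipping any cell data back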
            get.setCheckExistenceOnly(true);
        } else {
            if (fetchContent)
                get.addColumn(B_FAMILY, B_COLUMN);
            if (fetchTimestamp)
                get.addColumn(B_FAMILY, B_COLUMN_TS);
        }

        Result result = table.get(get);
        boolean exists = result != null
                && (!result.isEmpty() || (result.getExists() != null && result.getExists()));
        return exists ? result : null;
    }

    private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, Table table) throws IOException {
        Path redirectPath = bigCellHDFSPath(resPath);
        FileSystem fileSystem = HadoopUtil.getWorkingFileSystem(HBaseConnection.getCurrentHBaseConfiguration());

        if (fileSystem.exists(redirectPath)) {
            fileSystem.delete(redirectPath, true);
        }

        FSDataOutputStream out = fileSystem.create(redirectPath);

        try {
            out.write(largeColumn);
        } finally {
            IOUtils.closeQuietly(out);
        }

        return redirectPath;
    }

    public Path bigCellHDFSPath(String resPath) {
        String hdfsWorkingDirectory = this.kylinConfig.getHdfsWorkingDirectory();
        Path redirectPath = new Path(hdfsWorkingDirectory, "resources" + resPath);
        return redirectPath;
    }

    private Put buildPut(String resPath, long ts, byte[] row, byte[] content, Table table) throws IOException {
        int kvSizeLimit = Integer
                .parseInt(getConnection().getConfiguration().get("hbase.client.keyvalue.maxsize", "10485760"));
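        // values above hbase.client.keyvalue.maxsize (10 MB by default) are written
        // to HDFS instead, with an empty byte array left in HBase as the redirect
        // marker that getInputStream() recognizes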
        if (content.length > kvSizeLimit) {
            writeLargeCellToHdfs(resPath, content, table);
            content = BytesUtil.EMPTY_BYTE_ARRAY;
        }

        Put put = new Put(row);
        put.addColumn(B_FAMILY, B_COLUMN, content);
        put.addColumn(B_FAMILY, B_COLUMN_TS, Bytes.toBytes(ts));

        return put;
    }

    @Override
    public String toString() {
        return tableName + "@hbase";
    }
}
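
Example

For completeness, here is a hypothetical read/write round trip through the ResourceStore API that this class implements, reusing the putResource/getResource signatures visible in getMetaStoreUUID() above. The path "/example/demo" and the payload are made up for illustration.

// Hedged usage sketch: assumes a running Kylin environment configured with
// an "hbase" metadata URL; the resource path below is hypothetical.
ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());
store.putResource("/example/demo", new StringEntity("hello"), System.currentTimeMillis(),
        StringEntity.serializer);
StringEntity read = store.getResource("/example/demo", StringEntity.class, StringEntity.serializer);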