Java tutorial
/* * Copyright (c) 2015, WSO2 Inc. (http://www.wso2.org) All Rights Reserved. * * WSO2 Inc. licenses this file to you under the Apache License, * Version 2.0 (the "License"); you may not use this file except * in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.wso2.carbon.analytics.datasource.hbase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; import org.wso2.carbon.analytics.datasource.commons.AnalyticsIterator; import org.wso2.carbon.analytics.datasource.commons.Record; import org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException; import org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException; import org.wso2.carbon.analytics.datasource.core.util.GenericUtils; import org.wso2.carbon.analytics.datasource.hbase.util.HBaseAnalyticsDSConstants; import org.wso2.carbon.analytics.datasource.hbase.util.HBaseRuntimeException; import org.wso2.carbon.analytics.datasource.hbase.util.HBaseUtils; import java.io.IOException; import java.util.*; /** * Subclass of java.util.Iterator for streaming in records based on timestamp ranges */ public class HBaseTimestampIterator implements AnalyticsIterator<Record> { private List<String> columns; private int tenantId; private int batchSize; private int recordsCount; private int globalCounter; private byte[] latestRow; private byte[] endRow; private static final long POSTFIX = 1L; private boolean fullyFetched; private boolean noStartTime = false; private boolean noStopTime = false; private String tableName; private Table table, indexTable; private Iterator<Record> subIterator = Collections.emptyIterator(); private static final Log log = LogFactory.getLog(HBaseTimestampIterator.class); HBaseTimestampIterator(int tenantId, String tableName, List<String> columns, long timeFrom, long timeTo, int recordsCount, Connection conn, int batchSize) throws AnalyticsException, AnalyticsTableNotAvailableException { if ((timeFrom > timeTo) || (batchSize <= 0)) { throw new AnalyticsException("Invalid parameters specified for reading data from table " + tableName + " for tenant " + tenantId); } else { this.init(conn, tenantId, tableName, columns, recordsCount, batchSize); if (timeFrom < 0) { this.noStartTime = true; /* Setting param to null, to recognize the first ever run. It will never become null after the first run. */ this.latestRow = null; } else { /* Setting the initial row to start time -1 because it will soon be incremented by 1L. */ this.latestRow = HBaseUtils.encodeLong(timeFrom - POSTFIX); } if (timeTo >= Long.MAX_VALUE - 1) { this.noStopTime = true; } else { this.endRow = HBaseUtils.encodeLong(timeTo); } /* pre-fetching from HBase and populating records for the first time */ this.fetchRecords(); } } @Override public boolean hasNext() { boolean hasMore = this.subIterator.hasNext(); if (!hasMore) { try { this.fetchRecords(); } catch (AnalyticsTableNotAvailableException e) { this.subIterator = Collections.emptyIterator(); } } return this.subIterator.hasNext(); } @Override public Record next() { if (this.hasNext()) { return this.subIterator.next(); } else { throw new NoSuchElementException("No further elements exist in iterator"); } } @Override public void remove() { /* nothing to do here, since this is a read-only iterator */ } private void fetchRecords() throws AnalyticsTableNotAvailableException { if (this.fullyFetched) { return; } List<String> currentBatch = this.populateNextRecordBatch(); if (currentBatch.size() == 0) { return; } Set<String> colSet = null; List<Record> fetchedRecords = new ArrayList<>(); List<Get> gets = new ArrayList<>(); for (String currentId : currentBatch) { Get get = new Get(Bytes.toBytes(currentId)); get.addFamily(HBaseAnalyticsDSConstants.ANALYTICS_DATA_COLUMN_FAMILY_NAME); gets.add(get); } try { /* If the list of columns to be retrieved is null, retrieve ALL columns. */ if (this.columns != null && this.columns.size() > 0) { colSet = new HashSet<>(this.columns); } Result[] results = this.table.get(gets); for (Result currentResult : results) { if (!currentResult.isEmpty()) { Record record = HBaseUtils.constructRecord(currentResult, tenantId, tableName, colSet); if (record != null) { fetchedRecords.add(record); } } } this.subIterator = fetchedRecords.iterator(); } catch (Exception e) { if (e instanceof RetriesExhaustedException) { throw new AnalyticsTableNotAvailableException(tenantId, tableName); } this.cleanup(); throw new HBaseRuntimeException( "Error reading data from table " + this.tableName + " for tenant " + this.tenantId, e); } } private List<String> populateNextRecordBatch() { List<String> currentBatch = new ArrayList<>(); if (this.recordsCount > 0) { if (this.globalCounter >= this.recordsCount) { this.fullyFetched = true; return currentBatch; } } int counter = 0; Scan indexScan = new Scan(); long latestTime; if (!this.noStartTime && (this.latestRow != null)) { latestTime = HBaseUtils.decodeLong(this.latestRow); indexScan.setStartRow(HBaseUtils.encodeLong(latestTime + POSTFIX)); } if (!this.noStopTime) { indexScan.setStopRow(this.endRow); } indexScan.addFamily(HBaseAnalyticsDSConstants.ANALYTICS_INDEX_COLUMN_FAMILY_NAME); ResultScanner resultScanner; try { resultScanner = this.indexTable.getScanner(indexScan); outer: for (Result rowResult : resultScanner) { Cell[] cells = rowResult.rawCells(); for (Cell cell : cells) { if ((this.globalCounter == this.recordsCount)) { this.fullyFetched = true; break outer; } currentBatch.add(Bytes.toString(CellUtil.cloneValue(cell))); counter++; this.globalCounter++; } if (counter >= this.batchSize) { /* Snap out of further processing, because either the batch end or the client limit has been reached. */ this.latestRow = rowResult.getRow(); break; } this.latestRow = rowResult.getRow(); } resultScanner.close(); indexTable.close(); } catch (IOException e) { throw new HBaseRuntimeException( "Error reading index data for table " + this.tableName + ", tenant " + this.tenantId, e); } if (counter < this.batchSize) { /* Checking if processing had been interrupted PRIOR TO: * - More results being scanned (counter equals 0 in this case), where there are no more records to be scanned * - Batch size becoming equal to the counter (counter < batchSize case), signifying the scan ran out of records * where the scan has either exhausted all records on the table or has reached the limit from the client. * For both of the above cases, we understand that the end of processing for this particular query is at hand * (i.e. Iterator: I die in peace now, tell my family I love them..) */ this.fullyFetched = true; this.cleanup(); } return currentBatch; } private void init(Connection conn, int tenantId, String tableName, List<String> columns, int recordsCount, int batchSize) throws AnalyticsException { this.tenantId = tenantId; this.tableName = tableName; this.columns = columns; this.recordsCount = recordsCount; this.batchSize = batchSize; this.globalCounter = 0; try { this.indexTable = conn.getTable(TableName.valueOf( HBaseUtils.generateTableName(tenantId, tableName, HBaseAnalyticsDSConstants.TableType.INDEX))); this.table = conn.getTable(TableName.valueOf( HBaseUtils.generateTableName(tenantId, tableName, HBaseAnalyticsDSConstants.TableType.DATA))); } catch (IOException e) { throw new AnalyticsException("The table " + tableName + " for tenant " + tenantId + " could not be initialized for reading: " + e.getMessage(), e); } } private void cleanup() { GenericUtils.closeQuietly(this.indexTable); GenericUtils.closeQuietly(this.table); } @Override public void close() throws IOException { cleanup(); } }