// Java tutorial
package hrider.hbase; import hrider.config.GlobalConfig; import hrider.converters.TypeConverter; import hrider.data.*; import hrider.ui.MessageHandler; import org.apache.commons.lang.time.StopWatch; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import java.io.IOException; import java.util.*; /** * Copyright (C) 2012 NICE Systems ltd. * <p/> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @author Igor Cher * @version %I%, %G% * <p/> * This class represents a scanner over the hbase tables. */ @SuppressWarnings({ "OverlyNestedMethod", "ClassWithTooManyMethods" }) public class Scanner { //region Variables /** * The connection which owns the scanner. */ private Connection connection; /** * The name of the hbase table the scanner is going to be executed on. */ private String tableName; /** * Holds a total number of rows in the table. This parameter is calculated by scanning over the whole table. * This operation is time consuming and its result should be cached. 
*/ private long rowsCount; /** * Holds a rows number in the table if the calculation has not been completed because of the timeout. */ private long partialRowsCount; /** * The number of the last loaded row. */ private long lastRow; /** * The map of column types. The key is the name of the column and the value is the type of the objects within the column. */ private Map<String, ColumnType> columnTypes; /** * A list of markers. The marker is used for pagination to mark where the previous scan has stopped. */ private Stack<Marker> markers; /** * A list of loaded rows. */ private Collection<DataRow> current; /** * Indicates if this scanner should support only forward navigation. */ private boolean forwardNavigateOnly; /** * Represents a converter for column names. */ private TypeConverter columnNameConverter; //endregion //region Constructor /** * Initializes a new instance of the {@link Scanner} class. * * @param connection The reference to the connection. * @param tableName The name of the table to be scanned. */ public Scanner(Connection connection, String tableName) { this.connection = connection; this.tableName = tableName; this.rowsCount = 0; this.lastRow = 0; this.markers = new Stack<Marker>(); } //endregion //region Public Properties /** * Gets a reference to the {@link Connection}. * * @return A reference to the {@link Connection}. */ public Connection getConnection() { return this.connection; } /** * Gets a reference to the {@link Configuration} instance used by connection. * * @return A reference to the {@link Configuration} instance. */ public Configuration getConfiguration() { return this.connection.getConfiguration(); } /** * Gets the value indicating if this scanner supports only forward navigation. * * @return True if this instance of the scanner supports only forward navigation or False if both forward and backward navigation are supported. 
*/ public boolean getForwardNavigateOnly() { return this.forwardNavigateOnly; } /** * Sets the value indicating if this scanner should support forward navigate only. * * @param forwardNavigateOnly True if this instance of the scanner should support only forward navigation or False to support both forward and backward navigations. */ public void setForwardNavigateOnly(boolean forwardNavigateOnly) { this.forwardNavigateOnly = forwardNavigateOnly; } /** * Gets the name of the table. * * @return The table name. */ public String getTableName() { return this.tableName; } /** * Indicates that the rows count has been partially calculated. * * @return True if the rows count has been stopped before reaching end of the table or False otherwise. */ public boolean isRowsCountPartiallyCalculated() { return this.partialRowsCount > 0; } /** * Gets a list of columns. If there is no columns at this moment they will be loaded according to the provided rows number. In other words only columns * of loaded rows will be returned. * * @param rowsNumber The number of rows to look for the columns in case there is no columns loaded at this moment. * @return A list of columns. */ public Collection<ColumnQualifier> getColumns(int rowsNumber) { if (this.markers.isEmpty() || peekMarker().columns.size() != rowsNumber) { try { return loadColumns(rowsNumber); } catch (IOException e) { MessageHandler.addError(String.format("Failed to load columns for table %s.", this.tableName), e); return new ArrayList<ColumnQualifier>(); } } else { return peekMarker().columns; } } /** * Gets a mapping of column names to column types. * * @return A map of column types. */ public Map<String, ColumnType> getColumnTypes() { return this.columnTypes; } /** * Sets a mapping of column names to column types. * * @param columnTypes A new map of column types. */ public void setColumnTypes(Map<String, ColumnType> columnTypes) { this.columnTypes = columnTypes; } /** * Gets the last loaded row number. 
* * @return The number of the last loaded row. */ public long getLastRow() { return this.lastRow; } /** * Indicates if there is more rows to show. * * @return True if there is more rows to show or False otherwise. */ public boolean hasNext() { return this.lastRow < this.rowsCount; } /** * Indicates if the backward navigation is possible. * * @return True if there are previously loaded rows or False otherwise. */ public boolean hasPrev() { return this.markers.size() > 1; } //endregion //region Public Methods /** * Validates whether the values held by the column can be converted to the specified type. * * @param columnType The type to check. * @return True if the value can be converted to the specified type or False otherwise. */ public boolean isColumnOfType(String columnName, ColumnType columnType) { Collection<DataRow> rows = this.current; if (rows != null) { int counter = 0; Iterator<DataRow> iterator = rows.iterator(); while (counter < 10 && iterator.hasNext()) { DataRow row = iterator.next(); if (!row.isCellOfType(columnName, columnType)) { return false; } counter++; } return true; } return true; } /** * Updates a column type of all cells in the specified column. * * @param columnName The name of the column. * @param columnType The new column type. */ public void updateColumnType(String columnName, ColumnType columnType) { Collection<DataRow> rows = this.current; if (rows != null) { for (DataRow row : rows) { row.updateColumnType(columnName, columnType); } } } /** * Updates converter for the column name. * * @param converter The new column name converter. */ public void updateColumnNameConverter(TypeConverter converter) { this.columnNameConverter = converter; Collection<DataRow> rows = this.current; if (rows != null) { for (DataRow row : rows) { row.updateColumnNameConverter(converter); } } } /** * Resets the cache. * * @param startKey The key the scan should start from. This parameter can be null. 
*/ public void resetCurrent(ConvertibleObject startKey) { this.current = null; this.rowsCount = 0; this.lastRow = 0; this.markers.clear(); if (startKey != null) { this.markers.push(new Marker(startKey, new ArrayList<DataRow>(), new ArrayList<ColumnQualifier>())); } } /** * Gets a first row in the table. * * @return A first row in the table. * @throws IOException Error accessing hbase. */ public DataRow getFirstRow() throws IOException { Scan scan = getScanner(); HTable table = this.connection.getTableFactory().get(this.tableName); ResultScanner scanner = table.getScanner(scan); try { Collection<DataRow> rows = new LinkedList<DataRow>(); Collection<ColumnQualifier> columns = new LinkedList<ColumnQualifier>(); columns.add(ColumnQualifier.KEY); loadRows(scanner, 0, 1, rows, columns); if (rows.isEmpty()) { return null; } return rows.iterator().next(); } finally { scanner.close(); } } /** * Gets a list of already loaded rows. * * @return A list of rows. */ public Collection<DataRow> current() { return this.current; } /** * Gets a list of rows loaded starting from the beginning. * * @param rowsNumber The number of rows to load. * @return A list of rows. * @throws IOException Error accessing hbase. */ public Collection<DataRow> current(int rowsNumber) throws IOException { return current(0, rowsNumber); } /** * Gets a list of rows loaded starting from the offset. * * @param offset The first row to start loading from. * @param rowsNumber The number of rows to load. * @return A list of rows. * @throws IOException Error accessing hbase. */ public Collection<DataRow> current(long offset, int rowsNumber) throws IOException { if (this.current == null || this.current.size() != rowsNumber || this.current.size() + offset != this.lastRow) { // offset should start from 1. 
if (offset == 0) { offset++; } this.markers.clear(); this.current = next(offset - 1, rowsNumber); this.lastRow = offset + this.current.size() - 1; } return this.current; } /** * Gets a list of rows loaded starting from the previous position. * * @param rowsNumber The number of rows to load. * @return A list of rows. * @throws IOException Error accessing hbase. */ public Collection<DataRow> next(int rowsNumber) throws IOException { this.current = next(this.markers.isEmpty() ? 0 : 1, rowsNumber); this.lastRow += this.current.size(); return this.current; } /** * Gets a list of previously loaded rows. * * @return A list of rows. * @throws IOException Error accessing hbase. */ public Collection<DataRow> prev() throws IOException { if (!this.markers.isEmpty()) { if (this.markers.size() > 1) { popMarker(); } this.lastRow -= this.current.size(); this.current = peekMarker().rows; updateColumnNameConverter(columnNameConverter); for (Map.Entry<String, ColumnType> entry : this.columnTypes.entrySet()) { updateColumnType(entry.getKey(), entry.getValue()); } } return this.current; } /** * Gets the total number of rows in the table. This value is calculated by scanning throughout the whole table to count the rows. This value is then cached * for future uses. * * @return A total number of rows in the table. * @throws IOException Error accessing hbase. 
*/ public long getRowsCount(long timeout) throws IOException { if (this.rowsCount == 0) { this.partialRowsCount = 0; Scan scan = getScanner(); FilterList filters = new FilterList(); if (scan.getFilter() != null) { filters.addFilter(scan.getFilter()); } filters.addFilter(new FirstKeyOnlyFilter()); scan.setFilter(filters); scan.setCaching(GlobalConfig.instance().getBatchSizeForRead()); HTable table = this.connection.getTableFactory().get(this.tableName); ResultScanner scanner = table.getScanner(scan); StopWatch stopWatch = new StopWatch(); stopWatch.start(); try { int count = 0; for (Result rr = scanner.next(); rr != null; rr = scanner.next(), count++) { if (stopWatch.getTime() > timeout) { this.partialRowsCount = count; break; } } this.rowsCount = count; } finally { stopWatch.stop(); scanner.close(); } } return this.rowsCount; } //endregion //region Protected Methods /** * Gets the hbase scanner. * * @return A hbase scanner * @throws IOException Error accessing hbase. */ protected Scan getScanner() throws IOException { return new Scan(); } /** * Checks if the row is valid. The default implementation is return 'True'. This method should be overridden by the derived classes. * * @param row A row to check. * @return True if the row is valid and False otherwise. */ protected boolean isValidRow(Result row) { return true; } //endregion //region Private Methods /** * Loads a specified number of rows from the hbase. * * @param scanner The hbase scanner to retrieve the data. * @param offset The offset to start from. * @param rowsNumber The number of rows to load. * @param rows The loaded rows. This is the output parameter. * @param columns The columns loaded from rows. This is the output parameter. * @return A key of the last loaded row. Used to mark the current position for the next scan. * @throws IOException Error accessing hbase. 
*/ protected ConvertibleObject loadRows(ResultScanner scanner, long offset, int rowsNumber, Collection<DataRow> rows, Collection<ColumnQualifier> columns) throws IOException { ColumnType keyType = this.columnTypes.get(ColumnQualifier.KEY.getName()); Map<ColumnQualifier, ColumnQualifier> loadedColumns = new HashMap<ColumnQualifier, ColumnQualifier>(); int index = 0; boolean isValid; ConvertibleObject key = null; TypeConverter nameConverter = getColumnNameConverterInternal(); HTable table = this.connection.getTableFactory().get(this.tableName); HTableDescriptor tableDescriptor = table.getTableDescriptor(); do { Result result = scanner.next(); isValid = result != null && rows.size() < rowsNumber; if (isValid && isValidRow(result)) { if (index >= offset) { key = new ConvertibleObject(keyType, result.getRow()); DataRow row = new DataRow(key); row.addCell(new DataCell(row, ColumnQualifier.KEY, key)); NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> familyMap = result .getMap(); for (NavigableMap.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> familyEntry : familyMap .entrySet()) { HColumnDescriptor columnDescriptor = tableDescriptor.getFamily(familyEntry.getKey()); for (NavigableMap.Entry<byte[], NavigableMap<Long, byte[]>> qualifierEntry : familyEntry .getValue().entrySet()) { ColumnQualifier qualifier = new ColumnQualifier(qualifierEntry.getKey(), new ColumnFamily(columnDescriptor), nameConverter); String columnName = qualifier.getFullName(); ColumnType columnType = this.columnTypes.get(columnName); if (columnType == null) { columnType = ColumnType.String; } for (NavigableMap.Entry<Long, byte[]> cell : qualifierEntry.getValue().entrySet()) { row.addCell(new DataCell(row, qualifier, new ConvertibleObject(columnType, cell.getValue()))); } if (!loadedColumns.containsKey(qualifier)) { columns.add(qualifier); loadedColumns.put(qualifier, null); } } } rows.add(row); } index++; } } while (isValid); return key; } /** * Loads column names. 
* * @param rowsNumber The number of rows to look for the column names. The column name is a key from key/value pairs in hbase row. * @throws IOException Error accessing hbase. */ private Collection<ColumnQualifier> loadColumns(int rowsNumber) throws IOException { Collection<ColumnQualifier> columns = new ArrayList<ColumnQualifier>(); columns.add(ColumnQualifier.KEY); Map<ColumnQualifier, ColumnQualifier> loadedColumns = new HashMap<ColumnQualifier, ColumnQualifier>(); int itemsNumber = rowsNumber <= GlobalConfig.instance().getBatchSizeForRead() ? rowsNumber : GlobalConfig.instance().getBatchSizeForRead(); Scan scan = getScanner(); scan.setCaching(itemsNumber); HTable table = this.connection.getTableFactory().get(this.tableName); HTableDescriptor tableDescriptor = table.getTableDescriptor(); ResultScanner scanner = table.getScanner(scan); try { TypeConverter nameConverter = getColumnNameConverterInternal(); Result row; int counter = 0; do { row = scanner.next(); if (row != null) { NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> familyMap = row.getMap(); for (NavigableMap.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> familyEntry : familyMap .entrySet()) { HColumnDescriptor columnDescriptor = tableDescriptor.getFamily(familyEntry.getKey()); for (byte[] quantifier : familyEntry.getValue().keySet()) { ColumnQualifier columnQualifier = new ColumnQualifier(quantifier, new ColumnFamily(columnDescriptor), nameConverter); if (!loadedColumns.containsKey(columnQualifier)) { columns.add(columnQualifier); loadedColumns.put(columnQualifier, null); } } } } counter++; } while (row != null && counter < rowsNumber); return columns; } finally { scanner.close(); } } /** * Loads the following rows. * * @param offset The offset to start from. * @param rowsNumber The number of rows to load. * @return A list of loaded rows. * @throws IOException Error accessing hbase. 
*/ private Collection<DataRow> next(long offset, int rowsNumber) throws IOException { int itemsNumber = rowsNumber <= GlobalConfig.instance().getBatchSizeForRead() ? rowsNumber : GlobalConfig.instance().getBatchSizeForRead(); Scan scan = getScanner(); scan.setCaching(itemsNumber); if (!this.markers.isEmpty()) { scan.setStartRow(peekMarker().key.getValue()); } if (this.forwardNavigateOnly) { // Remove the current marker to reduce the memory load in case backward navigation should not be supported. popMarker(); } HTable table = this.connection.getTableFactory().get(this.tableName); ResultScanner scanner = table.getScanner(scan); try { Collection<DataRow> rows = new LinkedList<DataRow>(); Collection<ColumnQualifier> columns = new LinkedList<ColumnQualifier>(); columns.add(ColumnQualifier.KEY); ConvertibleObject lastKey = loadRows(scanner, offset, rowsNumber, rows, columns); if (lastKey != null) { this.markers.push(new Marker(lastKey, rows, columns)); } return rows; } finally { scanner.close(); } } /** * Removes a marker from the stack. * * @return A marker. */ private Marker popMarker() { if (!this.markers.isEmpty()) { return this.markers.pop(); } return null; } /** * Peeks the marker from the stack. * * @return A marker. */ private Marker peekMarker() { if (!this.markers.isEmpty()) { return this.markers.peek(); } return null; } /** * Gets configured column name converter or a default one. * * @return A type converter. */ private TypeConverter getColumnNameConverterInternal() { if (this.columnNameConverter != null) { return this.columnNameConverter; } return ColumnType.BinaryString.getConverter(); } //endregion /** * Represents a marking point between the batches of loaded rows. Each time a new batch of rows is loaded this class holds the last key to * start loading the next batch. */ private static class Marker { //region Variables /** * The last key loaded from the previous batch of rows. */ private ConvertibleObject key; /** * A list of previously loaded rows. 
*/ private Collection<DataRow> rows; /** * A list of columns loaded from the rows. */ private Collection<ColumnQualifier> columns; //endregion //region Constructor /** * Initializes a new instance of the {@link Marker} class. * * @param key The last loaded key. * @param rows A list of loaded rows. * @param columns A list of columns loaded from the rows. */ private Marker(ConvertibleObject key, Collection<DataRow> rows, Collection<ColumnQualifier> columns) { this.key = key; this.rows = rows; this.columns = columns; } //endregion //region Public Methods @Override public boolean equals(Object obj) { return obj instanceof Marker && ((Marker) obj).key.equals(this.key); } @Override public int hashCode() { return this.key.hashCode(); } //endregion } }