/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.phoenix.schema.stats; import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CATALOG_SCHEMA; import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_STATS_TABLE; import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.MAPREDUCE_JOB_TYPE; import static org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.MRJobType.UPDATE_STATS; import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES; import static org.apache.phoenix.util.TestUtil.getAllSplits; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; import java.sql.Array; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Random; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.TableName; import 
org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.google.common.collect.Lists; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Job; import org.apache.phoenix.coprocessor.UngroupedAggregateRegionObserver; import org.apache.phoenix.end2end.BaseUniqueNamesOwnClusterIT; import org.apache.phoenix.jdbc.PhoenixConnection; import org.apache.phoenix.jdbc.PhoenixDatabaseMetaData; import org.apache.phoenix.query.BaseTest; import org.apache.phoenix.query.ConnectionQueryServices; import org.apache.phoenix.query.KeyRange; import org.apache.phoenix.query.QueryServices; import org.apache.phoenix.query.QueryServicesOptions; import org.apache.phoenix.schema.PTable; import org.apache.phoenix.schema.PTableImpl; import org.apache.phoenix.schema.PTableKey; import org.apache.phoenix.transaction.PhoenixTransactionProvider.Feature; import org.apache.phoenix.transaction.TransactionFactory; import org.apache.phoenix.util.MetaDataUtil; import org.apache.phoenix.util.PhoenixRuntime; import org.apache.phoenix.util.PropertiesUtil; import org.apache.phoenix.util.QueryUtil; import org.apache.phoenix.util.ReadOnlyProps; import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import com.google.common.collect.Maps; /** * Base Test class for all Statistics Collection * Tests stats collection with various scenario parameters * 1. Column Encoding * 2. 
Transactions * 3. Namespaces * 4. Stats collection via SQL or MR job */ @RunWith(Parameterized.class) public abstract class BaseStatsCollectorIT extends BaseUniqueNamesOwnClusterIT { private static final Log LOG = LogFactory.getLog(BaseStatsCollectorIT.class); private final String tableDDLOptions; private final boolean columnEncoded; private String tableName; private String schemaName; private String fullTableName; private String physicalTableName; private final boolean userTableNamespaceMapped; private final boolean mutable; private final String transactionProvider; private static final int defaultGuidePostWidth = 20; private boolean collectStatsOnSnapshot; protected BaseStatsCollectorIT(boolean userTableNamespaceMapped, boolean collectStatsOnSnapshot) { this(false, null, userTableNamespaceMapped, false, collectStatsOnSnapshot); } protected BaseStatsCollectorIT(boolean mutable, String transactionProvider, boolean columnEncoded) { this(mutable, transactionProvider, false, columnEncoded, false); } private BaseStatsCollectorIT(boolean mutable, String transactionProvider, boolean userTableNamespaceMapped, boolean columnEncoded, boolean collectStatsOnSnapshot) { this.transactionProvider = transactionProvider; StringBuilder sb = new StringBuilder(); if (columnEncoded) { sb.append("COLUMN_ENCODED_BYTES=4"); } else { sb.append("COLUMN_ENCODED_BYTES=0"); } if (transactionProvider != null) { sb.append(",TRANSACTIONAL=true, TRANSACTION_PROVIDER='" + transactionProvider + "'"); } if (!mutable) { sb.append(",IMMUTABLE_ROWS=true"); if (!columnEncoded) { sb.append(",IMMUTABLE_STORAGE_SCHEME=" + PTableImpl.ImmutableStorageScheme.ONE_CELL_PER_COLUMN); } } this.tableDDLOptions = sb.toString(); this.userTableNamespaceMapped = userTableNamespaceMapped; this.columnEncoded = columnEncoded; this.mutable = mutable; this.collectStatsOnSnapshot = collectStatsOnSnapshot; } @BeforeClass public static void doSetup() throws Exception { // disable name space mapping at global level on both 
client and server side Map<String, String> serverProps = Maps.newHashMapWithExpectedSize(7); serverProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, Boolean.FALSE.toString()); serverProps.put(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB, Long.toString(defaultGuidePostWidth)); Map<String, String> clientProps = Maps.newHashMapWithExpectedSize(2); clientProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, Boolean.FALSE.toString()); clientProps.put(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB, Long.toString(defaultGuidePostWidth)); setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator())); } @Before public void generateTableNames() throws SQLException { schemaName = generateUniqueName(); if (userTableNamespaceMapped) { try (Connection conn = getConnection()) { conn.createStatement().execute("CREATE SCHEMA " + schemaName); } } tableName = "T_" + generateUniqueName(); fullTableName = SchemaUtil.getTableName(schemaName, tableName); physicalTableName = SchemaUtil.getPhysicalHBaseTableName(schemaName, tableName, userTableNamespaceMapped) .getString(); } private Connection getConnection() throws SQLException { return getConnection(Integer.MAX_VALUE); } private Connection getConnection(Integer statsUpdateFreq) throws SQLException { Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); props.setProperty(QueryServices.EXPLAIN_CHUNK_COUNT_ATTRIB, Boolean.TRUE.toString()); props.setProperty(QueryServices.EXPLAIN_ROW_COUNT_ATTRIB, Boolean.TRUE.toString()); props.setProperty(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB, Integer.toString(statsUpdateFreq)); props.setProperty(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, Boolean.toString(userTableNamespaceMapped)); return DriverManager.getConnection(getUrl(), props); } private void collectStatistics(Connection conn, String fullTableName) throws Exception { collectStatistics(conn, fullTableName, null); } private void collectStatistics(Connection conn, 
String fullTableName, String guidePostWidth) throws Exception { if (collectStatsOnSnapshot) { collectStatsOnSnapshot(conn, fullTableName, guidePostWidth); invalidateStats(conn, fullTableName); } else { String updateStatisticsSql = "UPDATE STATISTICS " + fullTableName; if (guidePostWidth != null) { updateStatisticsSql += " SET \"" + QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB + "\" = " + guidePostWidth; } LOG.info("Running SQL to collect stats: " + updateStatisticsSql); conn.createStatement().execute(updateStatisticsSql); } } private void collectStatsOnSnapshot(Connection conn, String fullTableName, String guidePostWidth) throws Exception { if (guidePostWidth != null) { conn.createStatement() .execute("ALTER TABLE " + fullTableName + " SET GUIDE_POSTS_WIDTH = " + guidePostWidth); } runUpdateStatisticsTool(fullTableName); } // Run UpdateStatisticsTool in foreground with manage snapshot option private void runUpdateStatisticsTool(String fullTableName) { UpdateStatisticsTool tool = new UpdateStatisticsTool(); tool.setConf(utility.getConfiguration()); String randomDir = getUtility().getRandomDir().toString(); final String[] cmdArgs = getArgValues(fullTableName, randomDir); try { int status = tool.run(cmdArgs); assertEquals("MR Job should complete successfully", 0, status); HBaseAdmin hBaseAdmin = utility.getHBaseAdmin(); assertEquals("Snapshot should be automatically deleted when UpdateStatisticsTool has completed", 0, hBaseAdmin.listSnapshots(tool.getSnapshotName()).size()); } catch (Exception e) { fail("Exception when running UpdateStatisticsTool for " + tableName + " Exception: " + e); } finally { Job job = tool.getJob(); assertEquals("MR Job should have been configured with UPDATE_STATS job type", job.getConfiguration().get(MAPREDUCE_JOB_TYPE), UPDATE_STATS.name()); } } private String[] getArgValues(String fullTableName, String randomDir) { final List<String> args = Lists.newArrayList(); args.add("-t"); args.add(fullTableName); args.add("-d"); 
args.add(randomDir); args.add("-runfg"); args.add("-ms"); return args.toArray(new String[0]); } @Test public void testUpdateEmptyStats() throws Exception { Connection conn = getConnection(); conn.setAutoCommit(true); conn.createStatement() .execute("CREATE TABLE " + fullTableName + " ( k CHAR(1) PRIMARY KEY )" + tableDDLOptions); collectStatistics(conn, fullTableName); ResultSet rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName); String explainPlan = QueryUtil.getExplainPlan(rs); assertEquals("CLIENT 1-CHUNK 0 ROWS 20 BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName + "\n" + " SERVER FILTER BY FIRST KEY ONLY", explainPlan); conn.close(); } @Test public void testSomeUpdateEmptyStats() throws Exception { Connection conn = getConnection(); conn.setAutoCommit(true); conn.createStatement() .execute("CREATE TABLE " + fullTableName + " ( k VARCHAR PRIMARY KEY, a.v1 VARCHAR, b.v2 VARCHAR ) " + tableDDLOptions + (tableDDLOptions.isEmpty() ? "" : ",") + "SALT_BUCKETS = 3"); conn.createStatement().execute("UPSERT INTO " + fullTableName + "(k,v1) VALUES('a','123456789')"); collectStatistics(conn, fullTableName); ResultSet rs; String explainPlan; rs = conn.createStatement().executeQuery("EXPLAIN SELECT v2 FROM " + fullTableName + " WHERE v2='foo'"); explainPlan = QueryUtil.getExplainPlan(rs); // if we are using the ONE_CELL_PER_COLUMN_FAMILY storage scheme, we will have the single kv even though there are no values for col family v2 String stats = columnEncoded && !mutable ? "4-CHUNK 1 ROWS 38 BYTES" : "3-CHUNK 0 ROWS 20 BYTES"; assertEquals("CLIENT " + stats + " PARALLEL 3-WAY FULL SCAN OVER " + physicalTableName + "\n" + " SERVER FILTER BY B.V2 = 'foo'\n" + "CLIENT MERGE SORT", explainPlan); rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName); explainPlan = QueryUtil.getExplainPlan(rs); assertEquals( "CLIENT 4-CHUNK 1 ROWS " + (columnEncoded ? 
"28" : TransactionFactory.Provider.OMID.name().equals(transactionProvider) ? "38" : "34") + " BYTES PARALLEL 3-WAY FULL SCAN OVER " + physicalTableName + "\n" + "CLIENT MERGE SORT", explainPlan); rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName + " WHERE k = 'a'"); explainPlan = QueryUtil.getExplainPlan(rs); assertEquals("CLIENT 1-CHUNK 1 ROWS " + (columnEncoded ? "204" : "202") + " BYTES PARALLEL 1-WAY POINT LOOKUP ON 1 KEY OVER " + physicalTableName + "\n" + "CLIENT MERGE SORT", explainPlan); conn.close(); } @Test public void testUpdateStats() throws Exception { Connection conn; PreparedStatement stmt; ResultSet rs; Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); conn = getConnection(); conn.createStatement().execute("CREATE TABLE " + fullTableName + " ( k VARCHAR, a_string_array VARCHAR(100) ARRAY[4], b_string_array VARCHAR(100) ARRAY[4] \n" + " CONSTRAINT pk PRIMARY KEY (k, b_string_array DESC))" + tableDDLOptions); String[] s; Array array; conn = upsertValues(props, fullTableName); collectStatistics(conn, fullTableName); rs = conn.createStatement().executeQuery("EXPLAIN SELECT k FROM " + fullTableName); rs.next(); long rows1 = (Long) rs.getObject(PhoenixRuntime.EXPLAIN_PLAN_ESTIMATED_ROWS_READ_COLUMN); stmt = upsertStmt(conn, fullTableName); stmt.setString(1, "z"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); collectStatistics(conn, fullTableName); rs = conn.createStatement().executeQuery("EXPLAIN SELECT k FROM " + fullTableName); rs.next(); long rows2 = (Long) rs.getObject(PhoenixRuntime.EXPLAIN_PLAN_ESTIMATED_ROWS_READ_COLUMN); assertNotEquals(rows1, rows2); conn.close(); } private void testNoDuplicatesAfterUpdateStats(String splitKey) throws Throwable { 
Connection conn = getConnection(); PreparedStatement stmt; ResultSet rs; conn.createStatement() .execute("CREATE TABLE " + fullTableName + " ( k VARCHAR, c1.a bigint,c2.b bigint CONSTRAINT pk PRIMARY KEY (k))" + tableDDLOptions + (splitKey != null ? " split on (" + splitKey + ")" : "")); conn.createStatement().execute("upsert into " + fullTableName + " values ('abc',1,3)"); conn.createStatement().execute("upsert into " + fullTableName + " values ('def',2,4)"); conn.commit(); collectStatistics(conn, fullTableName); rs = conn.createStatement().executeQuery("SELECT k FROM " + fullTableName + " order by k desc"); assertTrue(rs.next()); assertEquals("def", rs.getString(1)); assertTrue(rs.next()); assertEquals("abc", rs.getString(1)); assertTrue(!rs.next()); conn.close(); } @Test public void testNoDuplicatesAfterUpdateStatsWithSplits() throws Throwable { testNoDuplicatesAfterUpdateStats("'abc','def'"); } @Test public void testNoDuplicatesAfterUpdateStatsWithDesc() throws Throwable { testNoDuplicatesAfterUpdateStats(null); } private Connection upsertValues(Properties props, String tableName) throws SQLException, IOException, InterruptedException { Connection conn; PreparedStatement stmt; conn = getConnection(); stmt = upsertStmt(conn, tableName); stmt.setString(1, "a"); String[] s = new String[] { "abc", "def", "ghi", "jkll", null, null, "xxx" }; Array array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "abc", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); stmt = upsertStmt(conn, tableName); stmt.setString(1, "b"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); stmt = 
upsertStmt(conn, tableName); stmt.setString(1, "c"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); stmt = upsertStmt(conn, tableName); stmt.setString(1, "d"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); stmt = upsertStmt(conn, tableName); stmt.setString(1, "b"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); stmt = upsertStmt(conn, tableName); stmt.setString(1, "e"); s = new String[] { "xyz", "def", "ghi", "jkll", null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(2, array); s = new String[] { "zya", "def", "ghi", "jkll", null, null, null, "xxx" }; array = conn.createArrayOf("VARCHAR", s); stmt.setArray(3, array); stmt.execute(); conn.commit(); return conn; } private PreparedStatement upsertStmt(Connection conn, String tableName) throws SQLException { PreparedStatement stmt; stmt = conn.prepareStatement("UPSERT INTO " + tableName + " VALUES(?,?,?)"); return stmt; } @Test @Ignore //TODO remove this once https://issues.apache.org/jira/browse/TEPHRA-208 is fixed public void testCompactUpdatesStats() throws Exception { testCompactUpdatesStats(0, fullTableName); } @Test @Ignore //TODO remove this once https://issues.apache.org/jira/browse/TEPHRA-208 is fixed public void 
testCompactUpdatesStatsWithMinStatsUpdateFreq() throws Exception { testCompactUpdatesStats(QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS, fullTableName); } private static void invalidateStats(Connection conn, String tableName) throws SQLException { PTable ptable = conn.unwrap(PhoenixConnection.class).getMetaDataCache() .getTableRef(new PTableKey(null, tableName)).getTable(); byte[] name = ptable.getPhysicalName().getBytes(); conn.unwrap(PhoenixConnection.class).getQueryServices() .invalidateStats(new GuidePostsKey(name, SchemaUtil.getEmptyColumnFamily(ptable))); } private void testCompactUpdatesStats(Integer statsUpdateFreq, String tableName) throws Exception { int nRows = 10; Connection conn = getConnection(statsUpdateFreq); PreparedStatement stmt; conn.createStatement() .execute("CREATE TABLE " + tableName + "(k CHAR(1) PRIMARY KEY, v INTEGER, w INTEGER) " + (!tableDDLOptions.isEmpty() ? tableDDLOptions + "," : "") + ColumnFamilyDescriptorBuilder.KEEP_DELETED_CELLS + "=" + Boolean.FALSE); stmt = conn.prepareStatement("UPSERT INTO " + tableName + " VALUES(?,?,?)"); for (int i = 0; i < nRows; i++) { stmt.setString(1, Character.toString((char) ('a' + i))); stmt.setInt(2, i); stmt.setInt(3, i); stmt.executeUpdate(); } conn.commit(); TestUtil.doMajorCompaction(conn, physicalTableName); if (statsUpdateFreq != 0) { invalidateStats(conn, tableName); } else { // Confirm that when we have a non zero STATS_UPDATE_FREQ_MS_ATTRIB, after we run // UPDATATE STATISTICS, the new statistics are faulted in as expected. List<KeyRange> keyRanges = getAllSplits(conn, tableName); assertNotEquals(nRows + 1, keyRanges.size()); // If we've set MIN_STATS_UPDATE_FREQ_MS_ATTRIB, an UPDATE STATISTICS will invalidate the cache // and forcing the new stats to be pulled over. 
int rowCount = conn.createStatement().executeUpdate("UPDATE STATISTICS " + tableName); assertEquals(10, rowCount); } List<KeyRange> keyRanges = getAllSplits(conn, tableName); assertEquals(nRows + 1, keyRanges.size()); int nDeletedRows = conn.createStatement() .executeUpdate("DELETE FROM " + tableName + " WHERE V < " + nRows / 2); conn.commit(); assertEquals(5, nDeletedRows); Scan scan = new Scan(); scan.setRaw(true); PhoenixConnection phxConn = conn.unwrap(PhoenixConnection.class); try (Table htable = phxConn.getQueryServices().getTable(Bytes.toBytes(tableName))) { ResultScanner scanner = htable.getScanner(scan); Result result; while ((result = scanner.next()) != null) { System.out.println(result); } } TestUtil.doMajorCompaction(conn, physicalTableName); scan = new Scan(); scan.setRaw(true); phxConn = conn.unwrap(PhoenixConnection.class); try (Table htable = phxConn.getQueryServices().getTable(Bytes.toBytes(tableName))) { ResultScanner scanner = htable.getScanner(scan); Result result; while ((result = scanner.next()) != null) { System.out.println(result); } } if (statsUpdateFreq != 0) { invalidateStats(conn, tableName); } else { assertEquals(nRows + 1, keyRanges.size()); // If we've set STATS_UPDATE_FREQ_MS_ATTRIB, an UPDATE STATISTICS will invalidate the cache // and force us to pull over the new stats int rowCount = conn.createStatement().executeUpdate("UPDATE STATISTICS " + tableName); assertEquals(5, rowCount); } keyRanges = getAllSplits(conn, tableName); assertEquals(nRows / 2 + 1, keyRanges.size()); ResultSet rs = conn.createStatement() .executeQuery("SELECT SUM(GUIDE_POSTS_ROW_COUNT) FROM " + "\"" + SYSTEM_CATALOG_SCHEMA + "\".\"" + SYSTEM_STATS_TABLE + "\"" + " WHERE PHYSICAL_NAME='" + physicalTableName + "'"); rs.next(); assertEquals(nRows - nDeletedRows, rs.getLong(1)); } @Test public void testWithMultiCF() throws Exception { int nRows = 20; Connection conn = getConnection(0); PreparedStatement stmt; conn.createStatement() .execute("CREATE TABLE " + 
fullTableName + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, c.v INTEGER NULL, d.v INTEGER NULL) " + tableDDLOptions); stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + " VALUES(?,?, ?, ?, ?)"); byte[] val = new byte[250]; for (int i = 0; i < nRows; i++) { stmt.setString(1, Character.toString((char) ('a' + i)) + Bytes.toString(val)); stmt.setInt(2, i); stmt.setInt(3, i); stmt.setInt(4, i); stmt.setInt(5, i); stmt.executeUpdate(); } conn.commit(); stmt = conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, c.v, d.v) VALUES(?,?,?)"); for (int i = 0; i < 5; i++) { stmt.setString(1, Character.toString((char) ('a' + 'z' + i)) + Bytes.toString(val)); stmt.setInt(2, i); stmt.setInt(3, i); stmt.executeUpdate(); } conn.commit(); ResultSet rs; collectStatistics(conn, fullTableName); List<KeyRange> keyRanges = getAllSplits(conn, fullTableName); assertEquals(26, keyRanges.size()); rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName); assertEquals("CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? (mutable ? "12530" : "13902") : (TransactionFactory.Provider.OMID.name().equals(transactionProvider)) ? "25044" : "12420") + " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, QueryUtil.getExplainPlan(rs)); ConnectionQueryServices services = conn.unwrap(PhoenixConnection.class).getQueryServices(); List<HRegionLocation> regions = services.getAllTableRegions(Bytes.toBytes(physicalTableName)); assertEquals(1, regions.size()); collectStatistics(conn, fullTableName, Long.toString(1000)); keyRanges = getAllSplits(conn, fullTableName); boolean oneCellPerColFamliyStorageScheme = !mutable && columnEncoded; boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider); assertEquals(oneCellPerColFamliyStorageScheme ? 13 : hasShadowCells ? 
23 : 12, keyRanges.size()); rs = conn.createStatement().executeQuery( "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME = '" + physicalTableName + "' GROUP BY COLUMN_FAMILY ORDER BY COLUMN_FAMILY"); assertTrue(rs.next()); assertEquals("A", rs.getString(1)); assertEquals(24, rs.getInt(2)); assertEquals(columnEncoded ? (mutable ? 12252 : 13624) : hasShadowCells ? 24756 : 12144, rs.getInt(3)); assertEquals(oneCellPerColFamliyStorageScheme ? 12 : hasShadowCells ? 22 : 11, rs.getInt(4)); assertTrue(rs.next()); assertEquals("B", rs.getString(1)); assertEquals(oneCellPerColFamliyStorageScheme ? 24 : 20, rs.getInt(2)); assertEquals(columnEncoded ? (mutable ? 5600 : 6972) : hasShadowCells ? 11260 : 5540, rs.getInt(3)); assertEquals(oneCellPerColFamliyStorageScheme ? 6 : hasShadowCells ? 10 : 5, rs.getInt(4)); assertTrue(rs.next()); assertEquals("C", rs.getString(1)); assertEquals(24, rs.getInt(2)); assertEquals(columnEncoded ? (mutable ? 6724 : 6988) : hasShadowCells ? 13520 : 6652, rs.getInt(3)); assertEquals(hasShadowCells ? 12 : 6, rs.getInt(4)); assertTrue(rs.next()); assertEquals("D", rs.getString(1)); assertEquals(24, rs.getInt(2)); assertEquals(columnEncoded ? (mutable ? 6724 : 6988) : hasShadowCells ? 13520 : 6652, rs.getInt(3)); assertEquals(hasShadowCells ? 
12 : 6, rs.getInt(4)); assertFalse(rs.next()); // Disable stats conn.createStatement().execute( "ALTER TABLE " + fullTableName + " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=0"); collectStatistics(conn, fullTableName); // Assert that there are no more guideposts rs = conn.createStatement() .executeQuery("SELECT count(1) FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "='" + physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS NOT NULL"); assertTrue(rs.next()); assertEquals(0, rs.getLong(1)); assertFalse(rs.next()); rs = conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " + fullTableName); assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physicalTableName, QueryUtil.getExplainPlan(rs)); } @Test public void testRowCountAndByteCounts() throws Exception { Connection conn = getConnection(); String ddl = "CREATE TABLE " + fullTableName + " (t_id VARCHAR NOT NULL,\n" + "k1 INTEGER NOT NULL,\n" + "k2 INTEGER NOT NULL,\n" + "C3.k3 INTEGER,\n" + "C2.v1 VARCHAR,\n" + "CONSTRAINT pk PRIMARY KEY (t_id, k1, k2)) " + tableDDLOptions + " split on ('e','j','o')"; conn.createStatement().execute(ddl); String[] strings = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" }; for (int i = 0; i < 26; i++) { conn.createStatement().execute("UPSERT INTO " + fullTableName + " values('" + strings[i] + "'," + i + "," + (i + 1) + "," + (i + 2) + ",'" + strings[25 - i] + "')"); } conn.commit(); ResultSet rs; collectStatistics(conn, fullTableName, Long.toString(20L)); Random r = new Random(); int count = 0; boolean hasShadowCells = TransactionFactory.Provider.OMID.name().equals(transactionProvider); while (count < 4) { int startIndex = r.nextInt(strings.length); int endIndex = r.nextInt(strings.length - startIndex) + startIndex; long rows = endIndex - startIndex; long c2Bytes = rows * (columnEncoded ? 
(mutable ? 37 : 48) : 35); String physicalTableName = SchemaUtil .getPhysicalTableName(Bytes.toBytes(fullTableName), userTableNamespaceMapped).toString(); rs = conn.createStatement().executeQuery( "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH) from \"SYSTEM\".STATS where PHYSICAL_NAME = '" + physicalTableName + "' AND GUIDE_POST_KEY>= cast('" + strings[startIndex] + "' as varbinary) AND GUIDE_POST_KEY<cast('" + strings[endIndex] + "' as varbinary) and COLUMN_FAMILY='C2' group by COLUMN_FAMILY"); if (startIndex < endIndex) { assertTrue(rs.next()); assertEquals("C2", rs.getString(1)); assertEquals(rows, rs.getLong(2)); // OMID with the shadow cells it creates will have more bytes, but getting // an exact byte count based on the number or rows is not possible because // it is variable on a row-by-row basis. long sumOfGuidePostsWidth = rs.getLong(3); assertTrue(hasShadowCells ? sumOfGuidePostsWidth > c2Bytes : sumOfGuidePostsWidth == c2Bytes); count++; } } } @Test public void testRowCountWhenNumKVsExceedCompactionScannerThreshold() throws Exception { StringBuilder sb = new StringBuilder(200); sb.append("CREATE TABLE " + fullTableName + "(PK1 VARCHAR NOT NULL, "); int numRows = 10; try (Connection conn = getConnection()) { int compactionScannerKVThreshold = conn.unwrap(PhoenixConnection.class).getQueryServices() .getConfiguration().getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT); int numKvColumns = compactionScannerKVThreshold * 2; for (int i = 1; i <= numKvColumns; i++) { sb.append("KV" + i + " VARCHAR"); if (i < numKvColumns) { sb.append(", "); } } sb.append(" CONSTRAINT PK PRIMARY KEY (PK1))"); String ddl = sb.toString(); conn.createStatement().execute(ddl); sb = new StringBuilder(200); sb.append("UPSERT INTO " + fullTableName + " VALUES ("); for (int i = 1; i <= numKvColumns + 1; i++) { sb.append("?"); if (i < numKvColumns + 1) { sb.append(", "); } } sb.append(")"); String dml = sb.toString(); PreparedStatement 
// NOTE(review): this chunk starts mid-method — the statements below are the tail of a
// test (signature above this view) that upserts rows, runs stats collection, and checks
// SYSTEM.STATS row counts both after UPDATE STATISTICS and after a major compaction that
// follows a manual delete of the stats rows.
stmt = conn.prepareStatement(dml);
String keyValue = "KVVVVVV";
// Fill numRows rows: parameter 1 carries the row number as the key; every other KV
// column gets the same filler value.
for (int j = 1; j <= numRows; j++) {
    for (int i = 1; i <= numKvColumns + 1; i++) {
        if (i == 1) {
            stmt.setString(1, "" + j);
        } else {
            stmt.setString(i, keyValue);
        }
    }
    stmt.executeUpdate();
}
conn.commit();
collectStatistics(conn, fullTableName);
// Summing the per-guide-post row counts must reproduce the number of rows upserted.
String q = "SELECT SUM(GUIDE_POSTS_ROW_COUNT) FROM SYSTEM.STATS WHERE PHYSICAL_NAME = '"
        + physicalTableName + "'";
ResultSet rs = conn.createStatement().executeQuery(q);
rs.next();
assertEquals("Number of expected rows in stats table after update stats didn't match!",
        numRows, rs.getInt(1));
// Wipe the stats rows, then major-compact; compaction is expected to regenerate them.
conn.createStatement()
        .executeUpdate("DELETE FROM SYSTEM.STATS WHERE PHYSICAL_NAME = '" + physicalTableName + "'");
conn.commit();
TestUtil.doMajorCompaction(conn, physicalTableName);
q = "SELECT SUM(GUIDE_POSTS_ROW_COUNT) FROM SYSTEM.STATS WHERE PHYSICAL_NAME = '"
        + physicalTableName + "'";
rs = conn.createStatement().executeQuery(q);
rs.next();
assertEquals("Number of expected rows in stats table after major compaction didn't match",
        numRows, rs.getInt(1));
    }
}

/**
 * Reads the latest guide posts for each given column family directly from the SYSTEM.STATS
 * HBase table and asserts their state.
 *
 * @param queryServices          services used to open the raw SYSTEM.STATS HTable
 * @param tableName              physical table name whose stats are inspected
 * @param familyNames            column families to check (one GuidePostsKey per family)
 * @param guidePostWidth         configured width; the first byte count must be >= this
 * @param emptyGuidePostExpected true if the data was too small to produce a real guide post
 */
private void verifyGuidePostGenerated(ConnectionQueryServices queryServices, String tableName,
        String[] familyNames, long guidePostWidth, boolean emptyGuidePostExpected)
        throws Exception {
    try (Table statsHTable = queryServices.getTable(SchemaUtil
            .getPhysicalName(PhoenixDatabaseMetaData.SYSTEM_STATS_NAME_BYTES, queryServices.getProps())
            .getName())) {
        for (String familyName : familyNames) {
            GuidePostsInfo gps = StatisticsUtil.readStatistics(statsHTable,
                    new GuidePostsKey(Bytes.toBytes(tableName), Bytes.toBytes(familyName)),
                    HConstants.LATEST_TIMESTAMP);
            assertTrue(emptyGuidePostExpected ? gps.isEmptyGuidePost() : !gps.isEmptyGuidePost());
            // NOTE(review): the byte-count bound is asserted even in the empty-guide-post
            // case; presumably an empty guide post still records a byte count >= the
            // configured width — confirm against GuidePostsInfo.
            assertTrue(gps.getByteCounts()[0] >= guidePostWidth);
            assertTrue(gps.getGuidePostTimestamps()[0] > 0);
        }
    }
}

/**
 * When the data size for a column family is smaller than GUIDE_POSTS_WIDTH, stats
 * collection should produce an "empty" guide post for that family rather than none at all.
 */
@Test
public void testEmptyGuidePostGeneratedWhenDataSizeLessThanGPWidth() throws Exception {
    try (Connection conn = getConnection()) {
        // Width far larger than the two tiny rows below, so no real guide post can form.
        long guidePostWidth = 20000000;
        conn.createStatement()
                .execute("CREATE TABLE " + fullTableName
                        + " ( k INTEGER, c1.a bigint,c2.b bigint CONSTRAINT pk PRIMARY KEY (k)) GUIDE_POSTS_WIDTH="
                        + guidePostWidth + ", SALT_BUCKETS = 4");
        conn.createStatement().execute("upsert into " + fullTableName + " values (100,1,3)");
        conn.createStatement().execute("upsert into " + fullTableName + " values (101,2,4)");
        conn.commit();
        collectStatistics(conn, fullTableName);
        ConnectionQueryServices queryServices =
                conn.unwrap(PhoenixConnection.class).getQueryServices();
        verifyGuidePostGenerated(queryServices, physicalTableName, new String[] { "C1", "C2" },
                guidePostWidth, true);
    }
}

/**
 * Stats collection must account for every cell version, not just the latest: with
 * VERSIONS=3, updating the single row doubles its stored size, pushing it past the guide
 * post width and turning the empty guide post into a real one.
 */
@Test
public void testCollectingAllVersionsOfCells() throws Exception {
    try (Connection conn = getConnection()) {
        long guidePostWidth = 70;
        String ddl = "CREATE TABLE " + fullTableName
                + " (k INTEGER PRIMARY KEY, c1.a bigint, c2.b bigint)"
                + " GUIDE_POSTS_WIDTH=" + guidePostWidth
                + ", USE_STATS_FOR_PARALLELIZATION=true" + ", VERSIONS=3";
        conn.createStatement().execute(ddl);
        conn.createStatement().execute("upsert into " + fullTableName + " values (100,100,3)");
        conn.commit();
        collectStatistics(conn, fullTableName);
        ConnectionQueryServices queryServices =
                conn.unwrap(PhoenixConnection.class).getQueryServices();
        // The table only has one row; each cell has a single version and the row's data
        // size is less than the guide post width, so we generate an empty guide post.
        verifyGuidePostGenerated(queryServices, physicalTableName, new String[] { "C1", "C2" },
                guidePostWidth, true);
        conn.createStatement().execute("upsert into " + fullTableName + " values (100,101,4)");
        conn.commit();
        collectStatistics(conn, fullTableName);
        // We updated the row. Now each cell has two versions, and the data size of the row
        // is >= the guide post width, so we generate a non-empty guide post.
        verifyGuidePostGenerated(queryServices, physicalTableName, new String[] { "C1", "C2" },
                guidePostWidth, false);
    }
}

/**
 * Verifies which GUIDE_POSTS_WIDTH DefaultStatisticsCollector resolves for each kind of
 * physical table: base table, global index, local index (co-located with the base table),
 * and view index. Initially all inherit the cluster default; after ALTER TABLE sets an
 * explicit width on the base table, every index must pick up the new value.
 */
@Test
public void testGuidePostWidthUsedInDefaultStatsCollector() throws Exception {
    String baseTable = generateUniqueName();
    try (Connection conn = DriverManager.getConnection(getUrl())) {
        String ddl = "CREATE TABLE " + baseTable
                + " (k INTEGER PRIMARY KEY, a bigint, b bigint, c bigint) " + tableDDLOptions;
        BaseTest.createTestTable(getUrl(), ddl, null, null);
        conn.createStatement().execute("upsert into " + baseTable + " values (100,1,1,1)");
        conn.createStatement().execute("upsert into " + baseTable + " values (101,2,2,2)");
        conn.createStatement().execute("upsert into " + baseTable + " values (102,3,3,3)");
        conn.createStatement().execute("upsert into " + baseTable + " values (103,4,4,4)");
        conn.createStatement().execute("upsert into " + baseTable + " values (104,5,5,5)");
        conn.createStatement().execute("upsert into " + baseTable + " values (105,6,6,6)");
        conn.createStatement().execute("upsert into " + baseTable + " values (106,7,7,7)");
        conn.createStatement().execute("upsert into " + baseTable + " values (107,8,8,8)");
        conn.createStatement().execute("upsert into " + baseTable + " values (108,9,9,9)");
        conn.createStatement().execute("upsert into " + baseTable + " values (109,10,10,10)");
        conn.commit();
        // Base table with no explicit width: the cluster-wide default applies.
        DefaultStatisticsCollector statsCollector = getDefaultStatsCollectorForTable(baseTable);
        statsCollector.init();
        assertEquals(defaultGuidePostWidth, statsCollector.getGuidePostDepth());
        // Create a global index and check what guide post width is used for it.
        String globalIndex = "GI_" + generateUniqueName();
        ddl = "CREATE INDEX " + globalIndex + " ON " + baseTable + " (a) INCLUDE (b) ";
        conn.createStatement().execute(ddl);
        statsCollector = getDefaultStatsCollectorForTable(globalIndex);
        statsCollector.init();
        assertEquals(defaultGuidePostWidth, statsCollector.getGuidePostDepth());
        // Check the local index too, unless the transaction provider forbids local indexes.
        if (transactionProvider == null || !TransactionFactory
                .getTransactionProvider(TransactionFactory.Provider.valueOf(transactionProvider))
                .isUnsupported(Feature.ALLOW_LOCAL_INDEX)) {
            String localIndex = "LI_" + generateUniqueName();
            ddl = "CREATE LOCAL INDEX " + localIndex + " ON " + baseTable + " (b) INCLUDE (c) ";
            conn.createStatement().execute(ddl);
            // Local indexes reside on the same physical table as the base data table.
            statsCollector = getDefaultStatsCollectorForTable(baseTable);
            statsCollector.init();
            assertEquals(defaultGuidePostWidth, statsCollector.getGuidePostDepth());
        }
        // Now create a view and an index on it, and check the width used for the view
        // index's physical table.
        String view = "V_" + generateUniqueName();
        ddl = "CREATE VIEW " + view + " AS SELECT * FROM " + baseTable;
        conn.createStatement().execute(ddl);
        String viewIndex = "VI_" + generateUniqueName();
        ddl = "CREATE INDEX " + viewIndex + " ON " + view + " (b)";
        conn.createStatement().execute(ddl);
        String viewIndexTableName = MetaDataUtil.getViewIndexPhysicalName(baseTable);
        statsCollector = getDefaultStatsCollectorForTable(viewIndexTableName);
        statsCollector.init();
        assertEquals(defaultGuidePostWidth, statsCollector.getGuidePostDepth());
        /*
         * Now change the guide post width of the base table. This should change the guide
         * post width DefaultStatisticsCollector uses for all of its indexes too.
         */
        long newGpWidth = 500;
        conn.createStatement()
                .execute("ALTER TABLE " + baseTable + " SET GUIDE_POSTS_WIDTH=" + newGpWidth);
        // base table and local index
        statsCollector = getDefaultStatsCollectorForTable(baseTable);
        statsCollector.init();
        assertEquals(newGpWidth, statsCollector.getGuidePostDepth());
        // global index table
        statsCollector = getDefaultStatsCollectorForTable(globalIndex);
        statsCollector.init();
        assertEquals(newGpWidth, statsCollector.getGuidePostDepth());
        // view index table
        statsCollector = getDefaultStatsCollectorForTable(viewIndexTableName);
        statsCollector.init();
        assertEquals(newGpWidth, statsCollector.getGuidePostDepth());
    }
}

/**
 * Builds a DefaultStatisticsCollector bound to the first region of {@code tableName},
 * mirroring how the region observer creates one during stats collection.
 */
private DefaultStatisticsCollector getDefaultStatsCollectorForTable(String tableName)
        throws Exception {
    RegionCoprocessorEnvironment env = getRegionEnvrionment(tableName);
    return (DefaultStatisticsCollector) StatisticsCollectorFactory.createStatisticsCollector(env,
            tableName, System.currentTimeMillis(), null, null);
}

/**
 * Looks up the UngroupedAggregateRegionObserver coprocessor environment on the first
 * region of {@code tableName} in the mini cluster.
 * NOTE(review): method name misspells "Environment"; left as-is because callers outside
 * this chunk may reference it.
 */
private RegionCoprocessorEnvironment getRegionEnvrionment(String tableName)
        throws IOException, InterruptedException {
    return getUtility().getMiniHBaseCluster().getRegions(TableName.valueOf(tableName)).get(0)
            .getCoprocessorHost()
            .findCoprocessorEnvironment(UngroupedAggregateRegionObserver.class.getName());
}
}