From 1014f21ef674f98469bc8bc71a2031ece3832b7d Mon Sep 17 00:00:00 2001 From: hcduan Date: Wed, 15 Sep 2021 19:17:25 +0800 Subject: [PATCH] [bugfix] ANALYZE InnoDB wide tables incorrectly to get zero or seriously underestimated statistics Problem: Online-schema-change tool such as gh-ost follows a common pattern. During migration, existing rows are inserted to a conceptual range in the new table, and new rows could be added concurrently beyond the range. For a table with very wide rows, which is common in use cases that store serialized objects in MySQL, migrating existing rows will cause page splitting right before the end of the range, generating many one-row pages. The sampling algorithm (dict0stats.cc) used by ANALYZE for persistent statistics can not handle these one-row pages, and usually estimates n_diff as zero giving a false illusion that the table is empty or some significantly under-estimated value. Note that n_diff for the primary key is a very important estimation in that it stands for the rows of the table. Such an abnormal estimation has unexpected bad consequences in planning and external tools. Fix: The reason is that the sampling algorithm by design needs some adjustments to get a good NDV estimation across pages, which effectively ignores one-row pages. However, primary-key columns do not need such adjustment, because NDV estimation is independent on adjacent pages. In contrast, sampling secondary index does not suffer from this problem, because the limited total width of secondary index columns effectively avoids one-row pages. Introduce a system variable called innodb_stats_skip_adjustment_for_primary_key which is enabled when skipping adjustment for primary key. --- .../r/zero-stat_n_rows-of-big-rows.result | 26 ++++++++++ ...skip_adjustment_for_primary_key-master.opt | 1 + ...ats_skip_adjustment_for_primary_key.result | 25 +++++++++ ...stats_skip_adjustment_for_primary_key.test | 52 +++++++++++++++++++ .../t/zero-stat_n_rows-of-big-rows.test | 43 +++++++++++++++ storage/innobase/dict/dict0stats.cc | 20 ++++++- storage/innobase/handler/ha_innodb.cc | 9 ++++ storage/innobase/include/srv0srv.h | 1 + storage/innobase/srv/srv0srv.cc | 1 + 9 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 mysql-test/r/zero-stat_n_rows-of-big-rows.result create mode 100644 mysql-test/suite/innodb/t/innodb_stats_skip_adjustment_for_primary_key-master.opt create mode 100644 mysql-test/suite/sys_vars/r/innodb_stats_skip_adjustment_for_primary_key.result create mode 100644 mysql-test/suite/sys_vars/t/innodb_stats_skip_adjustment_for_primary_key.test create mode 100644 mysql-test/t/zero-stat_n_rows-of-big-rows.test diff --git a/mysql-test/r/zero-stat_n_rows-of-big-rows.result b/mysql-test/r/zero-stat_n_rows-of-big-rows.result new file mode 100644 index 000000000..3f8ff5f6e --- /dev/null +++ b/mysql-test/r/zero-stat_n_rows-of-big-rows.result @@ -0,0 +1,26 @@ +SET @saved_innodb_stats_persistent = @@GLOBAL.innodb_stats_persistent; +SET @saved_persistent_sample_pages = @@GLOBAL.innodb_stats_persistent_sample_pages; +SET @saved_skip_adjustment_for_primary_key = @@innodb_stats_skip_adjustment_for_primary_key; +create table t(c1 int primary key, c2 text(8000)) engine=innodb; +insert into t value(10000, repeat('a',8000)); +# Insert 9999 big records. All leaf pages will have one row except for the first. +set global innodb_stats_persistent=on; +set global innodb_stats_persistent_sample_pages=20; +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=ON; +analyze table t; +Table Op Msg_type Msg_text +test.t analyze status OK +select TABLE_ROWS from information_schema.tables where table_name ='t'; +TABLE_ROWS +9998 +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=OFF; +analyze table t; +Table Op Msg_type Msg_text +test.t analyze status OK +select TABLE_ROWS from information_schema.tables where table_name ='t'; +TABLE_ROWS +0 +drop table t; +SET GLOBAL innodb_stats_persistent = @saved_innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent_sample_pages = @saved_persistent_sample_pages; +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key = @saved_skip_adjustment_for_primary_key; diff --git a/mysql-test/suite/innodb/t/innodb_stats_skip_adjustment_for_primary_key-master.opt b/mysql-test/suite/innodb/t/innodb_stats_skip_adjustment_for_primary_key-master.opt new file mode 100644 index 000000000..701b22a75 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_stats_skip_adjustment_for_primary_key-master.opt @@ -0,0 +1 @@ +--innodb_stats_skip_adjustment_for_primary_key=on diff --git a/mysql-test/suite/sys_vars/r/innodb_stats_skip_adjustment_for_primary_key.result b/mysql-test/suite/sys_vars/r/innodb_stats_skip_adjustment_for_primary_key.result new file mode 100644 index 000000000..b57fec116 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_stats_skip_adjustment_for_primary_key.result @@ -0,0 +1,25 @@ +SELECT @@innodb_stats_skip_adjustment_for_primary_key; +@@innodb_stats_skip_adjustment_for_primary_key +0 +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=1; +SELECT @@innodb_stats_skip_adjustment_for_primary_key; +@@innodb_stats_skip_adjustment_for_primary_key +1 +SET SESSION innodb_stats_skip_adjustment_for_primary_key=1; +ERROR HY000: Variable 'innodb_stats_skip_adjustment_for_primary_key' is a GLOBAL variable and should be set with SET GLOBAL +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=100; +ERROR 42000: Variable 'innodb_stats_skip_adjustment_for_primary_key' can't be set to the value of '100' +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=foo; +ERROR 42000: Variable 'innodb_stats_skip_adjustment_for_primary_key' can't be set to the value of 'foo' +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=OFF ; +SELECT @@innodb_stats_skip_adjustment_for_primary_key; +@@innodb_stats_skip_adjustment_for_primary_key +0 +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=ON ; +SELECT @@innodb_stats_skip_adjustment_for_primary_key; +@@innodb_stats_skip_adjustment_for_primary_key +1 +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=Default ; +SELECT @@innodb_stats_skip_adjustment_for_primary_key; +@@innodb_stats_skip_adjustment_for_primary_key +0 diff --git a/mysql-test/suite/sys_vars/t/innodb_stats_skip_adjustment_for_primary_key.test b/mysql-test/suite/sys_vars/t/innodb_stats_skip_adjustment_for_primary_key.test new file mode 100644 index 000000000..84fe04ca6 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_stats_skip_adjustment_for_primary_key.test @@ -0,0 +1,52 @@ +############################################################################### +# # +# Variable Name: innodb_stats_skip_adjustment_for_primary_key # +# Scope: Global # +# Access Type: Dynamic # +# Data Type: numeric # +# # +# # +# Creation Date: 2021-09-15 # +# Author : hcduan # +# # +# # +# Description: # +# * Value check # +# * Scope check # +# # +############################################################################### + + +#################################################################### +# Display default value # +#################################################################### +SELECT @@innodb_stats_skip_adjustment_for_primary_key; + +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=1; + +SELECT @@innodb_stats_skip_adjustment_for_primary_key; + +# check error +--error ER_GLOBAL_VARIABLE +SET SESSION innodb_stats_skip_adjustment_for_primary_key=1; + +# check error +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=100; + +# check error +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=foo; + +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=OFF ; + +SELECT @@innodb_stats_skip_adjustment_for_primary_key; + +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=ON ; + +SELECT @@innodb_stats_skip_adjustment_for_primary_key; + +# Check with default setting +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=Default ; + +SELECT @@innodb_stats_skip_adjustment_for_primary_key; diff --git a/mysql-test/t/zero-stat_n_rows-of-big-rows.test b/mysql-test/t/zero-stat_n_rows-of-big-rows.test new file mode 100644 index 000000000..e0d63baa2 --- /dev/null +++ b/mysql-test/t/zero-stat_n_rows-of-big-rows.test @@ -0,0 +1,43 @@ +# Backup the original value of the variables +SET @saved_innodb_stats_persistent = @@GLOBAL.innodb_stats_persistent; +SET @saved_persistent_sample_pages = @@GLOBAL.innodb_stats_persistent_sample_pages; +SET @saved_skip_adjustment_for_primary_key = @@innodb_stats_skip_adjustment_for_primary_key; + +create table t(c1 int primary key, c2 text(8000)) engine=innodb; +insert into t value(10000, repeat('a',8000)); + +--echo # Insert 9999 big records. All leaf pages will have one row except for the first. +--let $total_rows_number=10000 +--let $iterator=1 + +--disable_query_log +--disable_result_log + +begin; +while ($iterator < $total_rows_number) +{ + --eval insert into t values($iterator, repeat('a', 8000)); + --inc $iterator +} +commit; + +--enable_result_log +--enable_query_log + +set global innodb_stats_persistent=on; +set global innodb_stats_persistent_sample_pages=20; + +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=ON; +analyze table t; +select TABLE_ROWS from information_schema.tables where table_name ='t'; + +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key=OFF; +analyze table t; +select TABLE_ROWS from information_schema.tables where table_name ='t'; + +drop table t; + +# Cleanup +SET GLOBAL innodb_stats_persistent = @saved_innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent_sample_pages = @saved_persistent_sample_pages; +SET GLOBAL innodb_stats_skip_adjustment_for_primary_key = @saved_skip_adjustment_for_primary_key; diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 585ca980c..24a1f2705 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1525,8 +1525,24 @@ static void dict_stats_analyze_index_for_n_prefix( our algo would (correctly) get an estimate that there are 2 distinct records per page (average). Having 4 pages below non-boring records, it would (wrongly) estimate the number - of distinct records to 8. */ - if (n_diff_on_leaf_page > 0) { + of distinct records to 8. + + Bugfix: ANALYZE InnoDB wide tables incorrectly to get zero + or seriously underestimated statistics + + If there was just one big record on each clustered index page, + the algorithm will get a result of zero row in the table, which + gives the false illusion that the table is empty or significantly + under-estimated value. + The original algo ignores one-row pages. However, primary-key columns + do not need such adjustment, because the number of distinct values + is independent of adjacent pages. In contrast, sampling secondary index + does not suffer from this problem, because the limited total width of + secondary index columns effectively avoids one-row pages. + */ + if (!(srv_stats_skip_adjustment_for_primary_key && + (index->type & DICT_CLUSTERED) && n_prefix == index->n_uniq) && + n_diff_on_leaf_page > 0) { n_diff_on_leaf_page--; } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9911b21f2..120eea0b3 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -21424,6 +21424,14 @@ static MYSQL_SYSVAR_BOOL( "Include delete marked records when calculating persistent statistics", nullptr, nullptr, FALSE); +static MYSQL_SYSVAR_BOOL( + stats_skip_adjustment_for_primary_key, + srv_stats_skip_adjustment_for_primary_key, + PLUGIN_VAR_OPCMDARG, + "Skip adjustment NDV for estimating primary key in the leaf page " + "when calculating persistent statistics of clustered index", + nullptr, nullptr, FALSE); + static MYSQL_SYSVAR_ULONG( io_capacity, srv_io_capacity, PLUGIN_VAR_RQCMDARG, "Number of IOPs the server can do. Tunes the background IO rate", nullptr, @@ -22758,6 +22766,7 @@ static SYS_VAR *innobase_system_variables[] = { MYSQL_SYSVAR(doublewrite_files), MYSQL_SYSVAR(doublewrite_pages), MYSQL_SYSVAR(stats_include_delete_marked), + MYSQL_SYSVAR(stats_skip_adjustment_for_primary_key), MYSQL_SYSVAR(api_enable_binlog), MYSQL_SYSVAR(api_enable_mdl), MYSQL_SYSVAR(api_disable_rowlock), diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index c5cf25638..18e51a93a 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -716,6 +716,7 @@ extern bool srv_stats_persistent; extern unsigned long long srv_stats_persistent_sample_pages; extern bool srv_stats_auto_recalc; extern bool srv_stats_include_delete_marked; +extern bool srv_stats_skip_adjustment_for_primary_key; extern ulong srv_checksum_algorithm; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 2bf8bcea4..e2bb4f05a 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -558,6 +558,7 @@ this many index pages, there are 2 ways to calculate statistics: unsigned long long srv_stats_transient_sample_pages = 8; bool srv_stats_persistent = TRUE; bool srv_stats_include_delete_marked = FALSE; +bool srv_stats_skip_adjustment_for_primary_key = FALSE; unsigned long long srv_stats_persistent_sample_pages = 20; bool srv_stats_auto_recalc = TRUE; -- 2.32.0