From 9e9e1ff011b365aac26fa7d0e470397521799040 Mon Sep 17 00:00:00 2001 From: hcduan Date: Thu, 16 Sep 2021 23:12:07 +0800 Subject: [PATCH] [bugfix] Analyze table leads to empty statistics during online rebuild DDL This is a postfix. BUG#98132 is better titled: BUG#98132 Zero dict_table_t::stat_n_rows during Online ALTER may lead to table scan thus bad performance. During Online ALTER (WL#5534), table scan might be used to query a big table thus bad performance. Because the optimizer might get the false illusion of the table is empty, table scan may beat any other access method in test_quick_select(). The direct cause is that info() reads zero dict_table_t::stat_n_rows for ha_statistics::records. Note that in dict_stats_update_persistent(), table->stat_n_rows is assigned zero with empty primary index stats leftover by dict_stats_analyze_index(). However, the problematic index statistics themselves do not play a significant role in this case. So, rewrite the test. --- .../zero-stat_n_rows-during-online-ddl.result | 439 ++++++++++++++++++ .../t/zero-stat_n_rows-during-online-ddl.test | 92 ++++ 2 files changed, 531 insertions(+) create mode 100644 mysql-test/r/zero-stat_n_rows-during-online-ddl.result create mode 100644 mysql-test/t/zero-stat_n_rows-during-online-ddl.test diff --git a/mysql-test/r/zero-stat_n_rows-during-online-ddl.result b/mysql-test/r/zero-stat_n_rows-during-online-ddl.result new file mode 100644 index 00000000000..f98706f288d --- /dev/null +++ b/mysql-test/r/zero-stat_n_rows-during-online-ddl.result @@ -0,0 +1,439 @@ +SET optimizer_trace_max_mem_size=1048576; +SET optimizer_trace="enabled=on,one_line=off"; +create table t1 (a int primary key auto_increment, x int, b int, c char(64), key(b)); +insert t1 values (1,1,1,'x'),(2,2,2,'x'); +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +n_rows clustered_index_size +144 1 +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +index_name stat_value sample_size +PRIMARY 144 1 +b 2 1 +# With correct stat_n_rows, it is range access. +explain select * from t1 where b >= 2; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range b b 5 NULL 16 100.00 Using index condition +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`x` AS `x`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`b` >= 2) +SELECT TRACE FROM information_schema.OPTIMIZER_TRACE; +TRACE +{ + "steps": [ + { + "join_preparation": { + "select#": 1, + "steps": [ + { + "expanded_query": "/* select#1 */ select `t1`.`a` AS `a`,`t1`.`x` AS `x`,`t1`.`b` AS `b`,`t1`.`c` AS `c` from `t1` where (`t1`.`b` >= 2)" + } + ] + } + }, + { + "join_optimization": { + "select#": 1, + "steps": [ + { + "condition_processing": { + "condition": "WHERE", + "original_condition": "(`t1`.`b` >= 2)", + "steps": [ + { + "transformation": "equality_propagation", + "resulting_condition": "(`t1`.`b` >= 2)" + }, + { + "transformation": "constant_propagation", + "resulting_condition": "(`t1`.`b` >= 2)" + }, + { + "transformation": "trivial_condition_removal", + "resulting_condition": "(`t1`.`b` >= 2)" + } + ] + } + }, + { + "substitute_generated_columns": { + } + }, + { + "table_dependencies": [ + { + "table": "`t1`", + "row_may_be_null": false, + "map_bit": 0, + "depends_on_map_bits": [ + ] + } + ] + }, + { + "ref_optimizer_key_uses": [ + ] + }, + { + "rows_estimation": [ + { + "table": "`t1`", + "range_analysis": { + "table_scan": { + "rows": 144, + "cost": 31.9 + }, + "potential_range_indexes": [ + { + "index": "PRIMARY", + "usable": false, + "cause": "not_applicable" + }, + { + "index": "b", + "usable": true, + "key_parts": [ + "b", + "a" + ] + } + ], + "setup_range_conditions": [ + ], + "group_index_range": { + "chosen": false, + "cause": "not_group_by_or_distinct" + }, + "analyzing_range_alternatives": { + "range_scan_alternatives": [ + { + "index": "b", + "ranges": [ + "2 <= b" + ], + "index_dives_for_eq_ranges": true, + "rowid_ordered": false, + "using_mrr": false, + "index_only": false, + "rows": 16, + "cost": 20.21, + "chosen": true + } + ], + "analyzing_roworder_intersect": { + "usable": false, + "cause": "too_few_roworder_scans" + } + }, + "chosen_range_access_summary": { + "range_access_plan": { + "type": "range_scan", + "index": "b", + "rows": 16, + "ranges": [ + "2 <= b" + ] + }, + "rows_for_plan": 16, + "cost_for_plan": 20.21, + "chosen": true + } + } + } + ] + }, + { + "considered_execution_plans": [ + { + "plan_prefix": [ + ], + "table": "`t1`", + "best_access_path": { + "considered_access_paths": [ + { + "rows_to_scan": 16, + "access_type": "range", + "range_details": { + "used_index": "b" + }, + "resulting_rows": 16, + "cost": 23.41, + "chosen": true + } + ] + }, + "condition_filtering_pct": 100, + "rows_for_plan": 16, + "cost_for_plan": 23.41, + "chosen": true + } + ] + }, + { + "attaching_conditions_to_tables": { + "original_condition": "(`t1`.`b` >= 2)", + "attached_conditions_computation": [ + ], + "attached_conditions_summary": [ + { + "table": "`t1`", + "attached": "(`t1`.`b` >= 2)" + } + ] + } + }, + { + "refine_plan": [ + { + "table": "`t1`", + "pushed_index_condition": "(`t1`.`b` >= 2)", + "table_condition_attached": null + } + ] + } + ] + } + }, + { + "join_explain": { + "select#": 1, + "steps": [ + ] + } + } + ] +} +SET DEBUG_SYNC = "innodb_inplace_alter_table_enter signal ddl_in_progress wait_for finish_ddl execute 1"; +alter table t1 drop column x;; +SET DEBUG_SYNC = "now wait_for ddl_in_progress"; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +n_rows clustered_index_size +144 1 +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +index_name stat_value sample_size +PRIMARY 144 1 +b 2 1 +# With zero stat_n_rows, it is table scan even if the real table might be very big. +# With correct stat_n_rows, it is range access. +explain select * from t1 where b >= 2; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range b b 5 NULL 16 100.00 Using index condition +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`x` AS `x`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`b` >= 2) +SELECT TRACE FROM information_schema.OPTIMIZER_TRACE; +TRACE +{ + "steps": [ + { + "join_preparation": { + "select#": 1, + "steps": [ + { + "expanded_query": "/* select#1 */ select `t1`.`a` AS `a`,`t1`.`x` AS `x`,`t1`.`b` AS `b`,`t1`.`c` AS `c` from `t1` where (`t1`.`b` >= 2)" + } + ] + } + }, + { + "join_optimization": { + "select#": 1, + "steps": [ + { + "condition_processing": { + "condition": "WHERE", + "original_condition": "(`t1`.`b` >= 2)", + "steps": [ + { + "transformation": "equality_propagation", + "resulting_condition": "(`t1`.`b` >= 2)" + }, + { + "transformation": "constant_propagation", + "resulting_condition": "(`t1`.`b` >= 2)" + }, + { + "transformation": "trivial_condition_removal", + "resulting_condition": "(`t1`.`b` >= 2)" + } + ] + } + }, + { + "substitute_generated_columns": { + } + }, + { + "table_dependencies": [ + { + "table": "`t1`", + "row_may_be_null": false, + "map_bit": 0, + "depends_on_map_bits": [ + ] + } + ] + }, + { + "ref_optimizer_key_uses": [ + ] + }, + { + "rows_estimation": [ + { + "table": "`t1`", + "range_analysis": { + "table_scan": { + "rows": 144, + "cost": 31.9 + }, + "potential_range_indexes": [ + { + "index": "PRIMARY", + "usable": false, + "cause": "not_applicable" + }, + { + "index": "b", + "usable": true, + "key_parts": [ + "b", + "a" + ] + } + ], + "setup_range_conditions": [ + ], + "group_index_range": { + "chosen": false, + "cause": "not_group_by_or_distinct" + }, + "analyzing_range_alternatives": { + "range_scan_alternatives": [ + { + "index": "b", + "ranges": [ + "2 <= b" + ], + "index_dives_for_eq_ranges": true, + "rowid_ordered": false, + "using_mrr": false, + "index_only": false, + "rows": 16, + "cost": 20.21, + "chosen": true + } + ], + "analyzing_roworder_intersect": { + "usable": false, + "cause": "too_few_roworder_scans" + } + }, + "chosen_range_access_summary": { + "range_access_plan": { + "type": "range_scan", + "index": "b", + "rows": 16, + "ranges": [ + "2 <= b" + ] + }, + "rows_for_plan": 16, + "cost_for_plan": 20.21, + "chosen": true + } + } + } + ] + }, + { + "considered_execution_plans": [ + { + "plan_prefix": [ + ], + "table": "`t1`", + "best_access_path": { + "considered_access_paths": [ + { + "rows_to_scan": 16, + "access_type": "range", + "range_details": { + "used_index": "b" + }, + "resulting_rows": 16, + "cost": 23.41, + "chosen": true + } + ] + }, + "condition_filtering_pct": 100, + "rows_for_plan": 16, + "cost_for_plan": 23.41, + "chosen": true + } + ] + }, + { + "attaching_conditions_to_tables": { + "original_condition": "(`t1`.`b` >= 2)", + "attached_conditions_computation": [ + ], + "attached_conditions_summary": [ + { + "table": "`t1`", + "attached": "(`t1`.`b` >= 2)" + } + ] + } + }, + { + "refine_plan": [ + { + "table": "`t1`", + "pushed_index_condition": "(`t1`.`b` >= 2)", + "table_condition_attached": null + } + ] + } + ] + } + }, + { + "join_explain": { + "select#": 1, + "steps": [ + ] + } + } + ] +} +SET DEBUG_SYNC = "now signal finish_ddl"; +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +n_rows clustered_index_size +144 1 +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +index_name stat_value sample_size +PRIMARY 144 1 +b 2 1 +# With stat_n_rows corrected by the completion of the online alter, it is range access again. +explain select * from t1 where b >= 2; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range b b 5 NULL 16 100.00 Using index condition +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`b` >= 2) +drop table t1; +SET DEBUG_SYNC = 'RESET'; diff --git a/mysql-test/t/zero-stat_n_rows-during-online-ddl.test b/mysql-test/t/zero-stat_n_rows-during-online-ddl.test new file mode 100644 index 00000000000..6848405712a --- /dev/null +++ b/mysql-test/t/zero-stat_n_rows-during-online-ddl.test @@ -0,0 +1,92 @@ +# BUG#98132 Zero dict_table_t::stat_n_rows during Online ALTER +# may lead to table scan thus bad performance. +# +# Note: +# +# This test requires non-blocking ANALYZE. Otherwise, the ANALYZE command +# used to simulate async invocation of dict_stats_update() will not finish +# until TABLE_SHARE is released by the DDL after debug sync timeout. +# +# Use the non-blocking fix in MySQL 8.0.24: +# +# BUG#32224917: ANALYZE TABLE TAKES TABLE LOCK DURING INDEX +# STATS UPDATE, CAUSES QUERY PILEUP +# +# Or the fix from Percona: +# +# https://www.percona.com/blog/2018/03/27/analyze-table-is-no-longer-a-blocking-operation/ +# +# Otherwise, temporarily disable the following invocation in mysql_admin_table(), with +# +# SET DEBUG = '+d,skip_flush_for_analyze'; +# +# bool skip_flush = false; +# DBUG_EXECUTE_IF("skip_flush_for_analyze", { skip_flush = true; }); +# +# ... +# +# } else if ((!skip_flush && open_for_modify) || fatal_error) { +# tdc_remove_table(thd, TDC_RT_REMOVE_UNUSED, table->db, +# table->table_name, false); +# } else { +# + +--source include/have_debug.inc +--source include/have_debug_sync.inc + +connect (other,localhost,root,,); + +connection default; +SET optimizer_trace_max_mem_size=1048576; # 1MB +SET optimizer_trace="enabled=on,one_line=off"; +create table t1 (a int primary key auto_increment, x int, b int, c char(64), key(b)); +insert t1 values (1,1,1,'x'),(2,2,2,'x'); +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 1; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +insert t1(x,b,c) select x,b,c from t1 where b = 2; +analyze table t1; +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +--echo # With correct stat_n_rows, it is range access. +explain select * from t1 where b >= 2; +SELECT TRACE FROM information_schema.OPTIMIZER_TRACE; + +connection other; +SET DEBUG_SYNC = "innodb_inplace_alter_table_enter signal ddl_in_progress wait_for finish_ddl execute 1"; +--send alter table t1 drop column x; + +connection default; +SET DEBUG_SYNC = "now wait_for ddl_in_progress"; +# Use non-blocking ANALYZE to simulate async invocation of dict_stats_update() in the background stats thread. +# Otherwise, the next query waits the completion of the online alter which corrects the stats. +# SET DEBUG = '+d,skip_flush_for_analyze'; +analyze table t1; +# SET DEBUG = '-d,skip_flush_for_analyze'; +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +--echo # With zero stat_n_rows, it is table scan even if the real table might be very big. +--echo # With correct stat_n_rows, it is range access. +explain select * from t1 where b >= 2; +SELECT TRACE FROM information_schema.OPTIMIZER_TRACE; +SET DEBUG_SYNC = "now signal finish_ddl"; + +connection other; +--reap + +connection default; +select n_rows,clustered_index_size from mysql.innodb_table_stats where table_name = 't1'; +select index_name,stat_value,sample_size from mysql.innodb_index_stats where table_name = 't1' and stat_name = 'n_diff_pfx01'; +--echo # With stat_n_rows corrected by the completion of the online alter, it is range access again. +explain select * from t1 where b >= 2; + +drop table t1; +disconnect other; +SET DEBUG_SYNC = 'RESET'; -- 2.32.0