Description:
Both foreground ANALYZE commands and the background statistics thread invoke dict_stats_analyze_index() to gather index statistics.
These InnoDB persistent statistics are kept in shared dict objects and read without latching by the cost-based optimizer to calculate costs for access paths including table scan and index-based ones.
There are two kinds of problems due to empty statistics:
1) When dict_stats_analyze_index() is in progress, empty index statistics may be read concurrently by the server. For a big enough table, REF access path could be disabled, even if it could be much more efficient than table scan.
2) When dict_stats_analyze_index() does an error return, empty index statistics lasts until the next call on the same index. There are two kinds of consequences. One is always table scan due to implied zero table rows. The other is disabled REF access path as above.
In these cases, optimal index-based access paths could be always disabled until statistics are refreshed, and table scan is used even for very big tables which hurts the server badly.
Note that "BUG#98132 Analyze table leads to empty statistics during online rebuild DDL" could be classified into the second kind. It is a serious production issue as Online DDL becomes widely used. A rare case with tree height change also falls into the second kind.
How to repeat:
Note: For tests about the abnormal return, please refer to the contributed patch.
Because rec_per_key is cached in TABLE_SHARE (table definition cache), the problem occurs only in certain cases where open_table_from_share() are called while analyze is in progress. However, the situation could be simulated by some DEBUG changes: 1) make TABLE_LIST::fetch_number_of_rows() always check the InnoDB dict cache, 2) make the analyze task pause right after emptying the shared state.
diff --git a/sql/table.cc b/sql/table.cc
index d6c2ddd3d..8f28dd387 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -6624,8 +6624,12 @@ int TABLE_LIST::fetch_number_of_rows() {
std::max(select_lex->master_unit()->query_result()->estimated_rowcount,
// Recursive reference is never a const table
(ha_rows)PLACEHOLDER_TABLE_ROW_ESTIMATE);
- } else
- error = table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ } else {
+ uint flag = HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK;
+ DBUG_EXECUTE_IF("fetch_number_of_rows_info_const",
+ { flag |= HA_STATUS_CONST; });
+ error = table->file->info(flag);
+ }
return error;
}
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index bcb43ce0b..8135c7781 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1644,6 +1644,12 @@ static void dict_stats_analyze_index(
dict_stats_empty_index(index);
+#ifdef UNIV_DEBUG
+ if (!(index->type & DICT_CLUSTERED)) {
+ DEBUG_SYNC_C("dict_stats_analyze_index_empty_sk");
+ }
+#endif /* UNIV_DEBUG */
+
mtr_start(&mtr);
mtr_s_lock(dict_index_get_lock(index), &mtr);
Then it could be repeated by the following test:
# BUG# Zero dict_index_t::stat_n_diff_key_vals during ANALYZE
# may disable ref access thus bad performance.
--source include/have_debug.inc
--source include/have_debug_sync.inc
connect (other,localhost,root,,);
create table t1 (a int primary key auto_increment, b int);
create table t2 (a int primary key auto_increment, b int, c int, key(b));
--echo # Prepare data ...
--disable_query_log
insert t1 values (1,1), (2,2);
let $i=16;
while ($i)
{
--eval insert t2 values ($i,$i,$i)
dec $i;
}
--enable_query_log
analyze table t1;
analyze table t2;
--echo # Simulate HA_STATUS_CONST in fetch_number_of_rows(), because
--echo # ha_innobase::open() is only invoked on table cache miss.
SET SESSION DEBUG='+d,fetch_number_of_rows_info_const';
--echo # With correct n_diff, ref access is used for t2.
# Favor index statistics over index dive.
SET eq_range_index_dive_limit = 1;
EXPLAIN select * from t2 where b = 1;
SET eq_range_index_dive_limit = DEFAULT;
EXPLAIN select * from t1 join t2 using (b);
connection other;
--echo # Let ANALYZE pause right after dict_stats_empty_index() on the first secondary index.
SET DEBUG_SYNC='dict_stats_analyze_index_empty_sk wait_for continue_analyze execute 1';
--send analyze table t2;
connection default;
--echo # With zero n_diff, table scan is used for t2.
--echo # With correct n_diff, ref access is used for t2.
# Favor index statistics over index dive.
SET eq_range_index_dive_limit = 1;
EXPLAIN select * from t2 where b = 1;
SET eq_range_index_dive_limit = DEFAULT;
EXPLAIN select * from t1 join t2 using (b);
SET DEBUG_SYNC = "now signal continue_analyze";
connection other;
--reap
connection default;
disconnect other;
SET SESSION DEBUG='-d,fetch_number_of_rows_info_const';
drop table t1, t2;
SET DEBUG_SYNC = 'RESET';
Suggested fix:
Analyze with a private copy and overwrite at the end, rather than in-place empty-analyze-fill in dict_stats_analyze_index(). It does not fix concurrent read, but prevents empty statistics.
Overwrite only on a successful analyze; otherwise, the stats are leaved in the old state which is guaranteed to be initialized by a preceding dict_stats_init(). Retrying mechanism is introduced for the rare case, or ensured by the outer workflow in other situations.