Description:
When a RENAME TABLE session and the master thread are executed
concurrently, the tables involved might be evicted from the dict
cache.
Opening an evicted table by dd_table_open_on_id() is delegated to
dd_table_open_on_id_low(), which gets the table name by se private id
before acquiring metadata locks.
There was a chance that the session opening the table got an old name
and tried to acquire the DD object with the old name, when the
RENAME TABLE session committed its changes between the two actions. Such
an open attempt was doomed and would cause an unexpected open failure.
Although the problem was general, it was observed as service
outages in production workload after gh-ost, which performed
switchover by RENAME TABLE, on instances with lots of user tables
of proper sizes, because stale stats, caused by recalc events lost
due to the open failure, resulted in bad execution plans.
How to repeat:
With a patched unfixed version of 8.0.37, the following test caused a debug assert which verified the open failure.
The patch to fix this problem is provided in the next comment. Revert the fix as follows to verify the problem.
diff --git a/storage/innobase/dict/dict0dd.cc b/storage/innobase/dict/dict0dd.cc
index c1bed08ca63..f6f662c8863 100644
--- a/storage/innobase/dict/dict0dd.cc
+++ b/storage/innobase/dict/dict0dd.cc
@@ -567,7 +567,7 @@ static dict_table_t *dd_table_open_on_id_low(THD *thd, MDL_ticket **mdl,
dd_mdl_release(thd, mdl);
}
// The table could have been renamed. Retry.
- continue;
+ return nullptr;
}
const bool is_part = dd_table_is_partitioned(*dd_table);
Here is the test script:
SET GLOBAL DEBUG="+d,debug_stats,dict_cache";
#-- master thread
SET GLOBAL DEBUG="+d,master_debug_evict";
#-- SET GLOBAL DEBUG="+d,dict_cache_force_out_all_can_be_evicted";
#-- SET DEBUG_SYNC="before_evict_from_table_cache WAIT_FOR dict_object_closed";
#-- SET DEBUG_SYNC="after_evict_from_table_cache SIGNAL dict_object_evicted";
#-- stats bg thread
SET GLOBAL DEBUG="+d,stats_bg_debug_open";
#-- SET DEBUG_SYNC="stats_bg_before_table_open WAIT_FOR dict_object_evicted";
#-- SET DEBUG_SYNC="after_table_open_low_se_private_id SIGNAL se_priv_id_resolved";
#-- SET DEBUG_SYNC="before_table_open_low_mdl WAIT_FOR rename_commited";
#-- rename session
create table tx (c1 int, c2 varchar(10), primary key (c1));
insert tx values (1,'a'), (2,'b');
SET DEBUG_SYNC="now WAIT_FOR recalc_pool_get";
SET DEBUG_SYNC="after_table_close_for_rename_tables SIGNAL dict_object_closed";
SET DEBUG_SYNC="before_commit_for_rename_tables WAIT_FOR se_priv_id_resolved";
SET DEBUG_SYNC="after_commit_for_rename_tables SIGNAL rename_commited";
rename table tx to ty;
SET DEBUG_SYNC="now WAIT_FOR stats_updated";
drop table ty;
SET GLOBAL DEBUG=RESET;
Here's the assert stack to verify the problem:
bld-debug-8.0.37/runtime_output_directory/mysqld(my_print_stacktrace(unsigned char const*, unsigned long)+0x43) [0x4b14e4b]
bld-debug-8.0.37/runtime_output_directory/mysqld(print_fatal_signal(int)+0x390) [0x372179a]
bld-debug-8.0.37/runtime_output_directory/mysqld(handle_fatal_signal+0x69) [0x3721970]
/lib64/libpthread.so.0(+0x12cf0) [0x7f64271a0cf0]
/lib64/libc.so.6(gsignal+0x10f) [0x7f64254b9acf]
/lib64/libc.so.6(abort+0x127) [0x7f642548cea5]
/lib64/libc.so.6(+0x21d79) [0x7f642548cd79]
/lib64/libc.so.6(+0x47426) [0x7f64254b2426]
bld-debug-8.0.37/runtime_output_directory/mysqld() [0x50ccf32]
bld-debug-8.0.37/runtime_output_directory/mysqld(dict_stats_thread()+0x121) [0x50cd2f6]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::__invoke_impl<void, void (*&)()>(std::__invoke_other, void (*&)())+0x1d) [0x4cf02f7]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::__invoke_result<void (*&)()>::type std::__invoke<void (*&)()>(void (*&)())+0x20) [0x4cf0271]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::_Bind<void (*())()>::__call<void>(std::tuple<>&&, std::_Index_tuple<>)+0x1c) [0x4cf0106]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::_Bind<void (*())()>::operator()<, void>()+0x24) [0x4cefda4]
bld-debug-8.0.37/runtime_output_directory/mysqld(void Detached_thread::operator()<void (*)()>(void (*&&)())+0xb5) [0x4cef97f]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::__invoke_impl<void, Detached_thread, void (*)()>(std::__invoke_other, Detached_thread&&, void (*&&)())+0x37) [0x4cef53b]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::__invoke_result<Detached_thread, void (*)()>::type std::__invoke<Detached_thread, void (*)()>(Detached_thread&&, void (*&&)())+0x37) [0x4ceebef]
bld-debug-8.0.37/runtime_output_directory/mysqld(decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>)+0x43) [0x4cf046b]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> >::operator()()+0x18) [0x4cf0426]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::thread::_State_impl<std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> > >::_M_run()+0x1c) [0x4cf040a]
/lib64/libstdc++.so.6(+0xc2b13) [0x7f6425e8cb13]
/lib64/libpthread.so.0(+0x81ca) [0x7f64271961ca]
/lib64/libc.so.6(clone+0x43) [0x7f64254a4e73]
Suggested fix:
Add retry for RENAME in dd_table_open_on_id_low().