Bug #115242 Unexpected dd_table_open_on_id_low() failure on concurrent RENAME TABLE
Submitted: 6 Jun 8:43 Modified: 20 Jun 12:56
Reporter: Kaiwang CHen (OCA) Email Updates:
Status: Verified Impact on me:
Category:MySQL Server: InnoDB storage engine Severity:S3 (Non-critical)
Version:8.0.37, 8.4.0 OS:Any
Assigned to: CPU Architecture:Any
Tags: Contribution

[6 Jun 8:43] Kaiwang CHen
    When a RENAME TABLE session and the master thread are executed
    concurrently, the tables involved might be evicted from the dict
    cache. Opening an evicted table by dd_table_open_on_id() is delegated
    to dd_table_open_on_id_low(), which gets the table name by se private
    id before acquiring metadata locks.
    There was a chance that the session opening the table got an old name
    and tried to acquire the DD object with the old name, when the
    RENAME TABLE session committed changes between the two actions. That
    case was doomed and would cause an unexpected open failure.
    Although the problem was general, it was observed as service
    outages in production workload after gh-ost, which performed
    switchover by RENAME TABLE, on instances with lots of user tables
    of proper sizes, because stale stats due to the recalc event lost
    by the open failure resulted in bad execution plans.

How to repeat:
With a patched unfixed version of 8.0.37, the following test caused a debug assert which verified the open failure.

The patch to fix this problem is provided in the next comment. Revert the fix as follows to verify the problem.

diff --git a/storage/innobase/dict/dict0dd.cc b/storage/innobase/dict/dict0dd.cc
index c1bed08ca63..f6f662c8863 100644
--- a/storage/innobase/dict/dict0dd.cc
+++ b/storage/innobase/dict/dict0dd.cc
@@ -567,7 +567,7 @@ static dict_table_t *dd_table_open_on_id_low(THD *thd, MDL_ticket **mdl,
           dd_mdl_release(thd, mdl);
         // The table could have been renamed. Retry.
-        continue;
+        return nullptr;
       const bool is_part = dd_table_is_partitioned(*dd_table);

Here is the test script:

SET GLOBAL DEBUG="+d,debug_stats,dict_cache";

#-- master thread
SET GLOBAL DEBUG="+d,master_debug_evict";
#-- SET GLOBAL DEBUG="+d,dict_cache_force_out_all_can_be_evicted";
#-- SET DEBUG_SYNC="before_evict_from_table_cache WAIT_FOR dict_object_closed";
#-- SET DEBUG_SYNC="after_evict_from_table_cache SIGNAL dict_object_evicted";

#-- stats bg thread
SET GLOBAL DEBUG="+d,stats_bg_debug_open";
#-- SET DEBUG_SYNC="stats_bg_before_table_open WAIT_FOR dict_object_evicted";
#-- SET DEBUG_SYNC="after_table_open_low_se_private_id SIGNAL se_priv_id_resolved";
#-- SET DEBUG_SYNC="before_table_open_low_mdl WAIT_FOR rename_commited";

#-- rename session
create table tx (c1 int, c2 varchar(10), primary key (c1));
insert tx values (1,'a'), (2,'b');
SET DEBUG_SYNC="now WAIT_FOR recalc_pool_get";
SET DEBUG_SYNC="after_table_close_for_rename_tables SIGNAL dict_object_closed";
SET DEBUG_SYNC="before_commit_for_rename_tables WAIT_FOR se_priv_id_resolved";
SET DEBUG_SYNC="after_commit_for_rename_tables SIGNAL rename_commited";
rename table tx to ty;
SET DEBUG_SYNC="now WAIT_FOR stats_updated";
drop table ty;


Here's the assert stack to verify the problem:

bld-debug-8.0.37/runtime_output_directory/mysqld(my_print_stacktrace(unsigned char const*, unsigned long)+0x43) [0x4b14e4b]
bld-debug-8.0.37/runtime_output_directory/mysqld(print_fatal_signal(int)+0x390) [0x372179a]
bld-debug-8.0.37/runtime_output_directory/mysqld(handle_fatal_signal+0x69) [0x3721970]
/lib64/libpthread.so.0(+0x12cf0) [0x7f64271a0cf0]
/lib64/libc.so.6(gsignal+0x10f) [0x7f64254b9acf]
/lib64/libc.so.6(abort+0x127) [0x7f642548cea5]
/lib64/libc.so.6(+0x21d79) [0x7f642548cd79]
/lib64/libc.so.6(+0x47426) [0x7f64254b2426]
bld-debug-8.0.37/runtime_output_directory/mysqld() [0x50ccf32]
bld-debug-8.0.37/runtime_output_directory/mysqld(dict_stats_thread()+0x121) [0x50cd2f6]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::__invoke_impl<void, void (*&)()>(std::__invoke_other, void (*&)())+0x1d) [0x4cf02f7]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::__invoke_result<void (*&)()>::type std::__invoke<void (*&)()>(void (*&)())+0x20) [0x4cf0271]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::_Bind<void (*())()>::__call<void>(std::tuple<>&&, std::_Index_tuple<>)+0x1c) [0x4cf0106]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::_Bind<void (*())()>::operator()<, void>()+0x24) [0x4cefda4]
bld-debug-8.0.37/runtime_output_directory/mysqld(void Detached_thread::operator()<void (*)()>(void (*&&)())+0xb5) [0x4cef97f]
bld-debug-8.0.37/runtime_output_directory/mysqld(void std::__invoke_impl<void, Detached_thread, void (*)()>(std::__invoke_other, Detached_thread&&, void (*&&)())+0x37) [0x4cef53b]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::__invoke_result<Detached_thread, void (*)()>::type std::__invoke<Detached_thread, void (*)()>(Detached_thread&&, void (*&&)())+0x37) [0x4ceebef]
bld-debug-8.0.37/runtime_output_directory/mysqld(decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>)+0x43) [0x4cf046b]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> >::operator()()+0x18) [0x4cf0426]
bld-debug-8.0.37/runtime_output_directory/mysqld(std::thread::_State_impl<std::thread::_Invoker<std::tuple<Detached_thread, void (*)()> > >::_M_run()+0x1c) [0x4cf040a]
/lib64/libstdc++.so.6(+0xc2b13) [0x7f6425e8cb13]
/lib64/libpthread.so.0(+0x81ca) [0x7f64271961ca]
/lib64/libc.so.6(clone+0x43) [0x7f64254a4e73]

Suggested fix:
Add retry for RENAME in dd_table_open_on_id_low().
[6 Jun 8:48] Kaiwang CHen
Fix it by making the retry loop in dd_table_open_on_id_low() recognize the acquiring failure caused by the old name.

(*) I confirm the code being submitted is offered under the terms of the OCA, and that I am authorized to contribute it.

Contribution: bug115242.patch (application/octet-stream, text), 10.78 KiB.

[6 Jun 9:18] MySQL Verification Team
Hello Kaiwang,

Thank you for the report and contribution.

[20 Jun 12:56] Kaiwang CHen
Add version 8.4.0