From fe56fbd4e4e474697ea346cadcb868af02b0b6d2 Mon Sep 17 00:00:00 2001 From: Nicholas Othieno Date: Thu, 19 Oct 2023 14:48:34 -0400 Subject: [PATCH] Fix startup hang when memcached enabled and DB has many tables As reported in https://bugs.mysql.com/bug.php?id=103275 , when memcached is enabled and a MySQL DB has many tables, the database hangs on startup due to a deadlock between the memcached plugin and the mysqld main thread. This deadlock is because memcached is waiting for a lock that is held by the main thread whereas the main thread is waiting on a condition variable whose notifier is unknown. The cause of this deadlock is a race condition created at startup when the mysqld main thread is resetting the shared dictionary cache while at the same time memcached is populating this same cache as it accesses underlying objects during its own initialization. To solve the problem, we ensure ensure strict ordering between shared dictionary cache resetting and memcached initialization. We use a single flag protected in a critical region to communicate completion of shared dictionary cache resetting between the mysqld main thread and the memcached thread. The memcached polls this flag with a back-off period of 1 second in case it is not set. It is expected that the back-off period never to be hit more than once as the resetting of the shared dictionary cache is a quick process, and so if the memcached thread polls the flag once and finds it unset, it is likely to find it set on the next try. Rationale for implementation ============================ The mutex lock has got 2 reasons for its usage in the implementation: - To ensure that access to flag are atomic: We do not want a scenario in which the writer thread partially updates the flag and then gets evicted from execution by the OS, and then the reader thread reads the flag while it is in an inconsitent state. This scenario in modern C++ is normally handled by std::atomic_bool (std::atomic), but is not available in C for use in memcached. - We want to ensure that there is no compiler or CPU reordering of instructions on either side of the pthread_mutex. Normally with C++ one would use std::atomic_thread_fence, but it is not usable for this case because memcached is a C library rather than a C++ one. When we attempted to use it, we got linker errors, so we decided to create a "poor man's" fence. pthread_mutex has fencing capabilities, see https://stackoverflow.com/a/24138417 , and so using it allows us to make sure that the dd cache clearing instructions fully run and set the flag, before the memcached initialization happens hence ensuring strict ordering. Reason for implementing a polling with back-off period in the memcached plugin thread: We want to avoid any additional synchronization mechanisms as our main goal is ordering and not multithreaded synchronization, so we opt not to use pthread_cond_timedwait but instead do the following: - If we successfully lock the mutex, we check the flag, and if it is set, we just move on to initialization of the memcached plugin. - If we successfully lock the mutex, but find that the flag is not set, then we want to signal to the OS that we want the memcached thread to be suspended so that other threads can take its execution slot. Even though whichever thread that is chosen to take the memcached thread's slot is random, we are at least giving a chance for the main thread to to proceed and hopefully complete dd shared dictionary cache resetting. - If we do not get the lock, we want to signal to the OS that the memached thread's execution slot can be ceded to another thread while we wait for 1 second. During this time we are expecting the resetting of the dd shared dictionary cache will have completed. I.e. we expect this back-off to only happen once. This contribution is under the OCA signed by Amazon and covering submissions to the MySQL project. --- include/my_thread.h | 2 + mysys/my_init.cc | 5 +- mysys/my_thr_init.cc | 56 +++++++++++++++++++ mysys/mysys_priv.h | 3 +- .../daemon_memcached/daemon/memcached.c | 5 ++ sql/mysqld.cc | 1 + 6 files changed, 70 insertions(+), 2 deletions(-) diff --git a/include/my_thread.h b/include/my_thread.h index 9591259f5af3..cf5551c157d8 100644 --- a/include/my_thread.h +++ b/include/my_thread.h @@ -177,5 +177,7 @@ extern void my_thread_global_end(); // Need to be extern "C" for the time being, due to memcached. extern "C" bool my_thread_init(); extern "C" void my_thread_end(); +extern void my_dd_reset_tables_and_tablespaces_set_done(); +extern "C" bool my_is_dd_reset_tables_and_tablespaces_done(); #endif /* MY_THREAD_INCLUDED */ diff --git a/mysys/my_init.cc b/mysys/my_init.cc index 223627fa624c..b1bc061987dd 100644 --- a/mysys/my_init.cc +++ b/mysys/my_init.cc @@ -457,7 +457,8 @@ PSI_mutex_key key_IO_CACHE_append_buffer_lock, key_IO_CACHE_SHARE_mutex, key_KEY_CACHE_cache_lock, key_THR_LOCK_charset, key_THR_LOCK_heap, key_THR_LOCK_lock, key_THR_LOCK_malloc, key_THR_LOCK_mutex, key_THR_LOCK_myisam, key_THR_LOCK_net, key_THR_LOCK_open, - key_THR_LOCK_threads, key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap; + key_THR_LOCK_threads, key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap, + key_THR_LOCK_dd_cache; #ifdef HAVE_PSI_MUTEX_INTERFACE @@ -484,6 +485,8 @@ static PSI_mutex_info all_mysys_mutexes[] = { PSI_DOCUMENT_ME}, {&key_TMPDIR_mutex, "TMPDIR_mutex", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, {&key_THR_LOCK_myisam_mmap, "THR_LOCK_myisam_mmap", PSI_FLAG_SINGLETON, 0, + PSI_DOCUMENT_ME}, + {&key_THR_LOCK_dd_cache, "THR_LOCK_dd_cache", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}}; #endif /* HAVE_PSI_MUTEX_INTERFACE */ diff --git a/mysys/my_thr_init.cc b/mysys/my_thr_init.cc index 8cf6f6f65e63..9e7f745529a2 100644 --- a/mysys/my_thr_init.cc +++ b/mysys/my_thr_init.cc @@ -56,6 +56,7 @@ #include "thr_mutex.h" static bool my_thread_global_init_done = false; +static bool is_dd_reset_tables_and_tablespaces_done = false; #ifndef NDEBUG static uint THR_thread_count = 0; static Timeout_type my_thread_end_wait_time = 5; @@ -68,6 +69,7 @@ static thread_local int THR_myerrno = 0; static thread_local int THR_winerrno = 0; #endif +mysql_mutex_t THR_LOCK_dd_cache; mysql_mutex_t THR_LOCK_myisam_mmap; mysql_mutex_t THR_LOCK_myisam; mysql_mutex_t THR_LOCK_heap; @@ -192,6 +194,8 @@ bool my_thread_global_init() { mysql_mutex_init(key_THR_LOCK_myisam, &THR_LOCK_myisam, MY_MUTEX_INIT_SLOW); mysql_mutex_init(key_THR_LOCK_myisam_mmap, &THR_LOCK_myisam_mmap, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_THR_LOCK_dd_cache, &THR_LOCK_dd_cache, + MY_MUTEX_INIT_FAST); mysql_mutex_init(key_THR_LOCK_heap, &THR_LOCK_heap, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_THR_LOCK_net, &THR_LOCK_net, MY_MUTEX_INIT_FAST); #ifndef NDEBUG @@ -242,6 +246,7 @@ void my_thread_global_end() { mysql_mutex_destroy(&THR_LOCK_lock); mysql_mutex_destroy(&THR_LOCK_myisam); mysql_mutex_destroy(&THR_LOCK_myisam_mmap); + mysql_mutex_destroy(&THR_LOCK_dd_cache); mysql_mutex_destroy(&THR_LOCK_heap); mysql_mutex_destroy(&THR_LOCK_net); mysql_mutex_destroy(&THR_LOCK_charset); @@ -377,3 +382,54 @@ static void install_sigabrt_handler() { signal(SIGABRT, my_sigabrt_handler); } #endif + +/** + Signal that the resetting of data dictionary tables and tablespaces + is done, during server initialization. + + @note This function should only be called during initialization. It was + created to ensure strict ordering between resetting of data dictionary + tables and tablespaces and initialization of the memcached plugin +*/ + +void my_dd_reset_tables_and_tablespaces_set_done(){ + mysql_mutex_lock(&THR_LOCK_dd_cache); + is_dd_reset_tables_and_tablespaces_done = true; + mysql_mutex_unlock(&THR_LOCK_dd_cache); +} + +/** + Check if the resetting of data dictionary tables and tablespaces has + completed, during server initialization. + + @note This function should only be called during initialization. It was + created to ensure strict ordering between resetting of data dictionary + tables and tablespaces and initialization of the memcached plugin +*/ + +extern "C" bool my_is_dd_reset_tables_and_tablespaces_done() { + int err = 0; + int check_count = 0; + do{ + + if (check_count > 0 && !err){ + // Lock is already held by this thread, so release it first + mysql_mutex_unlock(&THR_LOCK_dd_cache); + } + + if (check_count > 0 ){ + // After the first try always perform a back-off + // We expect this back-off to only ever happen once in + // a single run. + my_sleep(1); + } + + err = mysql_mutex_trylock(&THR_LOCK_dd_cache); + ++check_count; + // Loop if we failed to capture the lock or if we did capture it + // and found that the data dict table space has been reset + }while (err < 0 || (!err && !is_dd_reset_tables_and_tablespaces_done)); + bool retval = is_dd_reset_tables_and_tablespaces_done; + mysql_mutex_unlock(&THR_LOCK_dd_cache); + return retval; +} \ No newline at end of file diff --git a/mysys/mysys_priv.h b/mysys/mysys_priv.h index 3b8db0494ca3..171ab8c7190b 100644 --- a/mysys/mysys_priv.h +++ b/mysys/mysys_priv.h @@ -47,7 +47,8 @@ extern PSI_mutex_key key_IO_CACHE_append_buffer_lock, key_IO_CACHE_SHARE_mutex, key_KEY_CACHE_cache_lock, key_THR_LOCK_charset, key_THR_LOCK_heap, key_THR_LOCK_lock, key_THR_LOCK_malloc, key_THR_LOCK_mutex, key_THR_LOCK_myisam, key_THR_LOCK_net, key_THR_LOCK_open, - key_THR_LOCK_threads, key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap; + key_THR_LOCK_threads, key_TMPDIR_mutex, key_THR_LOCK_myisam_mmap, + key_THR_LOCK_dd_cache; extern PSI_rwlock_key key_SAFE_HASH_lock; diff --git a/plugin/innodb_memcached/daemon_memcached/daemon/memcached.c b/plugin/innodb_memcached/daemon_memcached/daemon/memcached.c index 94f554595022..f46cde0eff8f 100644 --- a/plugin/innodb_memcached/daemon_memcached/daemon/memcached.c +++ b/plugin/innodb_memcached/daemon_memcached/daemon/memcached.c @@ -44,6 +44,7 @@ #define INNODB_MEMCACHED void my_thread_init(); void my_thread_end(); +bool my_is_dd_reset_tables_and_tablespaces_done(); static inline void item_set_cas(const void *cookie, item *it, uint64_t cas) { settings.engine.v1->item_set_cas(settings.engine.v0, cookie, it, cas); @@ -7865,6 +7866,10 @@ int main (int argc, char **argv) { /* initialize main thread libevent instance */ main_base = event_init(); +#ifdef INNODB_MEMCACHED + my_is_dd_reset_tables_and_tablespaces_done(); +#endif + /* Load the storage engine */ ENGINE_HANDLE *engine_handle = NULL; if (!load_engine(engine,get_server_api,settings.extensions.logger,&engine_handle)) { diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 43a8e07cf923..d39b6200ca10 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -6865,6 +6865,7 @@ static int init_server_components() { if (dd::reset_tables_and_tablespaces()) { unireg_abort(MYSQLD_ABORT_EXIT); } + my_dd_reset_tables_and_tablespaces_set_done(); ha_post_recover(); /*