commit d18019322af4136bf618ca7d2d07ea200502ec14 Author: Dmitry Lenev Date: Fri Mar 24 14:14:11 2023 +0100 Bug#110494 "Deadlock between FLUSH STATUS, COM_CHANGE_USER and SELECT FROM I_S.PROCESSLIST". Starting from 8.0.28 version of MySQL Server concurrent execution of COM_STATISTICS, COM_CHANGE_USER commands and SHOW PROCESSLIST statement sometimes led to deadlock. Same problem was observed if COM_STATISTICS was replaced with FLUSH STATUS statement and/or SHOW PROCESSLIST was replaced with SELECT from I_S.PROCESSLIST table. The deadlock occured because of regression from fix for bug#32320541 "RACE CONDITION ON SECURITY_CONTEXT::M_USER". After this patch COM_STATISTICS/FLUSH STATUS, COM_CHANGE_USER and SHOW PROCESSLIST/ SELECT ... FROM I_S.PROCESSLIST started to acquire same locks in different order. In particular: 1) Code responsible for changing user for connection started to acquire THD::LOCK_thd_security_ctx mutex and the acquired LOCK_status mutex during call to THD::cleanup_connection(), without releasing the former. 2) Implementations of COM_STATISTICS and FLUSH STATUS commands acquire LOCK_status mutex and then during iteration through all connections LOCK_thd_remove mutexes without releasing the former. 3) Finally, SHOW PROCESSLIST/I_S.PROCESSLIST implementation acquired LOCK_thd_remove mutexes and then THD::LOCK_thd_security_ctx mutex during copying information about particular connection, without releasing the former. Naturally, THD::LOCK_thd_security_ctx -> LOCK_status -> LOCK_thd_remove -> THD::LOCK_thd_security_ctx dependency loop occasionally resulted in deadlock. This patch solves the problem by reducing scope during which THD::LOCK_thd_security_ctx lock is held in COM_CHANGE_USER implementation. We no longer call THD::cleanup_connection()/lock LOCK_status while holding it, thus breaking dependency loop. diff --git a/mysql-test/r/change_user.result b/mysql-test/r/change_user.result index e0099de23a4..feeaa77849b 100644 --- a/mysql-test/r/change_user.result +++ b/mysql-test/r/change_user.result @@ -91,3 +91,47 @@ IS_USED_LOCK('bug31418') NULL FLUSH STATUS; Value of com_select did not change +# +# Bug#110494 "Deadlock between FLUSH STATUS, COM_CHANGE_USER and SELECT FROM I_S.PROCESSLIST". +# +# The original problem reported was that concurrent execution of +# COM_STATISTICS, COM_CHANGE_USER commands and SHOW FULL PROCESSLIST +# statements sometimes led to deadlock. This test uses FLUSH STATUS +# statement instead of the first command and SELECT ... FROM +# I_S.PROCESSLIST instead of the latter. They acquire the same +# locks and were affected by the same problem. +# Doing 3000 concurrent runs of each statement was enough to reproduce +# the deadlock with 80% probability on my machine. +CREATE PROCEDURE p_flush_status() +BEGIN +DECLARE x INT DEFAULT 3000; +WHILE x DO +SET x = x-1; +FLUSH STATUS; +END WHILE; +END | +CREATE PROCEDURE p_processlist() +BEGIN +DECLARE x INT DEFAULT 3000; +WHILE x DO +SET x = x-1; +SELECT COUNT(*) INTO @a FROM information_schema.processlist; +END WHILE; +END | +connect con1, localhost, root,,; +# Send: +CALL p_flush_status(); +# Send: +connect con2, localhost, root,,; +CALL p_processlist(); +connection default; +# Execute COM_CHANGE_USER command 3000 times. +connection con1; +# Reap p_flush_status(). +disconnect con1; +connection con2; +# Reap p_processlist(). +disconnect con2; +connection default; +DROP PROCEDURE p_flush_status; +DROP PROCEDURE p_processlist; diff --git a/mysql-test/t/change_user.test b/mysql-test/t/change_user.test index 1b2f4f2ea02..6f23c2f6316 100644 --- a/mysql-test/t/change_user.test +++ b/mysql-test/t/change_user.test @@ -125,3 +125,74 @@ if ($after != $before){ die The value of com_select changed during change_user; } echo Value of com_select did not change; + +--echo # +--echo # Bug#110494 "Deadlock between FLUSH STATUS, COM_CHANGE_USER and SELECT FROM I_S.PROCESSLIST". +--echo # + +--echo # The original problem reported was that concurrent execution of +--echo # COM_STATISTICS, COM_CHANGE_USER commands and SHOW FULL PROCESSLIST +--echo # statements sometimes led to deadlock. This test uses FLUSH STATUS +--echo # statement instead of the first command and SELECT ... FROM +--echo # I_S.PROCESSLIST instead of the latter. They acquire the same +--echo # locks and were affected by the same problem. +--echo # Doing 3000 concurrent runs of each statement was enough to reproduce +--echo # the deadlock with 80% probability on my machine. + +--delimiter | + +CREATE PROCEDURE p_flush_status() +BEGIN + DECLARE x INT DEFAULT 3000; + WHILE x DO + SET x = x-1; + FLUSH STATUS; + END WHILE; +END | + +CREATE PROCEDURE p_processlist() +BEGIN + DECLARE x INT DEFAULT 3000; + WHILE x DO + SET x = x-1; + SELECT COUNT(*) INTO @a FROM information_schema.processlist; + END WHILE; +END | + +--delimiter ; + +--enable_connect_log +--connect (con1, localhost, root,,) +--echo # Send: +--send CALL p_flush_status() + +--echo # Send: +--connect (con2, localhost, root,,) +--send CALL p_processlist() + +--connection default + +--echo # Execute COM_CHANGE_USER command 3000 times. +let $i = 3000; +while ($i) +{ + dec $i; +--change_user +} + +--connection con1 +--echo # Reap p_flush_status(). +--reap +--disconnect con1 +--source include/wait_until_disconnected.inc + +--connection con2 +--echo # Reap p_processlist(). +--reap +--disconnect con2 +--source include/wait_until_disconnected.inc + +--connection default +--disable_connect_log +DROP PROCEDURE p_flush_status; +DROP PROCEDURE p_processlist; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 063c473e9c3..0ba76f7ebbb 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1874,13 +1874,6 @@ bool dispatch_command(THD *thd, const COM_DATA *com_data, break; } case COM_CHANGE_USER: { - /* - LOCK_thd_security_ctx protects the THD's security-context from - inspection by SHOW PROCESSLIST while we're updating it. Nested - acquiring of LOCK_thd_data is fine (see below). - */ - MUTEX_LOCK(grd_secctx, &thd->LOCK_thd_security_ctx); - int auth_rc; thd->status_var.com_other++; @@ -1888,8 +1881,18 @@ bool dispatch_command(THD *thd, const COM_DATA *com_data, USER_CONN *save_user_connect = const_cast(thd->get_user_connect()); LEX_CSTRING save_db = thd->db(); + + /* + LOCK_thd_security_ctx protects the THD's security-context from + inspection by SHOW PROCESSLIST while we're updating it. However, + there is no need to protect this context while we're reading it, + sinceother threads are not supposed to modify it. + Nested acquiring of LOCK_thd_data is fine (see below). + */ Security_context save_security_ctx(*(thd->security_context())); + MUTEX_LOCK(grd_secctx, &thd->LOCK_thd_security_ctx); + auth_rc = acl_authenticate(thd, COM_CHANGE_USER); auth_rc |= mysql_audit_notify( thd, AUDIT_EVENT(MYSQL_AUDIT_CONNECTION_CHANGE_USER));