Bug #119235 Crash due to race condition between buf_LRU_check_size_of_non_data_objects and buf_pool_resize​​
Submitted: 27 Oct 2025 2:47 Modified: 10 Nov 2025 14:16
Reporter: Yin Peng (OCA) Email Updates:
Status: Open Impact on me:
None 
Category:MySQL Server: InnoDB storage engine Severity:S3 (Non-critical)
Version:9.5.0 OS:Any
Assigned to: CPU Architecture:Any

[27 Oct 2025 2:47] Yin Peng
Description:
The function buf_LRU_check_size_of_non_data_objects attempts to verify the size of the buffer pool without acquiring a mutex lock. This lack of synchronization may lead to a critical assertion failure if a concurrent resizing is in progress.
This issue is difficult to reproduce in release builds, but we did encounter it in our environment.

How to repeat:
Add some debug code:

diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index d3eb0adbb8e..92d32299ce9 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -90,6 +90,8 @@ this program; if not, write to the Free Software Foundation, Inc.,
 
 #ifdef UNIV_DEBUG
 #include "ut0stateful_latching_rules.h"
+#include "current_thd.h"
+#include "debug_sync.h"
 #endif /* UNIV_DEBUG */
 
 #ifdef HAVE_LIBNUMA
@@ -2208,8 +2210,27 @@ static void buf_pool_resize() {
     ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks);
     ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0);
 
+    DBUG_EXECUTE_IF("buf_pool_resize_simulate_wait", {
+      const char act[] =
+          "now SIGNAL "
+          "buf_pool_resize_stopped_before_setting_new_size "
+          "WAIT_FOR "
+          "buf_pool_resize_update_new_size";
+      assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
+    });
+
     buf_pool->curr_size = new_instance_size;
 
+    DBUG_EXECUTE_IF("buf_pool_resize_simulate_wait", {
+      const char act[] =
+          "now SIGNAL "
+          "buf_pool_resize_stopped_after_setting_new_size "
+          "WAIT_FOR "
+          "buf_pool_resize_continue";
+      assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
+      DBUG_SET("-d,buf_pool_resize_simulate_wait");
+    });
+
     ut_ad(srv_buf_pool_chunk_unit % UNIV_PAGE_SIZE == 0);
     buf_pool->n_chunks_new =
         new_instance_size * UNIV_PAGE_SIZE / srv_buf_pool_chunk_unit;
@@ -2671,6 +2692,9 @@ withdraw_retry:
 /** This is the thread for resizing buffer pool. It waits for an event and
 when waked up either performs a resizing and sleeps again. */
 void buf_resize_thread() {
+#ifdef UNIV_DEBUG
+  THD *thd = create_internal_thd();
+#endif
   while (srv_shutdown_state.load() < SRV_SHUTDOWN_CLEANUP) {
     os_event_wait(srv_buf_resize_event);
     os_event_reset(srv_buf_resize_event);
@@ -2694,6 +2718,9 @@ void buf_resize_thread() {
 
     buf_pool_resize();
   }
+#ifdef UNIV_DEBUG
+  destroy_internal_thd(thd);
+#endif
 }
 
 /** Clears the adaptive hash index on all pages in the buffer pool. */
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 864eac435c7..1a184df1183 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -56,6 +56,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
 #include "trx0trx.h"
 #include "ut0byte.h"
 #include "ut0rnd.h"
+#ifdef UNIV_DEBUG
+#include "current_thd.h"
+#include "debug_sync.h"
+#endif
 
 /** The number of blocks from the LRU_old pointer onward, including
 the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
@@ -1256,7 +1260,22 @@ static void buf_LRU_check_size_of_non_data_objects(
 {
   const size_t mb = (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE));
 
+#ifdef UNIV_DEBUG
+  auto buf_check_size_debug_wait = []() {
+    const char act[] =
+        "now SIGNAL "
+        "buf_pool_resize_update_new_size "
+        "WAIT_FOR "
+        "buf_pool_resize_stopped_after_setting_new_size";
+    assert(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
+    DBUG_SET("-d,buf_check_size_simulate_wait");
+    return true;
+  };
+#endif
+
   if (!recv_recovery_is_on() && buf_pool->curr_size == buf_pool->old_size &&
+      DBUG_EVALUATE_IF("buf_check_size_simulate_wait",
+                       buf_check_size_debug_wait(), true) &&
       UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) <
           buf_pool->curr_size / 20) {
     const bool buf_pool_full = true;

Run test script:

source include/have_debug.inc;
source include/have_debug_sync.inc;

--let $restart_parameters="restart: --innodb-buffer-pool-size=5M --innodb-buffer_pool_instances=1"

--source include/restart_mysqld.inc

--connect(con1,localhost,root)
--connection con1
set global debug='+d,buf_pool_resize_simulate_wait';
send set global innodb_buffer_pool_size = 120*1024*1024;

connection default;
set debug_sync='now wait_for buf_pool_resize_stopped_before_setting_new_size';
set debug='+d,buf_check_size_simulate_wait';
create table t1(id int,c1 int);
insert into t1 values(1,1);
set debug_sync='now signal buf_pool_resize_continue';

connection con1;
reap;
--source include/wait_condition.inc

connection default;
disconnect con1;

--source include/restart_mysqld.inc
drop table t1;

Suggested fix:
1. Eliminate the unsafe assertion.
2. Before executing the assertion logic, immediately recheck the buffer pool size. If the size has changed (indicating an ongoing resize), skip the assertion to prevent failures.
[27 Oct 2025 2:47] Yin Peng
Recheck the buffer pool size before executing the assertion logic

(*) I confirm the code being submitted is offered under the terms of the OCA, and that I am authorized to contribute it.

Contribution: patch.txt (text/plain), 4.52 KiB.

[27 Oct 2025 2:50] Yin Peng
Change the title
[10 Nov 2025 14:16] Yin Peng
Has this bug report been reviewed?