Bug #117553 Crash caused by race condition between MVCC::view_open and purge view
Submitted: 24 Feb 9:15 Modified: 24 Feb 13:10
Reporter: Yin Peng (OCA) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: InnoDB storage engine Severity:S2 (Serious)
Version:8.4.4 OS:Any
Assigned to: CPU Architecture:Any
Tags: Contribution

[24 Feb 9:15] Yin Peng
Description:
A critical crash was observed in our environment. The failure occurred during transaction rollback operations, triggered by an assertion failure in ut_dbg_assertion_failed with the condition next_undo_offset > undo_offset. Call stack:

ut_dbg_assertion_failed (expr=expr@entry=0x1a29ce8 "next_undo_offset > undo_offset")
trx_undo_rec_copy 
trx_undo_get_undo_rec_low 
trx_undo_get_undo_rec out
trx_undo_prev_version_build
row_vers_old_has_index_entry
row_undo_mod_del_mark_or_remove_sec_low
row_undo_mod_del_mark_or_remove_sec
row_undo_mod_upd_exist_sec
row_undo_mod(undo_node_t*, que_thr_t*)
row_undo
row_undo_step(que_thr_t*)
que_thr_step (thr=<optimized out>)
que_run_threads_low 
que_run_threads(que_thr_t*)
trx_rollback_to_savepoint_low(trx_t*, trx_savept_t*)
trx_rollback_for_mysql_low (trx=0x7f34a194d270)
trx_rollback_low(trx_t*)
trx_rollback_for_mysql(trx_t*)
innobase_rollback(handlerton*, THD*, bool)
ha_rollback_low

After analysing the coredump I found that the low_limit_no of purge_sys->view is less than the trx_no of purge_sys->iter. This indicates that some rollback operations may read the purged undo logs! After diving into the code, I think the root cause is that there is a race condition between MVCC::view_open and MVCC::clone_oldest_view:

thd1                                   thd2                           purge
simple select
readview:
{ 
  m_low_limit_id = 2475083946,
  m_up_limit_id = 2475083946,
  m_low_limit_no = 2475083946
}

                                      dml and commit

                                                                    purge all undo logs
                                                                    purge_sys->view:
                                                                    { 
                                                                      m_low_limit_id = 2475254583,
                                                                      m_up_limit_id = 2475254583,
                                                                      m_low_limit_no = 2475254583
                                                                    }
                                                                    purge_sys->iter.trx_no: 2475254583
                                              

simple select
check if readview can be reused:
{ 
  m_low_limit_id = 2475083946,
  m_up_limit_id = 2475083946,
  m_low_limit_no = 2475083946
}
ReadView::m_closed = true

                                              
                                                                    MVCC::clone_oldest_view
                                                                    purge_sys->view:
                                                                    { 
                                                                      m_low_limit_id = 2475083946,
                                                                      m_up_limit_id = 2475083946,
                                                                      m_low_limit_no = 2475083946
                                                                    }

                                      dml and rollback       
                                    may read the stale undo

How to repeat:
First we need to add some debug points:

diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index a918b11b06f..6ee7ad8b1d1 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -36,6 +36,8 @@ this program; if not, write to the Free Software Foundation, Inc.,
 
 #include "srv0srv.h"
 #include "trx0sys.h"
+#include "current_thd.h"
+#include <debug_sync.h>
 
 /*
 -------------------------------------------------------------------------------
@@ -520,6 +522,11 @@ void MVCC::view_open(ReadView *&view, trx_t *trx) {
 
     if (trx_is_autocommit_non_locking(trx) && view->empty()) {
       view->m_closed = false;
+#ifdef UNIV_DEBUG
+      if (current_thd) {
+        DEBUG_SYNC(current_thd, "simulate_stop_after_setting_m_closed");
+      }
+#endif
 
       if (view->m_low_limit_id == trx_sys_get_next_trx_id_or_no()) {
         return;
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index cbbac9eb59f..15fc8250ce7 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -2407,6 +2407,15 @@ ulint trx_purge(ulint n_purge_threads, /*!< in: number of purge tasks
 
   rw_lock_x_unlock(&purge_sys->latch);
 
+  DBUG_EXECUTE_IF("simulate_stop_after_clone_oldest_view", {
+    bool end = false;
+    while (!end) {
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+      ib::info() << "[TXSQL] purge thread stopped after clone oldest view";
+      DBUG_EXECUTE_IF("continue_to_purge", { end = true; });
+    }
+  });
+
 #ifdef UNIV_DEBUG
   if (srv_purge_view_update_only_debug) {
     return (0);

Then run the following test script and you will see the server has gone away:

source include/have_debug.inc;
source include/have_debug_sync.inc;

--connect(con1,localhost,root)

create table t1(id int not null primary key auto_increment, c1 int, c2 int, key k_c1(c1));

insert into t1(c1,c2) values(1,1),(2,2);
sleep 2;
connection con1;
select * from t1;

connection default;
update t1 set c2=c2+1;
update t1 set c1=c1+2;

connection default;
set global innodb_purge_stop_now=ON;
set global innodb_purge_run_now=ON;
--source include/wait_innodb_all_purged.inc

CREATE UNDO TABLESPACE undo_test_001 ADD DATAFILE 'undo_test_001.ibu';
ALTER UNDO TABLESPACE innodb_undo_001 SET INACTIVE;
--let $grep_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
--let $grep_pattern = .*Completed truncate of undo tablespace innodb_undo_001.*
--source include/wait_for_pattern_in_file.inc
ALTER UNDO TABLESPACE innodb_undo_001 SET ACTIVE;

ALTER UNDO TABLESPACE innodb_undo_002 SET INACTIVE;
--let $grep_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
--let $grep_pattern = .*Completed truncate of undo tablespace innodb_undo_002.*
--source include/wait_for_pattern_in_file.inc
ALTER UNDO TABLESPACE innodb_undo_002 SET ACTIVE;

ALTER UNDO TABLESPACE undo_test_001 SET INACTIVE;
--let $grep_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
--let $grep_pattern = .*Completed truncate of undo tablespace undo_test_001.*
--source include/wait_for_pattern_in_file.inc
DROP UNDO TABLESPACE undo_test_001;

connection con1;
set debug_sync="simulate_stop_after_setting_m_closed signal clone_view_stopped wait_for clone_view_continue";
send select * from t1;

connection default;
set debug_sync="now wait_for clone_view_stopped";
set global debug='+d, simulate_stop_after_clone_oldest_view';
update t1 set c2=c2 +1;
--let $grep_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
--let $grep_pattern = .*purge thread stopped after clone oldest.*
--source include/wait_for_pattern_in_file.inc

begin;
update t1 set c1=c1+2;
update t1 set c2 = c2 + 2;
rollback;

set debug_sync='now signal clone_view_continue';

connection con1;
reap;
connection default;
disconnect con1;

set global debug='-d, simulate_stop_after_clone_oldest_view';
set global debug='+d,continue_to_purge';
sleep 5;
set global debug='-d,continue_to_purge';
drop table t1;

Suggested fix:
The new readview of purge_sys should not be older than the old one.
[24 Feb 9:16] Yin Peng
The new readview of purge_sys should not be older than the old one.

(*) I confirm the code being submitted is offered under the terms of the OCA, and that I am authorized to contribute it.

Contribution: patch_of_purge_view.txt (text/plain), 2.67 KiB.

[24 Feb 9:22] Yin Peng
Oh, sorry, in the previous description the state of 'ReadView::m_closed' was incorrectly described. The accurate line should be:
'ReadView::m_closed = false'
[24 Feb 13:10] MySQL Verification Team
Hello yin peng,

Thank you for the report and test case.
Verified as described.

regards,
Umesh