commit 5d4be8a12bba2690afe5180e07687449d1836947 Author: Libing Song Date: Wed Mar 29 08:17:55 2023 +0800 BUG#110533 XA COMMIT may not be recovered after a crash Analysis ======== XA COMMIT/ROLLBACK has two steps: - step 1: write a XA COMMIT/ROLLBACK into binlog. - step 2: Commit/Rollback to engine. Binlog rotation could happen between step 1 and step 2. After rotation, a new binlog file is generated. If crash happened at the moment, the XA COMMIT/ROLLBACK could not be recovered correctly, since it is not in the last binlog file. XA crash safe only work on the last binlog file. Fix === Binlog rotation will be blocked by m_atomic_prep_xids if it is greater than 0. The normal transactions which have Xid_log_event will increase m_atomic_prep_xids in step 1 and decrease it after step 2. Thus binlog rotation cannot happen between step 1 and step 2. m_atomic_prep_xids can be used to protect XA COMMIT/ROLLBACK in the same way. This patch simplified the logic to increase m_atomic_prep_xids always in step 1, even the transaction is non-transactional. diff --git a/mysql-test/suite/binlog_gtid/r/binlog_gtid_xa_commit_rollback_block_rotate.result b/mysql-test/suite/binlog_gtid/r/binlog_gtid_xa_commit_rollback_block_rotate.result new file mode 100644 index 00000000000..5b3270afd84 --- /dev/null +++ b/mysql-test/suite/binlog_gtid/r/binlog_gtid_xa_commit_rollback_block_rotate.result @@ -0,0 +1,73 @@ +[ connection default ] +SET @binlog_order_commits_saved = @@GLOBAL.binlog_order_commits; +SET GLOBAL binlog_order_commits = OFF; +CREATE TABLE t1(c1 INT); +# +# XA PREPARE binlogged shall block FLUSH BINARY LOGS +######################################################################## +# +XA START "xid1"; +INSERT INTO t1 VALUES(1); +XA END "xid1"; +SET debug_sync = "RESET"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +XA PREPARE "xid1"; +[ connection con1 ] +SET debug_sync = "now WAIT_FOR finish_commit"; +FLUSH BINARY LOGS; +[ connection con2 ] +SET debug_sync = "now SIGNAL continue"; +[ connection con1 ] +[ connection default ] +SET debug_sync = "RESET"; +# +# XA COMMIT binlogged shall block FLUSH BINARY LOGS. +######################################################################## +# +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +XA COMMIT "xid1"; +[ connection con1 ] +SET debug_sync = "now WAIT_FOR finish_commit"; +FLUSH BINARY LOGS; +[ connection con2 ] +SET debug_sync = "now SIGNAL continue"; +[ connection con1 ] +[ connection default ] +SET debug_sync = "RESET"; +# +# XA COMMIT ONE PHASE binlogged shall block FLUSH BINARY LOGS. +######################################################################## +# +XA START "xid2"; +INSERT INTO t1 VALUES(1); +XA END "xid2"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +XA COMMIT "xid2" ONE PHASE; +[ connection con1 ] +SET debug_sync = "now WAIT_FOR finish_commit"; +FLUSH BINARY LOGS; +[ connection con2 ] +SET debug_sync = "now SIGNAL continue"; +[ connection con1 ] +[ connection default ] +SET debug_sync = "RESET"; +# +# XA ROLLBACK binlogged shall block FLUSH BINARY LOGS. +######################################################################## +# +XA START "xid3"; +INSERT INTO t1 VALUES(1); +XA END "xid3"; +XA PREPARE "xid3"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +XA ROLLBACK "xid3"; +[ connection con1 ] +SET debug_sync = "now WAIT_FOR finish_commit"; +FLUSH BINARY LOGS; +[ connection con2 ] +SET debug_sync = "now SIGNAL continue"; +[ connection con1 ] +[ connection default ] +SET debug_sync = "RESET"; +DROP TABLE t1; +SET GLOBAL binlog_order_commits = @binlog_order_commits_saved; diff --git a/mysql-test/suite/binlog_gtid/t/binlog_gtid_xa_commit_rollback_block_rotate.test b/mysql-test/suite/binlog_gtid/t/binlog_gtid_xa_commit_rollback_block_rotate.test new file mode 100644 index 00000000000..5a5a6fc3dee --- /dev/null +++ b/mysql-test/suite/binlog_gtid/t/binlog_gtid_xa_commit_rollback_block_rotate.test @@ -0,0 +1,167 @@ +################################################################################ +# Bug #110533 XA COMMIT may not be recovered after a crash +# +# XA COMMIT/ROLLBACK has two steps: +# - step 1: write into binlog +# - step 2: commit/rollback to engine. +# +# Before this patch, binlog rotation could happen between step 1 and step 2. +# After rotation, the XA COMMIT/ROLLBACK events was not in the last binlog. +# Thus XA crash-safe won't work on it. +# +# This test verifys binlog rotation cannot happens between step1 and step 2. +################################################################################ +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +# Init +--connect(con1, localhost, root,,) +--connect(con2, localhost, root,,) + +--echo [ connection default ] +--connection default +# The debug_sync point reached_finish_commit requires binlog_order_commit off. +SET @binlog_order_commits_saved = @@GLOBAL.binlog_order_commits; +SET GLOBAL binlog_order_commits = OFF; + +CREATE TABLE t1(c1 INT); + +--echo # +--echo # XA PREPARE binlogged shall block FLUSH BINARY LOGS +--echo ######################################################################## +--echo # +XA START "xid1"; +INSERT INTO t1 VALUES(1); +XA END "xid1"; + +SET debug_sync = "RESET"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +--send XA PREPARE "xid1" + +--echo [ connection con1 ] +--connection con1 +SET debug_sync = "now WAIT_FOR finish_commit"; +--send FLUSH BINARY LOGS + +--echo [ connection con2 ] +--connection con2 +# FLUSH BINARY LOGS is waiting for prep_xids to be 0. +let $wait_condition = + SELECT count(*) FROM performance_schema.threads t, + performance_schema.events_waits_current w + WHERE w.thread_id = t.thread_id AND w.event_name LIKE "%prep_xids_cond" + AND processlist_info = "FLUSH BINARY LOGS"; +--source include/wait_condition.inc + +# Signal XA PREPARE to continue +SET debug_sync = "now SIGNAL continue"; + +--echo [ connection con1 ] +--connection con1 +--reap + +--echo [ connection default ] +--connection default +--reap +SET debug_sync = "RESET"; + +--echo # +--echo # XA COMMIT binlogged shall block FLUSH BINARY LOGS. +--echo ######################################################################## +--echo # +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +--send XA COMMIT "xid1" + +--echo [ connection con1 ] +--connection con1 +SET debug_sync = "now WAIT_FOR finish_commit"; +--send FLUSH BINARY LOGS + +--echo [ connection con2 ] +--connection con2 +# FLUSH BINARY LOGS is waiting for prep_xids to be 0. +--source include/wait_condition.inc + +# Signal XA COMMIT to continue +SET debug_sync = "now SIGNAL continue"; + +--echo [ connection con1 ] +--connection con1 +--reap + +--echo [ connection default ] +--connection default +--reap +SET debug_sync = "RESET"; + +--echo # +--echo # XA COMMIT ONE PHASE binlogged shall block FLUSH BINARY LOGS. +--echo ######################################################################## +--echo # +XA START "xid2"; +INSERT INTO t1 VALUES(1); +XA END "xid2"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +--send XA COMMIT "xid2" ONE PHASE + +--echo [ connection con1 ] +--connection con1 +SET debug_sync = "now WAIT_FOR finish_commit"; +--send FLUSH BINARY LOGS + +--echo [ connection con2 ] +--connection con2 +# FLUSH BINARY LOGS is waiting for prep_xids to be 0. +--source include/wait_condition.inc + +# Signal XA COMMIT to continue +SET debug_sync = "now SIGNAL continue"; + +--echo [ connection con1 ] +--connection con1 +--reap + +--echo [ connection default ] +--connection default +--reap +SET debug_sync = "RESET"; + +--echo # +--echo # XA ROLLBACK binlogged shall block FLUSH BINARY LOGS. +--echo ######################################################################## +--echo # +XA START "xid3"; +INSERT INTO t1 VALUES(1); +XA END "xid3"; +XA PREPARE "xid3"; +SET debug_sync = "reached_finish_commit SIGNAL finish_commit WAIT_FOR continue"; +--send XA ROLLBACK "xid3" + +--echo [ connection con1 ] +--connection con1 +SET debug_sync = "now WAIT_FOR finish_commit"; +--send FLUSH BINARY LOGS + +--echo [ connection con2 ] +--connection con2 +# FLUSH BINARY LOGS is waiting for prep_xids to be 0. +--source include/wait_condition.inc + +# Signal XA ROLLBACK to continue +SET debug_sync = "now SIGNAL continue"; + +--echo [ connection con1 ] +--connection con1 +--reap + +--echo [ connection default ] +--connection default +--reap +SET debug_sync = "RESET"; + +DROP TABLE t1; +SET GLOBAL binlog_order_commits = @binlog_order_commits_saved; + +--disconnect con1 +--disconnect con2 diff --git a/sql/binlog.cc b/sql/binlog.cc index bec6a1f6c19..8eb725d098b 100644 --- a/sql/binlog.cc +++ b/sql/binlog.cc @@ -713,7 +713,7 @@ class binlog_cache_data { Binlog_cache_storage *get_cache() { return &m_cache; } int finalize(THD *thd, Log_event *end_event); int finalize(THD *thd, Log_event *end_event, XID_STATE *xs); - int flush(THD *thd, my_off_t *bytes, bool *wrote_xid); + int flush(THD *thd, my_off_t *bytes); int write_event(Log_event *event); size_t get_event_counter() { return event_counter; } size_t get_compressed_size() { return m_compressed_size; } @@ -750,11 +750,7 @@ class binlog_cache_data { bool has_incident(void) const { return flags.incident; } - bool has_xid() const { - // There should only be an XID event if we are transactional - assert((flags.transactional && flags.with_xid) || !flags.with_xid); - return flags.with_xid; - } + bool has_start() const { return flags.with_start; } bool is_trx_cache() const { return flags.transactional; } @@ -812,7 +808,6 @@ class binlog_cache_data { } flags.incident = false; - flags.with_xid = false; flags.immediate = false; flags.finalized = false; flags.with_sbr = false; @@ -879,7 +874,6 @@ class binlog_cache_data { assert(!flags.with_sbr); // No statements changing content assert(!flags.with_rbr); // No rows changing content assert(!flags.immediate); // Not a DDL - assert(!flags.with_xid); // Not a XID trx and not an atomic DDL Query return true; } return false; @@ -982,19 +976,6 @@ class binlog_cache_data { */ bool finalized : 1; - /* - This indicates that either the cache contain an XID event, or it's - an atomic DDL Query-log-event. In the latter case the flag is set up - on the statement level, namely when the Query-log-event is cached - at time the DDL transaction is not committing. - The flag therefore gets reset when the cache is cleaned due to - the statement rollback, e.g in case of a DDL post-caching execution - error. - Any statement scope flag among other things must consider its - reset policy when the statement is rolled back. - */ - bool with_xid : 1; - /* This indicates that the cache contain statements changing content. */ @@ -1127,11 +1108,6 @@ class binlog_trx_cache_data : public binlog_cache_data { binlog_cache_data::truncate(before_stmt_pos); cache_state_rollback(before_stmt_pos); before_stmt_pos = MY_OFF_T_UNDEF; - /* - Binlog statement rollback clears with_xid now as the atomic DDL statement - marker which can be set as early as at event creation and caching. - */ - flags.with_xid = false; DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos)); return; } @@ -1218,20 +1194,15 @@ class binlog_cache_mngr { @param bytes_written Pointer to variable that will be set to the number of bytes written for the flush. - @param wrote_xid Pointer to variable that will be set to @c - true if any XID event was written to the - binary log. Otherwise, the variable will not - be touched. @return Error code on error, zero if no error. */ - int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid) { + int flush(THD *thd, my_off_t *bytes_written) { my_off_t stmt_bytes = 0; my_off_t trx_bytes = 0; - assert(stmt_cache.has_xid() == 0); - int error = stmt_cache.flush(thd, &stmt_bytes, wrote_xid); + int error = stmt_cache.flush(thd, &stmt_bytes); if (error) return error; DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache"); - error = trx_cache.flush(thd, &trx_bytes, wrote_xid); + error = trx_cache.flush(thd, &trx_bytes); if (error) return error; *bytes_written = stmt_bytes + trx_bytes; return 0; @@ -1528,12 +1499,7 @@ int binlog_cache_data::write_event(Log_event *ev) { }); return 1; } - if (ev->get_type_code() == binary_log::XID_EVENT || - ev->get_type_code() == binary_log::XA_PREPARE_LOG_EVENT) - flags.with_xid = true; if (ev->is_using_immediate_logging()) flags.immediate = true; - /* DDL gets marked as xid-requiring at its caching. */ - if (is_atomic_ddl_event(ev)) flags.with_xid = true; /* With respect to the event type being written */ if (ev->is_sbr_logging_format()) flags.with_sbr = true; if (ev->is_rbr_logging_format()) flags.with_rbr = true; @@ -2156,8 +2122,7 @@ int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs) { @see binlog_cache_data::finalize */ -int binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, - bool *wrote_xid) { +int binlog_cache_data::flush(THD *thd, my_off_t *bytes_written) { /* Doing a commit or a rollback including non-transactional tables, i.e., ending a transaction where we might write the transaction @@ -2225,12 +2190,6 @@ int binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, DBUG_EXECUTE_IF("fault_injection_reinit_io_cache_while_flushing_to_file", { DBUG_SET("-d,fault_injection_reinit_io_cache"); }); - if (flags.with_xid && error == 0) *wrote_xid = true; - - /* - Reset have to be after the if above, since it clears the - with_xid flag - */ reset(); if (bytes_written) *bytes_written = bytes_in_cache; } @@ -8192,7 +8151,7 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) { /* If is atomic DDL, finalize cache for DDL and no further logging is needed. */ - else if ((is_atomic_ddl = cache_mngr->trx_cache.has_xid())) { + else if ((is_atomic_ddl = !cache_mngr->trx_cache.has_start())) { if (cache_mngr->trx_cache.finalize(thd, nullptr)) return RESULT_ABORTED; } /* @@ -8326,15 +8285,14 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) { std::pair MYSQL_BIN_LOG::flush_thread_caches(THD *thd) { binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd); my_off_t bytes = 0; - bool wrote_xid = false; - int error = cache_mngr->flush(thd, &bytes, &wrote_xid); + int error = cache_mngr->flush(thd, &bytes); if (!error && bytes > 0) { /* Note that set_trans_pos does not copy the file name. See this function documentation for more info. */ thd->set_trans_pos(log_file_name, m_binlog_file->position()); - if (wrote_xid) inc_prep_xids(thd); + inc_prep_xids(thd); } DBUG_PRINT("debug", ("bytes: %llu", bytes)); return std::make_pair(error, bytes);