diff --git a/mysql-test/suite/rpl_gtid/r/rpl_gtid_fix_sbm_spike.result b/mysql-test/suite/rpl_gtid/r/rpl_gtid_fix_sbm_spike.result new file mode 100644 index 00000000000..fabcca599e5 --- /dev/null +++ b/mysql-test/suite/rpl_gtid/r/rpl_gtid_fix_sbm_spike.result @@ -0,0 +1,39 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +[connection master] + +# +# 0. Prepare data. +################## +# +CREATE DATABASE tt; +CREATE TABLE tt.t1(id int); + +# +# 1. Simulate executing long transaction. +######################################### +# +## The rli->last_master_timestamp will be set to the first transaction's Gtid_log_event's when field +## after sync with master. +include/rpl_sync.inc +SET GLOBAL debug = '+d,block_to_see_sbm'; + +## Simulate long transaction. +INSERT INTO tt.t1 VALUES(sleep(10)); +INSERT INTO tt.t1 VALUES(2); +## For original MySQL logic, SBM = 10s (on the source) + 10s (on the replica) = 20s. Now SBM = 10s (on the replica). +include/assert.inc [Seconds_Behind_Master will less than or equal to 10] +## For original MySQL logic, SBM = 10s (on the source trx1) + 10s (on the replica trx1) + 10s(on the replica trx2) = 30s. +## Now SBM = 10s (on the replica trx1) + 10s(on the replica trx2) = 20s. +include/assert.inc [Seconds_Behind_Master will less than or equal to 20] + +# +# 2. Clean up +############# +# +SET GLOBAL debug = '-d,block_to_see_sbm'; +DROP DATABASE tt; +include/rpl_sync.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl_gtid/t/rpl_gtid_fix_sbm_spike.test b/mysql-test/suite/rpl_gtid/t/rpl_gtid_fix_sbm_spike.test new file mode 100644 index 00000000000..beade6b4cde --- /dev/null +++ b/mysql-test/suite/rpl_gtid/t/rpl_gtid_fix_sbm_spike.test @@ -0,0 +1,74 @@ +# ==== Purpose ==== +# This test verifies the main function of binlog realtime apply. +# +# ==== Implementation === +# 0) Prepare data. +# 1) Simulate executing long transaction. +# 1) Clean up. + +--source include/master-slave.inc +--source include/have_binlog_format_row.inc +--source include/have_debug.inc + +--echo +--echo # +--echo # 0. Prepare data. +--echo ################## +--echo # +connection master; +CREATE DATABASE tt; +CREATE TABLE tt.t1(id int); + +--echo +--echo # +--echo # 1. Simulate executing long transaction. +--echo ######################################### +--echo # +--echo ## The rli->last_master_timestamp will be set to the first transaction's Gtid_log_event's when field +--echo ## after sync with master. +connection slave; +--source include/rpl_sync.inc +SET GLOBAL debug = '+d,block_to_see_sbm'; + +--echo +--echo ## Simulate long transaction. +connection master; +INSERT INTO tt.t1 VALUES(sleep(10)); +INSERT INTO tt.t1 VALUES(2); + +connection slave; +--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE State = "waiting for handler commit" +--source include/wait_condition.inc + +--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE State = "Waiting for dependent transaction to commit" +--source include/wait_condition.inc + +let $id1= query_get_value(SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = "Waiting for dependent transaction to commit", ID, 1); + +--echo ## For original MySQL logic, SBM = 10s (on the source) + 10s (on the replica) = 20s. Now SBM = 10s (on the replica). +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master will less than or equal to 10; +let $assert_cond= $sbm <= 10; +--source include/assert.inc + +--echo ## For original MySQL logic, SBM = 10s (on the source trx1) + 10s (on the replica trx1) + 10s(on the replica trx2) = 30s. +--echo ## Now SBM = 10s (on the replica trx1) + 10s(on the replica trx2) = 20s. +--let $wait_condition= SELECT COUNT(*) = 0 FROM information_schema.processlist WHERE State = 'Waiting for dependent transaction to commit' and id = '$id1' +--source include/wait_condition.inc + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master will less than or equal to 20; +let $assert_cond= $sbm <= 20; +--source include/assert.inc + +--echo +--echo # +--echo # 2. Clean up +--echo ############# +--echo # +connection slave; +SET GLOBAL debug = '-d,block_to_see_sbm'; +connection master; +DROP DATABASE tt; +--source include/rpl_sync.inc +--source include/rpl_end.inc diff --git a/sql/binlog.cc b/sql/binlog.cc index 900a4e81e1d..d363aa8a4ba 100644 --- a/sql/binlog.cc +++ b/sql/binlog.cc @@ -8596,6 +8596,8 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) { */ else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 && !trn_ctx->no_2pc(trx_scope)) { + DBUG_EXECUTE_IF("block_to_see_sbm", + std::this_thread::sleep_for(std::chrono::seconds(10));); Xid_log_event end_evt(thd, xid); if (cache_mngr->trx_cache.finalize(thd, &end_evt)) return RESULT_ABORTED; } diff --git a/sql/log_event.cc b/sql/log_event.cc index 775f3f08297..0f94b8ac31a 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -8466,6 +8466,37 @@ uint search_key_in_table(TABLE *table, MY_BITMAP *bi_cols, return res; } +void check_and_modify_event_time(THD *thd, timeval *when) { + /* + For single-statement transaction, the when field of Xid_log_event's + header is the start time of the statement. + + For MTS, the SBM calculation logic is to subtract the when field of the + Xid_log_event's header of the checkpoint from the current time, which + can cause significant SBM spikes in replica. + + After replica catch up the source, the SBM calculation will use the first + transaction's Gtid_log_event's when field received by the replica. The + same problem exists at this time + + Multi-statement transactions do not experience this issue because the + when field in the Xid_log_event is the time of the commit/begin + execution. + + For single-statement transactions, we call THD::set_time when committing + the transaction, so that the when field of the Xid_log_event and + Gtid_log_event headers uses the commit time, just like multi-statement + transactions. + + If the time is set using SET TIMESTAMP, the when field of Xid_log_event + and Gtid_log_event is not changed. + */ + if (!thd->in_active_multi_stmt_transaction() && + (!(thd->user_time.tv_sec || thd->user_time.tv_usec))) { + my_micro_time_to_timeval(my_micro_time(), when); + } +} + void Rows_log_event::decide_row_lookup_algorithm_and_key() { DBUG_TRACE; @@ -13172,6 +13203,7 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, bool using_trans, to_string(buf); DBUG_PRINT("info", ("%s", buf)); #endif + check_and_modify_event_time(thd, &(common_header->when)); common_header->set_is_valid(true); } diff --git a/sql/log_event.h b/sql/log_event.h index f13ed25f531..e121e9e4713 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1696,6 +1696,8 @@ class Rand_log_event : public binary_log::Rand_event, public Log_event { #endif }; +void check_and_modify_event_time(THD *thd, timeval *when); + /** @class Xid_log_event @@ -1752,6 +1754,7 @@ class Xid_log_event : public binary_log::Xid_event, public Xid_apply_log_event { Xid_log_event(THD *thd_arg, my_xid x) : binary_log::Xid_event(x), Xid_apply_log_event(thd_arg, header(), footer()) { + check_and_modify_event_time(thd, &(common_header->when)); common_header->set_is_valid(true); } int pack_info(Protocol *protocol) override;