commit 1291a22871b29a6557d9cbdf4e83f649e9ebbc54 Author: Robert Golebiowski Date: Sun Mar 5 21:24:56 2017 +0100 Fix bug 84415 / 1654091 (slave don't report Seconds_Behind_Master when running slave_parallel_workers > 0) The fix: When a task from GAQ finishes executing, last_master_timestamp gets assigned timestamp from next task in GAQ. In case GAQ is empty last_master_timestamp gets reset to 0. Because last_master_timestamp is only re-assigned when task leaves GAQ it must be also assigned when first event (from parallel execution) arrives. diff --git a/mysql-test/include/wait_for_mts_checkpoint.inc b/mysql-test/include/wait_for_mts_checkpoint.inc new file mode 100644 index 00000000000..e7b29c6c7bc --- /dev/null +++ b/mysql-test/include/wait_for_mts_checkpoint.inc @@ -0,0 +1,11 @@ +# ==== Purpose ==== +# +# Waits until mts_checkpoint_routine has been fully +# executed - from the beginning till the end +# +SET @@global.debug= "+d,mts_checkpoint"; +#If end was hit before start - clear it +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; diff --git a/mysql-test/suite/rpl/r/bug84415.result b/mysql-test/suite/rpl/r/bug84415.result new file mode 100644 index 00000000000..f2d1737f258 --- /dev/null +++ b/mysql-test/suite/rpl/r/bug84415.result @@ -0,0 +1,108 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +# +# Bug #84415: slave don't report Seconds_Behind_Master when +# running slave_parallel_workers > 0 +# +SET @@global.debug= "+d,dbug.mts.force_clock_diff_eq_0"; +SET @save.slave_parallel_workers= @@global.slave_parallel_workers; +SET @save_slave_transaction_retries= @@global.slave_transaction_retries; +SET @@global.slave_transaction_retries= 0; +SET @@global.slave_parallel_workers= 4; +include/start_slave.inc +CREATE TABLE t1 (f1 INT); +CREATE DATABASE test2; +USE test2; +CREATE TABLE t2 (f1 INT); +include/sync_slave_sql_with_master.inc +LOCK TABLE test.t1 WRITE; +LOCK TABLE test2.t2 WRITE; +USE test; +INSERT INTO t1 VALUES (1); +USE test2; +INSERT INTO t2 VALUES (1); +include/sync_slave_io_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 5 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 2 and upper_bound] +UNLOCK TABLES; +include/sync_slave_sql_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +LOCK TABLE test2.t2 WRITE; +USE test; +INSERT INTO t1 VALUES (1); +USE test2; +INSERT INTO t2 VALUES (1); +include/sync_slave_io_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 2 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +CREATE DATABASE test3; +USE test3; +CREATE TABLE t3 (f1 INT); +include/sync_slave_sql_with_master.inc +LOCK TABLE test.t1 WRITE; +LOCK TABLE test3.t3 WRITE; +USE test; +INSERT INTO t1 VALUES (1); +use test2; +INSERT INTO t2 VALUES (1); +USE test3; +INSERT INTO t3 VALUES (1); +include/sync_slave_io_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 7 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 2 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +DROP TABLE test.t1; +DROP DATABASE test2; +DROP DATABASE test3; +include/sync_slave_sql_with_master.inc +SET @@global.slave_parallel_workers= @save.slave_parallel_workers; +SET @@global.debug= "-d,dbug.mts.force_clock_diff_eq_0"; +SET @@global.slave_transaction_retries= @save_slave_transaction_retries; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/bug84415_logical_clock.result b/mysql-test/suite/rpl/r/bug84415_logical_clock.result new file mode 100644 index 00000000000..3b3ac350fcf --- /dev/null +++ b/mysql-test/suite/rpl/r/bug84415_logical_clock.result @@ -0,0 +1,122 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +# +# Bug #84415: slave don't report Seconds_Behind_Master when +# running slave_parallel_workers > 0 (test for LOGICAL_CLOCK) +# +SET @@global.debug= "+d,dbug.mts.force_clock_diff_eq_0"; +SET @save_slave_parallel_type = @@GLOBAL.slave_parallel_type; +SET @save_slave_parallel_workers = @@GLOBAL.slave_parallel_workers; +SET @save_slave_transaction_retries = @@GLOBAL.slave_transaction_retries; +SET GLOBAL slave_parallel_type = LOGICAL_CLOCK; +SET GLOBAL slave_parallel_workers = 3; +SET GLOBAL slave_transaction_retries = 0; +include/start_slave.inc +CREATE TABLE t1 (a int); +CREATE TABLE t2 (a int); +INSERT INTO t1 SET a=1; +include/sync_slave_sql_with_master.inc +LOCK TABLE test.t1 WRITE, test.t2 WRITE; +BEGIN; +INSERT INTO t1 SET a=2; +BEGIN; +INSERT INTO t2 SET a=4; +COMMIT; +COMMIT; +include/sync_slave_io_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 3 and upper_bound] +UNLOCK TABLES; +include/sync_slave_sql_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +INSERT INTO t1 SET a=1; +include/sync_slave_sql_with_master.inc +LOCK TABLE test.t1 WRITE; +LOCK TABLE test.t2 WRITE; +BEGIN; +INSERT INTO t1 SET a=2; +BEGIN; +INSERT INTO t2 SET a=3; +COMMIT; +COMMIT; +include/sync_slave_io_with_master.inc +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 2 and upper_bound] +UNLOCK TABLES; +include/sync_slave_sql_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +CREATE DATABASE second_test; +CREATE TABLE second_test.t3 (f1 INT); +INSERT INTO test.t1 SET a=1; +LOCK TABLE test.t1 WRITE; +LOCK TABLE test.t2 WRITE; +LOCK TABLE second_test.t3 WRITE; +BEGIN; +INSERT INTO second_test.t3 VALUES (1); +BEGIN; +INSERT INTO test.t1 SET a=2; +BEGIN; +INSERT INTO test.t2 SET a=3; +COMMIT; +COMMIT; +COMMIT; +include/sync_slave_io_with_master.inc +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 1 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 2 and upper_bound] +UNLOCK TABLES; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be between 3 and upper_bound] +UNLOCK TABLE; +SET @@global.debug= "+d,mts_checkpoint"; +SET DEBUG_SYNC="RESET"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_start"; +SET DEBUG_SYNC="now WAIT_FOR mts_checkpoint_end"; +SET @@global.debug= "-d,mts_checkpoint"; +include/assert.inc [Seconds_Behind_Master must be 0] +DROP TABLE test.t1; +DROP TABLE test.t2; +DROP DATABASE second_test; +include/stop_slave.inc +SET @@global.debug= "-d,dbug.mts.force_clock_diff_eq_0"; +SET @@GLOBAL.slave_parallel_type= @save_slave_parallel_type; +SET @@GLOBAL.slave_parallel_workers= @save_slave_parallel_workers; +SET @@GLOBAL.slave_transaction_retries= @save_slave_transaction_retries; +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/bug84415.test b/mysql-test/suite/rpl/t/bug84415.test new file mode 100644 index 00000000000..752e1a0a3c9 --- /dev/null +++ b/mysql-test/suite/rpl/t/bug84415.test @@ -0,0 +1,291 @@ +--source include/have_debug.inc +--source include/have_debug_sync.inc +--let $rpl_skip_start_slave= 1 +--source include/master-slave.inc + +--echo # +--echo # Bug #84415: slave don't report Seconds_Behind_Master when +--echo # running slave_parallel_workers > 0 +--echo # + +# restart slave in MTS mode +connection slave; +SET @@global.debug= "+d,dbug.mts.force_clock_diff_eq_0"; +SET @save.slave_parallel_workers= @@global.slave_parallel_workers; +SET @save_slave_transaction_retries= @@global.slave_transaction_retries; +SET @@global.slave_transaction_retries= 0; +SET @@global.slave_parallel_workers= 4; +--source include/start_slave.inc + +# 1st scenario. slave executes insert to t1 and is blocked due to lock on t1, after 3s another worker +# on slave starts to execute insert to t2, this worker is also blocked on insert to t2. After 2 more +# seconds we check Seconds_Behind_Master. Since worker is still executing insert to t1 this means +# that slave is at least 5s behind master (might be more due to slow network connection). + +connect (slave2,127.0.0.1,root,,test,$SLAVE_MYPORT,); +connection master; + +CREATE TABLE t1 (f1 INT); +CREATE DATABASE test2; +USE test2; +CREATE TABLE t2 (f1 INT); +--source include/sync_slave_sql_with_master.inc + +connection slave1; +LOCK TABLE test.t1 WRITE; + +connection slave2; +LOCK TABLE test2.t2 WRITE; + +connection master; + +USE test; +let $start= `SELECT UNIX_TIMESTAMP()`; +INSERT INTO t1 VALUES (1); + +--real_sleep 3 + +USE test2; +let $start2= `SELECT UNIX_TIMESTAMP()`; +INSERT INTO t2 VALUES (1); +--real_sleep 2 + +# +# all events are in relay-log +# +source include/sync_slave_io_with_master.inc; + +# Check that two workers are waiting +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# $upper_bound = "timestamp on slave after reading Seconds_Behind_Master" - +# "timestamp on master before the insert statement". + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 5 and upper_bound; +let $assert_cond= 5 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave1; +UNLOCK TABLES; + +# Now the first worker which was 5s behind the master has completed its task. However the second +# worker is still running its task. This means that Seconds_Behind_Master should now be 2s +# or slightly more. + +# Check that one worker is still running (waiting) +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start2`; +let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound; +let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave2; +UNLOCK TABLES; + +--connection master +--source include/sync_slave_sql_with_master.inc + +# Now both workers have completed their tasks and no new tasks have arrived, thus +# Seconds_Behind_Master should be equal to 0. + +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +source include/wait_for_mts_checkpoint.inc; +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +# 2nd scenario. In this scenario we check if Seconds_Behind_Master is updated correctly when first +# worker compeletes its tasks first. + +connection slave2; +LOCK TABLE test2.t2 WRITE; + +connection master; + +USE test; +let $start= `SELECT UNIX_TIMESTAMP()`; +INSERT INTO t1 VALUES (1); + +--real_sleep 3 + +USE test2; +let $start2= `SELECT UNIX_TIMESTAMP()`; +INSERT INTO t2 VALUES (1); +--real_sleep 2 + +# +# all events are in relay-log +# +source include/sync_slave_io_with_master.inc; + +# Check that one worker is still running +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# First worker has completed its task, second one is still running. +# We should be 2 seconds behind the master. + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start2`; +let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound; +let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave2; +UNLOCK TABLES; + +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# Now both workers have completed their tasks and no new tasks have arrived, thus +# Seconds_Behind_Master should be equal to 0. + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +# 3rd scenario. Three workers are running. In this scenario we check if correct Seconds_Behind_Master +# is calculated when second worker completes first. + +--connection master + +CREATE DATABASE test3; +USE test3; +CREATE TABLE t3 (f1 INT); +--source include/sync_slave_sql_with_master.inc + +connect (slave3,127.0.0.1,root,,test,$SLAVE_MYPORT,); + +connection slave1; +LOCK TABLE test.t1 WRITE; + +connection slave3; +LOCK TABLE test3.t3 WRITE; + +connection master; + +let $start= `SELECT UNIX_TIMESTAMP()`; + +USE test; +INSERT INTO t1 VALUES (1); +--real_sleep 3 + +use test2; +INSERT INTO t2 VALUES (1); +--real_sleep 2 + +USE test3; +let $start3= `SELECT UNIX_TIMESTAMP()`; +INSERT INTO t3 VALUES (1); +--real_sleep 2 + +source include/sync_slave_io_with_master.inc; + +# Only second worker has completed. This means that longest running worker is the first one, thus +# slave should be 7s behind the master. + +# Check that two workers are still running +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 7 and upper_bound; +let $assert_cond= 7 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave1; +UNLOCK TABLES; + +# Check that last worker is still running +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start3`; +let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound; +let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave3; +UNLOCK TABLES; + +# Now all three workers have completed. Seconds_Behind_Master should be 0. + +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +## +# cleanup +## +connection master; +DROP TABLE test.t1; +DROP DATABASE test2; +DROP DATABASE test3; + +--source include/sync_slave_sql_with_master.inc + +SET @@global.slave_parallel_workers= @save.slave_parallel_workers; +SET @@global.debug= "-d,dbug.mts.force_clock_diff_eq_0"; +SET @@global.slave_transaction_retries= @save_slave_transaction_retries; + +--source include/rpl_end.inc + diff --git a/mysql-test/suite/rpl/t/bug84415_logical_clock.test b/mysql-test/suite/rpl/t/bug84415_logical_clock.test new file mode 100644 index 00000000000..8a31fe3e083 --- /dev/null +++ b/mysql-test/suite/rpl/t/bug84415_logical_clock.test @@ -0,0 +1,358 @@ +# Must be statement as we rely on PROCESSLIST's Info column +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_statement.inc +--let $rpl_skip_start_slave= 1 +--source include/master-slave.inc + +--echo # +--echo # Bug #84415: slave don't report Seconds_Behind_Master when +--echo # running slave_parallel_workers > 0 (test for LOGICAL_CLOCK) +--echo # + +--connection slave +SET @@global.debug= "+d,dbug.mts.force_clock_diff_eq_0"; + +SET @save_slave_parallel_type = @@GLOBAL.slave_parallel_type; +SET @save_slave_parallel_workers = @@GLOBAL.slave_parallel_workers; +SET @save_slave_transaction_retries = @@GLOBAL.slave_transaction_retries; + +SET GLOBAL slave_parallel_type = LOGICAL_CLOCK; +SET GLOBAL slave_parallel_workers = 3; +SET GLOBAL slave_transaction_retries = 0; + +--source include/start_slave.inc + +# 1st scenario. On slave - two statements are applied in parallel on one database. +# Both workers are blocked. + +connect (master2,localhost,root,,); + +connection slave; +connect (slave2,127.0.0.1,root,,test,$SLAVE_MYPORT,); + +connection master; + +CREATE TABLE t1 (a int); +CREATE TABLE t2 (a int); + +# We need one statement to be commited so the parallel +# statements have a common parent commit +INSERT INTO t1 SET a=1; +--source include/sync_slave_sql_with_master.inc + +connection slave1; +LOCK TABLE test.t1 WRITE, test.t2 WRITE; + +--connection master1 +let $start= `SELECT UNIX_TIMESTAMP()`; +BEGIN; +INSERT INTO t1 SET a=2; + +--connection master2 +BEGIN; +INSERT INTO t2 SET a=4; + +--connection master1 +COMMIT; +--connection master2 +COMMIT; + +connection master; +source include/sync_slave_io_with_master.inc; + +# Wait until all workers are blocked by locks. It implies that all +# transactions are registered into the order commit queue. +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# Sleep on slave so we were at least 3 seconds behind the master +--real_sleep 3 +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 3 and upper_bound; +let $assert_cond= 3 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +# Unlock tables and allow both workers to complete +connection slave1; +UNLOCK TABLES; + +connection master; +--source include/sync_slave_sql_with_master.inc + +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +# Now both workers have completed. Thus Seconds_Behind_Master shoud be equal to 0. +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +# 2nd scenario. Now only the first worker is blocked. + +# Make sure there is a parent commit +connection master; +INSERT INTO t1 SET a=1; +--source include/sync_slave_sql_with_master.inc + +connection slave1; +LOCK TABLE test.t1 WRITE; + +connection slave2; +LOCK TABLE test.t2 WRITE; + +--connection master1 +let $start= `SELECT UNIX_TIMESTAMP()`; + +BEGIN; +INSERT INTO t1 SET a=2; + +--connection master2 +BEGIN; +INSERT INTO t2 SET a=3; + +--connection master1 +COMMIT; +--connection master2 +COMMIT; + +connection master; + +source include/sync_slave_io_with_master.inc; + +# Wait until both workers are queued and blocked. +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# Now release one worker +connection slave1; +UNLOCK TABLES; + +# Wait until released worker completes its taks +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# Sleep for 2 seconds so Seconds_Behind_Master was at least 2 +--real_sleep 2 + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound; +let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +connection slave2; +UNLOCK TABLES; + +--connection master +--source include/sync_slave_sql_with_master.inc + +# All workers should be done by now. +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +# 3rd scenario. Now there are 3 workers working in pararell. Two working on table +# from the same db and a 3rd one running on separate database. + +connection master; +CREATE DATABASE second_test; +CREATE TABLE second_test.t3 (f1 INT); + +# Make sure there is a parent commit +INSERT INTO test.t1 SET a=1; +--sync_slave_with_master + +connect (slave3,127.0.0.1,root,,test,$SLAVE_MYPORT,); + +# Lock all tables to block all workers. +connection slave1; +LOCK TABLE test.t1 WRITE; + +connection slave2; +LOCK TABLE test.t2 WRITE; + +connection slave3; +LOCK TABLE second_test.t3 WRITE; + +connect (master_second_test,localhost,root,,); + +--connection master_second_test +BEGIN; +INSERT INTO second_test.t3 VALUES (1); + +--connection master1 +let $start= `SELECT UNIX_TIMESTAMP()`; +BEGIN; +INSERT INTO test.t1 SET a=2; + +--connection master2 +BEGIN; +INSERT INTO test.t2 SET a=3; + +--connection master_second_test +COMMIT; +--connection master1 +COMMIT; +--connection master2 +COMMIT; + +connection master; +source include/sync_slave_io_with_master.inc; + + +# Wait for all workers to be queued +# There should be two workers watiting for locks on test database + +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock' + AND Info LIKE 'INSERT INTO test%'; +--source include/wait_condition.inc + +# And one worker waiting for the lock on second_database +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock' + AND Info LIKE 'INSERT INTO second_test%'; +--source include/wait_condition.inc + +# Make sure that there are 3 overall at the same point in time +let $wait_condition= SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# Wait for a second so Second_Behind_Master was at least 1 +--real_sleep 1 +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 1 and upper_bound; +let $assert_cond= 1 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +# Lets finish concurrent write on second_test and check if workers on test report +# correct Seconds_Behind_Master +connection slave3; +UNLOCK TABLES; + +--real_sleep 1 + +connection slave; +# Wait till worker on second_test db finishes +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock' + AND Info LIKE 'INSERT INTO second_test%'; +--source include/wait_condition.inc + +# There should be the remaining two workers hanging on test db +let $wait_condition= SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock' + AND Info LIKE 'INSERT INTO test%'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 2 and upper_bound; +let $assert_cond= 2 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +# Lets finish worker writing to t1 +connection slave1; +UNLOCK TABLES; + +# There should be only one worker left +let $wait_condition= SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +--real_sleep 1 + +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $stop= `SELECT UNIX_TIMESTAMP()`; +let $upper_bound= `SELECT $stop - $start`; +let $assert_text= Seconds_Behind_Master must be between 3 and upper_bound; +let $assert_cond= 3 <= $sbm AND $sbm <= $upper_bound; +source include/assert.inc; + +# Release last worker +connection slave2; +UNLOCK TABLE; + +# Wait for all workers to be done +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Waiting for table metadata lock'; +--source include/wait_condition.inc +let $wait_condition= SELECT count(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State = 'Executing event' + OR State = 'update'; +--source include/wait_condition.inc + +# All catched up Seconds_Behind_Master should be 0. Lets wait some more to see if it does not increase +# Seconds_Behind_Master + +--real_sleep 1 +source include/wait_for_mts_checkpoint.inc; + +let $sbm= query_get_value("SHOW SLAVE STATUS", Seconds_Behind_Master, 1); +let $assert_text= Seconds_Behind_Master must be 0; +let $assert_cond= $sbm = 0; +source include/assert.inc; + +## +# cleanup +## +connection master; +DROP TABLE test.t1; +DROP TABLE test.t2; +DROP DATABASE second_test; + +connection slave; +#DROP USER test; + +--source include/stop_slave.inc +SET @@global.debug= "-d,dbug.mts.force_clock_diff_eq_0"; +SET @@GLOBAL.slave_parallel_type= @save_slave_parallel_type; +SET @@GLOBAL.slave_parallel_workers= @save_slave_parallel_workers; +SET @@GLOBAL.slave_transaction_retries= @save_slave_transaction_retries; +--source include/start_slave.inc + +--source include/rpl_end.inc + diff --git a/sql/log_event.cc b/sql/log_event.cc index f4e67236794..0f0ab03dc4b 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -5966,10 +5966,10 @@ int Rotate_log_event::do_update_pos(Relay_log_info *rli) if (rli->is_parallel_exec()) { bool real_event= server_id && !is_artificial_event(); + const auto ts= common_header->when.tv_sec + + static_cast(exec_time); rli->reset_notified_checkpoint(0, - real_event ? - common_header->when.tv_sec + - (time_t) exec_time : 0, + real_event ? &ts : nullptr, true/*need_data_lock=true*/); } diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 9d12ad7f42b..5460ed4311d 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -329,11 +329,12 @@ void Relay_log_info::reset_notified_relay_log_change() @param shift number of bits to shift by Worker due to the current checkpoint change. @param new_ts new seconds_behind_master timestamp value - unless zero. Zero could be due to FD event + unless nullptr. nullptr could be due to FD event or fake rotate event. @param need_data_lock False if caller has locked @c data_lock */ -void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts, +void Relay_log_info::reset_notified_checkpoint(ulong shift, + const time_t *const new_ts, bool need_data_lock) { /* @@ -386,7 +387,7 @@ void Relay_log_info::reset_notified_checkpoint(ulong shift, time_t new_ts, mysql_mutex_lock(&data_lock); else mysql_mutex_assert_owner(&data_lock); - last_master_timestamp= new_ts; + last_master_timestamp= *new_ts; if (need_data_lock) mysql_mutex_unlock(&data_lock); } diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 429f843cf81..f678c520c4e 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -961,7 +961,8 @@ public: Coordinator notifies Workers about this event. Coordinator and Workers maintain a bitmap of executed group that is reset with a new checkpoint. */ - void reset_notified_checkpoint(ulong, time_t, bool); + void reset_notified_checkpoint(ulong, const time_t *const new_ts, + bool); /** Called when gaps execution is ended so it is crash-safe diff --git a/sql/rpl_rli_pdb.h b/sql/rpl_rli_pdb.h index 4fc88439067..dc1ffd97b01 100644 --- a/sql/rpl_rli_pdb.h +++ b/sql/rpl_rli_pdb.h @@ -180,6 +180,7 @@ typedef struct st_slave_job_group checkpoint_relay_log_pos= 0; checkpoint_seqno= (uint) -1; done= 0; + ts= 0; #ifndef DBUG_OFF notified= false; #endif @@ -244,7 +245,12 @@ public: /** return the value of @c data member of the head of the queue. */ - Element_type* head_queue(); + Element_type* head_queue() + { + if (empty()) + return nullptr; + return &m_Q[entry]; + } bool gt(ulong i, ulong k); // comparision of ordering of two entities /* index is within the valid range */ diff --git a/sql/rpl_slave.cc b/sql/rpl_slave.cc index f1c514776ea..6d154c9ea13 100644 --- a/sql/rpl_slave.cc +++ b/sql/rpl_slave.cc @@ -2829,6 +2829,8 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi) mysql_mutex_lock(&mi->data_lock); mi->clock_diff_with_master= (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10)); + DBUG_EXECUTE_IF("dbug.mts.force_clock_diff_eq_0", + mi->clock_diff_with_master= 0;); mysql_mutex_unlock(&mi->data_lock); } else if (check_io_slave_killed(mi->info_thd, mi, NULL)) @@ -5284,9 +5286,14 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli) If it is an artificial event, or a relay log event (IO thread generated event) or ev->when is set to 0, or a FD from master, or a heartbeat event with server_id '0' then we don't update the last_master_timestamp. + + In case of parallel execution last_master_timestamp is only updated when + an event is taken out of GAQ. Thus when last_master_timestamp is 0 we need + to initialize it with a timestamp from the first event to be executed in + parallel. */ - if (!(rli->is_parallel_exec() || - ev->is_artificial_event() || ev->is_relay_log_event() || + if ((!rli->is_parallel_exec() || rli->last_master_timestamp == 0) && + !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->common_header->when.tv_sec == 0) || ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT || ev->server_id == 0)) @@ -6787,6 +6794,7 @@ bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period, ulong cnt; bool error= FALSE; struct timespec curr_clock; + time_t ts=0; DBUG_ENTER("checkpoint_routine"); @@ -6796,6 +6804,13 @@ bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period, if (!rli->gaq->count_done(rli)) DBUG_RETURN(FALSE); } + DBUG_EXECUTE_IF("mts_checkpoint", + { + const char act[]= + "now signal mts_checkpoint_start"; + DBUG_ASSERT(!debug_sync_set_action(rli->info_thd, + STRING_WITH_LEN(act))); + };); #endif /* @@ -6918,8 +6933,10 @@ bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period, cnt is zero. This value means that the checkpoint information will be completely reset. */ - rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, need_data_lock); - + ts= rli->gaq->empty() + ? 0 + : reinterpret_cast(rli->gaq->head_queue())->ts; + rli->reset_notified_checkpoint(cnt, &ts, need_data_lock); /* end-of "Coordinator::"commit_positions" */ end: @@ -6927,6 +6944,13 @@ end: #ifndef DBUG_OFF if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0)) DBUG_SUICIDE(); + DBUG_EXECUTE_IF("mts_checkpoint", + { + const char act[]= + "now signal mts_checkpoint_end"; + DBUG_ASSERT(!debug_sync_set_action(rli->info_thd, + STRING_WITH_LEN(act))); + };); #endif set_timespec_nsec(&rli->last_clock, 0); @@ -9325,9 +9349,6 @@ static Log_event* next_event(Relay_log_info* rli) */ (void) mts_checkpoint_routine(rli, period, false, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR mysql_mutex_lock(end_pos_lock); - // More to the empty relay-log all assigned events done so reset it. - if (rli->gaq->empty()) - rli->last_master_timestamp= 0; if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0)) period= 10000000ULL;