diff --git a/mysql-test/suite/rpl/r/rpl_stop_slave_partial_trx.result b/mysql-test/suite/rpl/r/rpl_stop_slave_partial_trx.result new file mode 100644 index 00000000000..1568e36de17 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_stop_slave_partial_trx.result @@ -0,0 +1,81 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +[connection master] +call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state"); +create database d1; +create database d2; +create database d3; +create table d1.t1 (a int) engine=innodb; +insert into d1.t1 values(1); +create table d2.t2 (a int) engine=myisam; +create table d3.t3 (a int) engine=innodb; +include/sync_slave_sql_with_master.inc +[connection slave] +lock tables d1.t1 read; +set global debug= '+d,after_executed_write_rows_event'; +[connection master] +update d1.t1 set a= 5; +update d1.t1 set a= 4; +update d1.t1 set a= 3; +update d1.t1 set a= 2; +update d1.t1 set a= 1; +set global debug= '+d,dump_thread_wait_after_send_write_rows'; +insert into d2.t2 values(1); +[connection slave] +unlock tables; +set debug_sync= 'now WAIT_FOR executed'; +set global debug= '-d,after_executed_write_rows_event'; +set @start=now(); +include/stop_slave.inc +include/assert.inc [STOP SLAVE should take at least 20 seconds] +include/start_slave.inc +[connection master] +set debug_sync= 'now SIGNAL signal.continue'; +set debug_sync= 'RESET'; +[connection master] +set @@global.debug= '-d,dump_thread_wait_after_send_write_rows'; +include/sync_slave_sql_with_master.inc +include/rpl_diff.inc +include/rpl_diff.inc +include/rpl_diff.inc +[connection master] +delete from d1.t1; +insert into d1.t1 values(1); +delete from d2.t2; +delete from d3.t3; +include/sync_slave_sql_with_master.inc +[connection slave] +lock tables d1.t1 read; +set global debug= '+d,after_executed_write_rows_event'; +[connection master] +update d1.t1 set a= 5; +update d1.t1 set a= 4; +update d1.t1 set a= 3; +update d1.t1 set a= 2; +update d1.t1 set a= 1; +set global debug= '+d,dump_thread_wait_after_send_write_rows'; +insert into d3.t3 values(1); +[connection slave] +unlock tables; +set debug_sync= 'now WAIT_FOR executed'; +set global debug= '-d,after_executed_write_rows_event'; +set @start=now(); +include/stop_slave.inc +include/assert.inc [STOP SLAVE should take less than 20 seconds] +include/start_slave.inc +[connection master] +set debug_sync= 'now SIGNAL signal.continue'; +set debug_sync= 'RESET'; +[connection master] +set global debug= '-d,dump_thread_wait_after_send_write_rows'; +include/sync_slave_sql_with_master.inc +include/rpl_diff.inc +include/rpl_diff.inc +include/rpl_diff.inc +[connection master] +drop database d1; +drop database d2; +drop database d3; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-master.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-master.opt new file mode 100644 index 00000000000..f7ff7101f4d --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-master.opt @@ -0,0 +1,3 @@ +--slave_parallel_workers=8 --enforce-gtid-consistency --gtid-mode=ON --log-bin +--log-slave-updates +--debug_sync_timeout=20 diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-slave.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-slave.opt new file mode 100644 index 00000000000..877458543c8 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx-slave.opt @@ -0,0 +1,2 @@ +--slave_parallel_workers=8 --enforce-gtid-consistency --gtid-mode=ON --log-bin +--log-slave-updates diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx.test b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx.test new file mode 100644 index 00000000000..7ec5a95affb --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_partial_trx.test @@ -0,0 +1,173 @@ +# Bug #96400 : MTS STOP SLAVE takes over a minute when master crashed during event logging +# https://bugs.mysql.com/bug.php?id=96400 +# +# PS-5824: MTS STOP SLAVE takes over a minute when master crashed during event logging +# https://jira.percona.com/browse/PS-5824 +# +# FB8-105: Stop slave immediately in MTS if partial trx in the relay log can be rollbacked +# https://jira.percona.com/browse/FB8-105 +# +# This MTR test is based on patches from Facebook at: +# https://github.com/facebook/mysql-5.6/commit/cd39ae1 +# https://github.com/facebook/mysql-5.6/commit/0623a07 +# with improvements from Percona at: +# https://github.com/facebook/mysql-5.6/commit/2f459f7 + +source include/have_myisam.inc; +source include/have_debug.inc; +source include/have_debug_sync.inc; +source include/have_binlog_format_row.inc; +source include/master-slave.inc; + +call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state"); + +# Create schema +create database d1; +create database d2; +create database d3; +create table d1.t1 (a int) engine=innodb; +insert into d1.t1 values(1); # add some data for update statements +create table d2.t2 (a int) engine=myisam; # non-transactional engine +create table d3.t3 (a int) engine=innodb; # transactional engine +--source include/sync_slave_sql_with_master.inc + + + +## Test 1: STOP SLAVE when there are pending jobs in the worker queues and there +## is a partial transaction on a non-transactional table. The slave should wait +## for 1 minute to complete the partial transaction before giving up. + +# Block all d1.t1 transactions on the slave +--source include/rpl_connection_slave.inc +lock tables d1.t1 read; +set global debug= '+d,after_executed_write_rows_event'; + +# Generate some load, all of these will be blocked in the slave worker queue +--source include/rpl_connection_master.inc +let $num_updates=5; +while ($num_updates) +{ + eval update d1.t1 set a= $num_updates; + dec $num_updates; +} + +# This will stop the dump thread before sending the entire group +set global debug= '+d,dump_thread_wait_after_send_write_rows'; +insert into d2.t2 values(1); + +# wait for the dump thread reach the sync point +--let $wait_condition= select count(*)=1 from information_schema.processlist where state LIKE '%debug sync point%' and command like 'Binlog Dump%' +--source include/wait_condition.inc + +--source include/rpl_connection_slave.inc +# unblock d1.t1 +unlock tables; + +# This should take at least 20 seconds because the trx on t2 is not completely downloaded and cannot be rolled back safely +let $debug_sync_timeout= 20; # should be equal to value of --debug_sync_timeout in rpl_stop_slave_partial_trx-master.opt file +set debug_sync= 'now WAIT_FOR executed'; +set global debug= '-d,after_executed_write_rows_event'; +set @start=now(); +--source include/stop_slave.inc +--let $assert_text= STOP SLAVE should take at least $debug_sync_timeout seconds +--let $assert_cond= [SELECT TIMESTAMPDIFF(SECOND, @start, NOW())] >= $debug_sync_timeout +--source include/assert.inc +--source include/start_slave.inc + +--source include/rpl_connection_master.inc +set debug_sync= 'now SIGNAL signal.continue'; +# wait for the dump thread to come out of the waiting phase before resetting the signals +--let $wait_condition= select count(*)=0 from information_schema.processlist where state LIKE '%debug sync point%' and command='Binlog Dump' +--source include/wait_condition.inc +set debug_sync= 'RESET'; + +--source include/rpl_connection_master.inc +set @@global.debug= '-d,dump_thread_wait_after_send_write_rows'; +--source include/sync_slave_sql_with_master.inc + +# Verification +--let $rpl_diff_statement= select * from d1.t1 +--source include/rpl_diff.inc +--let $rpl_diff_statement= select * from d2.t2 +--source include/rpl_diff.inc +--let $rpl_diff_statement= select * from d3.t3 +--source include/rpl_diff.inc + +# cleanup +--source include/rpl_connection_master.inc +delete from d1.t1; +insert into d1.t1 values(1); # add some data for update statements +delete from d2.t2; +delete from d3.t3; +--source include/sync_slave_sql_with_master.inc + + + +## Test 2: STOP SLAVE when there are pending jobs in the worker queues and there +## is a partial transaction on a transactional table. The slave should stop +## immidiately after completing all pending full transactions. + +# Block all d1.t1 transactions on the slave +--source include/rpl_connection_slave.inc +lock tables d1.t1 read; +set global debug= '+d,after_executed_write_rows_event'; + +# Generate some load, all of these will be blocked in the slave worker queue +--source include/rpl_connection_master.inc +let $num_updates=5; +while ($num_updates) +{ + eval update d1.t1 set a= $num_updates; + dec $num_updates; +} + +# This will stop the dump thread before sending the entire group +set global debug= '+d,dump_thread_wait_after_send_write_rows'; +insert into d3.t3 values(1); + +# wait for the dump thread reach the sync point +--let $wait_condition= select count(*)=1 from information_schema.processlist where state LIKE '%debug sync point%' and command like 'Binlog Dump%' +--source include/wait_condition.inc + +--source include/rpl_connection_slave.inc +# unblock d1.t1 +unlock tables; + +# Since the partial transaction is on a transactional table the slave should +# stop as soon as is completes all pending full transactions +set debug_sync= 'now WAIT_FOR executed'; +set global debug= '-d,after_executed_write_rows_event'; +set @start=now(); +--source include/stop_slave.inc +--let $assert_text= STOP SLAVE should take less than $debug_sync_timeout seconds +--let $assert_cond= [SELECT TIMESTAMPDIFF(SECOND, @start, NOW())] < $debug_sync_timeout +--source include/assert.inc +--source include/start_slave.inc + +--source include/rpl_connection_master.inc +set debug_sync= 'now SIGNAL signal.continue'; +# wait for the dump thread to come out of the waiting phase before resetting the signals +--let $wait_condition= select count(*)=0 from information_schema.processlist where state LIKE '%debug sync point%' and command='Binlog Dump' +--source include/wait_condition.inc +set debug_sync= 'RESET'; + +--source include/rpl_connection_master.inc +set global debug= '-d,dump_thread_wait_after_send_write_rows'; +--source include/sync_slave_sql_with_master.inc + +# Verification +--let $rpl_diff_statement= select * from d1.t1 +--source include/rpl_diff.inc +--let $rpl_diff_statement= select * from d2.t2 +--source include/rpl_diff.inc +--let $rpl_diff_statement= select * from d3.t3 +--source include/rpl_diff.inc + +# final cleanup +--source include/rpl_connection_master.inc +drop database d1; +drop database d2; +drop database d3; + + +source include/rpl_end.inc; diff --git a/sql/rpl_binlog_sender.cc b/sql/rpl_binlog_sender.cc index 3b469a159ff..d37508bf047 100644 --- a/sql/rpl_binlog_sender.cc +++ b/sql/rpl_binlog_sender.cc @@ -501,6 +501,17 @@ int Binlog_sender::send_events(IO_CACHE *log_cache, my_off_t end_pos) if (unlikely(send_packet())) DBUG_RETURN(1); + + DBUG_EXECUTE_IF("dump_thread_wait_after_send_write_rows", { + if (event_type == binary_log::WRITE_ROWS_EVENT) { + thd->get_protocol_classic()->flush_net(); + static const char act[] = + "now " + "wait_for signal.continue"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + } + }); } if (unlikely(after_send_hook(log_file, in_exclude_group ? log_pos : 0))) diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 68bd47869b4..fc4ea2c0f37 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -396,6 +396,38 @@ err: DBUG_RETURN(ret); } +bool Relay_log_info::mts_workers_queue_empty() const +{ + ulong ret= 0; + + for (Slave_worker * const *it= workers.begin(); ret == 0 && it != workers.end(); ++it) + { + Slave_worker *worker= *it; + mysql_mutex_lock(&worker->jobs_lock); + ret+= worker->curr_jobs; + mysql_mutex_unlock(&worker->jobs_lock); + } + return ret == 0; +} + +/* Checks if all in-flight stmts/trx can be safely rolled back */ +bool Relay_log_info::cannot_safely_rollback() const +{ + if (!is_parallel_exec()) + return info_thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::SESSION); + + bool ret= false; + + for (Slave_worker * const *it= workers.begin(); !ret && it != workers.end(); ++it) + { + Slave_worker *worker= *it; + mysql_mutex_lock(&worker->jobs_lock); + ret= worker->info_thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::SESSION); + mysql_mutex_unlock(&worker->jobs_lock); + } + return ret; +} + static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo) { MY_STAT s; diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index b3a0d606bd1..6b5ca853ec1 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -829,6 +829,9 @@ public: mts_group_status == MTS_IN_GROUP; } + bool mts_workers_queue_empty() const; + bool cannot_safely_rollback() const; + /** While a group is executed by a Worker the relay log can change. Coordinator notifies Workers about this event. Worker is supposed diff --git a/sql/rpl_rli_pdb.cc b/sql/rpl_rli_pdb.cc index 7f2766f5640..798f2adcf60 100644 --- a/sql/rpl_rli_pdb.cc +++ b/sql/rpl_rli_pdb.cc @@ -1857,6 +1857,15 @@ int Slave_worker::slave_worker_exec_event(Log_event *ev) set_master_log_pos(static_cast(ev->common_header->log_pos)); set_gaq_index(ev->mts_group_idx); ret= ev->do_apply_event_worker(this); + + DBUG_EXECUTE_IF("after_executed_write_rows_event", { + if (ev->get_type_code() == binary_log::WRITE_ROWS_EVENT) { + static const char act[]= "now signal executed"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + } + };); + DBUG_RETURN(ret); } diff --git a/sql/rpl_slave.cc b/sql/rpl_slave.cc index 985abc09e69..ffa608aa128 100644 --- a/sql/rpl_slave.cc +++ b/sql/rpl_slave.cc @@ -2204,8 +2204,14 @@ bool sql_slave_killed(THD* thd, Relay_log_info* rli) if (abort_loop || thd->killed || rli->abort_slave) { rli->sql_thread_kill_accepted= true; + /* NOTE: In MTS mode if all workers are done and if the partial trx + (if any) can be rolled back safely we can accept the kill */ + const bool can_rollback= rli->abort_slave && + (!rli->is_mts_in_group() || + (rli->mts_workers_queue_empty() && + !rli->cannot_safely_rollback())); is_parallel_warn= (rli->is_parallel_exec() && - (rli->is_mts_in_group() || thd->killed)); + (!can_rollback || thd->killed)); /* Slave can execute stop being in one of two MTS or Single-Threaded mode. The modes define different criteria to accept the stop. @@ -2278,7 +2284,6 @@ bool sql_slave_killed(THD* thd, Relay_log_info* rli) } if (rli->sql_thread_kill_accepted) { - rli->last_event_start_time= 0; if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) { rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP; @@ -2295,6 +2300,10 @@ bool sql_slave_killed(THD* thd, Relay_log_info* rli) } } } + + if (rli->sql_thread_kill_accepted) + rli->last_event_start_time= 0; + DBUG_RETURN(rli->sql_thread_kill_accepted); }