Description:
A LOGICAL_CLOCK MTS slave sometimes hangs with the following symptoms:
mysql> select * from information_schema.innodb_trx;
+-----------+-----------+---------------------+-----------------------+
| trx_id | trx_state | trx_started | trx_requested_lock_id |
+-----------+-----------+---------------------+-----------------------+
| 147000245 | LOCK WAIT | 2016-08-01 19:19:36 | 147000245:257:4:5 |
| 147000244 | RUNNING | 2016-08-01 19:19:36 | NULL |
| 147000243 | LOCK WAIT | 2016-08-01 19:19:36 | 147000243:257:4:5 |
+-----------+-----------+---------------------+-----------------------+
3 rows in set (0.00 sec)
mysql> select * from information_schema.innodb_lock_waits;
+-------------------+-------------------+-----------------+-------------------+
| requesting_trx_id | requested_lock_id | blocking_trx_id | blocking_lock_id |
+-------------------+-------------------+-----------------+-------------------+
| 147000245 | 147000245:257:4:5 | 147000244 | 147000244:257:4:5 |
| 147000243 | 147000243:257:4:5 | 147000245 | 147000245:257:4:5 |
| 147000243 | 147000243:257:4:5 | 147000245 | 147000245:257:4:5 |
+-------------------+-------------------+-----------------+-------------------+
3 rows in set, 1 warning (0.00 sec)
Related call stacks would be:
ntdll!ZwWaitForKeyedEvent+0xa
ntdll!RtlSleepConditionVariableCS+0xd4
kernel32!SleepConditionVariableCS+0x28
mysqld!Commit_order_manager::wait_for_its_turn+0x157 [.\sql\rpl_slave_commit_order_manager.cc @ 82]
mysqld!MYSQL_BIN_LOG::ordered_commit+0xde [.\sql\binlog.cc @ 8892]
mysqld!MYSQL_BIN_LOG::commit+0x520 [.\sql\binlog.cc @ 8199]
mysqld!ha_commit_trans+0x1c7 [.\sql\handler.cc @ 1785]
mysqld!trans_commit+0x3b [.\sql\transaction.cc @ 239]
mysqld!Xid_log_event::do_commit+0x15 [.\sql\log_event.cc @ 6826]
mysqld!Xid_apply_log_event::do_apply_event_worker+0xdc [.\sql\log_event.cc @ 6902]
mysqld!slave_worker_exec_job_group+0x136 [.\sql\rpl_rli_pdb.cc @ 2607]
mysqld!handle_slave_worker+0x2db [.\sql\rpl_slave.cc @ 6103]
mysqld!pfs_spawn_thread+0x132 [.\storage\perfschema\pfs.cc @ 2191]
mysqld!win_thread_start+0x1b [.\mysys\my_thread.c @ 38]
mysqld!_callthreadstartex+0x17 [f:\dd\vctools\crt\crtw32\startup\threadex.c @ 376]
mysqld!_threadstartex+0x102 [f:\dd\vctools\crt\crtw32\startup\threadex.c @ 354]
---------
ntdll!ZwWaitForKeyedEvent+0xa
ntdll!RtlSleepConditionVariableCS+0xd4
kernel32!SleepConditionVariableCS+0x28
mysqld!inline_mysql_cond_wait+0x4a [.\include\mysql\psi\mysql_thread.h @ 1184]
mysqld!Mts_submode_logical_clock::wait_for_last_committed_trx+0x158 [.\sql\rpl_mts_submode.cc @ 490]
mysqld!Mts_submode_logical_clock::schedule_next_event+0x182 [.\sql\rpl_mts_submode.cc @ 657]
mysqld!schedule_next_event+0x2e [.\sql\log_event.cc @ 2724]
mysqld!Log_event::get_slave_worker+0x77d [.\sql\log_event.cc @ 2860]
mysqld!Log_event::apply_event+0xf8 [.\sql\log_event.cc @ 3345]
mysqld!apply_event_and_update_pos+0x150 [.\sql\rpl_slave.cc @ 4702]
mysqld!exec_relay_log_event+0x1f7 [.\sql\rpl_slave.cc @ 5223]
mysqld!handle_slave_sql+0x899 [.\sql\rpl_slave.cc @ 7306]
mysqld!pfs_spawn_thread+0x132 [.\storage\perfschema\pfs.cc @ 2191]
mysqld!win_thread_start+0x1b [.\mysys\my_thread.c @ 38]
mysqld!_callthreadstartex+0x17 [f:\dd\vctools\crt\crtw32\startup\threadex.c @ 376]
mysqld!_threadstartex+0x102 [f:\dd\vctools\crt\crtw32\startup\threadex.c @ 354]
kernel32!BaseThreadInitThunk+0xd
How to repeat:
Setup master/slave gtid/rbr replication. important slave configs:
[mysqld]
server-id=2
log_slave_updates=1
log_bin
sync_binlog=0
slave_parallel_workers=4
slave_parallel_type=LOGICAL_CLOCK
slave_preserve_commit_order=1
enforce-gtid-consistency=1
gtid-mode=on
relay_log_info_repository=TABLE
master_info_repository=TABLE
Run a generic DML workload in many threads on the master. Observe slave.
E.g.
set sql_mode="";
drop table if exists a,b,c,d,e;
create table a(a tinyint, b tinyint, c tinyint,
key(a),key(b),key(c),key(a,b,c),
unique key(b,c),key(c,a,b))engine=innodb;
create table b like a;
create table c like a;
create table d like a;
create table e like a;
drop procedure if exists p;
delimiter $
create procedure p()
begin
declare continue handler for sqlexception begin end;
repeat
if rand()>0.2 then start transaction; end if;
if rand()>0.1 then set @a:=floor(rand()*10); end if;
if rand()>0.1 then set @b:=floor(rand()*10); end if;
if rand()>0.1 then set @c:=floor(rand()*10); end if;
if rand()>0.5 then replace into a(a,b,c) values(@a,@b,@c); end if;
if rand()>0.5 then replace into b(a,b,c) values(@a,@b,@c); end if;
if rand()>0.5 then replace into c(a,b,c) values(@a,@b,@c); end if;
if rand()>0.5 then replace into d(a,b,c) values(@a,@b,@c); end if;
if rand()>0.5 then replace into e(a,b,c) values(@a,@b,@c); end if;
if rand()>0.5 then update a set b=@a where c=@a; end if;
if rand()>0.5 then update b set c=@b where a=@b; end if;
if rand()>0.5 then update c set d=@c where b=@c; end if;
if rand()>0.5 then update d set e=@d where c=@d; end if;
if rand()>0.5 then update e set a=@e where d=@e; end if;
if rand()>0.5 then delete from a where a=@a; end if;
if rand()>0.5 then delete from b where b=@b; end if;
if rand()>0.5 then delete from c where c=@c; end if;
if rand()>0.5 then delete from d where a=@d; end if;
if rand()>0.5 then delete from e where b=@c; end if;
if rand()>0.2 then commit; end if;
until rand()>0.9 end repeat;
end $
delimiter ;
call p(); -- run in 5 threads