####################################################################################################### # # ====== Error ====== # - "Slave I/O: Unexpected master's heartbeat data: heartbeat is not compatible with local info;" # # ==== Description ==== # - Large transactions may cause binlog file larger than 4G. 4G is the max value of pos in a # heartbeat event. If a heartbeat is sent after the large transaction, pos will be truncated, # which will may cause inaccurate Exec_Master_Log_Pos or the error mentioned above. # # ==== Testcases ==== # This test verifies 2 scenarioes that may causes this error when large transaction happens. # (1)heartbeat period time out # (2)ignored gtid causes heartbeat # ####################################################################################################### --let $rpl_skip_start_slave= 1 --let $rpl_skip_reset_master_and_slave= 1 --let $rpl_skip_change_master= 1 --source include/master-slave.inc --source include/have_debug.inc --source include/have_debug_sync.inc --let $rpl_server_number= 1 --let $rpl_start_with_gtids= 1 --source include/rpl_restart_server.inc --let $rpl_server_number= 2 --let $rpl_start_with_gtids= 1 --source include/rpl_restart_server.inc CALL mtr.add_suppression("Unexpected master's heartbeat data"); CALL mtr.add_suppression("could not queue event from master"); ####################################################################################################### # Test1: Send truncated heartbeat when timeout # # 1.Set heartbeat period to a small value(10s) # 2.Start bigtrans(not really larger than 4G, use simulated 1M instead) # 3.Ignore send heartbeat when log pos is truncated # 4.Check status Last_IO_Error and Slave_IO_Running on slave # 5.Switch binary log and start bigtrans(not really larger than 4G, use simulated 1M instead) # 6.Send heartbeat when log pos is truncated # 7.Check status Last_IO_Error and Slave_IO_Running on slave # ####################################################################################################### --echo #Test1: Send truncated heartbeat when timeout # Set heartbeat period and start slave --connection slave --disable_query_log --eval change master to master_host='127.0.0.1',master_port=$MASTER_MYPORT, master_user='root', master_heartbeat_period=10,master_auto_position=1 --enable_query_log --source include/start_slave.inc # Check slave_io_running status on slave --let slave_io_running= query_get_value("show slave status", Slave_IO_Running, 1) --let $assert_text= slave_io_running should be Yes --let $assert_cond= "$slave_io_running" = "Yes" --source include/assert.inc # Check last_io_error status on slave --let last_io_error= query_get_value("show slave status", Last_IO_Error, 1) --let $assert_text= last_io_error should be empty --let $assert_cond= "$last_io_error" = "" --source include/assert.inc --connection master # Create procedure of big trans create table t1(id int, name CHAR(250)) engine=innodb; DELIMITER //; CREATE PROCEDURE bigtrans() BEGIN declare count int; declare str varchar(255); declare i int; set count = 1; set str ='adjfjjfdlksjflksdjflkdsjflsdjdfljdsfljdsueoruweytouewptiip[t[or[ewrwerweurwrjflkjsf;aks;gsfsfdsfsjr'; set autocommit = 0; set i = 0; start transaction; while i < 6400 do insert into t1 values(i, concat(str,i)); set i = i + 1; end while; commit; set autocommit = 1; END; // DELIMITER ;// # Ignore send heartbeat event when log pos is truncated set global debug='+d,ignore_truncated_heartbeat'; call bigtrans; set debug_sync='now wait_for heartbeat_ignored'; set global debug='-d,ignore_truncated_heartbeat'; --connection slave # Check slave_io_running status on slave --let slave_io_running= query_get_value("show slave status", Slave_IO_Running, 1) --let $assert_text= slave_io_running should be Yes --let $assert_cond= "$slave_io_running" = "Yes" --source include/assert.inc # Check last_io_error status on slave --let last_io_error= query_get_value("show slave status", Last_IO_Error, 1) --let $assert_text= last_io_error should be empty --let $assert_cond= "$last_io_error" = "" --source include/assert.inc # Switch binary log and start bigtrans --connection master flush binary logs; --source include/sync_slave_sql_with_master.inc # Send heartbeat event when log pos is truncated --connection master set global debug='+d,send_truncated_heartbeat'; call bigtrans; set debug_sync='now wait_for heartbeat_truncated'; set global debug='-d,send_truncated_heartbeat'; --connection slave # Check slave_io_running status on slave --let slave_io_running= query_get_value("show slave status", Slave_IO_Running, 1) --let $assert_text= slave_io_running should be No --let $assert_cond= "$slave_io_running" = "No" --source include/assert.inc # Check last_io_error status on slave --let last_io_error= query_get_value("show slave status", Last_IO_Error, 1) --let $assert_text= last_io_error should be could not queue event from master --let $assert_cond= "$last_io_error" = "Relay log write failure: could not queue event from master" --source include/assert.inc ####################################################################################################### # Test2: Send heartbeat event caused by ignored gtids # # 1.Start bigtrans(not really larger than 4G, use simulated 1M instead) # 2.Ignore truncated heartbeat on master # 3.Set gtid_purged and heartbeat period to a large value(1800s) on slave, and start slave # 4.Check status Last_IO_Error and Slave_IO_Running and Exec_Master_Log_Pos on slave # 5.Stop slave and reset master # 6.Start bigtrans(not really larger than 4G, use simulated 1M instead) # 7.Send truncated heartbeat on master # 8.Set gtid_purged on slave, and start slave # 9.Check status Last_IO_Error and Slave_IO_Running and Exec_Master_Log_Pos on slave # ####################################################################################################### --echo #Test2: Send heartbeat event caused by ignored gtids # Start slave connection slave; --source include/start_slave.inc # Switch binlog on master --connection master set global slave_net_timeout=3600; flush binary logs; --source include/sync_slave_sql_with_master.inc --source include/stop_slave.inc # Execute first large transaction and sync to slave --connection master call bigtrans; --let master_uuid= `SELECT @@GLOBAL.SERVER_UUID` # Execute transactions to be ignored insert into t1 values(1,'aaaaaaaaaaa'); insert into t1 values(2,'aaaaaaaaaaa'); insert into t1 values(3,'aaaaaaaaaaa'); insert into t1 values(4,'aaaaaaaaaaa'); insert into t1 values(5,'aaaaaaaaaaa'); --let master_log_pos= query_get_value("show master status", Position, 1) set global debug='+d,ignore_truncated_heartbeat'; # Set gtid_purged on slave and start slave --connection slave reset master; set global slave_net_timeout=3600; change master to master_heartbeat_period=1800; --disable_query_log --eval set global gtid_purged='$master_uuid:1-6:8-11' --enable_query_log start slave; # Ignore truncated heartbeat event on master --connection master set debug_sync='now wait_for heartbeat_ignored'; set global debug='-d,ignore_truncated_heartbeat'; --real_sleep 5 # Check slave_io_running status on slave --connection slave --let slave_io_running= query_get_value("show slave status", Slave_IO_Running, 1) --let $assert_text= slave_io_running should be yes --let $assert_cond= "$slave_io_running" = "Yes" --source include/assert.inc # Check last_io_error status on slave --let last_io_error= query_get_value("show slave status", Last_IO_Error, 1) --let $assert_text= last_io_error should be empty --let $assert_cond= "$last_io_error" = "" --source include/assert.inc # Check Exec_Master_Log_Pos status on slave --let Exec_Master_Log_Pos= query_get_value("show slave status", Exec_Master_Log_Pos, 1) --let $assert_text= Exec_Master_Log_Pos should be the same as master --let $assert_cond= $Exec_Master_Log_Pos = $master_log_pos --source include/assert.inc # Switch binlog on master --connection master flush binary logs; --source include/sync_slave_sql_with_master.inc --source include/stop_slave.inc # Execute first large transaction and sync to slave --connection master call bigtrans; --let master_uuid= `SELECT @@GLOBAL.SERVER_UUID` --let master_log_pos= query_get_value("show master status", Position, 1) # Execute transactions to be ignored insert into t1 values(1,'aaaaaaaaaaa'); insert into t1 values(2,'aaaaaaaaaaa'); insert into t1 values(3,'aaaaaaaaaaa'); insert into t1 values(4,'aaaaaaaaaaa'); insert into t1 values(5,'aaaaaaaaaaa'); set global debug='+d,send_truncated_heartbeat'; # Set gtid_purged on slave and start slave --connection slave reset master; --disable_query_log --eval set global gtid_purged='$master_uuid:1-12:14-17' --enable_query_log start slave; # Wait for one truncated heartbeat event was sent on master --connection master set debug_sync='now wait_for heartbeat_truncated'; set global debug='-d,send_truncated_heartbeat'; --real_sleep 5 # Check slave_io_running status on slave --connection slave --let slave_io_running= query_get_value("show slave status", Slave_IO_Running, 1) --let $assert_text= slave_io_running should be No --let $assert_cond= "$slave_io_running" = "No" --source include/assert.inc # Check last_io_error status on slave --let last_io_error= query_get_value("show slave status", Last_IO_Error, 1) --let $assert_text= last_io_error should be could not queue event from master --let $assert_cond= "$last_io_error" = "Relay log write failure: could not queue event from master" --source include/assert.inc # Check Exec_Master_Log_Pos status on slave --let Exec_Master_Log_Pos= query_get_value("show slave status", Exec_Master_Log_Pos, 1) --let $assert_text= Exec_Master_Log_Pos should be the same as master --let $assert_cond= $Exec_Master_Log_Pos = $master_log_pos --source include/assert.inc # Cleanup master --connection master drop table t1; drop procedure bigtrans; # Cleanup slave --connection slave drop table t1; drop procedure bigtrans;