Bug #11236 SQL thread incorrect handling of temporary errors for ndbcluster
Submitted: 10 Jun 2005 11:43
Reporter: Tomas Ulin Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Cluster: Cluster (NDB) storage engine Severity:S3 (Non-critical)
Version:5.1.0 custom release OS:Any (any)
Assigned to: Tomas Ulin CPU Architecture:Any

[10 Jun 2005 11:43] Tomas Ulin
Description:
The slave SQL thread did not retry transactions that failed with a temporary error (for example, a deadlock) when replicating NDB tables.

How to repeat:
##################################################################
#
# Check that retries are made on the slave on some temporary errors
#
# Outline of the scenario below:
#   - create a row on the master and let it replicate
#   - lock that row on the slave inside an open transaction
#   - update the same row on the master
#   - with slave_transaction_retries=1 replication is expected to stop
#   - with slave_transaction_retries=10 and the lock released,
#     replication is expected to succeed
#

#
# 1. Deadlock
#
# create and populate an ndbcluster table on the master
--connection master
CREATE TABLE `t1` ( `nid` int(11) NOT NULL default '0',
 	            `nom` char(4) default NULL,
  		    `prenom` char(4) default NULL,
		    PRIMARY KEY USING HASH (`nid`)) 
    ENGINE=ndbcluster DEFAULT CHARSET=latin1;
INSERT INTO t1 VALUES(1,"XYZ1","ABC1");

# take a lock on that row on the slave: update it inside a transaction
# that is deliberately left open (it is committed further down)
--sync_slave_with_master
--connection slave
BEGIN;
UPDATE t1 SET `nom`="LOCK" WHERE `nid`=1;

# set the number of retries low so that the retries are exhausted
set GLOBAL slave_transaction_retries=1;

# now change this row on the master;
# applying the change on the slave will deadlock because of the lock above
--connection master
UPDATE t1 SET `nom`="DEAD" WHERE `nid`=1;

# wait for the deadlock to be detected
# sleep longer than the deadlock detection timeout in the config
# we do this twice: once with few retries to verify that we
# get a failure with the given sleep, and once with the _same_
# sleep, but with more retries to get it to succeed
--sleep 5

# replication should have stopped, since the max retries were not enough
# verify with show slave status
# (the master port and the listed columns are replaced with fixed
# placeholders in the output)
--connection slave
--replace_result $MASTER_MYPORT MASTER_PORT
--replace_column 1 <Slave_IO_State> 7 <Read_Master_Log_Pos> 8 <Relay_Log_File> 9 <Relay_Log_Pos> 16 <Replicate_Ignore_Table> 22 <Exec_Master_Log_Pos> 23 <Relay_Log_Space> 33 <Seconds_Behind_Master>
SHOW SLAVE STATUS;

# now set max retries high enough to succeed, and start the slave again
set GLOBAL slave_transaction_retries=10;
START SLAVE;

# wait for the deadlock to be detected and retried
# must be the same sleep as above for the test to be valid
--sleep 5

# show the table as seen from the slave connection while its transaction
# is still open, then commit to release the row lock and let replication
# succeed
select * from t1 order by nid;
COMMIT;

# verify that the master's change to the row was applied on the slave
--connection master
--sync_slave_with_master
--connection slave
select * from t1 order by nid;

# cleanup: drop the test table on the master
--connection master
DROP TABLE t1;