Description:
I proposed one bug https://bugs.mysql.com/bug.php?id=88534. XA may lost prepared transaction and cause different between master and slave, as the xa prepare first call binlog prepare, then innodb prepare.
Last days I thought this bug again. If call InnoDB prepare first, the binlog would lost, the master would have a prepared xa transaction but the slave not.
How to repeat:
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1460,10 +1460,12 @@ int ha_prepare(THD *thd)
thd->status_var.ha_prepare_count++;
if (ht->prepare)
{
- DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
- ha_rollback_trans(thd, true);
- DBUG_RETURN(1);
- });
+ if (ht->db_type == DB_TYPE_INNODB) {
+ DBUG_EXECUTE_IF("simulate_xa_failure_prepare", {
+ ha_rollback_trans(thd, true);
+ DBUG_RETURN(1);
+ });
+ }
if (ht->prepare(ht, thd, true))
{
ha_rollback_trans(thd, true);
replication.test:
--disable_warnings
source include/master-slave.inc;
--enable_warnings
connection master;
CREATE TABLE ti (c1 INT) ENGINE=INNODB;
XA START 'x';
INSERT INTO ti VALUES(1);
XA END 'x';
SET @@session.debug = '+d,simulate_xa_failure_prepare';
--error ER_XA_RBROLLBACK
XA PREPARE 'x';
--echo #Master
XA RECOVER;
--sync_slave_with_master
connection slave;
--echo #Slave
XA RECOVER;
replication.result:
include/master-slave.inc
[connection master]
CREATE TABLE ti (c1 INT) ENGINE=INNODB;
XA START 'x';
INSERT INTO ti VALUES(1);
XA END 'x';
SET @@session.debug = '+d,simulate_xa_failure_prepare';
XA PREPARE 'x';
ERROR XA100: XA_RBROLLBACK: Transaction branch was rolled back
#Master
XA RECOVER;
formatID gtrid_length bqual_length data
#Slave
XA RECOVER;
formatID gtrid_length bqual_length data
1 1 0 x
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1479,6 +1479,7 @@ int ha_prepare(THD *thd)
}
ha_info= ha_info->next();
}
+ DBUG_EXECUTE_IF("crash_after_xa_prepare", DBUG_SUICIDE(););
DBUG_ASSERT(thd->get_transaction()->xid_state()->
has_state(XID_STATE::XA_IDLE));
replication.test:
-- source include/have_log_bin.inc
CREATE TABLE ti (c1 INT) ENGINE=INNODB;
XA START 'x';
INSERT INTO ti VALUES(1);
XA END 'x';
SET @@session.debug = '+d,crash_after_xa_prepare';
--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--error 2013
XA PREPARE 'x';
--source include/wait_until_disconnected.inc
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--source include/start_mysqld.inc
XA RECOVER;
show binlog events in 'mysql.000001';
replication.result:
CREATE TABLE ti (c1 INT) ENGINE=INNODB;
XA START 'x';
INSERT INTO ti VALUES(1);
XA END 'x';
SET @@session.debug = '+d,crash_after_xa_prepare';
XA PREPARE 'x';
ERROR HY000: Lost connection to MySQL server during query
# restart
XA RECOVER;
formatID gtrid_length bqual_length data
show binlog events in 'mysql.000001';
Log_name Pos Event_type Server_id End_log_pos Info
mysql.000001 4 Format_desc 1 123 Server ver: 5.7.19org-debug-log, Binlog ver: 4
mysql.000001 123 Previous_gtids 1 154
mysql.000001 154 Anonymous_Gtid 1 219 SET @@SESSION.GTID_NEXT= 'ANONYMOUS'
mysql.000001 219 Query 1 331 use `test`; CREATE TABLE ti (c1 INT) ENGINE=INNODB
mysql.000001 331 Anonymous_Gtid 1 396 SET @@SESSION.GTID_NEXT= 'ANONYMOUS'
mysql.000001 396 Query 1 483 XA START X'78',X'',1
mysql.000001 483 Table_map 1 528 table_id: 222 (test.ti)
mysql.000001 528 Write_rows 1 568 table_id: 222 flags: STMT_END_F
mysql.000001 568 Query 1 653 XA END X'78',X'',1
mysql.000001 653 XA_prepare 1 690 XA PREPARE X'78',X'',1
Suggested fix:
Two methods may could solve this problem.
1. Remain 'XA prepare': binlog prepare->engine prepare(ensure engine flush logs), 'XA COMMIT': binlog commit->engine commit. Write 'xa rollback' when we found the storage engine on master lost prepared transaction.
When the server restarted, we search the last binlog to check whether there's a 'xa prepare' which no 'xa commit/rollback' matched. If we find the not matched 'xa prepare x' in binlog, we check whether there's prepared xa 'x' in the storage engine. If there's no prepared xa transacton, we write 'xa rollback x' to the binlog to cancel prepared transaction on slave.
But this method not friendly to HA. If we change to slave after the master crash, there's error happened.
2. 'XA PREPARE': engine prepare->binlog prepare, 'XA COMMIT' binlog commit-> engine commit. Add one table like the 'mysql.gtid_executed' table to record the xa prepared transactions when rotate binlog. Search the last binlog to decide whether the prepared transaction in storage engine should be rollbacked, committed or remained prepared.
Have to add a table to recored the xa prepared transaction when rotate binlog, as we would not know whether the prepared transaction's binlog was logged when server restart(the binlog may purged and 'xa prepare' may recorded in the purged binlog). So have to add a table to record xa prepared transaction when rotate binlog, then we could use binlog to decide the transaction state.