commit b29141f605e5a63e354e3a6f9b1aa2afa19e57e9 Author: GAO Xiaoxin Date: Mon Feb 17 00:28:06 2020 +0800 Bug#98616 XA PREPARE/XA COMMIT/XA ROLLBACK lost if mysql crash just after binlog flush For "xa commit" and "xa rollback", mysql does not use the binlog as a Transaction Coordinator Log during start-up recovery. In MYSQL_BIN_LOG::recover, the query_event of "xa commit" and "xa rollback" is ignored, and the xa commit xid and xa rollback xid are not recorded. And in xarecover_handlerton, there is no code to commit/rollback the recovered prepared transaction according to the binlog. So the xa commit and xa rollback transaction may simply be lost if mysql crashes. The cases of "xa prepare" and "xa commit one phase" are more complex and worse. Take "xa prepare" for example: "xa prepare" will modify the undo page to set the "undo->state = TRX_UNDO_PREPARED" and "undo->xid = *trx->xid" (see code trx_undo_set_state_at_prepare in trx0undo.cc). But these modifications are done after the binlog is flushed and may be lost if mysql crashes just after the binlog is flushed. During mysql start-up recovery, the innodb engine will only recover transactions in the prepared state, which means the undo->state of the recovered transaction must be TRX_UNDO_PREPARED (check trx_rollback_resurrected in trx0roll.cc). So if an "xa prepare" transaction has only flushed the binlog, it will just be rolled back during start-up recovery and lost. Configure mysql with innodb_flush_log_at_trx_commit=1 and sync_binlog=1. Use gdb to start mysql, such as "gdb --args ./bin/mysqld --defaults-file=my.cnf", and set a break-point in ordered_commit (binlog.cc) just after "DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");". 
Then do the following test: test1: use test; create table t1 (c1 int) engine=innodb; xa start '1'; insert into t1 values (1); xa end '1'; xa prepare '1'; --------------------here gdb will catch the break-point Then quit gdb to simulate a mysql crash, restart mysql, and log in to execute "xa recover": we will not find the xid '1', but if we check the binlog, we will find the "xa prepare '1'". The cases of "xa commit one phase", "xa commit" and "xa rollback" are similar: just set a break-point at the same point as for "xa prepare", and quit gdb to simulate a mysql crash just after mysql flushed the binlog to disk. Then restart mysql, and we will find that "xa commit one phase", "xa commit", "xa rollback" are lost, but the binlog contains the related transaction events. For "xa commit" and "xa rollback", we should: 1. check the "xa commit" and "xa rollback" query events and maintain two sets to record the committed xids and rolled-back xids in MYSQL_BIN_LOG::recover. 2. in xarecover_handlerton, we should loop over all xids from engines which are generated by an external TM, and find out whether the xid is in the sets of xa commit and xa rollback; if it is, do commit_by_xid or rollback_by_xid respectively. For "xa prepare" and "xa commit one phase", we should: 1. write the "undo->xid = *trx->xid" into the undo page before the binlog is flushed, so that these undo page modifications can be flushed into the related redo log, and mysql can know the external xid of this xa transaction during start-up recovery. 2. During innodb start-up recovery, the check in trx_rollback_resurrected should keep all external TM transactions which have a real xid, and these transactions should be handled in the server layer. 3. MYSQL_BIN_LOG::recover should record all the xa transactions which are prepared but not committed/rolled back, as a prepared_xid_set. 4. 
In xarecover_handlerton, we should loop all xids from engines which are generated by external TM, and find-out whether the xid is in the prepared_xid_set, if inside should do xa prepare for it; if not inside, we should check whether this transaction is a prepared xa transaction before this start-up recover, if not, just do rollback_by_xid. diff --git a/mysql-test/suite/binlog/r/binlog_group_commit_flush_crash_xa.result b/mysql-test/suite/binlog/r/binlog_group_commit_flush_crash_xa.result new file mode 100644 index 0000000..4bc0595 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_group_commit_flush_crash_xa.result @@ -0,0 +1,45 @@ +call mtr.add_suppression("Found 1 prepared XA transactions"); +CREATE TABLE t1(c1 INT) ENGINE= InnoDB; +TRUNCATE TABLE t1; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +XA PREPARE '100'; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +XA COMMIT '100'; +include/assert.inc [Table t1 must contain row 100] +TRUNCATE TABLE t1; +# Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +XA COMMIT '100' one phase; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +include/assert.inc [Table t1 must contain row 100] +TRUNCATE TABLE t1; +# Crash right after flushing binary log +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +XA PREPARE '100'; +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA COMMIT '100'; +ERROR HY000: Lost connection to MySQL server during query +# Restart the master server +include/assert.inc [Table t1 must contain row 100] +# Crash right after flushing binary log +XA START '100'; +DELETE FROM t1 WHERE c1 = 100; +XA END '100'; +XA PREPARE '100'; +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA ROLLBACK '100'; +ERROR HY000: Lost 
connection to MySQL server during query +# Restart the master server +include/assert.inc [Table t1 must contain row 100] +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa-master.opt b/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa-master.opt new file mode 100644 index 0000000..c9c713d --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa-master.opt @@ -0,0 +1 @@ +--sync-binlog=1 --innodb-flush-log-at-trx-commit=1 diff --git a/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa.test b/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa.test new file mode 100644 index 0000000..d38063e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_group_commit_flush_crash_xa.test @@ -0,0 +1,114 @@ +# +# Bug#98616 XA PREPARE/XA COMMIT/XA ROLLBACK lost if mysql crash just after +# binlog flush +# +# Verify that an xa transaction can not be recovered during server +# recovery from a crash, which happened after flushing it to +# InnoDB redo log and before flushing it to binary log. And +# a transaction can be recovered during server recovery from +# a crash, which happened after flushing it to binary log. 
+# +--source include/not_embedded.inc +--source include/not_valgrind.inc +--source include/have_binlog_format_row.inc +-- source include/have_debug.inc +-- source include/have_innodb.inc + +call mtr.add_suppression("Found 1 prepared XA transactions"); + +CREATE TABLE t1(c1 INT) ENGINE= InnoDB; + + +# test recover for xa prepare +TRUNCATE TABLE t1; +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +--error 2013 +XA PREPARE '100'; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +XA COMMIT '100'; +--let $assert_text= Table t1 must contain row 100 +--let $assert_cond= [SELECT count(*) FROM t1 WHERE c1=100] = 1 +--source include/assert.inc + +# test recover for xa commit one phase +TRUNCATE TABLE t1; +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +--error 2013 +XA COMMIT '100' one phase; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--let $assert_text= Table t1 must contain row 100 +--let $assert_cond= [SELECT count(*) FROM t1 WHERE c1=100] = 1 +--source include/assert.inc + + +# test recover for xa commit +TRUNCATE TABLE t1; +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +XA START '100'; +INSERT INTO t1 VALUES(100); +XA END '100'; +XA PREPARE '100'; +SET SESSION 
DEBUG="+d,crash_after_flush_binlog"; +--error 2013 +XA COMMIT '100'; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--let $assert_text= Table t1 must contain row 100 +--let $assert_cond= [SELECT count(*) FROM t1 WHERE c1=100] = 1 +--source include/assert.inc + +# test recover for xa rollback +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--echo # Crash right after flushing binary log +XA START '100'; +DELETE FROM t1 WHERE c1 = 100; +XA END '100'; +XA PREPARE '100'; +SET SESSION DEBUG="+d,crash_after_flush_binlog"; +--error 2013 +XA ROLLBACK '100'; +--source include/wait_until_disconnected.inc + +--enable_reconnect +--echo # Restart the master server +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect + +--let $assert_text= Table t1 must contain row 100 +--let $assert_cond= [SELECT count(*) FROM t1 WHERE c1=100] = 1 +--source include/assert.inc + +# Cleanup +DROP TABLE t1; diff --git a/sql/binlog.cc b/sql/binlog.cc index 10e9a73..b0fe898 100644 --- a/sql/binlog.cc +++ b/sql/binlog.cc @@ -92,6 +92,37 @@ static int binlog_xa_commit(handlerton *hton, XID *xid); static int binlog_xa_rollback(handlerton *hton, XID *xid); static void exec_binlog_error_action_abort(const char* err_string); +static inline uint char_val(char X) { + return (uint)(X >= '0' && X <= '9' + ? X - '0' + : X >= 'A' && X <= 'Z' ? 
X - 'A' + 10 : X - 'a' + 10); +} + +// the format in binlog is: XA COMMIT X'31',X'32',1 +static XID get_xid_from_xa_commit_rollback(std::string query_sql) { + XID xid; + size_t gtrid_begin_pos = query_sql.find('\'', 0); + size_t gtrid_end_pos = query_sql.find('\'', gtrid_begin_pos + 1); + std::string gtrid; + for (size_t i = 0; i < gtrid_end_pos - gtrid_begin_pos - 1; i += 2) { + gtrid += (char)(char_val(query_sql[gtrid_begin_pos + 1 + i]) * 16 + + char_val(query_sql[gtrid_begin_pos + 1 + i + 1])); + } + size_t bqual_begin_pos = query_sql.find('\'', gtrid_end_pos + 1); + size_t bqual_end_pos = query_sql.find('\'', bqual_begin_pos + 1); + std::string bqual; + for (size_t i = 0; i < bqual_end_pos - bqual_begin_pos - 1; i += 2) { + bqual += (char)(char_val(query_sql[bqual_begin_pos + 1 + i]) * 16 + + char_val(query_sql[bqual_begin_pos + 1 + i + 1])); + } + size_t format_id_pos = query_sql.find(',', bqual_end_pos + 1); + long format_id = atol(query_sql.c_str() + format_id_pos + 1); + + xid.set(format_id, gtrid.c_str(), gtrid.size(), bqual.c_str(), bqual.size()); + return xid; +} + + /** Helper class to hold a mutex for the duration of the block. @@ -9824,6 +9855,9 @@ int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle, Log_event *ev; HASH xids; MEM_ROOT mem_root; + std::set xa_prepared; + std::map xa_committed; + /* The flag is used for handling the case that a transaction is partially written to the binlog. 
@@ -9848,6 +9882,14 @@ int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle, in_transaction= TRUE; if (ev->get_type_code() == binary_log::QUERY_EVENT && + !strncmp(((Query_log_event*)ev)->query, "XA START", 8)) { + in_transaction= TRUE; + XID xid = get_xid_from_xa_commit_rollback(((Query_log_event *)ev)->query); + if (xa_committed.count(xid)) //in case there are sever same xids in the last binlog + xa_committed.erase(xid); + } + + if (ev->get_type_code() == binary_log::QUERY_EVENT && !strcmp(((Query_log_event*)ev)->query, "COMMIT")) { DBUG_ASSERT(in_transaction == TRUE); @@ -9862,6 +9904,41 @@ int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle, sizeof(xev->xid)); if (!x || my_hash_insert(&xids, x)) goto err2; + } else if (ev->get_type_code() == binary_log::XA_PREPARE_LOG_EVENT) { + DBUG_ASSERT(in_transaction == TRUE); + in_transaction = FALSE; + XA_prepare_log_event *xev = (XA_prepare_log_event *)ev; + XID xid = xev->get_xid(); + if (!xev->is_one_phase()) { + xa_prepared.insert(xid); + if (xa_committed.count(xid)) + xa_committed.erase(xid); + } else { + xa_committed[xid] = true; + if (xa_prepared.count(xid)) + xa_prepared.erase(xid); + } + } else if (ev->get_type_code() == binary_log::QUERY_EVENT && + !strncmp(((Query_log_event *)ev)->query, "XA COMMIT", 9)) { + DBUG_ASSERT(in_transaction == FALSE); + in_transaction = FALSE; + + // the format in binlog is: XA COMMIT X'31',X'32',1 + // find gtrid and bqual from the single-quote is ok + XID xid = get_xid_from_xa_commit_rollback(((Query_log_event *)ev)->query); + xa_committed[xid] = true; + if (xa_prepared.count(xid)) + xa_prepared.erase(xid); + + } else if (ev->get_type_code() == binary_log::QUERY_EVENT && + !strncmp(((Query_log_event *)ev)->query, "XA ROLLBACK", 11)) { + DBUG_ASSERT(in_transaction == FALSE); + in_transaction = FALSE; + + XID xid = get_xid_from_xa_commit_rollback(((Query_log_event *)ev)->query); + xa_committed[xid] = false; + if 
(xa_prepared.count(xid)) + xa_prepared.erase(xid); } /* @@ -9912,7 +9989,7 @@ int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle, will result in an assert. (Production builds would be safe since ha_recover returns right away if total_ha_2pc <= opt_log_bin.) */ - if (total_ha_2pc > 1 && ha_recover(&xids)) + if (total_ha_2pc > 1 && ha_recover(&xids, &xa_committed, &xa_prepared)) goto err2; free_root(&mem_root, MYF(0)); diff --git a/sql/handler.cc b/sql/handler.cc index 0a5b01f..eb68d51 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1429,6 +1429,45 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg, } /** + * Only store the xid into undo log, this func is used before doing real + * prepare or commit one phase, to store the xid in undo before flush redo + * log in ordered_commit. + @retval + 0 ok + @retval + 1 error, transaction was rolled back +*/ +int ha_set_undo_xid_for_prepare(THD *thd) +{ + int error=0; + Transaction_ctx *trn_ctx= thd->get_transaction(); + DBUG_ENTER("ha_set_undo_xid_for_prepare"); + + if (trn_ctx->is_active(Transaction_ctx::SESSION)) + { + const Ha_trx_info *ha_info= trn_ctx->ha_trx_info( + Transaction_ctx::SESSION); + while (ha_info) + { + handlerton *ht= ha_info->ht(); + if (ht->ha_set_undo_xid_for_prepare) + { + if (ht->ha_set_undo_xid_for_prepare(ht, thd, true)) + { + ha_rollback_trans(thd, true); + error=1; + break; + } + } + ha_info= ha_info->next(); + } + + } + + DBUG_RETURN(error); +} + +/** @retval 0 ok @retval diff --git a/sql/handler.h b/sql/handler.h index d221748..b5a1ff7 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -31,7 +31,8 @@ #include "sql_const.h" // SHOW_COMP_OPTION #include "sql_list.h" // SQL_I_List #include "sql_plugin_ref.h" // plugin_ref - +#include +#include #include "mysql/psi/psi.h" #include @@ -760,9 +761,11 @@ struct handlerton int (*commit)(handlerton *hton, THD *thd, bool all); int (*rollback)(handlerton *hton, THD *thd, bool all); int (*prepare)(handlerton *hton, 
THD *thd, bool all); + int (*ha_set_undo_xid_for_prepare)(handlerton *hton, THD *thd, bool all); int (*recover)(handlerton *hton, XID *xid_list, uint len); int (*commit_by_xid)(handlerton *hton, XID *xid); int (*rollback_by_xid)(handlerton *hton, XID *xid); + int (*prepare_by_xid)(handlerton *hton, XID *xid); handler *(*create)(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root); void (*drop_database)(handlerton *hton, char* path); int (*panic)(handlerton *hton, enum ha_panic_function flag); @@ -4082,6 +4085,7 @@ int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock= false); int ha_commit_attachable(THD *thd); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); +int ha_set_undo_xid_for_prepare(THD *thd); /** @@ -4101,7 +4105,8 @@ int ha_prepare(THD *thd); there should be no prepared transactions in this case. */ -int ha_recover(HASH *commit_list); +int ha_recover(HASH *commit_list, + std::map *xa_committed = NULL, std::set *xa_prepared = NULL); /* transactions: interface to low-level handlerton functions. These are diff --git a/sql/log_event.h b/sql/log_event.h index 00a5611..1231425 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -2076,6 +2076,17 @@ public: my_xid.bqual_length == 0); xid= NULL; } + bool is_one_phase() const { return one_phase; } +#ifdef MYSQL_SERVER + XID get_xid() { + XID xid; + xid.set_format_id(my_xid.formatID); + xid.set_gtrid_length(my_xid.gtrid_length); + xid.set_bqual_length(my_xid.bqual_length); + xid.set_data(my_xid.data, my_xid.gtrid_length + my_xid.bqual_length); + return xid; + } +#endif Log_event_type get_type_code() { return binary_log::XA_PREPARE_LOG_EVENT; } size_t get_data_size() { diff --git a/sql/xa.cc b/sql/xa.cc index 2424958..2b3960d 100644 --- a/sql/xa.cc +++ b/sql/xa.cc @@ -71,8 +71,32 @@ struct xarecover_st XID *list; HASH *commit_list; bool dry_run; + std::map *binlog_xa_committed; //only used for start-up recover, + // to record the commited xid from + // binlog. 
+ std::set *binlog_xa_prepared; //only used for start-up recover, + // to record the prepared xid from binlog. }; +bool operator<(const xid_t &xs1, const xid_t &xs2) { + if (xs1.get_format_id() < xs2.get_format_id()) + return true; + if (xs1.get_gtrid_length() < xs2.get_gtrid_length()) { + return true; + } else if (xs1.get_gtrid_length() > xs2.get_gtrid_length()) { + return false; + } + if (xs1.get_bqual_length() < xs2.get_bqual_length()) { + return true; + } else if (xs1.get_bqual_length() > xs2.get_bqual_length()) { + return false; + } + int ret = strncmp(xs1.get_data(), xs2.get_data(), + xs1.get_gtrid_length() + xs1.get_bqual_length()); + if (ret < 0 ) + return true; + return false; +} static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, void *arg) @@ -92,13 +116,49 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, my_xid x= info->list[i].get_my_xid(); if (!x) // not "mine" - that is generated by external TM { -#ifndef DBUG_OFF char buf[XIDDATASIZE * 4 + 6]; // see xid_to_str - XID *xid= info->list + i; - sql_print_information("ignore xid %s", xid->xid_to_str(buf)); -#endif - transaction_cache_insert_recovery(info->list + i); - info->found_foreign_xids++; + const XID &target_xid = info->list[i]; + if (info->binlog_xa_committed && !info->binlog_xa_committed->empty()) { + std::map::const_iterator commit_it = + info->binlog_xa_committed->find(target_xid); + if (commit_it != info->binlog_xa_committed->end()) { + bool is_commit = commit_it->second; + if (is_commit) { + sql_print_information("commit xid %s due to find xa commit binlog", target_xid.xid_to_str(buf)); + hton->commit_by_xid(hton, info->list + i); + } else { + sql_print_information("rollback xid %s due to find xa rollback binlog", target_xid.xid_to_str(buf)); + hton->rollback_by_xid(hton, info->list + i); + } + continue; + } + } + if (info->binlog_xa_prepared && !info->binlog_xa_prepared->empty() && hton->prepare_by_xid) { + std::set::const_iterator prepare_it = + 
info->binlog_xa_prepared->find(target_xid); + if (prepare_it != info->binlog_xa_prepared->end()) { + if ((info->list + i)->get_is_xa_prepared_during_recover()) { + sql_print_information("Find xid %s already prepare before current binlog", target_xid.xid_to_str(buf)); + transaction_cache_insert_recovery(info->list + i); + info->found_foreign_xids++; + continue; + } + sql_print_information("prepare xid %s due to find xa prepare binlog", target_xid.xid_to_str(buf)); + hton->prepare_by_xid(hton, info->list + i); + transaction_cache_insert_recovery(info->list + i); + info->found_foreign_xids++; + continue; + } + } + if ((info->list + i)->get_is_xa_prepared_during_recover()) { + sql_print_information("Find xid %s already prepare before current binlog", target_xid.xid_to_str(buf)); + transaction_cache_insert_recovery(info->list + i); + info->found_foreign_xids++; + } else { + sql_print_information("rollback xid %s due to find xa transaction partial executed", target_xid.xid_to_str(buf)); + hton->rollback_by_xid(hton, info->list + i); + } + continue; } if (info->dry_run) @@ -136,7 +196,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, } -int ha_recover(HASH *commit_list) +int ha_recover(HASH *commit_list, std::map *xa_committed, std::set *xa_prepared) { struct xarecover_st info; DBUG_ENTER("ha_recover"); @@ -145,6 +205,8 @@ int ha_recover(HASH *commit_list) info.dry_run= (info.commit_list == 0 && tc_heuristic_recover == TC_HEURISTIC_NOT_USED); info.list= NULL; + info.binlog_xa_committed = xa_committed; + info.binlog_xa_prepared = xa_prepared; /* commit_list and tc_heuristic_recover cannot be set both */ DBUG_ASSERT(info.commit_list == 0 || @@ -392,7 +454,12 @@ bool Sql_cmd_xa_commit::trans_xa_commit(THD *thd) else if (xid_state->has_state(XID_STATE::XA_IDLE) && m_xa_opt == XA_ONE_PHASE) { - int r= ha_commit_trans(thd, true); + /*Before do commit one phase, we should first write the xid into undo log + * which is necessary for crash recover.*/ + int 
r= ha_set_undo_xid_for_prepare(thd); + if ((res= MY_TEST(r))) + my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); + r= ha_commit_trans(thd, true); if ((res= MY_TEST(r))) my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); } @@ -817,6 +884,9 @@ bool Sql_cmd_xa_prepare::trans_xa_prepare(THD *thd) MDL_STATEMENT); if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout) || + /*before the redo and binlog flash in ha_prepare, store the xid into + * undo*/ + ha_set_undo_xid_for_prepare(thd) || ha_prepare(thd)) { /* @@ -1045,7 +1115,6 @@ void XID_STATE::store_xid_info(Protocol *protocol, bool print_xid_as_hex) const } -#ifndef DBUG_OFF char* XID::xid_to_str(char *buf) const { char *s= buf; @@ -1095,7 +1164,6 @@ char* XID::xid_to_str(char *buf) const *s= 0; return buf; } -#endif extern "C" uchar *transaction_get_hash_key(const uchar *, size_t *, my_bool); diff --git a/sql/xa.h b/sql/xa.h index ccb117c..2aee9c5 100644 --- a/sql/xa.h +++ b/sql/xa.h @@ -238,6 +238,12 @@ private: */ char data[XIDDATASIZE]; + /** + * Flag indicate this xa transaction is a prepared transaction before + * server start-up. is_xa_prepared_during_recover is only used for recover + * process during server start-up.*/ + bool is_xa_prepared_during_recover; + public: xid_t() : formatID(-1), @@ -245,6 +251,17 @@ public: bqual_length(0) { memset(data, 0, XIDDATASIZE); + is_xa_prepared_during_recover = false; + } + + void set_is_xa_prepared_during_recover(bool b) + { + is_xa_prepared_during_recover = b; + } + + bool get_is_xa_prepared_during_recover() + { + return is_xa_prepared_during_recover; } long get_format_id() const @@ -356,7 +373,6 @@ public: return serialize_xid(buf, formatID, gtrid_length, bqual_length, data); } -#ifndef DBUG_OFF /** Get printable XID value. 
@@ -365,7 +381,6 @@ public: @return pointer to the buffer passed in the first argument */ char* xid_to_str(char *buf) const; -#endif bool eq(const xid_t *xid) const { @@ -404,6 +419,8 @@ private: friend class XID_STATE; } XID; +extern bool operator<(const xid_t &xs1, const xid_t &xs2); +extern bool operator==(const xid_t &xs1, const xid_t &xs2); class XID_STATE { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 886c795..351d843 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1090,6 +1090,21 @@ innobase_file_format_validate_and_set( /*==================================*/ const char* format_max); /*!< in: parameter value */ +/*******************************************************************************************//** +This function is used to write the xid into the undo of an X/Open XA distributed transaction. +@return 0 or error number */ +static +int +innobase_xa_ha_set_undo_xid_for_prepare( +/*================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + THD* thd, /*!< in: handle to the MySQL thread of + the user whose XA transaction should + be prepared */ + bool prepare_trx); /*!< in: true - prepare transaction + false - the current SQL statement + ended */ + /*******************************************************************//** This function is used to prepare an X/Open XA distributed transaction. 
@return 0 or error number */ @@ -1136,6 +1151,18 @@ innobase_rollback_by_xid( handlerton* hton, /*!< in: InnoDB handlerton */ XID* xid); /*!< in: X/Open XA transaction identification */ +/*******************************************************************//** +This function is used to prepare one X/Open XA distributed transaction +which is in the prepared state during recover +@return 0 or error number */ +static +int +innobase_prepare_by_xid( +/*=====================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + XID* xid); /*!< in: X/Open XA transaction + identification */ + /** This API handles CREATE, ALTER & DROP commands for InnoDB tablespaces. @param[in] hton Handlerton of InnoDB @@ -3535,9 +3562,11 @@ innobase_init( innobase_hton->commit = innobase_commit; innobase_hton->rollback = innobase_rollback; innobase_hton->prepare = innobase_xa_prepare; + innobase_hton->ha_set_undo_xid_for_prepare = innobase_xa_ha_set_undo_xid_for_prepare; innobase_hton->recover = innobase_xa_recover; innobase_hton->commit_by_xid = innobase_commit_by_xid; innobase_hton->rollback_by_xid = innobase_rollback_by_xid; + innobase_hton->prepare_by_xid = innobase_prepare_by_xid; innobase_hton->create = innobase_create_handler; innobase_hton->alter_tablespace = innobase_alter_tablespace; innobase_hton->drop_database = innobase_drop_database; @@ -17088,6 +17117,51 @@ innobase_get_at_most_n_mbchars( return(char_length); } +/*******************************************************************************************//** +This function is used to write the xid into the undo of an X/Open XA distributed transaction. 
+@return 0 or error number */ +static +int +innobase_xa_ha_set_undo_xid_for_prepare( +/*================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + THD* thd, /*!< in: handle to the MySQL thread of + the user whose XA transaction should + be prepared */ + bool prepare_trx) /*!< in: true - prepare transaction + false - the current SQL statement + ended */ +{ + trx_t* trx = check_trx_exists(thd); + + DBUG_ASSERT(hton == innodb_hton_ptr); + + thd_get_xid(thd, (MYSQL_XID*) trx->xid); + + if (prepare_trx + || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + + /* We were instructed to prepare the whole transaction, or + this is an SQL statement end and autocommit is on */ + + ut_ad(trx_is_registered_for_2pc(trx)); + + dberr_t err = trx_set_undo_xid_for_prepare_for_mysql(trx); + + ut_ad(err == DB_SUCCESS || err == DB_FORCED_ABORT); + + if (err == DB_FORCED_ABORT) { + + innobase_rollback(hton, thd, prepare_trx); + + return(convert_error_code_to_mysql( + DB_FORCED_ABORT, 0, thd)); + } + } + return(0); +} + + /*******************************************************************//** This function is used to prepare an X/Open XA distributed transaction. 
@return 0 or error number */ @@ -17213,6 +17287,36 @@ innobase_xa_recover( } /*******************************************************************//** +This function is used to prepare one recovered X/Open XA distributed +transaction that may not have reached the prepared state before the crash +@return XA_OK, XAER_NOTA if no recovered transaction matches xid, or XAER_RMERR if the prepare fails */ +static +int +innobase_prepare_by_xid( +/*===================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + XID* xid) /*!< in: X/Open XA transaction identification */ +{ + trx_t* trx = trx_get_trx_by_xid(xid); + if (!trx) + return(XAER_NOTA); /* no recovered transaction carries this XID */ + trx_state_t state = trx->state; + if (state == TRX_STATE_PREPARED) + return(XA_OK); /* already prepared. NOTE(review): trx_get_trx_by_xid_low() resets trx->xid when it matches a PREPARED transaction, so the XID is invalidated on this path; confirm that is intended */ + if (trx_prepare_for_mysql(trx) != DB_SUCCESS) + return(XAER_RMERR); /* the XID was found but the prepare failed: a resource manager error, not an unknown XID */ + + /* Set is_xa_prepared_during_recover so that the ha_recover() call in + * init_server_components(), which has no binlog_xa_prepared set, will not + * roll back this trx. */ + trx->xid->set_is_xa_prepared_during_recover(true); + /* This transaction became prepared during recovery, so increase + * trx_sys->n_prepared_recovered_trx; otherwise XA ROLLBACK will crash. */ + ++trx_sys->n_prepared_recovered_trx; + return(XA_OK); +} + +/*******************************************************************//** This function is used to commit one X/Open XA distributed transaction which is in the prepared state @return 0 or error number */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 5edfa35..b68feda 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -280,6 +280,9 @@ Does the transaction prepare for MySQL. dberr_t trx_prepare_for_mysql(trx_t* trx); +dberr_t +trx_set_undo_xid_for_prepare_for_mysql(trx_t* trx); /* stores trx->xid in the undo log ahead of the prepare */ + /**********************************************************************//** This function is used to find number of prepared transactions and their transaction objects for a recovery.
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index f60a016..dfa35ea 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -320,7 +320,8 @@ trx_undo_set_state_at_prepare( trx_t* trx, trx_undo_t* undo, bool rollback, - mtr_t* mtr); + mtr_t* mtr, + bool is_only_for_set_xid = false); /* true: only record the XID, do not move the undo log to TRX_UNDO_PREPARED */ /**********************************************************************//** Adds the update undo log header as the first in the history list, and diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 5bc03c1..98b963c 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -207,7 +207,9 @@ trx_rollback_low( return(DB_SUCCESS); case TRX_STATE_ACTIVE: - ut_ad(trx->in_mysql_trx_list); + // A recovered external XA transaction may still be in TRX_STATE_ACTIVE during recovery + ut_ad((trx->is_recovered && trx->xid && !trx->xid->get_my_xid()) + || trx->in_mysql_trx_list); assert_trx_nonlocking_or_in_list(trx); return(trx_rollback_for_mysql_low(trx)); @@ -748,7 +750,16 @@ trx_rollback_resurrected( if (!is_recovered) { return(FALSE); } - + if (trx->xid && !trx->xid->get_my_xid() && trx->xid->get_gtrid_length() > 0) { + /* All external XA transactions with a real XID must be handled at the server + * level. If gtrid_length is 0, the XA transaction aborted before reaching + * XA PREPARE, so it is simply rolled back.
+ */ + if (state == TRX_STATE_PREPARED) + trx->xid->set_is_xa_prepared_during_recover(true); + ib::info() << "Skip handling XA transaction with trx id " << trx_get_id_for_print(trx); + return(FALSE); /* not rolled back here; left for the server layer */ + } switch (state) { case TRX_STATE_COMMITTED_IN_MEMORY: trx_sys_mutex_exit(); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 9f102ca..104545f 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -2745,7 +2745,8 @@ trx_prepare_low( trx_t* trx, /*!< in/out: transaction */ trx_undo_ptr_t* undo_ptr, /*!< in/out: pointer to rollback segment scheduled for prepare. */ - bool noredo_logging) /*!< in: turn-off redo logging. */ + bool noredo_logging, /*!< in: turn-off redo logging. */ + bool is_only_for_set_xid = false) /*!insert_undo, false, &mtr); + trx, undo_ptr->insert_undo, false, &mtr, is_only_for_set_xid); } if (undo_ptr->update_undo != NULL) { trx_undo_set_state_at_prepare( - trx, undo_ptr->update_undo, false, &mtr); + trx, undo_ptr->update_undo, false, &mtr, is_only_for_set_xid); } mutex_exit(&rseg->mutex); @@ -2798,6 +2799,33 @@ trx_prepare_low( } /****************************************************************//** +Writes the transaction's XID into its undo logs ahead of the actual prepare. */ +static +void +trx_set_undo_xid_for_prepare( + /*========================*/ + trx_t* trx) /*!< in/out: transaction */ +{ + /* Only the XID is stored here: trx_prepare_low() is called with + is_only_for_set_xid = true, and this function does not itself + change trx->state. */ + + if (trx->rsegs.m_redo.rseg != NULL && trx_is_redo_rseg_updated(trx)) { + + trx_prepare_low(trx, &trx->rsegs.m_redo, false, true); + } + + DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE();); /* NOTE(review): this debug keyword looks copied from the regular prepare path; confirm the crash point is wanted here too */ + + if (trx->rsegs.m_noredo.rseg != NULL + && trx_is_noredo_rseg_updated(trx)) { + + trx_prepare_low(trx, &trx->rsegs.m_noredo, true, true); + } + +} + +/****************************************************************//** Prepares a transaction.
*/ static void @@ -2811,10 +2839,6 @@ trx_prepare( lsn_t lsn = 0; - /* Only fresh user transactions can be prepared. - Recovered transactions cannot. */ - ut_a(!trx->is_recovered); - if (trx->rsegs.m_redo.rseg != NULL && trx_is_redo_rseg_updated(trx)) { lsn = trx_prepare_low(trx, &trx->rsegs.m_redo, false); @@ -2885,6 +2909,31 @@ trx_prepare( } /** +Stores the transaction's XID in the undo log ahead of the prepare for MySQL. +@param[in, out] trx Transaction instance about to be prepared; returns DB_FORCED_ABORT when the transaction was aborted and not killed by the current thread, else DB_SUCCESS */ +dberr_t +trx_set_undo_xid_for_prepare_for_mysql(trx_t* trx) +{ + trx_start_if_not_started_xa(trx, false); + + TrxInInnoDB trx_in_innodb(trx, true); + + if (trx_in_innodb.is_aborted() + && trx->killed_by != os_thread_get_curr_id()) { + + return(DB_FORCED_ABORT); /* nothing has been written to the undo log */ + } + + trx->op_info = "set_undo_xid_for_prepare"; + + trx_set_undo_xid_for_prepare(trx); + + trx->op_info = ""; + + return(DB_SUCCESS); +} + +/** Does the transaction prepare for MySQL. @param[in, out] trx Transaction instance to prepare */ dberr_t @@ -2939,8 +2988,10 @@ trx_recover_for_mysql( /* The state of a read-write transaction cannot change from or to NOT_STARTED while we are holding the trx_sys->mutex. It may change to PREPARED, but not if - trx->is_recovered. It may also change to COMMITTED. */ - if (trx_state_eq(trx, TRX_STATE_PREPARED)) { + trx->is_recovered. It may also change to COMMITTED.
+ + All external XA transactions that carry a real XID (gtrid_length > 0) are also recorded in xid_list; they are handled by xarecover_handlerton in the server layer. A transaction with an empty XID is skipped, matching the check in trx_rollback_resurrected. */ + if (trx_state_eq(trx, TRX_STATE_PREPARED) || (trx->xid && !trx->xid->get_my_xid() && trx->xid->get_gtrid_length() > 0)) { xid_list[count] = *trx->xid; if (count == 0) { @@ -3000,14 +3051,20 @@ trx_get_trx_by_xid_low( length should be the same and binary comparison of gtrid_length+bqual_length bytes should be the same */ + if (trx->is_recovered + && trx_state_eq(trx, TRX_STATE_ACTIVE) + && xid->eq(trx->xid)) { + /* This transaction failed during XA PREPARE, so its state is still + * TRX_STATE_ACTIVE; return it so that it can be prepared again. The XID is left intact on this path. */ + break; + } if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) && xid->eq(trx->xid)) { - - /* Invalidate the XID, so that subsequent calls - will not find it. */ - trx->xid->reset(); + /* Invalidate the XID, so that subsequent calls + will not find it. */ + trx->xid->reset(); break; } } diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index f1c932c..3a0b8f8 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1881,13 +1881,15 @@ trx_undo_set_state_at_prepare( @param[in,out] undo insert_undo or update_undo log @param[in] rollback false=XA PREPARE, true=XA ROLLBACK @param[in,out] mtr mini-transaction +@param[in] is_only_for_set_xid true = only store the XID in the undo log ahead of "xa prepare"/"commit one phase", leaving the undo state unchanged @return undo log segment header page, x-latched */ page_t* trx_undo_set_state_at_prepare( trx_t* trx, trx_undo_t* undo, bool rollback, - mtr_t* mtr) + mtr_t* mtr, + bool is_only_for_set_xid) { trx_usegf_t* seg_hdr; trx_ulogf_t* undo_header; @@ -1905,6 +1907,8 @@ trx_undo_set_state_at_prepare( seg_hdr = undo_page + TRX_UNDO_SEG_HDR; if (rollback) { + if (is_only_for_set_xid) + return(undo_page); /* XID-only call: nothing is changed on the rollback path */ ut_ad(undo->state == TRX_UNDO_PREPARED); mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE, MLOG_2BYTES, mtr); @@ -1913,7 +1917,12 @@ trx_undo_set_state_at_prepare(
/*------------------------------*/ ut_ad(undo->state == TRX_UNDO_ACTIVE); - undo->state = TRX_UNDO_PREPARED; + if (!is_only_for_set_xid) + undo->state = TRX_UNDO_PREPARED; + else + /* Do not set the state to TRX_UNDO_PREPARED yet; that should be done after the + * binlog has been flushed in ordered_commit. */ + undo->state = TRX_UNDO_ACTIVE; undo->xid = *trx->xid; /*------------------------------*/