From 61af88072225e3558cb8589cb8eac4bc09d811f0 Mon Sep 17 00:00:00 2001 From: Przemyslaw Skibinski Date: Tue, 9 Jun 2026 15:48:11 +0200 Subject: [PATCH] InnoDB: crash recovery of unfinished ALTER TABLESPACE ENCRYPTION re-binlogs the DDL and allocates a fresh GTID, crashing startup InnoDB crash recovery can resume an unfinished ALTER TABLESPACE encryption operation by re-executing ALTER TABLESPACE through an internal startup background THD: fsp_init_resume_alter_encrypt_tablespace() -> dd::alter_tablespace_encryption() -> execute_query() -> Sql_cmd_alter_tablespace::execute(). That THD is created by create_internal_thd(), which leaves session binlogging enabled. So the replay runs through the normal SQL DDL path and emits a second binlog event and allocates a fresh GTID for the original user DDL, which was already binlogged when the user issued it. Because this happens very early in startup, the replay's in-flight commit on mysql.ibd races with GTID/binlog initialization and with the attachable transactions that open the DD and replication metadata tables, tripping debug assertions in GTID/binlog bookkeeping and startup DD access, e.g.: Gtid_state::update_gtids_impl_own_gtid(): owned_gtids.is_owned_by(thd->owned_gtid, thd->thread_id()) executed_gtids.contains_gtid() Mutex_cond_array::get_max_index() THD::Attachable_trx::~Attachable_trx() checking transaction_rollback_request (The last assertion is the one for which the existing component_keyring_file.mysql_ts_alter_encrypt_1 test is disabled under Bug#29531106.) The window is widened by slow keyring access, but the defect is in the replay itself, not the keyring backend. Treat the recovery replay as local startup repair rather than a new user-visible DDL: - Disable session binlogging for the internal dd::alter_tablespace_encryption() replay (Disable_binlog_guard). 
- Skip the explicit ALTER TABLESPACE DDL binlog write only for startup background encryption replays (system_thread == SYSTEM_THREAD_BACKGROUND with an ENCRYPTION clause). - When mysql.ibd itself is among the spaces being rolled forward, wait for the resume thread to finish before startup opens DD and replication metadata tables. This prevents recovery from emitting a second binlog event or allocating a fresh GTID, while avoiding a broad change to normal ALTER TABLESPACE binlog behavior. Add a deterministic regression test that does not depend on slow keyring access: component_keyring_file.tablespace_encrypt_recover_gtid. With gtid_mode=ON and binary logging enabled, it crashes the server in the middle of ALTER TABLESPACE mysql ENCRYPTION='Y', lets InnoDB crash recovery resume the encryption from the startup background THD, and asserts that the replay does not change @@GLOBAL.gtid_executed. Without the fix, the replay crashes recovery in Gtid_state::update_gtids_impl_own_gtid() (and would otherwise grow gtid_executed by one); with the fix, gtid_executed is unchanged. 
Verified with: ./mysql-test-run.pl --suite=component_keyring_file \ tablespace_encrypt_recover_gtid --debug-server --big-test \ --retry=0 --force --parallel=1 --- .../r/tablespace_encrypt_recover_gtid.result | 49 +++++++++ ...tablespace_encrypt_recover_gtid-master.opt | 3 + .../t/tablespace_encrypt_recover_gtid.test | 100 ++++++++++++++++++ sql/dd/impl/dictionary_impl.cc | 9 +- sql/sql_tablespace.cc | 20 +++- storage/innobase/srv/srv0start.cc | 34 ++++++ 6 files changed, 213 insertions(+), 2 deletions(-) create mode 100644 mysql-test/suite/component_keyring_file/r/tablespace_encrypt_recover_gtid.result create mode 100644 mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid-master.opt create mode 100644 mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid.test diff --git a/mysql-test/suite/component_keyring_file/r/tablespace_encrypt_recover_gtid.result b/mysql-test/suite/component_keyring_file/r/tablespace_encrypt_recover_gtid.result new file mode 100644 index 000000000000..5dd470ac075c --- /dev/null +++ b/mysql-test/suite/component_keyring_file/r/tablespace_encrypt_recover_gtid.result @@ -0,0 +1,49 @@ +# ---------------------------------------------------------------------- +# Setup +# Creating local configuration file for keyring component: component_keyring_file +# Creating manifest file for current MySQL server instance +# Re-starting mysql server with manifest file +# ---------------------------------------------------------------------- +######################################################################### +# Start from a known, unencrypted mysql tablespace. +######################################################################### +SET debug='+d,skip_dd_table_access_check'; +SELECT @@GLOBAL.gtid_mode; +@@GLOBAL.gtid_mode +ON +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; +NAME ENCRYPTION +mysql N +# Snapshot the GTID set right before the crashing DDL. 
The crashing +# ALTER does not commit, so recovery is the only thing that could add a +# GTID afterwards. +######################################################################### +# Crash in the middle of ALTER TABLESPACE mysql ENCRYPTION='Y' +######################################################################### +SET SESSION debug='+d,alter_encrypt_tablespace_page_10'; +ALTER TABLESPACE mysql ENCRYPTION='Y'; +######################################################################### +# Restart: InnoDB recovery resumes the encryption in a background thread +######################################################################### +SET debug='+d,skip_dd_table_access_check'; +# Wait for the background encryption resume to roll forward. +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; +NAME ENCRYPTION +mysql Y +# The recovery replay must NOT have emitted a binlog event or allocated +# a GTID for the resumed ALTER TABLESPACE. +# OK: crash-recovery encryption replay did not change gtid_executed. 
+########### +# Cleanup # +########### +ALTER TABLESPACE mysql ENCRYPTION='N'; +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; +NAME ENCRYPTION +mysql N +# ---------------------------------------------------------------------- +# Teardown +# Removing manifest file for current MySQL server instance +# Removing local keyring file for keyring component: component_keyring_file +# Removing local configuration file for keyring component: component_keyring_file +# Restarting server without the manifest file +# ---------------------------------------------------------------------- diff --git a/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid-master.opt b/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid-master.opt new file mode 100644 index 000000000000..8cf3bf60b846 --- /dev/null +++ b/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid-master.opt @@ -0,0 +1,3 @@ +--log-bin=master-bin +--gtid-mode=ON +--enforce-gtid-consistency=ON diff --git a/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid.test b/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid.test new file mode 100644 index 000000000000..5296411a0139 --- /dev/null +++ b/mysql-test/suite/component_keyring_file/t/tablespace_encrypt_recover_gtid.test @@ -0,0 +1,100 @@ +--source include/have_component_keyring_file.inc +################################################################################ +# Regression test: crash recovery of an unfinished ALTER TABLESPACE encryption +# must not emit a binlog event or allocate a GTID. +# +# InnoDB crash recovery can resume an unfinished ALTER TABLESPACE encryption +# operation by re-executing ALTER TABLESPACE through an internal startup +# background THD (fsp_init_resume_alter_encrypt_tablespace() -> +# dd::alter_tablespace_encryption() -> execute_query() -> +# Sql_cmd_alter_tablespace::execute()). 
Without the fix that replay runs through +# the normal SQL DDL path with session binlogging enabled, so it emits a second +# binlog event and allocates a fresh GTID for the original user DDL, which was +# already binlogged when the user issued it. When the replay is slow (e.g. with +# a network-backed keyring) it also overlaps with GTID/binlog startup and the +# mysql tablespace metadata access, tripping debug assertions in +# executed_gtids.contains_gtid(), Mutex_cond_array and +# THD::Attachable_trx::~Attachable_trx(). +# +# The fix treats the recovery replay as local startup repair: it disables +# session binlogging for the internal dd::alter_tablespace_encryption() replay, +# skips the explicit ALTER TABLESPACE DDL binlog write for startup background +# encryption replays, and waits for mysql tablespace encryption recovery before +# startup opens DD and replication metadata tables. As a deterministic, +# keyring-backend independent guard we assert here that crash recovery of an +# unfinished ALTER TABLESPACE mysql ENCRYPTION does NOT change +# @@GLOBAL.gtid_executed. +################################################################################ + +--source include/big_test.inc +--source include/have_debug.inc +--source include/have_log_bin.inc +# Disable in valgrind because of timeout, cf. Bug#22760145 +--source include/not_valgrind.inc + +--source ../inc/setup_component.inc + +--disable_query_log +call mtr.add_suppression("Error generating data for Data ID:"); +call mtr.add_suppression("Encryption key missing:"); +call mtr.add_suppression("Decrypting a page in doublewrite file failed:"); +--enable_query_log + +# Waiting time when (re)starting the server +--let $explicit_default_wait_counter=10000 + +--echo ######################################################################### +--echo # Start from a known, unencrypted mysql tablespace. 
+--echo ######################################################################### +let $restart_parameters = restart: $PLUGIN_DIR_OPT; +--source include/restart_mysqld_no_echo.inc +SET debug='+d,skip_dd_table_access_check'; + +SELECT @@GLOBAL.gtid_mode; +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; + +--echo # Snapshot the GTID set right before the crashing DDL. The crashing +--echo # ALTER does not commit, so recovery is the only thing that could add a +--echo # GTID afterwards. +--let $gtid_before = `SELECT @@GLOBAL.gtid_executed` + +--echo ######################################################################### +--echo # Crash in the middle of ALTER TABLESPACE mysql ENCRYPTION='Y' +--echo ######################################################################### +SET SESSION debug='+d,alter_encrypt_tablespace_page_10'; +--source include/expect_crash.inc +--error 0,CR_SERVER_LOST,ER_INTERNAL_ERROR +ALTER TABLESPACE mysql ENCRYPTION='Y'; + +--echo ######################################################################### +--echo # Restart: InnoDB recovery resumes the encryption in a background thread +--echo ######################################################################### +--source include/start_mysqld_no_echo.inc +SET debug='+d,skip_dd_table_access_check'; + +--echo # Wait for the background encryption resume to roll forward. +let $wait_condition = SELECT ENCRYPTION='Y' + FROM INFORMATION_SCHEMA.INNODB_TABLESPACES + WHERE NAME='mysql'; +--source include/wait_condition.inc + +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; + +--echo # The recovery replay must NOT have emitted a binlog event or allocated +--echo # a GTID for the resumed ALTER TABLESPACE. 
+--let $gtid_after = `SELECT @@GLOBAL.gtid_executed` +if ($gtid_before != $gtid_after) +{ + --echo # gtid_executed before recovery: $gtid_before + --echo # gtid_executed after recovery: $gtid_after + --die regression: crash-recovery encryption replay changed gtid_executed +} +--echo # OK: crash-recovery encryption replay did not change gtid_executed. + +--echo ########### +--echo # Cleanup # +--echo ########### +ALTER TABLESPACE mysql ENCRYPTION='N'; +SELECT NAME, ENCRYPTION FROM INFORMATION_SCHEMA.INNODB_TABLESPACES WHERE NAME='mysql'; + +--source ../inc/teardown_component.inc diff --git a/sql/dd/impl/dictionary_impl.cc b/sql/dd/impl/dictionary_impl.cc index 9c878869003f..a6314ca2b2a6 100644 --- a/sql/dd/impl/dictionary_impl.cc +++ b/sql/dd/impl/dictionary_impl.cc @@ -72,7 +72,7 @@ #include "sql/sql_base.h" // close_cached_tables #include "sql/sql_class.h" // THD #include "sql/system_variables.h" -#include "sql/thd_raii.h" // Disable_autocommit_guard +#include "sql/thd_raii.h" // Disable_autocommit_guard, Disable_binlog_guard #include "sql/transaction.h" // trans_commit() #include "storage/perfschema/pfs_dd_version.h" // PFS_DD_VERSION @@ -753,6 +753,13 @@ bool alter_tablespace_encryption(THD *thd, const char *tablespace_name, tablespace_name + dd::String_type(" ENCRYPTION = ") + dd::String_type(encryption ? "'Y'" : "'N'"); + /* + This is used by InnoDB crash recovery to finish a local tablespace + encryption state transition. The original user DDL was already handled, so + replay runs with session binlogging disabled; the ALTER TABLESPACE executor + also skips its explicit DDL binlog write for startup background replays. 
+ */ + const Disable_binlog_guard binlog_guard(thd); bool res = execute_query(thd, query); error_handler.set_log_error(save_log_error); return res; diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc index b4265c31ae69..7ed56f0dc375 100644 --- a/sql/sql_tablespace.cc +++ b/sql/sql_tablespace.cc @@ -1052,7 +1052,25 @@ bool Sql_cmd_alter_tablespace::execute(THD *thd) { return true; } - if (complete_stmt(thd, hton, [&]() { rollback_on_return.disable(); })) { + /* + This normal ALTER TABLESPACE execution path is also reached during InnoDB + crash recovery: fsp_init_resume_alter_encrypt_tablespace() resumes an + interrupted (un)encryption by calling dd::alter_tablespace_encryption(), + which builds an "ALTER TABLESPACE ... ENCRYPTION = ..." string and runs it + through execute_query() -> here, on the startup background THD created by + create_internal_thd() (system_thread == SYSTEM_THREAD_BACKGROUND). + + That replay only repairs local DD/SE state for the original user DDL, which + was already binlogged when the user issued it; emitting a second binlog + event (and allocating a fresh GTID) here would be wrong. Detect the replay + and skip the explicit DDL binlog write below. 
+ */ + const bool recovery_replay = thd->system_thread == SYSTEM_THREAD_BACKGROUND && + m_options->encryption.str != nullptr; + + if (complete_stmt( + thd, hton, [&]() { rollback_on_return.disable(); }, true, + recovery_replay)) { return true; } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index f752ca1bca4d..4c502836574f 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -52,6 +52,8 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include +#include + #include "my_dbug.h" #include "btr0btr.h" @@ -70,6 +72,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include "ibuf0ibuf.h" #include "log0buf.h" #include "log0chkp.h" +#include "log0ddl.h" #include "log0recv.h" #include "log0write.h" #include "mem0mem.h" @@ -2279,6 +2282,12 @@ void srv_start_threads_after_ddl_recovery() { /* Resume unfinished (un)encryption process in background thread. */ if (!ts_encrypt_ddl_records.empty()) { + const bool is_mysql_ibd_encryption = std::any_of( + ts_encrypt_ddl_records.begin(), ts_encrypt_ddl_records.end(), + [](const DDL_Record *ddl_record) { + return ddl_record->get_space_id() == dict_sys_t::s_dict_space_id; + }); + srv_threads.m_ts_alter_encrypt = os_thread_create(srv_ts_alter_encrypt_thread_key, 0, fsp_init_resume_alter_encrypt_tablespace); @@ -2289,6 +2298,31 @@ void srv_start_threads_after_ddl_recovery() { for which (un)encryption is to be rolled forward. */ mysql_cond_wait(&resume_encryption_cond, &resume_encryption_cond_m); mysql_mutex_unlock(&resume_encryption_cond_m); + + /* + The mysql tablespace (mysql.ibd) holds the DD and replication metadata + tables that the rest of startup opens through attachable transactions. + + The resume thread finishes the interrupted (un)encryption by re-running + the DD metadata transition via dd::alter_tablespace_encryption(), which + builds an "ALTER TABLESPACE mysql ENCRYPTION = ..." 
string and runs it + through execute_query() -> Sql_cmd_alter_tablespace::execute(). That is a + real, committing transaction on mysql.ibd. If we let startup continue + while that commit is still in flight, the attachable transactions opening + DD/replication tables can observe the rollback/commit bookkeeping left by + the resume THD and hit the GTID/binlog asserts this fix targets. + + So when mysql.ibd is among the spaces being rolled forward we block here + until the resume thread is done. This intentionally also waits for any + general tablespaces handled in the same batch, but that only happens in + the rare case that mysql.ibd itself was mid-(un)encryption at the crash; + the common case (only general tablespaces) keeps the existing background + behavior and their normal DML, including dict stats work, remains covered + by the usual tablespace encryption machinery. + */ + if (is_mysql_ibd_encryption) { + srv_threads.m_ts_alter_encrypt.wait(); + } } /* Start and consume all GTIDs for recovered transactions. */