From a8324f95dc4ae85ccf6f3b5c8ee04b6849188107 Mon Sep 17 00:00:00 2001 From: Przemyslaw Skibinski Date: Tue, 9 Jun 2026 17:08:18 +0200 Subject: [PATCH] InnoDB: Crash resuming interrupted ALTER TABLESPACE ENCRYPTION Resuming an interrupted ALTER TABLESPACE ... ENCRYPTION after a crash is unsafe in two independent ways. This change fixes both; either one alone can crash the server while the post-recovery resume thread finishes the operation. 1. Resume thread has no thread id ================================= fsp_init_resume_alter_encrypt_tablespace() runs the resumed ALTER on an internal THD created with create_internal_thd() but never assigns it a thread id. When binary logging is enabled the resumed DDL acquires GTID ownership keyed by thread id, and committing it aborts: rpl_gtid_state.cc:...: Gtid_state::update_gtids_impl_own_gtid(): Assertion `owned_gtids.is_owned_by(thd->owned_gtid, thd->thread_id())' Fix: call thd->set_new_thread_id() on the resume THD before running the operation, so GTID ownership bookkeeping is consistent. 2. Persisted resume progress can run ahead of the on-disk state =============================================================== A resumed (un)encryption continues from the progress counter persisted on page 0 and skips every page below it, so those pages are assumed to already be on disk in the target encryption state. But mark_all_page_dirty_in_tablespace() only dirties the pages in the buffer pool; a page's on-disk encryption state is decided later, at write time, in fil_io_set_encryption(). Page 0 (which holds the progress) and the data pages it accounts for are flushed independently, so page 0 can reach disk with progress=N while the data pages dirtied up to N have not been flushed yet. The redo-logged rewrite of those data pages is content-neutral (it re-stamps the same space id), so redo alone does not establish their on-disk encryption state either: during crash recovery a data page can be applied and flushed by the recovery writer in the old encryption state before the operation marker on page 0 is applied to fil_space_t::encryption_op_in_progress. After such a crash the resume trusts progress=N, skips those still-old pages, and for a decryption decrypt_end() erases the tablespace key, leaving pages that are encrypted on disk but no longer decryptable. The next read of one aborts: fil0fil.cc:...: Assertion `req_type.is_dblwr() || err == DB_SUCCESS' (the failing read returns DB_IO_DECRYPT_FAIL) The symmetric encryption case silently leaves pages unencrypted on disk. Fix: - mark_all_page_dirty_in_tablespace(): flush the just-processed pages with buf_LRU_flush_or_remove_pages() (which fsyncs the file via fil_flush()) before persisting the progress on page 0, so the persisted progress can never run ahead of the on-disk encryption state of the pages it covers. - resume_alter_encrypt_tablespace(): load the tablespace key from the header on resume for both encryption and decryption, not only encryption. Pages above the progress watermark can still be encrypted on disk, so reading them during the resumed pass requires the in-memory key. - load_encryption_from_header(): when page 0 is not flagged as encrypted there is no encryption info to load (an expected state after a crash that interrupted ALTER ... ENCRYPTION before the new info was persisted, or after a decryption finished); return early instead of decoding the default info, which avoids a misleading "found unexpected version" error logged from the post-recovery resume thread. Test ==== mysql_ts_alter_encrypt_resume_crash repeatedly toggles the mysql tablespace encryption and kills the server mid-operation; on the next startup the resume thread finalizes the interrupted operation. Without fix (1) the resume aborts on the GTID ownership assertion above. With (1) but without (2) the resume crashes on the DB_IO_DECRYPT_FAIL assertion (timing dependent; reproduced under --parallel/--repeat). With both fixes the server always recovers and the tablespace stays readable. --- ...mysql_ts_alter_encrypt_resume_crash.result | 32 ++++++++ .../mysql_ts_alter_encrypt_resume_crash.test | 76 +++++++++++++++++++ storage/innobase/fsp/fsp0fsp.cc | 53 +++++++++---- 3 files changed, 148 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/component_keyring_file/r/mysql_ts_alter_encrypt_resume_crash.result create mode 100644 mysql-test/suite/component_keyring_file/t/mysql_ts_alter_encrypt_resume_crash.test diff --git a/mysql-test/suite/component_keyring_file/r/mysql_ts_alter_encrypt_resume_crash.result b/mysql-test/suite/component_keyring_file/r/mysql_ts_alter_encrypt_resume_crash.result new file mode 100644 index 000000000000..b554d94235a2 --- /dev/null +++ b/mysql-test/suite/component_keyring_file/r/mysql_ts_alter_encrypt_resume_crash.result @@ -0,0 +1,32 @@ +# ---------------------------------------------------------------------- +# Setup +# Creating local configuration file for keyring component: component_keyring_file +# Creating manifest file for current MySQL server instance +# Re-starting mysql server with manifest file +# ---------------------------------------------------------------------- +CREATE PROCEDURE toggle_mysql_ts_encryption() +BEGIN +WHILE TRUE DO +ALTER TABLESPACE mysql ENCRYPTION='Y'; +ALTER TABLESPACE mysql ENCRYPTION='N'; +END WHILE; +END// +# Toggle mysql tablespace encryption in the background. +CALL toggle_mysql_ts_encryption(); +# Kill the server in the middle of an (un)encryption operation and +# let the next startup resume it. (Inlined kill+restart so the +# machine-specific --plugin-dir restart parameter is not echoed into +# the result; restart keeps the keyring component from setup.) +# Server recovered. The mysql tablespace must still be readable. +SELECT COUNT(*) > 0 AS users_readable FROM mysql.user; +users_readable +1 +DROP PROCEDURE toggle_mysql_ts_encryption; +ALTER TABLESPACE mysql ENCRYPTION='N'; +# ---------------------------------------------------------------------- +# Teardown +# Removing manifest file for current MySQL server instance +# Removing local keyring file for keyring component: component_keyring_file +# Removing local configuration file for keyring component: component_keyring_file +# Restarting server without the manifest file +# ---------------------------------------------------------------------- diff --git a/mysql-test/suite/component_keyring_file/t/mysql_ts_alter_encrypt_resume_crash.test b/mysql-test/suite/component_keyring_file/t/mysql_ts_alter_encrypt_resume_crash.test new file mode 100644 index 000000000000..15525a126141 --- /dev/null +++ b/mysql-test/suite/component_keyring_file/t/mysql_ts_alter_encrypt_resume_crash.test @@ -0,0 +1,76 @@ +################################################################################ +# Crash-recovery resume of an interrupted ALTER TABLESPACE ... ENCRYPTION must +# leave every page in the target encryption state on disk before the operation +# is marked finished (and, for decryption, before the tablespace key is erased). +# +# Repeatedly toggling the encryption of the mysql tablespace and killing the +# server mid-operation exercises the resume path on the next startup. Before +# the fix the resume trusted the progress counter persisted on page 0, which +# could run ahead of the on-disk state of the pages it covers: page 0 (holding +# the progress) and the data pages it accounts for are flushed independently, +# and during crash recovery a data page can be flushed in the old encryption +# state before the page-0 progress marker is applied. The resume then skipped +# such still-old pages and decrypt_end() erased the key, leaving an unreadable +# page on disk and crashing the next read with: +# +# Assertion failure: ... req_type.is_dblwr() || err == DB_SUCCESS +# (the failing read returns DB_IO_DECRYPT_FAIL) +# +# This is a timing-dependent crash; run with --repeat=N to reproduce reliably +# on an unfixed server. With the fix the server always recovers and the mysql +# tablespace remains readable. +################################################################################ + +--source include/have_component_keyring_file.inc +# Kill+restart based crash test - skip where that is not meaningful. +--source include/not_valgrind.inc + +--source ../inc/setup_component.inc + +--disable_query_log +call mtr.add_suppression("Encryption key missing:"); +call mtr.add_suppression("Decrypting a page in doublewrite file failed:"); +call mtr.add_suppression("Encryption information can't be read for tablespace"); +--enable_query_log + +--connect (con1,localhost,root,,) +--connection default + +DELIMITER //; +CREATE PROCEDURE toggle_mysql_ts_encryption() +BEGIN + WHILE TRUE DO + ALTER TABLESPACE mysql ENCRYPTION='Y'; + ALTER TABLESPACE mysql ENCRYPTION='N'; + END WHILE; +END// +DELIMITER ;// + +--echo # Toggle mysql tablespace encryption in the background. +--connection con1 +--send CALL toggle_mysql_ts_encryption() + +--connection default +--sleep 5 + +--echo # Kill the server in the middle of an (un)encryption operation and +--echo # let the next startup resume it. (Inlined kill+restart so the +--echo # machine-specific --plugin-dir restart parameter is not echoed into +--echo # the result; restart keeps the keyring component from setup.) +--let $_server_id= `SELECT @@server_id` +--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect +--exec echo "wait" > $_expect_file_name +--shutdown_server 0 +--source include/wait_until_disconnected.inc +--exec echo "$restart_parameters" > $_expect_file_name +--source include/wait_until_connected_again.inc + +--echo # Server recovered. The mysql tablespace must still be readable. +SELECT COUNT(*) > 0 AS users_readable FROM mysql.user; + +--disconnect con1 +DROP PROCEDURE toggle_mysql_ts_encryption; +# Leave the tablespace unencrypted for teardown. +ALTER TABLESPACE mysql ENCRYPTION='N'; + +--source ../inc/teardown_component.inc diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 78999805c6db..ecf816b30627 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -4176,6 +4176,20 @@ static void mark_all_page_dirty_in_tablespace(THD *thd, space_id_t space_id, } mtr_commit(&mtr); + /* Flush the just-processed pages to disk before advancing the progress + persisted on page 0. buf_LRU_flush_or_remove_pages() drains the space's + flush list and fsyncs the file via fil_flush(), so once it returns the + pages are durably in their target encryption state. Advancing the progress + only after that keeps the persisted progress from ever running ahead of the + on-disk state. Otherwise a crash could leave pages below the progress still + in the old state on disk (a page's on-disk encryption state is decided at + write time in fil_io_set_encryption(), not by the redo-logged rewrite + above); the resumed operation would skip them and decrypt_end() would erase + the key, leaving unreadable pages that abort the next read with + DB_IO_DECRYPT_FAIL. */ + buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr, + false); + mtr_start(&mtr); /* Write (Un)Encryption progress on page 0 */ fsp_header_write_encryption_progress(space_id, space_flags, @@ -4775,18 +4789,26 @@ static bool load_encryption_from_header(fil_space_t *space) { ut_ad(space->id == page_get_space_id(buf_block_get_frame(block))); page_t *header_page = buf_block_get_frame(block); + /* If page 0 does not indicate an encrypted tablespace there is no encryption + info to load. This is an expected state when a crash interrupted an + ALTER ... ENCRYPTION just after writing its DDL log entry but before the new + encryption info was persisted on page 0 (or after a decryption finished). + Skip decoding in that case: attempting it would only decode the still-default + info and log a misleading "found unexpected version" error (the decode is + silenced during recovery, but this runs from the post-recovery resume + thread). */ + if (!FSP_FLAGS_GET_ENCRYPTION(fsp_header_get_flags(header_page))) { + mtr_commit(&mtr); + return false; + } + Encryption_key e_key{encryption_key, encryption_iv}; bool ret = fsp_header_get_encryption_key(space->flags, e_key, header_page); mtr_commit(&mtr); if (!ret) { - /* NOTE : In case when crash happened just after DDL Log entry, - encryption info won't be loaded even now. Which is fine. In that case - flags on page 0 should not indicate tablespace encrypted. */ - ut_ad(!FSP_FLAGS_GET_ENCRYPTION(fsp_header_get_flags(header_page))); - if (FSP_FLAGS_GET_ENCRYPTION(fsp_header_get_flags(header_page))) { - return true; - } + /* Page 0 indicated encryption but the info could not be decoded. */ + return true; } else { dberr_t err [[maybe_unused]] = fil_set_encryption( space->id, Encryption::AES, encryption_key, encryption_iv); @@ -4838,14 +4860,17 @@ static void resume_alter_encrypt_tablespace(THD *thd) { continue; } - /* If encryption was going on, make sure encryption information is - read/loaded from disk. */ - if (it->get_encryption_type() == Encryption::Progress::ENCRYPTION && - !space->can_encrypt()) { + /* + Make sure encryption information needed to read pages is loaded. This is + required for both encryption and decryption resume: pages after the + persisted progress watermark can still be encrypted on disk, and reading + them needs the in-memory key. + */ + if (!space->can_encrypt()) { if (load_encryption_from_header(space)) { - ib::error() << "Encryption information can't be read for tablesapce " + ib::error() << "Encryption information can't be read for tablespace " << space->name - << ". Skipping resume encryption operation for it."; + << ". Skipping resume (un)encryption operation for it."; continue; } } @@ -4942,6 +4967,8 @@ static void resume_alter_encrypt_tablespace(THD *thd) { void fsp_init_resume_alter_encrypt_tablespace() { THD *thd = create_internal_thd(); + thd->set_new_thread_id(); + resume_alter_encrypt_tablespace(thd); destroy_internal_thd(thd);