From 37668a76562f4988b4b747fa6a5ee0bc75ec09c6 Mon Sep 17 00:00:00 2001 From: Rahul Malik Date: Thu, 16 Jun 2022 14:57:04 +0530 Subject: [PATCH] Bug#107462 Failed upgrade attempt to 8.0.29 corrupts the data dictionary https://bugs.mysql.com/bug.php?id=107462 Problem: Failed upgrade attempt to 8.0.29 corrupts the data dictionary Analysis: 8.0.29 changed the redo log format and shuffled the redo type codes, so a lower version cannot parse the redo log files after a failed upgrade Fix: Recreate empty redo log files in the previous format when the upgrade fails, so that the lower version does not need to parse the new-format redo log --- share/messages_to_error_log.txt | 3 + sql/dd/impl/upgrade/server.cc | 1 + sql/mysqld.cc | 5 ++ sql/mysqld.h | 1 + storage/innobase/include/log0log.h | 7 +- storage/innobase/log/log0chkp.cc | 7 +- storage/innobase/srv/srv0start.cc | 134 ++++++++++++++++++++--------- 7 files changed, 112 insertions(+), 46 deletions(-) diff --git a/share/messages_to_error_log.txt b/share/messages_to_error_log.txt index 6b015ba96325..dedd8de3fb73 100644 --- a/share/messages_to_error_log.txt +++ b/share/messages_to_error_log.txt @@ -11641,6 +11641,9 @@ ER_THREAD_POOL_DEDICATED_LISTENERS_INVALID ER_GRP_RPL_FAILED_TO_LOG_VIEW_CHANGE eng "This member was unable to log the View_change_log_event into the binary log, hence it will leave the group. Please check that there is available disk space and add the member back to the group." +ER_IB_MSG_DOWNGRADING_LOG_FILE + eng "Upgrade failed, recreating redo logs with previous version" + # DO NOT add server-to-client messages here; # they go in messages_to_clients.txt # in the same directory as this file. 
diff --git a/sql/dd/impl/upgrade/server.cc b/sql/dd/impl/upgrade/server.cc index 05cc418316f8..b39993664890 100644 --- a/sql/dd/impl/upgrade/server.cc +++ b/sql/dd/impl/upgrade/server.cc @@ -583,6 +583,7 @@ static bool check_tables(THD *thd, std::unique_ptr &schema, } } } + DBUG_EXECUTE_IF("upgrade_failed_during_init", (*error_count)++;); return error_count->has_too_many_errors(); }; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 36c93e96d74d..c37785e5401f 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1207,6 +1207,7 @@ bool opt_no_monitor = false; bool opt_no_dd_upgrade = false; long opt_upgrade_mode = UPGRADE_AUTO; bool opt_initialize = false; +bool dd_init_failed_during_upgrade = false; bool opt_skip_replica_start = false; ///< If set, slave is not autostarted bool opt_enable_named_pipe = false; bool opt_local_infile, opt_replica_compressed_protocol; @@ -6328,6 +6329,10 @@ static int init_server_components() { dd::init(dd::enum_dd_init_type::DD_RESTART_OR_UPGRADE)) { LogErr(ERROR_LEVEL, ER_DD_INIT_FAILED); + if (!dd::upgrade::no_server_upgrade_required()) { + dd_init_failed_during_upgrade = true; + } + /* If clone recovery fails, we rollback the files to previous dataset and attempt to restart server. 
*/ int exit_code = diff --git a/sql/mysqld.h b/sql/mysqld.h index 20eaab6a2945..913b56f13f8a 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -181,6 +181,7 @@ extern MYSQL_PLUGIN_IMPORT std::atomic extern bool opt_no_dd_upgrade; extern long opt_upgrade_mode; extern bool opt_initialize; +extern bool dd_init_failed_during_upgrade; extern bool opt_safe_user_create; extern bool opt_local_infile, opt_myisam_use_mmap; extern bool opt_replica_compressed_protocol; diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index d041f16344ac..2e26dac3ed63 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -931,10 +931,9 @@ void log_files_header_fill(byte *buf, lsn_t start_lsn, const char *creator, void log_files_header_flush(log_t &log, uint32_t nth_file, lsn_t start_lsn); /** Changes format of redo files to previous format version. - -@note Note this will work between the two formats 5_7_9 & current because -the only change is the version number */ -void log_files_downgrade(log_t &log); +@param[in] log redo log +@param[in] log_format previous format version */ +void log_files_downgrade(log_t &log, uint32_t log_format); /** Writes the next checkpoint info to header of the first log file. Note that two pages of the header are used alternately for consecutive diff --git a/storage/innobase/log/log0chkp.cc b/storage/innobase/log/log0chkp.cc index ad2cc4d3db91..05130378af92 100644 --- a/storage/innobase/log/log0chkp.cc +++ b/storage/innobase/log/log0chkp.cc @@ -59,6 +59,7 @@ the file COPYING.Google. 
#include "log0log.h" #include "log0recv.h" #include "mem0mem.h" +#include "mysqld.h" #include "srv0mon.h" #include "srv0srv.h" #include "srv0start.h" @@ -447,7 +448,7 @@ void meb_log_print_file_hdr(byte *block) { #ifndef UNIV_HOTBACKUP -void log_files_downgrade(log_t &log) { +void log_files_downgrade(log_t &log, uint32_t log_format) { ut_ad(srv_shutdown_state.load() >= SRV_SHUTDOWN_LAST_PHASE); ut_a(!log_checkpointer_is_active()); @@ -461,7 +462,7 @@ void log_files_downgrade(log_t &log) { static_cast(dest_offset / univ_page_size.physical()); /* Write old version */ - mach_write_to_4(buf + LOG_HEADER_FORMAT, LOG_HEADER_FORMAT_5_7_9); + mach_write_to_4(buf + LOG_HEADER_FORMAT, log_format); log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf)); @@ -654,7 +655,7 @@ void log_create_first_checkpoint(log_t &log, lsn_t lsn) { page_no_t block_page_no; uint64_t block_offset; - ut_a(srv_is_being_started); + ut_a(srv_is_being_started || dd_init_failed_during_upgrade); ut_a(!srv_read_only_mode); ut_a(!recv_recovery_is_on()); ut_a(buf_are_flush_lists_empty_validate()); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index fead6001b0ab..6c5a6a0a93e4 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -145,6 +145,10 @@ bool srv_startup_is_before_trx_rollback_phase = false; /** true if srv_start() has been called */ static bool srv_start_has_been_called = false; +/** Redo log format before upgrade, used to create redo log files of the same +version on failed upgrades*/ +static uint32_t log_format_before_upgrade; + /** Bit flags for tracking background thread creation. They are used to determine which threads need to be stopped if we need to abort during the initialisation step. 
*/ @@ -522,11 +526,72 @@ static void create_log_files_rename( strcpy(logfile0, logfilename); fil_open_log_and_system_tablespace_files(); + fil_space_t::s_redo_space = fil_space_get(dict_sys_t::s_log_space_first_id); /* For cloned database it is normal to resize redo logs. */ ib::info(ER_IB_MSG_1068, ulonglong{lsn}); } +/* Create the log file name +@param[in,out] logfilename buffer for log file name +@return the directory name length */ +static size_t create_log_file_name(char *logfilename, size_t logfilename_size) { + size_t dirnamelen; + dirnamelen = strlen(srv_log_group_home_dir); + ut_a(dirnamelen < logfilename_size - 10 - sizeof "ib_logfile"); + memcpy(logfilename, srv_log_group_home_dir, dirnamelen); + + /* Add a path separator if needed. */ + if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) { + logfilename[dirnamelen++] = OS_PATH_SEPARATOR; + } + return dirnamelen; +} + +/* Recreate the redo logs +@param[in,out] logfilename buffer for log file name +@param[in] dirnamelen length of the directory path +@param[out] logfile0 name of the first log file +@param[out] new_checkpoint_lsn lsn of the first created checkpoint +@param[in] flushed_lsn current flushed lsn +@param[in] number_of_log_files number of redo log files to be created +@return DB_SUCCESS or error code */ +static dberr_t recreate_redo_logs(char *logfilename, size_t dirnamelen, + char *&logfile0, lsn_t new_checkpoint_lsn, + lsn_t flushed_lsn, + uint32_t number_of_log_files) { + ut_ad(fil_space_t::s_redo_space != nullptr); + /* Close and free the redo log files, so that + we can replace them. */ + fil_close_log_files(true); + + RECOVERY_CRASH(5); + + log_sys_close(); + + /* Finish clone file recovery before creating new log files. We + roll forward to remove any intermediate files here. 
*/ + clone_files_recovery(true); + + auto err = + create_log_files(logfilename, dirnamelen, flushed_lsn, + number_of_log_files, logfile0, new_checkpoint_lsn); + + if (err != DB_SUCCESS) { + return err; + } + + create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn, + logfile0); + ut_d(log_sys->disable_redo_writes = false); + + flushed_lsn = new_checkpoint_lsn; + + log_start(*log_sys, 0, flushed_lsn, flushed_lsn); + + return DB_SUCCESS; +} + /** Opens a log file. @return DB_SUCCESS or error code */ [[nodiscard]] static dberr_t open_log_file( @@ -2242,14 +2307,7 @@ dberr_t srv_start(bool create_new_db) { return (srv_init_abort(err)); } - dirnamelen = strlen(srv_log_group_home_dir); - ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile"); - memcpy(logfilename, srv_log_group_home_dir, dirnamelen); - - /* Add a path separator if needed. */ - if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) { - logfilename[dirnamelen++] = OS_PATH_SEPARATOR; - } + dirnamelen = create_log_file_name(logfilename, (sizeof logfilename)); srv_log_file_size_requested = srv_log_file_size; @@ -2635,6 +2693,8 @@ dberr_t srv_start(bool create_new_db) { log_buffer_flush_to_disk(*log_sys); } + log_format_before_upgrade = log_sys->format; + log_sys->m_allow_checkpoints.store(true, std::memory_order_release); if (!srv_force_recovery && !recv_sys->found_corrupt_log && @@ -2676,38 +2736,15 @@ dberr_t srv_start(bool create_new_db) { RECOVERY_CRASH(4); - /* Close and free the redo log files, so that - we can replace them. */ - fil_close_log_files(true); - - RECOVERY_CRASH(5); - - log_sys_close(); - - /* Finish clone file recovery before creating new log files. We - roll forward to remove any intermediate files here. 
*/ - clone_files_recovery(true); - ib::info(ER_IB_MSG_1143); srv_log_file_size = srv_log_file_size_requested; - err = - create_log_files(logfilename, dirnamelen, flushed_lsn, - srv_n_log_files_found, logfile0, new_checkpoint_lsn); - - if (err != DB_SUCCESS) { - return (srv_init_abort(err)); - } - - create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn, - logfile0); - - ut_d(log_sys->disable_redo_writes = false); + err = recreate_redo_logs(logfilename, dirnamelen, logfile0, + new_checkpoint_lsn, flushed_lsn, + srv_n_log_files_found); - flushed_lsn = new_checkpoint_lsn; - - log_start(*log_sys, 0, flushed_lsn, flushed_lsn); + if (err != DB_SUCCESS) return (srv_init_abort(DB_ERROR)); log_start_background_threads(*log_sys); @@ -3480,7 +3517,7 @@ static lsn_t srv_shutdown_log() { log_background_threads_inactive_validate(); buf_must_be_all_freed(); - const lsn_t lsn = log_get_lsn(*log_sys); + lsn_t lsn = log_get_lsn(*log_sys); if (!srv_read_only_mode) { fil_flush_file_spaces(to_int(FIL_TYPE_TABLESPACE) | to_int(FIL_TYPE_LOG)); @@ -3488,10 +3525,30 @@ static lsn_t srv_shutdown_log() { srv_shutdown_set_state(SRV_SHUTDOWN_LAST_PHASE); - if (srv_downgrade_logs) { + /* If the upgrade fails, new redo log format is not compatible with older + versions, recreate them */ + if (dd_init_failed_during_upgrade) { + char logfilename[10000]; + char *logfile0 = nullptr; + size_t dirnamelen = create_log_file_name(logfilename, (sizeof logfilename)); + lsn_t new_checkpoint_lsn = 0; + + auto flushed_lsn = log_get_lsn(*log_sys); + + ib::info(ER_IB_MSG_DOWNGRADING_LOG_FILE); + + auto err = + recreate_redo_logs(logfilename, dirnamelen, logfile0, + new_checkpoint_lsn, flushed_lsn, srv_n_log_files); + + ut_ad(err == DB_SUCCESS); + lsn = log_get_lsn(*log_sys); + } + + if (srv_downgrade_logs || dd_init_failed_during_upgrade) { ut_a(!srv_read_only_mode); - log_files_downgrade(*log_sys); + log_files_downgrade(*log_sys, log_format_before_upgrade); fil_flush_file_redo(); } @@ -3501,7 
+3558,6 @@ static lsn_t srv_shutdown_log() { ut_a(lsn == log_sys->last_checkpoint_lsn.load() || srv_force_recovery >= SRV_FORCE_NO_LOG_REDO); - ut_a(lsn == log_get_lsn(*log_sys)); if (!srv_read_only_mode) {