diff --git a/mysql-8.0.32/sql/dd/dictionary.h b/mysql-8.0.32/sql/dd/dictionary.h index f4552a903..b3498cc34 100644 --- a/mysql-8.0.32/sql/dd/dictionary.h +++ b/mysql-8.0.32/sql/dd/dictionary.h @@ -190,7 +190,7 @@ class Dictionary { [[nodiscard]] bool acquire_shared_table_mdl(THD *thd, const char *schema_name, const char *table_name, bool no_wait, - MDL_ticket **out_mdl_ticket); + MDL_ticket **out_mdl_ticket, bool transactional = false); /** Predicate to check if we have a shared meta data lock on the @@ -358,6 +358,9 @@ void release_mdl(THD *thd, MDL_ticket *mdl_ticket); /** Get Dictionary_client from THD object (the latter is opaque * in SEs). */ cache::Dictionary_client *get_dd_client(THD *thd); +/** Release mdl which scope is MDL_TRANSACTION */ +void release_transactional_mdl(THD *thd); + /** Create plugin native table. The API would only write metadata to DD and skip calling handler::create(). diff --git a/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc b/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc index 733a0c0e8..5114049d8 100644 --- a/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc +++ b/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc @@ -509,10 +509,11 @@ static bool acquire_exclusive_mdl(THD *thd, bool acquire_shared_table_mdl(THD *thd, const char *schema_name, const char *table_name, bool no_wait, - MDL_ticket **out_mdl_ticket) { + MDL_ticket **out_mdl_ticket, bool transactional) { return acquire_shared_mdl(thd, MDL_key::TABLE, schema_name, table_name, no_wait, thd->variables.lock_wait_timeout, - MDL_EXPLICIT, out_mdl_ticket); + transactional ? MDL_TRANSACTION : MDL_EXPLICIT, + out_mdl_ticket); } bool has_shared_table_mdl(THD *thd, const char *schema_name, @@ -588,6 +589,10 @@ void release_mdl(THD *thd, MDL_ticket *mdl_ticket) { thd->mdl_context.release_lock(mdl_ticket); } +void release_transactional_mdl(THD *thd) { + thd->mdl_context.release_transactional_locks(); +} + /* purecov: begin deadcode */ cache::Dictionary_client *get_dd_client(THD *thd) { return thd->dd_client(); } /* purecov: end */ @@ -708,6 +713,11 @@ bool reset_tables_and_tablespaces() { // Release transactional metadata locks. thd.thd->mdl_context.release_transactional_locks(); + /* Tell innodb to start rollback by background thread + The reason we hold on the thread is to avoid deadlock, because + both this function and background thread may acquire mdl lock.*/ + ddse->start_rollback(); + return ret; } diff --git a/mysql-8.0.32/sql/handler.h b/mysql-8.0.32/sql/handler.h index 6b24daab5..ff511fed4 100644 --- a/mysql-8.0.32/sql/handler.h +++ b/mysql-8.0.32/sql/handler.h @@ -1924,6 +1924,8 @@ typedef void (*dict_cache_reset_t)(const char *schema_name, typedef void (*dict_cache_reset_tables_and_tablespaces_t)(); +typedef void (*start_rollback_t)(); + /** Mode for data dictionary recovery. */ enum dict_recovery_mode_t { DICT_RECOVERY_INITIALIZE_SERVER, ///< First start of a new server @@ -2665,6 +2667,7 @@ struct handlerton { dict_cache_reset_t dict_cache_reset; dict_cache_reset_tables_and_tablespaces_t dict_cache_reset_tables_and_tablespaces; + start_rollback_t start_rollback; dict_recover_t dict_recover; dict_get_server_version_t dict_get_server_version; dict_set_server_version_t dict_set_server_version; diff --git a/mysql-8.0.32/storage/innobase/dict/dict0dd.cc b/mysql-8.0.32/storage/innobase/dict/dict0dd.cc index 3010f5889..3e5cad5be 100644 --- a/mysql-8.0.32/storage/innobase/dict/dict0dd.cc +++ b/mysql-8.0.32/storage/innobase/dict/dict0dd.cc @@ -303,6 +303,10 @@ void dd_mdl_release(THD *thd, MDL_ticket **mdl) { *mdl = nullptr; } +void dd_mdl_release_transactional(THD *thd) { + dd::release_transactional_mdl(thd); +} + THD *dd_thd_for_undo(const trx_t *trx) { return trx->mysql_thd == nullptr ? current_thd : trx->mysql_thd; } diff --git a/mysql-8.0.32/storage/innobase/dict/dict0dict.cc b/mysql-8.0.32/storage/innobase/dict/dict0dict.cc index 0d26f64ce..0b0c44e0f 100644 --- a/mysql-8.0.32/storage/innobase/dict/dict0dict.cc +++ b/mysql-8.0.32/storage/innobase/dict/dict0dict.cc @@ -1884,6 +1884,7 @@ static void dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); ut_a(table->get_ref_count() == 0); ut_a(table->n_rec_locks.load() == 0); + ut_ad(table->n_table_locks.load() == 0); ut_ad(dict_sys_mutex_own()); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); diff --git a/mysql-8.0.32/storage/innobase/dict/mem.cc b/mysql-8.0.32/storage/innobase/dict/mem.cc index 3a95df0bb..64f829de9 100644 --- a/mysql-8.0.32/storage/innobase/dict/mem.cc +++ b/mysql-8.0.32/storage/innobase/dict/mem.cc @@ -244,6 +244,7 @@ dict_table_t *dict_mem_table_create(const char *name, space_id_t space, table->autoinc_field_no = ULINT_UNDEFINED; table->sess_row_id = 0; table->sess_trx_id = 0; + table->n_table_locks = 0; /* If the table has an FTS index or we are in the process of building one, create the table->fts */ diff --git a/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc b/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc index 4aeacf071..da057859a 100644 --- a/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc +++ b/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc @@ -741,6 +741,7 @@ static PSI_mutex_info all_innodb_mutexes[] = { PSI_MUTEX_KEY(temp_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), PSI_MUTEX_KEY(undo_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), PSI_MUTEX_KEY(trx_sys_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), + PSI_MUTEX_KEY(trx_sys_resurrect_mutex, 0, 0, PSI_DOCUMENT_ME), #ifdef UNIV_DEBUG PSI_MUTEX_KEY(rw_lock_debug_mutex, 0, 0, PSI_DOCUMENT_ME), #endif /* UNIV_DEBUG */ @@ -2809,6 +2810,14 @@ trx_t *innobase_trx_allocate(THD *thd) /*!< in: user thread handle */ return trx; } +trx_t* current_trx() { + THD *thd = current_thd; + if (likely(thd != nullptr) && innodb_hton_ptr->slot != HA_SLOT_UNDEF) { + return thd_to_trx(thd); + } + return nullptr; +} + /** Gets the InnoDB transaction handle for a MySQL handler object, creates an InnoDB transaction struct if the corresponding MySQL thread struct still lacks one. @@ -4026,6 +4035,17 @@ static void innobase_dict_cache_reset_tables_and_tablespaces() { dict_sys_mutex_exit(); } +static void innobase_start_rollback() { + ut_a(!trx_sys->start_rollback); + trx_sys->start_rollback = true; + + /* Wait until resurrect mdl lock is done by rollback + thread. */ + while (!trx_sys->resurrect_lock_done) { + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + } +} + /** Perform high-level recovery in InnoDB as part of initializing the data dictionary. @param[in] dict_recovery_mode How to do recovery @@ -5172,6 +5192,7 @@ static int innodb_init(void *p) { innobase_hton->dict_cache_reset = innobase_dict_cache_reset; innobase_hton->dict_cache_reset_tables_and_tablespaces = innobase_dict_cache_reset_tables_and_tablespaces; + innobase_hton->start_rollback = innobase_start_rollback; innobase_hton->dict_recover = innobase_dict_recover; innobase_hton->dict_get_server_version = innobase_dict_get_server_version; @@ -20083,6 +20104,7 @@ static xa_status_code innobase_commit_by_xid( /* use cases are: disconnected xa, slave xa, recovery */ trx_deregister_from_2pc(trx); ut_ad(!trx->will_lock); /* trx cache requirement */ + trx_resurrect_erase(trx); trx_free_for_background(trx); return (XA_OK); @@ -20110,6 +20132,7 @@ static xa_status_code innobase_rollback_by_xid( trx_deregister_from_2pc(trx); ut_ad(!trx->will_lock); + trx_resurrect_erase(trx); trx_free_for_background(trx); return (ret != 0 ? XAER_RMERR : XA_OK); diff --git a/mysql-8.0.32/storage/innobase/include/dict0dd.h b/mysql-8.0.32/storage/innobase/include/dict0dd.h index aff64a062..b0e03cc40 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0dd.h +++ b/mysql-8.0.32/storage/innobase/include/dict0dd.h @@ -887,13 +887,15 @@ MY_COMPILER_DIAGNOSTIC_POP() @retval true if failed (my_error() will have been called) */ [[nodiscard]] static inline bool dd_mdl_acquire(THD *thd, MDL_ticket **mdl, const char *db, - const char *table); + const char *table, bool is_transactional = false); /** Release a metadata lock. @param[in,out] thd current thread @param[in,out] mdl metadata lock */ void dd_mdl_release(THD *thd, MDL_ticket **mdl); +void dd_mdl_release_transactional(THD *thd); + /** Returns thd associated with the trx or current_thd @param[in] trx transaction @return trx->mysql_thd or current_thd */ diff --git a/mysql-8.0.32/storage/innobase/include/dict0dd.ic b/mysql-8.0.32/storage/innobase/include/dict0dd.ic index 5ac19d343..67d692631 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0dd.ic +++ b/mysql-8.0.32/storage/innobase/include/dict0dd.ic @@ -291,7 +291,7 @@ inline const char *get_row_format_name(enum row_type row_format) { @retval true if failed (my_error() will have been called) */ [[nodiscard]] static inline bool dd_mdl_acquire(THD *thd, MDL_ticket **mdl, const char *db, - const char *table) { + const char *table, bool is_transactional) { bool ret = false; char table_name[MAX_TABLE_NAME_LEN + 1]; const char *table_p = table; @@ -329,7 +329,7 @@ inline const char *get_row_format_name(enum row_type row_format) { #endif /* !_WIN32 */ } - ret = dd::acquire_shared_table_mdl(thd, db_p, table_p, false, mdl); + ret = dd::acquire_shared_table_mdl(thd, db_p, table_p, false, mdl, is_transactional); return (ret); } diff --git a/mysql-8.0.32/storage/innobase/include/dict0mem.h b/mysql-8.0.32/storage/innobase/include/dict0mem.h index 8c7674e5a..1ec122e76 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0mem.h +++ b/mysql-8.0.32/storage/innobase/include/dict0mem.h @@ -2379,6 +2379,10 @@ detect this and will eventually quit sooner. */ - Some places assert this field is positive holding only shared latch. */ std::atomic n_rec_locks; + /** table lock counter holded by transaction. the table can't be + evicted from cache if not equal to zero. */ + std::atomic n_table_locks; + #ifndef UNIV_DEBUG private: #endif diff --git a/mysql-8.0.32/storage/innobase/include/lock0lock.h b/mysql-8.0.32/storage/innobase/include/lock0lock.h index 2661a4ae9..eae45e404 100644 --- a/mysql-8.0.32/storage/innobase/include/lock0lock.h +++ b/mysql-8.0.32/storage/innobase/include/lock0lock.h @@ -675,6 +675,7 @@ void lock_make_trx_hit_list(trx_t *trx, hit_list_t &hit_list); also removed in addition to other table-level and record-level locks. No lock, that is going to be removed, is allowed to be a wait lock. */ void lock_remove_all_on_table( + trx_t *trx, dict_table_t *table, /*!< in: table to be dropped or discarded */ bool remove_also_table_sx_locks); /*!< in: also removes diff --git a/mysql-8.0.32/storage/innobase/include/lock0priv.h b/mysql-8.0.32/storage/innobase/include/lock0priv.h index 95dbf9bdb..dd18ef2d5 100644 --- a/mysql-8.0.32/storage/innobase/include/lock0priv.h +++ b/mysql-8.0.32/storage/innobase/include/lock0priv.h @@ -1015,6 +1015,8 @@ parallel modifications turns out wrong. @return lock or NULL */ static inline bool lock_table_has(const trx_t *trx, const dict_table_t *table, enum lock_mode mode); +static inline bool lock_table_check(const trx_t *trx, const dict_table_t *table, + enum lock_mode mode); /** Handles writing the information about found deadlock to the log files and caches it for future lock_latest_err_file() calls (for example used by diff --git a/mysql-8.0.32/storage/innobase/include/lock0priv.ic b/mysql-8.0.32/storage/innobase/include/lock0priv.ic index 8e9508fdb..ed80a63f6 100644 --- a/mysql-8.0.32/storage/innobase/include/lock0priv.ic +++ b/mysql-8.0.32/storage/innobase/include/lock0priv.ic @@ -260,12 +260,63 @@ static inline void lock_reset_lock_and_trx_wait( lock->type_mode &= ~LOCK_WAIT; } + +static inline bool lock_table_check(const trx_t *trx, const dict_table_t *table, + lock_mode in_mode) { + ut_ad(!trx_mutex_own(trx)); + trx_mutex_enter(trx); + ut_ad(trx_can_be_handled_by_current_thread(trx)); + + if (in_mode != LOCK_AUTO_INC) { +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(const_cast(table)) != trx->locked_tables.end()) { + trx_mutex_exit(trx); + return true; + } +#endif + } + + /* Look for stronger locks the same trx already has on the table */ + for (const lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != nullptr && lock_get_type(lock) == LOCK_TABLE; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + lock_mode mode = lock_get_mode(lock); + + ut_ad(trx == lock->trx); + ut_ad(lock->tab_lock.table != nullptr); + + if (table == lock->tab_lock.table && + lock_mode_stronger_or_eq(mode, in_mode)) { + // we know it can't be waiting because we are the thread *running* the trx + ut_ad(!lock_get_wait(lock)); + + trx_mutex_exit(trx); + return (true); + } + } + + trx_mutex_exit(trx); + return (false); +} + + static inline bool lock_table_has(const trx_t *trx, const dict_table_t *table, lock_mode in_mode) { ut_ad(!trx_mutex_own(trx)); trx_mutex_enter(trx); ut_ad(trx_can_be_handled_by_current_thread(trx)); + if (in_mode != LOCK_AUTO_INC) { +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(const_cast(table)) == trx->locked_tables.end()) { + trx_mutex_exit(trx); + return false; + } +#endif + trx_mutex_exit(trx); + return true; + } + /* Look for stronger locks the same trx already has on the table */ for (const lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr && lock_get_type(lock) == LOCK_TABLE; diff --git a/mysql-8.0.32/storage/innobase/include/sync0sync.h b/mysql-8.0.32/storage/innobase/include/sync0sync.h index f7bedc316..3aa09abac 100644 --- a/mysql-8.0.32/storage/innobase/include/sync0sync.h +++ b/mysql-8.0.32/storage/innobase/include/sync0sync.h @@ -161,6 +161,7 @@ extern mysql_pfs_key_t lock_sys_page_mutex_key; extern mysql_pfs_key_t lock_sys_table_mutex_key; extern mysql_pfs_key_t lock_wait_mutex_key; extern mysql_pfs_key_t trx_sys_mutex_key; +extern mysql_pfs_key_t trx_sys_resurrect_mutex_key; extern mysql_pfs_key_t trx_sys_shard_mutex_key; extern mysql_pfs_key_t trx_sys_serialisation_mutex_key; extern mysql_pfs_key_t srv_sys_mutex_key; diff --git a/mysql-8.0.32/storage/innobase/include/sync0types.h b/mysql-8.0.32/storage/innobase/include/sync0types.h index eadacaaac..b5ed73ba7 100644 --- a/mysql-8.0.32/storage/innobase/include/sync0types.h +++ b/mysql-8.0.32/storage/innobase/include/sync0types.h @@ -416,6 +416,7 @@ enum latch_id_t { LATCH_ID_TEMP_POOL_MANAGER, LATCH_ID_TRX, LATCH_ID_TRX_SYS, + LATCH_ID_TRX_SYS_RESURRECT, LATCH_ID_TRX_SYS_SHARD, LATCH_ID_TRX_SYS_SERIALISATION, LATCH_ID_SRV_SYS, diff --git a/mysql-8.0.32/storage/innobase/include/trx0sys.h b/mysql-8.0.32/storage/innobase/include/trx0sys.h index 33b287a9f..6f82ca105 100644 --- a/mysql-8.0.32/storage/innobase/include/trx0sys.h +++ b/mysql-8.0.32/storage/innobase/include/trx0sys.h @@ -503,6 +503,9 @@ struct trx_sys_t { /** Mutex to protect serialisation_list. */ TrxSysMutex serialisation_mutex; + /** Mutex to protect resurrected_trx_tables */ + TrxSysMutex resurrect_mutex; + /** Tracks minimal transaction id which has received trx->no, but has not yet finished commit for the mtr writing the trx commit. Protected by the serialisation_mutex. Ordered on the trx->no field. */ @@ -561,6 +564,13 @@ struct trx_sys_t { /** True if XA PREPARED trxs are found. */ bool found_prepared_trx; + /** Tell background thread to start rollback. */ + volatile bool start_rollback; + + /** True if the background thread already resurrect + all mdl locks for tables waiting for rollback. */ + volatile bool resurrect_lock_done; + /** @} */ char pad_after[ut::INNODB_CACHE_LINE_SIZE]; @@ -621,6 +631,8 @@ static inline void trx_sys_serialisation_mutex_exit() { trx_sys->serialisation_mutex.exit(); } +trx_t* current_trx(); + #endif /* !UNIV_HOTBACKUP */ #include "trx0sys.ic" diff --git a/mysql-8.0.32/storage/innobase/include/trx0trx.h b/mysql-8.0.32/storage/innobase/include/trx0trx.h index 8190ea499..bb11d078b 100644 --- a/mysql-8.0.32/storage/innobase/include/trx0trx.h +++ b/mysql-8.0.32/storage/innobase/include/trx0trx.h @@ -97,7 +97,11 @@ trx_t *trx_allocate_for_background(void); /** Resurrect table locks for resurrected transactions. @param[in] all false: resurrect locks for dictionary transactions, true : resurrect locks for all transactions. */ -void trx_resurrect_locks(bool all); +void trx_resurrect_locks(); + +void trx_resurrect_modified_tables(bool all); + +void trx_resurrect_erase(trx_t *trx); /** Clear all resurrected table IDs. Needs to be called after all tables locks are resurrected. */ @@ -937,6 +941,9 @@ struct trx_t { THD *mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ + THD *recover_mysql_thd; /** Special handler for holding + resurrected mdl locks. */ + const char *mysql_log_file_name; /*!< if MySQL binlog is used, this field contains a pointer to the latest file @@ -1083,6 +1090,9 @@ struct trx_t { transaction branch */ trx_mod_tables_t mod_tables; /*!< List of tables that were modified by this transaction */ + + trx_mod_tables_t locked_tables; /*!< track tables which requires table lock */ + #endif /* !UNIV_HOTBACKUP */ /*------------------------------*/ bool api_trx; /*!< trx started by InnoDB API */ diff --git a/mysql-8.0.32/storage/innobase/lock/lock0lock.cc b/mysql-8.0.32/storage/innobase/lock/lock0lock.cc index d002914b8..b0ffd9e8c 100644 --- a/mysql-8.0.32/storage/innobase/lock/lock0lock.cc +++ b/mysql-8.0.32/storage/innobase/lock/lock0lock.cc @@ -59,6 +59,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include "usr0sess.h" #include "ut0new.h" #include "ut0vec.h" +#include "dict0dd.h" #include "my_dbug.h" #include "my_psi_config.h" @@ -3746,10 +3747,9 @@ dberr_t lock_table(ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, In practice this call to lock_table_has was never protected in any way before, so the situation now, after protecting it with trx->mutex, can't be worse. */ - if (lock_table_has(trx, table, mode)) { + if (lock_table_check(trx, table, mode)) { /* In Debug mode we assert the same condition again, to help catch cases of race condition, if it is possible at all, for further analysis. */ - ut_ad(lock_table_has(trx, table, mode)); return (DB_SUCCESS); } @@ -3763,6 +3763,28 @@ dberr_t lock_table(ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, trx_set_rw_mode(trx); } + if (mode != LOCK_AUTO_INC) { +#ifdef UNIV_DEBUG + ut_ad(!dict_table_is_sdi(table->id) || + mode == LOCK_IS || mode == LOCK_IX); + + ut_ad(!table->is_fts_aux() || + mode == LOCK_IS || mode == LOCK_IX); + + /* As only currrent transaction operates on the set, we + can check firstly without holding trx_t::mutex */ + if (trx->locked_tables.find(table) == trx->locked_tables.end()) { + trx_mutex_enter(trx); + trx->locked_tables.insert(table); + table->n_table_locks++; + trx_mutex_exit(trx); + } +#endif + + return (DB_SUCCESS); + } + + locksys::Shard_latch_guard table_latch_guard{UT_LOCATION_HERE, *table}; /* We have to check if the new lock is compatible with any locks @@ -4378,6 +4400,16 @@ static void lock_remove_all_on_table_for_trx( } } +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(table) != trx->locked_tables.end()) { + trx->locked_tables.erase(table); + ut_a(table->n_table_locks.load() > 0); + table->n_table_locks--; + } +#endif + + ut_a(trx->recover_mysql_thd == nullptr); + trx_mutex_exit(trx); } @@ -4434,6 +4466,20 @@ static ulint lock_remove_recovered_trx_record_locks( } } +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(table) != trx->locked_tables.end()) { + trx->locked_tables.erase(table); + ut_a(table->n_table_locks.load() > 0); + table->n_table_locks--; + } +#endif + + if (trx->recover_mysql_thd != nullptr) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); ++n_recovered_trx; } @@ -4448,6 +4494,7 @@ static ulint lock_remove_recovered_trx_record_locks( also removed in addition to other table-level and record-level locks. No lock, that is going to be removed, is allowed to be a wait lock. */ void lock_remove_all_on_table( + trx_t *trx, dict_table_t *table, /*!< in: table to be dropped or discarded */ bool remove_also_table_sx_locks) /*!< in: also removes @@ -4469,6 +4516,15 @@ void lock_remove_all_on_table( remove_also_table_sx_locks); } + lock_remove_all_on_table_for_trx(table, trx, + remove_also_table_sx_locks); + + trx_t* curr_trx = current_trx(); + if (trx != curr_trx) { + lock_remove_all_on_table_for_trx(table, curr_trx, + remove_also_table_sx_locks); + } + /* Note: Recovered transactions don't have table level IX or IS locks but can have implicit record locks that have been converted to explicit record locks. Such record locks cannot be freed by traversing the @@ -6192,6 +6248,18 @@ void lock_trx_release_locks(trx_t *trx) /*!< in/out: transaction */ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); ut_a(ib_vector_is_empty(trx->lock.autoinc_locks)); +#ifdef UNIV_DEBUG + /* Decrease table lock counter */ + trx_mod_tables_t::const_iterator end = trx->locked_tables.end(); + for (trx_mod_tables_t::const_iterator it = trx->locked_tables.begin(); + it != end; it++) { + ut_a((*it)->n_table_locks.load() > 0); + (*it)->n_table_locks--; + } + + trx->locked_tables.clear(); +#endif + mem_heap_empty(trx->lock.lock_heap); trx_mutex_exit(trx); } @@ -6272,7 +6340,7 @@ bool lock_table_has_locks(const dict_table_t *table) { /* As soon as we return false the caller might free the table object, so it is crucial that when lock_table_dequeue() removes the last lock on the table then the thread calling it won't dereference the table pointer anymore. */ - has_locks = UT_LIST_GET_LEN(table->locks) > 0; + has_locks = (UT_LIST_GET_LEN(table->locks) > 0 || table->n_table_locks.load() > 0); } #ifdef UNIV_DEBUG diff --git a/mysql-8.0.32/storage/innobase/row/row0mysql.cc b/mysql-8.0.32/storage/innobase/row/row0mysql.cc index 571f4ef0c..86150157d 100644 --- a/mysql-8.0.32/storage/innobase/row/row0mysql.cc +++ b/mysql-8.0.32/storage/innobase/row/row0mysql.cc @@ -3282,7 +3282,7 @@ static dberr_t row_mysql_table_id_reassign(dict_table_t *table, dict_hdr_get_new_id(new_id, nullptr, nullptr, table, false); /* Remove all locks except the table-level S and X locks. */ - lock_remove_all_on_table(table, false); + lock_remove_all_on_table(current_trx(), table, false); return (DB_SUCCESS); } @@ -3938,7 +3938,7 @@ dberr_t row_drop_table_for_mysql(const char *name, trx_t *trx, bool nonatomic, if (table->get_ref_count() == 0) { /* We don't take lock on intrinsic table so nothing to remove.*/ if (!table->is_intrinsic()) { - lock_remove_all_on_table(table, true); + lock_remove_all_on_table(trx, table, true); } ut_a(table->n_rec_locks.load() == 0); } else if (table->get_ref_count() > 0 || table->n_rec_locks.load() > 0) { diff --git a/mysql-8.0.32/storage/innobase/srv/srv0start.cc b/mysql-8.0.32/storage/innobase/srv/srv0start.cc index 2938d53a7..25c58c388 100644 --- a/mysql-8.0.32/storage/innobase/srv/srv0start.cc +++ b/mysql-8.0.32/storage/innobase/srv/srv0start.cc @@ -2532,7 +2532,7 @@ static void apply_dynamic_metadata() { any tables (including data dictionary tables) can be accessed. */ void srv_dict_recover_on_restart() { /* Resurrect locks for dictionary transactions */ - trx_resurrect_locks(false); + trx_resurrect_modified_tables(false); /* Roll back any recovered data dictionary transactions, so that the data dictionary tables will be free of any locks. @@ -2575,9 +2575,7 @@ void srv_dict_recover_on_restart() { Note: The current work around fixes both issues but ideally should not be required if base issues [TODOs] are fixed. */ - trx_resurrect_locks(true); - - trx_clear_resurrected_table_ids(); + trx_resurrect_modified_tables(true); /* Do after all DD transactions recovery, to get consistent metadata */ apply_dynamic_metadata(); @@ -2646,6 +2644,7 @@ void srv_start_threads(bool bootstrap) { srv_threads.m_buf_resize.start(); if (srv_read_only_mode) { + trx_sys->resurrect_lock_done = true; purge_sys->state = PURGE_STATE_DISABLED; return; } @@ -2658,6 +2657,8 @@ void srv_start_threads(bool bootstrap) { trx_recovery_rollback_thread_key, 0, trx_recovery_rollback_thread); srv_threads.m_trx_recovery_rollback.start(); + } else { + trx_sys->resurrect_lock_done = true; } /* Create the master thread which does purge and other utility diff --git a/mysql-8.0.32/storage/innobase/sync/sync0debug.cc b/mysql-8.0.32/storage/innobase/sync/sync0debug.cc index 7f7078dd9..2747f8be6 100644 --- a/mysql-8.0.32/storage/innobase/sync/sync0debug.cc +++ b/mysql-8.0.32/storage/innobase/sync/sync0debug.cc @@ -1320,6 +1320,8 @@ static void sync_latch_meta_init() UNIV_NOTHROW { LATCH_ADD_MUTEX(TRX_SYS_RSEG, SYNC_TRX_SYS_RSEG, trx_sys_rseg_mutex_key); + LATCH_ADD_MUTEX(TRX_SYS_RESURRECT, SYNC_NO_ORDER_CHECK, trx_sys_resurrect_mutex_key); + #ifdef UNIV_DEBUG /* Mutex names starting with '.' are not tracked. They are assumed to be diagnostic mutexes used in debugging. */ diff --git a/mysql-8.0.32/storage/innobase/sync/sync0sync.cc b/mysql-8.0.32/storage/innobase/sync/sync0sync.cc index 2d9e30e5b..c803f87cc 100644 --- a/mysql-8.0.32/storage/innobase/sync/sync0sync.cc +++ b/mysql-8.0.32/storage/innobase/sync/sync0sync.cc @@ -128,6 +128,7 @@ mysql_pfs_key_t lock_sys_table_mutex_key; mysql_pfs_key_t lock_sys_page_mutex_key; mysql_pfs_key_t lock_wait_mutex_key; mysql_pfs_key_t trx_sys_mutex_key; +mysql_pfs_key_t trx_sys_resurrect_mutex_key; mysql_pfs_key_t trx_sys_shard_mutex_key; mysql_pfs_key_t trx_sys_serialisation_mutex_key; mysql_pfs_key_t srv_sys_mutex_key; diff --git a/mysql-8.0.32/storage/innobase/trx/trx0roll.cc b/mysql-8.0.32/storage/innobase/trx/trx0roll.cc index 0ac5faeb8..1482cae58 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0roll.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0roll.cc @@ -763,6 +763,7 @@ void trx_rollback_or_clean_recovered( we need to reacquire it before retrying the loop. */ if (trx_rollback_or_clean_resurrected(trx, all)) { trx_sys_mutex_enter(); + trx_resurrect_erase(trx); need_one_more_scan = true; break; } @@ -781,6 +782,15 @@ committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. Note: this is done in a background thread. */ void trx_recovery_rollback_thread() { + while (!trx_sys->start_rollback) { + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + } + + /* Resurrect MDL locks for modified table */ + trx_resurrect_locks(); + + trx_sys->resurrect_lock_done = true; + THD *thd = create_internal_thd(); ut_ad(!srv_read_only_mode); @@ -792,6 +802,7 @@ void trx_recovery_rollback_thread() { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + trx_rollback_or_clean_recovered(true); destroy_internal_thd(thd); diff --git a/mysql-8.0.32/storage/innobase/trx/trx0sys.cc b/mysql-8.0.32/storage/innobase/trx/trx0sys.cc index 7a507cdfb..b5f419eb2 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0sys.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0sys.cc @@ -564,6 +564,7 @@ void trx_sys_create(void) { mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex); mutex_create(LATCH_ID_TRX_SYS_SERIALISATION, &trx_sys->serialisation_mutex); + mutex_create(LATCH_ID_TRX_SYS_RESURRECT, &trx_sys->resurrect_mutex); UT_LIST_INIT(trx_sys->serialisation_list); UT_LIST_INIT(trx_sys->rw_trx_list); @@ -587,6 +588,9 @@ void trx_sys_create(void) { new (&trx_sys->tmp_rsegs) Rsegs(); trx_sys->tmp_rsegs.set_empty(); + + trx_sys->start_rollback = false; + trx_sys->resurrect_lock_done = false; } /** Creates and initializes the transaction system at the database creation. */ @@ -650,6 +654,7 @@ void trx_sys_close(void) { /* We used placement new to create this mutex. Call the destructor. */ mutex_free(&trx_sys->serialisation_mutex); mutex_free(&trx_sys->mutex); + mutex_free(&trx_sys->resurrect_mutex); trx_sys->rw_trx_ids.~trx_ids_t(); diff --git a/mysql-8.0.32/storage/innobase/trx/trx0trx.cc b/mysql-8.0.32/storage/innobase/trx/trx0trx.cc index 29a5a9be1..f4ad3596f 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0trx.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0trx.cc @@ -83,6 +83,7 @@ typedef std::map, /** Map of resurrected transactions to affected table_id */ static trx_table_map resurrected_trx_tables; +static std::atomic resurrected_trx_done {false}; /** Dummy session used currently in MySQL interface */ sess_t *trx_dummy_sess = nullptr; @@ -221,6 +222,8 @@ static void trx_init(trx_t *trx) { trx->error_index = nullptr; + trx->recover_mysql_thd = nullptr; + /* During asynchronous rollback, we should reset forced rollback flag only after rollback is complete to avoid race with the thread owning the transaction. */ @@ -302,6 +305,7 @@ struct TrxFactory { mutex_free(&trx->undo_mutex); trx->mod_tables.~trx_mod_tables_t(); + trx->locked_tables.~trx_mod_tables_t(); ut_ad(trx->read_view == nullptr); @@ -471,6 +475,7 @@ static trx_t *trx_create_low() { /* Should have been either just initialized or .clear()ed by trx_free(). */ ut_a(trx->mod_tables.size() == 0); + ut_a(trx->locked_tables.size() == 0); return (trx); } @@ -483,6 +488,15 @@ static void trx_free(trx_t *&trx) { trx->mysql_thd = nullptr; + trx_mutex_enter(trx); + if (trx->recover_mysql_thd) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); + + // FIXME: We need to avoid this heap free/alloc for each commit. if (trx->lock.autoinc_locks != nullptr) { ut_ad(ib_vector_is_empty(trx->lock.autoinc_locks)); @@ -493,6 +507,8 @@ static void trx_free(trx_t *&trx) { trx->mod_tables.clear(); + ut_a(trx->locked_tables.size() == 0); + ut_ad(trx->read_view == nullptr); ut_ad(trx->is_dd_trx == false); @@ -731,18 +747,92 @@ static void trx_resurrect_table_ids(trx_t *trx, const trx_undo_ptr_t *undo_ptr, mtr_commit(&mtr); } -void trx_resurrect_locks(bool all) { - for (const auto &element : resurrected_trx_tables) { - trx_t *trx = element.first; +void trx_resurrect_erase(trx_t* trx) { + if (!resurrected_trx_done.load()) { + mutex_enter(&trx_sys->resurrect_mutex); + resurrected_trx_tables.erase(trx); + + if (resurrected_trx_tables.empty()) { + resurrected_trx_done = true; + } + + mutex_exit(&trx_sys->resurrect_mutex); + } +#ifdef UNIV_DEBUG + mutex_enter(&trx_sys->resurrect_mutex); + ut_ad(resurrected_trx_tables.find(trx) + == resurrected_trx_tables.end()); + mutex_exit(&trx_sys->resurrect_mutex); +#endif +} + +void trx_resurrect_modified_tables(bool all) { + mutex_enter(&trx_sys->resurrect_mutex); + for (trx_table_map::const_iterator t = resurrected_trx_tables.begin(); + t != resurrected_trx_tables.end(); t++) { + trx_t *trx = t->first; /* We deal only with recovered transactions. If all is false, - we skip non dictionary transactions. */ - if (!trx->is_recovered || (!all && !trx->ddl_operation)) { + * we skip non dictionary transactions. If all is true, we skip + * dictionary transactions. */ + if (!trx->is_recovered || + (!all && !trx->ddl_operation) || (all && trx->ddl_operation)) { continue; } + const table_id_set &tables = t->second; + + for (table_id_set::const_iterator i = tables.begin(); i != tables.end(); + i++) { + dict_table_t *table = dd_table_open_on_id(*i, NULL, NULL, false, true); + if (table) { + ut_ad(!table->is_temporary()); + + if (table->ibd_file_missing || table->is_temporary()) { + mutex_enter(&dict_sys->mutex); + dd_table_close(table, NULL, NULL, true); + dict_table_remove_from_cache(table); + mutex_exit(&dict_sys->mutex); + continue; + } + + if (trx->state == TRX_STATE_PREPARED && !dict_table_is_sdi(table->id)) { + trx->mod_tables.insert(table); + } + + DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED); + +#ifdef UNIV_DEBUG + ut_a(trx->locked_tables.find(table) == trx->locked_tables.end()); + + trx->locked_tables.insert(table); + + /* Increase the counter so the table object won't be + evicted. */ + table->n_table_locks++; +#endif + dd_table_close(table, NULL, NULL, false); + } + } + } + + mutex_exit(&trx_sys->resurrect_mutex); +} + +void trx_resurrect_locks() { + mutex_enter(&trx_sys->resurrect_mutex); + for (const auto &element : resurrected_trx_tables) { + trx_t *trx = element.first; + + ut_a(trx->is_recovered); + const table_id_set &tables = element.second; + ut_a(!trx->mysql_thd); + ut_a(!trx->recover_mysql_thd); + + THD *recover_mysql_thd = create_thd(false, true, false , 0, 0); + for (auto id : tables) { auto table = dd_table_open_on_id(id, nullptr, nullptr, false, true); @@ -760,20 +850,36 @@ void trx_resurrect_locks(bool all) { continue; } - if (trx->state.load(std::memory_order_relaxed) == TRX_STATE_PREPARED && - !dict_table_is_sdi(table->id)) { - trx->mod_tables.insert(table); - } - DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED); + /** Acquire shared mdl lock */ + std::string db_str; + std::string table_str; + dict_name::get_table(table->name.m_name, db_str, table_str); + + bool ret = dd_mdl_acquire(recover_mysql_thd, nullptr, db_str.c_str(), table_str.c_str(), true); + ut_a(!ret); - lock_table_ix_resurrect(table, trx); DBUG_PRINT("ib_trx", ("resurrect" TRX_ID_FMT " table '%s' IX lock", trx_get_id_for_print(trx), table->name.m_name)); dd_table_close(table, nullptr, nullptr, false); + + trx_mutex_enter(trx); + if (trx->recover_mysql_thd == nullptr) { + trx->recover_mysql_thd = recover_mysql_thd; + } + trx_mutex_exit(trx); + } + + if (trx->recover_mysql_thd == nullptr) { + destroy_thd(recover_mysql_thd); } } + + resurrected_trx_tables.clear(); + + resurrected_trx_done = true; + mutex_exit(&trx_sys->resurrect_mutex); } void trx_clear_resurrected_table_ids() { resurrected_trx_tables.clear(); } @@ -1259,6 +1365,7 @@ static void trx_start_low( ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK)); ut_ad(trx_can_be_handled_by_current_thread_or_is_hp_victim(trx)); + ut_ad(trx->locked_tables.size() == 0); ++trx->version; @@ -1897,7 +2004,18 @@ static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialised) { trx_sys_serialisation_mutex_exit(); } + ut_ad(!trx->recover_mysql_thd || trx->is_recovered); + lock_trx_release_locks(trx); + + /* Release mdl locks if it's recovered from crash recovery */ + trx_mutex_enter(trx); + if (trx->recover_mysql_thd) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); } /** Commits a transaction in memory. */