diff --git a/mysql-8.0.32/sql/dd/dictionary.h b/mysql-8.0.32/sql/dd/dictionary.h index f4552a9..b3498cc 100644 --- a/mysql-8.0.32/sql/dd/dictionary.h +++ b/mysql-8.0.32/sql/dd/dictionary.h @@ -190,7 +190,7 @@ class Dictionary { [[nodiscard]] bool acquire_shared_table_mdl(THD *thd, const char *schema_name, const char *table_name, bool no_wait, - MDL_ticket **out_mdl_ticket); + MDL_ticket **out_mdl_ticket, bool transactional = false); /** Predicate to check if we have a shared meta data lock on the @@ -358,6 +358,9 @@ void release_mdl(THD *thd, MDL_ticket *mdl_ticket); /** Get Dictionary_client from THD object (the latter is opaque * in SEs). */ cache::Dictionary_client *get_dd_client(THD *thd); +/** Release mdl which scope is MDL_TRANSACTION */ +void release_transactional_mdl(THD *thd); + /** Create plugin native table. The API would only write metadata to DD and skip calling handler::create(). diff --git a/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc b/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc index 733a0c0..5114049 100644 --- a/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc +++ b/mysql-8.0.32/sql/dd/impl/dictionary_impl.cc @@ -509,10 +509,11 @@ static bool acquire_exclusive_mdl(THD *thd, bool acquire_shared_table_mdl(THD *thd, const char *schema_name, const char *table_name, bool no_wait, - MDL_ticket **out_mdl_ticket) { + MDL_ticket **out_mdl_ticket, bool transactional) { return acquire_shared_mdl(thd, MDL_key::TABLE, schema_name, table_name, no_wait, thd->variables.lock_wait_timeout, - MDL_EXPLICIT, out_mdl_ticket); + transactional ? 
MDL_TRANSACTION : MDL_EXPLICIT, + out_mdl_ticket); } bool has_shared_table_mdl(THD *thd, const char *schema_name, @@ -588,6 +589,10 @@ void release_mdl(THD *thd, MDL_ticket *mdl_ticket) { thd->mdl_context.release_lock(mdl_ticket); } +void release_transactional_mdl(THD *thd) { + thd->mdl_context.release_transactional_locks(); +} + /* purecov: begin deadcode */ cache::Dictionary_client *get_dd_client(THD *thd) { return thd->dd_client(); } /* purecov: end */ @@ -708,6 +713,11 @@ bool reset_tables_and_tablespaces() { // Release transactional metadata locks. thd.thd->mdl_context.release_transactional_locks(); + /* Tell the DD storage engine that its background thread may now start + rolling back recovered transactions. The thread is held back until this + point to avoid a deadlock, because both this function and that thread may acquire MDL locks. */ + ddse->start_rollback(); + return ret; } diff --git a/mysql-8.0.32/sql/handler.h b/mysql-8.0.32/sql/handler.h index 6b24daa..ff511fe 100644 --- a/mysql-8.0.32/sql/handler.h +++ b/mysql-8.0.32/sql/handler.h @@ -1924,6 +1924,8 @@ typedef void (*dict_cache_reset_t)(const char *schema_name, typedef void (*dict_cache_reset_tables_and_tablespaces_t)(); +typedef void (*start_rollback_t)(); + /** Mode for data dictionary recovery. 
*/ enum dict_recovery_mode_t { DICT_RECOVERY_INITIALIZE_SERVER, ///< First start of a new server @@ -2665,6 +2667,7 @@ struct handlerton { dict_cache_reset_t dict_cache_reset; dict_cache_reset_tables_and_tablespaces_t dict_cache_reset_tables_and_tablespaces; + start_rollback_t start_rollback; dict_recover_t dict_recover; dict_get_server_version_t dict_get_server_version; dict_set_server_version_t dict_set_server_version; diff --git a/mysql-8.0.32/storage/innobase/dict/dict0dd.cc b/mysql-8.0.32/storage/innobase/dict/dict0dd.cc index 3010f58..3e5cad5 100644 --- a/mysql-8.0.32/storage/innobase/dict/dict0dd.cc +++ b/mysql-8.0.32/storage/innobase/dict/dict0dd.cc @@ -303,6 +303,10 @@ void dd_mdl_release(THD *thd, MDL_ticket **mdl) { *mdl = nullptr; } +void dd_mdl_release_transactional(THD *thd) { + dd::release_transactional_mdl(thd); +} + THD *dd_thd_for_undo(const trx_t *trx) { return trx->mysql_thd == nullptr ? current_thd : trx->mysql_thd; } diff --git a/mysql-8.0.32/storage/innobase/dict/dict0dict.cc b/mysql-8.0.32/storage/innobase/dict/dict0dict.cc index 0d26f64..0b0c44e 100644 --- a/mysql-8.0.32/storage/innobase/dict/dict0dict.cc +++ b/mysql-8.0.32/storage/innobase/dict/dict0dict.cc @@ -1884,6 +1884,7 @@ static void dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); ut_a(table->get_ref_count() == 0); ut_a(table->n_rec_locks.load() == 0); + ut_ad(table->n_table_locks.load() == 0); ut_ad(dict_sys_mutex_own()); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); diff --git a/mysql-8.0.32/storage/innobase/dict/mem.cc b/mysql-8.0.32/storage/innobase/dict/mem.cc index 3a95df0..64f829d 100644 --- a/mysql-8.0.32/storage/innobase/dict/mem.cc +++ b/mysql-8.0.32/storage/innobase/dict/mem.cc @@ -244,6 +244,7 @@ dict_table_t *dict_mem_table_create(const char *name, space_id_t space, table->autoinc_field_no = ULINT_UNDEFINED; table->sess_row_id = 0; table->sess_trx_id = 0; + table->n_table_locks = 0; /* If the table has an FTS index or we are in the process of building one, 
create the table->fts */ diff --git a/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc b/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc index 4aeacf0..9885b36 100644 --- a/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc +++ b/mysql-8.0.32/storage/innobase/handler/ha_innodb.cc @@ -741,6 +741,7 @@ static PSI_mutex_info all_innodb_mutexes[] = { PSI_MUTEX_KEY(temp_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), PSI_MUTEX_KEY(undo_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), PSI_MUTEX_KEY(trx_sys_rseg_mutex, 0, 0, PSI_DOCUMENT_ME), + PSI_MUTEX_KEY(trx_sys_resurrect_mutex, 0, 0, PSI_DOCUMENT_ME), #ifdef UNIV_DEBUG PSI_MUTEX_KEY(rw_lock_debug_mutex, 0, 0, PSI_DOCUMENT_ME), #endif /* UNIV_DEBUG */ @@ -4026,6 +4027,17 @@ static void innobase_dict_cache_reset_tables_and_tablespaces() { dict_sys_mutex_exit(); } +static void innobase_start_rollback() { + ut_a(!trx_sys->start_rollback); + trx_sys->start_rollback = true; + /* Setting the flag above releases trx_recovery_rollback_thread() from its start-up wait loop. */ + /* Wait until resurrect_lock_done is set, i.e. until the MDL locks + have been resurrected (or no rollback thread was started at all). */ + while (!trx_sys->resurrect_lock_done) { + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + } +} + /** Perform high-level recovery in InnoDB as part of initializing the data dictionary. 
@param[in] dict_recovery_mode How to do recovery @@ -5172,6 +5184,7 @@ static int innodb_init(void *p) { innobase_hton->dict_cache_reset = innobase_dict_cache_reset; innobase_hton->dict_cache_reset_tables_and_tablespaces = innobase_dict_cache_reset_tables_and_tablespaces; + innobase_hton->start_rollback = innobase_start_rollback; innobase_hton->dict_recover = innobase_dict_recover; innobase_hton->dict_get_server_version = innobase_dict_get_server_version; @@ -20083,6 +20096,7 @@ static xa_status_code innobase_commit_by_xid( /* use cases are: disconnected xa, slave xa, recovery */ trx_deregister_from_2pc(trx); ut_ad(!trx->will_lock); /* trx cache requirement */ + trx_resurrect_erase(trx); trx_free_for_background(trx); return (XA_OK); @@ -20110,6 +20124,7 @@ static xa_status_code innobase_rollback_by_xid( trx_deregister_from_2pc(trx); ut_ad(!trx->will_lock); + trx_resurrect_erase(trx); trx_free_for_background(trx); return (ret != 0 ? XAER_RMERR : XA_OK); diff --git a/mysql-8.0.32/storage/innobase/include/dict0dd.h b/mysql-8.0.32/storage/innobase/include/dict0dd.h index aff64a0..b0e03cc 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0dd.h +++ b/mysql-8.0.32/storage/innobase/include/dict0dd.h @@ -887,13 +887,15 @@ MY_COMPILER_DIAGNOSTIC_POP() @retval true if failed (my_error() will have been called) */ [[nodiscard]] static inline bool dd_mdl_acquire(THD *thd, MDL_ticket **mdl, const char *db, - const char *table); + const char *table, bool is_transactional = false); /** Release a metadata lock. 
@param[in,out] thd current thread @param[in,out] mdl metadata lock */ void dd_mdl_release(THD *thd, MDL_ticket **mdl); +void dd_mdl_release_transactional(THD *thd); + /** Returns thd associated with the trx or current_thd @param[in] trx transaction @return trx->mysql_thd or current_thd */ diff --git a/mysql-8.0.32/storage/innobase/include/dict0dd.ic b/mysql-8.0.32/storage/innobase/include/dict0dd.ic index 5ac19d3..67d6926 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0dd.ic +++ b/mysql-8.0.32/storage/innobase/include/dict0dd.ic @@ -291,7 +291,7 @@ inline const char *get_row_format_name(enum row_type row_format) { @retval true if failed (my_error() will have been called) */ [[nodiscard]] static inline bool dd_mdl_acquire(THD *thd, MDL_ticket **mdl, const char *db, - const char *table) { + const char *table, bool is_transactional) { bool ret = false; char table_name[MAX_TABLE_NAME_LEN + 1]; const char *table_p = table; @@ -329,7 +329,7 @@ inline const char *get_row_format_name(enum row_type row_format) { #endif /* !_WIN32 */ } - ret = dd::acquire_shared_table_mdl(thd, db_p, table_p, false, mdl); + ret = dd::acquire_shared_table_mdl(thd, db_p, table_p, false, mdl, is_transactional); return (ret); } diff --git a/mysql-8.0.32/storage/innobase/include/dict0mem.h b/mysql-8.0.32/storage/innobase/include/dict0mem.h index 8c7674e..1ec122e 100644 --- a/mysql-8.0.32/storage/innobase/include/dict0mem.h +++ b/mysql-8.0.32/storage/innobase/include/dict0mem.h @@ -2379,6 +2379,10 @@ detect this and will eventually quit sooner. */ - Some places assert this field is positive holding only shared latch. */ std::atomic n_rec_locks; + /** table lock counter holded by transaction. the table can't be + evicted from cache if not equal to zero. 
*/ + std::atomic<size_t> n_table_locks; + #ifndef UNIV_DEBUG private: #endif diff --git a/mysql-8.0.32/storage/innobase/include/sync0sync.h b/mysql-8.0.32/storage/innobase/include/sync0sync.h index f7bedc3..3aa09ab 100644 --- a/mysql-8.0.32/storage/innobase/include/sync0sync.h +++ b/mysql-8.0.32/storage/innobase/include/sync0sync.h @@ -161,6 +161,7 @@ extern mysql_pfs_key_t lock_sys_page_mutex_key; extern mysql_pfs_key_t lock_sys_table_mutex_key; extern mysql_pfs_key_t lock_wait_mutex_key; extern mysql_pfs_key_t trx_sys_mutex_key; +extern mysql_pfs_key_t trx_sys_resurrect_mutex_key; extern mysql_pfs_key_t trx_sys_shard_mutex_key; extern mysql_pfs_key_t trx_sys_serialisation_mutex_key; extern mysql_pfs_key_t srv_sys_mutex_key; diff --git a/mysql-8.0.32/storage/innobase/include/sync0types.h b/mysql-8.0.32/storage/innobase/include/sync0types.h index eadacaa..b5ed73b 100644 --- a/mysql-8.0.32/storage/innobase/include/sync0types.h +++ b/mysql-8.0.32/storage/innobase/include/sync0types.h @@ -416,6 +416,7 @@ enum latch_id_t { LATCH_ID_TEMP_POOL_MANAGER, LATCH_ID_TRX, LATCH_ID_TRX_SYS, + LATCH_ID_TRX_SYS_RESURRECT, LATCH_ID_TRX_SYS_SHARD, LATCH_ID_TRX_SYS_SERIALISATION, LATCH_ID_SRV_SYS, diff --git a/mysql-8.0.32/storage/innobase/include/trx0sys.h b/mysql-8.0.32/storage/innobase/include/trx0sys.h index 33b287a..7c372b5 100644 --- a/mysql-8.0.32/storage/innobase/include/trx0sys.h +++ b/mysql-8.0.32/storage/innobase/include/trx0sys.h @@ -503,6 +503,9 @@ struct trx_sys_t { /** Mutex to protect serialisation_list. */ TrxSysMutex serialisation_mutex; + /** Mutex protecting the resurrected_trx_tables map (trx0trx.cc). */ + TrxSysMutex resurrect_mutex; + /** Tracks minimal transaction id which has received trx->no, but has not yet finished commit for the mtr writing the trx commit. Protected by the serialisation_mutex. Ordered on the trx->no field. */ @@ -561,6 +564,13 @@ struct trx_sys_t { /** True if XA PREPARED trxs are found. */ bool found_prepared_trx; + /** Tell background thread to start rollback. 
*/ + std::atomic<bool> start_rollback; + + /** True once the background thread has resurrected + all MDL locks for tables waiting for rollback. */ + std::atomic<bool> resurrect_lock_done; + /** @} */ char pad_after[ut::INNODB_CACHE_LINE_SIZE]; diff --git a/mysql-8.0.32/storage/innobase/include/trx0trx.h b/mysql-8.0.32/storage/innobase/include/trx0trx.h index 8190ea4..bb11d07 100644 --- a/mysql-8.0.32/storage/innobase/include/trx0trx.h +++ b/mysql-8.0.32/storage/innobase/include/trx0trx.h @@ -97,7 +97,11 @@ trx_t *trx_allocate_for_background(void); /** Resurrect table locks for resurrected transactions. @param[in] all false: resurrect locks for dictionary transactions, true : resurrect locks for all transactions. */ -void trx_resurrect_locks(bool all); +void trx_resurrect_modified_tables(bool all); +/** Acquire transaction-duration shared MDL on the tables of every resurrected transaction. */ +void trx_resurrect_locks(); +/** Remove trx from the map of resurrected transactions while that map is still in use. */ +void trx_resurrect_erase(trx_t *trx); /** Clear all resurrected table IDs. Needs to be called after all tables locks are resurrected. */ @@ -937,6 +941,9 @@ struct trx_t { THD *mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ + THD *recover_mysql_thd; /*!< Dedicated THD that holds the + MDL locks resurrected for this trx. 
*/ + const char *mysql_log_file_name; /*!< if MySQL binlog is used, this field contains a pointer to the latest file @@ -1083,6 +1090,9 @@ struct trx_t { transaction branch */ trx_mod_tables_t mod_tables; /*!< List of tables that were modified by this transaction */ + + trx_mod_tables_t locked_tables; /*!< Tables counted in dict_table_t::n_table_locks on behalf of this trx; populated only inside UNIV_DEBUG sections */ + #endif /* !UNIV_HOTBACKUP */ /*------------------------------*/ bool api_trx; /*!< trx started by InnoDB API */ diff --git a/mysql-8.0.32/storage/innobase/lock/lock0lock.cc b/mysql-8.0.32/storage/innobase/lock/lock0lock.cc index d002914..c2f08a5 100644 --- a/mysql-8.0.32/storage/innobase/lock/lock0lock.cc +++ b/mysql-8.0.32/storage/innobase/lock/lock0lock.cc @@ -59,6 +59,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include "usr0sess.h" #include "ut0new.h" #include "ut0vec.h" +#include "dict0dd.h" #include "my_dbug.h" #include "my_psi_config.h" @@ -3763,6 +3764,28 @@ dberr_t lock_table(ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, trx_set_rw_mode(trx); } + if (mode != LOCK_AUTO_INC) { +#ifdef UNIV_DEBUG + ut_ad(!dict_table_is_sdi(table->id) || + mode == LOCK_IS || mode == LOCK_IX); + + ut_ad(!table->is_fts_aux() || + mode == LOCK_IS || mode == LOCK_IX); + + /* Only the current transaction operates on the set, so the + membership check can be done without holding trx_t::mutex. NOTE(review): n_table_locks is updated only under UNIV_DEBUG here, while lock_table_has_locks() reads it unconditionally - confirm release builds are covered. */ + if (trx->locked_tables.find(table) == trx->locked_tables.end()) { + trx_mutex_enter(trx); + trx->locked_tables.insert(table); + table->n_table_locks++; + trx_mutex_exit(trx); + } +#endif + + return (DB_SUCCESS); + } + + locksys::Shard_latch_guard table_latch_guard{UT_LOCATION_HERE, *table}; /* We have to check if the new lock is compatible with any locks @@ -4378,6 +4401,16 @@ static void lock_remove_all_on_table_for_trx( } } +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(table) != trx->locked_tables.end()) { + trx->locked_tables.erase(table); + ut_a(table->n_table_locks.load() > 0); + table->n_table_locks--; + } 
+#endif + + ut_a(trx->recover_mysql_thd == nullptr); + trx_mutex_exit(trx); } @@ -4434,6 +4467,20 @@ static ulint lock_remove_recovered_trx_record_locks( } } +#ifdef UNIV_DEBUG + if (trx->locked_tables.find(table) != trx->locked_tables.end()) { + trx->locked_tables.erase(table); + ut_a(table->n_table_locks.load() > 0); + table->n_table_locks--; + } +#endif + + if (trx->recover_mysql_thd != nullptr) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); ++n_recovered_trx; } @@ -6192,6 +6239,18 @@ void lock_trx_release_locks(trx_t *trx) /*!< in/out: transaction */ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); ut_a(ib_vector_is_empty(trx->lock.autoinc_locks)); +#ifdef UNIV_DEBUG + /* Decrease table lock counter */ + trx_mod_tables_t::const_iterator end = trx->locked_tables.end(); + for (trx_mod_tables_t::const_iterator it = trx->locked_tables.begin(); + it != end; it++) { + ut_a((*it)->n_table_locks.load() > 0); + (*it)->n_table_locks--; + } + + trx->locked_tables.clear(); +#endif + mem_heap_empty(trx->lock.lock_heap); trx_mutex_exit(trx); } @@ -6272,7 +6331,7 @@ bool lock_table_has_locks(const dict_table_t *table) { /* As soon as we return false the caller might free the table object, so it is crucial that when lock_table_dequeue() removes the last lock on the table then the thread calling it won't dereference the table pointer anymore. */ - has_locks = UT_LIST_GET_LEN(table->locks) > 0; + has_locks = (UT_LIST_GET_LEN(table->locks) > 0 || table->n_table_locks.load() > 0); } #ifdef UNIV_DEBUG diff --git a/mysql-8.0.32/storage/innobase/srv/srv0start.cc b/mysql-8.0.32/storage/innobase/srv/srv0start.cc index 2938d53..25c58c3 100644 --- a/mysql-8.0.32/storage/innobase/srv/srv0start.cc +++ b/mysql-8.0.32/storage/innobase/srv/srv0start.cc @@ -2532,7 +2532,7 @@ static void apply_dynamic_metadata() { any tables (including data dictionary tables) can be accessed. 
*/ void srv_dict_recover_on_restart() { /* Resurrect locks for dictionary transactions */ - trx_resurrect_locks(false); + trx_resurrect_modified_tables(false); /* Roll back any recovered data dictionary transactions, so that the data dictionary tables will be free of any locks. @@ -2575,9 +2575,7 @@ void srv_dict_recover_on_restart() { Note: The current work around fixes both issues but ideally should not be required if base issues [TODOs] are fixed. */ - trx_resurrect_locks(true); - - trx_clear_resurrected_table_ids(); + trx_resurrect_modified_tables(true); /* Do after all DD transactions recovery, to get consistent metadata */ apply_dynamic_metadata(); @@ -2646,6 +2644,7 @@ void srv_start_threads(bool bootstrap) { srv_threads.m_buf_resize.start(); if (srv_read_only_mode) { + trx_sys->resurrect_lock_done = true; purge_sys->state = PURGE_STATE_DISABLED; return; } @@ -2658,6 +2657,8 @@ void srv_start_threads(bool bootstrap) { trx_recovery_rollback_thread_key, 0, trx_recovery_rollback_thread); srv_threads.m_trx_recovery_rollback.start(); + } else { + trx_sys->resurrect_lock_done = true; } /* Create the master thread which does purge and other utility diff --git a/mysql-8.0.32/storage/innobase/sync/sync0debug.cc b/mysql-8.0.32/storage/innobase/sync/sync0debug.cc index 7f7078d..2747f8b 100644 --- a/mysql-8.0.32/storage/innobase/sync/sync0debug.cc +++ b/mysql-8.0.32/storage/innobase/sync/sync0debug.cc @@ -1320,6 +1320,8 @@ static void sync_latch_meta_init() UNIV_NOTHROW { LATCH_ADD_MUTEX(TRX_SYS_RSEG, SYNC_TRX_SYS_RSEG, trx_sys_rseg_mutex_key); + LATCH_ADD_MUTEX(TRX_SYS_RESURRECT, SYNC_NO_ORDER_CHECK, trx_sys_resurrect_mutex_key); + #ifdef UNIV_DEBUG /* Mutex names starting with '.' are not tracked. They are assumed to be diagnostic mutexes used in debugging. 
*/ diff --git a/mysql-8.0.32/storage/innobase/sync/sync0sync.cc b/mysql-8.0.32/storage/innobase/sync/sync0sync.cc index 2d9e30e..c803f87 100644 --- a/mysql-8.0.32/storage/innobase/sync/sync0sync.cc +++ b/mysql-8.0.32/storage/innobase/sync/sync0sync.cc @@ -128,6 +128,7 @@ mysql_pfs_key_t lock_sys_table_mutex_key; mysql_pfs_key_t lock_sys_page_mutex_key; mysql_pfs_key_t lock_wait_mutex_key; mysql_pfs_key_t trx_sys_mutex_key; +mysql_pfs_key_t trx_sys_resurrect_mutex_key; mysql_pfs_key_t trx_sys_shard_mutex_key; mysql_pfs_key_t trx_sys_serialisation_mutex_key; mysql_pfs_key_t srv_sys_mutex_key; diff --git a/mysql-8.0.32/storage/innobase/trx/trx0roll.cc b/mysql-8.0.32/storage/innobase/trx/trx0roll.cc index 0ac5fae..1482cae 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0roll.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0roll.cc @@ -763,6 +763,7 @@ void trx_rollback_or_clean_recovered( we need to reacquire it before retrying the loop. */ if (trx_rollback_or_clean_resurrected(trx, all)) { trx_sys_mutex_enter(); + trx_resurrect_erase(trx); need_one_more_scan = true; break; } @@ -781,6 +782,15 @@ committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. Note: this is done in a background thread. 
*/ void trx_recovery_rollback_thread() { + while (!trx_sys->start_rollback) { + std::this_thread::sleep_for(std::chrono::microseconds(1000)); + } + /* The loop above holds this thread back until innobase_start_rollback() sets start_rollback. */ + /* Acquire MDL locks on the tables of the resurrected transactions. */ + trx_resurrect_locks(); + /* Let innobase_start_rollback(), which polls this flag, return. */ + trx_sys->resurrect_lock_done = true; + THD *thd = create_internal_thd(); ut_ad(!srv_read_only_mode); @@ -792,6 +802,7 @@ void trx_recovery_rollback_thread() { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } + trx_rollback_or_clean_recovered(true); destroy_internal_thd(thd); diff --git a/mysql-8.0.32/storage/innobase/trx/trx0sys.cc b/mysql-8.0.32/storage/innobase/trx/trx0sys.cc index 7a507cd..b5f419e 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0sys.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0sys.cc @@ -564,6 +564,7 @@ void trx_sys_create(void) { mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex); mutex_create(LATCH_ID_TRX_SYS_SERIALISATION, &trx_sys->serialisation_mutex); + mutex_create(LATCH_ID_TRX_SYS_RESURRECT, &trx_sys->resurrect_mutex); UT_LIST_INIT(trx_sys->serialisation_list); UT_LIST_INIT(trx_sys->rw_trx_list); @@ -587,6 +588,9 @@ void trx_sys_create(void) { new (&trx_sys->tmp_rsegs) Rsegs(); trx_sys->tmp_rsegs.set_empty(); + + trx_sys->start_rollback = false; + trx_sys->resurrect_lock_done = false; } /** Creates and initializes the transaction system at the database creation. */ @@ -650,6 +654,7 @@ void trx_sys_close(void) { /* We used placement new to create this mutex. Call the destructor. 
*/ mutex_free(&trx_sys->serialisation_mutex); mutex_free(&trx_sys->mutex); + mutex_free(&trx_sys->resurrect_mutex); trx_sys->rw_trx_ids.~trx_ids_t(); diff --git a/mysql-8.0.32/storage/innobase/trx/trx0trx.cc b/mysql-8.0.32/storage/innobase/trx/trx0trx.cc index 29a5a9b..f4ad359 100644 --- a/mysql-8.0.32/storage/innobase/trx/trx0trx.cc +++ b/mysql-8.0.32/storage/innobase/trx/trx0trx.cc @@ -83,6 +83,7 @@ typedef std::map, /** Map of resurrected transactions to affected table_id */ static trx_table_map resurrected_trx_tables; +static std::atomic resurrected_trx_done {false}; /** Dummy session used currently in MySQL interface */ sess_t *trx_dummy_sess = nullptr; @@ -221,6 +222,8 @@ static void trx_init(trx_t *trx) { trx->error_index = nullptr; + trx->recover_mysql_thd = nullptr; + /* During asynchronous rollback, we should reset forced rollback flag only after rollback is complete to avoid race with the thread owning the transaction. */ @@ -302,6 +305,7 @@ struct TrxFactory { mutex_free(&trx->undo_mutex); trx->mod_tables.~trx_mod_tables_t(); + trx->locked_tables.~trx_mod_tables_t(); ut_ad(trx->read_view == nullptr); @@ -471,6 +475,7 @@ static trx_t *trx_create_low() { /* Should have been either just initialized or .clear()ed by trx_free(). */ ut_a(trx->mod_tables.size() == 0); + ut_a(trx->locked_tables.size() == 0); return (trx); } @@ -483,6 +488,15 @@ static void trx_free(trx_t *&trx) { trx->mysql_thd = nullptr; + trx_mutex_enter(trx); + if (trx->recover_mysql_thd) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); + + // FIXME: We need to avoid this heap free/alloc for each commit. 
if (trx->lock.autoinc_locks != nullptr) { ut_ad(ib_vector_is_empty(trx->lock.autoinc_locks)); @@ -493,6 +507,8 @@ static void trx_free(trx_t *&trx) { trx->mod_tables.clear(); + ut_a(trx->locked_tables.size() == 0); + ut_ad(trx->read_view == nullptr); ut_ad(trx->is_dd_trx == false); @@ -731,18 +747,92 @@ static void trx_resurrect_table_ids(trx_t *trx, const trx_undo_ptr_t *undo_ptr, mtr_commit(&mtr); } -void trx_resurrect_locks(bool all) { - for (const auto &element : resurrected_trx_tables) { - trx_t *trx = element.first; +void trx_resurrect_erase(trx_t* trx) { + if (!resurrected_trx_done.load()) { + mutex_enter(&trx_sys->resurrect_mutex); + resurrected_trx_tables.erase(trx); + + if (resurrected_trx_tables.empty()) { + resurrected_trx_done = true; + } + + mutex_exit(&trx_sys->resurrect_mutex); + } +#ifdef UNIV_DEBUG + mutex_enter(&trx_sys->resurrect_mutex); + ut_ad(resurrected_trx_tables.find(trx) + == resurrected_trx_tables.end()); + mutex_exit(&trx_sys->resurrect_mutex); +#endif +} + +void trx_resurrect_modified_tables(bool all) { + mutex_enter(&trx_sys->resurrect_mutex); + for (trx_table_map::const_iterator t = resurrected_trx_tables.begin(); + t != resurrected_trx_tables.end(); t++) { + trx_t *trx = t->first; /* We deal only with recovered transactions. If all is false, - we skip non dictionary transactions. */ - if (!trx->is_recovered || (!all && !trx->ddl_operation)) { + * we skip non dictionary transactions. If all is true, we skip + * dictionary transactions. 
*/ + if (!trx->is_recovered || + (!all && !trx->ddl_operation) || (all && trx->ddl_operation)) { continue; } + const table_id_set &tables = t->second; + + for (table_id_set::const_iterator i = tables.begin(); i != tables.end(); + ++i) { + dict_table_t *table = dd_table_open_on_id(*i, nullptr, nullptr, false, true); + if (table) { + ut_ad(!table->is_temporary()); + + if (table->ibd_file_missing || table->is_temporary()) { + dict_sys_mutex_enter(); + dd_table_close(table, nullptr, nullptr, true); + dict_table_remove_from_cache(table); + dict_sys_mutex_exit(); + continue; + } + + if (trx->state == TRX_STATE_PREPARED && !dict_table_is_sdi(table->id)) { + trx->mod_tables.insert(table); + } + + DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED); + +#ifdef UNIV_DEBUG + ut_a(trx->locked_tables.find(table) == trx->locked_tables.end()); + + trx->locked_tables.insert(table); + + /* Increase the counter so the table object won't be + evicted. */ + table->n_table_locks++; +#endif + dd_table_close(table, nullptr, nullptr, false); + } + } + } + + mutex_exit(&trx_sys->resurrect_mutex); +} +/** For every resurrected transaction, open its tables and take transaction-duration shared MDL on them through a dedicated THD that is then stored in trx->recover_mysql_thd. */ +void trx_resurrect_locks() { + mutex_enter(&trx_sys->resurrect_mutex); + for (const auto &element : resurrected_trx_tables) { + trx_t *trx = element.first; + + ut_a(trx->is_recovered); + const table_id_set &tables = element.second; + ut_a(!trx->mysql_thd); + ut_a(!trx->recover_mysql_thd); + + THD *recover_mysql_thd = create_thd(false, true, false, 0, 0); + for (auto id : tables) { auto table = dd_table_open_on_id(id, nullptr, nullptr, false, true); @@ -760,20 +850,36 @@ void trx_resurrect_locks(bool all) { continue; } - if (trx->state.load(std::memory_order_relaxed) == TRX_STATE_PREPARED && - !dict_table_is_sdi(table->id)) { - trx->mod_tables.insert(table); - } - DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED); + /* Acquire a shared, transaction-duration MDL on the table. */ + std::string db_str; + std::string table_str; + dict_name::get_table(table->name.m_name, db_str, table_str); + + bool ret = 
dd_mdl_acquire(recover_mysql_thd, nullptr, db_str.c_str(), table_str.c_str(), true); + ut_a(!ret); - lock_table_ix_resurrect(table, trx); DBUG_PRINT("ib_trx", ("resurrect" TRX_ID_FMT " table '%s' IX lock", trx_get_id_for_print(trx), table->name.m_name)); dd_table_close(table, nullptr, nullptr, false); + + trx_mutex_enter(trx); + if (trx->recover_mysql_thd == nullptr) { + trx->recover_mysql_thd = recover_mysql_thd; + } + trx_mutex_exit(trx); + } + + if (trx->recover_mysql_thd == nullptr) { + destroy_thd(recover_mysql_thd); } } + + resurrected_trx_tables.clear(); + + resurrected_trx_done = true; + mutex_exit(&trx_sys->resurrect_mutex); } void trx_clear_resurrected_table_ids() { resurrected_trx_tables.clear(); } @@ -1259,6 +1365,7 @@ static void trx_start_low( ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK)); ut_ad(trx_can_be_handled_by_current_thread_or_is_hp_victim(trx)); + ut_ad(trx->locked_tables.size() == 0); ++trx->version; @@ -1897,7 +2004,18 @@ static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialised) { trx_sys_serialisation_mutex_exit(); } + ut_ad(!trx->recover_mysql_thd || trx->is_recovered); + lock_trx_release_locks(trx); + + /* Release mdl locks if it's recovered from crash recovery */ + trx_mutex_enter(trx); + if (trx->recover_mysql_thd) { + dd_mdl_release_transactional(trx->recover_mysql_thd); + destroy_thd(trx->recover_mysql_thd); + trx->recover_mysql_thd = nullptr; + } + trx_mutex_exit(trx); } /** Commits a transaction in memory. */