From ed8c7c84e4f2318184ed32f12d5da2bc4b65c582 Mon Sep 17 00:00:00 2001 From: Satya Bodapati Date: Fri, 1 Feb 2019 09:10:00 +0100 Subject: [PATCH] BUG#95895 - Shutdown takes long time when table uses AUTOINC and FOREIGN KEY constraints Problem: -------- On a database with 2M tables and with a buffer pool size of 20GB or more, shutdown takes approximately 12 minutes. A lot of the tables should have auto-inc mutexes. Analysis: --------- Perf data collected during shutdown revealed that about 82% of the time is spent in mutex_destroy(). With a debug instrumentation patch, it is observed that the mutexes destroyed are AUTOINC mutexes. Still, if all tables with an AUTOINC mutex are accessed, there would be cached table objects (dict_table_t). With a good amount of 'table_definition_cache', 'table_open_cache_instances' and 'open_files_limit', many tables could remain in the cache. The important observation here is that there are about 1.6 million tables in the cache; count one mutex for each of those. Still, why ~10 minutes (82% of the time) for the destruction of these 1.6 million mutexes? These are custom mutexes, implemented by InnoDB, and not pthread mutexes. For these mutexes, to collect statistics, a Counter is created for each mutex. The Counter has information about spins, waits and calls. One counter is created for every mutex instance. So for the AUTOINC mutex type, there is one vector which holds all the Counters. For AUTOINC, the vector would hold 1.6 million Count elements. During shutdown, each mutex is destroyed, and that removes its stat object (Count) from the huge vector. This is a one-by-one removal. Let's see the complexity of removing an element from a vector of size N. To remove one element, it is O(N). And to remove N elements, it is ~ O(N^2). Let's calculate this for 1.6 million. It would be O(1.6 million ^ 2) ~ 2.5 trillion operations. Now we know why it took 82% of the shutdown time :) Fix: ---- A similar problem doesn't exist for block mutexes because, for stats, they use aggregated counters. See BlockMutexPolicy. 
For AUTOINC mutexes too, which can number in the millions, it doesn't make sense to collect mutex stats individually. So use the same strategy as the block mutexes, i.e. aggregate stats. Use ib_bpmutex_t instead of ib_mutex_t for the autoinc mutex of a table. --- storage/innobase/dict/dict0dict.cc | 2 +- storage/innobase/include/dict0mem.h | 4 +++- storage/innobase/include/sync0policy.h | 6 +++--- storage/innobase/include/sync0policy.ic | 27 ++++++++++++++++++++----- storage/innobase/include/ut0mutex.h | 16 +++++++-------- 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 3464591852a..beb81d50295 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -750,7 +750,7 @@ dict_table_autoinc_alloc( void* table_void) { dict_table_t* table = static_cast(table_void); - table->autoinc_mutex = UT_NEW_NOKEY(ib_mutex_t()); + table->autoinc_mutex = UT_NEW_NOKEY(AutoIncMutex()); ut_a(table->autoinc_mutex != NULL); mutex_create(LATCH_ID_AUTOINC, table->autoinc_mutex); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index d00a3efd505..725b11f3e99 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1315,6 +1315,8 @@ if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on the table, DML from memcached will be blocked. */ #define DICT_TABLE_IN_DDL -1 +typedef ib_bpmutex_t AutoIncMutex; + /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t { @@ -1640,7 +1642,7 @@ struct dict_table_t { volatile os_once::state_t autoinc_mutex_created; /** Mutex protecting the autoincrement counter. */ - ib_mutex_t* autoinc_mutex; + AutoIncMutex* autoinc_mutex; /** Autoinc counter value to give to the next inserted row. 
*/ ib_uint64_t autoinc; diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h index 0eaefc7167a..32be1dc6de7 100644 --- a/storage/innobase/include/sync0policy.h +++ b/storage/innobase/include/sync0policy.h @@ -396,7 +396,7 @@ struct GenericPolicy /** Track agregate metrics policy, used by the page mutex. There are just too many of them to count individually. */ template -class BlockMutexPolicy +class AggregateMutexStatsPolicy #ifdef UNIV_DEBUG : public MutexDebug #endif /* UNIV_DEBUG */ @@ -406,7 +406,7 @@ class BlockMutexPolicy typedef typename latch_meta_t::CounterType::Count Count; /** Default constructor. */ - BlockMutexPolicy() + AggregateMutexStatsPolicy() : #ifdef UNIV_DEBUG MutexDebug(), @@ -418,7 +418,7 @@ class BlockMutexPolicy } /** Destructor */ - ~BlockMutexPolicy() { } + ~AggregateMutexStatsPolicy() { } /** Called when the mutex is "created". Note: Not from the constructor but when the mutex is initialised. diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic index f7598fe7854..39287ba6a56 100644 --- a/storage/innobase/include/sync0policy.ic +++ b/storage/innobase/include/sync0policy.ic @@ -32,12 +32,29 @@ std::string GenericPolicy::to_string() const } template -std::string BlockMutexPolicy::to_string() const +std::string AggregateMutexStatsPolicy::to_string() const { - /* I don't think it makes sense to keep track of the file name - and line number for each block mutex. Too much of overhead. Use the - latch id to figure out the location from the source. */ - return(sync_mutex_to_string(get_id(), "buf0buf.cc:0")); + switch (m_id) { + + case LATCH_ID_BUF_BLOCK_MUTEX: + /* I don't think it makes sense to keep track of the file name + and line number for each block mutex. Too much of overhead. + Use the latch id to figure out the location from the source. 
*/ + return(sync_mutex_to_string(get_id(), "buf0buf.cc:0")); + + case LATCH_ID_AUTOINC: + return(sync_mutex_to_string( + get_id(), "dict_table_autoinc_alloc():0")); + + default: + /* Currently only block mutexes and autoinc mutexes use aggregrate + Latch Stat counters. If any new mutex uses this aggregrate, + add entry here */ + ut_ad(0); + } + + ut_ad(0); + return("unknown"); } #ifdef UNIV_DEBUG diff --git a/storage/innobase/include/ut0mutex.h b/storage/innobase/include/ut0mutex.h index 2614e26c7a2..fbab17028da 100644 --- a/storage/innobase/include/ut0mutex.h +++ b/storage/innobase/include/ut0mutex.h @@ -49,34 +49,34 @@ typedef OSMutex EventMutex; # ifdef HAVE_IB_LINUX_FUTEX UT_MUTEX_TYPE(TTASFutexMutex, GenericPolicy, FutexMutex); -UT_MUTEX_TYPE(TTASFutexMutex, BlockMutexPolicy, BlockFutexMutex); +UT_MUTEX_TYPE(TTASFutexMutex, AggregateMutexStatsPolicy, BlockFutexMutex); # endif /* HAVE_IB_LINUX_FUTEX */ UT_MUTEX_TYPE(TTASMutex, GenericPolicy, SpinMutex); -UT_MUTEX_TYPE(TTASMutex, BlockMutexPolicy, BlockSpinMutex); +UT_MUTEX_TYPE(TTASMutex, AggregateMutexStatsPolicy, BlockSpinMutex); UT_MUTEX_TYPE(OSTrackMutex, GenericPolicy, SysMutex); -UT_MUTEX_TYPE(OSTrackMutex, BlockMutexPolicy, BlockSysMutex); +UT_MUTEX_TYPE(OSTrackMutex, AggregateMutexStatsPolicy, BlockSysMutex); UT_MUTEX_TYPE(TTASEventMutex, GenericPolicy, SyncArrayMutex); -UT_MUTEX_TYPE(TTASEventMutex, BlockMutexPolicy, BlockSyncArrayMutex); +UT_MUTEX_TYPE(TTASEventMutex, AggregateMutexStatsPolicy, BlockSyncArrayMutex); #else /* !UNIV_DEBUG */ # ifdef HAVE_IB_LINUX_FUTEX UT_MUTEX_TYPE(TTASFutexMutex, GenericPolicy, FutexMutex); -UT_MUTEX_TYPE(TTASFutexMutex, BlockMutexPolicy, BlockFutexMutex); +UT_MUTEX_TYPE(TTASFutexMutex, AggregateMutexStatsPolicy, BlockFutexMutex); # endif /* HAVE_IB_LINUX_FUTEX */ UT_MUTEX_TYPE(TTASMutex, GenericPolicy, SpinMutex); -UT_MUTEX_TYPE(TTASMutex, BlockMutexPolicy, BlockSpinMutex); +UT_MUTEX_TYPE(TTASMutex, AggregateMutexStatsPolicy, BlockSpinMutex); 
UT_MUTEX_TYPE(OSTrackMutex, GenericPolicy, SysMutex); -UT_MUTEX_TYPE(OSTrackMutex, BlockMutexPolicy, BlockSysMutex); +UT_MUTEX_TYPE(OSTrackMutex, AggregateMutexStatsPolicy, BlockSysMutex); UT_MUTEX_TYPE(TTASEventMutex, GenericPolicy, SyncArrayMutex); -UT_MUTEX_TYPE(TTASEventMutex, BlockMutexPolicy, BlockSyncArrayMutex); +UT_MUTEX_TYPE(TTASEventMutex, AggregateMutexStatsPolicy, BlockSyncArrayMutex); #endif /* !UNIV_DEBUG */