commit e0f27673ce890b343e5296d2476942e7d67847d6
Author: Laurynas Biveinis
Date:   Mon Feb 12 04:32:44 2018 +0200

    Fix bug 70500 by implementing a multithreaded asynchronous LRU flusher

    - New per-buffer-pool-instance server background thread
      buf_lru_manager_thread that, in a loop, flushes its assigned instance
      in LRU mode while the server is in the crash recovery, regular
      operation, or shutdown cleanup (i.e. purge) phases. It has two new
      helper functions, buf_lru_manager_sleep_if_needed and
      buf_lru_manager_adapt_sleep_time, and is registered for Performance
      Schema instrumentation.

    - Track the number of currently active LRU manager threads in an
      atomic counter buf_lru_manager_running_threads and use it to
      synchronize server shutdown with the LRU thread exits: during
      SRV_SHUTDOWN_FLUSH_PHASE, wait for the LRU threads to quit in
      addition to checking whether the page cleaner thread has quit.
      Adjust the sleep heuristics of both the cleaner and the LRU manager
      threads to skip any sleeps during SRV_SHUTDOWN_FLUSH_PHASE.

    - Remove the LRU-flushing-related fields and code from struct
      page_cleaner_slot_t, page_cleaner_flush_pages_recommendation,
      pc_flush_slot, pc_wait_finished, and
      buf_flush_page_cleaner_coordinator. Remove buf_flush_stats and
      pc_flush entirely.

    - Stop updating the following InnoDB metrics:
      buffer_LRU_batch_flush_avg_time_slot,
      buffer_LRU_batch_flush_avg_pass,
      buffer_LRU_batch_flush_avg_time_thread, and
      buffer_LRU_batch_flush_avg_time_est.

    - Remove the LRU-specific flushing logic from crash recovery, as the
      LRU manager threads are started early enough to service those
      requests too. This removes PSI_KEY(recv_writer_mutex),
      PSI_KEY(recv_writer_thread), recv_sys_t::writer_mutex,
      recv_writer_thread_active, recv_writer_thread_key,
      recv_writer_mutex_key, SYNC_RECV_WRITER, LATCH_ID_RECV_WRITER, and
      recv_writer_thread. Consequently remove the recv_sys_t::flush_type
      field (it can now be assumed to be BUF_FLUSH_LIST unconditionally)
      and simplify buf_flush_page_cleaner_coordinator accordingly.

diff --git a/mysql-test/suite/perfschema/r/threads_innodb.result b/mysql-test/suite/perfschema/r/threads_innodb.result
index 216520a81de..8bc0ceb18da 100644
--- a/mysql-test/suite/perfschema/r/threads_innodb.result
+++ b/mysql-test/suite/perfschema/r/threads_innodb.result
@@ -7,6 +7,7 @@ AND name NOT LIKE 'thread/innodb/trx\_recovery\_rollback\_thread'
 GROUP BY name;
 name	type	processlist_user	processlist_host	processlist_db	processlist_command	processlist_state	processlist_info	parent_thread_id	role	instrumented
 thread/innodb/buf_dump_thread	BACKGROUND	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	YES
+thread/innodb/buf_lru_manager_thread	BACKGROUND	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	YES
 thread/innodb/buf_resize_thread	BACKGROUND	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	YES
 thread/innodb/dict_stats_thread	BACKGROUND	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	YES
 thread/innodb/fts_optimize_thread	BACKGROUND	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	YES
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index b03d417e7b0..26bcd18df0a 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -88,6 +88,9 @@ need to protect it by a mutex. It is only ever read by the
 thread doing the shutdown */
 bool buf_page_cleaner_is_active = false;
 
+/** The number of running LRU manager threads. 0 if LRU manager is inactive. */
+std::atomic<ulint> buf_lru_manager_running_threads(0);
+
 #ifndef UNIV_HOTBACKUP
 /** Factor for scan length to determine n_pages for intended oldest LSN
 progress */
@@ -102,6 +105,7 @@ static lsn_t buf_flush_sync_lsn = 0;
 #ifdef UNIV_PFS_THREAD
 mysql_pfs_key_t page_flush_thread_key;
 mysql_pfs_key_t page_flush_coordinator_thread_key;
+mysql_pfs_key_t buf_lru_manager_thread_key;
 #endif /* UNIV_PFS_THREAD */
 
 /** Event to synchronise with the flushing. */
@@ -129,8 +133,8 @@ struct page_cleaner_slot_t {
 					protected by page_cleaner_t::mutex
 					if the worker thread got the slot and
 					set to PAGE_CLEANER_STATE_FLUSHING,
-					n_flushed_lru and n_flushed_list can be
-					updated only by the worker thread */
+					n_flushed_list can be updated only by
+					the worker thread */
 	/* This value is set during state==PAGE_CLEANER_STATE_NONE */
 	ulint			n_pages_requested;
 					/*!< number of requested pages
@@ -138,22 +142,15 @@ struct page_cleaner_slot_t {
 	/* These values are updated during state==PAGE_CLEANER_STATE_FLUSHING,
 	and commited with state==PAGE_CLEANER_STATE_FINISHED.
 	The consistency is protected by the 'state' */
-	ulint			n_flushed_lru;
-					/*!< number of flushed and evicted
-					pages by LRU scan flushing */
 	ulint			n_flushed_list;
 					/*!< number of flushed pages
 					by flush_list flushing */
 	bool			succeeded_list;
 					/*!< true if flush_list flushing
 					succeeded. */
-	ulint			flush_lru_time;
-					/*!< elapsed time for LRU flushing */
 	ulint			flush_list_time;
 					/*!< elapsed time for flush_list
 					flushing */
-	ulint			flush_lru_pass;
-					/*!< count to attempt LRU flushing */
 	ulint			flush_list_pass;
 					/*!< count to attempt flush_list
 					flushing */
@@ -228,6 +225,14 @@ static
 void
 buf_flush_page_cleaner_thread();
 
+/** LRU manager thread performing LRU flushes and evictions to refill the
+buffer pool free lists. One thread is created for each buffer pool
+instance; each thread computes its instance number from the running
+thread counter at startup. */
+static
+void
+buf_lru_manager_thread();
+
 /******************************************************************//**
 Increases flush_list size in bytes with the page size in inline function */
 static inline
@@ -2023,26 +2028,6 @@ buf_flush_batch(
 	return(res);
 }
 
-/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing.
-@param page_count_flush	number of pages flushed from the end of the flush_list
-@param page_count_LRU	number of pages flushed from the end of the LRU list
-*/
-static
-void
-buf_flush_stats(
-/*============*/
-	ulint		page_count_flush,
-	ulint		page_count_LRU)
-{
-	DBUG_PRINT("ib_buf", ("flush completed, from flush_list %u pages, "
-			      "from LRU_list %u pages",
-			      unsigned(page_count_flush),
-			      unsigned(page_count_LRU)));
-
-	srv_stats.buf_pool_flushed.add(page_count_flush + page_count_LRU);
-}
-
 /** Start a buffer flush batch for LRU or flush list
 @param[in]	buf_pool	buffer pool instance
 @param[in]	flush_type	BUF_FLUSH_LRU or BUF_FLUSH_LIST */
@@ -2302,7 +2287,7 @@ buf_flush_lists(
 	}
 
 	if (n_flushed) {
-		buf_flush_stats(n_flushed, 0);
+		srv_stats.buf_pool_flushed.add(n_flushed);
 	}
 
 	if (n_processed) {
@@ -2620,9 +2605,7 @@ page_cleaner_flush_pages_recommendation(
 	page_cleaner->flush_time = 0;
 	page_cleaner->flush_pass = 0;
 
-	ulint	lru_tm = 0;
 	ulint	list_tm = 0;
-	ulint	lru_pass = 0;
 	ulint	list_pass = 0;
 
 	for (ulint i = 0; i < page_cleaner->n_slots; i++) {
@@ -2630,13 +2613,9 @@ page_cleaner_flush_pages_recommendation(
 
 		slot = &page_cleaner->slots[i];
 
-		lru_tm    += slot->flush_lru_time;
-		lru_pass  += slot->flush_lru_pass;
 		list_tm   += slot->flush_list_time;
 		list_pass += slot->flush_list_pass;
 
-		slot->flush_lru_time  = 0;
-		slot->flush_lru_pass  = 0;
 		slot->flush_list_time = 0;
 		slot->flush_list_pass = 0;
 	}
@@ -2644,9 +2623,6 @@ page_cleaner_flush_pages_recommendation(
 	mutex_exit(&page_cleaner->mutex);
 
 	/* minimum values are 1, to avoid dividing by zero. */
-	if (lru_tm < 1) {
-		lru_tm = 1;
-	}
 	if (list_tm < 1) {
 		list_tm = 1;
 	}
@@ -2654,9 +2630,6 @@ page_cleaner_flush_pages_recommendation(
 		flush_tm = 1;
 	}
 
-	if (lru_pass < 1) {
-		lru_pass = 1;
-	}
 	if (list_pass < 1) {
 		list_pass = 1;
 	}
@@ -2666,25 +2639,16 @@ page_cleaner_flush_pages_recommendation(
 
 	MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
 		    list_tm / list_pass);
-	MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT,
-		    lru_tm / lru_pass);
 
 	MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
 		    list_tm / (srv_n_page_cleaners * flush_pass));
-	MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD,
-		    lru_tm / (srv_n_page_cleaners * flush_pass));
 	MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
 		    flush_tm * list_tm / flush_pass
-		    / (list_tm + lru_tm));
-	MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST,
-		    flush_tm * lru_tm / flush_pass
-		    / (list_tm + lru_tm));
+		    / list_tm);
 	MONITOR_SET(MONITOR_FLUSH_AVG_TIME, flush_tm / flush_pass);
 
 	MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
 		    list_pass / page_cleaner->n_slots);
-	MONITOR_SET(MONITOR_LRU_BATCH_FLUSH_AVG_PASS,
-		    lru_pass / page_cleaner->n_slots);
 	MONITOR_SET(MONITOR_FLUSH_AVG_PASS, flush_pass);
 
 	prev_lsn = cur_lsn;
@@ -2844,7 +2808,13 @@ buf_flush_page_cleaner_init(size_t n_page_cleaners)
 		buf_flush_page_coordinator_thread, n_page_cleaners);
 
-	/* Make sure page cleaner is active. */
+	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+		os_thread_create(
+			buf_lru_manager_thread_key,
+			buf_lru_manager_thread);
+	}
+
+	/* Make sure page cleaner and LRU managers are active.
*/ while (!buf_page_cleaner_is_active) { os_thread_sleep(10000); @@ -2939,9 +2907,7 @@ static ulint pc_flush_slot(void) { - ulint lru_tm = 0; ulint list_tm = 0; - int lru_pass = 0; int list_pass = 0; mutex_enter(&page_cleaner->mutex); @@ -2973,26 +2939,12 @@ pc_flush_slot(void) } if (!page_cleaner->is_running) { - slot->n_flushed_lru = 0; slot->n_flushed_list = 0; goto finish_mutex; } mutex_exit(&page_cleaner->mutex); - lru_tm = ut_time_ms(); - - /* Flush pages from end of LRU if required */ - slot->n_flushed_lru = buf_flush_LRU_list(buf_pool); - - lru_tm = ut_time_ms() - lru_tm; - lru_pass++; - - if (!page_cleaner->is_running) { - slot->n_flushed_list = 0; - goto finish; - } - /* Flush pages from flush_list if required */ if (page_cleaner->requested) { @@ -3010,16 +2962,13 @@ pc_flush_slot(void) slot->n_flushed_list = 0; slot->succeeded_list = true; } -finish: mutex_enter(&page_cleaner->mutex); finish_mutex: page_cleaner->n_slots_flushing--; page_cleaner->n_slots_finished++; slot->state = PAGE_CLEANER_STATE_FINISHED; - slot->flush_lru_time += lru_tm; slot->flush_list_time += list_tm; - slot->flush_lru_pass += lru_pass; slot->flush_list_pass += list_pass; if (page_cleaner->n_slots_requested == 0 @@ -3037,20 +2986,16 @@ finish_mutex: /** Wait until all flush requests are finished. -@param n_flushed_lru number of pages flushed and evicted from the end of the - LRU list. @param n_flushed_list number of pages flushed from the end of the flush_list. @return true if all flush_list flushing batch were success. */ static bool pc_wait_finished( - ulint* n_flushed_lru, ulint* n_flushed_list) { bool all_succeeded = true; - *n_flushed_lru = 0; *n_flushed_list = 0; os_event_wait(page_cleaner->is_finished); @@ -3066,7 +3011,6 @@ pc_wait_finished( ut_ad(slot->state == PAGE_CLEANER_STATE_FINISHED); - *n_flushed_lru += slot->n_flushed_lru; *n_flushed_list += slot->n_flushed_list; all_succeeded &= slot->succeeded_list; @@ -3086,7 +3030,7 @@ pc_wait_finished( #ifdef UNIV_LINUX /** -Set priority for page_cleaner threads. +Set priority for page_cleaner and LRU manager threads. @param[in] priority priority intended to set @return true if set as intended */ static @@ -3102,18 +3046,18 @@ buf_flush_page_cleaner_set_priority( #endif /* UNIV_LINUX */ #ifdef UNIV_DEBUG -/** Loop used to disable page cleaner threads. */ +/** Loop used to disable page cleaner and LRU manager threads. */ static void buf_flush_page_cleaner_disabled_loop(void) { - ut_ad(page_cleaner != NULL); - if (!innodb_page_cleaner_disabled_debug) { /* We return to avoid entering and exiting mutex. */ return; } + ut_ad(page_cleaner != NULL); + mutex_enter(&page_cleaner->mutex); page_cleaner->n_disabled_debug++; mutex_exit(&page_cleaner->mutex); @@ -3143,8 +3087,8 @@ buf_flush_page_cleaner_disabled_loop(void) mutex_exit(&page_cleaner->mutex); } -/** Disables page cleaner threads (coordinator and workers). -It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0). +/** Disables page cleaner threads (coordinator and workers) and LRU manager +threads. It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0). @param[in] thd thread handle @param[in] var pointer to system variable @param[out] var_ptr where the formal string goes @@ -3167,7 +3111,7 @@ buf_flush_page_cleaner_disabled_debug_update( innodb_page_cleaner_disabled_debug = false; - /* Enable page cleaner threads. */ + /* Enable page cleaner and LRU manager threads. 
*/ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { mutex_enter(&page_cleaner->mutex); const ulint n = page_cleaner->n_disabled_debug; @@ -3201,10 +3145,12 @@ buf_flush_page_cleaner_disabled_debug_update( mutex_enter(&page_cleaner->mutex); ut_ad(page_cleaner->n_disabled_debug - <= srv_n_page_cleaners); + <= (srv_n_page_cleaners + + buf_lru_manager_running_threads)); if (page_cleaner->n_disabled_debug - == srv_n_page_cleaners) { + == (srv_n_page_cleaners + + buf_lru_manager_running_threads)) { mutex_exit(&page_cleaner->mutex); break; @@ -3241,7 +3187,7 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) << buf_flush_page_cleaner_priority; } else { ib::info() << "If the mysqld execution user is authorized," - " page cleaner thread priority can be changed." + " page cleaner and LRU manager thread priority can be changed." " See the man page of setpriority()."; } #endif /* UNIV_LINUX */ @@ -3263,7 +3209,6 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) && recv_sys->spaces != NULL) { /* treat flushing requests during recovery. */ - ulint n_flushed_lru = 0; ulint n_flushed_list = 0; os_event_wait(recv_sys->flush_start); @@ -3274,26 +3219,11 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) break; } - switch (recv_sys->flush_type) { - case BUF_FLUSH_LRU: - /* Flush pages from end of LRU if required */ - pc_request(0, LSN_MAX); + /* Flush all pages */ + do { + pc_request(ULINT_MAX, LSN_MAX); while (pc_flush_slot() > 0) {} - pc_wait_finished(&n_flushed_lru, &n_flushed_list); - break; - - case BUF_FLUSH_LIST: - /* Flush all pages */ - do { - pc_request(ULINT_MAX, LSN_MAX); - while (pc_flush_slot() > 0) {} - } while (!pc_wait_finished(&n_flushed_lru, - &n_flushed_list)); - break; - - default: - ut_ad(0); - } + } while (!pc_wait_finished(&n_flushed_list)); os_event_reset(recv_sys->flush_start); os_event_set(recv_sys->flush_end); @@ -3302,7 +3232,6 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) os_event_wait(buf_flush_event); ulint ret_sleep = 0; - ulint n_evicted = 0; ulint n_flushed_last = 0; ulint warn_interval = 1; ulint warn_count = 0; @@ -3343,9 +3272,7 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) ib::info() << "Page cleaner took " << us << "ms to flush" - << n_flushed_last - << " and evict " - << n_evicted << " pages"; + << n_flushed_last << " pages"; if (warn_interval > 300) { warn_interval = 600; @@ -3364,7 +3291,7 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) } next_loop_time = curr_time + 1000; - n_flushed_last = n_evicted = 0; + n_flushed_last = 0; } if (ret_sleep != OS_SYNC_TIME_EXCEEDED @@ -3390,21 +3317,20 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) page_cleaner->flush_pass++; /* Wait for all slots to be finished */ - ulint n_flushed_lru = 0; ulint n_flushed_list = 0; - pc_wait_finished(&n_flushed_lru, &n_flushed_list); + pc_wait_finished(&n_flushed_list); - if (n_flushed_list > 0 || n_flushed_lru > 0) { - buf_flush_stats(n_flushed_list, n_flushed_lru); + if (n_flushed_list > 0) { + srv_stats.buf_pool_flushed.add(n_flushed_list); MONITOR_INC_VALUE_CUMULATIVE( MONITOR_FLUSH_SYNC_TOTAL_PAGE, MONITOR_FLUSH_SYNC_COUNT, MONITOR_FLUSH_SYNC_PAGES, - n_flushed_lru + n_flushed_list); + n_flushed_list); } - n_flushed = n_flushed_lru + n_flushed_list; + n_flushed = n_flushed_list; } else if (srv_check_activity(last_activity)) { ulint n_to_flush; @@ -3436,31 +3362,21 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) page_cleaner->flush_pass++ ; /* Wait for all slots to be finished */ - ulint 
n_flushed_lru = 0; ulint n_flushed_list = 0; - pc_wait_finished(&n_flushed_lru, &n_flushed_list); + pc_wait_finished(&n_flushed_list); - if (n_flushed_list > 0 || n_flushed_lru > 0) { - buf_flush_stats(n_flushed_list, n_flushed_lru); + if (n_flushed_list > 0) { + srv_stats.buf_pool_flushed.add(n_flushed_list); } if (ret_sleep == OS_SYNC_TIME_EXCEEDED) { last_pages = n_flushed_list; } - n_evicted += n_flushed_lru; n_flushed_last += n_flushed_list; - n_flushed = n_flushed_lru + n_flushed_list; - - if (n_flushed_lru) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_PAGES, - n_flushed_lru); - } + n_flushed = n_flushed_list; if (n_flushed_list) { MONITOR_INC_VALUE_CUMULATIVE( @@ -3487,7 +3403,6 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) } else { /* no activity, but woken up by event */ - n_flushed = 0; } ut_d(buf_flush_page_cleaner_disabled_loop()); @@ -3512,24 +3427,27 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) when SRV_SHUTDOWN_CLEANUP is set other threads like the master and the purge threads may be working as well. We start flushing the buffer pool but can't be sure that no new pages are being - dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. */ + dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. Because + the LRU manager thread is also flushing at SRV_SHUTDOWN_CLEANUP + but not SRV_SHUTDOWN_FLUSH_PHASE, we only leave the + SRV_SHUTDOWN_CLEANUP loop when the LRU manager quits. */ do { pc_request(ULINT_MAX, LSN_MAX); while (pc_flush_slot() > 0) {} - ulint n_flushed_lru = 0; ulint n_flushed_list = 0; - pc_wait_finished(&n_flushed_lru, &n_flushed_list); + pc_wait_finished(&n_flushed_list); - n_flushed = n_flushed_lru + n_flushed_list; + n_flushed = n_flushed_list; /* We sleep only if there are no pages to flush */ if (n_flushed == 0) { os_thread_sleep(100000); } - } while (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); + } while (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP + || buf_lru_manager_running_threads > 0); /* At this point all threads including the master and the purge thread must have been suspended. */ @@ -3544,7 +3462,7 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) sweep and we'll come out of the loop leaving behind dirty pages in the flush_list */ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - buf_flush_wait_LRU_batch_end(); + ut_ad(buf_lru_manager_running_threads == 0); bool success; @@ -3553,14 +3471,12 @@ buf_flush_page_coordinator_thread(size_t n_page_cleaners) while (pc_flush_slot() > 0) {} - ulint n_flushed_lru = 0; ulint n_flushed_list = 0; - success = pc_wait_finished(&n_flushed_lru, &n_flushed_list); + success = pc_wait_finished(&n_flushed_list); - n_flushed = n_flushed_lru + n_flushed_list; + n_flushed = n_flushed_list; buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - buf_flush_wait_LRU_batch_end(); } while (!success || n_flushed > 0); @@ -3628,6 +3544,133 @@ buf_flush_page_cleaner_thread() my_thread_end(); } +/** Make a LRU manager thread sleep until the passed target time, if it's not +already in the past. +@param[in] next_loop_time desired wake up time */ +static +void +buf_lru_manager_sleep_if_needed( + ulint next_loop_time) +{ + /* If this is the server shutdown buffer pool flushing phase, skip the + sleep to quit this thread faster */ + if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE) + return; + + ulint cur_time = ut_time_ms(); + + if (next_loop_time > cur_time) { + /* Get sleep interval in micro seconds. 
We use
+		std::min() to avoid a long sleep in case of
+		wrap around. */
+		os_thread_sleep(std::min(1000000UL,
+					 (next_loop_time - cur_time)
+					 * 1000));
+	}
+}
+
+/** Adjust the LRU manager thread sleep time based on the free list length
+and the last flush result.
+@param[in]	buf_pool	buffer pool instance being flushed
+@param[in]	lru_n_flushed	last LRU flush page count
+@param[in,out]	lru_sleep_time	LRU manager thread sleep time */
+static
+void
+buf_lru_manager_adapt_sleep_time(
+	const buf_pool_t*	buf_pool,
+	ulint			lru_n_flushed,
+	ulint*			lru_sleep_time)
+{
+	const ulint free_len = UT_LIST_GET_LEN(buf_pool->free);
+	const ulint max_free_len = std::min(
+		UT_LIST_GET_LEN(buf_pool->LRU), srv_LRU_scan_depth);
+
+	if (free_len < max_free_len / 100 && lru_n_flushed) {
+
+		/* Free list filled less than 1% and the last iteration was
+		able to flush, no sleep */
+		*lru_sleep_time = 0;
+	} else if (free_len > max_free_len / 5
+		   || (free_len < max_free_len / 100 && lru_n_flushed == 0)) {
+
+		/* Free list filled more than 20% or no pages flushed in the
+		previous batch, sleep a bit more */
+		*lru_sleep_time += 1;
+		if (*lru_sleep_time > 1000)
+			*lru_sleep_time = 1000;
+	} else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) {
+
+		/* Free list filled less than 5%, sleep a bit less */
+		*lru_sleep_time -= 50;
+	} else {
+
+		/* Free lists filled between 5% and 20%, no change */
+	}
+}
+/** LRU manager thread performing LRU flushes and evictions to refill the
+buffer pool free lists. One thread is created for each buffer pool
+instance; each thread computes its instance number from the running
+thread counter at startup. */
+static
+void
+buf_lru_manager_thread()
+{
+	my_thread_init();
+
+#ifdef UNIV_LINUX
+	/* Linux might be able to set a different priority for each thread;
+	worth trying to set a high priority for the LRU manager threads. */
+	if (buf_flush_page_cleaner_set_priority(
+		buf_flush_page_cleaner_priority)) {
+
+		ib::info() << "lru_manager worker priority: "
+			<< buf_flush_page_cleaner_priority;
+	}
+#endif /* UNIV_LINUX */
+
+	ulong i = buf_lru_manager_running_threads.fetch_add(1);
+	ut_ad(i < srv_buf_pool_instances);
+
+	buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+	ulint	lru_sleep_time = 1000;
+	ulint	next_loop_time = ut_time_ms() + lru_sleep_time;
+	ulint	lru_n_flushed = 1;
+
+	/* On server shutdown, the LRU manager thread runs through the
+	cleanup phase to provide free pages for the master and purge
+	threads. */
+	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
+	       || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP) {
+
+		ut_d(buf_flush_page_cleaner_disabled_loop());
+
+		buf_lru_manager_sleep_if_needed(next_loop_time);
+
+		buf_lru_manager_adapt_sleep_time(buf_pool, lru_n_flushed,
+						 &lru_sleep_time);
+
+		next_loop_time = ut_time_ms() + lru_sleep_time;
+
+		lru_n_flushed = buf_flush_LRU_list(buf_pool);
+
+		buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
+
+		if (lru_n_flushed) {
+			srv_stats.buf_pool_flushed.add(lru_n_flushed);
+
+			MONITOR_INC_VALUE_CUMULATIVE(
+				MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
+				MONITOR_LRU_BATCH_FLUSH_COUNT,
+				MONITOR_LRU_BATCH_FLUSH_PAGES,
+				lru_n_flushed);
+		}
+	}
+
+	buf_lru_manager_running_threads--;
+
+	my_thread_end();
+}
+
 /*******************************************************************//**
 Synchronously flush dirty blocks from the end of the flush list of all buffer
 pool instances.
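Side note on the heuristic above: the adaptive back-off in buf_lru_manager_adapt_sleep_time can be exercised outside the server. The following standalone C++ sketch is illustrative only — fake_buf_pool, fake_lru_scan_depth, and main() are hypothetical stand-ins, not InnoDB symbols; only the 1%/5%/20% free-list thresholds and the 0..1000 ms sleep range mirror the function above.

#include <algorithm>
#include <cstdio>

// Simplified stand-ins for the buffer pool list lengths (illustrative only).
struct fake_buf_pool { unsigned long free_len; unsigned long lru_len; };

static const unsigned long fake_lru_scan_depth = 1024;  // srv_LRU_scan_depth stand-in

// Mirrors the thresholds of buf_lru_manager_adapt_sleep_time above.
static void adapt_sleep_time(const fake_buf_pool &pool,
                             unsigned long lru_n_flushed,
                             unsigned long *lru_sleep_time) {
    const unsigned long max_free_len =
        std::min(pool.lru_len, fake_lru_scan_depth);
    if (pool.free_len < max_free_len / 100 && lru_n_flushed) {
        *lru_sleep_time = 0;            // < 1% free and flushing works: no sleep
    } else if (pool.free_len > max_free_len / 5
               || (pool.free_len < max_free_len / 100 && lru_n_flushed == 0)) {
        *lru_sleep_time = std::min(*lru_sleep_time + 1, 1000UL);  // back off
    } else if (pool.free_len < max_free_len / 20 && *lru_sleep_time >= 50) {
        *lru_sleep_time -= 50;          // < 5% free: wake up noticeably sooner
    }                                   // 5%..20% free: keep the current sleep
}

int main() {
    fake_buf_pool pool = {5, 2048};     // nearly empty free list
    unsigned long sleep_ms = 1000;
    adapt_sleep_time(pool, 32, &sleep_ms);
    std::printf("sleep after starved pass: %lu ms\n", sleep_ms);  // prints 0
    pool.free_len = 400;                // > 20% of max_free_len (1024)
    adapt_sleep_time(pool, 0, &sleep_ms);
    std::printf("sleep after idle pass: %lu ms\n", sleep_ms);     // prints 1
    return 0;
}

The asymmetry (back off by 1 ms at a time, speed up by 50 ms at a time) biases the thread toward waking too often rather than too rarely, since an empty free list stalls user threads waiting for free pages.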
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e1ef1bef96c..79852763463 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -700,7 +700,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
 	PSI_MUTEX_KEY(page_cleaner_mutex, 0, 0, PSI_DOCUMENT_ME),
 	PSI_MUTEX_KEY(purge_sys_pq_mutex, 0, 0, PSI_DOCUMENT_ME),
 	PSI_MUTEX_KEY(recv_sys_mutex, 0, 0, PSI_DOCUMENT_ME),
-	PSI_MUTEX_KEY(recv_writer_mutex, 0, 0, PSI_DOCUMENT_ME),
 	PSI_MUTEX_KEY(temp_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME),
 	PSI_MUTEX_KEY(undo_space_rseg_mutex, 0, 0, PSI_DOCUMENT_ME),
 	PSI_MUTEX_KEY(trx_sys_rseg_mutex, 0, 0, PSI_DOCUMENT_ME),
@@ -785,7 +784,7 @@ static PSI_thread_info all_innodb_threads[] = {
 	PSI_KEY(io_read_thread, 0, 0, PSI_DOCUMENT_ME),
 	PSI_KEY(io_write_thread, 0, 0, PSI_DOCUMENT_ME),
 	PSI_KEY(buf_resize_thread, 0, 0, PSI_DOCUMENT_ME),
-	PSI_KEY(recv_writer_thread, 0, 0, PSI_DOCUMENT_ME),
+	PSI_KEY(buf_lru_manager_thread, 0, 0, PSI_DOCUMENT_ME),
 	PSI_KEY(srv_error_monitor_thread, 0, 0, PSI_DOCUMENT_ME),
 	PSI_KEY(srv_lock_timeout_thread, 0, 0, PSI_DOCUMENT_ME),
 	PSI_KEY(srv_master_thread, 0, 0, PSI_DOCUMENT_ME),
@@ -4682,7 +4681,6 @@ innodb_init_params()
 		+ 1 /* buf_dump_thread */
 		+ 1 /* dict_stats_thread */
 		+ 1 /* fts_optimize_thread */
-		+ 1 /* recv_writer_thread */
 		+ 1 /* trx_rollback_or_clean_all_recovered */
 		+ 128 /* added as margin, for use of
 		      InnoDB Memcached etc. */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index ff5c092c8af..63d38da9438 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -39,10 +39,15 @@ Created 11/5/1995 Heikki Tuuri
 #include "log0log.h"
 #include "buf0types.h"
 
+#include <atomic>
+
 #ifndef UNIV_HOTBACKUP
 
 /** Flag indicating if the page_cleaner is in active state. */
 extern bool buf_page_cleaner_is_active;
 
+/** The number of running LRU manager threads. 0 if LRU manager is inactive. */
+extern std::atomic<ulint> buf_lru_manager_running_threads;
+
 #ifdef UNIV_DEBUG
 
 /** Value of MySQL global variable used to disable page cleaner. */
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index a2aecff5cf2..86ca7bc7c8c 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -549,20 +549,12 @@ struct recv_sys_t {
 	state field in each recv_addr struct */
 	ib_mutex_t	mutex;
 
-	/** mutex coordinating flushing between recv_writer_thread and
-	the recovery thread. */
-	ib_mutex_t	writer_mutex;
-
 	/** event to acticate page cleaner threads */
 	os_event_t	flush_start;
 
 	/** event to signal that the page cleaner has finished the request */
 	os_event_t	flush_end;
 
-	/** type of the flush request. BUF_FLUSH_LRU: flush end of LRU,
-	keeping free blocks. BUF_FLUSH_LIST: flush all of blocks.
*/ - buf_flush_t flush_type; - #endif /* !UNIV_HOTBACKUP */ /** This is true when log rec application to pages is allowed; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 390b44a6e9d..0cdd19d972a 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -494,7 +494,7 @@ extern mysql_pfs_key_t io_read_thread_key; extern mysql_pfs_key_t io_write_thread_key; extern mysql_pfs_key_t page_flush_coordinator_thread_key; extern mysql_pfs_key_t page_flush_thread_key; -extern mysql_pfs_key_t recv_writer_thread_key; +extern mysql_pfs_key_t buf_lru_manager_thread_key; extern mysql_pfs_key_t srv_error_monitor_thread_key; extern mysql_pfs_key_t srv_lock_timeout_thread_key; extern mysql_pfs_key_t srv_master_thread_key; diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index c8a71ed2e5d..22fb6576296 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -129,7 +129,6 @@ extern mysql_pfs_key_t recalc_pool_mutex_key; extern mysql_pfs_key_t page_cleaner_mutex_key; extern mysql_pfs_key_t purge_sys_pq_mutex_key; extern mysql_pfs_key_t recv_sys_mutex_key; -extern mysql_pfs_key_t recv_writer_mutex_key; extern mysql_pfs_key_t rtr_active_mutex_key; extern mysql_pfs_key_t rtr_match_mutex_key; extern mysql_pfs_key_t rtr_path_mutex_key; diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h index a79ab7d72c6..4f16fc0ff84 100644 --- a/storage/innobase/include/sync0types.h +++ b/storage/innobase/include/sync0types.h @@ -315,8 +315,6 @@ enum latch_level_t { SYNC_TRX_I_S_RWLOCK, - SYNC_RECV_WRITER, - /** Level is varying. Only used with buffer pool page locks, which do not have a fixed level, but instead have their level set after the page is locked; see e.g. ibuf_bitmap_get_map_page(). */ @@ -1250,8 +1248,6 @@ struct dict_sync_check : public sync_check_functor_t { || (level != SYNC_DICT && level != SYNC_DICT_OPERATION && level != SYNC_FTS_CACHE - /* This only happens in recv_apply_hashed_log_recs. */ - && level != SYNC_RECV_WRITER && level != SYNC_NO_ORDER_CHECK)) { m_result = true; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index ac87574fa9c..fb1b39cc8b8 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -204,15 +204,6 @@ is bigger than the lsn we are able to scan up to, that is an indication that the recovery failed and the database may be corrupt. */ static lsn_t recv_max_page_lsn; -#ifndef UNIV_HOTBACKUP -# ifdef UNIV_PFS_THREAD -mysql_pfs_key_t recv_writer_thread_key; -# endif /* UNIV_PFS_THREAD */ - -/** Flag indicating if recv_writer thread is active. */ -static bool recv_writer_thread_active = false; -#endif /* !UNIV_HOTBACKUP */ - /* prototypes */ #ifndef UNIV_HOTBACKUP @@ -425,7 +416,6 @@ recv_sys_create() ut_zalloc_nokey(sizeof(*recv_sys))); mutex_create(LATCH_ID_RECV_SYS, &recv_sys->mutex); - mutex_create(LATCH_ID_RECV_WRITER, &recv_sys->writer_mutex); recv_sys->spaces = nullptr; } @@ -490,9 +480,6 @@ recv_sys_close() if (recv_sys->flush_end != nullptr) { os_event_destroy(recv_sys->flush_end); } - - ut_ad(!recv_writer_thread_active); - mutex_free(&recv_sys->writer_mutex); #endif /* !UNIV_HOTBACKUP */ call_destructor(&recv_sys->dblwr); @@ -832,60 +819,6 @@ MetadataRecover::store() mutex_exit(&dict_persist->mutex); } -/** recv_writer thread tasked with flushing dirty pages from the buffer -pools. 
*/
-static
-void
-recv_writer_thread()
-{
-	ut_ad(!srv_read_only_mode);
-
-	/* The code flow is as follows:
-	Step 1: In recv_recovery_from_checkpoint_start().
-	Step 2: This recv_writer thread is started.
-	Step 3: In recv_recovery_from_checkpoint_finish().
-	Step 4: Wait for recv_writer thread to complete. This is based
-	on the flag recv_writer_thread_active.
-	Step 5: Assert that recv_writer thread is not active anymore.
-
-	It is possible that the thread that is started in step 2,
-	becomes active only after step 4 and hence the assert in
-	step 5 fails. So mark this thread active only if necessary. */
-	mutex_enter(&recv_sys->writer_mutex);
-
-	if (recv_recovery_on) {
-		recv_writer_thread_active = true;
-	} else {
-		mutex_exit(&recv_sys->writer_mutex);
-		return;
-	}
-	mutex_exit(&recv_sys->writer_mutex);
-
-	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
-		os_thread_sleep(100000);
-
-		mutex_enter(&recv_sys->writer_mutex);
-
-		if (!recv_recovery_on) {
-			mutex_exit(&recv_sys->writer_mutex);
-			break;
-		}
-
-		/* Flush pages from end of LRU if required */
-		os_event_reset(recv_sys->flush_end);
-		recv_sys->flush_type = BUF_FLUSH_LRU;
-		os_event_set(recv_sys->flush_start);
-		os_event_wait(recv_sys->flush_end);
-
-		mutex_exit(&recv_sys->writer_mutex);
-	}
-
-	recv_writer_thread_active = false;
-
-	my_thread_end();
-}
-
 /** Frees the recovery system. */
 void
 recv_sys_free()
@@ -897,7 +830,6 @@ recv_sys_free()
 	/* wake page cleaner up to progress */
 	if (!srv_read_only_mode) {
 		ut_ad(!recv_recovery_on);
-		ut_ad(!recv_writer_thread_active);
 		os_event_reset(buf_flush_event);
 		os_event_set(recv_sys->flush_start);
 	}
@@ -1433,21 +1365,12 @@ recv_apply_hashed_log_recs(bool allow_ibuf)
 	log_mutex_exit();
 
-	/* Stop the recv_writer thread from issuing any LRU
-	flush batches. */
-	mutex_enter(&recv_sys->writer_mutex);
-
-	/* Wait for any currently run batch to end. */
-	buf_flush_wait_LRU_batch_end();
-
 	os_event_reset(recv_sys->flush_end);
-	recv_sys->flush_type = BUF_FLUSH_LIST;
-
 	os_event_set(recv_sys->flush_start);
 	os_event_wait(recv_sys->flush_end);
 
-	buf_pool_invalidate();
+	/* Wait for any currently running batch to end. */
+	buf_flush_wait_LRU_batch_end();
 
-	/* Allow batches from recv_writer thread. */
-	mutex_exit(&recv_sys->writer_mutex);
+	buf_pool_invalidate();
 
 	log_mutex_enter();
@@ -3884,16 +3807,6 @@ recv_init_crash_recovery()
 	ib::info() << "Starting crash recovery.";
 
 	buf_dblwr_process();
-
-	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
-		/* Spawn the background thread to flush dirty pages
-		from the buffer pools. */
-
-		os_thread_create(
-			recv_writer_thread_key,
-			recv_writer_thread);
-	}
 }
 #endif /* !UNIV_HOTBACKUP */
@@ -4202,39 +4115,12 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
 MetadataRecover*
 recv_recovery_from_checkpoint_finish(bool aborting)
 {
-	/* Make sure that the recv_writer thread is done. This is
-	required because it grabs various mutexes and we want to
-	ensure that when we enable sync_order_checks there is no
-	mutex currently held by any thread. */
-	mutex_enter(&recv_sys->writer_mutex);
-
 	/* Free the resources of the recovery system */
 	recv_recovery_on = false;
 
-	/* By acquring the mutex we ensure that the recv_writer thread
-	won't trigger any more LRU batches. Now wait for currently
-	in progress batches to finish. */
+	/* Now wait for any in-progress batches to finish.
*/ buf_flush_wait_LRU_batch_end(); - mutex_exit(&recv_sys->writer_mutex); - - ulint count = 0; - - while (recv_writer_thread_active) { - - ++count; - - os_thread_sleep(100000); - - if (srv_print_verbose_log && count > 600) { - - ib::info() - << "Waiting for recv_writer to" - " finish flushing of buffer pool"; - count = 0; - } - } - MetadataRecover* metadata; if (!aborting) { diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index e9c5a2f6240..4963cdc97a0 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -376,7 +376,7 @@ static monitor_info_t innodb_counter_info[] = MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT}, {"buffer_LRU_batch_flush_avg_time_slot", "buffer", - "Avg time (ms) spent for LRU batch flushing recently per slot.", + "Avg time (ms) spent for LRU batch flushing recently per slot.", // TODO: always zero MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_SLOT}, @@ -386,7 +386,7 @@ static monitor_info_t innodb_counter_info[] = MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD}, {"buffer_LRU_batch_flush_avg_time_thread", "buffer", - "Avg time (ms) spent for LRU batch flushing recently per thread.", + "Avg time (ms) spent for LRU batch flushing recently per thread.", // TODO: always zero MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_THREAD}, @@ -398,7 +398,7 @@ static monitor_info_t innodb_counter_info[] = {"buffer_LRU_batch_flush_avg_time_est", "buffer", "Estimated time (ms) spent for LRU batch flushing recently.", MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST}, + MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_TIME_EST}, // TODO: always zero {"buffer_flush_avg_time", "buffer", "Avg time (ms) spent for flushing recently.", @@ -411,7 +411,7 @@ static monitor_info_t innodb_counter_info[] = MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_PASS}, {"buffer_LRU_batch_flush_avg_pass", "buffer", - "Number of LRU batch flushes passed during the recent Avg period.", + "Number of LRU batch flushes passed during the recent Avg period.", // TODO: always zero MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_LRU_BATCH_FLUSH_AVG_PASS}, diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc index 2e1f996fdbe..733bef04a56 100644 --- a/storage/innobase/sync/sync0debug.cc +++ b/storage/innobase/sync/sync0debug.cc @@ -553,7 +553,6 @@ LatchDebug::LatchDebug() LEVEL_MAP_INSERT(SYNC_DICT_OPERATION); LEVEL_MAP_INSERT(SYNC_TRX_I_S_LAST_READ); LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK); - LEVEL_MAP_INSERT(SYNC_RECV_WRITER); LEVEL_MAP_INSERT(SYNC_LEVEL_VARYING); LEVEL_MAP_INSERT(SYNC_NO_ORDER_CHECK); @@ -814,7 +813,6 @@ LatchDebug::check_order( case SYNC_STATS_AUTO_RECALC: case SYNC_POOL: case SYNC_POOL_MANAGER: - case SYNC_RECV_WRITER: case SYNC_PARSER: case SYNC_DICT: @@ -1470,8 +1468,6 @@ sync_latch_meta_init() LATCH_ADD_MUTEX(RECV_SYS, SYNC_RECV, recv_sys_mutex_key); - LATCH_ADD_MUTEX(RECV_WRITER, SYNC_RECV_WRITER, recv_writer_mutex_key); - LATCH_ADD_MUTEX(TEMP_SPACE_RSEG, SYNC_TEMP_SPACE_RSEG, temp_space_rseg_mutex_key); diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index c44bd53d83a..ee885d355f4 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -89,7 +89,6 @@ mysql_pfs_key_t recalc_pool_mutex_key; mysql_pfs_key_t page_cleaner_mutex_key; mysql_pfs_key_t purge_sys_pq_mutex_key; mysql_pfs_key_t recv_sys_mutex_key; -mysql_pfs_key_t 
recv_writer_mutex_key; mysql_pfs_key_t temp_space_rseg_mutex_key; mysql_pfs_key_t undo_space_rseg_mutex_key; mysql_pfs_key_t trx_sys_rseg_mutex_key;
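Closing note on the shutdown protocol: everything the page cleaner coordinator needs from the LRU managers is carried by the single atomic counter. Below is a minimal standalone C++ sketch of that hand-off; lru_worker and in_cleanup_phase are hypothetical stand-ins for buf_lru_manager_thread and srv_shutdown_state, and none of the names are InnoDB API.

#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>
#include <vector>

// Stand-in for buf_lru_manager_running_threads (illustrative only).
std::atomic<unsigned long> lru_threads_running(0);
// Stand-in for the SRV_SHUTDOWN_CLEANUP -> SRV_SHUTDOWN_FLUSH_PHASE transition.
std::atomic<bool> in_cleanup_phase(true);

// Each worker registers itself on entry and deregisters on exit, as
// buf_lru_manager_thread does with fetch_add() and the decrement at the end.
void lru_worker() {
    lru_threads_running.fetch_add(1);
    while (in_cleanup_phase.load()) {
        // One "LRU flush pass" per iteration.
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    lru_threads_running.fetch_sub(1);
}

int main() {
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; i++) workers.emplace_back(lru_worker);
    std::this_thread::sleep_for(std::chrono::milliseconds(50));

    in_cleanup_phase.store(false);  // enter the "flush" shutdown phase

    // The coordinator may start the final flush-list sweep only once every
    // LRU worker has quit, mirroring the loop condition added to
    // buf_flush_page_coordinator_thread in this patch.
    while (lru_threads_running.load() > 0) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    for (auto &t : workers) t.join();
    std::printf("all LRU workers stopped; safe to run the final sweep\n");
    return 0;
}

As in the patch, the counter does double duty: on the way up, fetch_add() hands each thread its buffer pool instance number; on the way down, a zero reading tells the coordinator that no LRU batch can still be in flight before the final flush-list sweep and the buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST) assertions run.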