diff --git a/mysql-test/suite/innodb/r/lru_flush.result b/mysql-test/suite/innodb/r/lru_flush.result new file mode 100644 index 0000000..f5bfcb9 --- /dev/null +++ b/mysql-test/suite/innodb/r/lru_flush.result @@ -0,0 +1,54 @@ +call mtr.add_suppression("InnoDB: Resizing redo log"); +call mtr.add_suppression("InnoDB: Starting to delete and rewrite log files"); +call mtr.add_suppression("InnoDB: New log files created"); +set global innodb_monitor_enable = all; +create table t1(s text) engine=innodb; +set @save_loops := @@innodb_flushing_avg_loops; +set @save_capacity := @@innodb_io_capacity; +set @save_scan_depth := @@innodb_lru_scan_depth; +set @save_lwm := @@innodb_adaptive_flushing_lwm; +set @save_adaptive := @@innodb_adaptive_flushing; +set @save_dirty := @@innodb_max_dirty_pages_pct; +set global innodb_io_capacity = 100; +set global innodb_lru_scan_depth = 100; +set global innodb_flushing_avg_loops = 10; +set global innodb_adaptive_flushing = 0; +set global innodb_max_dirty_pages_pct = 0; +select * from information_schema.session_variables where variable_name in ('innodb_io_capacity', +'innodb_lru_scan_depth'); +VARIABLE_NAME VARIABLE_VALUE +INNODB_IO_CAPACITY 100 +INNODB_LRU_SCAN_DEPTH 100 +set @start_lru_thread := (select count from information_schema.innodb_metrics where name = 'buffer_LRU_thread_total_pages'); +set @start_flush_list := (select count from information_schema.innodb_metrics where name = 'buffer_flush_list_total_pages'); +set @start_flush_lru := (select count from information_schema.innodb_metrics where name = 'buffer_flush_LRU_total_pages'); +commit; +select sum(length(s)) from t1; +sum(length(s)) +8192000 +begin; +update t1 set s = repeat('b', 2048); +commit; +set global innodb_adaptive_flushing = 1; +set global innodb_max_dirty_pages_pct = 99; +set global innodb_adaptive_flushing_lwm = 10; +begin; +update t1 set s = repeat('c', 4096); +commit; +select count > @start_lru_thread from information_schema.innodb_metrics where name = 
'buffer_LRU_thread_total_pages'; +count > @start_lru_thread +1 +select count > @start_flush_list from information_schema.innodb_metrics where name = 'buffer_flush_list_total_pages'; +count > @start_flush_list +1 +select count > @start_flush_lru from information_schema.innodb_metrics where name = 'buffer_flush_LRU_total_pages'; +count > @start_flush_lru +1 +set global innodb_io_capacity=@save_capacity; +set global innodb_lru_scan_depth=@save_scan_depth; +set global innodb_flushing_avg_loops=@save_loops; +set global innodb_adaptive_flushing_lwm=@save_lwm; +set global innodb_adaptive_flushing=@save_adaptive; +set global innodb_max_dirty_pages_pct=@save_dirty; +drop table t1; +set global innodb_monitor_enable = default; diff --git a/mysql-test/suite/innodb/t/lru_flush-master.opt b/mysql-test/suite/innodb/t/lru_flush-master.opt new file mode 100644 index 0000000..ef2db84 --- /dev/null +++ b/mysql-test/suite/innodb/t/lru_flush-master.opt @@ -0,0 +1 @@ +--innodb_buffer_pool_size=10M --innodb_log_file_size=20M diff --git a/mysql-test/suite/innodb/t/lru_flush.test b/mysql-test/suite/innodb/t/lru_flush.test new file mode 100644 index 0000000..46d7f9c --- /dev/null +++ b/mysql-test/suite/innodb/t/lru_flush.test @@ -0,0 +1,61 @@ +call mtr.add_suppression("InnoDB: Resizing redo log"); +call mtr.add_suppression("InnoDB: Starting to delete and rewrite log files"); +call mtr.add_suppression("InnoDB: New log files created"); +-- source include/have_innodb.inc +set global innodb_monitor_enable = all; +create table t1(s text) engine=innodb; +let $n=4000; +set @save_loops := @@innodb_flushing_avg_loops; +set @save_capacity := @@innodb_io_capacity; +set @save_scan_depth := @@innodb_lru_scan_depth; +set @save_lwm := @@innodb_adaptive_flushing_lwm; +set @save_adaptive := @@innodb_adaptive_flushing; +set @save_dirty := @@innodb_max_dirty_pages_pct; +set global innodb_io_capacity = 100; +set global innodb_lru_scan_depth = 100; +set global innodb_flushing_avg_loops = 10; + +# initially make 
it think we are not low on redo space, but we want the LRU flushing +# so we decrease the tolerance for dirty +set global innodb_adaptive_flushing = 0; +set global innodb_max_dirty_pages_pct = 0; +select * from information_schema.session_variables where variable_name in ('innodb_io_capacity', + 'innodb_lru_scan_depth'); +set @start_lru_thread := (select count from information_schema.innodb_metrics where name = 'buffer_LRU_thread_total_pages'); +set @start_flush_list := (select count from information_schema.innodb_metrics where name = 'buffer_flush_list_total_pages'); +set @start_flush_lru := (select count from information_schema.innodb_metrics where name = 'buffer_flush_LRU_total_pages'); +--disable_query_log +begin; +while ($n) +{ + insert into t1 values(repeat('a',2048)); + dec $n; +} +--enable_query_log +commit; +select sum(length(s)) from t1; +begin; +update t1 set s = repeat('b', 2048); +commit; +sleep 5; +# now make it think we are low on redo space +# and try to not let the LRU flushes win +set global innodb_adaptive_flushing = 1; +set global innodb_max_dirty_pages_pct = 99; +set global innodb_adaptive_flushing_lwm = 10; +begin; +update t1 set s = repeat('c', 4096); +commit; +sleep 5; +select count > @start_lru_thread from information_schema.innodb_metrics where name = 'buffer_LRU_thread_total_pages'; +select count > @start_flush_list from information_schema.innodb_metrics where name = 'buffer_flush_list_total_pages'; +select count > @start_flush_lru from information_schema.innodb_metrics where name = 'buffer_flush_LRU_total_pages'; +set global innodb_io_capacity=@save_capacity; +set global innodb_lru_scan_depth=@save_scan_depth; +set global innodb_flushing_avg_loops=@save_loops; +set global innodb_adaptive_flushing_lwm=@save_lwm; +set global innodb_adaptive_flushing=@save_adaptive; +set global innodb_max_dirty_pages_pct=@save_dirty; +drop table t1; +--disable_warnings +set global innodb_monitor_enable = default; diff --git a/storage/innobase/buf/buf0flu.cc 
b/storage/innobase/buf/buf0flu.cc index 540d638..ccfa4ef 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -58,13 +58,21 @@ back to FALSE at shutdown by the page_cleaner as well. Therefore no need to protect it by a mutex. It is only ever read by the thread doing the shutdown */ UNIV_INTERN ibool buf_page_cleaner_is_active = FALSE; +/** Flag for the lru_flush thread in a similar manner */ +UNIV_INTERN ibool buf_flush_lru_is_active = FALSE; /** LRU flush batch is further divided into this chunk size to reduce the wait time for the threads waiting for a clean block */ #define PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE 100 +/** replace flush_list flush with LRU flush if af_get_pct_for_lsn() + gives us less than this threshold and af_get_pct_for_dirty() gives us more than + af_get_pct_for_lsn() */ +#define PCT_LSN_NO_PRESSURE 50 + #ifdef UNIV_PFS_THREAD UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; +UNIV_INTERN mysql_pfs_key_t buf_flush_lru_thread_key; #endif /* UNIV_PFS_THREAD */ /** If LRU list of a buf_pool is less than this size then LRU eviction @@ -75,6 +83,9 @@ in thrashing. */ /* @} */ +static ulint +buf_flush_LRU_tail_low(ulint target_scan_depth); + /******************************************************************//** Increases flush_list size in bytes with zip_size for compressed page, UNIV_PAGE_SIZE for uncompressed page in inline function */ @@ -2063,6 +2074,17 @@ config parameter innodb_LRU_scan_depth. UNIV_INTERN ulint buf_flush_LRU_tail(void) +{ + return buf_flush_LRU_tail_low(srv_LRU_scan_depth); +} + +/*********************************************************************//** +The real worker behind buf_flush_LRU_tail(). Called by the page cleaner +thread directly to have the effect of srv_LRU_scan_depth being dynamic, +otherwise through the above wrapper. 
+*/ +static ulint +buf_flush_LRU_tail_low(ulint target_scan_depth) /*====================*/ { ulint total_flushed = 0; @@ -2072,13 +2094,14 @@ buf_flush_LRU_tail(void) buf_pool_t* buf_pool = buf_pool_from_array(i); ulint scan_depth; - /* srv_LRU_scan_depth can be arbitrarily large value. + /* target_scan_depth could come from srv_LRU_scan_depth and + can be an arbitrarily large value. We cap it with current LRU size. */ buf_pool_mutex_enter(buf_pool); scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); buf_pool_mutex_exit(buf_pool); - scan_depth = ut_min(srv_LRU_scan_depth, scan_depth); + scan_depth = ut_min(target_scan_depth, scan_depth); /* We divide LRU flush into smaller chunks because there may be user threads waiting for the flush to @@ -2089,9 +2112,8 @@ buf_flush_LRU_tail(void) ulint n_flushed = 0; - /* Currently page_cleaner is the only thread - that can trigger an LRU flush. It is possible - that a batch triggered during last iteration is + /* It is possible + that a batch triggered during last iteration or by another thread is still running, */ if (buf_flush_LRU(buf_pool, PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE, @@ -2330,8 +2352,44 @@ page_cleaner_flush_pages_if_needed(void) MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages); prev_pages = n_pages; - n_pages = page_cleaner_do_flush_batch( - n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1)); + + /* TODO: This is a naive heuristic algorithm. Can we do this better? 
*/ + if (pct_for_dirty > pct_for_lsn && pct_for_lsn < PCT_LSN_NO_PRESSURE) + { +#ifdef UNIV_DEBUG_FLUSH + ib_logf(IB_LOG_LEVEL_INFO, + "page_cleaner: preferring LRU flush, pct_for_dirty = %lu pct_for_lsn = %lu", + pct_for_dirty, pct_for_lsn); +#endif + /* We assume even distribution of LRU list pages across the buffer pool instances */ + n_pages = buf_flush_LRU_tail_low(n_pages / srv_buf_pool_instances); + + if (n_pages) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_LRU_TOTAL_PAGE, + MONITOR_FLUSH_LRU_COUNT, + MONITOR_FLUSH_LRU_PAGES, + n_pages); + } + } + else + { +#ifdef UNIV_DEBUG_FLUSH + ib_logf(IB_LOG_LEVEL_INFO, + "page_cleaner: preferring flush_list flush, pct_for_dirty = %lu pct_for_lsn = %lu", + pct_for_dirty, pct_for_lsn); +#endif + n_pages = page_cleaner_do_flush_batch( + n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1)); + + if (n_pages) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_LIST_TOTAL_PAGE, + MONITOR_FLUSH_LIST_COUNT, + MONITOR_FLUSH_LIST_PAGES, + n_pages); + } + } last_lsn= cur_lsn; last_pages= n_pages + 1; @@ -2359,7 +2417,7 @@ Puts the page_cleaner thread to sleep if it has finished work in less than a second */ static void -page_cleaner_sleep_if_needed( +thread_sleep_if_needed( /*=========================*/ ulint next_loop_time) /*!< in: time when next loop iteration should start */ @@ -2376,6 +2434,65 @@ page_cleaner_sleep_if_needed( } } +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_flush_lru_thread)( +/*==========================================*/ + void* arg __attribute__((unused)) + ) +{ + ulint next_loop_time = ut_time_ms() + 1000; + ulint n_flushed = 0; + ulint last_activity = srv_get_activity_count(); +#ifdef UNIV_PFS_THREAD + pfs_register_thread(buf_flush_lru_thread_key); +#endif /* UNIV_PFS_THREAD */ + + ut_ad(!srv_read_only_mode); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "InnoDB: flush_lru thread running, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif /* 
UNIV_DEBUG_THREAD_CREATION */ + + buf_flush_lru_is_active = TRUE; + + while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + if (srv_check_activity(last_activity) + || buf_get_n_pending_read_ios() + || n_flushed == 0) { + thread_sleep_if_needed(next_loop_time); + } + + next_loop_time = ut_time_ms() + 1000; + if (srv_check_activity(last_activity)) { + /* Flush pages from end of LRU if required. NOTE(review): last_activity is never refreshed in this loop, unlike in buf_flush_page_cleaner_thread -- confirm this is intended */ + n_flushed = buf_flush_LRU_tail(); +#ifdef UNIV_DEBUG_FLUSH + ib_logf(IB_LOG_LEVEL_INFO, "LRU flush: flushed %lu pages", n_flushed); +#endif + if (n_flushed) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_THREAD_TOTAL_PAGE, + MONITOR_LRU_THREAD_COUNT, + MONITOR_LRU_THREAD_PAGES, + n_flushed); + } + } + } + + /* On server shutdown we do nothing here, the cleaner thread can take care of + * the cleanup, so we just clear the flag and exit + */ + buf_flush_lru_is_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer pools. As of now we'll have only one instance of this thread. 
@@ -2413,7 +2530,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( if (srv_check_activity(last_activity) || buf_get_n_pending_read_ios() || n_flushed == 0) { - page_cleaner_sleep_if_needed(next_loop_time); + thread_sleep_if_needed(next_loop_time); } next_loop_time = ut_time_ms() + 1000; @@ -2421,11 +2538,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( if (srv_check_activity(last_activity)) { last_activity = srv_get_activity_count(); - /* Flush pages from end of LRU if required */ - n_flushed = buf_flush_LRU_tail(); - /* Flush pages from flush_list if required */ - n_flushed += page_cleaner_flush_pages_if_needed(); + n_flushed = page_cleaner_flush_pages_if_needed(); } else { n_flushed = page_cleaner_do_flush_batch( PCT_IO(100), diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 22d51a4..d43e151 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -396,6 +396,7 @@ static PSI_thread_info all_innodb_threads[] = { {&srv_master_thread_key, "srv_master_thread", 0}, {&srv_purge_thread_key, "srv_purge_thread", 0}, {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}, + {&buf_flush_lru_thread_key, "page_flush_lru_thread", 0}, {&recv_writer_thread_key, "recv_writer_thread", 0} }; # endif /* UNIV_PFS_THREAD */ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index f116720..0c5d807 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -183,6 +183,17 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /*==========================================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/******************************************************************//** +flush_lru thread tasked with flushing dirty pages from the LRU list. +As of now we'll have only one instance of this thread. 
+@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_flush_lru_thread)( +/*==========================================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ + /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index e2ab81b..ba5ac8f 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -191,12 +191,23 @@ enum monitor_id_t { MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, MONITOR_FLUSH_BACKGROUND_COUNT, MONITOR_FLUSH_BACKGROUND_PAGES, + + MONITOR_FLUSH_LIST_TOTAL_PAGE, + MONITOR_FLUSH_LIST_COUNT, + MONITOR_FLUSH_LIST_PAGES, + MONITOR_FLUSH_LRU_TOTAL_PAGE, + MONITOR_FLUSH_LRU_COUNT, + MONITOR_FLUSH_LRU_PAGES, + MONITOR_LRU_BATCH_SCANNED, MONITOR_LRU_BATCH_SCANNED_NUM_CALL, MONITOR_LRU_BATCH_SCANNED_PER_CALL, MONITOR_LRU_BATCH_TOTAL_PAGE, MONITOR_LRU_BATCH_COUNT, MONITOR_LRU_BATCH_PAGES, + MONITOR_LRU_THREAD_TOTAL_PAGE, + MONITOR_LRU_THREAD_COUNT, + MONITOR_LRU_THREAD_PAGES, MONITOR_LRU_SINGLE_FLUSH_SCANNED, MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 6e2f76a..3c1b469 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -451,6 +451,7 @@ extern srv_stats_t srv_stats; # ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ extern mysql_pfs_key_t buf_page_cleaner_thread_key; +extern mysql_pfs_key_t buf_flush_lru_thread_key; extern mysql_pfs_key_t trx_rollback_clean_thread_key; extern mysql_pfs_key_t io_handler_thread_key; extern mysql_pfs_key_t srv_lock_timeout_thread_key; diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 80c8f7f..1b4a5b5 
100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -440,6 +440,38 @@ static monitor_info_t innodb_counter_info[] = MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, MONITOR_FLUSH_BACKGROUND_PAGES}, + /* Cumulative counter for pages flushed by the page cleaner thread from flush_list */ + {"buffer_flush_list_total_pages", "buffer", + "Total pages flushed by the page cleaner thread from flush_list", + MONITOR_SET_OWNER, MONITOR_FLUSH_LIST_COUNT, + MONITOR_FLUSH_LIST_TOTAL_PAGE}, + + {"buffer_flush_list", "buffer", + "Number of page cleaner thread batches from flush_list", + MONITOR_SET_MEMBER, MONITOR_FLUSH_LIST_TOTAL_PAGE, + MONITOR_FLUSH_LIST_COUNT}, + + {"buffer_flush_list_pages", "buffer", + "Pages queued by the page cleaner thread batch from flush_list", + MONITOR_SET_MEMBER, MONITOR_FLUSH_LIST_TOTAL_PAGE, + MONITOR_FLUSH_LIST_PAGES}, + + /* Cumulative counter for pages flushed by the page cleaner thread from LRU */ + {"buffer_flush_LRU_total_pages", "buffer", + "Total pages flushed by the page cleaner thread from LRU", + MONITOR_SET_OWNER, MONITOR_FLUSH_LRU_COUNT, + MONITOR_FLUSH_LRU_TOTAL_PAGE}, + + {"buffer_flush_LRU", "buffer", + "Number of page cleaner thread batches from LRU", + MONITOR_SET_MEMBER, MONITOR_FLUSH_LRU_TOTAL_PAGE, + MONITOR_FLUSH_LRU_COUNT}, + + {"buffer_flush_LRU_pages", "buffer", + "Pages queued by the page cleaner thread batch from LRU", + MONITOR_SET_MEMBER, MONITOR_FLUSH_LRU_TOTAL_PAGE, + MONITOR_FLUSH_LRU_PAGES}, + /* Cumulative counter for LRU batch scan */ {"buffer_LRU_batch_scanned", "buffer", "Total pages scanned as part of LRU batch", @@ -472,6 +504,22 @@ static monitor_info_t innodb_counter_info[] = MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_TOTAL_PAGE, MONITOR_LRU_BATCH_PAGES}, + /* Cumulative counter for pages flushed by the LRU thread */ + {"buffer_LRU_thread_total_pages", "buffer", + "Total pages flushed by the LRU thread", + MONITOR_SET_OWNER, MONITOR_LRU_THREAD_COUNT, + 
MONITOR_LRU_THREAD_TOTAL_PAGE}, + + {"buffer_LRU_thread", "buffer", + "Number of LRU thread batches", + MONITOR_SET_MEMBER, MONITOR_LRU_THREAD_TOTAL_PAGE, + MONITOR_LRU_THREAD_COUNT}, + + {"buffer_LRU_thread_pages", "buffer", + "Pages queued by the LRU thread batch", + MONITOR_SET_MEMBER, MONITOR_LRU_THREAD_TOTAL_PAGE, + MONITOR_LRU_THREAD_PAGES}, + /* Cumulative counter for single page LRU scans */ {"buffer_LRU_single_flush_scanned", "buffer", "Total pages scanned as part of single page LRU flush", diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index ca9d414..0208239 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1832,6 +1832,7 @@ innobase_start_or_create_for_mysql(void) + 1 /* fts_optimize_thread */ + 1 /* recv_writer_thread */ + 1 /* buf_flush_page_cleaner_thread */ + + 1 /* buf_flush_lru_thread */ + 1 /* trx_rollback_or_clean_all_recovered */ + 128 /* added as margin, for use of InnoDB Memcached etc. */ @@ -2733,6 +2734,7 @@ files_checked: if (!srv_read_only_mode) { os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); + os_thread_create(buf_flush_lru_thread, NULL, NULL); } #ifdef UNIV_DEBUG