diff --git a/mysql-8.0.19/storage/innobase/buf/buf0buf.cc b/mysql-8.0.19/storage/innobase/buf/buf0buf.cc index 28c6c2cd6..768acdec3 100644 --- a/mysql-8.0.19/storage/innobase/buf/buf0buf.cc +++ b/mysql-8.0.19/storage/innobase/buf/buf0buf.cc @@ -5145,7 +5145,6 @@ bool buf_page_io_complete(buf_page_t *bpage, bool evict, bool sync) { enum buf_io_fix io_type; buf_pool_t *buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - bool have_LRU_mutex = false; ut_a(buf_page_in_file(bpage)); @@ -5157,7 +5156,7 @@ bool buf_page_io_complete(buf_page_t *bpage, bool evict, bool sync) { io_type = buf_page_get_io_fix_unlocked(bpage); ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); - + const auto flush_type = buf_page_get_flush_type(bpage); if (io_type == BUF_IO_READ) { page_no_t read_page_no; space_id_t read_space_id; @@ -5312,25 +5311,63 @@ bool buf_page_io_complete(buf_page_t *bpage, bool evict, bool sync) { } } - mutex_enter(&buf_pool->LRU_list_mutex); + bool has_LRU_mutex = false; + + auto block_mutex = buf_page_get_mutex(bpage); + + bool lock_free_flush = false; - BPageMutex *page_mutex = buf_page_get_mutex(bpage); - mutex_enter(page_mutex); + /* If the page was modified after its SX-latch was released, or the latch + cannot be re-acquired without waiting, set should_maintain_lru = false so that the LRU and flush lists are left untouched. */ + bool should_maintain_lru = true; - if (io_type == BUF_IO_WRITE && - ( + if (!(!srv_use_doublewrite_buf || buf_dblwr == NULL || srv_read_only_mode || + fsp_is_system_temporary(bpage->id.space())) && + flush_type == BUF_FLUSH_LRU && uncompressed) { + lock_free_flush = true; + ut_a(bpage->reserved_modification <= bpage->newest_modification); + if (bpage->reserved_modification != bpage->newest_modification) { + should_maintain_lru = false; + }else if (rw_lock_sx_lock_nowait(&((buf_block_t *)bpage)->lock, BUF_IO_WRITE)) { + if (bpage->reserved_modification != bpage->newest_modification) { + should_maintain_lru = false; + 
rw_lock_sx_unlock_gen(&((buf_block_t *)bpage)->lock, BUF_IO_WRITE); + } + } else { + should_maintain_lru = false; + } + } + + if (io_type == BUF_IO_WRITE && should_maintain_lru) { + /* We decide whether or not to evict the page from the + LRU list based on the flush_type. + - BUF_FLUSH_LIST: don't evict + - BUF_FLUSH_LRU: always evict + - BUF_FLUSH_SINGLE_PAGE: eviction preference is passed + by the caller explicitly. */ + ut_a(!(flush_type == BUF_FLUSH_LIST && evict)); + if (flush_type == BUF_FLUSH_LRU) { + evict = true; + } + if (evict #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - /* to keep consistency at buf_LRU_insert_zip_clean() */ - buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY || + /* The LRU mutex is required on debug in this path: + buf_flush_write_complete (called later in this method) -> + buf_flush_remove -> buf_LRU_insert_zip_clean(). + It is safe to query the page state without mutex protection, as + transition to BUF_BLOCK_ZIP_DIRTY is possible only when the page + descriptor is initialized. Assuming this thread has the IO + responsibility (which is assured earlier in this method), the + transitions from the BUF_BLOCK_ZIP_DIRTY are only allowed from this + thread and no one else can modify the state. 
*/ + || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU || - buf_page_get_flush_type(bpage) == BUF_FLUSH_SINGLE_PAGE || - (opt_read_only_instance && evict))) { - - have_LRU_mutex = true; /* optimistic */ - } else { - mutex_exit(&buf_pool->LRU_list_mutex); + ) { + has_LRU_mutex = true; + mutex_enter(&buf_pool->LRU_list_mutex); + } } + mutex_enter(block_mutex); #ifdef UNIV_IBUF_COUNT_DEBUG if (io_type == BUF_IO_WRITE || uncompressed) { @@ -5351,7 +5388,7 @@ bool buf_page_io_complete(buf_page_t *bpage, bool evict, bool sync) { switch (io_type) { case BUF_IO_READ: - ut_ad(!have_LRU_mutex); + ut_ad(!has_LRU_mutex); buf_page_set_io_fix(bpage, BUF_IO_NONE); @@ -5372,34 +5409,31 @@ bool buf_page_io_complete(buf_page_t *bpage, bool evict, bool sync) { break; case BUF_IO_WRITE: - /* Write means a flush operation: call the completion - routine in the flush system */ - buf_flush_write_complete(bpage); + if (lock_free_flush && should_maintain_lru == false) { + os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1); + mutex_exit(buf_page_get_mutex(bpage)); + } else { + /* Write means a flush operation: call the completion + routine in the flush system */ + buf_flush_write_complete(bpage); - if (uncompressed) { - rw_lock_sx_unlock_gen(&((buf_block_t *)bpage)->lock, BUF_IO_WRITE); - } + if (uncompressed) { + rw_lock_sx_unlock_gen(&((buf_block_t *)bpage)->lock, BUF_IO_WRITE); + } - os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1); - - /* We decide whether or not to evict the page from the - LRU list based on the flush_type. - * BUF_FLUSH_LIST: don't evict - * BUF_FLUSH_LRU: always evict - * BUF_FLUSH_SINGLE_PAGE: eviction preference is passed - by the caller explicitly. 
*/ - if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) { - evict = true; - ut_ad(have_LRU_mutex); - } - if (evict && buf_LRU_free_page(bpage, true)) { - have_LRU_mutex = false; - } else { - mutex_exit(buf_page_get_mutex(bpage)); - } - if (have_LRU_mutex) { - mutex_exit(&buf_pool->LRU_list_mutex); + os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1); + + ut_a(!(evict && !has_LRU_mutex)); + + if (evict && buf_LRU_free_page(bpage, true)) { + has_LRU_mutex = false; + } else { + mutex_exit(buf_page_get_mutex(bpage)); + } + if (has_LRU_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + } } break; diff --git a/mysql-8.0.19/storage/innobase/buf/buf0dblwr.cc b/mysql-8.0.19/storage/innobase/buf/buf0dblwr.cc index d7bb8d4d6..747d7ec44 100644 --- a/mysql-8.0.19/storage/innobase/buf/buf0dblwr.cc +++ b/mysql-8.0.19/storage/innobase/buf/buf0dblwr.cc @@ -835,9 +835,8 @@ static void buf_dblwr_check_block( to the datafile. It is the job of the caller to sync the datafile. */ static void buf_dblwr_write_block_to_datafile( const buf_page_t *bpage, /*!< in: page to write */ - bool sync) /*!< in: true if sync IO - is requested */ -{ + bool sync, /*!< in: true if sync IO is requested */ + page_t *frame) { ut_a(buf_page_in_file(bpage)); ulint type = IORequest::WRITE; @@ -863,15 +862,21 @@ static void buf_dblwr_write_block_to_datafile( /* Our IO API is common for both reads and writes and is therefore geared towards a non-const parameter. 
*/ + buf_block_t *block = NULL; - buf_block_t *block = - reinterpret_cast<buf_block_t *>(const_cast<buf_page_t *>(bpage)); - - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - buf_dblwr_check_page_lsn(block->frame); + if (frame == NULL) { + block = reinterpret_cast<buf_block_t *>(const_cast<buf_page_t *>(bpage)); + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + + frame = block->frame; + } + + ut_a(frame); + ut_a(block || sync == false); + buf_dblwr_check_page_lsn(frame); err = fil_io(request, sync, bpage->id, bpage->size, 0, - bpage->size.physical(), block->frame, block); + bpage->size.physical(), frame, block); ut_a(err == DB_SUCCESS); } @@ -945,7 +950,7 @@ try_again: for (ulint len2 = 0, i = 0; i < buf_dblwr->first_free; len2 += UNIV_PAGE_SIZE, i++) { - const buf_block_t *block; + buf_block_t *block; block = (buf_block_t *)buf_dblwr->buf_block_arr[i]; @@ -963,6 +968,14 @@ try_again: /* Check that the page as written to the doublewrite buffer has sane LSN values. */ buf_dblwr_check_page_lsn(write_buf + len2); + + if (block->page.flush_type == BUF_FLUSH_LRU && + block->page.zip.data == NULL) { + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + block->page.reserved_modification = block->page.newest_modification; + rw_lock_sx_unlock_gen(reinterpret_cast<rw_lock_t *>(&(block->lock)), + BUF_IO_WRITE); + } } /* Write out the first block of the doublewrite buffer */ @@ -1017,8 +1030,18 @@ flush: loop termination condition then we'll end up dispatching the same block twice from two different threads. 
*/ ut_ad(first_free == buf_dblwr->first_free); - for (ulint i = 0; i < first_free; i++) { - buf_dblwr_write_block_to_datafile(buf_dblwr->buf_block_arr[i], false); + for (ulint len3 = 0, i = 0; i < first_free; len3 += UNIV_PAGE_SIZE, i++) { + buf_block_t *block; + + block = (buf_block_t *)buf_dblwr->buf_block_arr[i]; + if (buf_dblwr->buf_block_arr[i]->flush_type == BUF_FLUSH_LRU && + block->page.zip.data == NULL) { + buf_dblwr_write_block_to_datafile(buf_dblwr->buf_block_arr[i], false, + write_buf + len3); + } else { + buf_dblwr_write_block_to_datafile(buf_dblwr->buf_block_arr[i], false, + NULL); + } } /* Wake possible simulated aio thread to actually post the @@ -1219,7 +1242,7 @@ retry: /* We know that the write has been flushed to disk now and during recovery we will find it in the doublewrite buffer blocks. Next do the write to the intended position. */ - buf_dblwr_write_block_to_datafile(bpage, sync); + buf_dblwr_write_block_to_datafile(bpage, sync, NULL); } /** Constructor diff --git a/mysql-8.0.19/storage/innobase/include/buf0buf.h b/mysql-8.0.19/storage/innobase/include/buf0buf.h index 65d24e147..2f074454e 100644 --- a/mysql-8.0.19/storage/innobase/include/buf0buf.h +++ b/mysql-8.0.19/storage/innobase/include/buf0buf.h @@ -1283,6 +1283,10 @@ class buf_page_t { block mutex. */ /* @{ */ + lsn_t reserved_modification; + /* Value of newest_modification saved before the SX-latch is released, + used to determine whether the page was modified during disk flushing. */ + UT_LIST_NODE_T(buf_page_t) LRU; /*!< node of the LRU list */ #ifdef UNIV_DEBUG