diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index bb000b1..00ef13d 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -528,6 +528,143 @@ static MY_ATTRIBUTE((warn_unused_result)) bool buf_flush_or_remove_page(
   return (processed);
 }
 
+/** Optimistically remove or flush all dirty pages belonging to a given
+tablespace from one buffer pool instance when we are deleting the data
+file(s) of that tablespace. The pages still remain a part of the LRU and
+are evicted from the list as they age towards its tail. This is a
+best-effort pass: io-fixed pages are skipped and must be handled by a
+following pessimistic buf_flush_or_remove_pages() round.
+@param[in,out]  buf_pool  buffer pool instance
+@param[in]      id        target space id for which to remove or flush pages
+@param[in]      observer  flush observer; if nullptr, pages are selected by id
+@param[in]      flush     flush to disk if true, else remove without flushing
+@param[in]      trx       to check if the operation must be interrupted,
+                          can be nullptr
+@retval DB_FAIL         caller must follow up with a pessimistic pass
+@retval DB_INTERRUPTED  if trx was interrupted */
+static MY_ATTRIBUTE((warn_unused_result)) dberr_t
+buf_flush_or_remove_pages_optimistic(buf_pool_t *buf_pool, ulint id,
+                                     FlushObserver *observer, bool flush,
+                                     const trx_t *trx) {
+  ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+  buf_page_t *prev;
+  buf_page_t *bpage;
+  ulint processed = 0;
+
+  /* The restart hint reported by buf_flush_or_remove_page() and
+  buf_flush_try_yield() is deliberately ignored: an optimistic pass never
+  restarts the scan. */
+  bool dummy_must_restart = false;
+
+  buf_flush_list_mutex_enter(buf_pool);
+
+  for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); bpage != nullptr;
+       bpage = prev) {
+    ut_a(buf_page_in_file(bpage));
+    ut_ad(bpage->in_flush_list);
+
+    /* Save the previous link because once we free the page we can't rely
+    on the links. */
+    prev = UT_LIST_GET_PREV(list, bpage);
+
+    /* If flush observer is nullptr, select pages by space id, otherwise
+    select pages attached to this flush observer. */
+    if ((observer != nullptr && observer != bpage->get_flush_observer()) ||
+        (observer == nullptr && id != bpage->id.space())) {
+      /* Skip this block, as it does not belong to the target space. */
+    } else {
+      /* Pin a prev block with a sticky fix so that it cannot leave the
+      flush_list while the mutexes are released inside
+      buf_flush_or_remove_page(); the scan can then safely resume from
+      prev. Blocks undergoing i/o (or otherwise pinned) are skipped, so
+      some dirty pages may be missed here; buf_flush_or_remove_pages()
+      later guarantees that all remaining pages of the space are handled.
+      A pinnable block is assumed to be close by, since the number of
+      concurrent i/o operations in the system is limited. */
+      while (prev != nullptr) {
+        ut_ad(prev->in_flush_list);
+
+        /* Fast dirty check on the io_fix without the block mutex. */
+        if (buf_page_get_io_fix_unlocked(prev) == BUF_IO_NONE) {
+          BPageMutex *prev_block_mutex = buf_page_get_mutex(prev);
+
+          mutex_enter(prev_block_mutex);
+
+          /* Recheck the io_fix after taking the block mutex. */
+          if (buf_page_get_io_fix(prev) == BUF_IO_NONE) {
+            /* "Fix" the block so that its position cannot change after
+            the LRU list mutex and block mutexes are released. */
+            buf_page_set_sticky(prev);
+            mutex_exit(prev_block_mutex);
+            break;
+          }
+
+          mutex_exit(prev_block_mutex);
+        }
+
+        prev = UT_LIST_GET_PREV(list, prev);
+      }
+
+      /* Best-effort run: whether the flush succeeded does not matter;
+      any page missed here is handled by the pessimistic pass. The empty
+      if body silences the warn_unused_result warning. */
+      if (buf_flush_or_remove_page(buf_pool, bpage, flush,
+                                   &dummy_must_restart)) {
+      }
+
+      if (prev != nullptr) {
+        BPageMutex *prev_block_mutex = buf_page_get_mutex(prev);
+
+        mutex_enter(prev_block_mutex);
+        buf_page_unset_sticky(prev);
+        mutex_exit(prev_block_mutex);
+      }
+    }
+
+    ++processed;
+
+    /* Yield if we have hogged the CPU and mutexes for too long. */
+    if (buf_flush_try_yield(buf_pool, prev, processed,
+                            &dummy_must_restart)) {
+      /* Reset the batch size counter if we had to yield. */
+      processed = 0;
+    }
+
+#ifdef DBUG_OFF
+    /* NOTE(review): DBUG_EXECUTE_IF expands to a no-op when DBUG_OFF is
+    defined, so this block is dead code; it mirrors the identical
+    pre-existing guard in buf_flush_or_remove_pages() -- confirm whether
+    #ifndef was intended. */
+    if (flush) {
+      DBUG_EXECUTE_IF("ib_export_flush_crash", static ulint n_pages;
+                      if (++n_pages == 4) { DBUG_SUICIDE(); });
+    }
+#endif /* DBUG_OFF */
+
+    /* The check for trx being interrupted is expensive; only perform it
+    right after a yield, when processed has been reset to zero. */
+    if (!processed && trx && trx_is_interrupted(trx)) {
+      if (trx->flush_observer != nullptr) {
+        if (flush) {
+          trx->flush_observer->interrupted();
+        } else {
+          /* We should remove all pages with the flush observer. */
+          continue;
+        }
+      }
+
+      buf_flush_list_mutex_exit(buf_pool);
+      return (DB_INTERRUPTED);
+    }
+  }
+
+  buf_flush_list_mutex_exit(buf_pool);
+
+  /* Always return DB_FAIL so that the caller initiates a round of
+  pessimistic flush covering whatever this pass missed. */
+  return (DB_FAIL);
+}
+
 /** Remove all dirty pages belonging to a given tablespace inside a specific
 buffer pool instance when we are deleting the data file(s) of that
 tablespace. The pages still remain a part of LRU and are evicted from
@@ -658,6 +795,25 @@ static void buf_flush_dirty_pages(buf_pool_t *buf_pool, space_id_t id,
                                   const trx_t *trx, bool strict) {
   dberr_t err;
 
+  /* Optimistic first pass: flush or remove as many of the target dirty
+  pages from the flush_list as possible. Most pages are typically handled
+  here, which greatly shortens the pessimistic loop below. */
+  mutex_enter(&buf_pool->LRU_list_mutex);
+  err = buf_flush_or_remove_pages_optimistic(buf_pool, id, observer, flush,
+                                             trx);
+  mutex_exit(&buf_pool->LRU_list_mutex);
+
+  ut_ad(buf_flush_validate(buf_pool));
+
+  if (err == DB_INTERRUPTED && observer != nullptr) {
+    /* Only a flushing pass can be interrupted with an observer; fall back
+    to a removal pass and let the loop below finish the job. */
+    ut_a(flush);
+
+    flush = false;
+    err = DB_FAIL;
+  }
+
   do {
     /* TODO: it should be possible to avoid locking the LRU
     list mutex here. */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index f4003dc..6b8c2ec 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -8567,9 +8567,8 @@ class alter_part_normal : public alter_part {
                  dd::Partition *new_part) override {
     ut_ad(m_old != nullptr);
 
-    btr_drop_ahi_for_table(*m_old);
-
     mutex_enter(&dict_sys->mutex);
+    btr_drop_ahi_for_table(*m_old);
     dd_table_close(*m_old, nullptr, nullptr, true);
     dict_table_remove_from_cache(*m_old);
     *m_old = nullptr;