------------------------------------------------------------
revno: 8781
fixes bugs: https://launchpad.net/bugs/1411692 https://launchpad.net/bugs/1411694
committer: Laurynas Biveinis
branch nick: mysql-5.7-percona-patches
timestamp: Fri 2015-01-16 21:30:41 +0200
message:
Fix http://bugs.mysql.com/bug.php?id=75534 (Solve buffer pool mutex contention by splitting it).

The patch:

- Removes the buffer pool mutex. Introduces several new list/hash-protecting mutexes, and access to several variables without any mutex; for those, atomic variables or os_rmb/os_wmb are used where deemed appropriate. volatile is not used.

  The new mutexes are
  - LRU_list_mutex for the LRU_list;
  - zip_free_mutex for the zip_free arrays;
  - zip_hash_mutex for the zip_hash hash and the in_zip_hash flag;
  - free_list_mutex for the free_list and the withdraw list. If desired, a separate withdraw_list_mutex may easily be split off in the future.

  Protection of buf_pool->watch[] and of all bpages has been moved to page_hash.

  The variables switched from buffer pool mutex protection to atomic operations and/or os_rmb/os_wmb (the uses of the latter, while I tried to make them correct, might be very debatable) are:
  - srv_buf_pool_old_size, srv_buf_pool_size, srv_buf_pool_curr_size, srv_buf_pool_base_size
  - buf_pool->buddy_stat[i].used
  - buf_pool->curr_size, n_chunks_new

- Reduces the critical section length, or removes it completely, for buf_block_buf_fix_inc/dec calls.

- Exploits the fact that freed pages must have no pointers to them from the buffer pool or from any thread other than the freeing one, in order to remove redundant locking. The same applies to freshly allocated pages before any pointers to them are published. This, however, necessitates removing some of the debug checks that scan buffer pool chunks directly (buf_block_align), as they have no way to freeze such blocks.

- Related to the above, adds more consistency asserts to buf_page_set_state, as well as some scalability asserts (!mutex_own).

- Rewrites buf_buddy_alloc so that it no longer requires the buffer pool mutex at the start (previously that mutex might be released mid-call, and this fact had to be propagated to the caller so that it could re-check its state). It is now called with mutexes unlocked, and the algorithm of its caller buf_page_init_for_read has been simplified: all its allocations now happen with mutexes unlocked.

- buf_flush_LRU_list_batch uses mutex_enter_nowait to skip over any currently-locked blocks.

- Removes some outdated buf0buf.cc comments.

Bugs fixed fully or partially, besides the current one:
- http://bugs.mysql.com/bug.php?id=64344: buf_page_init_for_read no longer holds mutexes while allocating memory. It should also be easier to fix buf_LRU_free_page now.
- http://bugs.mysql.com/bug.php?id=75503
- http://bugs.mysql.com/bug.php?id=75504

diff:
=== modified file 'storage/innobase/btr/btr0bulk.cc' --- storage/innobase/btr/btr0bulk.cc 2014-08-19 05:43:25 +0000 +++ storage/innobase/btr/btr0bulk.cc 2015-01-16 19:30:41 +0000 @@ -568,9 +568,7 @@ #endif /* UNIV_DEBUG */ /* We fix the block because we will re-pin it soon. 
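A minimal sketch of the locking scheme described in the commit message, under simplified assumptions: std::mutex and std::atomic stand in for InnoDB's ib_mutex_t, os_atomic_*() and os_rmb/os_wmb primitives, and every name below (buf_pool_sketch, take_from_free_list, note_pending_read, publish_pool_size, read_pool_size) is invented for illustration and does not appear in the patch.

    #include <atomic>
    #include <list>
    #include <mutex>

    struct page_t { bool is_old; };

    /* The single buf_pool->mutex is replaced by one mutex per data
    structure, so threads touching unrelated lists no longer contend. */
    struct buf_pool_sketch {
        std::mutex LRU_list_mutex;   /* protects the LRU list */
        std::mutex free_list_mutex;  /* protects free and withdraw lists */
        std::mutex zip_free_mutex;   /* protects the zip_free arrays */
        std::mutex zip_hash_mutex;   /* protects zip_hash / in_zip_hash */

        std::list<page_t*> LRU;
        std::list<page_t*> free_list;
        std::list<page_t*> withdraw;

        /* Counters formerly guarded by the buffer pool mutex become
        atomics (the patch uses os_atomic_increment_ulint() and friends). */
        std::atomic<unsigned long> n_pend_reads{0};
        std::atomic<unsigned long> buddy_used{0};
    };

    /* A free-list operation now takes only free_list_mutex, leaving the
    LRU list available to other threads in parallel. */
    page_t* take_from_free_list(buf_pool_sketch& pool)
    {
        std::lock_guard<std::mutex> guard(pool.free_list_mutex);
        if (pool.free_list.empty()) {
            return nullptr;
        }
        page_t* page = pool.free_list.front();
        pool.free_list.pop_front();
        return page;
    }

    /* Statistics updates need no list mutex at all any more. */
    void note_pending_read(buf_pool_sketch& pool)
    {
        pool.n_pend_reads.fetch_add(1, std::memory_order_relaxed);
    }

    /* Size variables such as srv_buf_pool_size are written before a write
    barrier (os_wmb in the patch) and read after a read barrier (os_rmb);
    release/acquire on an atomic is a close portable approximation. */
    std::atomic<unsigned long> pool_size_bytes{0};

    void publish_pool_size(unsigned long bytes)
    {
        pool_size_bytes.store(bytes, std::memory_order_release); /* ~ os_wmb */
    }

    unsigned long read_pool_size()
    {
        return pool_size_bytes.load(std::memory_order_acquire);  /* ~ os_rmb */
    }

The sketch only shows the shape of the split; the actual lock ordering (for example, LRU_list_mutex taken before a block mutex) is the one suggested by the hunks below.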
*/ - buf_page_mutex_enter(m_block); buf_block_buf_fix_inc(m_block, __FILE__, __LINE__); - buf_page_mutex_exit(m_block); mtr_commit(m_mtr); } @@ -598,9 +596,7 @@ ut_ad(m_block != NULL); } - buf_page_mutex_enter(m_block); buf_block_buf_fix_dec(m_block); - buf_page_mutex_exit(m_block); #ifdef UNIV_DEBUG page_header_set_ptr(m_page, NULL, PAGE_HEAP_TOP, === modified file 'storage/innobase/btr/btr0cur.cc' --- storage/innobase/btr/btr0cur.cc 2014-08-26 11:08:37 +0000 +++ storage/innobase/btr/btr0cur.cc 2015-01-16 19:30:41 +0000 @@ -467,9 +467,7 @@ if (btr_page_get_prev(buf_block_get_frame(block), mtr) == left_page_no) { /* adjust buf_fix_count */ - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); *latch_mode = mode; return(true); @@ -486,9 +484,7 @@ } unpin_failed: /* unpin the block */ - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); return(false); @@ -6260,33 +6256,40 @@ mtr_t* mtr) /*!< in: mini-transaction to commit */ { buf_pool_t* buf_pool = buf_pool_from_block(block); - ulint space = block->page.id.space(); - ulint page_no = block->page.id.page_no(); + page_id_t page_id(block->page.id.space(), + block->page.id.page_no()); + bool freed = false; ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table)); mtr_commit(mtr); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + buf_page_mutex_enter(block); /* Only free the block if it is still allocated to the same file page. */ - if (buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE - && block->page.id.space() == space - && block->page.id.page_no() == page_no) { - - if (!buf_LRU_free_page(&block->page, all) - && all && block->page.zip.data) { + if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE + && page_id.equals_to(block->page.id)) { + + freed = buf_LRU_free_page(&block->page, all); + + if (!freed && all && block->page.zip.data + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE + && page_id.equals_to(block->page.id)) { + /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ - buf_LRU_free_page(&block->page, false); + freed = buf_LRU_free_page(&block->page, false); } } - buf_pool_mutex_exit(buf_pool); + if (!freed) { + mutex_exit(&buf_pool->LRU_list_mutex); + buf_page_mutex_exit(block); + } } /** Helper class used while writing blob pages, during insert or update. */ === modified file 'storage/innobase/btr/btr0sea.cc' --- storage/innobase/btr/btr0sea.cc 2014-08-26 11:08:37 +0000 +++ storage/innobase/btr/btr0sea.cc 2015-01-16 19:30:41 +0000 @@ -294,12 +294,9 @@ btr_search_enable(void) /*====================*/ { - buf_pool_mutex_enter_all(); - if (srv_buf_pool_old_size != srv_buf_pool_size) { - buf_pool_mutex_exit_all(); + os_rmb; + if (srv_buf_pool_old_size != srv_buf_pool_size) return; - } - buf_pool_mutex_exit_all(); rw_lock_x_lock(&btr_search_latch); @@ -1036,11 +1033,6 @@ #ifdef UNIV_SEARCH_PERF_STAT btr_search_n_succ++; #endif - if (!has_search_latch && buf_page_peek_if_too_old(&block->page)) { - - buf_page_make_young(&block->page); - } - /* Increment the page get statistics though we did not really fix the page: for user info only */ @@ -1883,7 +1875,6 @@ rec_offs_init(offsets_); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); cell_count = hash_get_n_cells(btr_search_sys->hash_index); @@ -1891,11 +1882,9 @@ /* We release btr_search_latch every once in a while to give other queries a chance to run. 
*/ if ((i != 0) && ((i % chunk_size) == 0)) { - buf_pool_mutex_exit_all(); rw_lock_x_unlock(&btr_search_latch); os_thread_yield(); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); if (cell_count != hash_get_n_cells( btr_search_sys->hash_index)) { @@ -1913,13 +1902,16 @@ hash_get_nth_cell(btr_search_sys->hash_index, i)->node; for (; node != NULL; node = node->next) { - const buf_block_t* block + buf_block_t* block = buf_block_align((byte*) node->data); const buf_block_t* hash_block; buf_pool_t* buf_pool; index_id_t page_index_id; buf_pool = buf_pool_from_bpage((buf_page_t*) block); + /* Prevent BUF_BLOCK_FILE_PAGE -> BUF_BLOCK_REMOVE_HASH + transition until we lock the block mutex */ + mutex_enter(&buf_pool->LRU_list_mutex); if (UNIV_LIKELY(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE)) { @@ -1953,6 +1945,9 @@ == BUF_BLOCK_REMOVE_HASH); } + mutex_enter(&block->mutex); + mutex_exit(&buf_pool->LRU_list_mutex); + ut_a(!dict_index_is_ibuf(block->index)); ut_ad(block->page.id.space() == block->index->space); @@ -2001,6 +1996,8 @@ n_page_dumps++; } } + + mutex_exit(&block->mutex); } } @@ -2008,11 +2005,9 @@ /* We release btr_search_latch every once in a while to give other queries a chance to run. */ if (i != 0) { - buf_pool_mutex_exit_all(); rw_lock_x_unlock(&btr_search_latch); os_thread_yield(); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); if (cell_count != hash_get_n_cells( btr_search_sys->hash_index)) { @@ -2033,7 +2028,6 @@ } } - buf_pool_mutex_exit_all(); rw_lock_x_unlock(&btr_search_latch); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); === modified file 'storage/innobase/buf/buf0buddy.cc' --- storage/innobase/buf/buf0buddy.cc 2014-07-10 10:46:02 +0000 +++ storage/innobase/buf/buf0buddy.cc 2015-01-16 19:30:41 +0000 @@ -196,6 +196,7 @@ ulint i) { CheckZipFree check(i); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_list_validate(buf_pool->zip_free[i], check); } @@ -213,7 +214,7 @@ { const ulint size = BUF_BUDDY_LOW << i; - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(!ut_align_offset(buf, size)); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); @@ -286,7 +287,7 @@ ulint i) /*!< in: index of buf_pool->zip_free[] */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(buf_pool->zip_free[i].start != buf); buf_buddy_stamp_free(buf, i); @@ -306,7 +307,7 @@ ulint i) /*!< in: index of buf_pool->zip_free[] */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(buf_buddy_check_free(buf_pool, buf, i)); UT_LIST_REMOVE(buf_pool->zip_free[i], buf); @@ -325,17 +326,15 @@ { buf_buddy_free_t* buf; - ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(i < BUF_BUDDY_SIZES); ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + mutex_enter(&buf_pool->zip_free_mutex); ut_d(buf_buddy_list_validate(buf_pool, i)); buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); - if (buf_pool->curr_size < buf_pool->old_size - && UT_LIST_GET_LEN(buf_pool->withdraw) - < buf_pool->withdraw_target) { + if (buf_get_withdraw_depth(buf_pool)) { while (buf != NULL && buf_frame_will_withdrawn( @@ -347,7 +346,10 @@ if (buf) { buf_buddy_remove_from_free(buf_pool, buf, i); + mutex_exit(&buf_pool->zip_free_mutex); + } else if (i + 1 < BUF_BUDDY_SIZES) { + mutex_exit(&buf_pool->zip_free_mutex); /* Attempt to split. 
*/ buf = buf_buddy_alloc_zip(buf_pool, i + 1); @@ -357,9 +359,13 @@ buf->stamp.bytes + (BUF_BUDDY_LOW << i)); + mutex_enter(&buf_pool->zip_free_mutex); ut_ad(!buf_pool_contains_zip(buf_pool, buddy)); buf_buddy_add_to_free(buf_pool, buddy, i); + mutex_exit(&buf_pool->zip_free_mutex); } + } else { + mutex_exit(&buf_pool->zip_free_mutex); } if (buf) { @@ -388,12 +394,12 @@ { const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); buf_page_t* bpage; - buf_block_t* block; - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE)); + mutex_enter(&buf_pool->zip_hash_mutex); + HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage, ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY && bpage->in_zip_hash && !bpage->in_page_hash), @@ -405,16 +411,15 @@ ut_d(bpage->in_zip_hash = FALSE); HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage); + ut_ad(buf_pool->buddy_n_frames > 0); + ut_d(buf_pool->buddy_n_frames--); + + mutex_exit(&buf_pool->zip_hash_mutex); + ut_d(memset(buf, 0, UNIV_PAGE_SIZE)); UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE); - block = (buf_block_t*) bpage; - buf_page_mutex_enter(block); - buf_LRU_block_free_non_file_page(block); - buf_page_mutex_exit(block); - - ut_ad(buf_pool->buddy_n_frames > 0); - ut_d(buf_pool->buddy_n_frames--); + buf_LRU_block_free_non_file_page(reinterpret_cast(bpage)); } /**********************************************************************//** @@ -427,7 +432,6 @@ { buf_pool_t* buf_pool = buf_pool_from_block(block); const ulint fold = BUF_POOL_ZIP_FOLD(block); - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); @@ -439,9 +443,12 @@ ut_ad(!block->page.in_page_hash); ut_ad(!block->page.in_zip_hash); ut_d(block->page.in_zip_hash = TRUE); + + mutex_enter(&buf_pool->zip_hash_mutex); HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page); ut_d(buf_pool->buddy_n_frames++); + mutex_exit(&buf_pool->zip_hash_mutex); } /**********************************************************************//** @@ -459,6 +466,7 @@ of buf_pool->zip_free[] */ { ulint offs = BUF_BUDDY_LOW << j; + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(j <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); ut_ad(j >= i); @@ -481,27 +489,18 @@ } /**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. -The buf_pool_mutex may be released and reacquired. +Allocate a block. @return allocated block, never NULL */ - void* buf_buddy_alloc_low( /*================*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint i, /*!< in: index of buf_pool->zip_free[], + ulint i) /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that - will be assigned TRUE if storage was - allocated from the LRU list and - buf_pool->mutex was temporarily - released */ { buf_block_t* block; - ut_ad(lru); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); @@ -523,24 +522,24 @@ } /* Try replacing an uncompressed page in the buffer pool. 
*/ - buf_pool_mutex_exit(buf_pool); block = buf_LRU_get_free_block(buf_pool); - *lru = TRUE; - buf_pool_mutex_enter(buf_pool); alloc_big: buf_buddy_block_register(block); + mutex_enter(&buf_pool->zip_free_mutex); block = (buf_block_t*) buf_buddy_alloc_from( buf_pool, block->frame, i, BUF_BUDDY_SIZES); + mutex_exit(&buf_pool->zip_free_mutex); func_exit: - buf_pool->buddy_stat[i].used++; + os_atomic_increment_ulint(&buf_pool->buddy_stat[i].used, 1); return(block); } /**********************************************************************//** -Try to relocate a block. +Try to relocate a block. The caller must hold zip_free_mutex, and this +function will release and lock it again. @return true if relocated */ static bool @@ -559,7 +558,7 @@ ulint space; ulint offset; - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(!ut_align_offset(src, size)); ut_ad(!ut_align_offset(dst, size)); @@ -578,11 +577,15 @@ ut_ad(space != BUF_BUDDY_STAMP_FREE); + mutex_exit(&buf_pool->zip_free_mutex); + const page_id_t page_id(space, offset); /* If space,offset is bogus, then we know that the buf_page_hash_get_low() call below will return NULL. */ if (!force && buf_pool != buf_pool_get(page_id)) { + + mutex_enter(&buf_pool->zip_free_mutex); return(false); } @@ -601,6 +604,7 @@ rw_lock_x_unlock(hash_lock); if (!force || space != 0 || offset != 0) { + mutex_enter(&buf_pool->zip_free_mutex); return(false); } @@ -619,6 +623,7 @@ } if (bpage == NULL) { + mutex_enter(&buf_pool->zip_free_mutex); return(false); } } @@ -631,6 +636,7 @@ rw_lock_x_unlock(hash_lock); + mutex_enter(&buf_pool->zip_free_mutex); return(false); } @@ -642,6 +648,8 @@ mutex_enter(block_mutex); + mutex_enter(&buf_pool->zip_free_mutex); + if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ uintmax_t usec = ut_time_us(NULL); @@ -684,17 +692,19 @@ { buf_buddy_free_t* buddy; - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(i <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + + mutex_enter(&buf_pool->zip_free_mutex); + ut_ad(buf_pool->buddy_stat[i].used > 0); - - buf_pool->buddy_stat[i].used--; + os_atomic_decrement_ulint(&buf_pool->buddy_stat[i].used, 1); recombine: UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i); if (i == BUF_BUDDY_SIZES) { + mutex_exit(&buf_pool->zip_free_mutex); buf_buddy_block_free(buf_pool, buf); return; } @@ -763,13 +773,15 @@ buf_buddy_add_to_free(buf_pool, reinterpret_cast(buf), i); + mutex_exit(&buf_pool->zip_free_mutex); } -/** Reallocate a block. +/** Try to reallocate a block. @param[in] buf_pool buffer pool instance @param[in] buf block to be reallocated, must be pointed to by the buffer pool @param[in] size block size, up to UNIV_PAGE_SIZE +@retval true if succeeded or if failed because the block was fixed @retval false if failed because of no free blocks. 
*/ bool @@ -781,7 +793,6 @@ buf_block_t* block = NULL; ulint i = buf_buddy_get_slot(size); - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(i <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); @@ -802,23 +813,29 @@ buf_buddy_block_register(block); + mutex_enter(&buf_pool->zip_free_mutex); block = reinterpret_cast( buf_buddy_alloc_from( buf_pool, block->frame, i, BUF_BUDDY_SIZES)); + } else { + mutex_enter(&buf_pool->zip_free_mutex); } - buf_pool->buddy_stat[i].used++; + os_atomic_increment_ulint(&buf_pool->buddy_stat[i].used, 1); /* Try to relocate the buddy of buf to the free block. */ if (buf_buddy_relocate(buf_pool, buf, block, i, true)) { + mutex_exit(&buf_pool->zip_free_mutex); /* succeeded */ buf_buddy_free_low(buf_pool, buf, i); - } else { - /* failed */ - buf_buddy_free_low(buf_pool, block, i); + return(true); } - return(true); /* free_list was enough */ + /* failed */ + mutex_exit(&buf_pool->zip_free_mutex); + buf_buddy_free_low(buf_pool, block, i); + + return(false); } /** Combine all pairs of free buddies. @@ -828,7 +845,7 @@ buf_buddy_condense_free( buf_pool_t* buf_pool) { - ut_ad(buf_pool_mutex_own(buf_pool)); + mutex_enter(&buf_pool->zip_free_mutex); ut_ad(buf_pool->curr_size < buf_pool->old_size); for (ulint i = 0; i < UT_ARR_SIZE(buf_pool->zip_free); ++i) { @@ -873,7 +890,8 @@ /* Both buf and buddy are free. Try to combine them. */ buf_buddy_remove_from_free(buf_pool, buf, i); - buf_pool->buddy_stat[i].used++; + os_atomic_increment_ulint( + &buf_pool->buddy_stat[i].used, 1); buf_buddy_free_low(buf_pool, buf, i); } @@ -881,4 +899,5 @@ buf = next; } } + mutex_exit(&buf_pool->zip_free_mutex); } === modified file 'storage/innobase/buf/buf0buf.cc' --- storage/innobase/buf/buf0buf.cc 2014-11-25 10:24:36 +0000 +++ storage/innobase/buf/buf0buf.cc 2015-01-16 19:30:41 +0000 @@ -78,21 +78,6 @@ IMPLEMENTATION OF THE BUFFER POOL ================================= -Performance improvement: ------------------------- -Thread scheduling in NT may be so slow that the OS wait mechanism should -not be used even in waiting for disk reads to complete. -Rather, we should put waiting query threads to the queue of -waiting jobs, and let the OS thread do something useful while the i/o -is processed. In this way we could remove most OS thread switches in -an i/o-intensive benchmark like TPC-C. - -A possibility is to put a user space thread library between the database -and NT. User space thread libraries might be very fast. - -SQL Server 7.0 can be configured to use 'fibers' which are lightweight -threads in NT. These should be studied. - Buffer frames and blocks ------------------------ Following the terminology of Gray and Reuter, we call the memory @@ -103,24 +88,9 @@ Buffer pool struct ------------------ -The buffer buf_pool contains a single mutex which protects all the +The buffer buf_pool contains several mutexes which protects all the control data structures of the buf_pool. The content of a buffer frame is protected by a separate read-write lock in its control block, though. -These locks can be locked and unlocked without owning the buf_pool->mutex. -The OS events in the buf_pool struct can be waited for without owning the -buf_pool->mutex. - -The buf_pool->mutex is a hot-spot in main memory, causing a lot of -memory bus traffic on multiprocessor systems when processors -alternately access the mutex. On our Pentium, the mutex is accessed -maybe every 10 microseconds. 
We gave up the solution to have mutexes -for each control block, for instance, because it seemed to be -complicated. - -A solution to reduce mutex contention of the buf_pool->mutex is to -create a separate mutex for the page hash table. On Pentium, -accessing the hash table takes 2 microseconds, about half -of the total buf_pool->mutex hold time. Control blocks -------------- @@ -135,16 +105,6 @@ address of a frame is divisible by the universal page size, which is a power of two. -We intend to make the buffer buf_pool size on-line reconfigurable, -that is, the buf_pool size can be changed without closing the database. -Then the database administarator may adjust it to be bigger -at night, for example. The control block array must -contain enough control blocks for the maximum buffer buf_pool size -which is used in the particular database. -If the buf_pool size is cut, we exploit the virtual memory mechanism of -the OS, and just refrain from using frames at high addresses. Then the OS -can swap them to disk. - The control blocks containing file pages are put to a hash table according to the file address of the page. We could speed up the access to an individual page by using @@ -1224,7 +1184,8 @@ } /********************************************************************//** -Allocates a chunk of buffer frames. +Allocates a chunk of buffer frames. If called for an existing buf_pool, its +free_list_mutex must be locked. @return chunk, or NULL on failure */ static buf_chunk_t* @@ -1338,7 +1299,8 @@ /*********************************************************************//** Finds a block in the buffer pool that points to a -given compressed page. +given compressed page. Used only to confirm that buffer pool does not contain a +given pointer, thus protected by zip_free_mutex. @return buffer block pointing to the compressed page, or NULL */ buf_block_t* @@ -1351,7 +1313,7 @@ buf_chunk_t* chunk = buf_pool->chunks; ut_ad(buf_pool); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); for (n = buf_pool->n_chunks; n--; chunk++) { buf_block_t* block = buf_chunk_contains_zip(chunk, data); @@ -1424,8 +1386,6 @@ ulint i; ulint curr_size = 0; - buf_pool_mutex_enter_all(); - for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; @@ -1436,8 +1396,7 @@ srv_buf_pool_curr_size = curr_size; srv_buf_pool_old_size = srv_buf_pool_size; srv_buf_pool_base_size = srv_buf_pool_size; - - buf_pool_mutex_exit_all(); + os_wmb; } /********************************************************************//** @@ -1459,15 +1418,16 @@ /* 1. 
Initialize general fields ------------------------------- */ - mutex_create("buf_pool", &buf_pool->mutex); - + mutex_create("buf_pool_lru_list", &buf_pool->LRU_list_mutex); + mutex_create("buf_pool_free_list", &buf_pool->free_list_mutex); + mutex_create("buf_pool_zip_free", &buf_pool->zip_free_mutex); + mutex_create("buf_pool_zip_hash", &buf_pool->zip_hash_mutex); + mutex_create("buf_pool_flush_state", &buf_pool->flush_state_mutex); mutex_create("buf_pool_zip", &buf_pool->zip_mutex); new(&buf_pool->allocator) ut_allocator(mem_key_buf_buf_pool); - buf_pool_mutex_enter(buf_pool); - if (buf_pool_size > 0) { buf_pool->n_chunks = buf_pool_size / srv_buf_pool_chunk_unit; @@ -1514,7 +1474,6 @@ chunk->mem, &chunk->mem_pfx); } ut_free(buf_pool->chunks); - buf_pool_mutex_exit(buf_pool); return(DB_ERROR); } @@ -1573,15 +1532,15 @@ FlushHp(buf_pool, &buf_pool->flush_list_mutex); /* Initialize the hazard pointer for LRU batches */ - new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->mutex); + new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->LRU_list_mutex); /* Initialize the iterator for LRU scan search */ - new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, &buf_pool->mutex); + new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, + &buf_pool->LRU_list_mutex); /* Initialize the iterator for single page scan search */ - new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex); - - buf_pool_mutex_exit(buf_pool); + new(&buf_pool->single_scan_itr) LRUItr(buf_pool, + &buf_pool->LRU_list_mutex); return(DB_SUCCESS); } @@ -1600,7 +1559,11 @@ buf_page_t* bpage; buf_page_t* prev_bpage = 0; - mutex_free(&buf_pool->mutex); + mutex_free(&buf_pool->LRU_list_mutex); + mutex_free(&buf_pool->free_list_mutex); + mutex_free(&buf_pool->zip_free_mutex); + mutex_free(&buf_pool->zip_hash_mutex); + mutex_free(&buf_pool->flush_state_mutex); mutex_free(&buf_pool->zip_mutex); mutex_free(&buf_pool->flush_list_mutex); @@ -1701,6 +1664,8 @@ btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); + os_wmb; + return(DB_SUCCESS); } @@ -1727,6 +1692,7 @@ /** Reallocate a control block. @param[in] buf_pool buffer pool instance @param[in] block pointer to control block +@retval true if succeeded or if failed because the block was fixed @retval false if failed because of no free blocks. */ static bool @@ -1737,8 +1703,7 @@ buf_block_t* new_block; ut_ad(buf_pool_withdrawing); - ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); new_block = buf_LRU_get_free_only(buf_pool); @@ -1827,6 +1792,8 @@ buf_flush_relocate_on_flush_list( &block->page, &new_block->page); } + /* At this point no outside pointers to block should exist */ + mutex_exit(&block->mutex); /* set other flags of buf_block_t */ new_block->check_index_page_at_flush @@ -1850,15 +1817,12 @@ buf_block_set_state(block, BUF_BLOCK_MEMORY); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); } else { rw_lock_x_unlock(hash_lock); mutex_exit(&block->mutex); /* free new_block */ - mutex_enter(&new_block->mutex); buf_LRU_block_free_non_file_page(new_block); - mutex_exit(&new_block->mutex); } return(true); /* free_list was enough */ @@ -1887,7 +1851,8 @@ va_end(ap); } -/** Determines if a block is intended to be withdrawn. +/** Determines if a block is intended to be withdrawn. The caller must ensure +that there was a sufficient memory barrier to read curr_size and old_size. 
@param[in] buf_pool buffer pool instance @param[in] block pointer to control block @retval true if will be withdrawn */ @@ -1898,7 +1863,6 @@ const buf_block_t* block) { ut_ad(buf_pool->curr_size < buf_pool->old_size); - ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool)); const buf_chunk_t* chunk = buf_pool->chunks + buf_pool->n_chunks_new; @@ -1916,7 +1880,8 @@ return(false); } -/** Determines if a frame is intended to be withdrawn. +/** Determines if a frame is intended to be withdrawn. The caller must ensure +that there was a sufficient memory barrier to read curr_size and old_size. @param[in] buf_pool buffer pool instance @param[in] ptr pointer to a frame @retval true if will be withdrawn */ @@ -1927,7 +1892,6 @@ const byte* ptr) { ut_ad(buf_pool->curr_size < buf_pool->old_size); - ut_ad(!buf_pool_resizing || buf_pool_mutex_own(buf_pool)); const buf_chunk_t* chunk = buf_pool->chunks + buf_pool->n_chunks_new; @@ -1958,23 +1922,26 @@ buf_block_t* block; ulint loop_count = 0; ulint i = buf_pool_index(buf_pool); + ulint lru_len; ib::info() << "buffer pool " << i << " : start to withdraw the last " << buf_pool->withdraw_target << " blocks."; /* Minimize buf_pool->zip_free[i] lists */ - buf_pool_mutex_enter(buf_pool); buf_buddy_condense_free(buf_pool); - buf_pool_mutex_exit(buf_pool); - + + mutex_enter(&buf_pool->LRU_list_mutex); + lru_len = UT_LIST_GET_LEN(buf_pool->LRU); + mutex_exit(&buf_pool->LRU_list_mutex); + + mutex_enter(&buf_pool->free_list_mutex); while (UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target) { /* try to withdraw from free_list */ ulint count1 = 0; - buf_pool_mutex_enter(buf_pool); block = reinterpret_cast( UT_LIST_GET_FIRST(buf_pool->free)); while (block != NULL @@ -1999,12 +1966,12 @@ buf_pool->withdraw, &block->page); ut_d(block->in_withdraw_list = TRUE); + fprintf(stderr, "1 withdrawing block at %p\n", block); count1++; } block = next_block; } - buf_pool_mutex_exit(buf_pool); /* reserve free_list length */ if (UT_LIST_GET_LEN(buf_pool->withdraw) @@ -2013,15 +1980,12 @@ ulint n_flushed = 0; /* cap scan_depth with current LRU size. 
*/ - buf_pool_mutex_enter(buf_pool); - scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); - buf_pool_mutex_exit(buf_pool); - scan_depth = ut_min( ut_max(buf_pool->withdraw_target - UT_LIST_GET_LEN(buf_pool->withdraw), static_cast(srv_LRU_scan_depth)), - scan_depth); + lru_len); + mutex_exit(&buf_pool->free_list_mutex); buf_flush_do_batch(buf_pool, BUF_FLUSH_LRU, scan_depth, 0, &n_flushed); @@ -2034,12 +1998,15 @@ MONITOR_LRU_BATCH_FLUSH_PAGES, n_flushed); } + } else { + + mutex_exit(&buf_pool->free_list_mutex); } /* relocate blocks/buddies in withdrawn area */ ulint count2 = 0; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); buf_page_t* bpage; bpage = UT_LIST_GET_FIRST(buf_pool->LRU); while (bpage != NULL) { @@ -2058,18 +2025,14 @@ if (buf_page_can_relocate(bpage)) { mutex_exit(block_mutex); - buf_pool_mutex_exit_forbid(buf_pool); if(!buf_buddy_realloc( buf_pool, bpage->zip.data, page_zip_get_size( &bpage->zip))) { /* failed to allocate block */ - buf_pool_mutex_exit_allow( - buf_pool); break; } - buf_pool_mutex_exit_allow(buf_pool); mutex_enter(block_mutex); count2++; } @@ -2085,17 +2048,13 @@ if (buf_page_can_relocate(bpage)) { mutex_exit(block_mutex); - buf_pool_mutex_exit_forbid(buf_pool); if(!buf_page_realloc( buf_pool, reinterpret_cast( bpage))) { /* failed to allocate block */ - buf_pool_mutex_exit_allow( - buf_pool); break; } - buf_pool_mutex_exit_allow(buf_pool); count2++; } else { mutex_exit(block_mutex); @@ -2108,7 +2067,10 @@ bpage = next_bpage; } - buf_pool_mutex_exit(buf_pool); + + mutex_exit(&buf_pool->LRU_list_mutex); + + mutex_enter(&buf_pool->free_list_mutex); buf_resize_status( "buffer pool %lu : withdrawing blocks. (%lu/%lu)", @@ -2125,6 +2087,8 @@ /* give up for now. retried after user threads paused. */ + mutex_exit(&buf_pool->free_list_mutex); + ib::info() << "buffer pool " << i << " : will retry to withdraw later."; @@ -2132,6 +2096,7 @@ return(true); } } + mutex_exit(&buf_pool->free_list_mutex); /* confirm withdrawn enough */ const buf_chunk_t* chunk @@ -2145,6 +2110,7 @@ /* If !=BUF_BLOCK_NOT_USED block in the withdrawn area, it means corruption something */ + fprintf(stderr, "withdrawn block at %p\n", block); ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED); ut_ad(block->in_withdraw_list); @@ -2152,8 +2118,10 @@ ++chunk; } + mutex_enter(&buf_pool->free_list_mutex); ib::info() << "buffer pool " << i << " : withdrawn target " << UT_LIST_GET_LEN(buf_pool->withdraw) << " blocks."; + mutex_exit(&buf_pool->free_list_mutex); /* retry is not needed */ ++buf_withdraw_clock; @@ -2169,6 +2137,7 @@ { hash_table_t* new_hash_table; + ut_ad(mutex_own(&buf_pool->zip_hash_mutex)); ut_ad(buf_pool->page_hash_old == NULL); /* recreate page_hash */ @@ -2254,6 +2223,8 @@ ut_ad(!buf_pool_withdrawing); ut_ad(srv_buf_pool_chunk_unit > 0); + /* Assumes that buf_resize_thread has already issued the necessary + memory barrier to read srv_buf_pool_size and srv_buf_pool_old_size */ new_instance_size = srv_buf_pool_size / srv_buf_pool_instances; new_instance_size /= UNIV_PAGE_SIZE; @@ -2269,19 +2240,25 @@ /* set new limit for all buffer pool for resizing */ for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + // No locking needed to read, same thread updated those ut_ad(buf_pool->curr_size == buf_pool->old_size); ut_ad(buf_pool->n_chunks_new == buf_pool->n_chunks); + mutex_enter(&buf_pool->free_list_mutex); ut_ad(UT_LIST_GET_LEN(buf_pool->withdraw) == 0); + mutex_exit(&buf_pool->free_list_mutex); 
+#ifdef UNIV_DEBUG + buf_flush_list_mutex_enter(buf_pool); ut_ad(buf_pool->flush_rbt == NULL); + buf_flush_list_mutex_exit(buf_pool); +#endif buf_pool->curr_size = new_instance_size; buf_pool->n_chunks_new = new_instance_size * UNIV_PAGE_SIZE / srv_buf_pool_chunk_unit; - buf_pool_mutex_exit(buf_pool); + os_wmb; } /* disable AHI if needed */ @@ -2431,16 +2408,19 @@ /* Indicate critical path */ buf_pool_resizing = true; - /* Acquire all buf_pool_mutex/hash_lock */ - for (ulint i = 0; i < srv_buf_pool_instances; ++i) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - - buf_pool_mutex_enter(buf_pool); - } - for (ulint i = 0; i < srv_buf_pool_instances; ++i) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - + /* Acquire all buffer pool mutexes and hash table locks */ + for (ulint i = 0; i < srv_buf_pool_instances; ++i) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + + /* TODO: while we certainly lock a lot here, it does not + necessarily buy us enough correctness, see a comment at + buf_block_align. */ + mutex_enter(&buf_pool->LRU_list_mutex); hash_lock_x_all(buf_pool->page_hash); + mutex_enter(&buf_pool->zip_free_mutex); + mutex_enter(&buf_pool->free_list_mutex); + mutex_enter(&buf_pool->zip_hash_mutex); + mutex_enter(&buf_pool->flush_state_mutex); } buf_chunk_map_reg = UT_NEW_NOKEY(buf_pool_chunk_map_t()); @@ -2617,8 +2597,12 @@ for (ulint i = 0; i < srv_buf_pool_instances; ++i) { buf_pool_t* buf_pool = buf_pool_from_array(i); + mutex_exit(&buf_pool->flush_state_mutex); + mutex_exit(&buf_pool->zip_hash_mutex); + mutex_exit(&buf_pool->free_list_mutex); + mutex_exit(&buf_pool->zip_free_mutex); hash_unlock_x_all(buf_pool->page_hash); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); ut_free(buf_pool->chunks_old); buf_pool->chunks_old = NULL; @@ -2663,6 +2647,7 @@ << srv_buf_pool_old_size << " to " << srv_buf_pool_size << "."; srv_buf_pool_old_size = srv_buf_pool_size; + os_wmb; } /* enable AHI if needed */ @@ -2713,13 +2698,11 @@ break; } - buf_pool_mutex_enter_all(); + os_rmb; if (srv_buf_pool_old_size == srv_buf_pool_size) { - buf_pool_mutex_exit_all(); /* nothing to do */ continue; } - buf_pool_mutex_exit_all(); buf_pool_resize(); } @@ -2793,7 +2776,7 @@ buf_page_t* b; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); @@ -2873,7 +2856,8 @@ { ut_ad(mutex_own(m_mutex)); ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool); - ut_ad(!bpage || buf_page_in_file(bpage)); + ut_ad(!bpage || buf_page_in_file(bpage) + || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); m_hp = bpage; } @@ -2951,7 +2935,7 @@ const buf_pool_t* buf_pool, const buf_page_t* bpage) { - /* We must also own the appropriate hash lock. */ + /* We must own the appropriate hash lock. */ ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage)); ut_ad(buf_page_in_file(bpage)); @@ -2972,8 +2956,9 @@ } /** Add watch for the given page to be read in. Caller must have -appropriate hash_lock for the bpage. This function may release the -hash_lock and reacquire it. +appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race +condition with buf_LRU_free_page inserting the same page into the page hash. +This function may release the hash_lock and reacquire it. 
@param[in] page_id page id @param[in,out] hash_lock hash_lock currently latched @return NULL if watch set, block if the page is in the buffer pool */ @@ -3007,32 +2992,26 @@ } /* From this point this function becomes fairly heavy in terms - of latching. We acquire the buf_pool mutex as well as all the - hash_locks. buf_pool mutex is needed because any changes to - the page_hash must be covered by it and hash_locks are needed + of latching. We acquire all the hash_locks. They are needed because we don't want to read any stale information in buf_pool->watch[]. However, it is not in the critical code path as this function will be called only by the purge thread. */ - /* To obey latching order first release the hash_lock. */ rw_lock_x_unlock(*hash_lock); - buf_pool_mutex_enter(buf_pool); hash_lock_x_all(buf_pool->page_hash); - /* If not own buf_pool_mutex, page_hash can be changed. */ - *hash_lock = buf_page_hash_lock_get(buf_pool, page_id); - /* We have to recheck that the page was not loaded or a watch set by some other purge thread. This is because of the small time window between when we release the - hash_lock to acquire buf_pool mutex above. */ + hash_lock to lock all the hash_locks. */ + + *hash_lock = buf_page_hash_lock_get(buf_pool, page_id); bpage = buf_page_hash_get_low(buf_pool, page_id); if (UNIV_LIKELY_NULL(bpage)) { - buf_pool_mutex_exit(buf_pool); hash_unlock_x_all_but(buf_pool->page_hash, *hash_lock); goto page_found; } @@ -3051,20 +3030,15 @@ ut_ad(!bpage->in_page_hash); ut_ad(bpage->buf_fix_count == 0); - /* bpage is pointing to buf_pool->watch[], - which is protected by buf_pool->mutex. - Normally, buf_page_t objects are protected by - buf_block_t::mutex or buf_pool->zip_mutex or both. */ - bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->id.copy_from(page_id); bpage->buf_fix_count = 1; + bpage->buf_pool_index = buf_pool_index(buf_pool); ut_d(bpage->in_page_hash = TRUE); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, page_id.fold(), bpage); - buf_pool_mutex_exit(buf_pool); /* Once the sentinel is in the page_hash we can safely release all locks except just the relevant hash_lock */ @@ -3075,7 +3049,7 @@ case BUF_BLOCK_ZIP_PAGE: ut_ad(bpage->in_page_hash); ut_ad(bpage->buf_fix_count > 0); - break; + continue; default: ut_error; } @@ -3092,7 +3066,7 @@ } /** Remove the sentinel block for the watch before replacing it with a -real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice +real block. buf_page_watch_unset() or buf_page_watch_occurred() will notice that the block has been replaced with the real block. @param[in,out] buf_pool buffer pool instance @param[in,out] watch sentinel for watch @@ -3104,12 +3078,12 @@ buf_page_t* watch) { #ifdef UNIV_SYNC_DEBUG - /* We must also own the appropriate hash_bucket mutex. */ + /* We must own the appropriate hash_bucket mutex. */ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, watch->id); ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(buf_page_get_state(watch) == BUF_BLOCK_ZIP_PAGE); HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, watch->id.fold(), watch); @@ -3128,13 +3102,6 @@ buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(page_id); - /* We only need to have buf_pool mutex in case where we end - up calling buf_pool_watch_remove but to obey latching order - we acquire it here before acquiring hash_lock. This should - not cause too much grief as this function is only ever - called from the purge thread. 
*/ - buf_pool_mutex_enter(buf_pool); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); @@ -3147,7 +3114,6 @@ buf_pool_watch_remove(buf_pool, bpage); } - buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(hash_lock); } @@ -3192,19 +3158,59 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); ut_a(buf_page_in_file(bpage)); buf_LRU_make_block_young(bpage); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); +} + +/********************************************************************//** +Recommends a move of a block to the start of the LRU list if there is danger +of dropping from the buffer pool. NOTE: does not reserve the buffer pool +mutex. +@return TRUE if should be made younger */ +static +ibool +buf_page_peek_if_too_old( +/*=====================*/ + const buf_page_t* bpage) /*!< in: block to make younger */ +{ + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + + if (buf_pool->freed_page_clock == 0) { + /* If eviction has not started yet, do not update the + statistics or move blocks in the LRU list. This is + either the warm-up phase or an in-memory workload. */ + return(FALSE); + } else if (buf_LRU_old_threshold_ms && bpage->old) { + unsigned access_time = buf_page_is_accessed(bpage); + + /* It is possible that the below comparison returns an + unexpected result. 2^32 milliseconds pass in about 50 days, + so if the difference between ut_time_ms() and access_time + is e.g. 50 days + 15 ms, then the below will behave as if + it is 15 ms. This is known and fixing it would require to + increase buf_page_t::access_time from 32 to 64 bits. */ + if (access_time > 0 + && ((ib_uint32_t) (ut_time_ms() - access_time)) + >= buf_LRU_old_threshold_ms) { + return(TRUE); + } + + buf_pool->stat.n_pages_not_made_young++; + return(FALSE); + } else { + return(!buf_page_peek_if_young(bpage)); + } } /********************************************************************//** Moves a page to the start of the buffer pool LRU list if it is too old. This high-level function can be used to prevent an important page from -slipping out of the buffer pool. */ +slipping out of the buffer pool. The page must be fixed to the buffer pool. */ static void buf_page_make_young_if_needed( @@ -3212,10 +3218,8 @@ buf_page_t* bpage) /*!< in/out: buffer block of a file page */ { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!buf_pool_mutex_own(buf_pool)); -#endif /* UNIV_DEBUG */ + ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); + ut_ad(bpage->buf_fix_count > 0); ut_a(buf_page_in_file(bpage)); if (buf_page_peek_if_too_old(bpage)) { @@ -3295,21 +3299,30 @@ buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(page_id); - /* Since we need to acquire buf_pool mutex to discard - the uncompressed frame and because page_hash mutex resides - below buf_pool mutex in sync ordering therefore we must - first release the page_hash mutex. This means that the - block in question can move out of page_hash. Therefore - we need to check again if the block is still in page_hash. */ - buf_pool_mutex_enter(buf_pool); + /* Since we need to acquire buf_pool->LRU_list_mutex to discard + the uncompressed frame and because page_hash mutex resides below + buf_pool->LRU_list_mutex in sync ordering therefore we must first + release the page_hash mutex. This means that the block in question + can move out of page_hash. 
Therefore we need to check again if the + block is still in page_hash. */ + mutex_enter(&buf_pool->LRU_list_mutex); bpage = buf_page_hash_get(buf_pool, page_id); if (bpage) { - buf_LRU_free_page(bpage, false); + + BPageMutex* block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + + if (buf_LRU_free_page(bpage, false)) { + + return; + } + mutex_exit(block_mutex); } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /** Get read access to a compressed page (usually of type @@ -3550,7 +3563,7 @@ ut_ad(srv_buf_pool_chunk_unit > 0); /* TODO: This might be still optimistic treatment. - buf_pool_resize() needs all buf_pool_mutex and all + buf_pool_resize() needs most of buffer pool mutexes and all buf_pool->page_hash x-latched until actual modification. It should block the other user threads and should take while which is enough to done the buf_pool_chunk_map access. */ @@ -3591,11 +3604,6 @@ block->frame + n * UNIV_PAGE_SIZE. Check it. */ ut_ad(block->frame == page_align(ptr)); #ifdef UNIV_DEBUG - /* A thread that updates these fields must - hold buf_pool->mutex and block->mutex. Acquire - only the latter. */ - buf_page_mutex_enter(block); - switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -3610,37 +3618,14 @@ case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + case BUF_BLOCK_FILE_PAGE: /* Some data structures contain "guess" pointers to file pages. The file pages may have been freed and reused. Do not complain. */ break; - case BUF_BLOCK_REMOVE_HASH: - /* buf_LRU_block_remove_hashed_page() - will overwrite the FIL_PAGE_OFFSET and - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with - 0xff and set the state to - BUF_BLOCK_REMOVE_HASH. */ -# ifndef UNIV_DEBUG_VALGRIND - /* In buf_LRU_block_remove_hashed() we - explicitly set those values to 0xff and - declare them uninitialized with - UNIV_MEM_INVALID() after that. */ - ut_ad(page_get_space_id(page_align(ptr)) - == 0xffffffff); - ut_ad(page_get_page_no(page_align(ptr)) - == 0xffffffff); -# endif /* UNIV_DEBUG_VALGRIND */ - break; - case BUF_BLOCK_FILE_PAGE: - ut_ad(block->page.id.space() - == page_get_space_id(page_align(ptr))); - ut_ad(block->page.id.page_no() - == page_get_page_no(page_align(ptr))); - break; } - - buf_page_mutex_exit(block); #endif /* UNIV_DEBUG */ return(block); @@ -3765,22 +3750,12 @@ access the block (and check for IO state) after the block has been added to the page hashtable. */ - if (buf_block_get_io_fix(block) == BUF_IO_READ) { + if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) { /* Wait until the read operation completes */ - - BPageMutex* mutex = buf_page_get_mutex(&block->page); - for (;;) { - buf_io_fix io_fix; - - mutex_enter(mutex); - - io_fix = buf_block_get_io_fix(block); - - mutex_exit(mutex); - - if (io_fix == BUF_IO_READ) { + if (buf_block_get_io_fix_unlocked(block) + == BUF_IO_READ) { /* Wait by temporaly s-latch */ rw_lock_s_lock(&block->lock); rw_lock_s_unlock(&block->lock); @@ -3862,7 +3837,7 @@ rw_lock_s_lock(hash_lock); - /* If not own buf_pool_mutex, page_hash can be changed. */ + /* page_hash can be changed. */ hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id); if (block != NULL) { @@ -3896,10 +3871,10 @@ /* Page not in buf_pool: needs to be read from file */ if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { + rw_lock_x_lock(hash_lock); - /* If not own buf_pool_mutex, - page_hash can be changed. */ + /* page_hash can be changed. 
*/ hash_lock = buf_page_hash_lock_x_confirm( hash_lock, buf_pool, page_id); @@ -3998,15 +3973,16 @@ rw_lock_s_unlock(hash_lock); got_block: +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + BPageMutex* fix_mutex = buf_page_get_mutex(&fix_block->page); +#endif if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) { buf_page_t* fix_page = &fix_block->page; - BPageMutex* fix_mutex = buf_page_get_mutex(fix_page); - mutex_enter(fix_mutex); + os_rmb; const bool must_read - = (buf_page_get_io_fix(fix_page) == BUF_IO_READ); - mutex_exit(fix_mutex); + = (buf_page_get_io_fix_unlocked(fix_page) == BUF_IO_READ); if (must_read) { /* The page is being read to buffer pool, @@ -4022,9 +3998,10 @@ buf_page_t* bpage; case BUF_BLOCK_FILE_PAGE: + ut_ad(fix_mutex != &buf_pool->zip_mutex); bpage = &block->page; if (fsp_is_system_temporary(page_id.space()) - && buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + && buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { /* This suggest that page is being flushed. Avoid returning reference to this page. Instead wait for flush action to complete. @@ -4049,10 +4026,11 @@ } bpage = &block->page; + ut_ad(fix_mutex == &buf_pool->zip_mutex); /* Note: We have already buffer fixed this block. */ if (bpage->buf_fix_count > 1 - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by @@ -4072,9 +4050,8 @@ block = buf_LRU_get_free_block(buf_pool); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); - /* If not own buf_pool_mutex, page_hash can be changed. */ hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); @@ -4099,10 +4076,10 @@ This should be extremely unlikely, for example, if buf_page_get_zip() was invoked. */ - buf_LRU_block_free_non_file_page(block); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(hash_lock); buf_page_mutex_exit(block); + buf_LRU_block_free_non_file_page(block); /* Try again */ goto loop; @@ -4145,24 +4122,26 @@ /* Insert at the front of unzip_LRU list */ buf_unzip_LRU_add_block(block, FALSE); + mutex_exit(&buf_pool->LRU_list_mutex); + buf_block_set_io_fix(block, BUF_IO_READ); rw_lock_x_lock_inline(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); rw_lock_x_unlock(hash_lock); - buf_pool->n_pend_unzip++; mutex_exit(&buf_pool->zip_mutex); - buf_pool_mutex_exit(buf_pool); access_time = buf_page_is_accessed(&block->page); buf_page_mutex_exit(block); + os_atomic_increment(&buf_pool->n_pend_unzip, 1); + buf_page_free_descriptor(bpage); /* Decompress the page while not holding - buf_pool->mutex or block->mutex. */ + any buf_pool or block->mutex. */ /* Page checksum verification is already done when the page is read from disk. Hence page checksum @@ -4183,17 +4162,13 @@ } } - buf_pool_mutex_enter(buf_pool); - buf_page_mutex_enter(fix_block); buf_block_set_io_fix(fix_block, BUF_IO_NONE); buf_page_mutex_exit(fix_block); - --buf_pool->n_pend_unzip; - - buf_pool_mutex_exit(buf_pool); + os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); rw_lock_x_unlock(&block->lock); @@ -4226,27 +4201,26 @@ /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. 
*/ - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); buf_block_unfix(fix_block); - /* Now we are only holding the buf_pool->mutex, + /* Now we are only holding the buf_pool->LRU_list_mutex, not block->mutex or hash_lock. Blocks cannot be relocated or enter or exit the buf_pool while we - are holding the buf_pool->mutex. */ + are holding the buf_pool->LRU_list_mutex. */ + + fix_mutex = buf_page_get_mutex(&fix_block->page); + mutex_enter(fix_mutex); if (buf_LRU_free_page(&fix_block->page, true)) { - buf_pool_mutex_exit(buf_pool); + if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) + mutex_enter(&buf_pool->LRU_list_mutex); - /* If not own buf_pool_mutex, - page_hash can be changed. */ + /* page_hash can be changed. */ hash_lock = buf_page_hash_lock_get(buf_pool, page_id); - rw_lock_x_lock(hash_lock); - - /* If not own buf_pool_mutex, - page_hash can be changed. */ hash_lock = buf_page_hash_lock_x_confirm( hash_lock, buf_pool, page_id); @@ -4256,6 +4230,7 @@ buffer pool in the first place. */ block = (buf_block_t*) buf_pool_watch_set( page_id, &hash_lock); + mutex_exit(&buf_pool->LRU_list_mutex); } else { block = (buf_block_t*) buf_page_hash_get_low( buf_pool, page_id); @@ -4280,8 +4255,6 @@ return(NULL); } - buf_page_mutex_enter(fix_block); - if (buf_flush_page_try(buf_pool, fix_block)) { ib::info() << "innodb_change_buffering_debug flush " @@ -4292,13 +4265,13 @@ goto loop; } + mutex_exit(&buf_pool->LRU_list_mutex); + buf_page_mutex_exit(fix_block); buf_block_fix(fix_block); /* Failed to evict the page; change it directly */ - - buf_pool_mutex_exit(buf_pool); } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ @@ -4471,9 +4444,7 @@ } if (!success) { - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); return(FALSE); } @@ -4488,9 +4459,7 @@ rw_lock_x_unlock(&block->lock); } - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); return(FALSE); } @@ -4596,9 +4565,7 @@ } if (!success) { - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); return(FALSE); } @@ -4667,17 +4634,17 @@ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - buf_page_mutex_enter(block); + buf_block_buf_fix_inc(block, file, line); + rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + buf_page_mutex_enter(block); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_a(page_id.equals_to(block->page.id)); + buf_page_mutex_exit(block); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - buf_block_buf_fix_inc(block, file, line); - buf_page_mutex_exit(block); - mtr_memo_type_t fix_type = MTR_MEMO_PAGE_S_FIX; success = rw_lock_s_lock_nowait(&block->lock, file, line); @@ -4692,9 +4659,7 @@ } if (!success) { - buf_page_mutex_enter(block); buf_block_buf_fix_dec(block); - buf_page_mutex_exit(block); return(NULL); } @@ -4742,7 +4707,8 @@ #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ } -/** Inits a page to the buffer buf_pool. +/** Inits a page to the buffer buf_pool. The block pointer must be private to +the calling thread at the start of this function. 
@param[in,out] buf_pool buffer pool @param[in] page_id page id @param[in,out] block block to init */ @@ -4757,9 +4723,8 @@ buf_page_t* hash_page; ut_ad(buf_pool == buf_pool_get(page_id)); - ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(buf_page_mutex_own(block)); + ut_ad(!mutex_own(buf_page_get_mutex(&block->page))); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); #ifdef UNIV_SYNC_DEBUG @@ -4809,8 +4774,6 @@ << hash_page << ", " << block; #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - buf_page_mutex_exit(block); - buf_pool_mutex_exit(buf_pool); buf_print(); buf_LRU_print(); buf_validate(); @@ -4863,8 +4826,7 @@ buf_page_t* watch_page; rw_lock_t* hash_lock; mtr_t mtr; - ibool lru = FALSE; - void* data; + void* data = NULL; buf_pool_t* buf_pool = buf_pool_get(page_id); ut_ad(buf_pool); @@ -4897,7 +4859,13 @@ ut_ad(buf_pool_from_block(block) == buf_pool); } - buf_pool_mutex_enter(buf_pool); + if (!block) + bpage = buf_page_alloc_descriptor(); + + if ((block && page_size.is_compressed()) || !block) + data = buf_buddy_alloc(buf_pool, page_size.physical()); + + mutex_enter(&buf_pool->LRU_list_mutex); hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); @@ -4907,12 +4875,17 @@ /* The page is already in the buffer pool. */ watch_page = NULL; err_exit: + mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(hash_lock); - if (block) { - buf_page_mutex_enter(block); + + if (bpage) + buf_page_free_descriptor(bpage); + + if (data) + buf_buddy_free(buf_pool, data, page_size.physical()); + + if (block) buf_LRU_block_free_non_file_page(block); - buf_page_mutex_exit(block); - } bpage = NULL; goto func_exit; @@ -4928,25 +4901,39 @@ } if (block) { + + ut_ad(!bpage); bpage = &block->page; - buf_page_mutex_enter(block); - ut_ad(buf_pool_from_bpage(bpage) == buf_pool); buf_page_init(buf_pool, page_id, page_size, block); + buf_page_mutex_enter(block); + /* Note: We are using the hash_lock for protection. This is safe because no other thread can lookup the block from the page hashtable yet. */ buf_page_set_io_fix(bpage, BUF_IO_READ); - rw_lock_x_unlock(hash_lock); - /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); + if (page_size.is_compressed()) { + block->page.zip.data = (page_zip_t*) data; + + /* To maintain the invariant + block->in_unzip_LRU_list + == buf_page_belongs_to_unzip_LRU(&block->page) + we have to add this block to unzip_LRU + after block->page.zip.data is set. */ + ut_ad(buf_page_belongs_to_unzip_LRU(&block->page)); + buf_unzip_LRU_add_block(block, TRUE); + } + + mutex_exit(&buf_pool->LRU_list_mutex); + /* We set a pass-type x-lock on the frame because then the same thread which called for the read operation (and is running now at this point of code) can wait @@ -4958,71 +4945,17 @@ rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); - if (page_size.is_compressed()) { - /* buf_pool->mutex may be released and - reacquired by buf_buddy_alloc(). Thus, we - must release block->mutex in order not to - break the latching order in the reacquisition - of buf_pool->mutex. We also must defer this - operation until after the block descriptor has - been added to buf_pool->LRU and - buf_pool->page_hash. 
*/ - buf_page_mutex_exit(block); - data = buf_buddy_alloc(buf_pool, page_size.physical(), - &lru); - buf_page_mutex_enter(block); - block->page.zip.data = (page_zip_t*) data; - - /* To maintain the invariant - block->in_unzip_LRU_list - == buf_page_belongs_to_unzip_LRU(&block->page) - we have to add this block to unzip_LRU - after block->page.zip.data is set. */ - ut_ad(buf_page_belongs_to_unzip_LRU(&block->page)); - buf_unzip_LRU_add_block(block, TRUE); - } + rw_lock_x_unlock(hash_lock); buf_page_mutex_exit(block); } else { - rw_lock_x_unlock(hash_lock); - - /* The compressed page must be allocated before the - control block (bpage), in order to avoid the - invocation of buf_buddy_relocate_block() on - uninitialized data. */ - data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru); - - rw_lock_x_lock(hash_lock); - - /* If buf_buddy_alloc() allocated storage from the LRU list, - it released and reacquired buf_pool->mutex. Thus, we must - check the page_hash again, as it may have been modified. */ - if (UNIV_UNLIKELY(lru)) { - - watch_page = buf_page_hash_get_low(buf_pool, page_id); - - if (UNIV_UNLIKELY(watch_page - && !buf_pool_watch_is_sentinel(buf_pool, - watch_page))) { - - /* The block was added by some other thread. */ - rw_lock_x_unlock(hash_lock); - watch_page = NULL; - buf_buddy_free(buf_pool, data, - page_size.physical()); - - bpage = NULL; - goto func_exit; - } - } - - bpage = buf_page_alloc_descriptor(); /* Initialize the buf_pool pointer. */ bpage->buf_pool_index = buf_pool_index(buf_pool); page_zip_des_init(&bpage->zip); page_zip_set_size(&bpage->zip, page_size.physical()); + ut_ad(data); bpage->zip.data = (page_zip_t*) data; bpage->size.copy_from(page_size); @@ -5072,15 +5005,15 @@ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG buf_LRU_insert_zip_clean(bpage); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + mutex_exit(&buf_pool->LRU_list_mutex); buf_page_set_io_fix(bpage, BUF_IO_READ); mutex_exit(&buf_pool->zip_mutex); } - buf_pool->n_pend_reads++; + os_atomic_increment_ulint(&buf_pool->n_pend_reads, 1); func_exit: - buf_pool_mutex_exit(buf_pool); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -5122,7 +5055,7 @@ free_block = buf_LRU_get_free_block(buf_pool); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); hash_lock = buf_page_hash_lock_get(buf_pool, page_id); rw_lock_x_lock(hash_lock); @@ -5140,7 +5073,7 @@ #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ /* Page can be found in buf_pool */ - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(hash_lock); buf_block_free(free_block); @@ -5155,37 +5088,30 @@ block = free_block; - buf_page_mutex_enter(block); - buf_page_init(buf_pool, page_id, page_size, block); + buf_page_mutex_enter(block); + rw_lock_x_unlock(hash_lock); /* The block must be put to the LRU list */ buf_LRU_add_block(&block->page, FALSE); + mutex_exit(&buf_pool->LRU_list_mutex); buf_block_buf_fix_inc(block, __FILE__, __LINE__); - buf_pool->stat.n_pages_created++; + os_atomic_increment(&buf_pool->stat.n_pages_created, 1); if (page_size.is_compressed()) { void* data; - ibool lru; - /* Prevent race conditions during buf_buddy_alloc(), - which may release and reacquire buf_pool->mutex, + /* Prevent race conditions during buf_buddy_alloc() by IO-fixing and X-latching the block. */ - buf_page_set_io_fix(&block->page, BUF_IO_READ); rw_lock_x_lock(&block->lock); buf_page_mutex_exit(block); - /* buf_pool->mutex may be released and reacquired by - buf_buddy_alloc(). 
Thus, we must release block->mutex - in order not to break the latching order in - the reacquisition of buf_pool->mutex. We also must - defer this operation until after the block descriptor - has been added to buf_pool->LRU and buf_pool->page_hash. */ - data = buf_buddy_alloc(buf_pool, page_size.physical(), &lru); + data = buf_buddy_alloc(buf_pool, page_size.physical()); + mutex_enter(&buf_pool->LRU_list_mutex); buf_page_mutex_enter(block); block->page.zip.data = (page_zip_t*) data; @@ -5196,13 +5122,12 @@ block->page.zip.data is set. */ ut_ad(buf_page_belongs_to_unzip_LRU(&block->page)); buf_unzip_LRU_add_block(block, FALSE); + mutex_exit(&buf_pool->LRU_list_mutex); buf_page_set_io_fix(&block->page, BUF_IO_NONE); rw_lock_x_unlock(&block->lock); } - buf_pool_mutex_exit(buf_pool); - mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); buf_page_set_accessed(&block->page); @@ -5246,6 +5171,8 @@ const byte* frame; monitor_id_t counter; + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + /* If the counter module is not turned on, just return */ if (!MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)) { return; @@ -5354,9 +5281,11 @@ == BUF_BLOCK_FILE_PAGE); ib_uint32_t space = bpage->id.space(); ibool ret = TRUE; + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); /* First unfix and release lock on the bpage */ - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(hash_lock); mutex_enter(buf_page_get_mutex(bpage)); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); ut_ad(bpage->buf_fix_count == 0); @@ -5370,19 +5299,18 @@ BUF_IO_READ); } - mutex_exit(buf_page_get_mutex(bpage)); - /* Find the table with specified space id, and mark it corrupted */ if (dict_set_corrupted_by_space(space)) { buf_LRU_free_one_page(bpage); } else { + rw_lock_x_unlock(hash_lock); + mutex_exit(buf_page_get_mutex(bpage)); ret = FALSE; } + mutex_exit(&buf_pool->LRU_list_mutex); ut_ad(buf_pool->n_pend_reads > 0); - buf_pool->n_pend_reads--; - - buf_pool_mutex_exit(buf_pool); + os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); return(ret); } @@ -5404,6 +5332,7 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + bool have_LRU_mutex = false; ut_a(buf_page_in_file(bpage)); @@ -5413,7 +5342,7 @@ ensures that this is the only thread that handles the i/o for this block. 
*/ - io_type = buf_page_get_io_fix(bpage); + io_type = buf_page_get_io_fix_unlocked(bpage); ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); if (io_type == BUF_IO_READ) { @@ -5423,15 +5352,16 @@ if (bpage->size.is_compressed()) { frame = bpage->zip.data; - buf_pool->n_pend_unzip++; + os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1); if (uncompressed && !buf_zip_decompress((buf_block_t*) bpage, FALSE)) { - buf_pool->n_pend_unzip--; + os_atomic_decrement_ulint( + &buf_pool->n_pend_unzip, 1); goto corrupt; } - buf_pool->n_pend_unzip--; + os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); } else { ut_a(uncompressed); frame = ((buf_block_t*) bpage)->frame; @@ -5544,8 +5474,39 @@ } } - buf_pool_mutex_enter(buf_pool); - mutex_enter(buf_page_get_mutex(bpage)); + if (io_type == BUF_IO_WRITE + && ( +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + /* to keep consistency at buf_LRU_insert_zip_clean() */ + buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY || +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU || + buf_page_get_flush_type(bpage) == BUF_FLUSH_SINGLE_PAGE)) { + + have_LRU_mutex = true; /* optimistic */ + } +retry_mutex: + if (have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); + + BPageMutex* page_mutex = buf_page_get_mutex(bpage); + mutex_enter(page_mutex); + + if (UNIV_UNLIKELY(io_type == BUF_IO_WRITE + && ( +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY + || +#endif + buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU + || buf_page_get_flush_type(bpage) + == BUF_FLUSH_SINGLE_PAGE) + && !have_LRU_mutex)) { + + mutex_exit(page_mutex); + have_LRU_mutex = true; + goto retry_mutex; + } #ifdef UNIV_IBUF_COUNT_DEBUG if (io_type == BUF_IO_WRITE || uncompressed) { @@ -5560,19 +5521,19 @@ removes the newest lock debug record, without checking the thread id. */ - buf_page_set_io_fix(bpage, BUF_IO_NONE); buf_page_monitor(bpage, io_type); switch (io_type) { case BUF_IO_READ: + + ut_ad(!have_LRU_mutex); + + buf_page_set_io_fix(bpage, BUF_IO_NONE); + /* NOTE that the call to ibuf may have moved the ownership of the x-latch to this OS thread: do not let this confuse you in debugging! */ - ut_ad(buf_pool->n_pend_reads > 0); - buf_pool->n_pend_reads--; - buf_pool->stat.n_pages_read++; - if (uncompressed) { rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock, BUF_IO_READ); @@ -5580,6 +5541,10 @@ mutex_exit(buf_page_get_mutex(bpage)); + ut_ad(buf_pool->n_pend_reads > 0); + os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); + os_atomic_increment_ulint(&buf_pool->stat.n_pages_read, 1); + break; case BUF_IO_WRITE: @@ -5593,7 +5558,7 @@ BUF_IO_WRITE); } - buf_pool->stat.n_pages_written++; + os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1); /* We decide whether or not to evict the page from the LRU list based on the flush_type. @@ -5603,14 +5568,17 @@ by the caller explicitly. */ if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) { evict = true; + ut_ad(have_LRU_mutex); } - if (evict) { - mutex_exit(buf_page_get_mutex(bpage)); - buf_LRU_free_page(bpage, true); + if (evict && buf_LRU_free_page(bpage, true)) { + have_LRU_mutex = false; } else { mutex_exit(buf_page_get_mutex(bpage)); } + if (have_LRU_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + } break; @@ -5623,8 +5591,6 @@ io_type == BUF_IO_READ ? 
"read" : "wrote", bpage->id.space(), bpage->id.page_no())); - buf_pool_mutex_exit(buf_pool); - return(true); } @@ -5642,25 +5608,37 @@ ut_ad(buf_pool); - buf_pool_mutex_enter(buf_pool); - chunk = buf_pool->chunks; for (i = buf_pool->n_chunks; i--; chunk++) { + mutex_enter(&buf_pool->LRU_list_mutex); + const buf_block_t* block = buf_chunk_not_freed(chunk); + mutex_exit(&buf_pool->LRU_list_mutex); + if (UNIV_LIKELY_NULL(block)) { ib::fatal() << "Page " << block->page.id << " still fixed or dirty"; } } - buf_pool_mutex_exit(buf_pool); - return(TRUE); } +/**********************************************************************//** +Refreshes the statistics used to print per-second averages. */ +static +void +buf_refresh_io_stats( +/*=================*/ + buf_pool_t* buf_pool) /*!< in: buffer pool instance */ +{ + buf_pool->last_printout_time = ut_time(); + buf_pool->old_stat = buf_pool->stat; +} + /*********************************************************************//** Invalidates file pages in one buffer pool instance */ static @@ -5671,7 +5649,9 @@ { ulint i; - buf_pool_mutex_enter(buf_pool); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + + mutex_enter(&buf_pool->flush_state_mutex); for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { @@ -5689,21 +5669,21 @@ if (buf_pool->n_flush[i] > 0) { buf_flush_t type = static_cast(i); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); buf_flush_wait_batch_end(buf_pool, type); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->flush_state_mutex); } } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); ut_ad(buf_all_freed_instance(buf_pool)); - buf_pool_mutex_enter(buf_pool); - while (buf_LRU_scan_and_free_block(buf_pool, true)) { } + mutex_enter(&buf_pool->LRU_list_mutex); + ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0); @@ -5711,10 +5691,10 @@ buf_pool->LRU_old = NULL; buf_pool->LRU_old_len = 0; + mutex_exit(&buf_pool->LRU_list_mutex); + memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat)); buf_refresh_io_stats(buf_pool); - - buf_pool_mutex_exit(buf_pool); } /*********************************************************************//** @@ -5756,8 +5736,11 @@ ut_ad(buf_pool); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); hash_lock_x_all(buf_pool->page_hash); + mutex_enter(&buf_pool->zip_mutex); + mutex_enter(&buf_pool->free_list_mutex); + mutex_enter(&buf_pool->flush_state_mutex); chunk = buf_pool->chunks; @@ -5770,8 +5753,6 @@ for (j = chunk->size; j--; block++) { - buf_page_mutex_enter(block); - switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -5791,7 +5772,7 @@ == BUF_IO_READ || !ibuf_count_get(block->page.id)); #endif - switch (buf_page_get_io_fix(&block->page)) { + switch (buf_page_get_io_fix_unlocked(&block->page)) { case BUF_IO_NONE: break; @@ -5799,20 +5780,8 @@ switch (buf_page_get_flush_type( &block->page)) { case BUF_FLUSH_LRU: - n_lru_flush++; - goto assert_s_latched; case BUF_FLUSH_SINGLE_PAGE: - n_page_flush++; -assert_s_latched: - ut_a(rw_lock_is_locked( - &block->lock, - RW_LOCK_S) - || rw_lock_is_locked( - &block->lock, - RW_LOCK_SX)); - break; case BUF_FLUSH_LIST: - n_list_flush++; break; default: ut_error; @@ -5843,13 +5812,9 @@ /* do nothing */ break; } - - buf_page_mutex_exit(block); } } - mutex_enter(&buf_pool->zip_mutex); - /* Check clean compressed-only blocks. 
*/ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; @@ -5893,7 +5858,9 @@ case BUF_BLOCK_ZIP_DIRTY: n_lru++; n_zip++; - switch (buf_page_get_io_fix(b)) { + /* fallthrough */ + case BUF_BLOCK_FILE_PAGE: + switch (buf_page_get_io_fix_unlocked(b)) { case BUF_IO_NONE: case BUF_IO_READ: case BUF_IO_PIN: @@ -5915,9 +5882,6 @@ break; } break; - case BUF_BLOCK_FILE_PAGE: - /* uncompressed page */ - break; case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_NOT_USED: @@ -5946,19 +5910,24 @@ } ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); + + mutex_exit(&buf_pool->LRU_list_mutex); + if (buf_pool->curr_size == buf_pool->old_size - && UT_LIST_GET_LEN(buf_pool->free) != n_free) { + && UT_LIST_GET_LEN(buf_pool->free) > n_free) { ib::fatal() << "Free list len " << UT_LIST_GET_LEN(buf_pool->free) << ", free blocks " << n_free << ". Aborting..."; } + mutex_exit(&buf_pool->free_list_mutex); + ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); ut_a(buf_LRU_validate()); ut_a(buf_flush_validate(buf_pool)); @@ -6016,12 +5985,16 @@ counts = static_cast(ut_malloc_nokey(sizeof(ulint) * size)); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->free_list_mutex); + mutex_enter(&buf_pool->flush_state_mutex); buf_flush_list_mutex_enter(buf_pool); ib::info() << *buf_pool; buf_flush_list_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); + mutex_exit(&buf_pool->free_list_mutex); /* Count the number of blocks belonging to each index in the buffer */ @@ -6062,7 +6035,7 @@ } } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); for (i = 0; i < n_found; i++) { index = dict_index_get_if_in_cache(index_ids[i]); @@ -6109,7 +6082,7 @@ /*********************************************************************//** Returns the number of latched pages in the buffer pool. @return number of latched pages */ - +static ulint buf_get_latched_pages_number_instance( /*==================================*/ @@ -6120,7 +6093,7 @@ buf_chunk_t* chunk; ulint fixed_pages_number = 0; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); chunk = buf_pool->chunks; @@ -6137,18 +6110,16 @@ continue; } - buf_page_mutex_enter(block); - if (block->page.buf_fix_count != 0 - || buf_page_get_io_fix(&block->page) + || buf_page_get_io_fix_unlocked(&block->page) != BUF_IO_NONE) { fixed_pages_number++; } - - buf_page_mutex_exit(block); } } + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->zip_mutex); /* Traverse the lists of clean and dirty compressed-only blocks. */ @@ -6179,12 +6150,16 @@ case BUF_BLOCK_FILE_PAGE: /* uncompressed page */ break; + case BUF_BLOCK_REMOVE_HASH: + /* We hold flush list but not LRU list mutex here. + Thus encountering BUF_BLOCK_REMOVE_HASH pages is + possible. 
*/ + break; case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: ut_error; break; } @@ -6192,7 +6167,6 @@ buf_flush_list_mutex_exit(buf_pool); mutex_exit(&buf_pool->zip_mutex); - buf_pool_mutex_exit(buf_pool); return(fixed_pages_number); } @@ -6232,6 +6206,7 @@ { ulint pend_ios = 0; + os_rmb; for (ulint i = 0; i < srv_buf_pool_instances; i++) { pend_ios += buf_pool_from_array(i)->n_pend_reads; } @@ -6339,9 +6314,6 @@ /* Find appropriate pool_info to store stats for this buffer pool */ pool_info = &all_pool_info[pool_id]; - buf_pool_mutex_enter(buf_pool); - buf_flush_list_mutex_enter(buf_pool); - pool_info->pool_unique_id = pool_id; pool_info->pool_size = buf_pool->curr_size; @@ -6370,8 +6342,6 @@ (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]); - buf_flush_list_mutex_exit(buf_pool); - current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, buf_pool->last_printout_time); @@ -6453,12 +6423,11 @@ pool_info->unzip_cur = buf_LRU_stat_cur.unzip; buf_refresh_io_stats(buf_pool); - buf_pool_mutex_exit(buf_pool); } /*********************************************************************//** Prints info of the buffer i/o. */ - +static void buf_print_io_instance( /*==================*/ @@ -6563,6 +6532,8 @@ ut_zalloc_nokey(sizeof *pool_info)); } + os_rmb; + for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; @@ -6603,18 +6574,6 @@ Refreshes the statistics used to print per-second averages. */ void -buf_refresh_io_stats( -/*=================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - buf_pool->last_printout_time = ut_time(); - buf_pool->old_stat = buf_pool->stat; -} - -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. 
*/ - -void buf_refresh_io_stats_all(void) /*==========================*/ { @@ -6660,22 +6619,22 @@ ulint i; ulint pending_io = 0; - buf_pool_mutex_enter_all(); - for (i = 0; i < srv_buf_pool_instances; i++) { - const buf_pool_t* buf_pool; + buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); - pending_io += buf_pool->n_pend_reads + pending_io += buf_pool->n_pend_reads; + + mutex_enter(&buf_pool->flush_state_mutex); + pending_io += + buf_pool->n_flush[BUF_FLUSH_LRU] + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] + buf_pool->n_flush[BUF_FLUSH_LIST]; + mutex_exit(&buf_pool->flush_state_mutex); } - buf_pool_mutex_exit_all(); - return(pending_io); } @@ -6691,11 +6650,11 @@ { ulint len; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->free_list_mutex); len = UT_LIST_GET_LEN(buf_pool->free); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->free_list_mutex); return(len); } @@ -6754,6 +6713,12 @@ std::ostream& out, const buf_pool_t& buf_pool) { + /* These locking requirements might be relaxed if desired */ + ut_ad(mutex_own(&buf_pool.LRU_list_mutex)); + ut_ad(mutex_own(&buf_pool.free_list_mutex)); + ut_ad(mutex_own(&buf_pool.flush_state_mutex)); + ut_ad(buf_flush_list_mutex_own(&buf_pool)); + out << "[buffer pool instance: " << "buf_pool size=" << buf_pool.curr_size << ", database pages=" << UT_LIST_GET_LEN(buf_pool.LRU) === modified file 'storage/innobase/buf/buf0dblwr.cc' --- storage/innobase/buf/buf0dblwr.cc 2014-08-26 11:08:37 +0000 +++ storage/innobase/buf/buf0dblwr.cc 2015-01-16 19:30:41 +0000 @@ -951,6 +951,7 @@ buf_page_t* bpage) /*!< in: buffer block to write */ { ut_a(buf_page_in_file(bpage)); + ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); try_again: mutex_enter(&buf_dblwr->mutex); === modified file 'storage/innobase/buf/buf0dump.cc' --- storage/innobase/buf/buf0dump.cc 2014-08-12 08:15:50 +0000 +++ storage/innobase/buf/buf0dump.cc 2015-01-16 19:30:41 +0000 @@ -53,8 +53,8 @@ static ibool buf_load_abort_flag = FALSE; /* Used to temporary store dump info in order to avoid IO while holding -buffer pool mutex during dump and also to sort the contents of the dump -before reading the pages from disk during load. +buffer pool LRU list mutex during dump and also to sort the contents of the +dump before reading the pages from disk during load. We store the space id in the high 32 bits and page no in low 32 bits. 
*/ typedef ib_uint64_t buf_dump_t; @@ -213,15 +213,15 @@ buf_pool = buf_pool_from_array(i); - /* obtain buf_pool mutex before allocate, since + /* obtain buf_pool LRU list mutex before allocate, since UT_LIST_GET_LEN(buf_pool->LRU) could change */ - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); n_pages = UT_LIST_GET_LEN(buf_pool->LRU); /* skip empty buffer pools */ if (n_pages == 0) { - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); continue; } @@ -239,7 +239,7 @@ n_pages * sizeof(*dump))); if (dump == NULL) { - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); fclose(f); buf_dump_status(STATUS_ERR, "Cannot allocate " ULINTPF " bytes: %s", @@ -261,7 +261,7 @@ ut_a(j == n_pages); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) { ret = fprintf(f, ULINTPF "," ULINTPF "\n", === modified file 'storage/innobase/buf/buf0flu.cc' --- storage/innobase/buf/buf0flu.cc 2014-11-04 13:44:51 +0000 +++ storage/innobase/buf/buf0flu.cc 2015-01-16 19:30:41 +0000 @@ -374,9 +374,8 @@ buf_block_t* block, /*!< in/out: block which is modified */ lsn_t lsn) /*!< in: oldest modification */ { - ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(log_flush_order_mutex_own()); - ut_ad(buf_page_mutex_own(block)); + ut_ad(mutex_own(buf_page_get_mutex(&block->page))); buf_flush_list_mutex_enter(buf_pool); @@ -436,15 +435,14 @@ buf_page_t* prev_b; buf_page_t* b; - ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(log_flush_order_mutex_own()); - ut_ad(buf_page_mutex_own(block)); + ut_ad(mutex_own(buf_page_get_mutex(&block->page))); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); buf_flush_list_mutex_enter(buf_pool); - /* The field in_LRU_list is protected by buf_pool->mutex, which - we are not holding. However, while a block is in the flush + /* The field in_LRU_list is protected by buf_pool->LRU_list_mutex, + which we are not holding. However, while a block is in the flush list, it is dirty and cannot be discarded, not from the page_hash or from the LRU list. At most, the uncompressed page frame of a compressed block may be discarded or created @@ -517,7 +515,8 @@ /********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, -i.e., the transition FILE_PAGE => NOT_USED allowed. +i.e., the transition FILE_PAGE => NOT_USED allowed. The caller must hold the +LRU list and block mutexes. 
@return TRUE if can replace immediately */ ibool @@ -528,7 +527,7 @@ { #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); #endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(bpage->in_LRU_list); @@ -557,17 +556,18 @@ buf_page_in_file(bpage) */ buf_flush_t flush_type)/*!< in: type of flush */ { + ut_a(buf_page_in_file(bpage) + || (buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); -#endif /* UNIV_DEBUG */ - - ut_a(buf_page_in_file(bpage)); - ut_ad(mutex_own(buf_page_get_mutex(bpage))); + && !mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex) +#endif + )); + ut_ad(mutex_own(buf_page_get_mutex(bpage)) + || flush_type == BUF_FLUSH_LIST); ut_ad(flush_type < BUF_FLUSH_N_TYPES); if (bpage->oldest_modification == 0 - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { return(false); } @@ -575,6 +575,7 @@ switch (flush_type) { case BUF_FLUSH_LIST: + return(buf_page_get_state(bpage) != BUF_BLOCK_REMOVE_HASH); case BUF_FLUSH_LRU: case BUF_FLUSH_SINGLE_PAGE: return(true); @@ -597,8 +598,11 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_DIRTY + || mutex_own(&buf_pool->LRU_list_mutex)); +#endif ut_ad(bpage->in_flush_list); buf_flush_list_mutex_enter(buf_pool); @@ -671,7 +675,6 @@ buf_page_t* prev_b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); /* Must reside in the same buffer pool. */ ut_ad(buf_pool == buf_pool_from_bpage(dpage)); @@ -679,13 +682,6 @@ buf_flush_list_mutex_enter(buf_pool); - /* FIXME: At this point we have both buf_pool and flush_list - mutexes. Theoretically removal of a block from flush list is - only covered by flush_list mutex but currently we do - have buf_pool mutex in buf_flush_remove() therefore this block - is guaranteed to be in the flush list. We need to check if - this will work without the assumption of block removing code - having the buf_pool mutex. */ ut_ad(bpage->in_flush_list); ut_ad(dpage->in_flush_list); @@ -733,14 +729,15 @@ /*=====================*/ buf_page_t* bpage) /*!< in: pointer to the block in question */ { - buf_flush_t flush_type; + buf_flush_t flush_type = buf_page_get_flush_type(bpage); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(bpage); + mutex_enter(&buf_pool->flush_state_mutex); buf_flush_remove(bpage); - flush_type = buf_page_get_flush_type(bpage); + buf_page_set_io_fix(bpage, BUF_IO_NONE); + buf_pool->n_flush[flush_type]--; if (buf_pool->n_flush[flush_type] == 0 @@ -751,6 +748,8 @@ os_event_set(buf_pool->no_flush[flush_type]); } + mutex_exit(&buf_pool->flush_state_mutex); + buf_dblwr_update(bpage, flush_type); } #endif /* !UNIV_HOTBACKUP */ @@ -904,7 +903,7 @@ #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!buf_pool_mutex_own(buf_pool)); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); #endif DBUG_PRINT("ib_buf", ("flush %s %u page " UINT32PF ":" UINT32PF, @@ -913,15 +912,13 @@ ut_ad(buf_page_in_file(bpage)); - /* We are not holding buf_pool->mutex or block_mutex here. - Nevertheless, it is safe to access bpage, because it is - io_fixed and oldest_modification != 0. 
Thus, it cannot be - relocated in the buffer pool or removed from flush_list or - LRU_list. */ - ut_ad(!buf_pool_mutex_own(buf_pool)); + /* We are not holding block_mutex here. Nevertheless, it is safe to + access bpage, because it is io_fixed and oldest_modification != 0. + Thus, it cannot be relocated in the buffer pool or removed from + flush_list or LRU_list. */ ut_ad(!buf_flush_list_mutex_own(buf_pool)); ut_ad(!buf_page_get_mutex(bpage)->is_owned()); - ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE); + ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE); ut_ad(bpage->oldest_modification != 0); #ifdef UNIV_IBUF_COUNT_DEBUG @@ -1008,9 +1005,10 @@ Writes a flushable page asynchronously from the buffer pool to a file. NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of -writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be -held upon entering this function, and they will be released by this -function if it returns true. +writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this +function. The LRU list mutex must be held iff flush_type +== BUF_FLUSH_SINGLE_PAGE. Both mutexes will be released by this function if it +returns true. @return TRUE if the page was flushed */ ibool @@ -1024,7 +1022,15 @@ BPageMutex* block_mutex; ut_ad(flush_type < BUF_FLUSH_N_TYPES); - ut_ad(buf_pool_mutex_own(buf_pool)); + /* Hold the LRU list mutex iff called for a single page LRU + flush. A single page LRU flush is already non-performant, and holding + the LRU list mutex allows us to avoid having to store the previous LRU + list page or to restart the LRU scan in + buf_flush_single_page_from_LRU(). */ + ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE || + !mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE || + mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_page_in_file(bpage)); ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE); @@ -1068,6 +1074,8 @@ /* We are committed to flushing by the time we get here */ + mutex_enter(&buf_pool->flush_state_mutex); + buf_page_set_io_fix(bpage, BUF_IO_WRITE); buf_page_set_flush_type(bpage, flush_type); @@ -1078,8 +1086,12 @@ ++buf_pool->n_flush[flush_type]; + mutex_exit(&buf_pool->flush_state_mutex); + mutex_exit(block_mutex); - buf_pool_mutex_exit(buf_pool); + + if (flush_type == BUF_FLUSH_SINGLE_PAGE) + mutex_exit(&buf_pool->LRU_list_mutex); if (flush_type == BUF_FLUSH_LIST && is_uncompressed @@ -1111,9 +1123,9 @@ # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /********************************************************************//** Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: buf_pool->mutex and block->mutex must be held upon entering this -function, and they will be released by this function after flushing. -This is loosely based on buf_flush_batch() and buf_flush_page(). +NOTE: block and LRU list mutexes must be held upon entering this function, and +they will be released by this function after flushing. This is loosely based on +buf_flush_batch() and buf_flush_page(). 
@return TRUE if the page was flushed and the mutexes released */ ibool @@ -1122,16 +1134,15 @@ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_block_t* block) /*!< in/out: buffer control block */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(buf_page_mutex_own(block)); + ut_ad(mutex_own(buf_page_get_mutex(&block->page))); if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) { return(FALSE); } - /* The following call will release the buffer pool and - block mutex. */ + /* The following call will release the LRU list and block mutexes. */ return(buf_flush_page( buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true)); @@ -1151,21 +1162,26 @@ buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(page_id); bool ret; + rw_lock_t* hash_lock; + ib_mutex_t* block_mutex; ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - buf_pool_mutex_enter(buf_pool); - /* We only want to flush pages from this buffer pool. */ - bpage = buf_page_hash_get(buf_pool, page_id); + bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock); if (!bpage) { - buf_pool_mutex_exit(buf_pool); return(false); } + block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + + rw_lock_s_unlock(hash_lock); + ut_a(buf_page_in_file(bpage)); /* We avoid flushing 'non-old' blocks in an LRU flush, @@ -1173,15 +1189,13 @@ ret = false; if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) { - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); if (buf_flush_ready_for_flush(bpage, flush_type)) { ret = true; } - mutex_exit(block_mutex); } - buf_pool_mutex_exit(buf_pool); + + mutex_exit(block_mutex); return(ret); } @@ -1207,6 +1221,8 @@ buf_pool_t* buf_pool = buf_pool_get(page_id); ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(!buf_flush_list_mutex_own(buf_pool)); if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || srv_flush_neighbors == 0) { @@ -1276,6 +1292,8 @@ for (ulint i = low; i < high; i++) { buf_page_t* bpage; + rw_lock_t* hash_lock; + ib_mutex_t* block_mutex; if ((count + n_flushed) >= n_to_flush) { @@ -1296,17 +1314,21 @@ buf_pool = buf_pool_get(cur_page_id); - buf_pool_mutex_enter(buf_pool); - /* We only want to flush pages from this buffer pool. */ - bpage = buf_page_hash_get(buf_pool, cur_page_id); + bpage = buf_page_hash_get_s_locked(buf_pool, cur_page_id, + &hash_lock); if (bpage == NULL) { - buf_pool_mutex_exit(buf_pool); continue; } + block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + + rw_lock_s_unlock(hash_lock); + ut_a(buf_page_in_file(bpage)); /* We avoid flushing 'non-old' blocks in an LRU flush, @@ -1316,10 +1338,6 @@ || i == page_id.page_no() || buf_page_is_old(bpage)) { - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - if (buf_flush_ready_for_flush(bpage, flush_type) && (i == page_id.page_no() || bpage->buf_fix_count == 0)) { @@ -1332,16 +1350,15 @@ ++count; } else { + mutex_exit(block_mutex); - buf_pool_mutex_exit(buf_pool); } continue; - } else { - mutex_exit(block_mutex); } } - buf_pool_mutex_exit(buf_pool); + + mutex_exit(block_mutex); } if (count > 1) { @@ -1357,10 +1374,11 @@ /********************************************************************//** Check if the block is modified and ready for flushing. 
If the the block -is ready to flush then flush the page and try o flush its neighbors. +is ready to flush then flush the page and try o flush its neighbors. The caller +must hold the buffer pool list mutex corresponding to the type of flush. -@return TRUE if buf_pool mutex was released during this function. -This does not guarantee that some pages were written as well. +@return TRUE if the list mutex was released during this function. This does +not guarantee that some pages were written as well. Number of pages written are incremented to the count. */ static bool @@ -1378,41 +1396,69 @@ { #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(buf_pool_mutex_own(buf_pool)); #endif /* UNIV_DEBUG */ bool flushed; - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - ut_a(buf_page_in_file(bpage)); + BPageMutex* block_mutex = NULL; + + ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE); + + ut_ad((flush_type == BUF_FLUSH_LRU + && mutex_own(&buf_pool->LRU_list_mutex)) + || (flush_type == BUF_FLUSH_LIST + && buf_flush_list_mutex_own(buf_pool))); + + if (flush_type == BUF_FLUSH_LRU) { + block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); + } + + ut_a(buf_page_in_file(bpage) + || (buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH +#ifdef UNIV_DEBUG + && !mutex_own(&buf_pool->LRU_list_mutex) +#endif + )); if (buf_flush_ready_for_flush(bpage, flush_type)) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_bpage(bpage); + if (flush_type == BUF_FLUSH_LRU) { + mutex_exit(&buf_pool->LRU_list_mutex); + } + const page_id_t page_id = bpage->id; - mutex_exit(block_mutex); - - buf_pool_mutex_exit(buf_pool); + if (flush_type == BUF_FLUSH_LRU) { + mutex_exit(block_mutex); + } else { + buf_flush_list_mutex_exit(buf_pool); + } /* Try to flush also all the neighbors */ *count += buf_flush_try_neighbors( page_id, flush_type, *count, n_to_flush); - buf_pool_mutex_enter(buf_pool); - flushed = TRUE; + if (flush_type == BUF_FLUSH_LRU) { + mutex_enter(&buf_pool->LRU_list_mutex); + } else { + buf_flush_list_mutex_enter(buf_pool); + } + flushed = true; + } else if (flush_type == BUF_FLUSH_LRU) { + mutex_exit(block_mutex); + + flushed = false; } else { - mutex_exit(block_mutex); - flushed = false; } - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad((flush_type == BUF_FLUSH_LRU + && mutex_own(&buf_pool->LRU_list_mutex)) + || (flush_type == BUF_FLUSH_LIST + && buf_flush_list_mutex_own(buf_pool))); return(flushed); } @@ -1424,7 +1470,7 @@ tail of the unzip_LRU and puts those freed frames in the free list. Note that it is a best effort attempt and it is not guaranteed that after a call to this function there will be 'max' blocks in the free -list. +list. The caller must hold the LRU list mutex. @return number of blocks moved to the free list. */ static ulint @@ -1439,7 +1485,7 @@ ulint free_len = UT_LIST_GET_LEN(buf_pool->free); ulint lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); @@ -1448,15 +1494,22 @@ && free_len < srv_LRU_scan_depth && lru_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) { + BPageMutex* block_mutex = buf_page_get_mutex(&block->page); + ++scanned; + + mutex_enter(block_mutex); + if (buf_LRU_free_page(&block->page, false)) { - /* Block was freed. 
buf_pool->mutex potentially - released and reacquired */ + + /* Block was freed, all mutexes released */ ++count; + mutex_enter(&buf_pool->LRU_list_mutex); block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); } else { + mutex_exit(block_mutex); block = UT_LIST_GET_PREV(unzip_LRU, block); } @@ -1464,7 +1517,7 @@ lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU); } - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (count) { MONITOR_INC_VALUE_CUMULATIVE( @@ -1508,15 +1561,11 @@ ulint count = 0; ulint free_len = UT_LIST_GET_LEN(buf_pool->free); ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU); - ulint withdraw_depth = 0; - - ut_ad(buf_pool_mutex_own(buf_pool)); - - if (buf_pool->curr_size < buf_pool->old_size - && buf_pool->withdraw_target > 0) { - withdraw_depth = buf_pool->withdraw_target - - UT_LIST_GET_LEN(buf_pool->withdraw); - } + ulint withdraw_depth; + + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + + withdraw_depth = buf_get_withdraw_depth(buf_pool); for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL && count + evict_count < max @@ -1530,31 +1579,37 @@ BPageMutex* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); + ulint failed_acquire = mutex_enter_nowait(block_mutex); - if (buf_flush_ready_for_replace(bpage)) { + if (!failed_acquire && buf_flush_ready_for_replace(bpage)) { /* block is ready for eviction i.e., it is clean and is not IO-fixed or buffer fixed. */ - mutex_exit(block_mutex); if (buf_LRU_free_page(bpage, true)) { ++evict_count; + mutex_enter(&buf_pool->LRU_list_mutex); + } else { + mutex_exit(block_mutex); } - } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_LRU)) { + } else if (!failed_acquire + && buf_flush_ready_for_flush(bpage, + BUF_FLUSH_LRU)) { /* Block is ready for flush. Dispatch an IO request. The IO helper thread will put it on free list in IO completion routine. */ mutex_exit(block_mutex); buf_flush_page_and_try_neighbors( bpage, BUF_FLUSH_LRU, max, &count); + } else if (failed_acquire) { + ut_ad(buf_pool->lru_hp.is_hp(prev)); } else { /* Can't evict or dispatch this block. Go to previous. */ + mutex_exit(block_mutex); ut_ad(buf_pool->lru_hp.is_hp(prev)); - mutex_exit(block_mutex); } ut_ad(!mutex_own(block_mutex)); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); free_len = UT_LIST_GET_LEN(buf_pool->free); lru_len = UT_LIST_GET_LEN(buf_pool->LRU); @@ -1567,7 +1622,7 @@ should be flushed, we factor in this value. */ buf_lru_flush_page_count += count; - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (evict_count) { MONITOR_INC_VALUE_CUMULATIVE( @@ -1606,6 +1661,8 @@ ulint count = 0; std::pair res; + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + if (buf_LRU_evict_from_unzip_LRU(buf_pool)) { count = buf_free_from_unzip_LRU_list_batch(buf_pool, max); } @@ -1645,7 +1702,6 @@ ulint count = 0; ulint scanned = 0; - ut_ad(buf_pool_mutex_own(buf_pool)); /* Start from the end of the list looking for a suitable block to be flushed. 
*/ @@ -1670,7 +1726,6 @@ prev = UT_LIST_GET_PREV(list, bpage); buf_pool->flush_hp.set(prev); - buf_flush_list_mutex_exit(buf_pool); #ifdef UNIV_DEBUG bool flushed = @@ -1678,8 +1733,6 @@ buf_flush_page_and_try_neighbors( bpage, BUF_FLUSH_LIST, min_n, &count); - buf_flush_list_mutex_enter(buf_pool); - ut_ad(flushed || buf_pool->flush_hp.is_hp(prev)); --len; @@ -1704,8 +1757,6 @@ count); } - ut_ad(buf_pool_mutex_own(buf_pool)); - return(count); } @@ -1751,13 +1802,13 @@ || !sync_check_iterate(check)); } - buf_pool_mutex_enter(buf_pool); - - /* Note: The buffer pool mutex is released and reacquired within + /* Note: The buffer pool mutexes are released and reacquired within the flush functions. */ switch (flush_type) { case BUF_FLUSH_LRU: + mutex_enter(&buf_pool->LRU_list_mutex); res = buf_do_LRU_batch(buf_pool, min_n); + mutex_exit(&buf_pool->LRU_list_mutex); break; case BUF_FLUSH_LIST: res.first = buf_do_flush_list_batch(buf_pool, min_n, lsn_limit); @@ -1767,8 +1818,6 @@ ut_error; } - buf_pool_mutex_exit(buf_pool); - DBUG_PRINT("ib_buf", ("flush %u completed, flushed %u pages, evicted %u pages", unsigned(flush_type), unsigned(res.first), @@ -1809,14 +1858,14 @@ { ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->flush_state_mutex); if (buf_pool->n_flush[flush_type] > 0 || buf_pool->init_flush[flush_type] == TRUE) { /* There is already a flush batch of the same type running */ - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); return(FALSE); } @@ -1825,7 +1874,7 @@ os_event_reset(buf_pool->no_flush[flush_type]); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); return(TRUE); } @@ -1842,7 +1891,7 @@ ulint flushed_page_count)/*!< in: flushed (not evicted!) page count */ { - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->flush_state_mutex); buf_pool->init_flush[flush_type] = FALSE; @@ -1855,7 +1904,7 @@ os_event_set(buf_pool->no_flush[flush_type]); } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); if (!srv_read_only_mode && flushed_page_count) { buf_dblwr_flush_buffered_writes(); @@ -2039,14 +2088,14 @@ buf_page_t* bpage; ibool freed; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); for (bpage = buf_pool->single_scan_itr.start(), scanned = 0, freed = false; bpage != NULL; ++scanned, bpage = buf_pool->single_scan_itr.get()) { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); @@ -2061,12 +2110,12 @@ if (buf_flush_ready_for_replace(bpage)) { /* block is ready for eviction i.e., it is clean and is not IO-fixed or buffer fixed. */ - mutex_exit(block_mutex); if (buf_LRU_free_page(bpage, true)) { - buf_pool_mutex_exit(buf_pool); freed = true; break; + } else { + mutex_exit(block_mutex); } } else if (buf_flush_ready_for_flush( @@ -2098,7 +2147,7 @@ if (!freed) { /* Can't find a single flushable page. */ ut_ad(!bpage); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } if (scanned) { @@ -2109,7 +2158,7 @@ scanned); } - ut_ad(!buf_pool_mutex_own(buf_pool)); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); return(freed); } @@ -2134,16 +2183,8 @@ /* srv_LRU_scan_depth can be arbitrarily large value. We cap it with current LRU size. 
*/ - buf_pool_mutex_enter(buf_pool); scan_depth = UT_LIST_GET_LEN(buf_pool->LRU); - if (buf_pool->curr_size < buf_pool->old_size - && buf_pool->withdraw_target > 0) { - withdraw_depth = buf_pool->withdraw_target - - UT_LIST_GET_LEN(buf_pool->withdraw); - } else { - withdraw_depth = 0; - } - buf_pool_mutex_exit(buf_pool); + withdraw_depth = buf_get_withdraw_depth(buf_pool); if (withdraw_depth > srv_LRU_scan_depth) { scan_depth = ut_min(withdraw_depth, scan_depth); @@ -2174,15 +2215,15 @@ buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->flush_state_mutex); if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0 || buf_pool->init_flush[BUF_FLUSH_LRU]) { - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU); } else { - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->flush_state_mutex); } } } @@ -3087,7 +3128,6 @@ { ulint count = 0; - buf_pool_mutex_enter(buf_pool); buf_flush_list_mutex_enter(buf_pool); buf_page_t* bpage; @@ -3096,7 +3136,8 @@ bpage != 0; bpage = UT_LIST_GET_NEXT(list, bpage)) { - ut_ad(buf_page_in_file(bpage)); + ut_ad(buf_page_in_file(bpage) + || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); ut_ad(bpage->in_flush_list); ut_ad(bpage->oldest_modification > 0); @@ -3106,7 +3147,6 @@ } buf_flush_list_mutex_exit(buf_pool); - buf_pool_mutex_exit(buf_pool); return(count); } === modified file 'storage/innobase/buf/buf0lru.cc' --- storage/innobase/buf/buf0lru.cc 2014-11-04 14:08:41 +0000 +++ storage/innobase/buf/buf0lru.cc 2015-01-16 19:30:41 +0000 @@ -71,7 +71,7 @@ /** When dropping the search hash index entries before deleting an ibd file, we build a local array of pages belonging to that tablespace in the buffer pool. Following is the size of that array. -We also release buf_pool->mutex after scanning this many pages of the +We also release buf_pool->LRU_list_mutex after scanning this many pages of the flush_list when dropping a table. This is to ensure that other threads are not blocked for extended period of time when using very large buffer pools. */ @@ -133,7 +133,7 @@ If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), the object will be freed. -The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex +The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex and the appropriate hash_lock. This function will release the buf_page_get_mutex() and the hash_lock. @@ -169,7 +169,7 @@ buf_page_t* bpage, /*!< in: control block */ buf_pool_t* buf_pool) /*!< in: buffer pool instance */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); buf_pool->stat.LRU_bytes += bpage->size.physical(); @@ -186,7 +186,7 @@ /*=========================*/ buf_pool_t* buf_pool) { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); /* If the unzip_LRU list is empty, we can only use the LRU. */ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) { @@ -270,7 +270,7 @@ ulint num_entries = 0; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); scan_again: for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU); @@ -315,18 +315,18 @@ goto next_page; } - /* Array full. We release the buf_pool->mutex to obey + /* Array full. We release the LRU list mutex to obey the latching order. 
*/ - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); buf_LRU_drop_page_hash_batch( id, page_size, page_arr, num_entries); num_entries = 0; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); - /* Note that we released the buf_pool mutex above + /* Note that we released the buf_pool->LRU_list_mutex above after reading the prev_bpage during processing of a page_hash_batch (i.e.: when the array was full). Because prev_bpage could belong to a compressed-only @@ -340,8 +340,7 @@ guarantee that ALL such entries will be dropped. */ /* If, however, bpage has been removed from LRU list - to the free list then we should restart the scan. - bpage->state is protected by buf_pool mutex. */ + to the free list then we should restart the scan. */ if (bpage != NULL && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { @@ -349,7 +348,7 @@ } } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); /* Drop any remaining batch of search hashed pages. */ buf_LRU_drop_page_hash_batch(id, page_size, page_arr, num_entries); @@ -358,8 +357,8 @@ /******************************************************************//** While flushing (or removing dirty) pages from a tablespace we don't -want to hog the CPU and resources. Release the buffer pool and block -mutex and try to force a context switch. Then reacquire the same mutexes. +want to hog the CPU and resources. Release the LRU list and block +mutexes and try to force a context switch. Then reacquire the same mutexes. The current page is "fixed" before the release of the mutexes and then "unfixed" again once we have reacquired the mutexes. */ static @@ -369,40 +368,37 @@ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_page_t* bpage) /*!< in/out: current page */ { - BPageMutex* block_mutex; + BPageMutex* block_mutex = buf_page_get_mutex(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(mutex_own(block_mutex)); ut_ad(buf_page_in_file(bpage)); - block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - /* "Fix" the block so that the position cannot be changed after we release the buffer pool and block mutexes. */ buf_page_set_sticky(bpage); - /* Now it is safe to release the buf_pool->mutex. */ - buf_pool_mutex_exit(buf_pool); + /* Now it is safe to release the LRU list mutex. */ + mutex_exit(&buf_pool->LRU_list_mutex); mutex_exit(block_mutex); /* Try and force a context switch. */ os_thread_yield(); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); mutex_enter(block_mutex); /* "Unfix" the block now that we have both the - buffer pool and block mutex again. */ + LRU list and block mutexes again. */ buf_page_unset_sticky(bpage); mutex_exit(block_mutex); } /******************************************************************//** -If we have hogged the resources for too long then release the buffer -pool and flush list mutex and do a thread yield. Set the current page +If we have hogged the resources for too long then release the LRU list and +flush list mutexes and do a thread yield. Set the current page to "sticky" so that it is not relocated during the yield. 
@return true if yielded */ static __attribute__((warn_unused_result)) @@ -411,21 +407,47 @@ /*================*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_page_t* bpage, /*!< in/out: bpage to remove */ - ulint processed) /*!< in: number of pages processed */ + ulint processed, /*!< in: number of pages processed */ + bool* must_restart) /*!< in/out: if true, we have to + restart the flush list scan */ { /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the - loop we release buf_pool->mutex to let other threads + loop we release buf_pool->LRU_list_mutex to let other threads do their job but only if the block is not IO fixed. This ensures that the block stays in its position in the flush_list. */ if (bpage != NULL && processed >= BUF_LRU_DROP_SEARCH_SIZE - && buf_page_get_io_fix(bpage) == BUF_IO_NONE) { + && buf_page_get_io_fix_unlocked(bpage) == BUF_IO_NONE) { + + BPageMutex* block_mutex = buf_page_get_mutex(bpage); buf_flush_list_mutex_exit(buf_pool); - /* Release the buffer pool and block mutex + /* We don't have to worry about bpage becoming a dangling + pointer by a compressed page flush list relocation because + buf_page_get_gen() won't be called for pages from this + tablespace. */ + + mutex_enter(block_mutex); + /* Recheck the I/O fix and the flush list presence now that we + hold the right mutex */ + if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE + || bpage->oldest_modification == 0)) { + + mutex_exit(block_mutex); + + *must_restart = true; + + buf_flush_list_mutex_enter(buf_pool); + + return false; + } + + *must_restart = false; + + /* Release the LRU list and buf_page_get_mutex() mutex to give the other threads a go. */ buf_flush_yield(buf_pool, bpage); @@ -454,18 +476,20 @@ /*=====================*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_page_t* bpage, /*!< in/out: bpage to remove */ - bool flush) /*!< in: flush to disk if true but + bool flush, /*!< in: flush to disk if true but don't remove else remove without flushing to disk */ + bool* must_restart) /*!< in/out: if true, must restart the + flush list scan */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_flush_list_mutex_own(buf_pool)); - /* bpage->space and bpage->io_fix are protected by - buf_pool->mutex and block_mutex. It is safe to check - them while holding buf_pool->mutex only. */ + /* It is safe to check bpage->space and bpage->io_fix while holding + buf_pool->LRU_list_mutex only. */ - if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage) + != BUF_IO_NONE)) { /* We cannot remove this page during this scan yet; maybe the system is currently reading it @@ -474,24 +498,33 @@ } - BPageMutex* block_mutex; + BPageMutex* block_mutex = buf_page_get_mutex(bpage); bool processed = false; - block_mutex = buf_page_get_mutex(bpage); - - /* We have to release the flush_list_mutex to obey the - latching order. We are however guaranteed that the page - will stay in the flush_list and won't be relocated because - buf_flush_remove() and buf_flush_relocate_on_flush_list() - need buf_pool->mutex as well. */ + /* We don't have to worry about bpage becoming a dangling + pointer by a compressed page flush list relocation because + buf_page_get_gen() won't be called for pages from this + tablespace. 
*/ buf_flush_list_mutex_exit(buf_pool); mutex_enter(block_mutex); - ut_ad(bpage->oldest_modification != 0); - - if (!flush) { + /* Recheck the page I/O fix and the flush list presence now + that we hold the right mutex. */ + if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE + || bpage->oldest_modification == 0)) { + + /* The page became I/O-fixed or is not on the flush + list anymore, this invalidates any flush-list-page + pointers we have. */ + + mutex_exit(block_mutex); + + *must_restart = true; + processed = false; + + } else if (!flush) { buf_flush_remove(bpage); @@ -501,8 +534,8 @@ } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - /* The following call will release the buffer pool - and block mutex. */ + /* The following call will release the LRU list + and block mutexes. */ processed = buf_flush_page( buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false); @@ -510,7 +543,7 @@ /* Wake possible simulated aio thread to actually post the writes to the operating system */ os_aio_simulated_wake_handler_threads(); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); } else { mutex_exit(block_mutex); } @@ -521,7 +554,7 @@ buf_flush_list_mutex_enter(buf_pool); ut_ad(!mutex_own(block_mutex)); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); return(processed); } @@ -551,9 +584,12 @@ buf_page_t* bpage; ulint processed = 0; + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + buf_flush_list_mutex_enter(buf_pool); rescan: + bool must_restart = false; bool all_freed = true; for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); @@ -572,15 +608,16 @@ /* Skip this block, as it does not belong to the target space. */ - } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) { + } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush, + &must_restart)) { /* Remove was unsuccessful, we have to try again by scanning the entire list from the end. This also means that we never released the - buf_pool mutex. Therefore we can trust the prev + flush list mutex. Therefore we can trust the prev pointer. buf_flush_or_remove_page() released the - flush list mutex but not the buf_pool mutex. + flush list mutex but not the LRU list mutex. Therefore it is possible that a new page was added to the flush list. For example, in case where we are at the head of the flush list and @@ -598,17 +635,23 @@ } else if (flush) { /* The processing was successful. And during the - processing we have released the buf_pool mutex + processing we have released all the buf_pool mutexes when calling buf_page_flush(). We cannot trust prev pointer. */ goto rescan; + } else if (UNIV_UNLIKELY(must_restart)) { + + ut_ad(!all_freed); + break; } ++processed; /* Yield if we have hogged the CPU and mutexes for too long. */ - if (buf_flush_try_yield(buf_pool, prev, processed)) { + if (buf_flush_try_yield(buf_pool, prev, processed, + &must_restart)) { + ut_ad(!must_restart); /* Reset the batch size counter if we had to yield. */ processed = 0; @@ -654,11 +697,13 @@ dberr_t err; do { - buf_pool_mutex_enter(buf_pool); + /* TODO: it should be possible to avoid locking the LRU list + mutex here. 
*/ + mutex_enter(&buf_pool->LRU_list_mutex); err = buf_flush_or_remove_pages(buf_pool, id, flush, trx); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); ut_ad(buf_flush_validate(buf_pool)); @@ -691,7 +736,7 @@ ibool all_freed; scan_again: - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); all_freed = TRUE; @@ -708,15 +753,16 @@ prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - /* bpage->id.space() and bpage->io_fix are protected by - buf_pool->mutex and the block_mutex. It is safe to check - them while holding buf_pool->mutex only. */ + /* It is safe to check bpage->id.space() and bpage->io_fix + while holding buf_pool->LRU_list_mutex only and later recheck + while holding the buf_page_get_mutex() mutex. */ if (bpage->id.space() != id) { /* Skip this block, as it does not belong to the space that is being invalidated. */ goto next_page; - } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + } else if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage) + != BUF_IO_NONE)) { /* We cannot remove this page during this scan yet; maybe the system is currently reading it in, or flushing the modifications to the file */ @@ -732,7 +778,11 @@ mutex_enter(block_mutex); - if (bpage->buf_fix_count > 0) { + if (UNIV_UNLIKELY( + bpage->id.space() != id + || bpage->buf_fix_count > 0 + || (buf_page_get_io_fix(bpage) + != BUF_IO_NONE))) { mutex_exit(block_mutex); @@ -761,7 +811,7 @@ /* Do nothing, because the adaptive hash index covers uncompressed pages only. */ } else if (((buf_block_t*) bpage)->index) { - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(hash_lock); @@ -802,7 +852,7 @@ bpage = prev_bpage; } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); if (!all_freed) { os_thread_sleep(20000); @@ -905,7 +955,8 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(mutex_own(&buf_pool->zip_mutex)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE); /* Find the first successor of bpage in the LRU list @@ -942,7 +993,7 @@ if true, otherwise scan only srv_LRU_scan_depth / 2 blocks. 
*/ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) { return(false); } @@ -961,12 +1012,17 @@ prev_block = UT_LIST_GET_PREV(unzip_LRU, block); + mutex_enter(&block->mutex); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); freed = buf_LRU_free_page(&block->page, false); + if (!freed) + mutex_exit(&block->mutex); + block = prev_block; } @@ -993,7 +1049,7 @@ if true, otherwise scan only up to BUF_LRU_SEARCH_SCAN_THRESHOLD */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ulint scanned = 0; bool freed = false; @@ -1009,19 +1065,20 @@ buf_pool->lru_scan_itr.set(prev); + ut_ad(buf_page_in_file(bpage)); + ut_ad(bpage->in_LRU_list); + + unsigned accessed = buf_page_is_accessed(bpage); + mutex_enter(mutex); - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - unsigned accessed = buf_page_is_accessed(bpage); - if (buf_flush_ready_for_replace(bpage)) { - mutex_exit(mutex); + freed = buf_LRU_free_page(bpage, true); - } else { + } + + if (!freed) mutex_exit(mutex); - } if (freed && !accessed) { /* Keep track of pages that are evicted without @@ -1030,8 +1087,10 @@ ++buf_pool->stat.n_ra_pages_evicted; } - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(mutex)); + + if (freed) + break; } if (scanned) { @@ -1058,10 +1117,24 @@ BUF_LRU_SEARCH_SCAN_THRESHOLD blocks. */ { - ut_ad(buf_pool_mutex_own(buf_pool)); - - return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all) - || buf_LRU_free_from_common_LRU_list(buf_pool, scan_all)); + bool freed = false; + bool use_unzip_list = UT_LIST_GET_LEN(buf_pool->unzip_LRU) > 0; + + mutex_enter(&buf_pool->LRU_list_mutex); + + if (use_unzip_list) { + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all); + } + + if (!freed) { + freed = buf_LRU_free_from_common_LRU_list(buf_pool, scan_all); + } + + if (!freed) { + mutex_exit(&buf_pool->LRU_list_mutex); + } + + return(freed); } /******************************************************************//** @@ -1106,7 +1179,7 @@ { buf_block_t* block; - ut_ad(buf_pool_mutex_own(buf_pool)); + mutex_enter(&buf_pool->free_list_mutex); block = reinterpret_cast<buf_block_t*>( UT_LIST_GET_FIRST(buf_pool->free)); @@ -1119,33 +1192,34 @@ ut_ad(!block->page.in_LRU_list); ut_a(!buf_page_in_file(&block->page)); UT_LIST_REMOVE(buf_pool->free, &block->page); + mutex_exit(&buf_pool->free_list_mutex); - if (buf_pool->curr_size >= buf_pool->old_size - || UT_LIST_GET_LEN(buf_pool->withdraw) - >= buf_pool->withdraw_target + if (!buf_get_withdraw_depth(buf_pool) || !buf_block_will_withdrawn(buf_pool, block)) { /* found valid free block */ - buf_page_mutex_enter(block); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); ut_ad(buf_pool_from_block(block) == buf_pool); - buf_page_mutex_exit(block); - break; + return(block); } /* This should be withdrawn */ + mutex_enter(&buf_pool->free_list_mutex); UT_LIST_ADD_LAST( buf_pool->withdraw, &block->page); ut_d(block->in_withdraw_list = TRUE); + fprintf(stderr, "2 withdrawing block at %p\n", block); block = reinterpret_cast<buf_block_t*>( UT_LIST_GET_FIRST(buf_pool->free)); } + mutex_exit(&buf_pool->free_list_mutex); + return(block); } @@ -1160,8 +1234,6 @@ /*===================================*/ const buf_pool_t* buf_pool) /*!< in: buffer pool instance */ { - ut_ad(buf_pool_mutex_own(buf_pool)); - if (!recv_recovery_is_on() && buf_pool->curr_size == 
buf_pool->old_size && UT_LIST_GET_LEN(buf_pool->free) @@ -1255,10 +1327,10 @@ bool mon_value_was = false; bool started_monitor = false; + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); loop: - buf_pool_mutex_enter(buf_pool); - buf_LRU_check_size_of_non_data_objects(buf_pool); /* If there is a block in the free list, take it */ @@ -1266,7 +1338,6 @@ if (block != NULL) { - buf_pool_mutex_exit(buf_pool); ut_ad(buf_pool_from_block(block) == buf_pool); memset(&block->page.zip, 0, sizeof block->page.zip); @@ -1279,6 +1350,7 @@ } freed = false; + os_rmb; if (buf_pool->try_LRU_scan || n_iterations > 0) { /* If no block was in the free list, search from the end of the LRU list and try to free a block there. @@ -1294,11 +1366,10 @@ TRUE again when we flush a batch from this buffer pool. */ buf_pool->try_LRU_scan = FALSE; + os_wmb; } } - buf_pool_mutex_exit(buf_pool); - if (freed) { goto loop; } @@ -1380,7 +1451,7 @@ ulint new_len; ut_a(buf_pool->LRU_old); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN); ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX); #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5) @@ -1444,7 +1515,7 @@ /*=============*/ buf_pool_t* buf_pool) { - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); /* We first initialize all blocks in the LRU list as old and then use @@ -1480,7 +1551,7 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_page_in_file(bpage)); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (buf_page_belongs_to_unzip_LRU(bpage)) { buf_block_t* block = reinterpret_cast(bpage); @@ -1516,7 +1587,7 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); @@ -1599,7 +1670,7 @@ { buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); @@ -1625,7 +1696,7 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); @@ -1678,7 +1749,7 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); ut_ad(!bpage->in_LRU_list); @@ -1762,7 +1833,7 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (bpage->old) { buf_pool->stat.n_pages_made_young++; @@ -1788,12 +1859,13 @@ Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns true, it will temporarily -release buf_pool->mutex. Furthermore, the page frame will no longer be -accessible via bpage. - -The caller must hold buf_pool->mutex and must not hold any -buf_page_get_mutex() when calling this function. +NOTE: If this function returns true, it will release the LRU list mutex, +and release the buf_page_get_mutex() mutex. Furthermore, the page frame will no +longer be accessible via bpage. 
If this function returns false, +the buf_page_get_mutex() might be temporarily released and relocked too. + +The caller must hold the LRU list and buf_page_get_mutex() mutexes. + @return true if freed, false otherwise. */ bool @@ -1810,17 +1882,15 @@ BPageMutex* block_mutex = buf_page_get_mutex(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(mutex_own(block_mutex)); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); - rw_lock_x_lock(hash_lock); - mutex_enter(block_mutex); - if (!buf_page_can_relocate(bpage)) { /* Do not free buffer fixed and I/O-fixed blocks. */ - goto func_exit; + return(false); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -1832,25 +1902,20 @@ /* Do not completely free dirty blocks. */ if (bpage->oldest_modification) { - goto func_exit; + return(false); } } else if (bpage->oldest_modification > 0 && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY); -func_exit: - rw_lock_x_unlock(hash_lock); - mutex_exit(block_mutex); return(false); } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { b = buf_page_alloc_descriptor(); ut_a(b); - memcpy(b, bpage, sizeof *b); } - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); @@ -1858,12 +1923,43 @@ DBUG_PRINT("ib_buf", ("free page " UINT32PF ":" UINT32PF, bpage->id.space(), bpage->id.page_no())); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_page_can_relocate(bpage)); + mutex_exit(block_mutex); + + rw_lock_x_lock(hash_lock); + mutex_enter(block_mutex); + + if (UNIV_UNLIKELY(!buf_page_can_relocate(bpage) + || ((zip || !bpage->zip.data) + && bpage->oldest_modification))) { + +not_freed: + rw_lock_x_unlock(hash_lock); + if (b) { + buf_page_free_descriptor(b); + } + + return(false); + } else if (UNIV_UNLIKELY(bpage->oldest_modification + && (buf_page_get_state(bpage) + != BUF_BLOCK_FILE_PAGE))) { + + ut_ad(buf_page_get_state(bpage) + == BUF_BLOCK_ZIP_DIRTY); + goto not_freed; + } + + if (b) { + memcpy(b, bpage, sizeof *b); + } if (!buf_LRU_block_remove_hashed(bpage, zip)) { + + mutex_exit(&buf_pool->LRU_list_mutex); + + if (b) { + buf_page_free_descriptor(b); + } + return(true); } @@ -1961,6 +2057,8 @@ buf_LRU_add_block_low(b, buf_page_is_old(b)); } + mutex_enter(&buf_pool->zip_mutex); + rw_lock_x_unlock(hash_lock); if (b->state == BUF_BLOCK_ZIP_PAGE) { #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG buf_LRU_insert_zip_clean(b); @@ -1978,40 +2076,15 @@ bpage->size.logical(), false)); - mutex_exit(block_mutex); - /* Prevent buf_page_get_gen() from - decompressing the block while we release - buf_pool->mutex and block_mutex. */ - block_mutex = buf_page_get_mutex(b); - - mutex_enter(block_mutex); - + decompressing the block while we release block_mutex. */ buf_page_set_sticky(b); - - mutex_exit(block_mutex); - - rw_lock_x_unlock(hash_lock); - - } else { - - /* There can be multiple threads doing an LRU scan to - free a block. The page_cleaner thread can be doing an - LRU batch whereas user threads can potentially be doing - multiple single page flushes. As we release - buf_pool->mutex below we need to make sure that no one - else considers this block as a victim for page - replacement. This block is already out of page_hash - and we are about to remove it from the LRU list and put - it on the free list. 
*/ - mutex_enter(block_mutex); - - buf_page_set_sticky(bpage); - - mutex_exit(block_mutex); + mutex_exit(&buf_pool->zip_mutex); + mutex_exit(block_mutex); + } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); /* Remove possible adaptive hash index on the page. The page was declared uninitialized by @@ -2048,16 +2121,14 @@ checksum); } - buf_pool_mutex_enter(buf_pool); - - mutex_enter(block_mutex); - - buf_page_unset_sticky(b != NULL ? b : bpage); - - mutex_exit(block_mutex); + if (b) { + mutex_enter(&buf_pool->zip_mutex); + buf_page_unset_sticky(b); + mutex_exit(&buf_pool->zip_mutex); + } buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); return(true); } @@ -2072,9 +2143,6 @@ void* data; buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(buf_page_mutex_own(block)); - switch (buf_block_get_state(block)) { case BUF_BLOCK_MEMORY: case BUF_BLOCK_READY_FOR_USE: @@ -2090,8 +2158,6 @@ ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_LRU_list); - buf_block_set_state(block, BUF_BLOCK_NOT_USED); - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); #ifdef UNIV_DEBUG /* Wipe contents of page to reveal possible stale pointers to it */ @@ -2105,16 +2171,11 @@ if (data != NULL) { block->page.zip.data = NULL; - buf_page_mutex_exit(block); - buf_pool_mutex_exit_forbid(buf_pool); ut_ad(block->page.size.is_compressed()); buf_buddy_free(buf_pool, data, block->page.size.physical()); - buf_pool_mutex_exit_allow(buf_pool); - buf_page_mutex_enter(block); - page_zip_set_size(&block->page.zip, 0); block->page.size.copy_from( @@ -2123,17 +2184,23 @@ false)); } - if (buf_pool->curr_size < buf_pool->old_size - && UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target + if (buf_get_withdraw_depth(buf_pool) && buf_block_will_withdrawn(buf_pool, block)) { /* This should be withdrawn */ + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + mutex_enter(&buf_pool->free_list_mutex); UT_LIST_ADD_LAST( buf_pool->withdraw, &block->page); ut_d(block->in_withdraw_list = TRUE); + fprintf(stderr, "3 withdrawing block at %p\n", block); + mutex_exit(&buf_pool->free_list_mutex); } else { + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + mutex_enter(&buf_pool->free_list_mutex); UT_LIST_ADD_FIRST(buf_pool->free, &block->page); ut_d(block->page.in_free_list = TRUE); + mutex_exit(&buf_pool->free_list_mutex); } UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); @@ -2144,7 +2211,7 @@ If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), the object will be freed. -The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex +The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex and the appropriate hash_lock. This function will release the buf_page_get_mutex() and the hash_lock. 
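A minimal caller-side sketch of the protocol stated in the NOTE above for buf_LRU_free_page(): the caller enters with both the LRU list mutex and the block mutex held, and the function releases both only when it returns true. Identifiers are those introduced by this patch; victim selection and retry logic are elided, so this is an illustration rather than code from the patch.

	/* Sketch only: driving buf_LRU_free_page() under the split-mutex scheme. */
	mutex_enter(&buf_pool->LRU_list_mutex);

	/* ... pick a replaceable victim bpage from the tail of buf_pool->LRU ... */

	BPageMutex*	block_mutex = buf_page_get_mutex(bpage);
	mutex_enter(block_mutex);

	bool	freed = buf_LRU_free_page(bpage, true);

	if (!freed) {
		/* Failure: both mutexes are still owned by this thread
		and must be released here. */
		mutex_exit(block_mutex);
		mutex_exit(&buf_pool->LRU_list_mutex);
	}
	/* Success: buf_LRU_free_page() has already released the LRU list
	mutex and the block mutex, and bpage must not be dereferenced again. */
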
@@ -2167,7 +2234,7 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); rw_lock_t* hash_lock; - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); @@ -2272,7 +2339,7 @@ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG mutex_exit(buf_page_get_mutex(bpage)); rw_lock_x_unlock(hash_lock); - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); buf_print(); buf_LRU_print(); buf_validate(); @@ -2302,12 +2369,10 @@ mutex_exit(&buf_pool->zip_mutex); rw_lock_x_unlock(hash_lock); - buf_pool_mutex_exit_forbid(buf_pool); buf_buddy_free(buf_pool, bpage->zip.data, bpage->size.physical()); - buf_pool_mutex_exit_allow(buf_pool); buf_page_free_descriptor(bpage); return(false); @@ -2331,14 +2396,15 @@ page_hash. Only possibility is when while invalidating a tablespace we buffer fix the prev_page in LRU to avoid relocation during the scan. But that is not - possible because we are holding buf_pool mutex. + possible because we are holding LRU list mutex. 2) Not possible because in buf_page_init_for_read() - we do a look up of page_hash while holding buf_pool - mutex and since we are holding buf_pool mutex here + we do a look up of page_hash while holding LRU list + mutex and since we are holding LRU list mutex here and by the time we'll release it in the caller we'd have inserted the compressed only descriptor in the page_hash. */ + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); rw_lock_x_unlock(hash_lock); mutex_exit(&((buf_block_t*) bpage)->mutex); @@ -2350,12 +2416,9 @@ ut_ad(!bpage->in_free_list); ut_ad(!bpage->in_flush_list); ut_ad(!bpage->in_LRU_list); - buf_pool_mutex_exit_forbid(buf_pool); buf_buddy_free(buf_pool, data, bpage->size.physical()); - buf_pool_mutex_exit_allow(buf_pool); - page_zip_set_size(&bpage->zip, 0); bpage->size.copy_from( @@ -2389,9 +2452,6 @@ be in a state where it can be freed */ { buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool_mutex_own(buf_pool)); - - buf_page_mutex_enter(block); if (buf_pool->flush_rbt == NULL) { block->page.id.reset(ULINT32_UNDEFINED, ULINT32_UNDEFINED); @@ -2400,34 +2460,41 @@ buf_block_set_state(block, BUF_BLOCK_MEMORY); buf_LRU_block_free_non_file_page(block); - buf_page_mutex_exit(block); } /******************************************************************//** -Remove one page from LRU list and put it to free list */ +Remove one page from LRU list and put it to free list. The caller must hold the +LRU list and block mutexes and have page hash latched in X. The latch and +the block mutexes will be released. 
*/ void buf_LRU_free_one_page( /*==================*/ - buf_page_t* bpage) /*!< in/out: block, must contain a file page and + buf_page_t* bpage, /*!< in/out: block, must contain a file page and be in a state where it can be freed; there may or may not be a hash index to the page */ + bool zip) /*!< in: true if should remove also the + compressed page of an uncompressed page */ { +#if defined(UNIV_DEBUG) || defined(UNIV_SYNC_DEBUG) buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); +#endif +#ifdef UNIV_SYNC_DEBUG rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); - BPageMutex* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(buf_pool_mutex_own(buf_pool)); - - rw_lock_x_lock(hash_lock); - mutex_enter(block_mutex); - - if (buf_LRU_block_remove_hashed(bpage, true)) { +#endif + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(mutex_own(block_mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); +#endif + + if (buf_LRU_block_remove_hashed(bpage, zip)) { buf_LRU_block_free_hashed_page((buf_block_t*) bpage); } - /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */ + /* buf_LRU_block_remove_hashed() releases hash_lock and block mutex */ #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) && !rw_lock_own(hash_lock, RW_LOCK_S)); @@ -2459,7 +2526,7 @@ } if (adjust) { - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); if (ratio != buf_pool->LRU_old_ratio) { buf_pool->LRU_old_ratio = ratio; @@ -2471,7 +2538,7 @@ } } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } else { buf_pool->LRU_old_ratio = ratio; } @@ -2521,6 +2588,7 @@ buf_LRU_stat_t cur_stat; /* If we haven't started eviction yet then don't update stats. */ + os_rmb; for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); @@ -2557,6 +2625,7 @@ func_exit: /* Clear the current entry. 
*/ memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur); + os_wmb; } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -2571,7 +2640,7 @@ ulint old_len; ulint new_len; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { @@ -2631,6 +2700,10 @@ ut_a(buf_pool->LRU_old_len == old_len); + mutex_exit(&buf_pool->LRU_list_mutex); + + mutex_enter(&buf_pool->free_list_mutex); + CheckInFreeList::validate(buf_pool); for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free); @@ -2640,6 +2713,10 @@ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED); } + mutex_exit(&buf_pool->free_list_mutex); + + mutex_enter(&buf_pool->LRU_list_mutex); + CheckUnzipLRUAndLRUList::validate(buf_pool); for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); @@ -2651,7 +2728,7 @@ ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /**********************************************************************//** @@ -2682,7 +2759,7 @@ /*===================*/ buf_pool_t* buf_pool) { - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; @@ -2738,7 +2815,7 @@ mutex_exit(buf_page_get_mutex(bpage)); } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /**********************************************************************//** === modified file 'storage/innobase/buf/buf0rea.cc' --- storage/innobase/buf/buf0rea.cc 2014-08-29 01:31:40 +0000 +++ storage/innobase/buf/buf0rea.cc 2015-01-16 19:30:41 +0000 @@ -62,10 +62,14 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const bool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, + bpage->id); + + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(hash_lock); + mutex_enter(buf_page_get_mutex(bpage)); /* First unfix and release lock on the bpage */ - buf_pool_mutex_enter(buf_pool); - mutex_enter(buf_page_get_mutex(bpage)); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); ut_ad(bpage->buf_fix_count == 0); @@ -78,15 +82,13 @@ BUF_IO_READ); } - mutex_exit(buf_page_get_mutex(bpage)); - /* remove the block from LRU list */ buf_LRU_free_one_page(bpage); + mutex_exit(&buf_pool->LRU_list_mutex); + ut_ad(buf_pool->n_pend_reads > 0); - buf_pool->n_pend_reads--; - - buf_pool_mutex_exit(buf_pool); + os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); } /** Low-level function which reads a page asynchronously from a file to the @@ -171,6 +173,7 @@ sync ? "sync" : "async")); ut_ad(buf_page_in_file(bpage)); + ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); if (sync) { thd_wait_begin(NULL, THD_WAIT_DISKIO); @@ -296,11 +299,9 @@ high = space_size; } - buf_pool_mutex_enter(buf_pool); - + os_rmb; if (buf_pool->n_pend_reads > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - buf_pool_mutex_exit(buf_pool); return(0); } @@ -309,8 +310,13 @@ that is, reside near the start of the LRU list. 
*/ for (i = low; i < high; i++) { - const buf_page_t* bpage = buf_page_hash_get( - buf_pool, page_id_t(page_id.space(), i)); + + rw_lock_t* hash_lock; + + const buf_page_t* bpage = + buf_page_hash_get_s_locked(buf_pool, + page_id_t(page_id.space(), + i), &hash_lock); if (bpage != NULL && buf_page_is_accessed(bpage) @@ -321,13 +327,16 @@ if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) { - buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(hash_lock); goto read_ahead; } } + + if (bpage) { + rw_lock_s_unlock(hash_lock); + } } - buf_pool_mutex_exit(buf_pool); /* Do nothing */ return(0); @@ -495,6 +504,7 @@ buf_page_t* bpage; buf_frame_t* frame; buf_page_t* pred_bpage = NULL; + unsigned pred_bpage_is_accessed = 0; ulint pred_offset; ulint succ_offset; ulint count; @@ -545,18 +555,15 @@ tablespace_version = fil_space_get_version(page_id.space()); - buf_pool_mutex_enter(buf_pool); - if (high > fil_space_get_size(page_id.space())) { - buf_pool_mutex_exit(buf_pool); /* The area is not whole, return */ return(0); } + os_rmb; if (buf_pool->n_pend_reads > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - buf_pool_mutex_exit(buf_pool); return(0); } @@ -578,9 +585,13 @@ fail_count = 0; + rw_lock_t* hash_lock; + for (i = low; i < high; i++) { - bpage = buf_page_hash_get(buf_pool, - page_id_t(page_id.space(), i)); + + bpage = buf_page_hash_get_s_locked(buf_pool, + page_id_t(page_id.space(), + i), &hash_lock); if (bpage == NULL || !buf_page_is_accessed(bpage)) { /* Not accessed */ @@ -597,7 +608,7 @@ a little against this. */ int res = ut_ulint_cmp( buf_page_is_accessed(bpage), - buf_page_is_accessed(pred_bpage)); + pred_bpage_is_accessed); /* Accesses not in the right order */ if (res != 0 && res != asc_or_desc) { fail_count++; @@ -606,22 +617,29 @@ if (fail_count > threshold) { /* Too many failures: return */ - buf_pool_mutex_exit(buf_pool); + if (bpage) { + rw_lock_s_unlock(hash_lock); + } return(0); } - if (bpage && buf_page_is_accessed(bpage)) { - pred_bpage = bpage; + if (bpage) { + if (buf_page_is_accessed(bpage)) { + pred_bpage = bpage; + pred_bpage_is_accessed + = buf_page_is_accessed(bpage); + } + + rw_lock_s_unlock(hash_lock); } } /* If we got this far, we know that enough pages in the area have been accessed in the right order: linear read-ahead can be sensible */ - bpage = buf_page_hash_get(buf_pool, page_id); + bpage = buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock); if (bpage == NULL) { - buf_pool_mutex_exit(buf_pool); return(0); } @@ -647,7 +665,7 @@ pred_offset = fil_page_get_prev(frame); succ_offset = fil_page_get_next(frame); - buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(hash_lock); if ((page_id.page_no() == low) && (succ_offset == page_id.page_no() + 1)) { @@ -790,6 +808,7 @@ continue; } + os_rmb; while (buf_pool->n_pend_reads > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { os_thread_sleep(500000); @@ -856,6 +875,7 @@ count = 0; buf_pool = buf_pool_get(cur_page_id); + os_rmb; while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) { os_aio_simulated_wake_handler_threads(); === modified file 'storage/innobase/fsp/fsp0fsp.cc' --- storage/innobase/fsp/fsp0fsp.cc 2014-11-06 18:59:50 +0000 +++ storage/innobase/fsp/fsp0fsp.cc 2015-01-16 19:30:41 +0000 @@ -1443,11 +1443,9 @@ } else { rw_lock_sx_lock(&block->lock); } - mutex_enter(&block->mutex); buf_block_buf_fix_inc(block, __FILE__, __LINE__); - mutex_exit(&block->mutex); mtr_memo_push(init_mtr, block, rw_latch == RW_X_LATCH ? 
MTR_MEMO_PAGE_X_FIX : MTR_MEMO_PAGE_SX_FIX); === modified file 'storage/innobase/handler/ha_innodb.cc' --- storage/innobase/handler/ha_innodb.cc 2014-09-02 07:56:28 +0000 +++ storage/innobase/handler/ha_innodb.cc 2015-01-16 19:30:41 +0000 @@ -283,7 +283,11 @@ # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK PSI_KEY(buffer_block_mutex), # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ - PSI_KEY(buf_pool_mutex), + PSI_KEY(buf_pool_flush_state_mutex), + PSI_KEY(buf_pool_LRU_list_mutex), + PSI_KEY(buf_pool_free_list_mutex), + PSI_KEY(buf_pool_zip_free_mutex), + PSI_KEY(buf_pool_zip_hash_mutex), PSI_KEY(buf_pool_zip_mutex), PSI_KEY(cache_last_read_mutex), PSI_KEY(dict_foreign_err_mutex), @@ -14858,9 +14862,8 @@ return; } - buf_pool_mutex_enter_all(); + os_rmb; if (srv_buf_pool_old_size != srv_buf_pool_size) { - buf_pool_mutex_exit_all(); push_warning_printf(thd, Sql_condition::SL_WARNING, ER_WRONG_ARGUMENTS, @@ -14871,7 +14874,6 @@ if (srv_buf_pool_instances > 1 && in_val < BUF_POOL_SIZE_THRESHOLD) { - buf_pool_mutex_exit_all(); push_warning_printf(thd, Sql_condition::SL_WARNING, ER_WRONG_ARGUMENTS, @@ -14884,15 +14886,13 @@ srv_buf_pool_size = buf_pool_size_align(static_cast(in_val)); innobase_buffer_pool_size = static_cast(srv_buf_pool_size); + os_wmb; if (srv_buf_pool_old_size == srv_buf_pool_size) { - buf_pool_mutex_exit_all(); /* nothing to do */ return; } - buf_pool_mutex_exit_all(); - ut_snprintf(export_vars.innodb_buffer_pool_resize_status, sizeof(export_vars.innodb_buffer_pool_resize_status), "Requested to resize buffer pool."); @@ -15683,7 +15683,7 @@ for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool = &buf_pool_ptr[i]; - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); for (buf_block_t* block = UT_LIST_GET_LAST( buf_pool->unzip_LRU); @@ -15695,14 +15695,24 @@ ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); - if (!buf_LRU_free_page(&block->page, false)) { + rw_lock_t* hash_lock + = buf_page_hash_lock_get(buf_pool, + block->page.id); + rw_lock_x_lock(hash_lock); + mutex_enter(&block->mutex); + + if (buf_page_can_relocate(&block->page)) { + rw_lock_x_unlock(hash_lock); + mutex_exit(&block->mutex); all_evicted = false; + } else { + buf_LRU_free_one_page(&block->page, false); } block = prev_block; } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } return(all_evicted); === modified file 'storage/innobase/handler/i_s.cc' --- storage/innobase/handler/i_s.cc 2014-08-26 15:37:03 +0000 +++ storage/innobase/handler/i_s.cc 2015-01-16 19:30:41 +0000 @@ -2095,7 +2095,7 @@ buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->zip_free_mutex); for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) { buf_buddy_stat_t* buddy_stat; @@ -2104,6 +2104,7 @@ table->field[0]->store(BUF_BUDDY_LOW << x); table->field[1]->store(static_cast(i)); + os_rmb; table->field[2]->store(static_cast( buddy_stat->used)); table->field[3]->store(static_cast( @@ -2116,7 +2117,8 @@ static_cast(buddy_stat->relocated_usec / 1000000)); if (reset) { - /* This is protected by buf_pool->mutex. */ + /* This is protected by + buf_pool->zip_free_mutex. 
*/ buddy_stat->relocated = 0; buddy_stat->relocated_usec = 0; } @@ -2127,7 +2129,7 @@ } } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->zip_free_mutex); if (status) { break; @@ -5424,12 +5426,16 @@ out: structure filled with scanned info */ { + BPageMutex* mutex = buf_page_get_mutex(bpage); + ut_ad(pool_id < MAX_BUFFER_POOLS); page_info->pool_id = pool_id; page_info->block_id = pos; + mutex_enter(mutex); + page_info->page_state = buf_page_get_state(bpage); /* Only fetch information for buffers that map to a tablespace, @@ -5468,6 +5474,7 @@ break; case BUF_IO_READ: page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; + mutex_exit(mutex); return; } @@ -5488,6 +5495,8 @@ } else { page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; } + + mutex_exit(mutex); } /*******************************************************************//** @@ -5537,16 +5546,10 @@ /* For each chunk, we'll pre-allocate information structures to cache the page information read from - the buffer pool. Doing so before obtain any mutex */ + the buffer pool */ info_buffer = (buf_page_info_t*) mem_heap_zalloc( heap, mem_size); - /* Obtain appropriate mutexes. Since this is diagnostic - buffer pool info printout, we are not required to - preserve the overall consistency, so we can - release mutex periodically */ - buf_pool_mutex_enter(buf_pool); - /* GO through each block in the chunk */ for (n_blocks = num_to_process; n_blocks--; block++) { i_s_innodb_buffer_page_get_info( @@ -5556,8 +5559,6 @@ num_page++; } - buf_pool_mutex_exit(buf_pool); - /* Fill in information schema table with information just collected from the buffer chunk scan */ status = i_s_innodb_buffer_page_fill( @@ -6084,9 +6085,9 @@ DBUG_ENTER("i_s_innodb_fill_buffer_lru"); - /* Obtain buf_pool mutex before allocate info_buffer, since + /* Obtain buf_pool->LRU_list_mutex before allocate info_buffer, since UT_LIST_GET_LEN(buf_pool->LRU) could change */ - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); lru_len = UT_LIST_GET_LEN(buf_pool->LRU); @@ -6120,7 +6121,7 @@ ut_ad(lru_pos == UT_LIST_GET_LEN(buf_pool->LRU)); exit: - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); if (info_buffer) { status = i_s_innodb_buf_page_lru_fill( === modified file 'storage/innobase/ibuf/ibuf0ibuf.cc' --- storage/innobase/ibuf/ibuf0ibuf.cc 2014-08-26 15:37:03 +0000 +++ storage/innobase/ibuf/ibuf0ibuf.cc 2015-01-16 19:30:41 +0000 @@ -4544,7 +4544,8 @@ ulint dops[IBUF_OP_COUNT]; ut_ad(block == NULL || page_id.equals_to(block->page.id)); - ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ); + ut_ad(block == NULL + || buf_block_get_io_fix_unlocked(block) == BUF_IO_READ); if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE || trx_sys_hdr_page(page_id) === modified file 'storage/innobase/include/buf0buddy.h' --- storage/innobase/include/buf0buddy.h 2014-07-07 11:14:44 +0000 +++ storage/innobase/include/buf0buddy.h 2015-01-16 19:30:41 +0000 @@ -35,10 +35,10 @@ #include "buf0types.h" /**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->mutex may be released and reacquired. -This function should only be used for allocating compressed page frames. +Allocate a block. This function should only be used for allocating compressed +page frames. 
The thread calling this function must hold +buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any +block->mutex. @return allocated block, never NULL */ UNIV_INLINE byte* @@ -46,14 +46,9 @@ /*============*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which the page resides */ - ulint size, /*!< in: compressed page size + ulint size) /*!< in: compressed page size (between UNIV_ZIP_SIZE_MIN and UNIV_PAGE_SIZE) */ - ibool* lru) /*!< in: pointer to a variable - that will be assigned TRUE if - storage was allocated from the - LRU list and buf_pool->mutex was - temporarily released */ __attribute__((malloc, nonnull)); /**********************************************************************//** @@ -70,11 +65,12 @@ up to UNIV_PAGE_SIZE */ __attribute__((nonnull)); -/** Reallocate a block. +/** Try to reallocate a block. @param[in] buf_pool buffer pool instance @param[in] buf block to be reallocated, must be pointed to by the buffer pool @param[in] size block size, up to UNIV_PAGE_SIZE +@retval true if succeeded or if failed because the block was fixed @retval false if failed because of no free blocks. */ bool === modified file 'storage/innobase/include/buf0buddy.ic' --- storage/innobase/include/buf0buddy.ic 2013-09-09 13:50:47 +0000 +++ storage/innobase/include/buf0buddy.ic 2015-01-16 19:30:41 +0000 @@ -33,23 +33,16 @@ #include "sync0mutex.h" /**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. -The buf_pool_mutex may be released and reacquired. +Allocate a block. @return allocated block, never NULL */ void* buf_buddy_alloc_low( /*================*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint i, /*!< in: index of buf_pool->zip_free[], + ulint i) /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that - will be assigned TRUE if storage was - allocated from the LRU list and - buf_pool->mutex was temporarily - released */ - __attribute__((malloc, nonnull)); + __attribute__((malloc, nonnull, warn_unused_result)); /**********************************************************************//** Deallocate a block. */ @@ -86,10 +79,10 @@ } /**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->mutex may be released and reacquired. -This function should only be used for allocating compressed page frames. +Allocate a block. This function should only be used for allocating compressed +page frames. The thread calling this function must hold +buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any +block->mutex. 
@return allocated block, never NULL */ UNIV_INLINE byte* @@ -97,22 +90,16 @@ /*============*/ buf_pool_t* buf_pool, /*!< in/out: buffer pool in which the page resides */ - ulint size, /*!< in: compressed page size + ulint size) /*!< in: compressed page size (between UNIV_ZIP_SIZE_MIN and UNIV_PAGE_SIZE) */ - ibool* lru) /*!< in: pointer to a variable - that will be assigned TRUE if - storage was allocated from the - LRU list and buf_pool->mutex was - temporarily released */ { - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(ut_is_2pow(size)); ut_ad(size >= UNIV_ZIP_SIZE_MIN); ut_ad(size <= UNIV_PAGE_SIZE); - return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), - lru)); + return(static_cast + (buf_buddy_alloc_low(buf_pool,buf_buddy_get_slot(size)))); } /**********************************************************************//** @@ -128,7 +115,6 @@ ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */ { - ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(ut_is_2pow(size)); ut_ad(size >= UNIV_ZIP_SIZE_MIN); ut_ad(size <= UNIV_PAGE_SIZE); === modified file 'storage/innobase/include/buf0buf.h' --- storage/innobase/include/buf0buf.h 2014-08-26 11:08:37 +0000 +++ storage/innobase/include/buf0buf.h 2015-01-16 19:30:41 +0000 @@ -338,20 +338,6 @@ #ifndef UNIV_HOTBACKUP /********************************************************************//** -Acquire mutex on all buffer pool instances */ -UNIV_INLINE -void -buf_pool_mutex_enter_all(void); -/*===========================*/ - -/********************************************************************//** -Release mutex on all buffer pool instances */ -UNIV_INLINE -void -buf_pool_mutex_exit_all(void); -/*==========================*/ - -/********************************************************************//** Creates the buffer pool. @return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ @@ -698,11 +684,10 @@ __attribute__((pure)); /********************************************************************//** -Tells if a block is still close enough to the MRU end of the LRU list -meaning that it is not in danger of getting evicted and also implying +Tells, for heuristics, if a block is still close enough to the MRU end of the +LRU list meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. -Note that this is for heuristics only and does not reserve buffer pool -mutex. +The page must be either buffer-fixed, either its page hash must be locked. @return TRUE if block is close to MRU end of LRU */ UNIV_INLINE ibool @@ -710,16 +695,6 @@ /*===================*/ const buf_page_t* bpage); /*!< in: block */ /********************************************************************//** -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. NOTE: does not reserve the buffer pool -mutex. -@return TRUE if should be made younger */ -UNIV_INLINE -ibool -buf_page_peek_if_too_old( -/*=====================*/ - const buf_page_t* bpage); /*!< in: block to make younger */ -/********************************************************************//** Gets the youngest modification log sequence number for a frame. Returns zero if not file page or no modification occurred yet. @return newest modification to page */ @@ -731,8 +706,8 @@ page frame */ /********************************************************************//** Increments the modify clock of a frame by 1. 
The caller must (1) own the -buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ +buf_pool->LRU_list_mutex and block bufferfix count has to be zero, (2) or own +an x-lock on the block, (3) or the block must belong to an intrinsic table. */ UNIV_INLINE void buf_block_modify_clock_inc( @@ -980,13 +955,6 @@ Refreshes the statistics used to print per-second averages. */ void -buf_refresh_io_stats( -/*=================*/ - buf_pool_t* buf_pool); /*!< buffer pool instance */ -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ - -void buf_refresh_io_stats_all(void); /*=================*/ /*********************************************************************//** @@ -1132,6 +1100,19 @@ /*================*/ const buf_page_t* bpage) /*!< in: pointer to the control block */ __attribute__((pure)); + +/** Gets the io_fix state of a buffer page. Does not assert that the +buf_page_get_mutex() mutex is held, to be used in the cases where it is safe +not to hold it. +@param[in] pointer to the buffer page +@return page io_fix state */ +UNIV_INLINE +enum buf_io_fix +buf_page_get_io_fix_unlocked( +/*=========================*/ + const buf_page_t* bpage) + __attribute__((warn_unused_result)); + /*********************************************************************//** Gets the io_fix state of a block. @return io_fix state */ @@ -1141,6 +1122,18 @@ /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ __attribute__((pure)); + +/** Gets the io_fix state of a buffer block. Does not assert that the +buf_page_get_mutex() mutex is held, to be used in the cases where it is safe +not to hold it. +@param[in] pointer to the buffer block +@return page io_fix state */ +UNIV_INLINE +enum buf_io_fix +buf_block_get_io_fix_unlocked( +/*=========================*/ + const buf_block_t* block) + __attribute__((warn_unused_result)); /*********************************************************************//** Sets the io_fix state of a block. */ UNIV_INLINE @@ -1224,8 +1217,10 @@ __attribute__((nonnull)); /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed -page frame exists, or NULL. Note: even though bpage is not declared a -const we don't update its value. It is safe to make this pure. +page frame exists, or NULL. page frame exists, or NULL. The caller must hold +either the appropriate hash lock in any mode, either the LRU list mutex. Note: +even though bpage is not declared a const we don't update its value. It is safe +to make this pure. @return control block, or NULL */ UNIV_INLINE buf_block_t* @@ -1468,8 +1463,9 @@ __attribute__((nonnull, warn_unused_result)); /** Add watch for the given page to be read in. Caller must have -appropriate hash_lock for the bpage. This function may release the -hash_lock and reacquire it. +appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race +condition with buf_LRU_free_page inserting the same page into the page hash. +This function may release the hash_lock and reacquire it. 
@param[in] page_id page id @param[in,out] hash_lock hash_lock currently latched @return NULL if watch set, block if the page is in the buffer pool */ @@ -1561,6 +1557,16 @@ #endif /* !UNIV_HOTBACKUP */ +/** Return how many more pages must be added to the withdraw list to reach the +withdraw target of the currently ongoing buffer pool resize. +@param[in] buf_pool buffer pool instance +@return page count to be withdrawn or zero if the target is already achieved or +if the buffer pool is not currently being resized. */ +UNIV_INLINE +ulint +buf_get_withdraw_depth( + buf_pool_t* buf_pool); + /** The common buffer control block structure for compressed and uncompressed frames */ @@ -1573,21 +1579,19 @@ None of these bit-fields must be modified without holding buf_page_get_mutex() [buf_block_t::mutex or buf_pool->zip_mutex], since they can be stored in the same - machine word. Some of these fields are additionally protected - by buf_pool->mutex. */ + machine word. */ /* @{ */ - /** Page id. Protected by buf_pool mutex. */ + /** Page id. */ page_id_t id; - /** Page size. Protected by buf_pool mutex. */ + /** Page size. */ page_size_t size; /** Count of how manyfold this block is currently bufferfixed. */ ib_uint32_t buf_fix_count; - /** type of pending I/O operation; also protected by - buf_pool->mutex for writes only */ + /** type of pending I/O operation. */ buf_io_fix io_fix; /** Block state. @see buf_page_in_file */ @@ -1607,7 +1611,7 @@ #endif /* !UNIV_HOTBACKUP */ page_zip_des_t zip; /*!< compressed page; zip.data (but not the data it points to) is - also protected by buf_pool->mutex; + protected by buf_pool->zip_mutex; state == BUF_BLOCK_ZIP_PAGE and zip.data == NULL means an active buf_pool->watch */ @@ -1626,22 +1630,17 @@ UT_LIST_NODE_T(buf_page_t) list; /*!< based on state, this is a - list node, protected either by - buf_pool->mutex or by - buf_pool->flush_list_mutex, - in one of the following lists in - buf_pool: + list node, protected by the + corresponding list mutex, in one of the + following lists in buf_pool: - BUF_BLOCK_NOT_USED: free, withdraw - BUF_BLOCK_FILE_PAGE: flush_list - BUF_BLOCK_ZIP_DIRTY: flush_list - BUF_BLOCK_ZIP_PAGE: zip_clean - If bpage is part of flush_list - then the node pointers are - covered by buf_pool->flush_list_mutex. - Otherwise these pointers are - protected by buf_pool->mutex. + The node pointers are protected by the + corresponding list mutex. The contents of the list node is undefined if !in_flush_list @@ -1664,8 +1663,8 @@ reads can happen while holding any one of the two mutexes */ ibool in_free_list; /*!< TRUE if in buf_pool->free; when - buf_pool->mutex is free, the following - should hold: in_free_list + buf_pool->free_list_mutex is free, the + following should hold: in_free_list == (state == BUF_BLOCK_NOT_USED) */ #endif /* UNIV_DEBUG */ lsn_t newest_modification; @@ -1689,8 +1688,8 @@ any one of the two mutexes */ /* @} */ /** @name LRU replacement algorithm fields - These fields are protected by buf_pool->mutex only (not - buf_pool->zip_mutex or buf_block_t::mutex). */ + These fields are protected by both buf_pool->LRU_list_mutex and the + block mutex. */ /* @{ */ UT_LIST_NODE_T(buf_page_t) LRU; @@ -1745,26 +1744,23 @@ /*!< node of the decompressed LRU list; a block is in the unzip_LRU list if page.state == BUF_BLOCK_FILE_PAGE - and page.zip.data != NULL */ + and page.zip.data != NULL. Protected by + both LRU_list_mutex and the block + mutex. 
*/ #ifdef UNIV_DEBUG ibool in_unzip_LRU_list;/*!< TRUE if the page is in the decompressed LRU list; used in debugging */ ibool in_withdraw_list; #endif /* UNIV_DEBUG */ - BPageMutex mutex; /*!< mutex protecting this block: - state (also protected by the buffer - pool mutex), io_fix, buf_fix_count, - and accessed; we introduce this new - mutex in InnoDB-5.1 to relieve - contention on the buffer pool mutex */ + BPageMutex mutex; /*!< mutex protecting this block. */ rw_lock_t lock; /*!< read-write lock of the buffer frame */ unsigned lock_hash_val:32;/*!< hashed value of the page address in the record lock hash table; protected by buf_block_t::lock - (or buf_block_t::mutex, buf_pool->mutex - in buf_page_get_gen(), + (or buf_block_t::mutex in + buf_page_get_gen(), buf_page_init_for_read() and buf_page_create()) */ ibool check_index_page_at_flush; @@ -1787,10 +1783,11 @@ positioning: if the modify clock has not changed, we know that the pointer is still valid; this field may be - changed if the thread (1) owns the - pool mutex and the page is not + changed if the thread (1) owns the LRU + list mutex and the page is not bufferfixed, or (2) the thread has an - x-latch on the block */ + x-latch on the block, or (3) the block + must belong to an intrinsic table */ /* @} */ /** @name Hash search fields (unprotected) NOTE that these fields are NOT protected by any semaphore! */ @@ -2027,25 +2024,31 @@ counted as page gets; this field is NOT protected by the buffer pool mutex */ - ulint n_pages_read; /*!< number read operations */ - ulint n_pages_written;/*!< number write operations */ + ulint n_pages_read; /*!< number of read operations. Accessed + atomically. */ + ulint n_pages_written;/*!< number of write operations. Accessed + atomically. */ ulint n_pages_created;/*!< number of pages created - in the pool with no read */ + in the pool with no read. Accessed + atomically. */ ulint n_ra_pages_read_rnd;/*!< number of pages read in - as part of random read ahead */ + as part of random read ahead. Not protected. */ ulint n_ra_pages_read;/*!< number of pages read in - as part of read ahead */ + as part of read ahead. Not protected. */ ulint n_ra_pages_evicted;/*!< number of read ahead pages that are evicted without - being accessed */ + being accessed. Protected by LRU_list_mutex. */ ulint n_pages_made_young; /*!< number of pages made young, in - calls to buf_LRU_make_block_young() */ + calls to buf_LRU_make_block_young(). Protected + by LRU_list_mutex. */ ulint n_pages_not_made_young; /*!< number of pages not made young because the first access was not long enough ago, in - buf_page_peek_if_too_old() */ - ulint LRU_bytes; /*!< LRU size in bytes */ - ulint flush_list_bytes;/*!< flush_list size in bytes */ + buf_page_peek_if_too_old(). Not protected. */ + ulint LRU_bytes; /*!< LRU size in bytes. Protected by + LRU_list_mutex. */ + ulint flush_list_bytes;/*!< flush_list size in bytes. + Protected by flush_list_mutex */ }; /** Statistics of buddy blocks of a given size. 
*/ @@ -2067,8 +2070,12 @@ /** @name General fields */ /* @{ */ - BufPoolMutex mutex; /*!< Buffer pool mutex of this - instance */ + BufListMutex LRU_list_mutex; /*!< LRU list mutex */ + BufListMutex free_list_mutex;/*!< free and withdraw list mutex */ + BufListMutex zip_free_mutex; /*!< buddy allocator mutex */ + BufListMutex zip_hash_mutex; /*!< zip_hash mutex */ + ib_mutex_t flush_state_mutex;/*!< Flush state protection + mutex */ BPageMutex zip_mutex; /*!< Zip mutex of this buffer pool instance, protects compressed only pages (of type buf_page_t, not @@ -2080,10 +2087,8 @@ pool for "old" blocks */ #ifdef UNIV_DEBUG ulint buddy_n_frames; /*!< Number of frames allocated from - the buffer pool to the buddy system */ -#endif -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ulint mutex_exit_forbidden; /*!< Forbid release mutex */ + the buffer pool to the buddy system. + Protected by zip_hash_mutex. */ #endif ut_allocator allocator; /*!< Allocator used for allocating memory for the the "chunks" @@ -2103,12 +2108,7 @@ buf_page_in_file() == TRUE, indexed by (space_id, offset). page_hash is protected by an - array of mutexes. - Changes in page_hash are protected - by buf_pool->mutex and the relevant - page_hash mutex. Lookups can happen - while holding the buf_pool->mutex or - the relevant page_hash mutex. */ + array of mutexes. */ hash_table_t* page_hash_old; /*!< old pointer to page_hash to be freed after resizing buffer pool */ hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks @@ -2116,15 +2116,19 @@ zip buddy system, indexed by block->frame */ ulint n_pend_reads; /*!< number of pending read - operations */ - ulint n_pend_unzip; /*!< number of pending decompressions */ + operations. Accessed atomically */ + ulint n_pend_unzip; /*!< number of pending decompressions. + Accessed atomically. */ time_t last_printout_time; /*!< when buf_print_io was last time - called */ + called. Accesses not protected. */ buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1]; /*!< Statistics of buddy system, - indexed by block size */ + indexed by block size. Protected by + zip_free mutex, except for the used + field, which is also accessed + atomically */ buf_pool_stat_t stat; /*!< current statistics */ buf_pool_stat_t old_stat; /*!< old statistics */ @@ -2134,7 +2138,7 @@ /* @{ */ - FlushListMutex flush_list_mutex;/*!< mutex protecting the + BufListMutex flush_list_mutex;/*!< mutex protecting the flush list access. This mutex protects flush_list, flush_rbt and bpage::list pointers when @@ -2151,14 +2155,17 @@ list */ ibool init_flush[BUF_FLUSH_N_TYPES]; /*!< this is TRUE when a flush of the - given type is being initialized */ + given type is being initialized. + Protected by flush_state_mutex. */ ulint n_flush[BUF_FLUSH_N_TYPES]; /*!< this is the number of pending - writes in the given flush type */ + writes in the given flush type. + Protected by flush_state_mutex. */ os_event_t no_flush[BUF_FLUSH_N_TYPES]; /*!< this is in the set state when there is no flush batch - of the given type running */ + of the given type running. Protected by + flush_state_mutex. */ ib_rbt_t* flush_rbt; /*!< a red-black tree is used exclusively during recovery to speed up insertions in the @@ -2181,7 +2188,8 @@ billion! A thread is allowed to read this for heuristic purposes without holding any - mutex or latch */ + mutex or latch. For non-heuristic + purposes protected by LRU_list_mutex */ ibool try_LRU_scan; /*!< Set to FALSE when an LRU scan for free block fails. 
This flag is used to avoid repeated @@ -2190,8 +2198,8 @@ available in the scan depth for eviction. Set to TRUE whenever we flush a batch from the - buffer pool. Protected by the - buf_pool->mutex */ + buffer pool. Accessed protected by + memory barriers. */ /* @} */ /** @name LRU replacement algorithm fields */ @@ -2205,21 +2213,22 @@ /*!< base node of the withdraw block list. It is only used during shrinking buffer pool size, not to - reuse the blocks will be removed */ + reuse the blocks will be removed. + Protected by free_list_mutex */ ulint withdraw_target;/*!< target length of withdraw block list, when withdrawing */ /** "hazard pointer" used during scan of LRU while doing - LRU list batch. Protected by buf_pool::mutex */ + LRU list batch. Protected by buf_pool::LRU_list_mutex */ LRUHp lru_hp; /** Iterator used to scan the LRU list when searching for - replacable victim. Protected by buf_pool::mutex. */ + replacable victim. Protected by buf_pool::LRU_list_mutex. */ LRUItr lru_scan_itr; /** Iterator used to scan the LRU list when searching for - single page flushing victim. Protected by buf_pool::mutex. */ + single page flushing victim. Protected by buf_pool::LRU_list_mutex. */ LRUItr single_scan_itr; UT_LIST_BASE_NODE_T(buf_page_t) LRU; @@ -2242,7 +2251,8 @@ UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU; /*!< base node of the - unzip_LRU list */ + unzip_LRU list. The list is protected + by LRU_list_mutex. */ /* @} */ /** @name Buddy allocator fields @@ -2259,8 +2269,12 @@ buf_page_t* watch; /*!< Sentinel records for buffer - pool watches. Protected by - buf_pool->mutex. */ + pool watches. Scanning the array is + protected by taking all page_hash + latches in X. Updating or reading an + individual watch page is protected by + a corresponding individual page_hash + latch. */ #if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN # error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN" @@ -2277,18 +2291,10 @@ std::ostream& out, const buf_pool_t& buf_pool); -/** @name Accessors for buf_pool->mutex. -Use these instead of accessing buf_pool->mutex directly. */ +/** @name Accessors for buffer pool mutexes +Use these instead of accessing buffer pool mutexes directly. */ /* @{ */ -/** Test if a buffer pool mutex is owned. */ -#define buf_pool_mutex_own(b) mutex_own(&b->mutex) -/** Acquire a buffer pool mutex. */ -#define buf_pool_mutex_enter(b) do { \ - ut_ad(!(b)->zip_mutex.is_owned()); \ - mutex_enter(&(b)->mutex); \ -} while (0) - /** Test if flush list mutex is owned. */ #define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex) @@ -2310,7 +2316,7 @@ mutex_enter(&(b)->mutex); \ } while (0) -/** Release the trx->mutex. */ +/** Release the block->mutex. */ #define buf_page_mutex_exit(b) do { \ (b)->mutex.exit(); \ } while (0) @@ -2358,31 +2364,6 @@ # define buf_block_hash_lock_held_s_or_x(b, p) (TRUE) #endif /* UNIV_SYNC_DEBUG */ -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid(b) do { \ - ut_ad(buf_pool_mutex_own(b)); \ - b->mutex_exit_forbidden++; \ -} while (0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow(b) do { \ - ut_ad(buf_pool_mutex_own(b)); \ - ut_a(b->mutex_exit_forbidden); \ - b->mutex_exit_forbidden--; \ -} while (0) -/** Release the buffer pool mutex. */ -# define buf_pool_mutex_exit(b) do { \ - ut_a(!b->mutex_exit_forbidden); \ - mutex_exit(&b->mutex); \ -} while (0) -#else -/** Forbid the release of the buffer pool mutex. 
*/ -# define buf_pool_mutex_exit_forbid(b) ((void) 0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow(b) ((void) 0) -/** Release the buffer pool mutex. */ -# define buf_pool_mutex_exit(b) mutex_exit(&b->mutex) -#endif #endif /* !UNIV_HOTBACKUP */ /* @} */ === modified file 'storage/innobase/include/buf0buf.ic' --- storage/innobase/include/buf0buf.ic 2014-08-26 17:48:07 +0000 +++ storage/innobase/include/buf0buf.ic 2015-01-16 19:30:41 +0000 @@ -124,7 +124,7 @@ /*==========================*/ const buf_page_t* bpage) /*!< in: block */ { - /* This is sometimes read without holding buf_pool->mutex. */ + /* This is sometimes read without holding any buffer pool mutex. */ return(bpage->freed_page_clock); } @@ -141,11 +141,10 @@ } /********************************************************************//** -Tells if a block is still close enough to the MRU end of the LRU list -meaning that it is not in danger of getting evicted and also implying +Tells, for heuristics, if a block is still close enough to the MRU end of the +LRU list meaning that it is not in danger of getting evicted and also implying that it has been accessed recently. -Note that this is for heuristics only and does not reserve buffer pool -mutex. +The page must be either buffer-fixed, either its page hash must be locked. @return TRUE if block is close to MRU end of LRU */ UNIV_INLINE ibool @@ -155,6 +154,9 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_ad(bpage->buf_fix_count > 0 + || buf_page_hash_lock_held_s_or_x(buf_pool, bpage)); + /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) < ((ulint) bpage->freed_page_clock @@ -162,46 +164,6 @@ * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio) / (BUF_LRU_OLD_RATIO_DIV * 4)))); } - -/********************************************************************//** -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. NOTE: does not reserve the buffer pool -mutex. -@return TRUE if should be made younger */ -UNIV_INLINE -ibool -buf_page_peek_if_too_old( -/*=====================*/ - const buf_page_t* bpage) /*!< in: block to make younger */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - if (buf_pool->freed_page_clock == 0) { - /* If eviction has not started yet, do not update the - statistics or move blocks in the LRU list. This is - either the warm-up phase or an in-memory workload. */ - return(FALSE); - } else if (buf_LRU_old_threshold_ms && bpage->old) { - unsigned access_time = buf_page_is_accessed(bpage); - - /* It is possible that the below comparison returns an - unexpected result. 2^32 milliseconds pass in about 50 days, - so if the difference between ut_time_ms() and access_time - is e.g. 50 days + 15 ms, then the below will behave as if - it is 15 ms. This is known and fixing it would require to - increase buf_page_t::access_time from 32 to 64 bits. 
*/ - if (access_time > 0 - && ((ib_uint32_t) (ut_time_ms() - access_time)) - >= buf_LRU_old_threshold_ms) { - return(TRUE); - } - - buf_pool->stat.n_pages_not_made_young++; - return(FALSE); - } else { - return(!buf_page_peek_if_young(bpage)); - } -} #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** @@ -244,6 +206,32 @@ { return(buf_page_get_state(&block->page)); } + +#ifdef UNIV_DEBUG +/** Assert that a given buffer pool page is private to the caller: no pointers +to it exist in any buffer pool list or hash table. Accessing pages by iterating +over buffer pool chunks is not considered here. Furthermore, assert that no +buffer pool locks except for LRU list mutex and page hash are held. +@param[in] bpage pointer to a buffer pool page */ +UNIV_INLINE +bool +buf_page_is_private( + const buf_page_t* bpage) +{ + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_a(!bpage->in_page_hash); + ut_a(!bpage->in_zip_hash); + ut_a(!bpage->in_flush_list); + ut_a(!bpage->in_free_list); + ut_a(!bpage->in_LRU_list); + ut_a(!mutex_own(buf_page_get_mutex(bpage))); + ut_a(!mutex_own(&buf_pool->free_list_mutex)); + ut_a(!mutex_own(&buf_pool->zip_free_mutex)); + ut_a(!mutex_own(&buf_pool->zip_hash_mutex)); + return(true); +} +#endif + /*********************************************************************//** Sets the state of a block. */ UNIV_INLINE @@ -255,6 +243,7 @@ { #ifdef UNIV_DEBUG enum buf_page_state old_state = buf_page_get_state(bpage); + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); switch (old_state) { case BUF_BLOCK_POOL_WATCH: @@ -265,21 +254,36 @@ break; case BUF_BLOCK_ZIP_DIRTY: ut_a(state == BUF_BLOCK_ZIP_PAGE); + ut_a(mutex_own(buf_page_get_mutex(bpage))); + ut_a(buf_flush_list_mutex_own(buf_pool)); + ut_a(bpage->in_flush_list); break; case BUF_BLOCK_NOT_USED: ut_a(state == BUF_BLOCK_READY_FOR_USE); + ut_a(buf_page_is_private(bpage)); break; case BUF_BLOCK_READY_FOR_USE: ut_a(state == BUF_BLOCK_MEMORY || state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_NOT_USED); + ut_a(buf_page_is_private(bpage)); break; case BUF_BLOCK_MEMORY: ut_a(state == BUF_BLOCK_NOT_USED); + ut_a(buf_page_is_private(bpage)); break; case BUF_BLOCK_FILE_PAGE: ut_a(state == BUF_BLOCK_NOT_USED || state == BUF_BLOCK_REMOVE_HASH); + if (state == BUF_BLOCK_REMOVE_HASH) { + ut_a(!bpage->in_page_hash); + ut_a(!bpage->in_zip_hash); + ut_a(!bpage->in_LRU_list); + ut_a(!bpage->in_free_list); + ut_a(mutex_own(buf_page_get_mutex(bpage))); + ut_a(mutex_own(&buf_pool->LRU_list_mutex)); + ut_a(buf_page_hash_lock_held_x(buf_pool, bpage)); + } break; case BUF_BLOCK_REMOVE_HASH: ut_a(state == BUF_BLOCK_MEMORY); @@ -427,6 +431,21 @@ /*================*/ const buf_page_t* bpage) /*!< in: pointer to the control block */ { + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + return buf_page_get_io_fix_unlocked(bpage); +} + +/** Gets the io_fix state of a buffer page. Does not assert that the +buf_page_get_mutex() mutex is held, to be used in the cases where it is safe +not to hold it. +@param[in] pointer to the buffer page +@return page io_fix state */ +UNIV_INLINE +enum buf_io_fix +buf_page_get_io_fix_unlocked( +/*=========================*/ + const buf_page_t* bpage) +{ ut_ad(bpage != NULL); enum buf_io_fix io_fix = bpage->io_fix; @@ -456,6 +475,20 @@ return(buf_page_get_io_fix(&block->page)); } +/** Gets the io_fix state of a buffer block. Does not assert that the +buf_page_get_mutex() mutex is held, to be used in the cases where it is safe +not to hold it. 
+@param[in] pointer to the buffer block +@return page io_fix state */ +UNIV_INLINE +enum buf_io_fix +buf_block_get_io_fix_unlocked( +/*==========================*/ + const buf_block_t* block) +{ + return(buf_page_get_io_fix_unlocked(&block->page)); +} + /*********************************************************************//** Sets the io_fix state of a block. */ UNIV_INLINE @@ -465,10 +498,6 @@ buf_page_t* bpage, /*!< in/out: control block */ enum buf_io_fix io_fix) /*!< in: io_fix state */ { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); -#endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); bpage->io_fix = io_fix; @@ -489,7 +518,7 @@ /*********************************************************************//** Makes a block sticky. A sticky block implies that even after we release -the buf_pool->mutex and the block->mutex: +the buf_pool->LRU_list_mutex and the block->mutex: * it cannot be removed from the flush_list * the block descriptor cannot be relocated * it cannot be removed from the LRU list @@ -504,10 +533,11 @@ { #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); #endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE); + ut_ad(bpage->in_LRU_list); bpage->io_fix = BUF_IO_PIN; } @@ -520,10 +550,6 @@ /*==================*/ buf_page_t* bpage) /*!< in/out: control block */ { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); -#endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN); @@ -539,10 +565,6 @@ /*==================*/ const buf_page_t* bpage) /*!< control block being relocated */ { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); -#endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); @@ -562,7 +584,11 @@ { #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + /* Buffer page mutex is not strictly required here for heuristic + purposes even if LRU mutex is not being held. Keep the assertion + for now since all the callers hold it. */ + ut_ad(mutex_own(buf_page_get_mutex(bpage)) + || mutex_own(&buf_pool->LRU_list_mutex)); #endif ut_ad(buf_page_in_file(bpage)); @@ -582,7 +608,7 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); #endif /* UNIV_DEBUG */ ut_a(buf_page_in_file(bpage)); - ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(bpage->in_LRU_list); #ifdef UNIV_LRU_DEBUG @@ -627,11 +653,7 @@ /*==================*/ buf_page_t* bpage) /*!< in/out: control block */ { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); -#endif /* UNIV_DEBUG */ ut_a(buf_page_in_file(bpage)); @@ -643,7 +665,10 @@ /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed -page frame exists, or NULL. +page frame exists, or NULL. page frame exists, or NULL. The caller must hold +either the appropriate hash lock in any mode, either the LRU list mutex. Note: +even though bpage is not declared a const we don't update its value. It is safe +to make this pure. 
@return control block, or NULL */ UNIV_INLINE buf_block_t* @@ -652,6 +677,11 @@ buf_page_t* bpage) /*!< in: control block, or NULL */ { if (bpage != NULL) { +#ifdef UNIV_DEBUG + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage) + || mutex_own(&buf_pool->LRU_list_mutex)); +#endif ut_ad(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { @@ -809,19 +839,9 @@ /*===========*/ buf_block_t* block) /*!< in, own: block to be freed */ { - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block); - - buf_pool_mutex_enter(buf_pool); - - buf_page_mutex_enter(block); - ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); buf_LRU_block_free_non_file_page(block); - - buf_page_mutex_exit(block); - - buf_pool_mutex_exit(buf_pool); } #endif /* !UNIV_HOTBACKUP */ @@ -872,8 +892,8 @@ /********************************************************************//** Increments the modify clock of a frame by 1. The caller must (1) own the -buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ +buf_pool->LRU_list_mutex and block bufferfix count has to be zero, (2) or own +an x-lock on the block, (3) or the block must belong to an intrinsic table. */ UNIV_INLINE void buf_block_modify_clock_inc( @@ -885,7 +905,7 @@ /* No latch is acquired if block belongs to intrinsic table. */ if (!fsp_is_system_temporary(block->page.id.space())) { - ut_ad((buf_pool_mutex_own(buf_pool) + ut_ad((mutex_own(&buf_pool->LRU_list_mutex) && (block->page.buf_fix_count == 0)) || rw_lock_own_flagged(&block->lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX)); @@ -973,6 +993,7 @@ buf_block_unfix( buf_page_t* bpage) { + ut_ad(!mutex_own(buf_page_get_mutex(bpage))); ulint count = os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); ut_ad(count + 1 != 0); return(count); @@ -1122,12 +1143,10 @@ if (mode == RW_LOCK_S) { rw_lock_s_lock(hash_lock); - /* If not own buf_pool_mutex, page_hash can be changed. */ hash_lock = hash_lock_s_confirm( hash_lock, buf_pool->page_hash, page_id.fold()); } else { rw_lock_x_lock(hash_lock); - /* If not own buf_pool_mutex, page_hash can be changed. */ hash_lock = hash_lock_x_confirm( hash_lock, buf_pool->page_hash, page_id.fold()); } @@ -1317,36 +1336,6 @@ } #endif /* UNIV_SYNC_DEBUG */ -/********************************************************************//** -Acquire mutex on all buffer pool instances. */ -UNIV_INLINE -void -buf_pool_mutex_enter_all(void) -/*==========================*/ -{ - for (ulint i = 0; i < srv_buf_pool_instances; ++i) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - - buf_pool_mutex_enter(buf_pool); - } -} - -/********************************************************************//** -Release mutex on all buffer pool instances. */ -UNIV_INLINE -void -buf_pool_mutex_exit_all(void) -/*=========================*/ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - buf_pool_mutex_exit(buf_pool); - } -} /*********************************************************************//** Get the nth chunk's buffer block in the specified buffer pool. @return the nth chunk's buffer block. */ @@ -1396,4 +1385,24 @@ } } +/** Return how many more pages must be added to the withdraw list to reach the +withdraw target of the currently ongoing buffer pool resize. 
+@param[in] buf_pool buffer pool instance +@return page count to be withdrawn or zero if the target is already achieved or +if the buffer pool is not currently being resized. */ +UNIV_INLINE +ulint +buf_get_withdraw_depth( + buf_pool_t* buf_pool) +{ + os_rmb; + if (UNIV_LIKELY(buf_pool->curr_size >= buf_pool->old_size)) + return 0; + mutex_enter(&buf_pool->free_list_mutex); + ulint withdraw_len = UT_LIST_GET_LEN(buf_pool->withdraw); + mutex_exit(&buf_pool->free_list_mutex); + return(buf_pool->withdraw_target > withdraw_len + ? buf_pool->withdraw_target - withdraw_len : 0); +} + #endif /* !UNIV_HOTBACKUP */ === modified file 'storage/innobase/include/buf0flu.h' --- storage/innobase/include/buf0flu.h 2014-11-04 13:39:53 +0000 +++ storage/innobase/include/buf0flu.h 2015-01-16 19:30:41 +0000 @@ -79,10 +79,10 @@ # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /********************************************************************//** Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: buf_pool->mutex and block->mutex must be held upon entering this -function, and they will be released by this function after flushing. -This is loosely based on buf_flush_batch() and buf_flush_page(). -@return TRUE if the page was flushed and the mutexes released */ +NOTE: block and LRU list mutexes must be held upon entering this function, and +they will be released by this function after flushing. This is loosely based on +buf_flush_batch() and buf_flush_page(). +@return TRUE if the page was flushed and the mutex released */ ibool buf_flush_page_try( @@ -194,7 +194,8 @@ set of mtr's */ /********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, -i.e., transition FILE_PAGE => NOT_USED allowed. +i.e., the transition FILE_PAGE => NOT_USED allowed. The caller must hold the +LRU list and block mutexes. @return TRUE if can replace immediately */ ibool @@ -265,9 +266,10 @@ Writes a flushable page asynchronously from the buffer pool to a file. NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of -writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be -held upon entering this function, and they will be released by this -function. +writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this +function. The LRU list mutex must be held iff flush_type +== BUF_FLUSH_SINGLE_PAGE. Both mutexes will be released by this function if it +returns true. @return TRUE if page was flushed */ ibool === modified file 'storage/innobase/include/buf0flu.ic' --- storage/innobase/include/buf0flu.ic 2014-03-05 05:59:56 +0000 +++ storage/innobase/include/buf0flu.ic 2015-01-16 19:30:41 +0000 @@ -75,7 +75,6 @@ buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(!buf_flush_list_mutex_own(buf_pool)); } #endif /* UNIV_DEBUG */ @@ -118,7 +117,6 @@ buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(!buf_flush_list_mutex_own(buf_pool)); ut_ad(start_lsn != 0); === modified file 'storage/innobase/include/buf0lru.h' --- storage/innobase/include/buf0lru.h 2013-11-04 14:38:22 +0000 +++ storage/innobase/include/buf0lru.h 2015-01-16 19:30:41 +0000 @@ -79,12 +79,13 @@ Try to free a block. If bpage is a descriptor of a compressed-only page, the descriptor object will be freed as well. -NOTE: If this function returns true, it will temporarily -release buf_pool->mutex. 
Furthermore, the page frame will no longer be -accessible via bpage. - -The caller must hold buf_pool->mutex and must not hold any -buf_page_get_mutex() when calling this function. +NOTE: this function may temporarily release and relock the +buf_page_get_get_mutex(). Furthermore, the page frame will no longer be +accessible via bpage. If this function returns true, it will also release +the LRU list mutex. + +The caller must hold the LRU list and buf_page_get_mutex() mutexes. + @return true if freed, false otherwise. */ bool @@ -93,7 +94,7 @@ buf_page_t* bpage, /*!< in: block to be freed */ bool zip) /*!< in: true if should remove also the compressed page of an uncompressed page */ - __attribute__((nonnull)); + __attribute__((nonnull, warn_unused_result)); /******************************************************************//** Try to free a replaceable block. @return true if found and freed */ @@ -218,14 +219,18 @@ /*=====================*/ /******************************************************************//** -Remove one page from LRU list and put it to free list */ +Remove one page from LRU list and put it to free list. The caller must hold the +LRU list and block mutexes and have page hash latched in X. The latch and +the block mutexes will be released. */ void buf_LRU_free_one_page( /*==================*/ - buf_page_t* bpage) /*!< in/out: block, must contain a file page and + buf_page_t* bpage, /*!< in/out: block, must contain a file page and be in a state where it can be freed; there may or may not be a hash index to the page */ + bool zip = true)/*!< in: true if should remove also the + compressed page of an uncompressed page */ __attribute__((nonnull)); /******************************************************************//** @@ -297,7 +302,7 @@ extern buf_LRU_stat_t buf_LRU_stat_cur; /** Running sum of past values of buf_LRU_stat_cur. -Updated by buf_LRU_stat_update(). Protected by buf_pool->mutex. */ +Updated by buf_LRU_stat_update(). Accesses protected by memory barriers. */ extern buf_LRU_stat_t buf_LRU_stat_sum; /********************************************************************//** === modified file 'storage/innobase/include/buf0types.h' --- storage/innobase/include/buf0types.h 2014-07-04 03:01:03 +0000 +++ storage/innobase/include/buf0types.h 2015-01-16 19:30:41 +0000 @@ -119,8 +119,7 @@ #ifndef UNIV_INNOCHECKSUM typedef ib_mutex_t BPageMutex; -typedef ib_mutex_t BufPoolMutex; -typedef ib_mutex_t FlushListMutex; +typedef ib_mutex_t BufListMutex; #endif /* !UNIV_INNOCHECKSUM */ #endif /* buf0types.h */ === modified file 'storage/innobase/include/srv0srv.h' --- storage/innobase/include/srv0srv.h 2014-08-26 17:48:07 +0000 +++ storage/innobase/include/srv0srv.h 2015-01-16 19:30:41 +0000 @@ -289,7 +289,7 @@ extern ulong srv_LRU_scan_depth; /** Whether or not to flush neighbors of a block */ extern ulong srv_flush_neighbors; -/** Previously requested size */ +/** Previously requested size. Accesses protected by memory barriers. 
*/ extern ulint srv_buf_pool_old_size; /** Current size as scaling factor for the other components */ extern ulint srv_buf_pool_base_size; === modified file 'storage/innobase/include/sync0sync.h' --- storage/innobase/include/sync0sync.h 2014-06-24 13:49:46 +0000 +++ storage/innobase/include/sync0sync.h 2015-01-16 19:30:41 +0000 @@ -52,7 +52,11 @@ /* Key defines to register InnoDB mutexes with performance schema */ extern mysql_pfs_key_t autoinc_mutex_key; extern mysql_pfs_key_t buffer_block_mutex_key; -extern mysql_pfs_key_t buf_pool_mutex_key; +extern mysql_pfs_key_t buf_pool_flush_state_mutex_key; +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key; +extern mysql_pfs_key_t buf_pool_free_list_mutex_key; +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key; +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key; extern mysql_pfs_key_t buf_pool_zip_mutex_key; extern mysql_pfs_key_t cache_last_read_mutex_key; extern mysql_pfs_key_t dict_foreign_err_mutex_key; === modified file 'storage/innobase/include/sync0types.h' --- storage/innobase/include/sync0types.h 2014-08-14 05:00:58 +0000 +++ storage/innobase/include/sync0types.h 2015-01-16 19:30:41 +0000 @@ -186,7 +186,7 @@ Search system mutex | V -Buffer pool mutex +Buffer pool mutexes | V Log mutex @@ -217,11 +217,13 @@ SYNC_DOUBLEWRITE, SYNC_BUF_FLUSH_LIST, - + SYNC_BUF_FLUSH_STATE, + SYNC_BUF_ZIP_HASH, + SYNC_BUF_FREE_LIST, + SYNC_BUF_ZIP_FREE, SYNC_BUF_BLOCK, SYNC_BUF_PAGE_HASH, - - SYNC_BUF_POOL, + SYNC_BUF_LRU_LIST, SYNC_POOL, SYNC_POOL_MANAGER, === modified file 'storage/innobase/lock/lock0lock.cc' --- storage/innobase/lock/lock0lock.cc 2014-09-02 07:56:28 +0000 +++ storage/innobase/lock/lock0lock.cc 2015-01-16 19:30:41 +0000 @@ -504,7 +504,7 @@ for (ulint i = 0; i < srv_buf_pool_instances; ++i) { buf_pool_t* buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); buf_page_t* bpage; bpage = UT_LIST_GET_FIRST(buf_pool->LRU); @@ -522,7 +522,7 @@ } bpage = UT_LIST_GET_NEXT(LRU, bpage); } - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } lock_mutex_exit(); === modified file 'storage/innobase/srv/srv0srv.cc' --- storage/innobase/srv/srv0srv.cc 2014-08-26 17:48:07 +0000 +++ storage/innobase/srv/srv0srv.cc 2015-01-16 19:30:41 +0000 @@ -208,7 +208,7 @@ ulong srv_LRU_scan_depth = 1024; /** Whether or not to flush neighbors of a block */ ulong srv_flush_neighbors = 1; -/** Previously requested size */ +/** Previously requested size. Accesses protected by memory barriers. */ ulint srv_buf_pool_old_size = 0; /** Current size as scaling factor for the other components */ ulint srv_buf_pool_base_size = 0; === modified file 'storage/innobase/sync/sync0debug.cc' --- storage/innobase/sync/sync0debug.cc 2014-08-19 05:43:25 +0000 +++ storage/innobase/sync/sync0debug.cc 2015-01-16 19:30:41 +0000 @@ -638,7 +638,11 @@ break; case SYNC_BUF_FLUSH_LIST: - case SYNC_BUF_POOL: + case SYNC_BUF_LRU_LIST: + case SYNC_BUF_FREE_LIST: + case SYNC_BUF_ZIP_FREE: + case SYNC_BUF_ZIP_HASH: + case SYNC_BUF_FLUSH_STATE: /* We can have multiple mutexes of this type therefore we can only check whether the greater than condition holds. */ @@ -647,22 +651,10 @@ break; case SYNC_BUF_PAGE_HASH: - - /* Multiple page_hash locks are only allowed during - buf_validate and that is where buf_pool mutex is already - held. 
*/ - - /* Fall through */ - case SYNC_BUF_BLOCK: - /* Either the thread must own the (buffer pool) buf_pool->mutex - or it is allowed to latch only ONE of (buffer block) - block->mutex or buf_pool->zip_mutex. */ - if (less(latches, latch->m_level) != 0) { basic_check(latches, latch->m_level - 1); - ut_a(find(latches, SYNC_BUF_POOL) != 0); } break; @@ -886,9 +878,25 @@ buffer_block_mutex_key); #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ - LATCH_ADD(SrvLatches, "buf_pool", - SYNC_BUF_POOL, - buf_pool_mutex_key); + LATCH_ADD(SrvLatches, "buf_pool_lru_list", + SYNC_BUF_LRU_LIST, + buf_pool_LRU_list_mutex_key); + + LATCH_ADD(SrvLatches, "buf_pool_free_list", + SYNC_BUF_FREE_LIST, + buf_pool_free_list_mutex_key); + + LATCH_ADD(SrvLatches, "buf_pool_zip_free", + SYNC_BUF_ZIP_FREE, + buf_pool_zip_free_mutex_key); + + LATCH_ADD(SrvLatches, "buf_pool_zip_hash", + SYNC_BUF_ZIP_HASH, + buf_pool_zip_free_mutex_key); + + LATCH_ADD(SrvLatches, "buf_pool_flush_state", + SYNC_BUF_FLUSH_STATE, + buf_pool_flush_state_mutex_key); LATCH_ADD(SrvLatches, "buf_pool_zip", SYNC_BUF_BLOCK, === modified file 'storage/innobase/sync/sync0sync.cc' --- storage/innobase/sync/sync0sync.cc 2014-07-10 10:46:02 +0000 +++ storage/innobase/sync/sync0sync.cc 2015-01-16 19:30:41 +0000 @@ -38,7 +38,11 @@ /* Key to register autoinc_mutex with performance schema */ mysql_pfs_key_t autoinc_mutex_key; mysql_pfs_key_t buffer_block_mutex_key; -mysql_pfs_key_t buf_pool_mutex_key; +mysql_pfs_key_t buf_pool_flush_state_mutex_key; +mysql_pfs_key_t buf_pool_LRU_list_mutex_key; +mysql_pfs_key_t buf_pool_free_list_mutex_key; +mysql_pfs_key_t buf_pool_zip_free_mutex_key; +mysql_pfs_key_t buf_pool_zip_hash_mutex_key; mysql_pfs_key_t buf_pool_zip_mutex_key; mysql_pfs_key_t cache_last_read_mutex_key; mysql_pfs_key_t dict_foreign_err_mutex_key;
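
For readers following the locking changes above: the recurring pattern in the patch is that a call site which previously took the single buffer pool mutex now takes only the mutex protecting the list it actually touches (for example, the lock0lock.cc hunk switches the LRU scan from buf_pool_mutex_enter to mutex_enter(&buf_pool->LRU_list_mutex)). Below is a minimal standalone C++ model of that idea, not InnoDB code; buf_pool_model, count_lru_pages and free_list_push are illustrative stand-ins, and the real buf_pool_t fields and types differ.

#include <list>
#include <mutex>

/* Simplified stand-in for buf_pool_t: one mutex per list instead of a
   single pool-wide mutex (model only). */
struct buf_pool_model {
	std::mutex	LRU_list_mutex;   /* protects LRU */
	std::mutex	free_list_mutex;  /* protects free list / withdraw list */
	std::list<int>	LRU;              /* page ids, stand-in for buf_page_t */
	std::list<int>	free_list;
};

/* Scan the LRU list: only the LRU list mutex is needed, mirroring the
   lock0lock.cc hunk, where the old code took the pool-wide mutex. */
int count_lru_pages(buf_pool_model& pool)
{
	std::lock_guard<std::mutex> guard(pool.LRU_list_mutex);
	return static_cast<int>(pool.LRU.size());
}

/* A thread replenishing the free list takes only free_list_mutex and
   therefore no longer blocks concurrent LRU scans. */
void free_list_push(buf_pool_model& pool, int page_id)
{
	std::lock_guard<std::mutex> guard(pool.free_list_mutex);
	pool.free_list.push_back(page_id);
}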
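
The new buf_get_withdraw_depth() helper in the buf0buf.ic hunk first checks (after os_rmb) whether the pool is shrinking at all, then reads the withdraw list length under free_list_mutex and returns how many pages are still missing from the resize target. A rough standalone sketch of the same arithmetic follows; pool_resize_model and withdraw_depth are hypothetical names, and std::atomic/std::mutex stand in for the actual memory barriers and BufListMutex used in the patch.

#include <atomic>
#include <mutex>

struct pool_resize_model {
	std::atomic<unsigned long>	curr_size{0};
	std::atomic<unsigned long>	old_size{0};
	std::mutex			free_list_mutex;
	unsigned long			withdraw_len = 0;     /* protected by free_list_mutex */
	unsigned long			withdraw_target = 0;  /* set once per resize */
};

/* Pages still to be withdrawn, or zero if the pool is not shrinking or
   the target has already been reached. */
unsigned long withdraw_depth(pool_resize_model& p)
{
	if (p.curr_size.load() >= p.old_size.load()) {
		return 0;	/* not shrinking */
	}
	std::lock_guard<std::mutex> guard(p.free_list_mutex);
	return p.withdraw_target > p.withdraw_len
		? p.withdraw_target - p.withdraw_len : 0;
}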