Bug #116305 row_search_mvcc process wrong page of small buffer pool
Submitted: 6 Oct 11:05 Modified: 9 Oct 13:41
Reporter: zkong kong Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: InnoDB storage engine Severity:S2 (Serious)
Version:9.0 OS:Any
Assigned to: CPU Architecture:Any

[6 Oct 11:05] zkong kong
Description:
In our test instance, the buffer pool is set small to deploy more instances to test and got crash below:

#0  0x00007f9e91499aa1 in pthread_kill () from /lib64/libpthread.so.0
#1  0x000000000103b3e5 in handle_fatal_signal (sig=6) at /data/landun/workspace/MySQL-8.0/sql/signal_handler.cc:171
#2  <signal handler called>
#3  0x00007f9e90090387 in raise () from /lib64/libc.so.6
#4  0x00007f9e90091a78 in abort () from /lib64/libc.so.6
#5  0x0000000000cc3b60 in ut_dbg_assertion_failed (expr=<optimized out>, file=0x370e220 "/data/landun/workspace/MySQL-8.0/storage/innobase/btr/btr0pcur.cc", line=332) at /data/landun/workspace/MySQL-8.0/storage/innobase/ut/ut0dbg.cc:99
#6  0x0000000002781972 in btr_pcur_t::move_to_next_page (this=0x7f9dc458c308, mtr=0x7f9d9bdcf120) at /data/landun/workspace/MySQL-8.0/storage/innobase/btr/btr0pcur.cc:332
#7  0x000000000267a268 in move_to_next (mtr=<optimized out>, this=<optimized out>) at /data/landun/workspace/MySQL-8.0/storage/innobase/include/btr0pcur.h:973
#8  btr_pcur_t::move_to_next (this=<optimized out>, mtr=<optimized out>) at /data/landun/workspace/MySQL-8.0/storage/innobase/include/btr0pcur.h:962
#9  0x0000000002686cb5 in row_search_mvcc(unsigned char*, page_cur_mode_t, row_prebuilt_t*, unsigned long, unsigned long) () at /data/landun/workspace/MySQL-8.0/storage/innobase/row/row0sel.cc:6646
#10 0x00000000024b2131 in ha_innobase::general_fetch (this=0x7f9dc4530828, buf=0x7f9dc454fe28 "\004\340\004\071\071\071\071", direction=1, match_mode=0) at /data/landun/workspace/MySQL-8.0/storage/innobase/handler/ha_innodb.cc:10789
#11 0x000000000118ab1c in handler::ha_rnd_next (this=0x7f9dc4530828, buf=0x7f9dc454fe28 "\004\340\004\071\071\071\071") at /data/landun/workspace/MySQL-8.0/sql/handler.cc:3217
#12 0x0000000000da02d0 in TableScanIterator::Read (this=0x7f9d7f988868) at /data/landun/workspace/MySQL-8.0/sql/row_iterator.h:224
#13 0x0000000000f9f92a in Query_expression::ExecuteIteratorQuery(THD*) () at /opt/rh/devtoolset-10/root/usr/include/c++/10/bits/unique_ptr.h:421
#14 0x0000000000f9fc9f in Query_expression::execute(THD*) () at /data/landun/workspace/MySQL-8.0/sql/sql_union.cc:1358
#15 0x0000000000f04d7b in Sql_cmd_dml::execute_inner(THD*) () at /data/landun/workspace/MySQL-8.0/sql/sql_select.cc:841
#16 0x0000000000f103aa in Sql_cmd_dml::execute(THD*) () at /data/landun/workspace/MySQL-8.0/sql/sql_select.cc:610
#17 0x0000000000e9a6ed in mysql_execute_command(THD*, bool) () at /data/landun/workspace/MySQL-8.0/sql/sql_parse.cc:4777
#18 0x0000000000e9fed5 in dispatch_sql_command (thd=thd@entry=0x7f9e49448f40, parser_state=parser_state@entry=0x7f9d9bdd0bb0, log_statement=log_statement@entry=false) at /data/landun/workspace/MySQL-8.0/sql/sql_parse.cc:5586
#19 0x0000000000ea1389 in dispatch_command(THD*, COM_DATA const*, enum_server_command) () at /data/landun/workspace/MySQL-8.0/sql/sql_parse.cc:2126
#20 0x0000000000ea3324 in do_command (thd=thd@entry=0x7f9e49448f40) at /data/landun/workspace/MySQL-8.0/sql/sql_parse.cc:1398
#21 0x0000000000fec30b in threadpool_process_request(THD*) () at /data/landun/workspace/MySQL-8.0/sql/threadpool_common.cc:261
#22 0x000000000100d9b7 in handle_event (connection=0x7f9e4923b3a0) at /data/landun/workspace/MySQL-8.0/sql/threadpool_unix.cc:1658
#23 worker_main (param=param@entry=0x5095680 <all_groups+11776>) at /data/landun/workspace/MySQL-8.0/sql/threadpool_unix.cc:1751
#24 0x00000000029c83ae in pfs_spawn_thread (arg=0x7f9dcb2ebca0) at /data/landun/workspace/MySQL-8.0/storage/perfschema/pfs.cc:2898
#25 0x00007f9e91494ea5 in start_thread () from /lib64/libpthread.so.0
#26 0x00007f9e90158b0d in clone () from /lib64/libc.so.6

after some investigation:
we found there some risk that may read wrong page when pcur restore
            session 1                                                    session 2                                             session 3
 pcur restore
 run_with_hint
 Block_hint::buffer_fix_block_if_still_valid
                                                             buf_LRU_free_page
                                                             rw_lock_x_lock(page_hash)
                                                             buf_LRU_block_remove_hashed
                                                               set state BUF_BLOCK_REMOVE_HASH
                                                               rw_lock_x_unlock(page_hash);
                                                             add to free list
 rw_lock_s_lock(page_hash)
 compare page_id
                                                                                                                  buf_read_page_low                                                                                           
                                                                                                                     buf_page_init_for_read
                                                                                                                        set page id
                                                                                                                        set state BUF_BLOCK_FIEL_PAGE
 compare BUF_BLOCK_FIEL_PAGE
   buf_fix // wrong page
 rw_lock_unlock(page_hash)
 

How to repeat:
1. prepare the data
1.1./sysbench /data1/ragnar/tools/sysbench/share/sysbench/oltp_common.lua  --mysql-host=9.30.17.135 --mysql-port=8309 --mysql-db=sysbench --tables=2 --table-size=100000 --mysql-user='xxxx' --mysql-password='xxx' --threads=128 --report-interval=1  prepare

1.2 query the space id of the table sbtest1:

mysql> select space from information_schema.innodb_tablespaces where name='sysbench/sbtest1';
+-------+
| space |
+-------+
|   264 |
+-------+
1 row in set (0.00 sec)

2. add the patch to crash the server if proccess wrong page:
NOTE:the space() is the space id of the table sbtest1
2.1 wait the the block to be freed of pcur stored
diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc
index 4a2500b7409..f61df06b426 100644
--- a/storage/innobase/buf/buf0block_hint.cc
+++ b/storage/innobase/buf/buf0block_hint.cc
@@ -70,21 +70,27 @@ void Block_hint::buffer_fix_block_if_still_valid() {
   if (m_block != nullptr) {
     const buf_pool_t *const pool = buf_pool_get(m_page_id);
     rw_lock_t *latch = buf_page_hash_lock_get(pool, m_page_id);
+    if (m_page_id.space()==264 && m_page_id.page_no()==6) sleep(20);
     rw_lock_s_lock(latch, UT_LOCATION_HERE);
     /* If not own buf_pool_mutex, page_hash can be changed. */
     latch = buf_page_hash_lock_s_confirm(latch, pool, m_page_id);
     if (buf_is_block_in_instance(pool, m_block) &&
-        m_page_id == m_block->page.id &&
-        buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
-      buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+        m_page_id == m_block->page.id) {
+        if (m_page_id.space()==264 && m_page_id.page_no()==6) sleep(20);
+        if (buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
+          buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+        } else {
+          clear();
+        }
     } else {
       clear();
     }
     rw_lock_s_unlock(latch);
   }
 }
-void Block_hint::buffer_unfix_block_if_needed(buf_block_t *block) {
+void Block_hint::buffer_unfix_block_if_needed(buf_block_t *block, page_id_t page_id) {
   if (block != nullptr) {
+    ut_a(page_id == block->page.id); // block must be the fixed one
     buf_block_buf_fix_dec(block);
   }
 }

2.2 wait the run_with_hint check failed
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 3fe3a8654e4..6d8c74b7e28 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -2269,7 +2269,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, bool zip,
       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
       rw_lock_x_unlock(hash_lock);
       mutex_exit(&((buf_block_t *)bpage)->mutex);
-
+      if (bpage->id.space()==264 && bpage->id.page_no()==6) sleep(20);
       if (zip && bpage->zip.data) {
         /* Free the compressed page. */
         void *data = bpage->zip.data;

2.3 check and crash the server when page is wrong
diff --git a/storage/innobase/include/buf0block_hint.h b/storage/innobase/include/buf0block_hint.h
index 90639b5c845..9f66f145b8b 100644
--- a/storage/innobase/include/buf0block_hint.h
+++ b/storage/innobase/include/buf0block_hint.h
@@ -52,7 +52,7 @@ class Block_hint {
     remember which block we need to unfix */
     buf_block_t *buffer_fixed_block = m_block;
     auto res = f(buffer_fixed_block);
-    buffer_unfix_block_if_needed(buffer_fixed_block);
+    buffer_unfix_block_if_needed(buffer_fixed_block, m_page_id);
     return res;
   }
 
@@ -69,7 +69,7 @@ class Block_hint {
 
   /** A helper function which decrements block->buf_fix_count if it's non-null
   @param[in]  block   A pointer to a block or nullptr */
-  static void buffer_unfix_block_if_needed(buf_block_t *block);
+  static void buffer_unfix_block_if_needed(buf_block_t *block, page_id_t page_id);
 };
 
 }  // namespace buf

3. run mysqld with buffer pool size 5M
mysql> show variables like '%buffer_pool_size%';
+-------------------------+---------+
| Variable_name           | Value   |
+-------------------------+---------+
| innodb_buffer_pool_size | 5242880 |
+-------------------------+---------+
1 row in set (0.01 sec)

3.1 session 1 
mysql> select * from sbtest1;
ERROR 2013 (HY000): Lost connection to MySQL server during query

3.2 session 2 
mysql> select * from sbtest2; // this will free the session 1's page and crash

Suggested fix:
set the page_id to MAX_INT in buf_LRU_block_remove_hashed before release the page_hash
[7 Oct 8:44] zhai weixiang
bug#116303 is duplicate of this one
[7 Oct 9:05] zhai weixiang
The scenario descrpition can refer to that in bug#116303. This report has a mini mistake that comparing page id should be done before reseting page id to UINX_MAX and adding to free list.
[7 Oct 9:51] MySQL Verification Team
Hi Mr. kong,

Thank you very much for your bug report.

Before we proceed, we do need to clear some issues regarding this bug report.

First of all, is it related to the single instance of the MySQL Server running or to several connected instances.

Second, we accept bug reports, but we test them on our official binaries, which are based on our official and unchanged source code.

Hence, we have to know if we can repeat the problem with our official binaries.

Thank you very much , in advance ........
[7 Oct 11:38] zkong kong
Hi:
First it's single instance.
Second without patch, it's difficult to reproduction the problem stably. Our stress test encounter once several days.
[7 Oct 11:50] MySQL Verification Team
Hi Mr. kong, zhai,

We have got a request from a Development regarding this bug report.

They wish if you could use the DBUG_SYNC facility to develop mtr test to demonstrate the problem.   It would be easier to reason with that way and easier for us to fix.

Thank you very much in advance.
[7 Oct 11:52] MySQL Verification Team
Hi All,

Also, as soon as we test that MTR test case, we shall verify a bug.

Thanks in advance.
[7 Oct 12:07] MySQL Verification Team
Hi,

Here is an example on how to use DEBUG_SYNC:

--- a/storage/innobase/ddl/ddl0par-scan.cc
+++ b/storage/innobase/ddl/ddl0par-scan.cc
@@ -237,6 +237,7 @@ dberr_t Parallel_cursor::scan(Builders &builders) noexcept {
             thread_ctx->get_state() != Parallel_reader::State::THREAD) {
           thread_ctx->savepoint();
           latches_released = true;
+          DEBUG_SYNC_C("ddl_bulk_inserter_latches_released");
         }
         return DB_SUCCESS;
       });

Build debug version of server and run the following test case for MTR framework:

CREATE TABLE t1 (pk CHAR(5) PRIMARY KEY);
INSERT INTO t1 VALUES ('aaaaa'), ('bbbbb'), ('bbbcc'), ('ccccc'), ('ddddd'), ('eeeee');

set global innodb_purge_stop_now=ON;
DELETE FROM t1 WHERE pk = 'bbbcc';

SET DEBUG='+d,ddl_buf_add_two';
SET DEBUG_SYNC='ddl_bulk_inserter_latches_released SIGNAL latches_released WAIT_FOR go';
--send ALTER TABLE t1 ENGINE=InnoDB, ALGORITHM=INPLACE

--connection default
SET DEBUG_SYNC='now WAIT_FOR latches_released';
SET GLOBAL innodb_purge_run_now=ON;
--source include/wait_innodb_all_purged.inc
SET DEBUG_SYNC='now SIGNAL go';

--connection con1
--reap

SET DEBUG='-d,ddl_buf_add_two';
[7 Oct 12:09] MySQL Verification Team
HI,

Here is another example:

```cpp
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -12791,6 +12791,8 @@ static bool mysql_inplace_alter_table(
     DEBUG_SYNC(thd, "alter_table_inplace_after_lock_downgrade");
     THD_STAGE_INFO(thd, stage_alter_inplace);

+    DBUG_EXECUTE_IF("alter_table_inplace_after_lock_sleep", sleep(100));
+
     if (table->file->ha_inplace_alter_table(altered_table, ha_alter_info,
                                             table_def, altered_table_def)) {
       goto rollback;
```

Step 2: 

```sql
set global innodb_print_ddl_logs = ON;

We hope that this will be helpful .......
[7 Oct 12:27] MySQL Verification Team
Hi,

You can also find many examples in mysql-test/t/ directory with DEBUG_SYNC.

Our fist comment had a mistake. It is not DBUG_SYNC, but DEBUG_SYNC. There are also other commands, like DBUG_EXECUTE_IF() and others that you can find there .......
[7 Oct 14:55] zkong kong
the patch and the mtr

Attachment: bug116305.tar.gz (application/x-gzip, text), 1.25 KiB.

[7 Oct 15:43] MySQL Verification Team
Hi Mr. kong,

Sorry, but we fail to see in your attached file any DBUG* or DEBUG* commands in your test case.

That is what we explicitly asked for.

Many thanks in advance .......
[7 Oct 16:05] MySQL Verification Team
Hi Mr. kong,

Can you also send us a result file ???

Our test is simply always failing ........
[7 Oct 23:20] zkong kong
Hi:
Without fix the mtr is always fail at the assert at Block_hint::buffer_fix_block_if_still_valid

  +          ut_ad(m_page_id == m_block->page.id);

pcur expects restore the block which was stored and the assert above must be satisfied.
[8 Oct 11:13] zkong kong
Hi:
 Use patch of zhai weixiang can solve the assert fail, Is the fix ok, please review it, thx very much!
 The attachment above has the patch and the result file of the mtr.
[9 Oct 9:52] MySQL Verification Team
Hi Mr. kong,

Your patch for the file buf0block_hint.cc contains some changes at our source code.

Are those there for repeating the test case or for fixing the bug.

Many thanks in advance.
[9 Oct 11:53] zkong kong
Hi:
 The problem is block_hint may buffer_fix the block which is not expected, the patch has the the assert to repeat the problem and the fix, please confirm the changes:

diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc
index 4a2500b7409..2b764a6e63f 100644
--- a/storage/innobase/buf/buf0block_hint.cc
+++ b/storage/innobase/buf/buf0block_hint.cc
@@ -70,13 +70,25 @@ void Block_hint::buffer_fix_block_if_still_valid() {
   if (m_block != nullptr) {
     const buf_pool_t *const pool = buf_pool_get(m_page_id);
     rw_lock_t *latch = buf_page_hash_lock_get(pool, m_page_id);
+    DBUG_EXECUTE_IF("bug116305", { if (m_page_id.space() == 6 && m_page_id.page_no() == 4) sleep(10); }); // ... repeating test (see mtr)
     rw_lock_s_lock(latch, UT_LOCATION_HERE);
     /* If not own buf_pool_mutex, page_hash can be changed. */
     latch = buf_page_hash_lock_s_confirm(latch, pool, m_page_id);
-    if (buf_is_block_in_instance(pool, m_block) &&
-        m_page_id == m_block->page.id &&
-        buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
-      buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+    if (buf_is_block_in_instance(pool, m_block)) {
+      buf_block_t *ptr = m_block; // ... fix the bug add a lock to check page_id and state atomicly
+      buf_page_mutex_enter(m_block); // ... fix the bug
+      if (m_page_id == m_block->page.id) {
+        DBUG_EXECUTE_IF("bug116305", { if (m_page_id.space() == 6 && m_page_id.page_no() == 4) sleep(10); }); // ... repeating test (see mtr)
+        if (buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
+          ut_ad(m_page_id == m_block->page.id); // ... repeating test (see mtr), if the block not exprected process it will cause error
+          buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+        } else {
+          clear();
+        }
+      } else {
+        clear();
+      }
+      buf_page_mutex_exit(ptr); // ... fix the bug
     } else {
       clear();
     }
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 3fe3a8654e4..03aedf9a0ad 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -2269,7 +2269,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, bool zip,
       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
       rw_lock_x_unlock(hash_lock);
       mutex_exit(&((buf_block_t *)bpage)->mutex);
-
+      DBUG_EXECUTE_IF("bug116305", { if (bpage->id.space()==6 && bpage->id.page_no()==4) sleep(10); }); // ... repeating test (see mtr)
       if (zip && bpage->zip.data) {
         /* Free the compressed page. */
         void *data = bpage->zip.data;
[9 Oct 12:03] MySQL Verification Team
HI Mr. kong,

We are slightly puzzled here.

You claim that file  bug116305.tar.gz contains now both the code for the test case only.

However, you also claim that it contains a fix for the bug.

Can you send us just a patch that is necessary for the test.

Thanks in advance.
[9 Oct 12:15] zkong kong
Hi:
  Because the assert if without fix it crash stably (run the mtr), if only the test code, please double check it again, thx very much!

diff --git a/storage/innobase/buf/buf0block_hint.cc b/storage/innobase/buf/buf0block_hint.cc
index 4a2500b7409..66e22a9dd01 100644
--- a/storage/innobase/buf/buf0block_hint.cc
+++ b/storage/innobase/buf/buf0block_hint.cc
@@ -70,13 +70,19 @@ void Block_hint::buffer_fix_block_if_still_valid() {
   if (m_block != nullptr) {
     const buf_pool_t *const pool = buf_pool_get(m_page_id);
     rw_lock_t *latch = buf_page_hash_lock_get(pool, m_page_id);
+    DBUG_EXECUTE_IF("bug116305", { if (m_page_id.space() == 6 && m_page_id.page_no() == 4) sleep(10); }); // wait other session free the hint block
     rw_lock_s_lock(latch, UT_LOCATION_HERE);
     /* If not own buf_pool_mutex, page_hash can be changed. */
     latch = buf_page_hash_lock_s_confirm(latch, pool, m_page_id);
     if (buf_is_block_in_instance(pool, m_block) &&
-        m_page_id == m_block->page.id &&
-        buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
-      buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+        m_page_id == m_block->page.id) {
+        DBUG_EXECUTE_IF("bug116305", { if (m_page_id.space() == 6 && m_page_id.page_no() == 4) sleep(10); }); // if check page_id and state not atamiclly the page of the block may freed
+        if (buf_block_get_state(m_block) == BUF_BLOCK_FILE_PAGE) {
+          ut_ad(m_page_id == m_block->page.id); // with mtr case it must be failed here and crash
+          buf_block_buf_fix_inc(m_block, UT_LOCATION_HERE);
+        } else {
+          clear();
+        }
     } else {
       clear();
     }
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 3fe3a8654e4..03aedf9a0ad 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -2269,7 +2269,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, bool zip,
       ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
       rw_lock_x_unlock(hash_lock);
       mutex_exit(&((buf_block_t *)bpage)->mutex);
-
+      DBUG_EXECUTE_IF("bug116305", { if (bpage->id.space()==6 && bpage->id.page_no()==4) sleep(10); }); // free the block_hint block and let it's check logic failed
       if (zip && bpage->zip.data) {
         /* Free the compressed page. */
         void *data = bpage->zip.data;
[9 Oct 13:41] MySQL Verification Team
Hi Mr. kong,

Thank you for your bug report.

After applying your patch,  building a debug 9.0.1 binary and running  a test, we have repeated the problem.

This is now a verified bug report for version 9.0.

Thanks again.