commit b327a55074e5b2703dd4c1195bd5aa9d4b3899f3
Author: zhongbei.yk <zhongbei.yk@alibaba-inc.com>
Date:   Tue Sep 24 19:06:19 2024 +0800

    [Bugfix] Crash recovery is slow due to double write
    
    Description
    ===========
    MySQL bug#116209.
    MySQL bug#116239.
    
    If a large number of redo logs with type MLOG_FILE_EXTEND need to be
    applied to a space, and there are a large number of pages
    corresponding to the space in double-write, crash
    recovery will be slow even if there is no half-write problem.
    
    Analysis
    ========
    Every time the redo of MLOG_FILE_EXTEND is applied, a double-write of
    this space is restored, even if no half-write problem occurs. For
    example, if 1000 redo logs of MLOG_FILE_EXTEND need to be applied, it
    will call dblwr_recover_page() for 1000 times. This causes a lot of disk
    IO and increases the instance recovery time. Only the first time it is
    necessary to do double write recovery, the rest is not necessary.
    
    Fix
    ===
    The main reason is that the dblwr_recover_page() function returns false
    when no page corruption occurs, resulting in repeated
    dblwr_recover_page() function calls.
    
    dblwr_recover_page() should return true when no page corruption occurs
    to avoid repeated dblwr_recover_page() function calls.

diff --git a/mysql-test/include/assert_grep.inc b/mysql-test/include/assert_grep.inc
index c4fe74afb3b..198eb52f24a 100644
--- a/mysql-test/include/assert_grep.inc
+++ b/mysql-test/include/assert_grep.inc
@@ -73,17 +73,17 @@ if ($assert_match == '')
   }
 }
 
---let $_ag_assert_count = 0
+--let $_ag_arg_count = 0
 if ($assert_match != '') {
-  --inc $_ag_assert_count
+  --inc $_ag_arg_count
 }
 if ($assert_count != '') {
-  --inc $_ag_assert_count
+  --inc $_ag_arg_count
 }
 if ($assert_count_condition != '') {
-  --inc $_ag_assert_count
+  --inc $_ag_arg_count
 }
-if ($_ag_assert_count != 1) {
+if ($_ag_arg_count != 1) {
   --echo assert_text='$assert_text' assert_match='$assert_match' assert_count='$assert_count' assert_count_condition='$assert_count_condition'
   --die !!!ERROR IN TEST: you must set exactly one of $assert_match, $assert_count or $assert_count_condition
 }
diff --git a/mysql-test/suite/innodb/r/crash_recovery_is_slow_due_to_dblwr.result b/mysql-test/suite/innodb/r/crash_recovery_is_slow_due_to_dblwr.result
new file mode 100644
index 00000000000..dcbc8197fa6
--- /dev/null
+++ b/mysql-test/suite/innodb/r/crash_recovery_is_slow_due_to_dblwr.result
@@ -0,0 +1,33 @@
+# 1. Prepare
+CREATE TABLE articles (
+id INT AUTO_INCREMENT PRIMARY KEY,
+title VARCHAR(255) NOT NULL,
+body TEXT NOT NULL
+) ENGINE=InnoDB;
+CREATE PROCEDURE insert_into_table(IN num INTEGER)
+BEGIN
+declare x INT;
+set x = 1;
+while x < num do
+start transaction;
+insert into articles  (`title`, `body`) VALUES (repeat('rds', 60), repeat('mysql', 1200));
+commit;
+set x = x + 1;
+end  while;
+end$$
+SET GLOBAL innodb_log_checkpoint_now = 1;
+SET GLOBAL innodb_checkpoint_disabled = ON;
+# 2. Generate 30 MLOG_FILE_EXTEND redo logs and crash mysqld
+set global debug='+d,ib_posix_fallocate_fail_disk';
+# trigger 30 space-extend, then crash the instance
+CALL insert_into_table(10000);
+ERROR HY000: Lost connection to MySQL server during query
+# 3. Do crash recovery and print some messages in error log.
+# 4. Check error log, the table articles should only do double-write once
+#    Before this bugfix, you will find multiple entries like 'do io for dblwr space: 6 page: 0' in the error log,
+#    while the count is equal to 'the number of MLOG_FILE_EXTEND redo logs' + 1.
+#    After this bugfix, the count is 1.
+include/assert_grep.inc [the table articles should only do double-write once]
+# 5. Cleanup
+DROP TABLE articles;
+DROP PROCEDURE insert_into_table;
diff --git a/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr-master.opt b/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr-master.opt
new file mode 100644
index 00000000000..73ddc76e174
--- /dev/null
+++ b/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr-master.opt
@@ -0,0 +1 @@
+--debug=+d,ib_skip_dblwr_for_space_id_not_6
diff --git a/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr.test b/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr.test
new file mode 100644
index 00000000000..dbb38bbfbac
--- /dev/null
+++ b/mysql-test/suite/innodb/t/crash_recovery_is_slow_due_to_dblwr.test
@@ -0,0 +1,60 @@
+# Description:
+# ============
+# MySQL bug#116209 Crash recovery is slow because of repeated scans of double-wrtie
+# This test case is used to reproduce the bug.
+# Running this test before this bugfix, you will find multiple entries like 'do io for dblwr space: 6 page: 0' in the error log
+# After this bugfix, you will find only one entry like 'do io for dblwr space: 6 page: 0' in the error log.
+#
+# While the table with space_id = 8 is the table articles we created in the test.
+
+--source include/have_debug.inc
+
+--echo # 1. Prepare
+
+CREATE TABLE articles (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    title VARCHAR(255) NOT NULL,
+    body TEXT NOT NULL
+) ENGINE=InnoDB;
+
+delimiter $$;
+CREATE PROCEDURE insert_into_table(IN num INTEGER)
+     BEGIN
+     declare x INT;
+     set x = 1;
+     while x < num do
+          start transaction;
+          insert into articles  (`title`, `body`) VALUES (repeat('rds', 60), repeat('mysql', 1200));
+          commit;
+        set x = x + 1;
+     end  while;
+end$$
+delimiter ;$$
+
+SET GLOBAL innodb_log_checkpoint_now = 1;
+SET GLOBAL innodb_checkpoint_disabled = ON;
+--echo # 2. Generate 30 MLOG_FILE_EXTEND redo logs and crash mysqld
+set global debug='+d,ib_posix_fallocate_fail_disk';
+
+--echo # trigger 30 space-extend, then crash the instance
+--source include/expect_crash.inc
+--error 2013
+CALL insert_into_table(10000);
+
+--echo # 3. Do crash recovery and print some messages in error log.
+--let $restart_parameters= restart: --debug=+d,print_info_in_error_log
+--source include/start_mysqld_no_echo.inc
+
+--echo # 4. Check error log, the table articles should only do double-write once
+--echo #    Before this bugfix, you will find multiple entries like 'do io for dblwr space: 6 page: 0' in the error log,
+--echo #    while the count is equal to 'the number of MLOG_FILE_EXTEND redo logs' + 1.
+--echo #    After this bugfix, the count is 1.
+--let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.1.err
+--let $assert_text = the table articles should only do double-write once
+--let $assert_select= do io for dblwr space: 6 page: 0
+--let $assert_count = 1
+--source include/assert_grep.inc
+
+--echo # 5. Cleanup
+DROP TABLE articles;
+DROP PROCEDURE insert_into_table;
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 62695d5e433..84dd0990259 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -2495,9 +2495,18 @@ dberr_t dblwr::write(buf_flush_t flush_type, buf_page_t *bpage,
     return DB_SUCCESS;
   }
 
+  /* To repeat the bug#116209, we skip the double-write of the space that id is
+  not 6. While the table with space_id = 6 is the table articles we created in
+  the test.
+  This is to avoid eviting out pages that space id is 6 in double-write when
+  other pages are written to double-write. */
+  DBUG_EXECUTE_IF("ib_skip_dblwr_for_space_id_not_6",
+                  if (space_id != 6) goto skip_double_write;);
+
   if (srv_read_only_mode || fsp_is_system_temporary(space_id) ||
       !dblwr::is_enabled() || Double_write::s_instances == nullptr ||
       mtr_t::s_logging.dblwr_disabled()) {
+  skip_double_write:
     /* Skip the double-write buffer since it is not needed. Temporary
     tablespaces are never recovered, therefore we don't care about
     torn writes. */
@@ -3041,6 +3050,11 @@ bool dblwr::recv::Pages::dblwr_recover_page(page_no_t dblwr_page_no,
 
   request.dblwr();
 
+  /* Print the page that double-wirte did. */
+  DBUG_EXECUTE_IF("print_info_in_error_log",
+                  ib::info() << "do io for dblwr space: " << space->id
+                             << " page: " << page_no;);
+
   /* Read in the page from the data file to compare. */
   auto err = fil_io(request, true, page_id, page_size, 0, page_size.physical(),
                     buffer.begin(), nullptr);
@@ -3095,7 +3109,11 @@ bool dblwr::recv::Pages::dblwr_recover_page(page_no_t dblwr_page_no,
       available in dblwr buffer. */
     } else {
       /* Database page is fine.  No need to restore from dblwr. */
-      return false;
+
+      /* fix MySQL bug#116209.
+      Return true to mark that the page has been recovered and does not need to
+      be recovered again */
+      return true;
     }
   }
 
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 1398d14552f..9a791fad7e1 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -6676,6 +6676,13 @@ bool Fil_shard::space_extend(fil_space_t *space, page_no_t size) {
 
     DBUG_EXECUTE_IF("ib_posix_fallocate_fail_einval", ret = EINVAL;);
 
+#ifdef UNIV_DEBUG
+    static int j = 0;
+    DBUG_EXECUTE_IF("ib_posix_fallocate_fail_disk", j++;);
+    /* Generate 30 MLOG_FILE_EXTEND redo logs and crash mysqld. */
+    if (j == 30) DBUG_SUICIDE();
+#endif
+
     if (ret != 0) {
       /* We already pass the valid offset and len in, if EINVAL
       is returned, it could only mean that the file system doesn't
@@ -10398,6 +10405,12 @@ byte *fil_tablespace_redo_rename(byte *ptr, const byte *end,
 byte *fil_tablespace_redo_extend(byte *ptr, const byte *end,
                                  const page_id_t &page_id, ulint parsed_bytes,
                                  bool parse_only) {
+  /* Print the page that redo_extend did. */
+  DBUG_EXECUTE_IF("print_info_in_error_log",
+                  ib::info()
+                      << "apply MLOG_FILE_EXTEND space " << page_id.space()
+                      << " page " << page_id.page_no(););
+
   ut_a(page_id.page_no() == 0);
 
   /* We never recreate the system tablespace. */