From 783aab35b515b380c3999850a56571007caef0b1 Mon Sep 17 00:00:00 2001 From: Varun Nagaraju Date: Thu, 26 Dec 2024 01:39:25 +0530 Subject: [PATCH] PS-9391 Fixes replication break error because of HASH_SCAN https://perconadev.atlassian.net/browse/PS-9391 Problem ======= When a replica's slave_rows_search_algorithms is set to HASH_SCAN, the replication may break with HA_ERR_KEY_NOT_FOUND. Analysis ======== When a replica's slave_rows_search_algorithms is set to HASH_SCAN, it prepares a unique key list for all the rows in a particular Row_event. The same unique key list will later be used to retrieve all tuples associated with each key in the list from the storage engine. In the case of multiple updates targeted at the same row, like how it is shown in the testcase, it may happen that this unique key list is filled with entries which don't exist yet in the table. This is a problem when there is an intermediate update which changes the value of the index column to a lesser value than the original entry and that changed value is used in another update as shown in the second part of the testcase. It is an issue because the unique key list is a std::set which internally sorts its entries. When this sorting happens, the first entry of the list could potentially be a value which doesn't exist in the table yet, and when it is searched in the next_record_scan() method, it fails returning the HA_ERR_KEY_NOT_FOUND error. Solution ======== Instead of using std::set to store the distinct keys, a combination of an unordered_set and a vector is used to preserve the original order of updates and avoid duplicates at the same time, which prevents the side effects of sorting. 
--- ...pdate_of_same_row_sort_before_apply.result | 55 ++++++++++++ ..._update_of_same_row_sort_before_apply.test | 84 +++++++++++++++++++ sql/log_event.cc | 28 +++---- sql/log_event.h | 28 ++++++- 4 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result create mode 100644 mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test diff --git a/mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result b/mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result new file mode 100644 index 00000000000..d719e95124b --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result @@ -0,0 +1,55 @@ +include/master-slave.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +[connection master] +[connection slave] +set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms; +Warnings: +Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release. +set @@global.slave_rows_search_algorithms= 'HASH_SCAN'; +Warnings: +Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release. 
+[connection master] +CREATE TABLE t1 (i INT, INDEX t1_i(i)); +CREATE FUNCTION f1 () RETURNS INT BEGIN +UPDATE t1 SET i = 11 WHERE i = 10; +UPDATE t1 SET i = 12 WHERE i = 11; +RETURN 0; +END| +include/sync_slave_sql_with_master.inc +[connection master] +INSERT INTO t1 VALUES (10); +SELECT f1(); +f1() +0 +include/sync_slave_sql_with_master.inc +include/assert.inc ['There is only one row in table t1'] +[connection master] +include/diff_tables.inc [master:test.t1, slave:test.t1] +CREATE FUNCTION f2 () RETURNS INT BEGIN +UPDATE t1 SET i = 11 WHERE i = 12; +UPDATE t1 SET i = 13 WHERE i = 11; +RETURN 0; +END| +include/sync_slave_sql_with_master.inc +[connection master] +SELECT f2(); +f2() +0 +include/sync_slave_sql_with_master.inc +include/assert.inc ['There is only one row in table t1'] +[connection master] +include/diff_tables.inc [master:test.t1, slave:test.t1] +[connection master] +SELECT * FROM t1; +i +13 +DROP FUNCTION f1; +DROP FUNCTION f2; +DROP TABLE t1; +[connection slave] +set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms; +Warnings: +Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release. 
+include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test b/mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test new file mode 100644 index 00000000000..55162f5cfd1 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test @@ -0,0 +1,84 @@ +# Bug#115741 Test whether setting slave_rows_search_algorithms to HASH_SCAN works properly +# when multiple updates are targeted to the same row within a single update rows log event + +--source include/have_binlog_format_row.inc +--source include/set_privilege_checks_user_as_system_user.inc +--source include/master-slave.inc + +--source include/rpl_connection_slave.inc +set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms; +set @@global.slave_rows_search_algorithms= 'HASH_SCAN'; + +# Scenarios considered in this test case involve a table with no primary key but an index on the column +# When slave_rows_search_algorithms is set to HASH_SCAN, a list "m_distinct_keys" is created internally +# to store the unique key values which, later will be used to perform a table scan to apply the events +# for the rows present in the table. +--source include/rpl_connection_master.inc +# Create a table, with no primary key and an index. +CREATE TABLE t1 (i INT, INDEX t1_i(i)); + +# Scenario no.1 +# Sorting the elements of m_distinct_keys doesn't alter the original order of updates +# Create a stored function so that only one Update_rows_log_event is generated. 
+--delimiter | +CREATE FUNCTION f1 () RETURNS INT BEGIN + UPDATE t1 SET i = 11 WHERE i = 10; + UPDATE t1 SET i = 12 WHERE i = 11; + RETURN 0; +END| +--delimiter ; +--source include/sync_slave_sql_with_master.inc + +--source include/rpl_connection_master.inc +INSERT INTO t1 VALUES (10); +SELECT f1(); + +--source include/sync_slave_sql_with_master.inc +--let $assert_text= 'There is only one row in table t1' +--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1 +--source include/assert.inc + +--source include/rpl_connection_master.inc + +# Verify that there is no difference between tables of master and slave. +--let diff_tables=master:test.t1, slave:test.t1 +--source include/diff_tables.inc + + +# Scenario no.2 +# Sorting the elements of m_distinct_keys ALTERs the original order of updates causing +# the case where the key value from the list doesn't exist in the table yet until other +# UPDATEs are executed. +--delimiter | +CREATE FUNCTION f2 () RETURNS INT BEGIN + UPDATE t1 SET i = 11 WHERE i = 12; + UPDATE t1 SET i = 13 WHERE i = 11; + RETURN 0; +END| +--delimiter ; +--source include/sync_slave_sql_with_master.inc + +--source include/rpl_connection_master.inc +SELECT f2(); + +--source include/sync_slave_sql_with_master.inc +--let $assert_text= 'There is only one row in table t1' +--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1 +--source include/assert.inc + +--source include/rpl_connection_master.inc + +# Verify that there is no difference between tables of master and slave. 
+--let diff_tables=master:test.t1, slave:test.t1 +--source include/diff_tables.inc + +# Cleanup +--source include/rpl_connection_master.inc +SELECT * FROM t1; +DROP FUNCTION f1; +DROP FUNCTION f2; +DROP TABLE t1; + +--source include/rpl_connection_slave.inc +set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms; +--source include/rpl_end.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index f00bdef8893..f373ae6fdb4 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -7870,7 +7870,7 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, m_curr_row_end(nullptr), m_key(nullptr), m_key_info(nullptr), - m_distinct_keys(Key_compare(&m_key_info)), + m_distinct_keys(0, Key_hash(&m_key_info), Key_compare(&m_key_info)), m_distinct_key_spare_buf(nullptr) { DBUG_TRACE; common_header->type_code = event_type; @@ -7961,7 +7961,7 @@ Rows_log_event::Rows_log_event( m_curr_row_end(nullptr), m_key(nullptr), m_key_info(nullptr), - m_distinct_keys(Key_compare(&m_key_info)), + m_distinct_keys(0, Key_hash(&m_key_info), Key_compare(&m_key_info)), m_distinct_key_spare_buf(nullptr) #endif { @@ -9052,9 +9052,9 @@ int Rows_log_event::next_record_scan(bool first_read) { marker according to the next key value that we have in the list. 
*/ if (error) { - if (m_itr != m_distinct_keys.end()) { - m_key = *m_itr; - m_itr++; + if (m_distinct_key_idx != m_distinct_keys_original_order.size()) { + m_key = m_distinct_keys_original_order[m_distinct_key_idx]; + m_distinct_key_idx++; first_read = true; } else { if (!is_trx_retryable_upon_engine_error(error)) @@ -9088,14 +9088,11 @@ int Rows_log_event::open_record_scan() { if (m_key_index < MAX_KEY) { if (m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN) { - /* initialize the iterator over the list of distinct keys that we have */ - m_itr = m_distinct_keys.begin(); - - /* get the first element from the list of keys and increment the - iterator + /* get the first element from the vector of keys and increment the + index */ - m_key = *m_itr; - m_itr++; + m_key = m_distinct_keys_original_order[m_distinct_key_idx]; + m_distinct_key_idx++; } else { /* this is an INDEX_SCAN we need to store the key in m_key */ assert((m_rows_lookup_algorithm == ROW_LOOKUP_INDEX_SCAN) && m_key); @@ -9142,9 +9139,10 @@ int Rows_log_event::add_key_to_distinct_keyset() { DBUG_TRACE; assert(m_key_index < MAX_KEY); key_copy(m_distinct_key_spare_buf, m_table->record[0], m_key_info, 0); - std::pair<std::set<uchar *, Key_compare>::iterator, bool> ret = + std::pair<std::unordered_set<uchar *, Key_hash, Key_compare>::iterator, bool> ret = m_distinct_keys.insert(m_distinct_key_spare_buf); if (ret.second) { + m_distinct_keys_original_order.push_back(m_distinct_key_spare_buf); /* Insert is successful, so allocate a new buffer for next key */ m_distinct_key_spare_buf = (uchar *)thd->alloc(m_key_info->key_length); if (!m_distinct_key_spare_buf) { diff --git a/sql/log_event.h 
b/sql/log_event.h index 3e02a116560..df820270b82 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -39,7 +39,8 @@ #include #include #include -#include <set> +#include <unordered_set> +#include <vector> #include #include @@ -2898,14 +2898,34 @@ class Rows_log_event : public virtual binary_log::Rows_event, public Log_event { Key_compare(KEY **ki = nullptr) : m_key_info(ki) {} bool operator()(uchar *k1, uchar *k2) const { return key_cmp2((*m_key_info)->key_part, k1, (*m_key_info)->key_length, - k2, (*m_key_info)->key_length) < 0; + k2, (*m_key_info)->key_length) == 0; } private: KEY **m_key_info; }; - std::set<uchar *, Key_compare> m_distinct_keys; - std::set<uchar *, Key_compare>::iterator m_itr; + class Key_hash { + public: + Key_hash(KEY **ki = nullptr) : m_key_info(ki) {} + size_t operator()(uchar *ptr) const { + size_t hash = 0; + if (ptr) { + for (size_t i = 0; i < (*m_key_info)->key_length; i++) { + hash = hash * 31 + ptr[i]; // simple polynomial byte hash + } + } + return hash; + } + private: + KEY **m_key_info; + }; + std::unordered_set<uchar *, Key_hash, Key_compare> m_distinct_keys; + + /** + A vector storing the distinct keys in their original insertion order + */ + std::vector<uchar *> m_distinct_keys_original_order; + std::size_t m_distinct_key_idx = 0; /** A spare buffer which will be used when saving the distinct keys for doing an index scan with HASH_SCAN search algorithm. -- 2.34.1