diff --git a/mysql-test/suite/rpl/r/rpl_hash_scan_repeated_row_iodku.result b/mysql-test/suite/rpl/r/rpl_hash_scan_repeated_row_iodku.result new file mode 100644 index 00000000000..a85daef9e32 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_hash_scan_repeated_row_iodku.result @@ -0,0 +1,121 @@ +include/rpl/init_source_replica.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +[connection master] +# +# Bug#106113: nullable unique key, repeated updates of the same row +CREATE TABLE t1 ( +id INT DEFAULT NULL, +test VARCHAR(255) DEFAULT 'hallo', +UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES (1,'foo'),(2,'test'),(3,'lorem'); +INSERT INTO t1 VALUES (1, 'test'), (1, 'foo'), (1, 'bar') AS new +ON DUPLICATE KEY UPDATE test=new.test; +include/rpl/sync_to_replica.inc +include/assert.inc ['Bug#106113 keeps three rows on the master'] +include/assert.inc ['Bug#106113 id=1 reaches final value bar'] +include/diff_tables.inc [master:test.t1, slave:test.t1] +[connection master] +DROP TABLE t1; +include/rpl/sync_to_replica.inc +# +# Bug#105802: nullable unique key used as hash-scan index +[connection master] +CREATE TABLE t2 ( +a VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, +b VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, +c VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, +d VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, +UNIQUE KEY k1 (a, b, c) +) ENGINE=InnoDB; +INSERT INTO t2 (`d`,`a`,`c`,`b`) VALUES +('2021-12-04 16:42:53','A','B','A'), +('2021-12-04 16:42:54','A','B','A'), +('2021-12-04 16:42:53','A','B','A'), +('2021-12-04 16:42:55','A','B','A') AS new +ON DUPLICATE KEY UPDATE +`d`=new.`d`, +`a`=new.`a`, +`c`=new.`c`, +`b`=new.`b`; +include/rpl/sync_to_replica.inc +include/assert.inc ['Bug#105802 leaves one row after duplicate-key updates'] +include/assert.inc ['Bug#105802 final value of d is 2021-12-04 16:42:55'] +include/diff_tables.inc [master:test.t2, slave:test.t2] +[connection master] +DROP TABLE t2; +include/rpl/sync_to_replica.inc +# +# Three entries in the same hash bucket: IODKU cycles through BI='foo' +# three times, placing E1(bi_start=100), E3(bi_start=300), and +# E5(bi_start=500) in the same bucket. The inner loop must find the +# true global minimum (E1) regardless of bucket iteration order. +[connection master] +CREATE TABLE t3 ( +id INT DEFAULT NULL, +test VARCHAR(255) DEFAULT 'hallo', +UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t3 VALUES (1,'foo'),(2,'test'),(3,'lorem'); +INSERT INTO t3 VALUES (1,'test'), (1,'foo'), (1,'bar'), (1,'foo'), (1,'baz') AS new +ON DUPLICATE KEY UPDATE test=new.test; +include/rpl/sync_to_replica.inc +include/assert.inc ['Three-entry bucket: three rows survive on master'] +include/assert.inc ['Three-entry bucket: id=1 reaches final value baz'] +include/diff_tables.inc [master:test.t3, slave:test.t3] +[connection master] +DROP TABLE t3; +include/rpl/sync_to_replica.inc +# +# Long ping-pong chain: 30-entry IODKU alternates BI between 'foo' and +# 'bar', producing 15 entries in each bucket. The inner loop must find +# the correct minimum-bi_start entry across all 15 candidates. +[connection master] +CREATE TABLE t4 ( +id INT DEFAULT NULL, +test VARCHAR(255) DEFAULT 'hallo', +UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t4 VALUES (1,'foo'),(2,'x'),(3,'y'); +INSERT INTO t4 VALUES +(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), +(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'), +(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), +(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'), +(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), +(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'done') AS new +ON DUPLICATE KEY UPDATE test=new.test; +include/rpl/sync_to_replica.inc +include/assert.inc ['Long chain: three rows survive on master'] +include/assert.inc ['Long chain: id=1 reaches final value done'] +include/diff_tables.inc [master:test.t4, slave:test.t4] +[connection master] +DROP TABLE t4; +include/rpl/sync_to_replica.inc +# +# Bug#106918: plain UPDATE on physically identical rows — HASH_SCAN must +# apply updates in event order so physical row ordering is preserved. +# Two rows share the same before-image (c1=1, c2=1) and therefore land +# in the same hash bucket. The fix picks the entry with the smallest +# bi_start (c2=10) for the first physical row and leaves c2=20 for the +# second. An AUTO_INCREMENT primary key added afterwards assigns ids in +# physical scan order, surfacing any ordering divergence as a data +# mismatch caught by diff_tables. +[connection master] +CREATE TABLE t5 (c1 INT, c2 INT) ENGINE=InnoDB; +INSERT INTO t5 VALUES (1, 1), (1, 1), (2, 2); +SET @n = 0; +UPDATE t5 SET c2 = (@n := @n + 10) WHERE c1 = 1; +include/rpl/sync_to_replica.inc +[connection master] +ALTER TABLE t5 ADD COLUMN id INT NOT NULL AUTO_INCREMENT, ADD PRIMARY KEY (id); +include/rpl/sync_to_replica.inc +include/assert.inc ['Bug#106918 first physical row (id=1) has c2=10'] +include/assert.inc ['Bug#106918 second physical row (id=2) has c2=20'] +include/diff_tables.inc [master:test.t5, slave:test.t5] +[connection master] +DROP TABLE t5; +include/rpl/sync_to_replica.inc +include/rpl/deinit.inc diff --git a/mysql-test/suite/rpl/t/rpl_hash_scan_repeated_row_iodku.test b/mysql-test/suite/rpl/t/rpl_hash_scan_repeated_row_iodku.test new file mode 100644 index 00000000000..6e3a30c6aee --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_hash_scan_repeated_row_iodku.test @@ -0,0 +1,211 @@ +# === Purpose === +# +# Verify that HASH_SCAN applies repeated updates to the same logical row +# correctly when a single INSERT ... ON DUPLICATE KEY UPDATE statement +# produces multiple row images in one Update_rows_log_event. +# +# === References === +# +# Bug#106113: insert on duplicate key cause 1032 replication error +# Bug#105802: Slave stops with HA_ERR_KEY_NOT_FOUND (INDEX_SCAN,HASH_SCAN) +# + +--source include/have_binlog_format_row.inc +--source include/rpl/set_privilege_checks_user_as_system_user.inc +--source include/rpl/init_source_replica.inc + +# HASH_SCAN is selected automatically for tables with no usable non-nullable +# unique key. All test tables use a nullable UNIQUE KEY, so no explicit +# replica_rows_search_algorithms setting is needed (slave_rows_search_algorithms +# was removed in 8.4). + +--echo # +--echo # Bug#106113: nullable unique key, repeated updates of the same row +CREATE TABLE t1 ( + id INT DEFAULT NULL, + test VARCHAR(255) DEFAULT 'hallo', + UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t1 VALUES (1,'foo'),(2,'test'),(3,'lorem'); +INSERT INTO t1 VALUES (1, 'test'), (1, 'foo'), (1, 'bar') AS new + ON DUPLICATE KEY UPDATE test=new.test; + +--source include/rpl/sync_to_replica.inc + +--let $assert_text= 'Bug#106113 keeps three rows on the master' +--let $assert_cond= [SELECT COUNT(*) FROM t1] = 3 +--source include/assert.inc + +--let $assert_text= 'Bug#106113 id=1 reaches final value bar' +--let $assert_cond= [SELECT COUNT(*) FROM t1 WHERE id=1 AND test="bar"] = 1 +--source include/assert.inc + +--let diff_tables=master:test.t1, slave:test.t1 +--source include/diff_tables.inc + +--source include/rpl/connection_source.inc +DROP TABLE t1; +--source include/rpl/sync_to_replica.inc + +--echo # +--echo # Bug#105802: nullable unique key used as hash-scan index +--source include/rpl/connection_source.inc +CREATE TABLE t2 ( + a VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, + b VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, + c VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, + d VARCHAR(60) COLLATE utf8mb4_general_ci DEFAULT NULL, + UNIQUE KEY k1 (a, b, c) +) ENGINE=InnoDB; + +INSERT INTO t2 (`d`,`a`,`c`,`b`) VALUES + ('2021-12-04 16:42:53','A','B','A'), + ('2021-12-04 16:42:54','A','B','A'), + ('2021-12-04 16:42:53','A','B','A'), + ('2021-12-04 16:42:55','A','B','A') AS new + ON DUPLICATE KEY UPDATE + `d`=new.`d`, + `a`=new.`a`, + `c`=new.`c`, + `b`=new.`b`; + +--source include/rpl/sync_to_replica.inc + +--let $assert_text= 'Bug#105802 leaves one row after duplicate-key updates' +--let $assert_cond= [SELECT COUNT(*) FROM t2] = 1 +--source include/assert.inc + +--let $assert_text= 'Bug#105802 final value of d is 2021-12-04 16:42:55' +--let $assert_cond= [SELECT COUNT(*) FROM t2 WHERE d="2021-12-04 16:42:55"] = 1 +--source include/assert.inc + +--let diff_tables=master:test.t2, slave:test.t2 +--source include/diff_tables.inc + +--source include/rpl/connection_source.inc +DROP TABLE t2; +--source include/rpl/sync_to_replica.inc + +--echo # +--echo # Three entries in the same hash bucket: IODKU cycles through BI='foo' +--echo # three times, placing E1(bi_start=100), E3(bi_start=300), and +--echo # E5(bi_start=500) in the same bucket. The inner loop must find the +--echo # true global minimum (E1) regardless of bucket iteration order. +--source include/rpl/connection_source.inc +CREATE TABLE t3 ( + id INT DEFAULT NULL, + test VARCHAR(255) DEFAULT 'hallo', + UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t3 VALUES (1,'foo'),(2,'test'),(3,'lorem'); +# Generates five update pairs for id=1: +# E1 BI='foo' AI='test', E2 BI='test' AI='foo', +# E3 BI='foo' AI='bar', E4 BI='bar' AI='foo', +# E5 BI='foo' AI='baz' +# Bucket hash('foo') holds E1, E3, E5 — three entries. +INSERT INTO t3 VALUES (1,'test'), (1,'foo'), (1,'bar'), (1,'foo'), (1,'baz') AS new + ON DUPLICATE KEY UPDATE test=new.test; + +--source include/rpl/sync_to_replica.inc + +--let $assert_text= 'Three-entry bucket: three rows survive on master' +--let $assert_cond= [SELECT COUNT(*) FROM t3] = 3 +--source include/assert.inc + +--let $assert_text= 'Three-entry bucket: id=1 reaches final value baz' +--let $assert_cond= [SELECT COUNT(*) FROM t3 WHERE id=1 AND test="baz"] = 1 +--source include/assert.inc + +--let diff_tables=master:test.t3, slave:test.t3 +--source include/diff_tables.inc + +--source include/rpl/connection_source.inc +DROP TABLE t3; +--source include/rpl/sync_to_replica.inc + +--echo # +--echo # Long ping-pong chain: 30-entry IODKU alternates BI between 'foo' and +--echo # 'bar', producing 15 entries in each bucket. The inner loop must find +--echo # the correct minimum-bi_start entry across all 15 candidates. +--source include/rpl/connection_source.inc +CREATE TABLE t4 ( + id INT DEFAULT NULL, + test VARCHAR(255) DEFAULT 'hallo', + UNIQUE KEY id (id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t4 VALUES (1,'foo'),(2,'x'),(3,'y'); +# 30 values: alternates bar/foo 14 times (28 entries), then bar, then done. +# Bucket 'foo': E1,E3,...,E29 (15 entries); bucket 'bar': E2,E4,...,E28,E30 (15 entries). +# Final state of id=1: test='done'. +INSERT INTO t4 VALUES + (1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), + (1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'), + (1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), + (1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'), + (1,'bar'),(1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'), + (1,'foo'),(1,'bar'),(1,'foo'),(1,'bar'),(1,'done') AS new + ON DUPLICATE KEY UPDATE test=new.test; + +--source include/rpl/sync_to_replica.inc + +--let $assert_text= 'Long chain: three rows survive on master' +--let $assert_cond= [SELECT COUNT(*) FROM t4] = 3 +--source include/assert.inc + +--let $assert_text= 'Long chain: id=1 reaches final value done' +--let $assert_cond= [SELECT COUNT(*) FROM t4 WHERE id=1 AND test="done"] = 1 +--source include/assert.inc + +--let diff_tables=master:test.t4, slave:test.t4 +--source include/diff_tables.inc + +--source include/rpl/connection_source.inc +DROP TABLE t4; +--source include/rpl/sync_to_replica.inc + +--echo # +--echo # Bug#106918: plain UPDATE on physically identical rows — HASH_SCAN must +--echo # apply updates in event order so physical row ordering is preserved. +--echo # Two rows share the same before-image (c1=1, c2=1) and therefore land +--echo # in the same hash bucket. The fix picks the entry with the smallest +--echo # bi_start (c2=10) for the first physical row and leaves c2=20 for the +--echo # second. An AUTO_INCREMENT primary key added afterwards assigns ids in +--echo # physical scan order, surfacing any ordering divergence as a data +--echo # mismatch caught by diff_tables. +--source include/rpl/connection_source.inc +CREATE TABLE t5 (c1 INT, c2 INT) ENGINE=InnoDB; +INSERT INTO t5 VALUES (1, 1), (1, 1), (2, 2); + +# In ROW format the binlog contains the resolved row images: +# E1 BI=(1,1) AI=(1,10), E2 BI=(1,1) AI=(1,20). +SET @n = 0; +--disable_warnings +UPDATE t5 SET c2 = (@n := @n + 10) WHERE c1 = 1; +--enable_warnings + +--source include/rpl/sync_to_replica.inc + +--source include/rpl/connection_source.inc +ALTER TABLE t5 ADD COLUMN id INT NOT NULL AUTO_INCREMENT, ADD PRIMARY KEY (id); + +--source include/rpl/sync_to_replica.inc + +--let $assert_text= 'Bug#106918 first physical row (id=1) has c2=10' +--let $assert_cond= [SELECT c2 FROM t5 WHERE id = 1] = 10 +--source include/assert.inc + +--let $assert_text= 'Bug#106918 second physical row (id=2) has c2=20' +--let $assert_cond= [SELECT c2 FROM t5 WHERE id = 2] = 20 +--source include/assert.inc + +--let diff_tables=master:test.t5, slave:test.t5 +--source include/diff_tables.inc + +--source include/rpl/connection_source.inc +DROP TABLE t5; +--source include/rpl/sync_to_replica.inc + +--source include/rpl/deinit.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index fac37e63379..a195a72a39b 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -9353,12 +9353,15 @@ int Rows_log_event::do_scan_and_update(Relay_log_info const *rli) { do { store_record(table, record[1]); - /** - If there are collisions we need to be sure that this is - indeed the record we want. Loop through all records for - the given key and explicitly compare them against the - record we got from the storage engine. - */ + /* + Scan every entry in this hash bucket, tracking the one whose + before-image matches the current row AND whose bi_start is the + smallest (i.e. appeared earliest in the binlog). When a single + IODKU updates the same row multiple times, several entries can + share the same before-image value; applying the earliest one + first keeps chained updates in event order. + */ + HASH_ROW_ENTRY *best = nullptr; while (entry) { m_curr_row = entry->positions->bi_start; m_curr_row_end = entry->positions->bi_ends; @@ -9367,11 +9370,14 @@ int Rows_log_event::do_scan_and_update(Relay_log_info const *rli) { if ((error = unpack_current_row(rli, &m_cols, false /*is not AI*/))) goto close_table; - if (record_compare(table, &this->m_local_cols)) - m_hash.next(&entry); - else - break; // we found a match + if (!record_compare(table, &this->m_local_cols) && + (best == nullptr || + entry->positions->bi_start < best->positions->bi_start)) { + best = entry; + } + m_hash.next(&entry); } + entry = best; /** We found the entry we needed, just apply the changes.