commit ed5f36312549cbb0035825be006c80a7124ca982 Author: Dmitry Lenev Date: Thu Mar 7 14:48:59 2024 +0100 Bug #114248: Incomplete query results when backward scan and concurrent page merge. Possible minimal fix. Problem: Query over InnoDB table that uses backward scan over the index occasionally might return incorrect/incomplete results when changes to table (for example, DELETEs in other or even the same connection followed by asynchronous purge) cause concurrent B-tree page merges. Cause: The problem occurs when persistent cursor which is used to scan over index in backwards direction stops on infimum record of the page to which it points currently and releases all latches it has, before moving to the previous page. At this point merge from the previous page to cursor's current one can happen (because cursor doesn't hold latch on current or previous page). During this merge records from the previous page are moved over infimum record and placed before any old user records in the current page. When later our persistent cursor resumes its iteration it might use optimistic approach to cursor restoration which won't detect this kind of page update and resumes the iteration right from infimum record, effectively skipping the moved records. Solution: This patch solves the problem by forcing persistent cursor to use pessimistic approach to cursor restoration in such cases. With this approach cursor restoration is performed by looking up and continuing from user record which preceded infimum record when cursor stopped iteration and released the latches. Indeed, in this case records which were moved during the merge will be visited by cursor as they precede this old-post-infimum record in the page. This forcing of pessimistic restore is achieved by increasing page's modify_clock version counter for the page merged into, when merge happens from the previous page (normally this version counter is only incremented when we delete records from the page or the whole page). 
Theoretically, this might also be done when we are merging into page the page which follows it. But it is not clear if it is really required, as forward scan over the index is not affected by this problem. In forward scan case different approach to latching is used when we switch between B-tree leaf pages - we always acquire latch on the next page before releasing latch on the current one. As a result concurrent merges from the next page to the current one are blocked. Note that the same approach to latching can't be used for backward iteration as it will mean that latching happens in the opposite order which will lead to deadlocks. Note: It is quite possible that there are more scenarios which should be covered by this patch and there is a better way to solve this issue. But we feel that required investigation and bigger changes are more appropriate for Oracle MySQL Team. diff --git a/mysql-test/suite/innodb/r/innodb_backward_scan_conc_merge.result b/mysql-test/suite/innodb/r/innodb_backward_scan_conc_merge.result new file mode 100644 index 00000000000..9cb2a52226b --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_backward_scan_conc_merge.result @@ -0,0 +1,89 @@ +# +# Bug#114248 : Incomplete query results when backward scan and concurrent page merge +# +CREATE TABLE testTable (`key` INT UNSIGNED, version BIGINT UNSIGNED, rowmarker CHAR(3) NOT NULL DEFAULT 'aaa', value MEDIUMBLOB, PRIMARY KEY (`key`, version)) ENGINE=InnoDB; +CREATE TABLE stopper (i INT); +INSERT INTO stopper VALUES (1); +CREATE PROCEDURE lft() BEGIN +DECLARE rnd VARBINARY(1024); +SET rnd = RANDOM_BYTES(1024); +INSERT INTO testTable (`key`, version, value) VALUES (0, 18446744073709551615, LEFT(rnd, 1)); +WHILE (SELECT COUNT(*) FROM stopper) DO +UPDATE testTable SET value = LEFT(rnd, RAND()*1023+1) WHERE `key` = 0; +END WHILE; +END | +CREATE PROCEDURE rght() BEGIN +DECLARE rnd VARBINARY(1024); +SET rnd = RANDOM_BYTES(1024); +INSERT INTO testTable (`key`, version, value) VALUES (2, 
18446744073709551615, LEFT(rnd, 1)); +WHILE (SELECT COUNT(*) FROM stopper) DO +UPDATE testTable SET value = LEFT(rnd, RAND()*1023+1) WHERE `key` = 2; +END WHILE; +END | +CREATE PROCEDURE mdl() BEGIN +DECLARE rnd VARBINARY(1024); +DECLARE v BIGINT UNSIGNED DEFAULT 0; +SET rnd = RANDOM_BYTES(1024); +WHILE (SELECT COUNT(*) FROM stopper) DO +SET v = v + 1; +INSERT INTO testTable (`key`, version, value) VALUES (1, 18446744073709551615 - v, LEFT(rnd, RAND()*1023+1)); +IF RAND() <= 0.05 THEN +DELETE from testTable WHERE `key`=1 AND version > 18446744073709551615 - v; +END IF; +END WHILE; +END| +CREATE PROCEDURE ck() BEGIN +DECLARE global_max_seen BIGINT UNSIGNED DEFAULT 0; +foo: WHILE global_max_seen < 5000 DO +BEGIN +DECLARE done INT DEFAULT 0; +DECLARE local_max_seen BIGINT UNSIGNED DEFAULT 0; +DECLARE k INT; +DECLARE u BIGINT UNSIGNED; +DECLARE v BIGINT UNSIGNED; +DECLARE c1 CURSOR FOR select `key`, version FROM testTable WHERE `key` >= 1 AND `key` <= 1 ORDER BY `key` DESC; +DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET done = 1; +OPEN c1; +REPEAT +FETCH c1 INTO k, v; +SET u = 18446744073709551615 - v; +IF u > local_max_seen THEN +SET local_max_seen = u; +END IF; +UNTIL done END REPEAT; +CLOSE c1; +IF local_max_seen < global_max_seen THEN +SELECT "ERROR! local_max_seen < global_max_seen!" AS msg, local_max_seen, global_max_seen; +LEAVE foo; +END IF; +SET global_max_seen = local_max_seen; +END; +END WHILE; +END| +connect con1,localhost,root,,; +CALL lft(); +connect con2,localhost,root,,; +CALL mdl(); +connect con3,localhost,root,,; +CALL rght(); +connect con4,localhost,root,,; +# +# Check procedure is not supposed to return ERROR. 
+CALL ck(); +# +# Stop activity in other connections +DELETE FROM stopper; +disconnect con4; +connection con1; +disconnect con1; +connection con2; +disconnect con2; +connection con3; +disconnect con3; +connection default; +DROP TABLE stopper; +DROP TABLE testTable; +DROP PROCEDURE lft; +DROP PROCEDURE mdl; +DROP PROCEDURE rght; +DROP PROCEDURE ck; diff --git a/mysql-test/suite/innodb/t/innodb_backward_scan_conc_merge.test b/mysql-test/suite/innodb/t/innodb_backward_scan_conc_merge.test new file mode 100644 index 00000000000..fb2c0e1516f --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_backward_scan_conc_merge.test @@ -0,0 +1,124 @@ +--echo # +--echo # Bug#114248 : Incomplete query results when backward scan and concurrent page merge +--echo # +CREATE TABLE testTable (`key` INT UNSIGNED, version BIGINT UNSIGNED, rowmarker CHAR(3) NOT NULL DEFAULT 'aaa', value MEDIUMBLOB, PRIMARY KEY (`key`, version)) ENGINE=InnoDB; +CREATE TABLE stopper (i INT); +INSERT INTO stopper VALUES (1); + +DELIMITER |; + +CREATE PROCEDURE lft() BEGIN + DECLARE rnd VARBINARY(1024); + SET rnd = RANDOM_BYTES(1024); + INSERT INTO testTable (`key`, version, value) VALUES (0, 18446744073709551615, LEFT(rnd, 1)); + WHILE (SELECT COUNT(*) FROM stopper) DO + UPDATE testTable SET value = LEFT(rnd, RAND()*1023+1) WHERE `key` = 0; + END WHILE; +END | + +CREATE PROCEDURE rght() BEGIN + DECLARE rnd VARBINARY(1024); + SET rnd = RANDOM_BYTES(1024); + INSERT INTO testTable (`key`, version, value) VALUES (2, 18446744073709551615, LEFT(rnd, 1)); + WHILE (SELECT COUNT(*) FROM stopper) DO + UPDATE testTable SET value = LEFT(rnd, RAND()*1023+1) WHERE `key` = 2; + END WHILE; +END | + +CREATE PROCEDURE mdl() BEGIN + DECLARE rnd VARBINARY(1024); + DECLARE v BIGINT UNSIGNED DEFAULT 0; + SET rnd = RANDOM_BYTES(1024); + + WHILE (SELECT COUNT(*) FROM stopper) DO + SET v = v + 1; + INSERT INTO testTable (`key`, version, value) VALUES (1, 18446744073709551615 - v, LEFT(rnd, RAND()*1023+1)); + + IF RAND() <= 0.05 THEN 
+ DELETE from testTable WHERE `key`=1 AND version > 18446744073709551615 - v; + END IF; + END WHILE; +END| + +CREATE PROCEDURE ck() BEGIN + DECLARE global_max_seen BIGINT UNSIGNED DEFAULT 0; + + foo: WHILE global_max_seen < 5000 DO + BEGIN + DECLARE done INT DEFAULT 0; + DECLARE local_max_seen BIGINT UNSIGNED DEFAULT 0; + DECLARE k INT; + DECLARE u BIGINT UNSIGNED; + DECLARE v BIGINT UNSIGNED; + DECLARE c1 CURSOR FOR select `key`, version FROM testTable WHERE `key` >= 1 AND `key` <= 1 ORDER BY `key` DESC; + DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET done = 1; + + OPEN c1; + + REPEAT + FETCH c1 INTO k, v; + SET u = 18446744073709551615 - v; + IF u > local_max_seen THEN + SET local_max_seen = u; + END IF; + UNTIL done END REPEAT; + CLOSE c1; + + IF local_max_seen < global_max_seen THEN + SELECT "ERROR! local_max_seen < global_max_seen!" AS msg, local_max_seen, global_max_seen; + LEAVE foo; + END IF; + SET global_max_seen = local_max_seen; + END; + END WHILE; +END| +DELIMITER ;| + +--enable_connect_log + +--connect (con1,localhost,root,,) +--send CALL lft() + +--connect (con2,localhost,root,,) +--send CALL mdl() + +--connect (con3,localhost,root,,) +--send CALL rght() + +--connect (con4,localhost,root,,) +--echo # +--echo # Check procedure is not supposed to return ERROR. 
+CALL ck(); + +--echo # +--echo # Stop activity in other connections +DELETE FROM stopper; + +--disconnect con4 +--source include/wait_until_disconnected.inc + +--connection con1 +--reap +--disconnect con1 +--source include/wait_until_disconnected.inc + +--connection con2 +--reap +--disconnect con2 +--source include/wait_until_disconnected.inc + +--connection con3 +--reap +--disconnect con3 +--source include/wait_until_disconnected.inc + +--connection default + +--disable_connect_log + +DROP TABLE stopper; +DROP TABLE testTable; +DROP PROCEDURE lft; +DROP PROCEDURE mdl; +DROP PROCEDURE rght; +DROP PROCEDURE ck; diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index e0d451d7af7..511c6a3b963 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -3254,6 +3254,25 @@ retry: goto err_exit; } + /* When persistent cursor is used to scan over index in backwards + direction it stops on infimum record of its current page and releases + all latches it has, before switching from the cursor's current page to + the previous one. At this point merge from the previous page to cursor's + current one might happen. During this merge records from the previous + page will be moved over cursor position/infimum record which is used to continue iteration in optimistic case, making moved records + invisible to the scan. + We force such cursor to use pessimistic approach of restoring its + position/continuing iteration, which is not affected by this problem + (as it relies on looking up user record which was visited by cursor + right before the infimum) by incrementing modification clock for page + being merged into. + The forward iteration seems to be unaffected by this problem as it + doesn't release latch on the current page before it acquires latch on + the next one when cursor switches pages. So merge from the next page + to the current one stays blocked. 
*/ + buf_block_modify_clock_inc(merge_block); + btr_search_drop_page_hash_index(block); #ifdef UNIV_BTR_DEBUG