Description:
Subquery optimizer chooses materialization resulting in wrong result.
How to repeat:
Test case:
CREATE TABLE `t1` (
`cl` text COLLATE utf8_bin,
`fa` varchar(250) COLLATE utf8_bin DEFAULT NULL,
`ti` text COLLATE utf8_bin,
`id` varchar(250) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `key1` (`fa`),
KEY `IDX_1` (`ti`(250))
) ENGINE=InnoDB;
INSERT INTO t1 VALUES('1','aaa111111111aaaaaaaaaaaaaaaa_ddff','AAA-20220620','001');
CREATE TABLE `t2` (
`ID` varchar(250) COLLATE utf8_bin NOT NULL,
`ide` varchar(128) COLLATE utf8_bin NOT NULL,
`ctime` bigint(20) DEFAULT '0',
PRIMARY KEY (`ide`),
KEY `Index1` (`ctime`)
) ENGINE=InnoDB;
INSERT INTO t2 VALUES('222222222222222bbbbbbbbbbbbbbb','111111111aaaaaaaaaaaaaaaa_ddff','1');
CREATE TABLE `t3` (
`ID` varchar(250) COLLATE utf8_bin NOT NULL,
`ide` varchar(128) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`ide`)
) ENGINE=InnoDB;
INSERT INTO t3 VALUES('222222222222222bbbbbbbbbbbbbbb','111111111aaaaaaaaaaaaaaaa_ddff');
CREATE TABLE `t4` (
`ts` text COLLATE utf8_bin,
`orderid` varchar(100) COLLATE utf8_bin DEFAULT NULL,
UNIQUE KEY `key_1` (`orderid`)) ENGINE=InnoDB;
INSERT INTO t4 VALUES('completed','AAA-20220620');
SET GLOBAL log_bin_trust_function_creators=1;
DELIMITER //
CREATE FUNCTION getByteLength(inputText text)
RETURNS INT
BEGIN
RETURN(length(inputText));
END//
DELIMITER ;
CASE1 RESULT:
SELECT count(1) AS count
FROM (
SELECT substring(fa, getByteLength('11') + 2) AS ide,
ti
FROM t1
WHERE cl = 1
) AS TEMP
WHERE TEMP.ide IN (
SELECT /*+ SEMIJOIN(materialization) */ ap.Ide
FROM t2 AS ap
JOIN t3 AS ex ON ap.Ide = ex.Ide
WHERE ap.ctime > 0
)
AND TEMP.ti IN (
SELECT orderid
FROM t4
WHERE ts IN (
'running',
'completed',
'cancel'
)
);
+-------+
| count |
+-------+
| 0 |
+-------+
1 row in set (0.00 sec)
CASE2 RESULT:
SELECT count(1) AS count
FROM (
SELECT substring(fa, getByteLength('11') + 2) AS ide,
ti
FROM t1
WHERE cl = 1
) AS TEMP
WHERE TEMP.ide IN (
SELECT ap.Ide
FROM t2 AS ap
JOIN t3 AS ex ON ap.Ide = ex.Ide
WHERE ap.ctime > 0
)
AND TEMP.ti IN (
SELECT orderid
FROM t4
WHERE ts IN (
'running',
'completed',
'cancel'
)
);
+-------+
| count |
+-------+
| 1 |
+-------+
1 row in set (0.00 sec)