Description:
The following query returns different results depending on the setting optimizer_switch='derived_merge=[on|off]' or the use of derived table in subquery as part of semijoin:
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
+----+---------+-------------+-------------+
| pk | col_int | col_int_key | col_varchar |
+----+---------+-------------+-------------+
| 1 | 1 | 8 | p |
| 2 | 8 | NULL | k |
| 3 | 4 | NULL | r |
| 4 | 2 | 2 | l |
| 6 | 3 | 0 | h |
| 7 | 0 | 1 | r |
| 9 | 0 | 6 | t |
| 10 | NULL | 9 | i |
| 12 | 1 | 1 | b |
| 13 | 0 | 9 | i |
| 14 | 0 | 0 | x |
| 15 | NULL | 8 | n |
| 17 | 0 | 7 | x |
| 19 | 4 | 41 | u |
| 20 | 8 | 9 | l |
+----+---------+-------------+-------------+
15 rows in set (0.01 sec)
The above result is from 5.7.8 / 5.8.0 with default settings.
mysql> SET optimizer_switch='derived_merge=off';
Query OK, 0 rows affected (0.00 sec)
<same query>
+----+---------+-------------+-------------+
| pk | col_int | col_int_key | col_varchar |
+----+---------+-------------+-------------+
| 1 | 1 | 8 | p |
| 2 | 8 | NULL | k |
| 3 | 4 | NULL | r |
| 9 | 0 | 6 | t |
| 10 | NULL | 9 | i |
| 12 | 1 | 1 | b |
| 13 | 0 | 9 | i |
| 15 | NULL | 8 | n |
| 17 | 0 | 7 | x |
| 19 | 4 | 41 | u |
| 20 | 8 | 9 | l |
+----+---------+-------------+-------------+
11 rows in set (0.01 sec)
The latter result is also given with 5.7.7 or earlier, and when replacing
LEFT JOIN (SELECT * FROM t2) AS INNR
with
LEFT JOIN t2 AS INNR
Suspecting regression in 5.7.8.
Git bisect says:
855ef39c280bb20b8944111378b99c7ac463033e is the first bad commit
commit 855ef39c280bb20b8944111378b99c7ac463033e
Author: Roy Lyseng <roy.lyseng@oracle.com>
Date: Mon Jun 29 14:40:51 2015 +0200
Bug#14358878: Wrong results on table left join view
Bug#15936817: Table left join view, unmatched rows problem where view has an if
Bug#15967464: View evaluation incorrect when joining to view with literal
Bug#20708288: Literal selected from derived table mentioned in outer join ..null
Bug#20841369: Left join to view with <> test causing too many results
How to repeat:
CREATE TABLE t1 (
pk INT NOT NULL,
col_date DATE,
PRIMARY KEY (pk)
);
INSERT INTO t1 VALUES (1,'2008-06-18');
INSERT INTO t1 VALUES (2,'2000-06-28');
INSERT INTO t1 VALUES (3,'2003-05-01');
INSERT INTO t1 VALUES (4,NULL);
INSERT INTO t1 VALUES (5,'2002-11-02');
INSERT INTO t1 VALUES (6,NULL);
INSERT INTO t1 VALUES (7,'2008-11-07');
INSERT INTO t1 VALUES (8,NULL);
INSERT INTO t1 VALUES (9,'2002-02-02');
INSERT INTO t1 VALUES (10,'2002-02-02');
INSERT INTO t1 VALUES (11,'2003-03-09');
INSERT INTO t1 VALUES (12,'2003-04-10');
INSERT INTO t1 VALUES (13,'2002-02-02');
INSERT INTO t1 VALUES (14,'2009-04-17');
INSERT INTO t1 VALUES (15,'2002-02-02');
INSERT INTO t1 VALUES (16,NULL);
INSERT INTO t1 VALUES (17,'2002-02-02');
INSERT INTO t1 VALUES (18,'2001-03-01');
INSERT INTO t1 VALUES (19,'2006-01-19');
INSERT INTO t1 VALUES (20,NULL);
CREATE TABLE t2 (
col_date DATE,
pk INT NOT NULL,
PRIMARY KEY (pk)
);
INSERT INTO t2 VALUES ('2002-02-02',1);
CREATE TABLE t3 (
pk INT NOT NULL,
col_varchar VARCHAR(1),
col_int_key INT,
col_int INT,
PRIMARY KEY (pk),
KEY col_int_key (col_int_key)
);
INSERT INTO t3 VALUES (1,'p',8,1);
INSERT INTO t3 VALUES (2,'k',NULL,8);
INSERT INTO t3 VALUES (3,'r',NULL,4);
INSERT INTO t3 VALUES (4,'l',2,2);
INSERT INTO t3 VALUES (5,'b',NULL,NULL);
INSERT INTO t3 VALUES (6,'h',0,3);
INSERT INTO t3 VALUES (7,'r',1,0);
INSERT INTO t3 VALUES (8,NULL,8,7);
INSERT INTO t3 VALUES (9,'t',6,0);
INSERT INTO t3 VALUES (10,'i',9,NULL);
INSERT INTO t3 VALUES (11,'d',8,1);
INSERT INTO t3 VALUES (12,'b',1,1);
INSERT INTO t3 VALUES (13,'i',9,0);
INSERT INTO t3 VALUES (14,'x',0,0);
INSERT INTO t3 VALUES (15,'n',8,NULL);
INSERT INTO t3 VALUES (16,NULL,1,NULL);
INSERT INTO t3 VALUES (17,'x',7,0);
INSERT INTO t3 VALUES (18,'e',5,75);
INSERT INTO t3 VALUES (19,'u',41,4);
INSERT INTO t3 VALUES (20,'l',9,8);
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
-- Replacing inner derived table with regular table: Results OK
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN t2 AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
-- Setting derived_merge=off: Results OK
SET optimizer_switch='derived_merge=off';
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;