Description:
The following query returns different results depending on the setting optimizer_switch='derived_merge=[on|off]' or the use of derived table in subquery as part of semijoin:
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
+----+---------+-------------+-------------+
| pk | col_int | col_int_key | col_varchar |
+----+---------+-------------+-------------+
| 1 | 1 | 8 | p |
| 2 | 8 | NULL | k |
| 3 | 4 | NULL | r |
| 4 | 2 | 2 | l |
| 6 | 3 | 0 | h |
| 7 | 0 | 1 | r |
| 9 | 0 | 6 | t |
| 10 | NULL | 9 | i |
| 12 | 1 | 1 | b |
| 13 | 0 | 9 | i |
| 14 | 0 | 0 | x |
| 15 | NULL | 8 | n |
| 17 | 0 | 7 | x |
| 19 | 4 | 41 | u |
| 20 | 8 | 9 | l |
+----+---------+-------------+-------------+
15 rows in set (0.01 sec)
The above result is from 5.7.8 / 5.8.0 with default settings.
mysql> SET optimizer_switch='derived_merge=off';
Query OK, 0 rows affected (0.00 sec)
<same query>
+----+---------+-------------+-------------+
| pk | col_int | col_int_key | col_varchar |
+----+---------+-------------+-------------+
| 1 | 1 | 8 | p |
| 2 | 8 | NULL | k |
| 3 | 4 | NULL | r |
| 9 | 0 | 6 | t |
| 10 | NULL | 9 | i |
| 12 | 1 | 1 | b |
| 13 | 0 | 9 | i |
| 15 | NULL | 8 | n |
| 17 | 0 | 7 | x |
| 19 | 4 | 41 | u |
| 20 | 8 | 9 | l |
+----+---------+-------------+-------------+
11 rows in set (0.01 sec)
The latter result is also given with 5.7.7 or earlier, and when replacing
LEFT JOIN (SELECT * FROM t2) AS INNR
with
LEFT JOIN t2 AS INNR
Suspecting regression in 5.7.8.
Git bisect says:
855ef39c280bb20b8944111378b99c7ac463033e is the first bad commit
commit 855ef39c280bb20b8944111378b99c7ac463033e
Author: Roy Lyseng <roy.lyseng@oracle.com>
Date: Mon Jun 29 14:40:51 2015 +0200
Bug#14358878: Wrong results on table left join view
Bug#15936817: Table left join view, unmatched rows problem where view has an if
Bug#15967464: View evaluation incorrect when joining to view with literal
Bug#20708288: Literal selected from derived table mentioned in outer join ..null
Bug#20841369: Left join to view with <> test causing too many results
How to repeat:
CREATE TABLE t1 (
pk INT NOT NULL,
col_date DATE,
PRIMARY KEY (pk)
);
INSERT INTO t1 VALUES (1,'2008-06-18');
INSERT INTO t1 VALUES (2,'2000-06-28');
INSERT INTO t1 VALUES (3,'2003-05-01');
INSERT INTO t1 VALUES (4,NULL);
INSERT INTO t1 VALUES (5,'2002-11-02');
INSERT INTO t1 VALUES (6,NULL);
INSERT INTO t1 VALUES (7,'2008-11-07');
INSERT INTO t1 VALUES (8,NULL);
INSERT INTO t1 VALUES (9,'2002-02-02');
INSERT INTO t1 VALUES (10,'2002-02-02');
INSERT INTO t1 VALUES (11,'2003-03-09');
INSERT INTO t1 VALUES (12,'2003-04-10');
INSERT INTO t1 VALUES (13,'2002-02-02');
INSERT INTO t1 VALUES (14,'2009-04-17');
INSERT INTO t1 VALUES (15,'2002-02-02');
INSERT INTO t1 VALUES (16,NULL);
INSERT INTO t1 VALUES (17,'2002-02-02');
INSERT INTO t1 VALUES (18,'2001-03-01');
INSERT INTO t1 VALUES (19,'2006-01-19');
INSERT INTO t1 VALUES (20,NULL);
CREATE TABLE t2 (
col_date DATE,
pk INT NOT NULL,
PRIMARY KEY (pk)
);
INSERT INTO t2 VALUES ('2002-02-02',1);
CREATE TABLE t3 (
pk INT NOT NULL,
col_varchar VARCHAR(1),
col_int_key INT,
col_int INT,
PRIMARY KEY (pk),
KEY col_int_key (col_int_key)
);
INSERT INTO t3 VALUES (1,'p',8,1);
INSERT INTO t3 VALUES (2,'k',NULL,8);
INSERT INTO t3 VALUES (3,'r',NULL,4);
INSERT INTO t3 VALUES (4,'l',2,2);
INSERT INTO t3 VALUES (5,'b',NULL,NULL);
INSERT INTO t3 VALUES (6,'h',0,3);
INSERT INTO t3 VALUES (7,'r',1,0);
INSERT INTO t3 VALUES (8,NULL,8,7);
INSERT INTO t3 VALUES (9,'t',6,0);
INSERT INTO t3 VALUES (10,'i',9,NULL);
INSERT INTO t3 VALUES (11,'d',8,1);
INSERT INTO t3 VALUES (12,'b',1,1);
INSERT INTO t3 VALUES (13,'i',9,0);
INSERT INTO t3 VALUES (14,'x',0,0);
INSERT INTO t3 VALUES (15,'n',8,NULL);
INSERT INTO t3 VALUES (16,NULL,1,NULL);
INSERT INTO t3 VALUES (17,'x',7,0);
INSERT INTO t3 VALUES (18,'e',5,75);
INSERT INTO t3 VALUES (19,'u',41,4);
INSERT INTO t3 VALUES (20,'l',9,8);
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
-- Replacing inner derived table with regular table: Results OK
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN t2 AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
-- Setting derived_merge=off: Results OK
SET optimizer_switch='derived_merge=off';
SELECT pk, col_int, col_int_key, col_varchar
FROM t3 AS OUTR
WHERE col_int IN (
SELECT INNR.pk
FROM t1 AS INNR2
LEFT JOIN (SELECT * FROM t2) AS INNR
ON (INNR2.col_date <> INNR.col_date)
WHERE OUTR.col_int_key < 6
)
XOR OUTR.col_varchar >= 'h'
;
Description: The following query returns different results depending on the setting optimizer_switch='derived_merge=[on|off]' or the use of derived table in subquery as part of semijoin: SELECT pk, col_int, col_int_key, col_varchar FROM t3 AS OUTR WHERE col_int IN ( SELECT INNR.pk FROM t1 AS INNR2 LEFT JOIN (SELECT * FROM t2) AS INNR ON (INNR2.col_date <> INNR.col_date) WHERE OUTR.col_int_key < 6 ) XOR OUTR.col_varchar >= 'h' ; +----+---------+-------------+-------------+ | pk | col_int | col_int_key | col_varchar | +----+---------+-------------+-------------+ | 1 | 1 | 8 | p | | 2 | 8 | NULL | k | | 3 | 4 | NULL | r | | 4 | 2 | 2 | l | | 6 | 3 | 0 | h | | 7 | 0 | 1 | r | | 9 | 0 | 6 | t | | 10 | NULL | 9 | i | | 12 | 1 | 1 | b | | 13 | 0 | 9 | i | | 14 | 0 | 0 | x | | 15 | NULL | 8 | n | | 17 | 0 | 7 | x | | 19 | 4 | 41 | u | | 20 | 8 | 9 | l | +----+---------+-------------+-------------+ 15 rows in set (0.01 sec) The above result is from 5.7.8 / 5.8.0 with default settings. mysql> SET optimizer_switch='derived_merge=off'; Query OK, 0 rows affected (0.00 sec) <same query> +----+---------+-------------+-------------+ | pk | col_int | col_int_key | col_varchar | +----+---------+-------------+-------------+ | 1 | 1 | 8 | p | | 2 | 8 | NULL | k | | 3 | 4 | NULL | r | | 9 | 0 | 6 | t | | 10 | NULL | 9 | i | | 12 | 1 | 1 | b | | 13 | 0 | 9 | i | | 15 | NULL | 8 | n | | 17 | 0 | 7 | x | | 19 | 4 | 41 | u | | 20 | 8 | 9 | l | +----+---------+-------------+-------------+ 11 rows in set (0.01 sec) The latter result is also given with 5.7.7 or earlier, and when replacing LEFT JOIN (SELECT * FROM t2) AS INNR with LEFT JOIN t2 AS INNR Suspecting regression in 5.7.8. Git bisect says: 855ef39c280bb20b8944111378b99c7ac463033e is the first bad commit commit 855ef39c280bb20b8944111378b99c7ac463033e Author: Roy Lyseng <roy.lyseng@oracle.com> Date: Mon Jun 29 14:40:51 2015 +0200 Bug#14358878: Wrong results on table left join view Bug#15936817: Table left join view, unmatched rows problem where view has an if Bug#15967464: View evaluation incorrect when joining to view with literal Bug#20708288: Literal selected from derived table mentioned in outer join ..null Bug#20841369: Left join to view with <> test causing too many results How to repeat: CREATE TABLE t1 ( pk INT NOT NULL, col_date DATE, PRIMARY KEY (pk) ); INSERT INTO t1 VALUES (1,'2008-06-18'); INSERT INTO t1 VALUES (2,'2000-06-28'); INSERT INTO t1 VALUES (3,'2003-05-01'); INSERT INTO t1 VALUES (4,NULL); INSERT INTO t1 VALUES (5,'2002-11-02'); INSERT INTO t1 VALUES (6,NULL); INSERT INTO t1 VALUES (7,'2008-11-07'); INSERT INTO t1 VALUES (8,NULL); INSERT INTO t1 VALUES (9,'2002-02-02'); INSERT INTO t1 VALUES (10,'2002-02-02'); INSERT INTO t1 VALUES (11,'2003-03-09'); INSERT INTO t1 VALUES (12,'2003-04-10'); INSERT INTO t1 VALUES (13,'2002-02-02'); INSERT INTO t1 VALUES (14,'2009-04-17'); INSERT INTO t1 VALUES (15,'2002-02-02'); INSERT INTO t1 VALUES (16,NULL); INSERT INTO t1 VALUES (17,'2002-02-02'); INSERT INTO t1 VALUES (18,'2001-03-01'); INSERT INTO t1 VALUES (19,'2006-01-19'); INSERT INTO t1 VALUES (20,NULL); CREATE TABLE t2 ( col_date DATE, pk INT NOT NULL, PRIMARY KEY (pk) ); INSERT INTO t2 VALUES ('2002-02-02',1); CREATE TABLE t3 ( pk INT NOT NULL, col_varchar VARCHAR(1), col_int_key INT, col_int INT, PRIMARY KEY (pk), KEY col_int_key (col_int_key) ); INSERT INTO t3 VALUES (1,'p',8,1); INSERT INTO t3 VALUES (2,'k',NULL,8); INSERT INTO t3 VALUES (3,'r',NULL,4); INSERT INTO t3 VALUES (4,'l',2,2); INSERT INTO t3 VALUES (5,'b',NULL,NULL); INSERT INTO t3 VALUES (6,'h',0,3); INSERT INTO t3 VALUES (7,'r',1,0); INSERT INTO t3 VALUES (8,NULL,8,7); INSERT INTO t3 VALUES (9,'t',6,0); INSERT INTO t3 VALUES (10,'i',9,NULL); INSERT INTO t3 VALUES (11,'d',8,1); INSERT INTO t3 VALUES (12,'b',1,1); INSERT INTO t3 VALUES (13,'i',9,0); INSERT INTO t3 VALUES (14,'x',0,0); INSERT INTO t3 VALUES (15,'n',8,NULL); INSERT INTO t3 VALUES (16,NULL,1,NULL); INSERT INTO t3 VALUES (17,'x',7,0); INSERT INTO t3 VALUES (18,'e',5,75); INSERT INTO t3 VALUES (19,'u',41,4); INSERT INTO t3 VALUES (20,'l',9,8); SELECT pk, col_int, col_int_key, col_varchar FROM t3 AS OUTR WHERE col_int IN ( SELECT INNR.pk FROM t1 AS INNR2 LEFT JOIN (SELECT * FROM t2) AS INNR ON (INNR2.col_date <> INNR.col_date) WHERE OUTR.col_int_key < 6 ) XOR OUTR.col_varchar >= 'h' ; -- Replacing inner derived table with regular table: Results OK SELECT pk, col_int, col_int_key, col_varchar FROM t3 AS OUTR WHERE col_int IN ( SELECT INNR.pk FROM t1 AS INNR2 LEFT JOIN t2 AS INNR ON (INNR2.col_date <> INNR.col_date) WHERE OUTR.col_int_key < 6 ) XOR OUTR.col_varchar >= 'h' ; -- Setting derived_merge=off: Results OK SET optimizer_switch='derived_merge=off'; SELECT pk, col_int, col_int_key, col_varchar FROM t3 AS OUTR WHERE col_int IN ( SELECT INNR.pk FROM t1 AS INNR2 LEFT JOIN (SELECT * FROM t2) AS INNR ON (INNR2.col_date <> INNR.col_date) WHERE OUTR.col_int_key < 6 ) XOR OUTR.col_varchar >= 'h' ;