Bug #47694 Setting join_cache_level= 1 produces extra, duplicate results
Submitted: 29 Sep 2009 1:46 Modified: 5 Nov 2009 10:32
Reporter: Patrick Crews Email Updates:
Status: Duplicate Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.1/5.4/6.0 OS:Any
Assigned to: Assigned Account CPU Architecture:Any
Tags: duplicate rows, join_cache_level, regression

[29 Sep 2009 1:46] Patrick Crews
Description:
For certain queries, setting join_cache_level = 1 is causing extra, duplicate rows to be added to the result sets:

For example, for this query:
SELECT 'j' FROM (  SELECT * FROM CC  ) table1  STRAIGHT_JOIN D table2  ON table2 .`varchar_key`  = table1 .`varchar_key`  WHERE ( table2 .`pk`  , table1 .`int_nokey`  )  NOT  IN (  SELECT `int_key`  , `int_nokey`  FROM CC  )   ;

We get this diff: (azalea with j_c_l = 0 vs. j_c_l = 1)
# 16:01:29 @@ -61,3 +61,20 @@
# 16:01:29  j
# 16:01:29  j
# 16:01:29  j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j
# 16:01:29 +j

How to repeat:
MTR test case:
Toggle the join_cache_level from 1 to 0 and observe the change in result sets

SET SESSION optimizer_switch = 'firstmatch=off,index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,loosescan=off,materialization=off,semijoin=off' ;

SET SESSION optimizer_use_mrr = 'disable' ;

SET SESSION engine_condition_pushdown = 'ON' ;

SET SESSION join_cache_level = 1; 

#/* Begin test case for query 0 */

--disable_warnings
DROP TABLE /*! IF EXISTS */ CC;
DROP TABLE /*! IF EXISTS */ D;
--enable_warnings

CREATE TABLE `CC` (
  `pk` int(11) NOT NULL AUTO_INCREMENT,
  `int_nokey` int(11) DEFAULT NULL,
  `int_key` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  PRIMARY KEY (`pk`),
  KEY `int_key` (`int_key`),
  KEY `varchar_key` (`varchar_key`,`int_key`)
) ENGINE=MEMORY AUTO_INCREMENT=30 DEFAULT CHARSET=latin1;
INSERT INTO `CC` VALUES (10,7,8,'v');
INSERT INTO `CC` VALUES (11,1,9,'r');
INSERT INTO `CC` VALUES (12,5,9,'a');
INSERT INTO `CC` VALUES (13,3,186,'m');
INSERT INTO `CC` VALUES (14,6,NULL,'y');
INSERT INTO `CC` VALUES (15,92,2,'j');
INSERT INTO `CC` VALUES (16,7,3,'d');
INSERT INTO `CC` VALUES (17,NULL,0,'z');
INSERT INTO `CC` VALUES (18,3,133,'e');
INSERT INTO `CC` VALUES (19,5,1,'h');
INSERT INTO `CC` VALUES (20,1,8,'b');
INSERT INTO `CC` VALUES (21,2,5,'s');
INSERT INTO `CC` VALUES (22,NULL,5,'e');
INSERT INTO `CC` VALUES (23,1,8,'j');
INSERT INTO `CC` VALUES (24,0,6,'e');
INSERT INTO `CC` VALUES (25,210,51,'f');
INSERT INTO `CC` VALUES (26,8,4,'v');
INSERT INTO `CC` VALUES (27,7,7,'x');
INSERT INTO `CC` VALUES (28,5,6,'m');
INSERT INTO `CC` VALUES (29,NULL,4,'c');
CREATE TABLE `D` (
  `pk` int(11) NOT NULL AUTO_INCREMENT,
  `int_nokey` int(11) DEFAULT NULL,
  `int_key` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  PRIMARY KEY (`pk`),
  KEY `int_key` (`int_key`),
  KEY `varchar_key` (`varchar_key`,`int_key`)
) ENGINE=MEMORY AUTO_INCREMENT=101 DEFAULT CHARSET=latin1;
INSERT INTO `D` VALUES (1,6,NULL,'r');
INSERT INTO `D` VALUES (2,8,0,'c');
INSERT INTO `D` VALUES (3,6,0,'o');
INSERT INTO `D` VALUES (4,6,7,'c');
INSERT INTO `D` VALUES (5,3,8,'d');
INSERT INTO `D` VALUES (6,9,4,'v');
INSERT INTO `D` VALUES (7,2,6,'m');
INSERT INTO `D` VALUES (8,1,5,'j');
INSERT INTO `D` VALUES (9,8,NULL,'f');
INSERT INTO `D` VALUES (10,0,NULL,'n');
INSERT INTO `D` VALUES (11,9,8,'z');
INSERT INTO `D` VALUES (12,8,8,'h');
INSERT INTO `D` VALUES (13,NULL,8,'q');
INSERT INTO `D` VALUES (14,0,1,'w');
INSERT INTO `D` VALUES (15,5,1,'z');
INSERT INTO `D` VALUES (16,1,5,'j');
INSERT INTO `D` VALUES (17,1,2,'a');
INSERT INTO `D` VALUES (18,6,7,'m');
INSERT INTO `D` VALUES (19,6,6,'n');
INSERT INTO `D` VALUES (20,1,4,'e');
INSERT INTO `D` VALUES (21,8,7,'u');
INSERT INTO `D` VALUES (22,1,0,'s');
INSERT INTO `D` VALUES (23,0,9,'u');
INSERT INTO `D` VALUES (24,4,3,'r');
INSERT INTO `D` VALUES (25,9,5,'g');
INSERT INTO `D` VALUES (26,8,1,'o');
INSERT INTO `D` VALUES (27,5,1,'w');
INSERT INTO `D` VALUES (28,9,5,'b');
INSERT INTO `D` VALUES (29,5,9,NULL);
INSERT INTO `D` VALUES (30,NULL,2,'y');
INSERT INTO `D` VALUES (31,NULL,5,'y');
INSERT INTO `D` VALUES (32,105,248,'u');
INSERT INTO `D` VALUES (33,0,0,'p');
INSERT INTO `D` VALUES (34,3,8,'s');
INSERT INTO `D` VALUES (35,1,1,'e');
INSERT INTO `D` VALUES (36,75,255,'d');
INSERT INTO `D` VALUES (37,9,9,'d');
INSERT INTO `D` VALUES (38,7,9,'c');
INSERT INTO `D` VALUES (39,NULL,3,'b');
INSERT INTO `D` VALUES (40,NULL,9,'t');
INSERT INTO `D` VALUES (41,4,6,NULL);
INSERT INTO `D` VALUES (42,0,4,'y');
INSERT INTO `D` VALUES (43,204,60,'c');
INSERT INTO `D` VALUES (44,0,7,'d');
INSERT INTO `D` VALUES (45,9,1,'x');
INSERT INTO `D` VALUES (46,8,6,'p');
INSERT INTO `D` VALUES (47,7,4,'e');
INSERT INTO `D` VALUES (48,8,NULL,'g');
INSERT INTO `D` VALUES (49,NULL,8,'x');
INSERT INTO `D` VALUES (50,6,0,'s');
INSERT INTO `D` VALUES (51,5,8,'e');
INSERT INTO `D` VALUES (52,2,151,'l');
INSERT INTO `D` VALUES (53,3,7,'p');
INSERT INTO `D` VALUES (54,7,6,'h');
INSERT INTO `D` VALUES (55,NULL,NULL,'m');
INSERT INTO `D` VALUES (56,145,23,'n');
INSERT INTO `D` VALUES (57,0,2,'v');
INSERT INTO `D` VALUES (58,1,4,'b');
INSERT INTO `D` VALUES (59,7,NULL,'x');
INSERT INTO `D` VALUES (60,3,NULL,'r');
INSERT INTO `D` VALUES (61,NULL,77,'t');
INSERT INTO `D` VALUES (62,2,NULL,'w');
INSERT INTO `D` VALUES (63,2,NULL,'w');
INSERT INTO `D` VALUES (64,2,7,'k');
INSERT INTO `D` VALUES (65,8,1,'a');
INSERT INTO `D` VALUES (66,6,9,'t');
INSERT INTO `D` VALUES (67,1,6,'z');
INSERT INTO `D` VALUES (68,NULL,2,'e');
INSERT INTO `D` VALUES (69,1,3,'q');
INSERT INTO `D` VALUES (70,0,0,'e');
INSERT INTO `D` VALUES (71,4,NULL,'v');
INSERT INTO `D` VALUES (72,1,6,'d');
INSERT INTO `D` VALUES (73,1,3,'u');
INSERT INTO `D` VALUES (74,27,195,'o');
INSERT INTO `D` VALUES (75,4,5,'b');
INSERT INTO `D` VALUES (76,6,2,'c');
INSERT INTO `D` VALUES (77,2,7,'q');
INSERT INTO `D` VALUES (78,248,25,NULL);
INSERT INTO `D` VALUES (79,NULL,NULL,'h');
INSERT INTO `D` VALUES (80,9,0,'d');
INSERT INTO `D` VALUES (81,75,98,'w');
INSERT INTO `D` VALUES (82,2,6,'m');
INSERT INTO `D` VALUES (83,9,5,'i');
INSERT INTO `D` VALUES (84,4,0,'w');
INSERT INTO `D` VALUES (85,0,3,'f');
INSERT INTO `D` VALUES (86,0,1,'k');
INSERT INTO `D` VALUES (87,1,1,'v');
INSERT INTO `D` VALUES (88,119,147,'c');
INSERT INTO `D` VALUES (89,1,3,'y');
INSERT INTO `D` VALUES (90,7,3,'h');
INSERT INTO `D` VALUES (91,2,NULL,NULL);
INSERT INTO `D` VALUES (92,7,2,'t');
INSERT INTO `D` VALUES (93,2,1,'l');
INSERT INTO `D` VALUES (94,6,8,'a');
INSERT INTO `D` VALUES (95,4,8,'r');
INSERT INTO `D` VALUES (96,5,8,'s');
INSERT INTO `D` VALUES (97,7,0,'z');
INSERT INTO `D` VALUES (98,1,1,'j');
INSERT INTO `D` VALUES (99,7,8,'c');
INSERT INTO `D` VALUES (100,2,5,'f');

 
SELECT 'j' 
FROM (  
SELECT * 
FROM CC  ) table1  STRAIGHT_JOIN D table2  ON table2 .`varchar_key`  = table1 .`varchar_key`  
WHERE ( table2 .`pk`  , table1 .`int_nokey`  )  NOT  IN (  
SELECT `int_key`  , `int_nokey`  
FROM CC  )   ;

#/* Diff: */

#/* --- /tmp//randgen25783-1254081690-server0.dump	2009-09-27 16:01:30.000000000 -0400
# +++ /tmp//randgen25783-1254081690-server1.dump	2009-09-27 16:01:30.000000000 -0400
# @@ -61,3 +61,20 @@
#  j
#  j
#  j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j
# +j */

DROP TABLE CC;
DROP TABLE D;
#/* End of test case for query 0 */

Suggested fix:
Ensure consistent result sets regardless of optimizer settings.
[30 Sep 2009 8:13] Philip Stoev
Setting a "regression" tag due to join_cache_level = 1 being the default for the 6.0 codebase.
[9 Oct 2009 11:29] Jørgen Løland
This problem can also be reproduced in 5.1 by replacing STRAIGHT_JOIN with JOIN. Reduced test case run in 5.1 follows:

CREATE TABLE `CC` (
  `pk` int(11) NOT NULL AUTO_INCREMENT,
  `int_nokey` int(11) DEFAULT NULL,
  `int_key` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  PRIMARY KEY (`pk`),
  KEY `int_key` (`int_key`),
  KEY `varchar_key` (`varchar_key`,`int_key`)
) ENGINE=MEMORY AUTO_INCREMENT=30 DEFAULT CHARSET=latin1;

INSERT INTO `CC` VALUES (3,NULL,0,'z');
INSERT INTO `CC` VALUES (4,3,133,'e');
INSERT INTO `CC` VALUES (5,5,1,'h');
INSERT INTO `CC` VALUES (6,NULL,5,'e');
INSERT INTO `CC` VALUES (7,1,8,'j');
INSERT INTO `CC` VALUES (8,0,6,'e');
INSERT INTO `CC` VALUES (9,210,51,'f');
INSERT INTO `CC` VALUES (10,8,4,'v');
INSERT INTO `CC` VALUES (1,7,7,'x');
INSERT INTO `CC` VALUES (2,NULL,4,'c');

CREATE TABLE `D` (
  `pk` int(11) NOT NULL AUTO_INCREMENT,
  `int_nokey` int(11) DEFAULT NULL,
  `int_key` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  PRIMARY KEY (`pk`),
  KEY `int_key` (`int_key`),
  KEY `varchar_key` (`varchar_key`,`int_key`)
) ENGINE=MEMORY AUTO_INCREMENT=101 DEFAULT CHARSET=latin1;

INSERT INTO `D` VALUES (4,6,7,'c');
INSERT INTO `D` VALUES (11,9,8,'z');
INSERT INTO `D` VALUES (12,8,8,'h');
INSERT INTO `D` VALUES (13,NULL,8,'q');
INSERT INTO `D` VALUES (16,1,5,'j');
INSERT INTO `D` VALUES (18,6,7,'m');
INSERT INTO `D` VALUES (20,1,4,'e');

SELECT *
FROM CC JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	int_nokey	int_key	varchar_key
2	NULL	4	c	4	6	7	c
3	NULL	0	z	11	9	8	z
4	3	133	e	20	1	4	e
5	5	1	h	12	8	8	h
6	NULL	5	e	20	1	4	e
7	1	8	j	16	1	5	j
8	0	6	e	20	1	4	e

SELECT *
FROM CC STRAIGHT_JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	int_nokey	int_key	varchar_key
4	3	133	e	20	1	4	e
5	5	1	h	12	8	8	h
7	1	8	j	16	1	5	j
8	0	6	e	20	1	4	e
[9 Oct 2009 12:08] Jørgen Løland
Run with the dataset above, jcl=0 behaves like 5.1 while jcl=1 behaves the other way around:

JCL=0:
======
SELECT *
FROM CC JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	varchar_key
2	NULL	4	c	4	c
3	NULL	0	z	11	z
4	3	133	e	20	e
5	5	1	h	12	h
6	NULL	5	e	20	e
7	1	8	j	16	j
8	0	6	e	20	e

SELECT *
FROM CC STRAIGHT_JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	varchar_key
4	3	133	e	20	e
5	5	1	h	12	h
7	1	8	j	16	j
8	0	6	e	20	e

JCL=1:
======
SELECT *
FROM CC JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	varchar_key
4	3	133	e	20	e
5	5	1	h	12	h
7	1	8	j	16	j
8	0	6	e	20	e

SELECT *
FROM CC STRAIGHT_JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
WHERE (D.`pk`, CC.`int_nokey`) 
NOT IN (SELECT `int_key`, `int_nokey`
              FROM CC)
ORDER BY CC.pk,D.pk;
pk	int_nokey	int_key	varchar_key	pk	varchar_key
2	NULL	4	c	4	c
3	NULL	0	z	11	z
4	3	133	e	20	e
5	5	1	h	12	h
6	NULL	5	e	20	e
7	1	8	j	16	j
8	0	6	e	20	e
[9 Oct 2009 12:12] Jørgen Løland
Analysis:

SELECT CC.pk, D.pk, CC.int_nokey
FROM CC STRAIGHT_JOIN D 
ON D.`varchar_key`=CC.`varchar_key`
ORDER BY CC.pk,D.pk;
pk	pk	int_nokey
2	4	NULL
3	11	NULL
4	20	3
5	12	5
6	20	NULL
7	16	1
8	20	0

SELECT `int_key`, `int_nokey` FROM CC;
int_key	int_nokey
0	NULL
133	3
1	5
5	NULL
8	1
6	0
51	210
4	8
7	7
4	NULL

For [<x> NOT IN <y>], where either x or y is NULL, the row shall be filtered away as per the standard. Hence, the correct result should be:

pk	int_nokey	int_key	varchar_key	pk	varchar_key
4	3	133	e	20	e
5	5	1	h	12	h
7	1	8	j	16	j
8	0	6	e	20	e
[9 Oct 2009 13:44] Jørgen Løland
Requesting new triage since this bug is also repeatable on 5.1
[19 Oct 2009 11:20] Jørgen Løland
SQL 2003 has these relevant sections, defining why the rows with NULL above should not be in the result set:

WHERE clause:
-------------
General Rules
1) Let T be the result of the preceding <from clause>.
2) The <search condition> is applied to each row of T. The result
   of the <where clause> is a table of those rows of T for which
   the result of the <search condition> is True.

SEARCH condition:
-----------------
Specify a condition that is True, False, or Unknown, depending on
the value of a <boolean value expression>.

BOOLEAN VALUE expression:
-------------------------
* NOT (Unknown) is Unknown.
[20 Oct 2009 9:07] Jørgen Løland
BUG#48177 is probably the root cause for this bug.
[5 Nov 2009 10:32] Jørgen Løland
Confirmed fixed by BUG#48177.