Bug #48058 MySQL 5.1 adding extra rows on certain queries with redundant WHERE clause items
Submitted: 14 Oct 2009 20:41 Modified: 25 Jun 2010 21:41
Reporter: Patrick Crews Email Updates:
Status: Duplicate Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.1 OS:Any
Assigned to: Assigned Account CPU Architecture:Any
Tags: extra rows, regression

[14 Oct 2009 20:41] Patrick Crews
Description:
MySQL 5.1 is adding extra rows for certain queries that use complicated range predicates

From the attached test case:
SELECT table2 .`date_key`, table1.varchar_key  
FROM C table1  JOIN C table2  ON table2 .`int_key`  
WHERE table1 .`varchar_key`  IN ( 'r' )  OR table1 .`varchar_key`  IN ( 'i' , 'o' , 'w' )  OR table1 .`varchar_key`  > 'o'  ORDER BY table2.date_key, table1.varchar_key;

Produces a number of extra rows that all have a table1.varchar_key value of 'o'.  It appears that the server is doubling the correct results (ie if the proper answer is 3 rows, 5.1 is returning 6...)

How to repeat:
MTR test case (set up for running on 5.1)
Uncomment some of the SET statements if you are running on 6.0

#/* Server0: MySQL 6.0.14-alpha-gcov-debug-log */
#/* Server1: MySQL 5.1.40-debug-log */

#/* The value of optimizer_switch is distinct between the two servers: */
#SET SESSION optimizer_switch = 'firstmatch=off,index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,loosescan=off,materialization=off,semijoin=off' ;
#/* Server 1 : SET SESSION optimizer_switch = 'index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on' */;

#/* The value of optimizer_use_mrr is distinct between the two servers: */
#SET SESSION optimizer_use_mrr = 'disable' ;
#/* Server 1 : SET SESSION optimizer_use_mrr = '' */;

/* The value of engine_condition_pushdown is common between the two servers: */
/*!50400 SET SESSION engine_condition_pushdown = 1 */;

#/* The value of join_cache_level is distinct between the two servers: */
#SET SESSION join_cache_level = 1; 
#/* Server 1 : SET SESSION join_cache_level = '' */;

#/* The value of debug is distinct between the two servers: */
#SET SESSION debug = '+d,optimizer_no_icp' ;
#/* Server 1 : SET SESSION debug = '' */;

#/* Begin test case for query 0 */

--disable_warnings
DROP TABLE /*! IF EXISTS */ C;
--enable_warnings

CREATE TABLE `C` (
  `int_key` int(11) DEFAULT NULL,
  `date_key` date DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  KEY `int_key` (`int_key`),
  KEY `date_key` (`date_key`),
  KEY `varchar_key` (`varchar_key`,`int_key`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO `C` VALUES (2,NULL,'w');
INSERT INTO `C` VALUES (9,'2001-09-19','m');
INSERT INTO `C` VALUES (3,'2004-09-12','m');
INSERT INTO `C` VALUES (9,NULL,'k');
INSERT INTO `C` VALUES (NULL,'2002-07-19','r');
INSERT INTO `C` VALUES (9,'2002-12-16','t');
INSERT INTO `C` VALUES (3,'2006-02-08','j');
INSERT INTO `C` VALUES (8,'2006-08-28','u');
INSERT INTO `C` VALUES (8,'2001-04-14','h');
INSERT INTO `C` VALUES (53,'2000-01-05','o');
INSERT INTO `C` VALUES (0,'2003-12-06',NULL);
INSERT INTO `C` VALUES (5,'1900-01-01','k');
INSERT INTO `C` VALUES (166,'2002-11-27','e');
INSERT INTO `C` VALUES (3,NULL,'n');
INSERT INTO `C` VALUES (0,'2003-05-27','t');
INSERT INTO `C` VALUES (1,'2005-05-03','c');
INSERT INTO `C` VALUES (9,'2001-04-18','m');
INSERT INTO `C` VALUES (5,'2005-12-27','y');
INSERT INTO `C` VALUES (6,'2004-08-20','f');
INSERT INTO `C` VALUES (2,'1900-01-01','d');

# This is just here to help figure out which server was right
# Comparing the .results of 5.1 and 6.0 will show that 5.1 is 
# 'fudging' extra rows
SELECT * 
FROM C table1  JOIN C table2  ON table2 .`int_key`
ORDER BY table1.int_key, table1.date_key, table1.varchar_key, table2.int_key, table2.date_key, table2.varchar_key; 

SELECT table2 .`date_key`, table1.varchar_key  
FROM C table1  JOIN C table2  ON table2 .`int_key`  
WHERE table1 .`varchar_key`  IN ( 'r' )  OR table1 .`varchar_key`  IN ( 'i' , 'o' , 'w' )  OR table1 .`varchar_key`  > 'o'  ORDER BY table2.date_key, table1.varchar_key;

#/* Diff: */

#/* --- /tmp//randgen2726-1255547636-server0.dump	2009-10-14 15:13:56.000000000 -0400
# +++ /tmp//randgen2726-1255547636-server1.dump	2009-10-14 15:13:56.000000000 -0400
# @@ -12,6 +12,9 @@
#  1900-01-01
#  1900-01-01
#  1900-01-01
# +1900-01-01
# +1900-01-01
# +2000-01-05
#  2000-01-05
#  2000-01-05
#  2000-01-05
# @@ -26,6 +29,8 @@
#  2001-04-14
#  2001-04-14
#  2001-04-14
# +2001-04-14
# +2001-04-18
#  2001-04-18
#  2001-04-18
#  2001-04-18
# @@ -40,6 +45,8 @@
#  2001-09-19
#  2001-09-19
#  2001-09-19
# +2001-09-19
# +2002-11-27
#  2002-11-27
#  2002-11-27
#  2002-11-27
# @@ -54,6 +61,8 @@
#  2002-12-16
#  2002-12-16
#  2002-12-16
# +2002-12-16
# +2004-08-20
#  2004-08-20
#  2004-08-20
#  2004-08-20
# @@ -68,6 +77,8 @@
#  2004-09-12
#  2004-09-12
#  2004-09-12
# +2004-09-12
# +2005-05-03
#  2005-05-03
#  2005-05-03
#  2005-05-03
# @@ -82,6 +93,8 @@
#  2005-12-27
#  2005-12-27
#  2005-12-27
# +2005-12-27
# +2006-02-08
#  2006-02-08
#  2006-02-08
#  2006-02-08
# @@ -96,6 +109,10 @@
#  2006-08-28
#  2006-08-28
#  2006-08-28
# +2006-08-28
# +NULL
# +NULL
# +NULL
#  NULL
#  NULL
#  NULL */

DROP TABLE C;
#/* End of test case for query 0 */

Suggested fix:
Ensure correct query processing.
[19 Oct 2009 15:51] Patrick Crews
EXPLAIN output:
6.0:
EXPLAIN SELECT table2 .`date_key`, table1.varchar_key  
FROM C table1  JOIN C table2  ON table2 .`int_key`  
WHERE table1 .`varchar_key`  IN ( 'r' )  OR table1 .`varchar_key`  IN ( 'i' , 'o' , 'w' ) 
OR table1 .`varchar_key`  > 'o'  ORDER BY table2.date_key, table1.varchar_key;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	table1	range	varchar_key	varchar_key	4	NULL	9	Using where; Using index; Using temporary; Using filesort
1	SIMPLE	table2	ALL	NULL	NULL	NULL	NULL	20	Using where; Using join buffer

5.1:
EXPLAIN SELECT table2 .`date_key`, table1.varchar_key  
FROM C table1  JOIN C table2  ON table2 .`int_key`  
WHERE table1 .`varchar_key`  IN ( 'r' )  OR table1 .`varchar_key`  IN ( 'i' , 'o' , 'w' ) 
OR table1 .`varchar_key`  > 'o'  ORDER BY table2.date_key, table1.varchar_key;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	table1	range	varchar_key	varchar_key	4	NULL	10	Using where; Using index; Using temporary; Using filesort
1	SIMPLE	table2	ALL	NULL	NULL	NULL	NULL	20	Using where; Using join buffer
[25 Jun 2010 21:41] Gleb Shchepa
Simplified test case:

Server version: 5.1.41-debug-log Source distribution

mysql> CREATE TABLE t1 (c INT PRIMARY KEY);
Query OK, 0 rows affected (0.01 sec)

mysql> INSERT INTO t1 (c) VALUES (1), (2), (3), (4), (9);
Query OK, 5 rows affected (0.00 sec)
Records: 5  Duplicates: 0  Warnings: 0

mysql> SELECT c FROM t1 WHERE c IN (11) OR c IN (12) OR c > 5;
+---+
| c |
+---+
| 9 |
| 9 |
+---+
2 rows in set (0.00 sec)

This bug is a duplicate of the bug #48665.