Bug #46555 MySQL processing LEFT JOIN in subqueries incorrectly with materialization = off
Submitted: 4 Aug 2009 20:42 Modified: 17 Aug 2011 14:23
Reporter: Patrick Crews Email Updates:
Status: Not a Bug Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.0, 5.1, 5.4/6.0 OS:Any
Assigned to: CPU Architecture:Any
Tags: azalea, IN, materialization, optimizer_switch, regression, subquery

[4 Aug 2009 20:42] Patrick Crews
Description:
Azalea is not processing LEFT JOIN in a subquery correctly if materialization = off.
This is also present in 5.0 and 5.1

For this query:
SELECT `varchar_nokey`  
FROM C  
WHERE `int_nokey`  IN (  
SELECT subquery_t1 .`int_nokey`  
FROM BB subquery_t1  LEFT  JOIN C subquery_t2  ON subquery_t2 .`varchar_key`  = subquery_t1 .`varchar_nokey`  
WHERE subquery_t1 .`varchar_nokey`  )   ;

There should be int_nokey values returned from this subquery (due to the LEFT JOIN) which should result in rows being returned for the outer query.

However, if materialization=off, the LEFT JOIN isn't being processed correctly.  From the appearance of things, there are *no* rows being returned from the subquery, and thus no rows in the outer query.

How to repeat:
RQG-generated test case.
Uncomment the appropriate Server0/Server1 lines depending on which version of the server is being tested.

Toggle the materialization switch (in the optimizer_switch setting) on and off and observe the change in behavior.  With the test as it is (materialization=on), the query is being processed correctly.

# Server0: version = 5.1.38-debug-log
# Server1: version = 5.4.4-alpha-debug-log

# The value of optimizer_switch is distinct between the two servers:
# Server 0 : SET SESSION optimizer_switch = 'index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on';
# Server 1 : SET SESSION optimizer_switch = 'firstmatch=on,index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,loosescan=on,materialization=on,semijoin=on';

# The value of optimizer_use_mrr is distinct between the two servers:
# Server 0 : SET SESSION optimizer_use_mrr = '';
# Server 1 : SET SESSION optimizer_use_mrr = 'force';

# The value of engine_condition_pushdown is common between the two servers:
/*!50400 SET SESSION engine_condition_pushdown = ON */;

# The value of join_cache_level is distinct between the two servers:
# Server 0 : SET SESSION join_cache_level = '';
# Server 1 : SET SESSION join_cache_level = 1;

# Begin test case for query 0

--disable_warnings
DROP TABLE IF EXISTS C, BB;
--enable_warnings

CREATE TABLE `C` (
  `int_nokey` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  `varchar_nokey` varchar(1) DEFAULT NULL,
  KEY `varchar_key` (`varchar_key`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO `C` VALUES (NULL,'g','g'),(0,'v','v'),(199,'t','t'),(3,'u','u'),(7,'n','n'),(3,'p','p'),(8,NULL,NULL),(9,'u','u'),(9,'n','n'),(0,NULL,NULL),(0,'k','k'),(188,'e','e'),(2,'d','d'),(5,'t','t'),(5,'o','o'),(6,'e','e'),(7,'s','s'),(2,NULL,NULL),(2,'d','d'),(6,'z','z');
CREATE TABLE `BB` (
  `int_nokey` int(11) DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  `varchar_nokey` varchar(1) DEFAULT NULL,
  KEY `varchar_key` (`varchar_key`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO `BB` VALUES (8,'i','i'),(0,'f','f');

 
SELECT `varchar_nokey`  
FROM C  
WHERE `int_nokey`  IN (  
SELECT subquery_t1 .`int_nokey`  
FROM BB subquery_t1  LEFT  JOIN C subquery_t2  ON subquery_t2 .`varchar_key`  = subquery_t1 .`varchar_nokey`  
WHERE subquery_t1 .`varchar_nokey`  )   ;

# Diff:

# --- /var/folders/Pt/PtJd7NDTGgyOk3+iDGXrQk+++TI/-Tmp-///randgen73904-1249407658-server0.dump	2009-08-04 13:40:58.000000000 -0400
# +++ /var/folders/Pt/PtJd7NDTGgyOk3+iDGXrQk+++TI/-Tmp-///randgen73904-1249407658-server1.dump	2009-08-04 13:40:58.000000000 -0400
# @@ -0,0 +1,3 @@
# +NULL
# +k
# +v

DROP TABLE C, BB;

# End of test case for query 0

# Begin test case for query 1

--disable_warnings
DROP TABLE IF EXISTS C, BB;
--enable_warnings

CREATE TABLE `C` (
  `int_nokey` int(11) DEFAULT NULL,
  `time_key` time DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  `varchar_nokey` varchar(1) DEFAULT NULL,
  KEY `time_key` (`time_key`),
  KEY `varchar_key` (`varchar_key`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO `C` VALUES (NULL,'18:52:56','g','g'),(0,'03:03:27','v','v'),(199,'07:48:21','t','t'),(3,'04:13:17','u','u'),(7,'11:58:42','n','n'),(3,'22:13:15','p','p'),(8,'13:05:44',NULL,NULL),(9,'21:46:44','u','u'),(9,'15:25:57','n','n'),(0,'19:13:57',NULL,NULL),(0,'00:39:30','k','k'),(188,NULL,'e','e'),(2,'00:41:08','d','d'),(5,'13:04:05','t','t'),(5,'04:20:48','o','o'),(6,'21:56:05','e','e'),(7,'19:35:19','s','s'),(2,'00:00:00',NULL,NULL),(2,'22:38:59','d','d'),(6,NULL,'z','z');
CREATE TABLE `BB` (
  `int_nokey` int(11) DEFAULT NULL,
  `time_key` time DEFAULT NULL,
  `varchar_key` varchar(1) DEFAULT NULL,
  `varchar_nokey` varchar(1) DEFAULT NULL,
  KEY `time_key` (`time_key`),
  KEY `varchar_key` (`varchar_key`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
INSERT INTO `BB` VALUES (8,NULL,'i','i'),(0,'14:09:39','f','f');

 
SELECT  MAX( `time_key`) AS field1 , `varchar_nokey` AS field2 
FROM C AS table1 
WHERE ( table1 . `int_nokey`  IN ( 
SELECT subquery_t1 . `int_nokey` 
FROM BB AS subquery_t1 LEFT  JOIN C AS subquery_t2 ON ( subquery_t2 . `varchar_key` = subquery_t1 . `varchar_nokey` ) 
WHERE subquery_t1 . `varchar_nokey` >= 'yu' ) )  
GROUP BY field2;

# Diff:

# --- /var/folders/Pt/PtJd7NDTGgyOk3+iDGXrQk+++TI/-Tmp-///randgen73904-1249407659-server0.dump	2009-08-04 13:40:59.000000000 -0400
# +++ /var/folders/Pt/PtJd7NDTGgyOk3+iDGXrQk+++TI/-Tmp-///randgen73904-1249407659-server1.dump	2009-08-04 13:40:59.000000000 -0400
# @@ -0,0 +1,3 @@
# +00:39:30	k
# +03:03:27	v
# +19:13:57	NULL

DROP TABLE C, BB;

# End of test case for query 1

Suggested fix:
Ensure proper query processing regardless of optimizer settings.