Description:
The following two queries return a different result set:
SELECT OUTR .`varchar_nokey` FROM B OUTR2 LEFT JOIN B OUTR ON OUTR2 .`datetime_nokey` WHERE ( OUTR .`pk` , OUTR .`int_key` ) IN ( SELECT `int_nokey` , `int_key` FROM CC ) AND NULL XOR OUTR .`int_nokey` IS NULL;
SELECT OUTR .`varchar_nokey` FROM B OUTR2 LEFT JOIN B OUTR ON OUTR2 .`datetime_nokey` WHERE ( OUTR .`pk` , OUTR .`int_key` ) IN ( SELECT DISTINCT `int_nokey` , `int_key` FROM CC ) AND NULL XOR OUTR .`int_nokey` IS NULL;
even though they are identical, except for the DISTINCT inside the IN() ,which should have been ignored by the optimizer and have no effect.
How to repeat:
--disable_warnings
DROP TABLE IF EXISTS CC, B;
--enable_warnings
CREATE TABLE `CC` (
`pk` int(11) NOT NULL AUTO_INCREMENT,
`int_nokey` int(11) NOT NULL,
`int_key` int(11) NOT NULL,
`datetime_nokey` datetime NOT NULL,
`varchar_nokey` varchar(1) NOT NULL,
PRIMARY KEY (`pk`),
KEY `int_key` (`int_key`)
) ENGINE=MyISAM AUTO_INCREMENT=30 DEFAULT CHARSET=latin1;
INSERT INTO `CC` VALUES (10,7,5,'2002-04-10 14:25:30','w'),(11,7,0,'0000-00-00 00:00:00','s'),(12,4,0,'2006-09-14 04:01:02','y'),(13,0,4,'0000-00-00 00:00:00','c'),(14,1,8,'0000-00-00 00:00:00','q'),(15,6,5,'0000-00-00 00:00:00',''),(16,2,9,'0000-00-00 00:00:00','d'),(17,6,8,'2007-04-01 11:04:17',''),(18,0,1,'0000-00-00 00:00:00','p'),(19,4,7,'2009-01-12 00:00:00','x'),(20,4,0,'2009-06-05 00:00:00','f'),(21,7,3,'2006-02-14 18:06:35','x'),(22,3,5,'2006-02-21 07:08:16','h'),(23,7,0,'0000-00-00 00:00:00','c'),(24,8,7,'0000-00-00 00:00:00','m'),(25,4,0,'0000-00-00 00:00:00','s'),(26,6,0,'2007-02-13 00:00:00','b'),(27,9,1,'0000-00-00 00:00:00','o'),(28,0,0,'0000-00-00 00:00:00','j'),(29,9,1,'2003-08-11 00:00:00','m');
CREATE TABLE `B` (
`pk` int(11) NOT NULL AUTO_INCREMENT,
`int_nokey` int(11) NOT NULL,
`int_key` int(11) NOT NULL,
`datetime_nokey` datetime NOT NULL,
`varchar_nokey` varchar(1) NOT NULL,
PRIMARY KEY (`pk`),
KEY `int_key` (`int_key`)
) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=latin1;
INSERT INTO `B` VALUES (1,9,7,'0000-00-00 00:00:00','b'),(2,2,9,'2007-03-03 01:12:45','h');
SELECT OUTR .`varchar_nokey`
FROM B OUTR2 LEFT JOIN B OUTR ON OUTR2 .`datetime_nokey`
WHERE ( OUTR .`pk` , OUTR .`int_key` ) IN (
SELECT DISTINCT `int_nokey` , `int_key`
FROM CC ) AND NULL XOR OUTR .`int_nokey` IS NULL ;
SELECT OUTR .`varchar_nokey`
FROM B OUTR2 LEFT JOIN B OUTR ON OUTR2 .`datetime_nokey`
WHERE ( OUTR .`pk` , OUTR .`int_key` ) IN (
SELECT `int_nokey` , `int_key`
FROM CC ) AND NULL XOR OUTR .`int_nokey` IS NULL ;
DROP TABLE CC, B;
Suggested fix:
This is another situation where the optimizer fails to remove stuff that is not important (in this case, the DISTINCT in the inner query) before trying to optimize the query further.