Bug #69410 Poor execution plan when ORDER BY with LIMIT X
Submitted: 5 Jun 2013 20:43 Modified: 17 Jul 2013 16:41
Reporter: Michael Riediger Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S5 (Performance)
Version:5.6.11, 5.6.12 OS:Linux
Assigned to: Jørgen Løland CPU Architecture:Any
Tags: compound index, limit, OFFSET, order by, performance

[5 Jun 2013 20:43] Michael Riediger
Description:
Similar behavior to http://bugs.mysql.com/bug.php?id=69013 

When executing a query with an ORDER BY and LIMIT and compound indexes, the query optimizer chooses the wrong path. It actually performs better (doesn't confuse the query optimizer?) if you remove the LIMIT.

EXPLAIN
SELECT * FROM 0_a
WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
ORDER BY the_r DESC LIMIT 4;

+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                              |
+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+
|  1 | SIMPLE      | 0_a   | range | TVLB,VLHR     | VLHR | 2       | NULL | 523005 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+

EXPLAIN
SELECT * FROM  (SELECT * FROM 0_a
WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
ORDER BY the_r DESC ) a LIMIT 4;

+----+-------------+------------+-------+---------------+------+---------+------+------+----------------------------------------------------+
| id | select_type | table      | type  | possible_keys | key  | key_len | ref  | rows | Extra                                              |
+----+-------------+------------+-------+---------------+------+---------+------+------+----------------------------------------------------+
|  1 | PRIMARY     | <derived2> | ALL   | NULL          | NULL | NULL    | NULL |  321 | NULL                                               |
|  2 | DERIVED     | 0_a        | range | TVLB,VLHR     | TVLB | 7       | NULL |  427 | Using index condition; Using where; Using filesort |
+----+-------------+------------+-------+---------------+------+---------+------+------+----------------------------------------------------+

How to repeat:
A procedure to create some random data and the observed result (same behavior is observed each time I tried this):

DROP TABLE IF EXISTS 0_a;

CREATE TABLE `0_a` (
  `t_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `the_t` tinyint(3) unsigned NOT NULL DEFAULT '0',
  `the_l` tinyint(3) unsigned NOT NULL DEFAULT '1',
  `the_b` int(10) unsigned NOT NULL DEFAULT '0',
  `the_v` enum('yes','no') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'yes',
  `the_r` float(10,2) NOT NULL DEFAULT '0.00',
  `the_d` int(10) unsigned NOT NULL DEFAULT '0',
  `the_s` enum('A','B','C','D','E','F','G','H') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'A',
  `dud` int unsigned not null default 0,
  PRIMARY KEY (`t_id`),
  KEY `TVLB` (`the_t`,`the_v`,`the_l`,`the_b`),
  KEY `VLHR` (`the_v`,`the_l`,`the_d`,`the_r`)
) ENGINE=InnoDB ;

INSERT INTO 0_a SELECT NULL, 1, 1, 0, 'yes', 0, 0, 'A', ROUND(RAND()*1000);

INSERT INTO 0_a SELECT NULL, 1, 1, 0, 'yes', 0, 0, 'A', ROUND(RAND()*1000) FROM 0_a;  ### repeat 20 times to get ~1M rows 

# we'll overwrite / populate the data with a distribution approximating my data set

UPDATE 0_a SET the_t = CASE 
WHEN dud < 501 THEN 1
WHEN dud < 555 THEN 2
WHEN dud < 581 THEN 3 
WHEN dud < 620 THEN 4 
WHEN dud < 671 THEN 5 
WHEN dud < 856 THEN 6 
WHEN dud < 991 THEN 7
ELSE 8 END;

UPDATE 0_a SET dud = 1000*RAND();

UPDATE 0_a SET the_b = CASE 
WHEN dud < 1 THEN UNIX_TIMESTAMP('2004-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 2 THEN UNIX_TIMESTAMP('2005-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 15 THEN UNIX_TIMESTAMP('2006-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 70 THEN UNIX_TIMESTAMP('2007-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 184 THEN UNIX_TIMESTAMP('2008-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 355 THEN UNIX_TIMESTAMP('2009-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 530 THEN UNIX_TIMESTAMP('2010-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 700 THEN UNIX_TIMESTAMP('2011-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
WHEN dud < 908 THEN UNIX_TIMESTAMP('2012-01-01 00:00:00') + ROUND(RAND()*365*24*60*60)
ELSE UNIX_TIMESTAMP('2013-01-01 00:00:00') + ROUND(RAND()*180*24*60*60) END;

UPDATE 0_a SET dud = 1000*RAND();

UPDATE 0_a SET the_v = CASE 
WHEN dud < 883 THEN 'yes'
ELSE 'no' END;

UPDATE 0_a SET dud = 1000*RAND();

UPDATE 0_a SET the_r = CASE 
WHEN dud < 1 THEN -4000+1000*rand()
WHEN dud < 2 THEN -3000+1000*rand()
WHEN dud < 3 THEN -2000+1000*rand()
WHEN dud < 34 THEN -1000+1000*rand()
WHEN dud < 960 THEN      1000*rand()
WHEN dud < 962 THEN 1000+1000*rand()
WHEN dud < 963 THEN 2000+1000*rand()
ELSE 3000+1000*rand() END;

UPDATE 0_a SET dud = 1000*RAND();

UPDATE 0_a SET the_d = CASE 
WHEN dud < 871 THEN the_b + ROUND(30*rand()*24*60*60)
ELSE 0 END;

UPDATE 0_a SET the_s = CASE 
WHEN dud < 150 THEN 'A'
WHEN dud < 965 THEN 'C'
ELSE 'D' END;

OPTIMIZE TABLE 0_a;  

## check the explains

 EXPLAIN
 SELECT * FROM 0_a
 WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
 AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
 ORDER BY the_r DESC LIMIT 4;

+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows   | Extra                                              |
+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+
|  1 | SIMPLE      | 0_a   | range | TVLB,VLHR     | VLHR | 2       | NULL | 523005 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+------+---------+------+--------+----------------------------------------------------+

## ^^ wrong path, lots of rows, slow execution

 EXPLAIN
 SELECT * FROM 0_a
 WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
 AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
 ORDER BY the_r DESC ;

+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                                              |
+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+
|  1 | SIMPLE      | 0_a   | range | TVLB,VLHR     | TVLB | 7       | NULL |  397 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+

## ^^ right path, few of rows, fast execution

### check the handler status for the respective queries

flush status;

pager md5sum; SELECT * FROM 0_a
WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1  
AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00') 
ORDER BY the_r DESC LIMIT 4; pager;

show status like 'hand%';

+----------------------------+--------+
| Variable_name              | Value  |
+----------------------------+--------+
| Handler_commit             | 1      |
| Handler_delete             | 0      |
| Handler_discover           | 0      |
| Handler_external_lock      | 2      |
| Handler_mrr_init           | 0      |
| Handler_prepare            | 0      |
| Handler_read_first         | 0      |
| Handler_read_key           | 1      |
| Handler_read_last          | 0      |
| Handler_read_next          | 925473 |
| Handler_read_prev          | 0      |
| Handler_read_rnd           | 0      |
| Handler_read_rnd_next      | 0      |
| Handler_rollback           | 0      |
| Handler_savepoint          | 0      |
| Handler_savepoint_rollback | 0      |
| Handler_update             | 0      |
| Handler_write              | 0      |
+----------------------------+--------+

flush status;

pager md5sum; SELECT * FROM 0_a
WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1  
AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00') 
ORDER BY the_r DESC ; pager;

show status like 'hand%';

+----------------------------+-------+
| Variable_name              | Value |
+----------------------------+-------+
| Handler_commit             | 1     |
| Handler_delete             | 0     |
| Handler_discover           | 0     |
| Handler_external_lock      | 2     |
| Handler_mrr_init           | 0     |
| Handler_prepare            | 0     |
| Handler_read_first         | 0     |
| Handler_read_key           | 1     |
| Handler_read_last          | 0     |
| Handler_read_next          | 397   |
| Handler_read_prev          | 0     |
| Handler_read_rnd           | 0     |
| Handler_read_rnd_next      | 0     |
| Handler_rollback           | 0     |
| Handler_savepoint          | 0     |
| Handler_savepoint_rollback | 0     |
| Handler_update             | 0     |
| Handler_write              | 0     |
+----------------------------+-------+

Suggested fix:
Performance was good/fine in 5.5.*
[6 Jun 2013 0:26] MySQL Verification Team
Thank you for the bug report.

mysql> SHOW VARIABLES LIKE "%version%";
+-------------------------+---------------------+
| Variable_name           | Value               |
+-------------------------+---------------------+
| innodb_version          | 5.6.13              |
| protocol_version        | 10                  |
| slave_type_conversions  |                     |
| version                 | 5.6.13              |
| version_comment         | Source distribution |
| version_compile_machine | x86_64              |
| version_compile_os      | Linux               |
+-------------------------+---------------------+
7 rows in set (0.00 sec)

mysql> EXPLAIN
    ->  SELECT * FROM 0_a
    ->  WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
    ->  AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
    ->  ORDER BY the_r DESC LIMIT 4;
+----+-------------+-------+-------+---------------+------+---------+------+---------+----------------------------------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows    | Extra                                              |
+----+-------------+-------+-------+---------------+------+---------+------+---------+----------------------------------------------------+
|  1 | SIMPLE      | 0_a   | range | TVLB,VLHR     | VLHR | 2       | NULL | 1045831 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+------+---------+------+---------+----------------------------------------------------+
1 row in set (0.00 sec)

mysql> EXPLAIN
    ->  SELECT * FROM 0_a
    ->  WHERE the_t = 3 AND the_s = 'A' AND the_v = 'yes'  AND the_l = 1
    ->  AND the_b BETWEEN UNIX_TIMESTAMP('2013-05-01 00:00:00') AND UNIX_TIMESTAMP('2013-06-02 00:00:00')
    ->  ORDER BY the_r DESC ;
+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                                              |
+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+
|  1 | SIMPLE      | 0_a   | range | TVLB,VLHR     | TVLB | 7       | NULL |  754 | Using index condition; Using where; Using filesort |
+----+-------------+-------+-------+---------------+------+---------+------+------+----------------------------------------------------+
1 row in set (0.00 sec)
[17 Jul 2013 16:41] Paul DuBois
Noted in 5.6.13, 5.7.2 changelogs.

For queries with ORDER BY ... LIMIT, the optimizer could choose a
nonordering index for table access.