Bug #73136 Index scan incorrectly selected over ref+sort leading to x1000 worse performance
Submitted: 27 Jun 2014 16:34 Modified: 30 Jun 2014 8:16
Reporter: Morgan Tocker Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.6.12,5.6.19,5.5, 5.5.38, 5.5.20 OS:Any
Assigned to: CPU Architecture:Any

[27 Jun 2014 16:34] Morgan Tocker
Description:
In a fairly common query, the optimizer seems to be picking an index scan of the PRIMARY key over a Ref on a secondary key that is much faster.

I have verified this in 5.5, 5.6 and 5.7.  It is still slow in 5.7, although confusingly the output of EXPLAIN is different, with the index scan being on the secondary key.

Here is the sample data to demonstrate:

DROP TABLE IF EXISTS users;
CREATE TABLE users (
 id INT NOT NULL PRIMARY KEY auto_increment,
 username VARCHAR(100) NOT NULL,
 created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM dual;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;
INSERT INTO users SELECT NULL, REPEAT('a', 10), '2014-01-01' FROM users;

DROP TABLE IF EXISTS activity_log;
CREATE TABLE activity_log (
 id INT NOT NULL PRIMARY KEY auto_increment,
 user_id INT NOT NULL,
 created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
 INDEX (user_id)
);

/* Lets say only 50 unique users have activity */

INSERT INTO activity_log SELECT NULL, id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM users WHERE id BETWEEN 5000 AND 5050;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;
INSERT INTO activity_log SELECT NULL, user_id, FROM_UNIXTIME(UNIX_TIMESTAMP()-(RAND()*86400*30)) FROM activity_log;

/* Guarantee that statistics are up to date - caused some issue with reproducibility earlier */
SET GLOBAL innodb_stats_persistent_sample_pages = 1000;
ANALYZE TABLE activity_log;
ANALYZE TABLE users;

How to repeat:
To repeat:

SELECT SQL_CALC_FOUND_ROWS
    u.*,
    (SELECT created_at FROM activity_log WHERE user_id = u.id ORDER BY id DESC LIMIT 1) AS activity_at
FROM
    users AS u
ORDER BY
    activity_at DESC
LIMIT 0, 5;
+------+------------+---------------------+---------------------+
| id   | username   | created_at          | activity_at         |
+------+------------+---------------------+---------------------+
| 5011 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-26 13:28:16 |
| 5024 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-26 04:45:15 |
| 5012 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 13:24:31 |
| 5034 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 08:39:25 |
| 5025 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 04:23:55 |
+------+------------+---------------------+---------------------+
5 rows in set (5 min 35.40 sec)

SELECT SQL_CALC_FOUND_ROWS
    u.*,
    (SELECT created_at FROM activity_log FORCE INDEX (user_id) WHERE user_id = u.id ORDER BY id DESC LIMIT 1) AS activity_at
FROM
    users AS u
ORDER BY
    activity_at DESC
LIMIT 0, 5;
+------+------------+---------------------+---------------------+
| id   | username   | created_at          | activity_at         |
+------+------------+---------------------+---------------------+
| 5011 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-26 13:28:16 |
| 5024 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-26 04:45:15 |
| 5012 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 13:24:31 |
| 5034 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 08:39:25 |
| 5025 | aaaaaaaaaa | 2014-01-01 00:00:00 | 2014-06-25 04:23:55 |
+------+------------+---------------------+---------------------+
5 rows in set (0.19 sec)

Results from EXPLAIN in 5.6:
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
| id | select_type        | table        | type  | possible_keys | key     | key_len | ref  | rows | Extra                           |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL   | NULL          | NULL    | NULL    | NULL | 8083 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | index | user_id       | PRIMARY | 4       | NULL |   97 | Using where                     |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
2 rows in set (0.00 sec)

Results from EXPLAIN in 5.7:
+----+--------------------+--------------+------------+-------+---------------+---------+---------+------------+------+----------+---------------------------------+
| id | select_type        | table        | partitions | type  | possible_keys | key     | key_len | ref        | rows | filtered | Extra                           |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+------------+------+----------+---------------------------------+
|  1 | PRIMARY            | u            | NULL       | ALL   | NULL          | NULL    | NULL    | NULL       | 8192 |   100.00 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | NULL       | index | user_id       | user_id | 4       | test2.u.id |  102 |  1003.92 | Using where                     |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+------------+------+----------+---------------------------------+
2 rows in set, 2 warnings (0.01 sec)

Versus the ideal plan with FORCE INDEX:
+----+--------------------+--------------+------+---------------+---------+---------+------------+------+---------------------------------+
| id | select_type        | table        | type | possible_keys | key     | key_len | ref        | rows | Extra                           |
+----+--------------------+--------------+------+---------------+---------+---------+------------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL  | NULL          | NULL    | NULL    | NULL       | 8083 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | ref  | user_id       | user_id | 4       | test2.u.id | 1068 | Using where; Using filesort     |
+----+--------------------+--------------+------+---------------+---------+---------+------------+------+---------------------------------+
2 rows in set (0.00 sec)

Suggested fix:
This appears to be an issue with judging the cost of the sort?  I will upload the output from optimizer trace.

It is also worth mentioning that there is a workaround in this case, as the subquery can easily be converted to SELECT max(created_at).
[27 Jun 2014 16:34] Morgan Tocker
Optimizer Trace from 5.6

Attachment: optimizer_trace.txt (text/plain), 16.34 KiB.

[30 Jun 2014 8:16] MySQL Verification Team
Hello Morgan,

Thank you for the report and test case.
Verified as described.

Thanks,
Umesh
[30 Jun 2014 8:19] MySQL Verification Team
// 5.5.20

+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
| id | select_type        | table        | type  | possible_keys | key     | key_len | ref  | rows | Extra                           |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL   | NULL          | NULL    | NULL    | NULL | 8192 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | index | user_id       | PRIMARY | 4       | NULL |  102 | Using where                     |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
2 rows in set (0.00 sec)

mysql> explain SELECT SQL_CALC_FOUND_ROWS
    ->     u.*,
    ->     (SELECT created_at FROM activity_log FORCE INDEX (user_id) WHERE user_id = u.id ORDER BY id DESC LIMIT 1) AS activity_at
    -> FROM
    ->     users AS u
    -> ORDER BY
    ->     activity_at DESC
    -> LIMIT 0, 5;
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
| id | select_type        | table        | type | possible_keys | key     | key_len | ref       | rows | Extra                           |
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL  | NULL          | NULL    | NULL    | NULL      | 8192 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | ref  | user_id       | user_id | 4       | test.u.id | 1024 | Using where; Using filesort     |
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
2 rows in set (0.00 sec)
[30 Jun 2014 8:20] MySQL Verification Team
Sorry, typo in earlier note. explain plan were from 5.6.20

mysql> select version();
+-------------------------------------------+
| version()                                 |
+-------------------------------------------+
| 5.6.20-enterprise-commercial-advanced-log |
+-------------------------------------------+
1 row in set (0.00 sec)

+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
| id | select_type        | table        | type  | possible_keys | key     | key_len | ref  | rows | Extra                           |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL   | NULL          | NULL    | NULL    | NULL | 8192 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | index | user_id       | PRIMARY | 4       | NULL |  102 | Using where                     |
+----+--------------------+--------------+-------+---------------+---------+---------+------+------+---------------------------------+
2 rows in set (0.00 sec)

mysql> explain SELECT SQL_CALC_FOUND_ROWS
    ->     u.*,
    ->     (SELECT created_at FROM activity_log FORCE INDEX (user_id) WHERE user_id = u.id ORDER BY id DESC LIMIT 1) AS activity_at
    -> FROM
    ->     users AS u
    -> ORDER BY
    ->     activity_at DESC
    -> LIMIT 0, 5;
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
| id | select_type        | table        | type | possible_keys | key     | key_len | ref       | rows | Extra                           |
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
|  1 | PRIMARY            | u            | ALL  | NULL          | NULL    | NULL    | NULL      | 8192 | Using temporary; Using filesort |
|  2 | DEPENDENT SUBQUERY | activity_log | ref  | user_id       | user_id | 4       | test.u.id | 1024 | Using where; Using filesort     |
+----+--------------------+--------------+------+---------------+---------+---------+-----------+------+---------------------------------+
2 rows in set (0.00 sec)
[20 May 2015 15:16] Morgan Tocker
This appears to be fixed in 5.8.0 daily build.  Optimizer trace shows quite different output (filesort_priority_queue_optimization is used, costs are very different).