Description:
Certain queries using ORDER BY and a LIMIT clause with a low limit number, e.g., ORDER BY c LIMIT 1, prefers an index that sorts on c although this is a very poor choice considering the rest of the WHERE clause.
In my example an index with vastly better cardinality, allowing only a handful of rows to be touched, was disregarded in favour of an index on the ORDER BY column even though the latter means we have to scan a high number of rows from that index.
How to repeat:
CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT NOT NULL, c DATETIME, d DATETIME, e VARCHAR(10)) ENGINE=INNODB ;
INSERT INTO t1 (b, c, d, e) VALUES (FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB')) ;
INSERT INTO t1 (b, c, d, e) SELECT FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB') FROM t1 ;
...
mysql> INSERT INTO t1 (b, c, d, e) SELECT FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB') FROM t1 ;
Query OK, 65536 rows affected (0.50 sec)
Records: 65536 Duplicates: 0 Warnings: 0
ALTER TABLE t1 ADD INDEX (b, d, c) ;
ALTER TABLE t1 ADD INDEX (c) ;
SET @b = 166 ;
mysql> EXPLAIN SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: t1
type: range
possible_keys: c,b
key: c
key_len: 6
ref: NULL
rows: 65517
Extra: Using index condition; Using where
1 row in set (0.01 sec)
mysql> EXPLAIN SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: t1
type: range
possible_keys: b
key: b
key_len: 16
ref: NULL
rows: 2
Extra: Using index condition; Using where; Using filesort
1 row in set (0.00 sec)
mysql> FLUSH STATUS ;
Query OK, 0 rows affected (0.01 sec)
mysql> SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 32917
b: 166
c: 2013-02-05 10:47:03
d: 2014-10-28 10:47:03
e: BBB
1 row in set (0.03 sec)
mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name | Value |
+----------------------------+-------+
| Handler_commit | 1 |
| Handler_delete | 0 |
| Handler_discover | 0 |
| Handler_external_lock | 2 |
| Handler_mrr_init | 0 |
| Handler_prepare | 0 |
| Handler_read_first | 0 |
| Handler_read_key | 1 |
| Handler_read_last | 0 |
| Handler_read_next | 12935 |
| Handler_read_prev | 0 |
| Handler_read_rnd | 0 |
| Handler_read_rnd_next | 0 |
| Handler_rollback | 0 |
| Handler_savepoint | 0 |
| Handler_savepoint_rollback | 0 |
| Handler_update | 0 |
| Handler_write | 0 |
+----------------------------+-------+
18 rows in set (0.00 sec)
mysql>
mysql> FLUSH STATUS ;
Query OK, 0 rows affected (0.02 sec)
mysql> SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 32917
b: 166
c: 2013-02-05 10:47:03
d: 2014-10-28 10:47:03
e: BBB
1 row in set (0.00 sec)
mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name | Value |
+----------------------------+-------+
| Handler_commit | 1 |
| Handler_delete | 0 |
| Handler_discover | 0 |
| Handler_external_lock | 2 |
| Handler_mrr_init | 0 |
| Handler_prepare | 0 |
| Handler_read_first | 0 |
| Handler_read_key | 2 |
| Handler_read_last | 0 |
| Handler_read_next | 1 |
| Handler_read_prev | 0 |
| Handler_read_rnd | 0 |
| Handler_read_rnd_next | 0 |
| Handler_rollback | 0 |
| Handler_savepoint | 0 |
| Handler_savepoint_rollback | 0 |
| Handler_update | 0 |
| Handler_write | 0 |
+----------------------------+-------+
18 rows in set (0.00 sec)
mysql>
Optimizer trace for a run with the wrong, sorting, index:
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `t1`.`a` AS `a`,`t1`.`b` AS `b`,`t1`.`c` AS `c`,`t1`.`d` AS `d`,`t1`.`e` AS `e` from `t1` where ((`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and (`t1`.`e` = 'BBB')) order by `t1`.`c` limit 1"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "((`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and (`t1`.`e` = 'BBB'))",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
},
{
"transformation": "constant_propagation",
"resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
}
]
}
},
{
"table_dependencies": [
{
"table": "`t1`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
]
}
]
},
{
"ref_optimizer_key_uses": [
{
"table": "`t1`",
"field": "b",
"equals": "(@`b`)",
"null_rejecting": false
}
]
},
{
"rows_estimation": [
{
"table": "`t1`",
"range_analysis": {
"table_scan": {
"rows": 392375,
"cost": 79598
},
"potential_range_indices": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "c",
"usable": true,
"key_parts": [
"c",
"a"
]
},
{
"index": "d",
"usable": true,
"key_parts": [
"d",
"c",
"b",
"a"
]
},
{
"index": "b",
"usable": true,
"key_parts": [
"b",
"d",
"c",
"a"
]
}
],
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "c",
"ranges": [
"NULL < c <= 2014-10-27 13:27:21"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 196187,
"cost": 235425,
"chosen": false,
"cause": "cost"
},
{
"index": "d",
"ranges": [
"NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
"2014-10-27 13:27:21 <= d"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 2,
"cost": 4.41,
"chosen": true
},
{
"index": "b",
"ranges": [
"166 <= b <= 166 AND NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
"166 <= b <= 166 AND 2014-10-27 13:27:21 <= d"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 2,
"cost": 4.41,
"chosen": false,
"cause": "cost"
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"chosen_range_access_summary": {
"range_access_plan": {
"type": "range_scan",
"index": "d",
"rows": 2,
"ranges": [
"NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
"2014-10-27 13:27:21 <= d"
]
},
"rows_for_plan": 2,
"cost_for_plan": 4.41,
"chosen": true
}
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`t1`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "b",
"rows": 200,
"cost": 240,
"chosen": true
},
{
"access_type": "range",
"rows": 2,
"cost": 4.81,
"chosen": true
}
]
},
"cost_for_plan": 4.81,
"rows_for_plan": 2,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))",
"attached_conditions_computation": [
{
"table": "`t1`",
"rechecking_index_usage": {
"recheck_reason": "low_limit",
"limit": 1,
"row_estimate": 2,
"range_analysis": {
"table_scan": {
"rows": 392375,
"cost": 470852
},
"potential_range_indices": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "c",
"usable": true,
"key_parts": [
"c",
"a"
]
},
{
"index": "d",
"usable": false,
"cause": "not_applicable"
},
{
"index": "b",
"usable": false,
"cause": "not_applicable"
}
],
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "c",
"ranges": [
"NULL < c <= 2014-10-27 13:27:21"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 196187,
"cost": 235425,
"chosen": true
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"chosen_range_access_summary": {
"range_access_plan": {
"type": "range_scan",
"index": "c",
"rows": 196187,
"ranges": [
"NULL < c <= 2014-10-27 13:27:21"
]
},
"rows_for_plan": 196187,
"cost_for_plan": 235425,
"chosen": true
}
}
}
}
],
"attached_conditions_summary": [
{
"table": "`t1`",
"attached": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))"
}
]
}
},
{
"clause_processing": {
"clause": "ORDER BY",
"original_clause": "`t1`.`c`",
"items": [
{
"item": "`t1`.`c`"
}
],
"resulting_clause_is_simple": true,
"resulting_clause": "`t1`.`c`"
}
},
{
"refine_plan": [
{
"table": "`t1`",
"pushed_index_condition": "(`t1`.`c` <= now())",
"table_condition_attached": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))",
"access_type": "range"
}
]
},
{
"reconsidering_access_paths_for_index_ordering": {
"clause": "ORDER BY",
"index_order_summary": {
"table": "`t1`",
"index_provides_order": true,
"order_direction": "asc",
"index": "c",
"plan_changed": false
}
}
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": [
]
}
}
]
}
Suggested fix:
Pick the correct index.
In the trace it looks strange that the expensive plan is chosen when a better cost is actually already found and evaluated before the low_limit reconsideration makes a bad decision.
Don't blindly choose the index with the correct sort order. It seems a last round of reconsideration is skipped in sql_optimizer.cc:
/*
If the current plan is to use a range access on an
index that provides the order dictated by the ORDER BY
clause there is no need to recheck index usage; we
already know from the former call to
test_quick_select() that a range scan on the chosen
index is cheapest. Note that previous calls to
test_quick_select() did not take order direction
(ASC/DESC) into account, so in case of DESC ordering
we still need to recheck.
*/
But this might also be unrelated.