Bug #74602 Optimizer prefers wrong index because of low_limit
Submitted: 28 Oct 2014 10:00 Modified: 30 Oct 2014 11:59
Reporter: Olle Nilsson Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.6.21, 5.6.22 OS:Any
Assigned to:
Tags: INDEX, index_condition_pushdown, low_limit, Optimizer, range

[28 Oct 2014 10:00] Olle Nilsson
Description:
Certain queries using ORDER BY and a LIMIT clause with a low limit number, e.g., ORDER BY c LIMIT 1, prefers an index that sorts on c although this is a very poor choice considering the rest of the WHERE clause.

In my example an index with vastly better cardinality, allowing only a handful of rows to be touched, was disregarded in favour of an index on the ORDER BY column even though the latter means we have to scan a high number of rows from that index.

How to repeat:
CREATE TABLE t1 (a INT PRIMARY KEY AUTO_INCREMENT, b INT NOT NULL, c DATETIME, d DATETIME, e VARCHAR(10)) ENGINE=INNODB ;

INSERT INTO t1 (b, c, d, e) VALUES (FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB')) ;

INSERT INTO t1 (b, c, d, e) SELECT FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB') FROM t1 ;

...

mysql> INSERT INTO t1 (b, c, d, e) SELECT FLOOR(RAND() * 1000), NOW() - INTERVAL FLOOR(RAND() * 700) DAY, NOW() - INTERVAL FLOOR(RAND() * 300) DAY, IF(RAND() > .5, 'AAA', 'BBB') FROM t1 ; 
Query OK, 65536 rows affected (0.50 sec)
Records: 65536  Duplicates: 0  Warnings: 0

ALTER TABLE t1 ADD INDEX (b, d, c) ; 
ALTER TABLE t1 ADD INDEX (c) ; 

SET @b = 166 ; 

mysql> EXPLAIN SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: t1
         type: range
possible_keys: c,b
          key: c
      key_len: 6
          ref: NULL
         rows: 65517
        Extra: Using index condition; Using where
1 row in set (0.01 sec)

mysql> EXPLAIN SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: t1
         type: range
possible_keys: b
          key: b
      key_len: 16
          ref: NULL
         rows: 2
        Extra: Using index condition; Using where; Using filesort
1 row in set (0.00 sec)

mysql> FLUSH STATUS ; 
Query OK, 0 rows affected (0.01 sec)

mysql> SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 32917
b: 166
c: 2013-02-05 10:47:03
d: 2014-10-28 10:47:03
e: BBB
1 row in set (0.03 sec)

mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name              | Value |
+----------------------------+-------+
| Handler_commit             | 1     |
| Handler_delete             | 0     |
| Handler_discover           | 0     |
| Handler_external_lock      | 2     |
| Handler_mrr_init           | 0     |
| Handler_prepare            | 0     |
| Handler_read_first         | 0     |
| Handler_read_key           | 1     |
| Handler_read_last          | 0     |
| Handler_read_next          | 12935 |
| Handler_read_prev          | 0     |
| Handler_read_rnd           | 0     |
| Handler_read_rnd_next      | 0     |
| Handler_rollback           | 0     |
| Handler_savepoint          | 0     |
| Handler_savepoint_rollback | 0     |
| Handler_update             | 0     |
| Handler_write              | 0     |
+----------------------------+-------+
18 rows in set (0.00 sec)

mysql> 

mysql> FLUSH STATUS ; 
Query OK, 0 rows affected (0.02 sec)

mysql> SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 32917
b: 166
c: 2013-02-05 10:47:03
d: 2014-10-28 10:47:03
e: BBB
1 row in set (0.00 sec)

mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name              | Value |
+----------------------------+-------+
| Handler_commit             | 1     |
| Handler_delete             | 0     |
| Handler_discover           | 0     |
| Handler_external_lock      | 2     |
| Handler_mrr_init           | 0     |
| Handler_prepare            | 0     |
| Handler_read_first         | 0     |
| Handler_read_key           | 2     |
| Handler_read_last          | 0     |
| Handler_read_next          | 1     |
| Handler_read_prev          | 0     |
| Handler_read_rnd           | 0     |
| Handler_read_rnd_next      | 0     |
| Handler_rollback           | 0     |
| Handler_savepoint          | 0     |
| Handler_savepoint_rollback | 0     |
| Handler_update             | 0     |
| Handler_write              | 0     |
+----------------------------+-------+
18 rows in set (0.00 sec)

mysql> 

Optimizer trace for a run with the wrong, sorting, index:

{
  "steps": [
    {
      "join_preparation": {
        "select#": 1,
        "steps": [
          {
            "expanded_query": "/* select#1 */ select `t1`.`a` AS `a`,`t1`.`b` AS `b`,`t1`.`c` AS `c`,`t1`.`d` AS `d`,`t1`.`e` AS `e` from `t1` where ((`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and (`t1`.`e` = 'BBB')) order by `t1`.`c` limit 1"
          }
        ]
      }
    },
    {
      "join_optimization": {
        "select#": 1,
        "steps": [
          {
            "condition_processing": {
              "condition": "WHERE",
              "original_condition": "((`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and (`t1`.`e` = 'BBB'))",
              "steps": [
                {
                  "transformation": "equality_propagation",
                  "resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
                },
                {
                  "transformation": "constant_propagation",
                  "resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
                },
                {
                  "transformation": "trivial_condition_removal",
                  "resulting_condition": "((`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())) and multiple equal((@`b`), `t1`.`b`) and multiple equal('BBB', `t1`.`e`))"
                }
              ]
            }
          },
          {
            "table_dependencies": [
              {
                "table": "`t1`",
                "row_may_be_null": false,
                "map_bit": 0,
                "depends_on_map_bits": [
                ]
              }
            ]
          },
          {
            "ref_optimizer_key_uses": [
              {
                "table": "`t1`",
                "field": "b",
                "equals": "(@`b`)",
                "null_rejecting": false
              }
            ]
          },
          {
            "rows_estimation": [
              {
                "table": "`t1`",
                "range_analysis": {
                  "table_scan": {
                    "rows": 392375,
                    "cost": 79598
                  },
                  "potential_range_indices": [
                    {
                      "index": "PRIMARY",
                      "usable": false,
                      "cause": "not_applicable"
                    },
                    {
                      "index": "c",
                      "usable": true,
                      "key_parts": [
                        "c",
                        "a"
                      ]
                    },
                    {
                      "index": "d",
                      "usable": true,
                      "key_parts": [
                        "d",
                        "c",
                        "b",
                        "a"
                      ]
                    },
                    {
                      "index": "b",
                      "usable": true,
                      "key_parts": [
                        "b",
                        "d",
                        "c",
                        "a"
                      ]
                    }
                  ],
                  "setup_range_conditions": [
                  ],
                  "group_index_range": {
                    "chosen": false,
                    "cause": "not_group_by_or_distinct"
                  },
                  "analyzing_range_alternatives": {
                    "range_scan_alternatives": [
                      {
                        "index": "c",
                        "ranges": [
                          "NULL < c <= 2014-10-27 13:27:21"
                        ],
                        "index_dives_for_eq_ranges": true,
                        "rowid_ordered": false,
                        "using_mrr": false,
                        "index_only": false,
                        "rows": 196187,
                        "cost": 235425,
                        "chosen": false,
                        "cause": "cost"
                      },
                      {
                        "index": "d",
                        "ranges": [
                          "NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
                          "2014-10-27 13:27:21 <= d"
                        ],
                        "index_dives_for_eq_ranges": true,
                        "rowid_ordered": false,
                        "using_mrr": false,
                        "index_only": false,
                        "rows": 2,
                        "cost": 4.41,
                        "chosen": true
                      },
                      {
                        "index": "b",
                        "ranges": [
                          "166 <= b <= 166 AND NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
                          "166 <= b <= 166 AND 2014-10-27 13:27:21 <= d"
                        ],
                        "index_dives_for_eq_ranges": true,
                        "rowid_ordered": false,
                        "using_mrr": false,
                        "index_only": false,
                        "rows": 2,
                        "cost": 4.41,
                        "chosen": false,
                        "cause": "cost"
                      }
                    ],
                    "analyzing_roworder_intersect": {
                      "usable": false,
                      "cause": "too_few_roworder_scans"
                    }
                  },
                  "chosen_range_access_summary": {
                    "range_access_plan": {
                      "type": "range_scan",
                      "index": "d",
                      "rows": 2,
                      "ranges": [
                        "NULL <= d <= NULL AND NULL < c <= 2014-10-27 13:27:21",
                        "2014-10-27 13:27:21 <= d"
                      ]
                    },
                    "rows_for_plan": 2,
                    "cost_for_plan": 4.41,
                    "chosen": true
                  }
                }
              }
            ]
          },
          {
            "considered_execution_plans": [
              {
                "plan_prefix": [
                ],
                "table": "`t1`",
                "best_access_path": {
                  "considered_access_paths": [
                    {
                      "access_type": "ref",
                      "index": "b",
                      "rows": 200,
                      "cost": 240,
                      "chosen": true
                    },
                    {
                      "access_type": "range",
                      "rows": 2,
                      "cost": 4.81,
                      "chosen": true
                    }
                  ]
                },
                "cost_for_plan": 4.81,
                "rows_for_plan": 2,
                "chosen": true
              }
            ]
          },
          {
            "attaching_conditions_to_tables": {
              "original_condition": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))",
              "attached_conditions_computation": [
                {
                  "table": "`t1`",
                  "rechecking_index_usage": {
                    "recheck_reason": "low_limit",
                    "limit": 1,
                    "row_estimate": 2,
                    "range_analysis": {
                      "table_scan": {
                        "rows": 392375,
                        "cost": 470852
                      },
                      "potential_range_indices": [
                        {
                          "index": "PRIMARY",
                          "usable": false,
                          "cause": "not_applicable"
                        },
                        {
                          "index": "c",
                          "usable": true,
                          "key_parts": [
                            "c",
                            "a"
                          ]
                        },
                        {
                          "index": "d",
                          "usable": false,
                          "cause": "not_applicable"
                        },
                        {
                          "index": "b",
                          "usable": false,
                          "cause": "not_applicable"
                        }
                      ],
                      "setup_range_conditions": [
                      ],
                      "group_index_range": {
                        "chosen": false,
                        "cause": "not_group_by_or_distinct"
                      },
                      "analyzing_range_alternatives": {
                        "range_scan_alternatives": [
                          {
                            "index": "c",
                            "ranges": [
                              "NULL < c <= 2014-10-27 13:27:21"
                            ],
                            "index_dives_for_eq_ranges": true,
                            "rowid_ordered": false,
                            "using_mrr": false,
                            "index_only": false,
                            "rows": 196187,
                            "cost": 235425,
                            "chosen": true
                          }
                        ],
                        "analyzing_roworder_intersect": {
                          "usable": false,
                          "cause": "too_few_roworder_scans"
                        }
                      },
                      "chosen_range_access_summary": {
                        "range_access_plan": {
                          "type": "range_scan",
                          "index": "c",
                          "rows": 196187,
                          "ranges": [
                            "NULL < c <= 2014-10-27 13:27:21"
                          ]
                        },
                        "rows_for_plan": 196187,
                        "cost_for_plan": 235425,
                        "chosen": true
                      }
                    }
                  }
                }
              ],
              "attached_conditions_summary": [
                {
                  "table": "`t1`",
                  "attached": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (`t1`.`c` <= now()) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))"
                }
              ]
            }
          },
          {
            "clause_processing": {
              "clause": "ORDER BY",
              "original_clause": "`t1`.`c`",
              "items": [
                {
                  "item": "`t1`.`c`"
                }
              ],
              "resulting_clause_is_simple": true,
              "resulting_clause": "`t1`.`c`"
            }
          },
          {
            "refine_plan": [
              {
                "table": "`t1`",
                "pushed_index_condition": "(`t1`.`c` <= now())",
                "table_condition_attached": "((`t1`.`e` = 'BBB') and (`t1`.`b` = (@`b`)) and (isnull(`t1`.`d`) or (`t1`.`d` >= now())))",
                "access_type": "range"
              }
            ]
          },
          {
            "reconsidering_access_paths_for_index_ordering": {
              "clause": "ORDER BY",
              "index_order_summary": {
                "table": "`t1`",
                "index_provides_order": true,
                "order_direction": "asc",
                "index": "c",
                "plan_changed": false
              }
            }
          }
        ]
      }
    },
    {
      "join_execution": {
        "select#": 1,
        "steps": [
        ]
      }
    }
  ]
}

Suggested fix:
Pick the correct index.

In the trace it looks strange that the expensive plan is chosen when a better cost is actually already found and evaluated before the low_limit reconsideration makes a bad decision.

Don't blindly choose the index with the correct sort order. It seems a last round of reconsideration is skipped in sql_optimizer.cc:

             /*
                If the current plan is to use a range access on an
                index that provides the order dictated by the ORDER BY
                clause there is no need to recheck index usage; we
                already know from the former call to
                test_quick_select() that a range scan on the chosen
                index is cheapest. Note that previous calls to
                test_quick_select() did not take order direction
                (ASC/DESC) into account, so in case of DESC ordering
                we still need to recheck.
              */

But this might also be unrelated.
[29 Oct 2014 14:49] Umesh Shastry
Hello Olle,

Thank you for the bug report and test case.

Thanks,
Umesh
[29 Oct 2014 14:50] Umesh Shastry
mysql> show variables like '%version%';
+-------------------------+---------------------------------------------------------+
| Variable_name           | Value                                                   |
+-------------------------+---------------------------------------------------------+
| innodb_version          | 5.6.22                                                  |
| protocol_version        | 10                                                      |
| slave_type_conversions  |                                                         |
| version                 | 5.6.22-enterprise-commercial-advanced                   |
| version_comment         | MySQL Enterprise Server - Advanced Edition (Commercial) |
| version_compile_machine | x86_64                                                  |
| version_compile_os      | Linux                                                   |
+-------------------------+---------------------------------------------------------+
7 rows in set (0.00 sec)
..
..
mysql>
mysql> EXPLAIN SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: t1
         type: range
possible_keys: b,c
          key: c
      key_len: 6
          ref: NULL
         rows: 65517
        Extra: Using index condition; Using where
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: t1
         type: range
possible_keys: b
          key: b
      key_len: 16
          ref: NULL
         rows: 2
        Extra: Using index condition; Using where; Using filesort
1 row in set (0.00 sec)

mysql> FLUSH STATUS ;
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 46672
b: 166
c: 2014-04-03 11:14:46
d: 2014-10-28 11:14:46
e: BBB
1 row in set (0.31 sec)

mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name              | Value |
+----------------------------+-------+
| Handler_commit             | 1     |
| Handler_delete             | 0     |
| Handler_discover           | 0     |
| Handler_external_lock      | 2     |
| Handler_mrr_init           | 0     |
| Handler_prepare            | 0     |
| Handler_read_first         | 0     |
| Handler_read_key           | 1     |
| Handler_read_last          | 0     |
| Handler_read_next          | 91422 |
| Handler_read_prev          | 0     |
| Handler_read_rnd           | 0     |
| Handler_read_rnd_next      | 0     |
| Handler_rollback           | 0     |
| Handler_savepoint          | 0     |
| Handler_savepoint_rollback | 0     |
| Handler_update             | 0     |
| Handler_write              | 0     |
+----------------------------+-------+
18 rows in set (0.00 sec)

mysql> FLUSH STATUS ;
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT * FROM t1 FORCE INDEX (b) WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
a: 46672
b: 166
c: 2014-04-03 11:14:46
d: 2014-10-28 11:14:46
e: BBB
1 row in set (0.00 sec)

mysql> SHOW STATUS LIKE 'Handler%';
+----------------------------+-------+
| Variable_name              | Value |
+----------------------------+-------+
| Handler_commit             | 1     |
| Handler_delete             | 0     |
| Handler_discover           | 0     |
| Handler_external_lock      | 2     |
| Handler_mrr_init           | 0     |
| Handler_prepare            | 0     |
| Handler_read_first         | 0     |
| Handler_read_key           | 2     |
| Handler_read_last          | 0     |
| Handler_read_next          | 1     |
| Handler_read_prev          | 0     |
| Handler_read_rnd           | 0     |
| Handler_read_rnd_next      | 0     |
| Handler_rollback           | 0     |
| Handler_savepoint          | 0     |
| Handler_savepoint_rollback | 0     |
| Handler_update             | 0     |
| Handler_write              | 0     |
+----------------------------+-------+
18 rows in set (0.00 sec)
[30 Oct 2014 11:26] Øystein Grøvlen
This is fixed in MySQL 5.7.5:

mysql> select version();
+-----------+
| version() |
+-----------+
| 5.7.5-m15 |
+-----------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 WHERE b = @b AND c <= NOW() AND (d IS NULL OR d >= NOW() - INTERVAL 2 DAY) AND e='BBB' ORDER BY c ASC LIMIT 1 \G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: t1
   partitions: NULL
         type: range
possible_keys: b,c
          key: b
      key_len: 16
          ref: NULL
         rows: 2
     filtered: 2.50
        Extra: Using index condition; Using where; Using filesort
1 row in set, 1 warning (0.00 sec)
[30 Oct 2014 11:59] Øystein Grøvlen
Fixed in 5.7.  Not likely to be fixed in 5.6.