Description:
Hi, MySQL developers, thanks for reading my report. I find an optimizer bug in MySQL.
Through analysis of the optimizer trace, when a query uses 'WHERE flag = 1 ORDER BY id LIMIT n' (a rare filter ordered by the primary key), the optimizer correctly picks the filtering index 'idx_t1_flag_x' during range analysis, but then in the 'reconsidering_access_paths_for_index_ordering' ('prefer_ordering_index') phase it discards that index and switches to a forward primary-key scan to avoid a filesort. The LIMIT short-circuit there assumes the matching rows are spread uniformly along the sort key, so it expects to read only a few rows before collecting `n` matches. When the filter column is correlated with the primary key, this assumption is violated and the plan scans nearly the entire table to find the first match.
The logically equivalent query with 'FORCE INDEX (idx_t1_flag_x)' produces the expected plan:
the table is accessed via the filtering index, only the matching rows are read and then
sorted, the result is identical, and it runs far faster.
How to repeat:
CREATE DATABASE IF NOT EXISTS optcov_target;
USE optcov_target;
DROP TABLE IF EXISTS optcov_t1;
CREATE TABLE optcov_t1 (
id INT NOT NULL PRIMARY KEY,
t0_id INT NOT NULL,
x INT NULL,
y VARCHAR(64) NULL,
flag TINYINT NOT NULL,
KEY idx_t1_t0_id (t0_id),
KEY idx_t1_x (x),
KEY idx_t1_t0_x (t0_id, x),
KEY idx_t1_flag_x (flag, x),
KEY idx_t1_y_prefix (y(10))
) ENGINE=InnoDB;
INSERT INTO optcov_t1 (id, t0_id, x, y, flag)
SELECT
i + 1,
((i * 13) % 3000000) + 1,
i,
CONCAT('y_', i),
IF(i >= 2985000, 1, 0)
FROM (
SELECT d1.n + d2.n*10 + d3.n*100 + d4.n*1000
+ d5.n*10000 + d6.n*100000 + d7.n*1000000 AS i
FROM (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d1
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d2
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d3
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d4
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d5
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d6
CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d7
) t
WHERE i < 3000000;
EXPLAIN
SELECT SQL_NO_CACHE id, x, y FROM optcov_t1
WHERE flag = 1 ORDER BY id LIMIT 10;
EXPLAIN
SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 FORCE INDEX (idx_t1_flag_x)
WHERE flag = 1 ORDER BY id LIMIT 10;
SELECT SQL_NO_CACHE id, x, y FROM optcov_t1
WHERE flag = 1 ORDER BY id LIMIT 10;
SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 FORCE INDEX (idx_t1_flag_x)
WHERE flag = 1 ORDER BY id LIMIT 10;
Description: Hi, MySQL developers, thanks for reading my report. I find an optimizer bug in MySQL. Through analysis of the optimizer trace, when a query uses 'WHERE flag = 1 ORDER BY id LIMIT n' (a rare filter ordered by the primary key), the optimizer correctly picks the filtering index 'idx_t1_flag_x' during range analysis, but then in the 'reconsidering_access_paths_for_index_ordering' ('prefer_ordering_index') phase it discards that index and switches to a forward primary-key scan to avoid a filesort. The LIMIT short-circuit there assumes the matching rows are spread uniformly along the sort key, so it expects to read only a few rows before collecting `n` matches. When the filter column is correlated with the primary key, this assumption is violated and the plan scans nearly the entire table to find the first match. The logically equivalent query with 'FORCE INDEX (idx_t1_flag_x)' produces the expected plan: the table is accessed via the filtering index, only the matching rows are read and then sorted, the result is identical, and it runs far faster. How to repeat: CREATE DATABASE IF NOT EXISTS optcov_target; USE optcov_target; DROP TABLE IF EXISTS optcov_t1; CREATE TABLE optcov_t1 ( id INT NOT NULL PRIMARY KEY, t0_id INT NOT NULL, x INT NULL, y VARCHAR(64) NULL, flag TINYINT NOT NULL, KEY idx_t1_t0_id (t0_id), KEY idx_t1_x (x), KEY idx_t1_t0_x (t0_id, x), KEY idx_t1_flag_x (flag, x), KEY idx_t1_y_prefix (y(10)) ) ENGINE=InnoDB; INSERT INTO optcov_t1 (id, t0_id, x, y, flag) SELECT i + 1, ((i * 13) % 3000000) + 1, i, CONCAT('y_', i), IF(i >= 2985000, 1, 0) FROM ( SELECT d1.n + d2.n*10 + d3.n*100 + d4.n*1000 + d5.n*10000 + d6.n*100000 + d7.n*1000000 AS i FROM (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d1 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d2 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d3 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d4 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d5 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d6 CROSS JOIN (SELECT 0 n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d7 ) t WHERE i < 3000000; EXPLAIN SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 WHERE flag = 1 ORDER BY id LIMIT 10; EXPLAIN SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 FORCE INDEX (idx_t1_flag_x) WHERE flag = 1 ORDER BY id LIMIT 10; SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 WHERE flag = 1 ORDER BY id LIMIT 10; SELECT SQL_NO_CACHE id, x, y FROM optcov_t1 FORCE INDEX (idx_t1_flag_x) WHERE flag = 1 ORDER BY id LIMIT 10;