Description:
For query [SELECT *
FROM t #use index (index_father_id)
WHERE father_id = 1
AND column_useless = 'yes, it is useless'
AND id >= 200000
ORDER BY id
LIMIT 1000;]
the optimizer trace shows cost of index: index_father_id is higher than primary key, but since the order of index_father_id is exactly same from order by clause and there is a limit clause, actual cost is far less than the estimated value.
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "PRIMARY",
"ranges": [
"200000 <= id"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": false,
"in_memory": 0.999913,
"rows": 171450,
"cost": 17213.5,
"chosen": true
},
{
"index": "index_father_id",
"ranges": [
"father_id = 2 AND column_useless = 'yes, it is useless' AND 200000 <= id"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"in_memory": 1,
"rows": 68594,
"cost": 24012.7,
"chosen": false,
"cause": "cost"
}
],
related bugs:
https://bugs.mysql.com/bug.php?id=97001
How to repeat:
1, Create table
CREATE TABLE t (
id BIGINT AUTO_INCREMENT NOT NULL,
father_id BIGINT NOT NULL,
column_useless VARCHAR(50) NOT NULL,
column_for_sizing1 CHAR(255) NOT NULL default ' ',
column_for_sizing2 CHAR(255) NOT NULL default ' ',
column_for_sizing3 CHAR(255) NOT NULL default ' ',
column_for_sizing4 CHAR(255) NOT NULL default ' ',
column_for_sizing5 CHAR(255) NOT NULL default ' ',
column_for_sizing6 CHAR(255) NOT NULL default ' ',
column_for_sizing7 CHAR(255) NOT NULL default ' ',
column_for_sizing8 CHAR(255) NOT NULL default ' ',
PRIMARY KEY (id),
INDEX index_father_id (father_id, column_useless, id)
);
2, Create procedure and run it to generate testing data
CREATE PROCEDURE fillTheTable ()
BEGIN
START TRANSACTION;
SET @i = 0;
WHILE @i<100000 do
insert into t
(father_id, column_useless)
select RAND()*100, 'yes, it is useless';
SET @i = @i +1;
END WHILE;
COMMIT;
START TRANSACTION;
WHILE @i<200000 do
insert into t
(father_id, column_useless)
select RAND()*3, 'yes, it is useless';
SET @i = @i +1;
END WHILE;
COMMIT;
START TRANSACTION;
WHILE @i<300000 do
insert into t
(father_id, column_useless)
select RAND()*100, 'yes, it is useless';
SET @i = @i +1;
END WHILE;
COMMIT;
START TRANSACTION;
WHILE @i<400000 do
insert into t
(father_id, column_useless)
select RAND()*3, 'yes, it is useless';
SET @i = @i +1;
END WHILE;
COMMIT;
END;
3, Run below query, primary key will be chosen which is more inefficient than index_father_id.
EXPLAIN SELECT *
FROM t #use index (index_father_id)
WHERE father_id = 1
AND column_useless = 'yes, it is useless'
AND id >= 200000
ORDER BY id
LIMIT 1000;
Suggested fix:
considering limit clause to reduce estimated cost, for those indices with same order as order by clause.