Bug #108863 different index picked when row is not found, marginally slower
Submitted: 24 Oct 2022 16:40
Reporter: Shane Bester (Platinum Quality Contributor) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S5 (Performance)
Version:8.0.31 OS:Any
Assigned to: CPU Architecture:Any

[24 Oct 2022 16:40] Shane Bester
Description:
(writing in bugs.mysql.com so avoid insane wrapping of ORABUG).

explain  select * from t where (a = 86944 and c = '86008') order by id desc limit 1; -- 1. id asc
explain  select * from t where (a = 40818 and c = '456478') order by id desc limit 1; -- 2. id desc
explain  select * from t where (a = 123456 and c = '1234567') order by id desc limit 1; -- 3. not found
 
Output:

---
mysql> explain  select * from t where (a = 86944 and c = '86008') order by id desc limit 1; -- 1. id asc
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys   | key     | key_len | ref         | rows | filtered | Extra          |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
|  1 | SIMPLE      | t     | NULL       | ref  | key_abd,key_cad | key_cad | 1028    | const,const |    1 |   100.00 | Using filesort |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
1 row in set, 1 warning (0.00 sec)

mysql> explain  select * from t where (a = 40818 and c = '456478') order by id desc limit 1; -- 2. id desc
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys   | key     | key_len | ref         | rows | filtered | Extra          |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
|  1 | SIMPLE      | t     | NULL       | ref  | key_abd,key_cad | key_cad | 1028    | const,const |    1 |   100.00 | Using filesort |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------------+------+----------+----------------+
1 row in set, 1 warning (0.00 sec)

mysql> explain  select * from t where (a = 123456 and c = '1234567') order by id desc limit 1; -- 3. not found
+----+-------------+-------+------------+------+-----------------+---------+---------+-------+------+----------+-----------------------------+
| id | select_type | table | partitions | type | possible_keys   | key     | key_len | ref   | rows | filtered | Extra                       |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------+------+----------+-----------------------------+
|  1 | SIMPLE      | t     | NULL       | ref  | key_abd,key_cad | key_abd | 5       | const |    1 |     5.00 | Using where; Using filesort |
+----+-------------+-------+------------+------+-----------------+---------+---------+-------+------+----------+-----------------------------+
1 row in set, 1 warning (0.00 sec)
---

The complaint here is that 3rd query is marginally faster if using the key_cad index, so why does it pick key_abd ?

Empirically tested:

$  mysqlslap --query="select * from t force index(key_cad) where (a = 123456 and c = '1234567') order by id desc limit 1;" --concurrency=1 --number-of-queries=100000 --iterations=10 -uroot --create-schema=test
Benchmark
	Average number of seconds to run all queries: 12.275 seconds
	Minimum number of seconds to run all queries: 12.109 seconds
	Maximum number of seconds to run all queries: 12.428 seconds
	Number of clients running queries: 1
	Average number of queries per client: 100000

$  mysqlslap --query="select * from t where (a = 123456 and c = '1234567') order by id desc limit 1;" --concurrency=1 --number-of-queries=100000 --iterations=10 -uroot --create-schema=test
Benchmark
	Average number of seconds to run all queries: 12.821 seconds
	Minimum number of seconds to run all queries: 12.708 seconds
	Maximum number of seconds to run all queries: 12.944 seconds
	Number of clients running queries: 1
	Average number of queries per client: 100000

How to repeat:
set sql_mode='';
drop table if exists t;
CREATE TABLE t (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `a` int ,
  `b` varchar(255),
  `c` varchar(255),
  `d` datetime ,
  PRIMARY KEY (`id`,`d`),
  UNIQUE KEY `key_abd` (`a`,`b`,`d`),
  KEY `key_cad` (`c`,`a`,`d` DESC)
) ENGINE=InnoDB ;  
 
 truncate table t;
 set rand_seed1:=4746373734;
 set rand_seed2:=4744444455555;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000);
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000; 
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000; 
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;
 replace into t(d,a,c,b) select now(),floor(rand()*100000),floor(rand()*1000000),floor(rand()*1000000) from t limit 100000;

 analyze table t;
 
 
 select count(*) from t;
 
 select a,c from t order by id asc limit 1;
 select a,c from t order by id desc limit 1;
 
 
explain  select * from t where (a = 86944 and c = '86008') order by id desc limit 1; -- 1. id asc
explain  select * from t where (a = 40818 and c = '456478') order by id desc limit 1; -- 2. id desc
explain  select * from t where (a = 123456 and c = '1234567') order by id desc limit 1; -- 3. not found
 

Suggested fix:
if it's truly not a bug, then can devs explain the reason?