Description:
We have identified a query which takes long time, while running most of the time is spent in query state 'Removing duplicates'. Table is very small with less than ~300k rows and table size is ~50MB.
Here is the original query:
MariaDB [sbtest]> SELECT DISTINCT `inspection_id` ,`field_value` FROM `inspections` WHERE (`label` LIKE 'start time%' ) GROUP BY `inspection_id` ,`field_value` ,`field_num` ,`label` LIMIT 1;
+--------------------+-------------+
| inspection_id | field_value |
+--------------------+-------------+
| ACE-P/201007081215 | 12:15 |
+--------------------+-------------+
1 row in set (1 min 26.36 sec)
mysql> show processlist;
+----+-----------------+-----------------+------+---------+---------+------------------------+---------------------
--------------------------------------------------+
| Id | User | Host | db | Command | Time | State | Info
|
+----+-----------------+-----------------+------+---------+---------+------------------------+---------------------
--------------------------------------------------+
| 4 | event_scheduler | localhost | NULL | Daemon | 1296784 | Waiting on empty queue | NULL
|
| 20 | root | localhost:65456 | test | Query | 31 | Removing duplicates | SELECT DISTINCT `a
` FROM `foo` GROUP BY `a` ,`b` ,`c` ,`d` limit 1 |
| 21 | root | localhost:65504 | test | Query | 0 | starting | show processlist
|
+----+-----------------+-----------------+------+---------+---------+------------------------+---------------------
--------------------------------------------------+
3 rows in set (0.00 sec)
The same query runs fast without DISTINCT keyword.
How to repeat:
Create test table `foo`
# Load sample data
CREATE TABLE `foo` (
`a` varchar(18) DEFAULT NULL,
`b` text,
`c` int(3) DEFAULT NULL,
`d` varchar(60) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
INSERT INTO `foo` VALUES ('ALD-A/200808181115','',5,'******************************'),('ALD-A/200808181115','*****',10,'Start time'),('ALD-A/200808181115','*****',15,'********'),('ALD-A/200808181115','**************',20,'*******'),('ALD-A/200808181115','*',25,'********************'),('ALD-A/200808181115','*',30,'************************'),('ALD-A/200808181115','*',35,'*************************'),('ALD-A/200808181115','*',40,'**************'),('ALD-A/200808181115','*',45,'****************'),('ALD-A/200808181115','*',50,'**********************');
insert into foo select f.* from foo f join foo f2 join foo f3 join foo f4 join foo f5 ;
## Verify ##
mysql> SELECT DISTINCT `a` ,`b` FROM `foo` GROUP BY `a` ,`b` ,`c` ,`d` limit 1\G
*************************** 1. row ***************************
a: ALD-A/200808181115
b:
1 row in set (0.34 sec)
mysql> explain format=json SELECT DISTINCT `a` ,`b` FROM `foo` GROUP BY `a` ,`b` ,`c` ,`d` limit 1\G
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10422.10"
},
"duplicates_removal": {
"using_filesort": false,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "foo",
"access_type": "ALL",
"rows_examined_per_scan": 99684,
"rows_produced_per_join": 99684,
"filtered": "100.00",
"cost_info": {
"read_cost": "453.70",
"eval_cost": "9968.40",
"prefix_cost": "10422.10",
"data_read_per_join": "31M"
},
"used_columns": [
"a",
"b",
"c",
"d"
]
}
}
}
}
}
1 row in set, 1 warning (0.02 sec)