Description:
An aggregate function and group by with regexp_substr() result as wrong
How to repeat:
1、version:
select version();
+-----------+
| version() |
+-----------+
| 8.0.13 |
+-----------+
1 row in set (0.00 sec)
2、desc table info:
show create table test_ak\G
*************************** 1. row ***************************
Table: test_ak
Create Table: CREATE TABLE `test_ak` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`instanceId` varchar(50) DEFAULT NULL COMMENT '1',
`region` varchar(50) DEFAULT NULL COMMENT '2',
`scope` varchar(20) DEFAULT NULL COMMENT '3',
`instanceName` varchar(100) DEFAULT NULL COMMENT '4',
`price` decimal(10,2) DEFAULT NULL COMMENT '5',
`update_time` date NOT NULL DEFAULT '1970-01-01' COMMENT '6',
`type` varchar(20) DEFAULT NULL COMMENT '7',
PRIMARY KEY (`id`),
KEY `idx_utime_type` (`update_time`,`type`)
) ENGINE=InnoDB AUTO_INCREMENT=169099 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
1 row in set (0.00 sec)
3、simple aggregate query:
-> select t.instanceId
-> ,t.instanceName
-> ,regexp_substr(t.instanceName,'[0-9]{5}',1,1) as ss
-> ,t.price
-> from test_ak t
-> where 1=1
-> and t.update_time>=date_add(now(),interval -1 day)
-> and t.update_time<=now()
-> and t.type='ecs'
-> and regexp_substr(t.instanceName,'[0-9]{5}',1,1) is not null
-> and instanceId in ('test01','test02');
+------------+-----------------+-------+--------+
| instanceId | instanceName | ss | price |
+------------+-----------------+-------+--------+
| test01 | test-aa-10487-1 | 10487 | 100.00 |
| test02 | test-bb-10702-1 | 10702 | 200.00 |
+------------+-----------------+-------+--------+
2 rows in set (0.01 sec)
4、add sub_query,the result wrong!!!
-> select ss
-> ,sum(price) as sums
-> ,count(distinct instanceId) as counts
-> from (select t.instanceId
-> ,t.instanceName
-> ,regexp_substr(t.instanceName,'[0-9]{5}',1,1) as ss
-> ,t.price
-> from test_ak t
-> where 1=1
-> and t.update_time>=date_add(now(),interval -1 day)
-> and t.update_time<=now()
-> and t.type='ecs'
-> and regexp_substr(t.instanceName,'[0-9]{5}',1,1) is not null
-> and instanceId in ('test01','test02')
-> ) as t
-> group by ss;
+-------+--------+--------+
| ss | sums | counts |
+-------+--------+--------+
| 10487 | 300.00 | 2 |
+-------+--------+--------+
1 row in set (0.01 sec)
Suggested fix:
change the group by type to string,for example:
-> select ss
-> ,sum(price) as sums
-> ,count(distinct instanceId) as counts
-> from (select t.instanceId
-> ,t.instanceName
-> ,regexp_substr(t.instanceName,'[0-9]{5}',1,1) as ss
-> ,t.price
-> from test_ak t
-> where 1=1
-> and t.update_time>=date_add(now(),interval -1 day)
-> and t.update_time<=now()
-> and t.type='ecs'
-> and regexp_substr(t.instanceName,'[0-9]{5}',1,1) is not null
-> and instanceId in ('test01','test02')
-> ) as t
-> group by cast(ss as char(10));
+-------+--------+--------+
| ss | sums | counts |
+-------+--------+--------+
| 10487 | 100.00 | 1 |
| 10702 | 200.00 | 1 |
+-------+--------+--------+
2 rows in set (0.02 sec)