| Bug #108548 | GROUP_CONCAT NOT SET USED_TABLES WHEN DERIVED_MERGE. | ||
|---|---|---|---|
| Submitted: | 20 Sep 2022 6:37 | Modified: | 20 Sep 2022 7:35 |
| Reporter: | casa zhang (OCA) | Email Updates: | |
| Status: | Verified | Impact on me: | |
| Category: | MySQL Server: Optimizer | Severity: | S2 (Serious) |
| Version: | 5.7, 5.7.39 | OS: | Any |
| Assigned to: | CPU Architecture: | Any | |
| Tags: | transformation | ||
[20 Sep 2022 6:42]
casa zhang
GROUP_CONCAT NOT SET USED_TABLES WHEN DERIVED_MERGE.
Attachment: 0001-bugfix-GROUP_CONCAT-NOT-SET-USED_TABLES-WHEN-DERIVED.patch (application/octet-stream, text), 5.79 KiB.
[20 Sep 2022 6:44]
casa zhang
we found that only GROUP_CONCAT() function cause the wrong result, so add fix_after_pullout() for Item_func_group_concat item, hope that will be helpful.
[20 Sep 2022 7:04]
MySQL Verification Team
Hello casa zhang,
Thank you for the report and Contribution.
In which version are you seeing this issue? I quickly tried and confirmed that this issue is no longer reproducible on current GA.
- 5.7.39
bin/mysql -uroot -S /tmp/mysql.sock
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.7.39 MySQL Community Server (GPL)
Copyright (c) 2000, 2022, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> create database test;
Query OK, 1 row affected (0.00 sec)
mysql> use test
Database changed
mysql> CREATE TABLE `t1` (`c1` bigint(20), `c2` varchar(30), `c3` varchar(30), `c4` varchar(30), PRIMARY KEY (`c1`));
Query OK, 0 rows affected (0.02 sec)
mysql> INSERT INTO t1 VALUES(1, 'one', 'category', 'ab');
Query OK, 1 row affected (0.01 sec)
mysql> INSERT INTO t1 VALUES(2, 'two', 'category', 'cc');
Query OK, 1 row affected (0.00 sec)
mysql> INSERT INTO t1 VALUES(3, 'three', 'category', 'aa');
Query OK, 1 row affected (0.00 sec)
mysql> INSERT INTO t1 VALUES(4, 'four', 'category', 'bb');
Query OK, 1 row affected (0.00 sec)
mysql>
mysql> CREATE TABLE `t2` ( `c1` varchar(10), `c2` varchar(80), PRIMARY KEY (`c1`));
Query OK, 0 rows affected (0.01 sec)
mysql> INSERT INTO t2 VALUES('fine', 'aa');
Query OK, 1 row affected (0.01 sec)
mysql> INSERT INTO t2 VALUES('thanks', 'bb');
Query OK, 1 row affected (0.00 sec)
mysql>
mysql> SET @@optimizer_switch="derived_merge=on";
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)));
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SELECT * FROM
-> (
-> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)))
-> ) AS D;
+---+------+------+--------+------+
| a | b | c | d | e |
+---+------+------+--------+------+
| 4 | four | bb | fine | aa |
| 4 | four | bb | thanks | bb |
+---+------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SET SESSION OPTIMIZER_SWITCH="derived_merge=off";
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)));
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SELECT * FROM
-> (
-> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)))
-> ) AS D;
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
- 8.0.30
bin/mysql -uroot -S /tmp/mysql.sock
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 7
Server version: 8.0.30 MySQL Community Server - GPL
Copyright (c) 2000, 2022, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> create database test;
Query OK, 1 row affected (0.01 sec)
mysql> use test
Database changed
mysql> SET @@optimizer_switch="derived_merge=on";
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)));
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SELECT * FROM
-> (
-> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)))
-> ) AS D;
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SET SESSION OPTIMIZER_SWITCH="derived_merge=off";
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)));
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SELECT * FROM
-> (
-> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)))
-> ) AS D;
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
regards,
Umesh
[20 Sep 2022 7:28]
casa zhang
you repeat it, the result is not stable, but both are wrong.
mysql> SET @@optimizer_switch="derived_merge=on";
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)));
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
2 rows in set (0.00 sec)
mysql> SELECT * FROM
-> (
-> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e
-> FROM t1, t2
-> WHERE t1.c3 = 'category'
-> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2)))
-> ) AS D;
+---+------+------+--------+------+
| a | b | c | d | e |
+---+------+------+--------+------+
| 4 | four | bb | fine | aa |
| 4 | four | bb | thanks | bb |
+---+------+------+--------+------+
2 rows in set (0.00 sec)
The second SQL give the different result.
+---+-------+------+--------+------+
| a | b | c | d | e |
+---+-------+------+--------+------+
| 3 | three | aa | fine | aa |
| 4 | four | bb | thanks | bb |
+---+-------+------+--------+------+
is not same as
+---+------+------+--------+------+
| a | b | c | d | e |
+---+------+------+--------+------+
| 4 | four | bb | fine | aa |
| 4 | four | bb | thanks | bb |
+---+------+------+--------+------+
[20 Sep 2022 7:33]
MySQL Verification Team
Agree, thank you for the feedback.
[20 Sep 2022 7:35]
casa zhang
The problem will appear on 5.7 not 8.0.

Description: From Tencent CDB group, casazhang hi, something goes wrong with an application online, finally we found that when derived_merge switch on, subquery contains group_concat() may produce wrong results, since `used_tables_cache` does not been set correctly. group_concat() function should consider add fix_after_pullput() when derived merge into outer query block. How to repeat: CREATE TABLE `t1` (`c1` bigint(20), `c2` varchar(30), `c3` varchar(30), `c4` varchar(30), PRIMARY KEY (`c1`)); INSERT INTO t1 VALUES(1, 'one', 'category', 'ab'); INSERT INTO t1 VALUES(2, 'two', 'category', 'cc'); INSERT INTO t1 VALUES(3, 'three', 'category', 'aa'); INSERT INTO t1 VALUES(4, 'four', 'category', 'bb'); CREATE TABLE `t2` ( `c1` varchar(10), `c2` varchar(80), PRIMARY KEY (`c1`)); INSERT INTO t2 VALUES('fine', 'aa'); INSERT INTO t2 VALUES('thanks', 'bb'); SET @@optimizer_switch="derived_merge=on"; mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e -> FROM t1, t2 -> WHERE t1.c3 = 'category' -> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2))); +---+-------+------+--------+------+ | a | b | c | d | e | +---+-------+------+--------+------+ | 3 | three | aa | fine | aa | | 4 | four | bb | thanks | bb | +---+-------+------+--------+------+ 2 rows in set (0.00 sec) If we wrap the SQL with simple `select * from () as d`, it give the wrong result, sometimes it produce 0 rows. mysql> SELECT * FROM -> ( -> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e -> FROM t1, t2 -> WHERE t1.c3 = 'category' -> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2))) -> ) AS D; Empty set (0.01 sec) But, if we switch off the derived_merge, the upper two SQLs give the right result. mysql> SET SESSION OPTIMIZER_SWITCH="derived_merge=off"; Query OK, 0 rows affected (0.00 sec) mysql> mysql> SELECT * FROM -> ( -> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e -> FROM t1, t2 -> WHERE t1.c3 = 'category' -> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2))) -> ) AS D; +---+-------+------+--------+------+ | a | b | c | d | e | +---+-------+------+--------+------+ | 3 | three | aa | fine | aa | | 4 | four | bb | thanks | bb | +---+-------+------+--------+------+ 2 rows in set (0.00 sec) mysql> mysql> SELECT t1.c1 AS a, t1.c2 AS b, t1.c4 AS c, t2.c1 AS d, t2.c2 AS e -> FROM t1, t2 -> WHERE t1.c3 = 'category' -> AND find_in_set(t1.c4, (SELECT group_concat(t2.c2))); +---+-------+------+--------+------+ | a | b | c | d | e | +---+-------+------+--------+------+ | 3 | three | aa | fine | aa | | 4 | four | bb | thanks | bb | +---+-------+------+--------+------+ 2 rows in set (0.00 sec) DROP TABLE t1; DROP TABLE t2; Suggested fix: Add Item_func_group_concat::fix_after_pullput() like: --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3821,6 +3821,19 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type) str->append(STRING_WITH_LEN("\')")); } +void Item_func_group_concat::fix_after_pullout(st_select_lex *parent_select, + st_select_lex *removed_select) +{ + // Perform pullout of arguments to aggregate function + used_tables_cache = 0; + + Item **arg, **arg_end; + for (arg = args, arg_end = args + arg_count; arg != arg_end; arg++) { + Item *const item = *arg; + item->fix_after_pullout(parent_select, removed_select); + used_tables_cache |= item->used_tables(); + } +} Item_func_group_concat::~Item_func_group_concat() { diff --git a/sql/item_sum.h b/sql/item_sum.h index 442a5c07239..cced83cb125 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -1622,6 +1622,8 @@ public: context= reinterpret_cast<Name_resolution_context *>(cntx); return false; } + void fix_after_pullout(st_select_lex *parent_select, + st_select_lex *removed_select); }; #endif /* ITEM_SUM_INCLUDED */