| Bug #115661 | Query results are incorrect after semijoin is turned on | ||
|---|---|---|---|
| Submitted: | 22 Jul 2024 5:48 | Modified: | 22 Jul 2024 8:17 |
| Reporter: | jiu chu | Email Updates: | |
| Status: | Verified | Impact on me: | |
| Category: | MySQL Server: Optimizer | Severity: | S2 (Serious) |
| Version: | mysql 8.4.1, 8.0.38, 9.0.0 | OS: | Linux |
| Assigned to: | CPU Architecture: | Any | |
[22 Jul 2024 8:17]
MySQL Verification Team
Hello jiu chu, Thank you for the report and test case. Verified as described. regards, Umesh
[15 Aug 2024 9:44]
huahua xu
Hi jiu chu: I think that the singularity phenomenon is related to FTS(full-text search) and has nothing to do with semijoin. For the test case(from https://bugs.mysql.com/bug.php?id=47930): mysql> CREATE TABLE t1 (a int) ENGINE = InnoDB; mysql> INSERT INTO t1 VALUES (1), (2); mysql> CREATE TABLE t2 (a int, b2 char(10), FULLTEXT KEY b2 (b2)) ENGINE = InnoDB; mysql> INSERT INTO t2 VALUES (1,'Scargill'); mysql> CREATE TABLE t3 (a int, b int) ENGINE = InnoDB; mysql> INSERT INTO t3 VALUES (1,1), (2,1); -- semijoin is turned off, the result is incorrect. mysql> SELECT count(*) FROM t1 WHERE not exists( SELECT /*+ NO_SEMIJOIN() */ 1 FROM t2, t3 WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE) ); +----------+ | count(*) | +----------+ | 1 | +----------+ -- semijoin is turned on, the result is correct. mysql> SELECT count(*) FROM t1 WHERE not exists( SELECT 1 FROM t2, t3 WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE) ); +----------+ | count(*) | +----------+ | 0 | +----------+
[15 Aug 2024 14:22]
huahua xu
During initializing FT index scan, the innodb should make a flag to resort the FTS Query result, which returned by fts_query() on fts_ranking_t::rank.
Suggested fix:
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index db5fb9b..819804d 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -10877,6 +10877,9 @@ int ha_innobase::ft_init() {
++trx->will_lock;
}
+ fts_result_t *result = reinterpret_cast<NEW_FT_INFO *>(ft_handler)->ft_result;
+ result->current = nullptr;
+
return rnd_init(false);
}
After applying the patch:
mysql> set optimizer_switch='semijoin=off';
Query OK, 0 rows affected (0.01 sec)
mysql> SELECT * FROM `users`
WHERE
directly_department_id IN (59) AND
id IN (
SELECT user_id FROM `users_roles`
WHERE
role_id IN (
SELECT id FROM `roles` WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE )
)
UNION
SELECT user_id FROM `departments_roles` WHERE false
) ;
+-----+------------------------+----------+
| id | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 | 59 | 0 |
| 206 | 59 | 0 |
| 426 | 59 | 0 |
| 764 | 59 | 0 |
| 811 | 59 | 0 |
| 866 | 59 | 0 |
+-----+------------------------+----------+
6 rows in set (0.04 sec)
mysql> set optimizer_switch='semijoin=on';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT * FROM `users`
WHERE
directly_department_id IN (59) AND
id IN (
SELECT user_id FROM `users_roles`
WHERE
role_id IN (
SELECT id FROM `roles` WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE )
)
UNION
SELECT user_id FROM `departments_roles` WHERE false
) ;
+-----+------------------------+----------+
| id | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 | 59 | 0 |
| 206 | 59 | 0 |
| 426 | 59 | 0 |
| 764 | 59 | 0 |
| 811 | 59 | 0 |
| 866 | 59 | 0 |
+-----+------------------------+----------+
6 rows in set (0.00 sec)
mysql> SELECT count(*) FROM t1 WHERE
not exists(
SELECT /*+ NO_SEMIJOIN() */ 1 FROM t2, t3
WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
);
+----------+
| count(*) |
+----------+
| 0 |
+----------+
1 row in set (0.03 sec)
mysql> SELECT count(*) FROM t1 WHERE
not exists(
SELECT 1 FROM t2, t3
WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
);
+----------+
| count(*) |
+----------+
| 0 |
+----------+
1 row in set (0.00 sec)

Description: mysql> SELECT @@optimizer_switch\G; *************************** 1. row *************************** @@optimizer_switch: index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,mrr=on,mrr_cost_based=on,block_nested_loop=on,batched_key_access=off,materialization=on,semijoin=on,loosescan=on,firstmatch=on,duplicateweedout=on,subquery_materialization_cost_based=on,use_index_extensions=on,condition_fanout_filter=on,derived_merge=on,use_invisible_indexes=off,skip_scan=on,hash_join=on,subquery_to_derived=off,prefer_ordering_index=on,hypergraph_optimizer=off,derived_condition_pushdown=on,hash_set_operations=on mysql> SELECT * FROM `users` -> WHERE -> directly_department_id IN (59) AND -> id IN ( -> SELECT user_id FROM `users_roles` -> WHERE -> role_id IN ( -> SELECT id FROM `roles` WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) -> ) -> UNION -> SELECT user_id FROM `departments_roles` WHERE false -> ) ; +-----+------------------------+----------+ | id | directly_department_id | admin_id | +-----+------------------------+----------+ | 205 | 59 | 0 | | 426 | 59 | 0 | | 811 | 59 | 0 | +-----+------------------------+----------+ 3 rows in set (0.00 sec) mysql> set optimizer_switch='semijoin=off'; Query OK, 0 rows affected (0.00 sec) mysql> SELECT @@optimizer_switch\G; *************************** 1. row *************************** @@optimizer_switch: index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,mrr=on,mrr_cost_based=on,block_nested_loop=on,batched_key_access=off,materialization=on,semijoin=off,loosescan=on,firstmatch=on,duplicateweedout=on,subquery_materialization_cost_based=on,use_index_extensions=on,condition_fanout_filter=on,derived_merge=on,use_invisible_indexes=off,skip_scan=on,hash_join=on,subquery_to_derived=off,prefer_ordering_index=on,hypergraph_optimizer=off,derived_condition_pushdown=on,hash_set_operations=on mysql> SELECT * FROM `users` -> WHERE -> directly_department_id IN (59) AND -> id IN ( -> SELECT user_id FROM `users_roles` -> WHERE -> role_id IN ( -> SELECT id FROM `roles` WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) -> ) -> UNION -> SELECT user_id FROM `departments_roles` WHERE false -> ) ; +-----+------------------------+----------+ | id | directly_department_id | admin_id | +-----+------------------------+----------+ | 205 | 59 | 0 | | 206 | 59 | 0 | | 426 | 59 | 0 | | 764 | 59 | 0 | | 811 | 59 | 0 | | 866 | 59 | 0 | +-----+------------------------+----------+ 6 rows in set (0.00 sec) How to repeat: CREATE TABLE `users` ( `id` bigint NOT NULL AUTO_INCREMENT, `directly_department_id` bigint NOT NULL, `admin_id` bigint NOT NULL DEFAULT 0, PRIMARY KEY (`id`) USING BTREE ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; INSERT INTO `users` VALUES (205, 59, 0); INSERT INTO `users` VALUES (206, 59, 0); INSERT INTO `users` VALUES (426, 59, 0); INSERT INTO `users` VALUES (764, 59, 0); INSERT INTO `users` VALUES (811, 59, 0); INSERT INTO `users` VALUES (866, 59, 0); CREATE TABLE `users_roles` ( `user_id` bigint NOT NULL, `role_id` bigint NOT NULL, PRIMARY KEY (`user_id`, `role_id`) USING BTREE ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; INSERT INTO `users_roles` VALUES (205, 2); INSERT INTO `users_roles` VALUES (205, 3); INSERT INTO `users_roles` VALUES (206, 2); INSERT INTO `users_roles` VALUES (206, 3); INSERT INTO `users_roles` VALUES (426, 2); INSERT INTO `users_roles` VALUES (426, 3); INSERT INTO `users_roles` VALUES (764, 2); INSERT INTO `users_roles` VALUES (764, 3); INSERT INTO `users_roles` VALUES (811, 2); INSERT INTO `users_roles` VALUES (811, 3); INSERT INTO `users_roles` VALUES (866, 2); INSERT INTO `users_roles` VALUES (866, 3); CREATE TABLE `departments_roles` ( `user_id` bigint NOT NULL, `department_id` bigint NOT NULL, `role_id` bigint NOT NULL, UNIQUE INDEX `userIDDepartmentIDRoleID`(`user_id` ASC, `department_id` ASC, `role_id` ASC) USING BTREE ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; CREATE TABLE `roles` ( `id` bigint NOT NULL AUTO_INCREMENT, `permissions_list` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '', PRIMARY KEY (`id`) USING BTREE, FULLTEXT INDEX `permissions_list`(`permissions_list`) ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic; INSERT INTO `roles` VALUES (1, 'burn'); INSERT INTO `roles` VALUES (2, 'client:fixedRFID,printer:list,stats:users'); INSERT INTO `roles` VALUES (3, 'client:fixedRFID,print:print,printer:list,stats:users'); INSERT INTO `roles` VALUES (4, 'approval'); INSERT INTO `roles` VALUES (5, 'archive'); INSERT INTO `roles` VALUES (6, 'reclaim'); sql: SELECT * FROM `users` WHERE directly_department_id IN (59) AND id IN ( SELECT user_id FROM `users_roles` WHERE role_id IN ( SELECT id FROM `roles` WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) ) UNION SELECT user_id FROM `departments_roles` WHERE false ) ;