Bug #115661 Query results are incorrect after semijoin is turned on
Submitted: 22 Jul 2024 5:48 Modified: 22 Jul 2024 8:17
Reporter: jiu chu Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S2 (Serious)
Version:mysql 8.4.1, 8.0.38, 9.0.0 OS:Linux
Assigned to: CPU Architecture:Any

[22 Jul 2024 5:48] jiu chu
Description:
mysql> SELECT @@optimizer_switch\G;
*************************** 1. row ***************************
@@optimizer_switch: index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,mrr=on,mrr_cost_based=on,block_nested_loop=on,batched_key_access=off,materialization=on,semijoin=on,loosescan=on,firstmatch=on,duplicateweedout=on,subquery_materialization_cost_based=on,use_index_extensions=on,condition_fanout_filter=on,derived_merge=on,use_invisible_indexes=off,skip_scan=on,hash_join=on,subquery_to_derived=off,prefer_ordering_index=on,hypergraph_optimizer=off,derived_condition_pushdown=on,hash_set_operations=on

mysql> SELECT * FROM `users`
    -> WHERE
    ->   directly_department_id IN (59) AND
    ->     id IN (
    ->       SELECT user_id FROM `users_roles`
    ->       WHERE
    ->       role_id IN (
    ->           SELECT id FROM `roles`  WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE )
    ->       )
    ->       UNION
    ->       SELECT user_id FROM `departments_roles`  WHERE false
    ->     ) ;
+-----+------------------------+----------+
| id  | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 |                     59 |        0 |
| 426 |                     59 |        0 |
| 811 |                     59 |        0 |
+-----+------------------------+----------+
3 rows in set (0.00 sec)

mysql> set optimizer_switch='semijoin=off';
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT @@optimizer_switch\G;
*************************** 1. row ***************************
@@optimizer_switch: index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,mrr=on,mrr_cost_based=on,block_nested_loop=on,batched_key_access=off,materialization=on,semijoin=off,loosescan=on,firstmatch=on,duplicateweedout=on,subquery_materialization_cost_based=on,use_index_extensions=on,condition_fanout_filter=on,derived_merge=on,use_invisible_indexes=off,skip_scan=on,hash_join=on,subquery_to_derived=off,prefer_ordering_index=on,hypergraph_optimizer=off,derived_condition_pushdown=on,hash_set_operations=on

mysql> SELECT * FROM `users`
    -> WHERE
    ->   directly_department_id IN (59) AND
    ->     id IN (
    ->       SELECT user_id FROM `users_roles`
    ->       WHERE
    ->       role_id IN (
    ->           SELECT id FROM `roles`  WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE )
    ->       )
    ->       UNION
    ->       SELECT user_id FROM `departments_roles`  WHERE false
    ->     ) ;
+-----+------------------------+----------+
| id  | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 |                     59 |        0 |
| 206 |                     59 |        0 |
| 426 |                     59 |        0 |
| 764 |                     59 |        0 |
| 811 |                     59 |        0 |
| 866 |                     59 |        0 |
+-----+------------------------+----------+
6 rows in set (0.00 sec)

How to repeat:
CREATE TABLE `users`  (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `directly_department_id` bigint NOT NULL,
  `admin_id` bigint NOT NULL DEFAULT 0,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

INSERT INTO `users` VALUES (205, 59, 0);
INSERT INTO `users` VALUES (206, 59, 0);
INSERT INTO `users` VALUES (426, 59, 0);
INSERT INTO `users` VALUES (764, 59, 0);
INSERT INTO `users` VALUES (811, 59, 0);
INSERT INTO `users` VALUES (866, 59, 0);

CREATE TABLE `users_roles`  (
  `user_id` bigint NOT NULL,
  `role_id` bigint NOT NULL,
  PRIMARY KEY (`user_id`, `role_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

INSERT INTO `users_roles` VALUES (205, 2);
INSERT INTO `users_roles` VALUES (205, 3);
INSERT INTO `users_roles` VALUES (206, 2);
INSERT INTO `users_roles` VALUES (206, 3);
INSERT INTO `users_roles` VALUES (426, 2);
INSERT INTO `users_roles` VALUES (426, 3);
INSERT INTO `users_roles` VALUES (764, 2);
INSERT INTO `users_roles` VALUES (764, 3);
INSERT INTO `users_roles` VALUES (811, 2);
INSERT INTO `users_roles` VALUES (811, 3);
INSERT INTO `users_roles` VALUES (866, 2);
INSERT INTO `users_roles` VALUES (866, 3);

CREATE TABLE `departments_roles`  (
  `user_id` bigint NOT NULL,
  `department_id` bigint NOT NULL,
  `role_id` bigint NOT NULL,
  UNIQUE INDEX `userIDDepartmentIDRoleID`(`user_id` ASC, `department_id` ASC, `role_id` ASC) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

CREATE TABLE `roles`  (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `permissions_list` varchar(2048) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
  PRIMARY KEY (`id`) USING BTREE,
  FULLTEXT INDEX `permissions_list`(`permissions_list`)
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

INSERT INTO `roles` VALUES (1, 'burn');
INSERT INTO `roles` VALUES (2, 'client:fixedRFID,printer:list,stats:users');
INSERT INTO `roles` VALUES (3, 'client:fixedRFID,print:print,printer:list,stats:users');
INSERT INTO `roles` VALUES (4, 'approval');
INSERT INTO `roles` VALUES (5, 'archive');
INSERT INTO `roles` VALUES (6, 'reclaim');

sql:
SELECT * FROM `users`  
WHERE 
  directly_department_id IN (59) AND 
    id IN (
      SELECT user_id FROM `users_roles`
      WHERE
      role_id IN (
          SELECT id FROM `roles`  WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) 
      )
      UNION
      SELECT user_id FROM `departments_roles`  WHERE false
    ) ;
[22 Jul 2024 8:17] MySQL Verification Team
Hello jiu chu,

Thank you for the report and test case.
Verified as described.

regards,
Umesh
[15 Aug 2024 9:44] huahua xu
Hi jiu chu:

I think that the singularity phenomenon is related to FTS(full-text search) and has nothing to do with semijoin. For the test case(from https://bugs.mysql.com/bug.php?id=47930):

mysql> CREATE TABLE t1 (a int) ENGINE = InnoDB;

mysql> INSERT INTO t1 VALUES (1), (2);

mysql> CREATE TABLE t2 (a int, b2 char(10), FULLTEXT KEY b2 (b2)) ENGINE = InnoDB;

mysql> INSERT INTO t2 VALUES (1,'Scargill');

mysql> CREATE TABLE t3 (a int, b int) ENGINE = InnoDB;

mysql> INSERT INTO t3 VALUES (1,1), (2,1);

-- semijoin is turned off, the result is incorrect.
 
mysql> SELECT count(*) FROM t1 WHERE 
  not exists(
   SELECT /*+ NO_SEMIJOIN() */ 1 FROM t2, t3
   WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
  );
+----------+
| count(*) |
+----------+
|        1 |
+----------+

-- semijoin is turned on, the result is correct.
 
mysql> SELECT count(*) FROM t1 WHERE 
  not exists(
   SELECT 1 FROM t2, t3
   WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
  );
+----------+
| count(*) |
+----------+
|        0 |
+----------+
[15 Aug 2024 14:22] huahua xu
During initializing FT index scan, the innodb should make a flag to resort the FTS Query result, which returned by fts_query() on fts_ranking_t::rank.

Suggested fix:	

diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index db5fb9b..819804d 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -10877,6 +10877,9 @@ int ha_innobase::ft_init() {
     ++trx->will_lock;
   }
 
+  fts_result_t *result = reinterpret_cast<NEW_FT_INFO *>(ft_handler)->ft_result;
+  result->current = nullptr;
+
   return rnd_init(false);
 }
 

After applying the patch:

mysql>  set optimizer_switch='semijoin=off';
Query OK, 0 rows affected (0.01 sec)

mysql> SELECT * FROM `users`  
WHERE 
  directly_department_id IN (59) AND 
    id IN (
      SELECT user_id FROM `users_roles`
      WHERE
      role_id IN (
          SELECT id FROM `roles`  WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) 
      )
      UNION
      SELECT user_id FROM `departments_roles`  WHERE false
    ) ;
+-----+------------------------+----------+
| id  | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 |                     59 |        0 |
| 206 |                     59 |        0 |
| 426 |                     59 |        0 |
| 764 |                     59 |        0 |
| 811 |                     59 |        0 |
| 866 |                     59 |        0 |
+-----+------------------------+----------+
6 rows in set (0.04 sec)

mysql> set optimizer_switch='semijoin=on';
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT * FROM `users`  
WHERE 
  directly_department_id IN (59) AND 
    id IN (
      SELECT user_id FROM `users_roles`
      WHERE
      role_id IN (
          SELECT id FROM `roles`  WHERE match(`roles`.`permissions_list`) against ('+print:print' IN BOOLEAN MODE ) 
      )
      UNION
      SELECT user_id FROM `departments_roles`  WHERE false
    ) ;
+-----+------------------------+----------+
| id  | directly_department_id | admin_id |
+-----+------------------------+----------+
| 205 |                     59 |        0 |
| 206 |                     59 |        0 |
| 426 |                     59 |        0 |
| 764 |                     59 |        0 |
| 811 |                     59 |        0 |
| 866 |                     59 |        0 |
+-----+------------------------+----------+
6 rows in set (0.00 sec)

mysql> SELECT count(*) FROM t1 WHERE 
  not exists(
   SELECT /*+ NO_SEMIJOIN() */ 1 FROM t2, t3
   WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
  );
+----------+
| count(*) |
+----------+
|        0 |
+----------+
1 row in set (0.03 sec)

mysql> SELECT count(*) FROM t1 WHERE 
  not exists(
   SELECT 1 FROM t2, t3
   WHERE t3.a=t1.a AND MATCH(b2) AGAINST('scargill' IN BOOLEAN MODE)
  );
+----------+
| count(*) |
+----------+
|        0 |
+----------+
1 row in set (0.00 sec)