Bug #111485 When subquery_to_derived is on, IN-subq to derived table transformed wrongly
Submitted: 19 Jun 2023 11:16 Modified: 19 Jun 2023 11:39
Reporter: chen jiang Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:mysql-8.0.33 OS:Linux
Assigned to: CPU Architecture:x86
Tags: IN-subq, sql-rewrite, subquery_to_derived

[19 Jun 2023 11:16] chen jiang
Description:
When subquery_to_derived is turned on, the innermost IN subquery is converted to a semi-join, followed by the outer IN subquery being converted to a derived table. The sj_on_cond of the innermost semi-join is then transformed into a WHERE condition. If the relevant conditions in the WHERE condition are associated with both the outer table and the inner table of the semi-join, they will be pulled up to the outermost level. This will change the SQL semantics and is not a valid transformation. The bug manifests as an incorrect result set.

How to repeat:
source test.sql
mysql> SET optimizer_switch = 'subquery_to_derived=on';
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT (     NULL     ) AS A FROM select_one_range_p t1 WHERE datetime_test IN (         SELECT datetime_test         FROM select_two t2         WHERE t1.datetime_test IN (             SELECT datetime_test             FROM select_one_latin1 t3             WHERE t2.datetime_test = t3.datetime_test                 OR 82                 AND smallint_test         )     )     OR !25;
+------+
| A    |
+------+
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
+------+
7 rows in set, 1 warning (0.17 sec)
 SET optimizer_switch = 'subquery_to_derived=off';
mysql> SELECT (     NULL     ) AS A FROM select_one_range_p t1 WHERE datetime_test IN (         SELECT datetime_test         FROM select_two t2         WHERE t1.datetime_test IN (             SELECT datetime_test             FROM select_one_latin1 t3             WHERE t2.datetime_test = t3.datetime_test                 OR 82                 AND smallint_test         )     )     OR !25;

| NULL |
| NULL |
| NULL |
+------+
100 rows in set, 1 warning (0.13 sec)

Suggested fix:
diff --git a/sql/sql_resolver.cc b/sql/sql_resolver.cc
index 785f2b6228a..f3c79661344 100644
--- a/sql/sql_resolver.cc
+++ b/sql/sql_resolver.cc
@@ -2550,6 +2550,7 @@ bool Query_block::build_sj_cond(THD *thd, NESTED_JOIN *nested_join,

 /// Context object used by semijoin equality decorrelation code.
 class Semijoin_decorrelation {
+public:
   mem_root_deque<Item *> *sj_outer_exprs, *sj_inner_exprs;
   /// If nullptr: only a=b is decorrelated.
   /// Otherwise, a OP b is decorrelated for OP in <>, >=, >, <=, <, and
@@ -2657,6 +2658,23 @@ static inline bool can_decorrelate_operator(Item_func *func, bool only_eq) {
   }
 }

+bool Query_block::is_predicate_eligible(Item* outer, Item* inner) {
+  table_map sj_inner_tables{0};
+  if (has_sj_nests && outer->is_outer_reference()) {
+    walk_join_list(m_table_nest, [&sj_inner_tables](Table_ref *tr) {
+      if (tr->nested_join && tr->is_sj_or_aj_nest()) {
+        for (auto sj_inner : tr->nested_join->m_tables) {
+          // only allow innner is base table, not nested_join table.
+          if (!sj_inner->nested_join) sj_inner_tables |= sj_inner->map();
+        }
+      }
+      return false;
+    });
+
+    if (inner->used_tables() & sj_inner_tables) return false;
+  }
+  return true;
+}
 /**
   Decorrelate the WHERE clause or a join condition of a subquery used in
   an IN or EXISTS predicate.
@@ -2730,10 +2748,12 @@ static inline bool can_decorrelate_operator(Item_func *func, bool only_eq) {
   such predicates will not be decorrelated.
 */

-bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
-                                        Table_ref *join_nest) {
+bool Query_block::decorrelate_condition(THD* thd, Semijoin_decorrelation &sj_decor,
+                                        Table_ref *join_nest, bool* abort_xform) {
   Item *base_cond =
       join_nest == nullptr ? where_cond() : join_nest->join_cond();
+  Item** ref = join_nest == nullptr? where_cond_ref() : join_nest->join_cond_ref();
+
   Item_cond *cond;
   Item_func *func;

@@ -2744,6 +2764,13 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
       can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
 bool was_correlated;
     if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
+
+    bool eligible = is_predicate_eligible(sj_decor.sj_outer_exprs->back(), sj_decor.sj_inner_exprs->back());
+    if(!eligible && abort_xform) {
+      *abort_xform = true;
+      return false;
+    }
+
     if (was_correlated) {  // The simple equality has been decorrelated
       if (join_nest == nullptr)
         set_where_cond(nullptr);
@@ -2753,6 +2780,8 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
   } else if (base_cond->type() == Item::COND_ITEM &&
              (cond = down_cast<Item_cond *>(base_cond)) &&
              cond->functype() == Item_func::COND_AND_FUNC) {
+    Item* backup = cond->copy_andor_structure(thd);
+
     List<Item> *args = cond->argument_list();
     List_iterator<Item> li(*args);
     Item *item;
@@ -2762,6 +2791,14 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
           can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
         bool was_correlated;
         if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
+
+        bool eligible = is_predicate_eligible(sj_decor.sj_outer_exprs->back(), sj_decor.sj_inner_exprs->back());
+        if(!eligible && abort_xform) {
+           *abort_xform = true;
+          *ref = backup;
+          return false;
+        }
+
         if (was_correlated) li.remove();
       }
     }
@@ -3260,13 +3297,13 @@ bool Query_block::convert_subquery_to_semijoin(
                                     /*op_types=*/nullptr);

     if (subq_query_block->where_cond() &&
-        subq_query_block->decorrelate_condition(sj_decor, nullptr))
+        subq_query_block->decorrelate_condition(thd, sj_decor, nullptr))
       return true;

     if (walk_join_list(
             subq_query_block->m_table_nest, [&](Table_ref *tr) -> bool {
               return !tr->is_inner_table_of_outer_join() && tr->join_cond() &&
-                     subq_query_block->decorrelate_condition(sj_decor, tr);
+                     subq_query_block->decorrelate_condition(thd, sj_decor, tr);
             }))
       return true;
 }
@@ -5535,8 +5572,13 @@ bool Query_block::transform_table_subquery_to_join_with_derived(
     // This helps for this common pattern:
     // EXISTS(SELECT FROM it WHERE it.c=ot.c AND <condition on 'it' only>)
     const int initial_sj_inner_exprs_count = sj_inner_exprs.size();
-
-    if (subs_query_block->decorrelate_condition(sj_decor, nullptr)) return true;
+
+    bool abort = false;
+    if (subs_query_block->decorrelate_condition(thd, sj_decor, nullptr, &abort)) return true;
+    if (abort) {
+      subq->strategy = Subquery_strategy::UNSPECIFIED;
+      return false;
+    }

     // Append inner expressions of decorrelated equalities to the SELECT
     // list. Correct context info of outer expressions.
[19 Jun 2023 11:18] chen jiang
sql for setup test table

Attachment: test.sql (application/octet-stream, text), 1.43 MiB.

[19 Jun 2023 11:39] MySQL Verification Team
Hello chen jiang,

Thank you for the report and test case.
Verified as described.

regards,
Umesh