Description:
When subquery_to_derived is turned on, the innermost IN subquery is converted to a semi-join, followed by the outer IN subquery being converted to a derived table. The sj_on_cond of the innermost semi-join is then transformed into a WHERE condition. If the relevant conditions in the WHERE condition are associated with both the outer table and the inner table of the semi-join, they will be pulled up to the outermost level. This will change the SQL semantics and is not a valid transformation. The bug manifests as an incorrect result set.
How to repeat:
source test.sql
mysql> SET optimizer_switch = 'subquery_to_derived=on';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT ( NULL ) AS A FROM select_one_range_p t1 WHERE datetime_test IN ( SELECT datetime_test FROM select_two t2 WHERE t1.datetime_test IN ( SELECT datetime_test FROM select_one_latin1 t3 WHERE t2.datetime_test = t3.datetime_test OR 82 AND smallint_test ) ) OR !25;
+------+
| A |
+------+
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
| NULL |
+------+
7 rows in set, 1 warning (0.17 sec)
SET optimizer_switch = 'subquery_to_derived=off';
mysql> SELECT ( NULL ) AS A FROM select_one_range_p t1 WHERE datetime_test IN ( SELECT datetime_test FROM select_two t2 WHERE t1.datetime_test IN ( SELECT datetime_test FROM select_one_latin1 t3 WHERE t2.datetime_test = t3.datetime_test OR 82 AND smallint_test ) ) OR !25;
| NULL |
| NULL |
| NULL |
+------+
100 rows in set, 1 warning (0.13 sec)
Suggested fix:
diff --git a/sql/sql_resolver.cc b/sql/sql_resolver.cc
index 785f2b6228a..f3c79661344 100644
--- a/sql/sql_resolver.cc
+++ b/sql/sql_resolver.cc
@@ -2550,6 +2550,7 @@ bool Query_block::build_sj_cond(THD *thd, NESTED_JOIN *nested_join,
/// Context object used by semijoin equality decorrelation code.
class Semijoin_decorrelation {
+public:
mem_root_deque<Item *> *sj_outer_exprs, *sj_inner_exprs;
/// If nullptr: only a=b is decorrelated.
/// Otherwise, a OP b is decorrelated for OP in <>, >=, >, <=, <, and
@@ -2657,6 +2658,23 @@ static inline bool can_decorrelate_operator(Item_func *func, bool only_eq) {
}
}
+bool Query_block::is_predicate_eligible(Item* outer, Item* inner) {
+ table_map sj_inner_tables{0};
+ if (has_sj_nests && outer->is_outer_reference()) {
+ walk_join_list(m_table_nest, [&sj_inner_tables](Table_ref *tr) {
+ if (tr->nested_join && tr->is_sj_or_aj_nest()) {
+ for (auto sj_inner : tr->nested_join->m_tables) {
+ // only allow innner is base table, not nested_join table.
+ if (!sj_inner->nested_join) sj_inner_tables |= sj_inner->map();
+ }
+ }
+ return false;
+ });
+
+ if (inner->used_tables() & sj_inner_tables) return false;
+ }
+ return true;
+}
/**
Decorrelate the WHERE clause or a join condition of a subquery used in
an IN or EXISTS predicate.
@@ -2730,10 +2748,12 @@ static inline bool can_decorrelate_operator(Item_func *func, bool only_eq) {
such predicates will not be decorrelated.
*/
-bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
- Table_ref *join_nest) {
+bool Query_block::decorrelate_condition(THD* thd, Semijoin_decorrelation &sj_decor,
+ Table_ref *join_nest, bool* abort_xform) {
Item *base_cond =
join_nest == nullptr ? where_cond() : join_nest->join_cond();
+ Item** ref = join_nest == nullptr? where_cond_ref() : join_nest->join_cond_ref();
+
Item_cond *cond;
Item_func *func;
@@ -2744,6 +2764,13 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
bool was_correlated;
if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
+
+ bool eligible = is_predicate_eligible(sj_decor.sj_outer_exprs->back(), sj_decor.sj_inner_exprs->back());
+ if(!eligible && abort_xform) {
+ *abort_xform = true;
+ return false;
+ }
+
if (was_correlated) { // The simple equality has been decorrelated
if (join_nest == nullptr)
set_where_cond(nullptr);
@@ -2753,6 +2780,8 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
} else if (base_cond->type() == Item::COND_ITEM &&
(cond = down_cast<Item_cond *>(base_cond)) &&
cond->functype() == Item_func::COND_AND_FUNC) {
+ Item* backup = cond->copy_andor_structure(thd);
+
List<Item> *args = cond->argument_list();
List_iterator<Item> li(*args);
Item *item;
@@ -2762,6 +2791,14 @@ bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
bool was_correlated;
if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
+
+ bool eligible = is_predicate_eligible(sj_decor.sj_outer_exprs->back(), sj_decor.sj_inner_exprs->back());
+ if(!eligible && abort_xform) {
+ *abort_xform = true;
+ *ref = backup;
+ return false;
+ }
+
if (was_correlated) li.remove();
}
}
@@ -3260,13 +3297,13 @@ bool Query_block::convert_subquery_to_semijoin(
/*op_types=*/nullptr);
if (subq_query_block->where_cond() &&
- subq_query_block->decorrelate_condition(sj_decor, nullptr))
+ subq_query_block->decorrelate_condition(thd, sj_decor, nullptr))
return true;
if (walk_join_list(
subq_query_block->m_table_nest, [&](Table_ref *tr) -> bool {
return !tr->is_inner_table_of_outer_join() && tr->join_cond() &&
- subq_query_block->decorrelate_condition(sj_decor, tr);
+ subq_query_block->decorrelate_condition(thd, sj_decor, tr);
}))
return true;
}
@@ -5535,8 +5572,13 @@ bool Query_block::transform_table_subquery_to_join_with_derived(
// This helps for this common pattern:
// EXISTS(SELECT FROM it WHERE it.c=ot.c AND <condition on 'it' only>)
const int initial_sj_inner_exprs_count = sj_inner_exprs.size();
-
- if (subs_query_block->decorrelate_condition(sj_decor, nullptr)) return true;
+
+ bool abort = false;
+ if (subs_query_block->decorrelate_condition(thd, sj_decor, nullptr, &abort)) return true;
+ if (abort) {
+ subq->strategy = Subquery_strategy::UNSPECIFIED;
+ return false;
+ }
// Append inner expressions of decorrelated equalities to the SELECT
// list. Correct context info of outer expressions.