Description:
In the hypergraph optimizer during making join optimization, the join conditions are obtained from tl->join_cond_optim(), and then constant folding optimization is attempted. However, in some cases, constant folding may modify certain conditions in tl->join_cond_optim(), resulting in the final conditions after the hypergraph optimizer being logically inequivalent to the original conditions.
Relevant code logic:
```
MakeRelationalExpressionFromJoinList:
if (tl->join_cond_optim() != nullptr) {
Item *join_cond = EarlyExpandMultipleEquals(tl->join_cond_optim(),
join->tables_in_subtree);
ExtractConditions(join_cond, &join->join_conditions);
bool always_false = false;
EarlyNormalizeConditions(thd, join, &join->join_conditions,
&always_false);
ReorderConditions(&join->join_conditions);
}
```
In EarlyNormalizeConditions, the join conditions undergo constant folding optimization. If certain conditions are reduced to Item::COND_TRUE, it may remove such condition from tl->join_cond_optim().
In the current MySQL implementation, since tl->join_cond_optim() is no longer used, this does not cause any issues. However, in some plugin development, if tl->join_cond_optim() is relied upon, logically inequivalent conditions may be used, ultimately leading to incorrect results.
How to repeat:
Test SQL:
-------------------------
CREATE TABLE t1 (
id_col INT PRIMARY KEY,
smallint_col SMALLINT,
int_col INT
);
CREATE TABLE t2 (
id_col INT PRIMARY KEY,
smallint_col SMALLINT,
int_col INT
);
SET optimizer_switch='hypergraph_optimizer=on';
SELECT count(ref_1.int_col)
FROM t1 AS ref_0
LEFT JOIN t2 AS ref_1
ON ( ( ref_0.smallint_col IS NULL )
OR ( ref_0.id_col IS NOT NULL ) );
GDB print changed content:
-------------------------
step1: before EarlyNormalizeConditions, the tl->join_cond_optim() is 'AND(a, b)':
| if (tl->join_cond_optim() != nullptr) {
| Item *join_cond = EarlyExpandMultipleEquals(tl->join_cond_optim(),
| join->tables_in_subtree);
| ExtractConditions(join_cond, &join->join_conditions);
| bool always_false = false;
+-----> EarlyNormalizeConditions(thd, join, &join->join_conditions,
| &always_false);
| ReorderConditions(&join->join_conditions);
| }
| ret = join;
| }
| return ret;
| }
|
(gdb) p ItemToString(tl->join_cond_optim())
$1 = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
_M_p = 0x7fc7c59620c0 "((ref_0.smallint_col is null) or (ref_0.id_col is not null))"}, _M_string_length = 60, {_M_local_buf = "<\000\000\000\000\000\000\000?U?\025\310\177\000",
_M_allocated_capacity = 60}}
step2: after EarlyNormalizeConditions, the tl->join_cond_optim() is the 'a' only:
| if (tl->join_cond_optim() != nullptr) {
| Item *join_cond = EarlyExpandMultipleEquals(tl->join_cond_optim(),
| join->tables_in_subtree);
| ExtractConditions(join_cond, &join->join_conditions);
| bool always_false = false;
| EarlyNormalizeConditions(thd, join, &join->join_conditions,
| &always_false);
+-----> ReorderConditions(&join->join_conditions);
| }
| ret = join;
| }
| return ret;
| }
|
(gdb) p ItemToString(tl->join_cond_optim())
$2 = {static npos = 18446744073709551615, _M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
_M_p = 0x7fc7c72040c0 "((ref_0.smallint_col is null))"}, _M_string_length = 30, {_M_local_buf = "\036\000\000\000\000\000\000\000?U?\025\310\177\000", _M_allocated_capacity = 30}}
Suggested fix:
After EarlyNormalizeConditions is completed, tl->join_cond_optim() needs to be reset based on join->join_conditions to ensure logical equivalence.
index f81d5b8384e..da4ecca4e42 100644
--- a/sql/join_optimizer/make_join_hypergraph.cc
+++ b/sql/join_optimizer/make_join_hypergraph.cc
@@ -350,7 +350,7 @@ RelationalExpression *MakeRelationalExpressionFromJoinList(
RelationalExpression *ret = nullptr;
for (auto it = join_list->rbegin(); it != join_list->rend();
++it) { // The list goes backwards.
- const Table_ref *tl = *it;
+ Table_ref *tl = *it;
if (ret == nullptr) {
// The first table in the list.
ret = MakeRelationalExpression(thd, query_block, tl);
@@ -400,6 +400,19 @@ RelationalExpression *MakeRelationalExpressionFromJoinList(
EarlyNormalizeConditions(thd, join, &join->join_conditions,
&always_false);
ReorderConditions(&join->join_conditions);
+
+ // If constant folding optimization removes partial join_cond, such as
+ // COND_TRUE for OR, or COND_FALSE for AND, join_cond_optim should be
+ // updated with join->join_conditions.
+ if (thd->secondary_engine_optimization() ==
+ Secondary_engine_optimization::SECONDARY) {
+ List<Item> combined_join_cond;
+ for (Item *cond : join->join_conditions) {
+ combined_join_cond.push_back(cond);
+ }
+ tl->set_join_cond_optim(new (thd->mem_root)
+ Item_cond_and(combined_join_cond));
+ }
}
ret = join;
}