From 5e449e55554048225aaf173591ddf7fb9e7ea852 Mon Sep 17 00:00:00 2001 From: tianfengli Date: Mon, 26 Aug 2024 06:27:53 +0000 Subject: [PATCH] support row value decomposition for comparison Problem: ========= Item_row initially represented a row value. However, in the range optimizer, it cannot be divided into basic predicates or manipulated further in other places such as indexing, filtering, grouping or sorting. This limitation might also prevent some index conditions from pushing down to the storage engine. Solution: ========= We now support decomposing row value comparisons into basic comparisons. A new session sys_var, `row_comparison_decompose_threshold`, has been added to control the maximum number of elements in Item_row to be decomposed. The default value is 10. By default, we decompose all row values in comparisons as follows: (a,b) > (x,y) => (a>x) or (a=x and b>y) (a,b) = (x,y) => (a=x) and (b=y) ... and so on. as described in https://dev.mysql.com/doc/refman/8.0/en/comparison-operators.html Others: ======= 1. The row value comparison split occurs at the parser stage, making the changes to Item_row permanent. As a result, the sys_var cannot affect PS/SP once parsed. 2. The decomposing exponentially expands for comparisons other than '=', '<>', and '<=>'. This expansion increases memory consumption. To prevent excessive expansion, the sys_var acts as a limitation. 3. Recursive row value decomposing would also be deep-searched to ensure the number of items in LEFT/RIGHT Item_row is no more than `row_comparison_decompose_threshold`. --- mysql-test/r/row_value_comparison.result | 223 ++++++++++++++ mysql-test/t/row_value_comparison.test | 173 +++++++++++ sql/parse_tree_items.cc | 18 ++ sql/row_comparison_decomposer.cc | 351 +++++++++++++++++++++++ sql/sys_vars.cc | 9 +- sql/system_variables.h | 4 + 6 files changed, 777 insertions(+), 1 deletion(-) create mode 100644 mysql-test/r/row_value_comparison.result create mode 100644 mysql-test/t/row_value_comparison.test create mode 100644 sql/row_comparison_decomposer.cc diff --git a/mysql-test/r/row_value_comparison.result b/mysql-test/r/row_value_comparison.result new file mode 100644 index 00000000000..47290a4d043 --- /dev/null +++ b/mysql-test/r/row_value_comparison.result @@ -0,0 +1,223 @@ +create table t1 (id int primary key, c1 int, c2 int, key k_c (c1,c2)); +insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4); +insert into t1 select id + 4, c1 +4, c2 + 4 from t1; +insert into t1 select id + 8, c1 + 8, c2 + 8 from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +set row_comparison_decompose_threshold = 0; +explain select * from t1 where (c1,c2) < (2,2); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL index NULL k_c 10 NULL 16 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2` from `test`.`t1` where ((`test`.`t1`.`c1`,`test`.`t1`.`c2`) < (2,2)) +explain format=tree select * from t1 where (c1,c2) < (2,2); +EXPLAIN +-> Filter: ((t1.c1,t1.c2) < (2,2)) (cost=***.** rows=**) + -> Covering index scan on t1 using k_c (cost=***.** rows=**) + +# +# basic test +# +set row_comparison_decompose_threshold = default; +explain select * from t1 where (c1,c2) < (2,2); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range k_c k_c 10 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2` from `test`.`t1` where ((`test`.`t1`.`c1` < 2) or ((`test`.`t1`.`c1` = 2) and (`test`.`t1`.`c2` < 2))) +explain format=tree select * from t1 where (c1,c2) < (2,2); +EXPLAIN +-> Filter: ((t1.c1 < 2) or ((t1.c1 = 2) and (t1.c2 < 2))) (cost=***.** rows=**) + -> Covering index range scan on t1 using k_c over (NULL < c1 < 2) OR (c1 = 2 AND NULL < c2 < 2) (cost=***.** rows=**) + +select * from t1 where (c1,c2) < (2,2); +id c1 c2 +1 1 1 +explain format=tree select * from t1 where (c1,c2) = (2,2); +EXPLAIN +-> Covering index lookup on t1 using k_c (c1=2, c2=2) (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) > (2,2); +EXPLAIN +-> Filter: ((t1.c1 > 2) or ((t1.c1 = 2) and (t1.c2 > 2))) (cost=***.** rows=**) + -> Covering index range scan on t1 using k_c over (c1 = 2 AND 2 < c2) OR (2 < c1) (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) != (2,2); +EXPLAIN +-> Filter: ((t1.c1 <> 2) or (t1.c2 <> 2)) (cost=***.** rows=**) + -> Covering index scan on t1 using k_c (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) <> (2,2); +EXPLAIN +-> Filter: ((t1.c1 <> 2) or (t1.c2 <> 2)) (cost=***.** rows=**) + -> Covering index scan on t1 using k_c (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) <=> (2,2); +EXPLAIN +-> Filter: ((t1.c1 <=> 2) and (t1.c2 <=> 2)) (cost=***.** rows=**) + -> Covering index lookup on t1 using k_c (c1=2, c2=2) (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) >= (2,2); +EXPLAIN +-> Filter: ((t1.c1 > 2) or ((t1.c1 = 2) and (t1.c2 >= 2))) (cost=***.** rows=**) + -> Covering index range scan on t1 using k_c over (c1 = 2 AND 2 <= c2) OR (2 < c1) (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2) <= (2,2); +EXPLAIN +-> Filter: ((t1.c1 < 2) or ((t1.c1 = 2) and (t1.c2 <= 2))) (cost=***.** rows=**) + -> Covering index range scan on t1 using k_c over (NULL < c1 < 2) OR (c1 = 2 AND NULL < c2 <= 2) (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2,id) > (2,2,2); +EXPLAIN +-> Filter: ((t1.c1 > 2) or ((t1.c1 = 2) and (t1.c2 > 2)) or ((t1.c2 = 2) and (t1.c1 = 2) and (t1.id > 2))) (cost=***.** rows=**) + -> Covering index scan on t1 using k_c (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1,c2,id) >= (1,2,3); +EXPLAIN +-> Filter: ((t1.c1 > 1) or ((t1.c1 = 1) and (t1.c2 > 2)) or ((t1.c2 = 2) and (t1.c1 = 1) and (t1.id >= 3))) (cost=***.** rows=**) + -> Covering index scan on t1 using k_c (cost=***.** rows=**) + +drop table t1; +# +# Switch +# +create table t1(a int, b int, c int, d int); +set row_comparison_decompose_threshold = 1; +explain format=tree select * from t1 where (a,b) > (1,2); +EXPLAIN +-> Filter: ((t1.a,t1.b) > (1,2)) (cost=***.** rows=**) + -> Table scan on t1 (cost=***.** rows=**) + +set row_comparison_decompose_threshold = 2; +explain format=tree select * from t1 where (a,(b,c,d)) >= (1,(2,3,4)); +EXPLAIN +-> Filter: ((t1.a,(t1.b,t1.c,t1.d)) >= (1,(2,3,4))) (cost=***.** rows=**) + -> Table scan on t1 (cost=***.** rows=**) + +explain format=tree select * from t1 where (a,(b,c),(d)) >= (1,(2,3),4); +EXPLAIN +-> Filter: ((t1.a,(t1.b,t1.c),t1.d) >= (1,(2,3),4)) (cost=***.** rows=**) + -> Table scan on t1 (cost=***.** rows=**) + +drop table t1; +set row_comparison_decompose_threshold = default; +# +# Nested row value +# +create table t3(a int, b int, c int, d int, e int); +explain format=tree select * from t3 where (c,(b,a),(d,e)) > (1,(2,3),(4,5)); +EXPLAIN +-> Filter: ((t3.c > 1) or ((t3.c = 1) and ((t3.b > 2) or ((t3.b = 2) and (t3.a > 3)))) or ((t3.a = 3) and (t3.b = 2) and (t3.c = 1) and ((t3.d > 4) or ((t3.d = 4) and (t3.e > 5))))) (cost=***.** rows=**) + -> Table scan on t3 (cost=***.** rows=**) + +explain format=tree select * from t3 where (c,(b,a),(d,e)) = (1,(2,3),(4,5)); +EXPLAIN +-> Filter: ((t3.e = 5) and (t3.d = 4) and (t3.a = 3) and (t3.b = 2) and (t3.c = 1)) (cost=***.** rows=**) + -> Table scan on t3 (cost=***.** rows=**) + +explain format=tree select * from t3 where (a,((b,c),d,(e))) >= (1,((2,3),4,5)); +EXPLAIN +-> Filter: ((t3.a > 1) or ((t3.a = 1) and ((t3.b > 2) or ((t3.b = 2) and (t3.c >= 3)) or ((t3.c = 3) and (t3.b = 2) and (t3.d > 4)) or ((t3.d = 4) and (t3.c = 3) and (t3.b = 2) and (t3.e >= 5))))) (cost=***.** rows=**) + -> Table scan on t3 (cost=***.** rows=**) + +explain format=tree select * from t3 where (((((a),b),c),d),e) <= (((((1),2),3),4),5); +EXPLAIN +-> Filter: ((t3.a < 1) or ((t3.a = 1) and (t3.b <= 2)) or ((t3.b = 2) and (t3.a = 1) and (t3.c <= 3)) or ((t3.c = 3) and (t3.b = 2) and (t3.a = 1) and (t3.d <= 4)) or ((t3.d = 4) and (t3.c = 3) and (t3.b = 2) and (t3.a = 1) and (t3.e <= 5))) (cost=***.** rows=**) + -> Table scan on t3 (cost=***.** rows=**) + +drop table t3; +# +# abnormal +# +create table t2(a int, b int, c int); +insert into t2 values (1,1,1), (2,2,2), (3,3,3); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +explain format=tree select * from t2 where (a,b) > (1,1,1); +ERROR 21000: Operand should contain 2 column(s) +explain format=tree select * from t2 where (a) < (1); +EXPLAIN +-> Filter: (t2.a < 1) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select * from t2 where (a,(b,c)) >= (1,(2,3,4)); +ERROR 21000: Operand should contain 2 column(s) +explain format=tree select * from t2 where (a,(b,c)) > ((2,2),2); +ERROR 21000: Operand should contain 1 column(s) +explain format=tree select * from t2 where (1, null) > (2,3); +EXPLAIN +-> Zero rows (Impossible WHERE) (cost=***.** rows=**) + +explain format=tree select * from t2 where (2, (select min(a) from t2)) > (b, 3); +EXPLAIN +-> Filter: (2 > t2.b) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select * from t2 where (3, (select min(a) from t2)) > (b, -1); +EXPLAIN +-> Filter: ((3 > t2.b) or (t2.b = 3)) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select a,sum(b) from t2 group by 1 having (sum(b),a) > (1,2); +EXPLAIN +-> Filter: ((sum(t2.b) > 1) or ((sum(t2.b) = 1) and (t2.a > 2))) + -> Table scan on + -> Aggregate using temporary table + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select a, (sum(b), 1) > (1,1) from t2 group by 1; +EXPLAIN +-> Table scan on + -> Aggregate using temporary table + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select a,sum(b) from t2 group by 1 order by (sum(b),a) > (1,2); +EXPLAIN +-> Sort: ((sum(t2.b) > 1) or ((sum(t2.b) = 1) and (t2.a > 2))) + -> Table scan on + -> Aggregate using temporary table + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select * from t2 where (a,b) = (select a,b from t2 where c=4); +EXPLAIN +-> Filter: ((t2.a,t2.b) = (select #2)) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + -> Select #2 (subquery in condition; run only once) + -> Filter: (t2.c = 4) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + +explain format=tree select * from t2 where ((a,b)) > (select a,b from t2 where c=4); +EXPLAIN +-> Filter: ((t2.a,t2.b) > (select #2)) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + -> Select #2 (subquery in condition; run only once) + -> Filter: (t2.c = 4) (cost=***.** rows=**) + -> Table scan on t2 (cost=***.** rows=**) + +create table t1(id int, c1 int, c2 int); +insert into t1 values (1,1,1), (2,2,2), (3,3,3); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +explain format=tree select * from t1 where (c2,c1)>(now(),now()); +EXPLAIN +-> Filter: ((t1.c2 > (now())) or ((t1.c2 = (now())) and (t1.c1 > (now())))) (cost=***.** rows=**) + -> Table scan on t1 (cost=***.** rows=**) + +explain format=tree select * from t1 where (c1+now()*id,c2)>(2,now()-id); +EXPLAIN +-> Filter: (((t1.c1 + ((now()) * t1.id)) > 2) or (((t1.c1 + ((now()) * t1.id)) = 2) and (t1.c2 > ((now()) - t1.id)))) (cost=***.** rows=**) + -> Table scan on t1 (cost=***.** rows=**) + +explain format=tree select * from t1 left join t2 on ((null,t1.c1+t2.c)>(1,2)); +EXPLAIN +-> Table scan on t1 (cost=***.** rows=**) + +select * from t1 left join t2 on ((null,t1.c1+t2.c)>(1,2)) order by 1; +id c1 c2 a b c +1 1 1 NULL NULL NULL +2 2 2 NULL NULL NULL +3 3 3 NULL NULL NULL +drop table t2, t1; +set row_comparison_decompose_threshold = default; diff --git a/mysql-test/t/row_value_comparison.test b/mysql-test/t/row_value_comparison.test new file mode 100644 index 00000000000..b50741d0165 --- /dev/null +++ b/mysql-test/t/row_value_comparison.test @@ -0,0 +1,173 @@ + +create table t1 (id int primary key, c1 int, c2 int, key k_c (c1,c2)); +insert into t1 values (1,1,1), (2,2,2), (3,3,3), (4,4,4); +insert into t1 select id + 4, c1 +4, c2 + 4 from t1; +insert into t1 select id + 8, c1 + 8, c2 + 8 from t1; +analyze table t1; + + +let $query=select * from t1 where (c1,c2) < (2,2); + +set row_comparison_decompose_threshold = 0; +eval explain $query; +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +--echo # +--echo # basic test +--echo # +set row_comparison_decompose_threshold = default; +eval explain $query; +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +--sorted_result +eval $query; + +let $query=select * from t1 where (c1,c2) = (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) > (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) != (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) <> (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) <=> (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) >= (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2) <= (2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2,id) > (2,2,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (c1,c2,id) >= (1,2,3); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +drop table t1; + +--echo # +--echo # Switch +--echo # +create table t1(a int, b int, c int, d int); + +set row_comparison_decompose_threshold = 1; +let $query=select * from t1 where (a,b) > (1,2); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +set row_comparison_decompose_threshold = 2; +let $query=select * from t1 where (a,(b,c,d)) >= (1,(2,3,4)); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +let $query=select * from t1 where (a,(b,c),(d)) >= (1,(2,3),4); +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +eval explain format=tree $query; + +drop table t1; +set row_comparison_decompose_threshold = default; + +--echo # +--echo # Nested row value +--echo # +create table t3(a int, b int, c int, d int, e int); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t3 where (c,(b,a),(d,e)) > (1,(2,3),(4,5)); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t3 where (c,(b,a),(d,e)) = (1,(2,3),(4,5)); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t3 where (a,((b,c),d,(e))) >= (1,((2,3),4,5)); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t3 where (((((a),b),c),d),e) <= (((((1),2),3),4),5); + +drop table t3; + +--echo # +--echo # abnormal +--echo # +create table t2(a int, b int, c int); +insert into t2 values (1,1,1), (2,2,2), (3,3,3); +analyze table t2; + + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +--error 1241 +explain format=tree select * from t2 where (a,b) > (1,1,1); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where (a) < (1); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +--error 1241 +explain format=tree select * from t2 where (a,(b,c)) >= (1,(2,3,4)); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +--error 1241 +explain format=tree select * from t2 where (a,(b,c)) > ((2,2),2); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where (1, null) > (2,3); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where (2, (select min(a) from t2)) > (b, 3); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where (3, (select min(a) from t2)) > (b, -1); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select a,sum(b) from t2 group by 1 having (sum(b),a) > (1,2); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select a, (sum(b), 1) > (1,1) from t2 group by 1; + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select a,sum(b) from t2 group by 1 order by (sum(b),a) > (1,2); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where (a,b) = (select a,b from t2 where c=4); + +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t2 where ((a,b)) > (select a,b from t2 where c=4); + + +create table t1(id int, c1 int, c2 int); +insert into t1 values (1,1,1), (2,2,2), (3,3,3); +analyze table t1; + +# const table +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t1 where (c2,c1)>(now(),now()); + +# now() is copyed when decomposition, item cache ensures that consistency. +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t1 where (c1+now()*id,c2)>(2,now()-id); + +# without row decomposition, the predicate act as an hash join extra condition. +--replace_regex /cost=[0-9.e\-]+\ rows=[0-9.e\-]+/cost=***.** rows=**/ +explain format=tree select * from t1 left join t2 on ((null,t1.c1+t2.c)>(1,2)); +select * from t1 left join t2 on ((null,t1.c1+t2.c)>(1,2)) order by 1; + +drop table t2, t1; + +set row_comparison_decompose_threshold = default; \ No newline at end of file diff --git a/sql/parse_tree_items.cc b/sql/parse_tree_items.cc index baffb22f24f..99cc9879153 100644 --- a/sql/parse_tree_items.cc +++ b/sql/parse_tree_items.cc @@ -54,6 +54,7 @@ #include "sql/system_variables.h" #include "sql/table.h" #include "sql/trigger_def.h" +#include "sql/row_comparison_decomposer.cc" // decompose_row_comparison #include "sql_string.h" #include "template_utils.h" @@ -170,6 +171,23 @@ bool PTI_comp_op::do_itemize(Parse_context *pc, Item **res) { if (super::do_itemize(pc, res) || left->itemize(pc, &left) || right->itemize(pc, &right)) return true; + /* + To enable the range optimizer to generate index range conditions + and pushdown index conditions from row value comparisons, we try + to optimize the comparison of row values by splitting them + if applicable. Firstly, all conditions must be met: + 1) switch is enabled, + 2) both left and right are row values. + */ + if (current_thd->variables.row_comparison_decompose_threshold && + left->type() == Item::ROW_ITEM && + right->type() == Item::ROW_ITEM) { + *res = decompose_row_comparison(pc, left, right, boolfunc2creator); + if (*res != nullptr) { + return false; + } + // continue if row value split isn't applicable or failed. + } *res = (*boolfunc2creator)(false)->create(left, right); return *res == nullptr; diff --git a/sql/row_comparison_decomposer.cc b/sql/row_comparison_decomposer.cc new file mode 100644 index 00000000000..0be7be250a2 --- /dev/null +++ b/sql/row_comparison_decomposer.cc @@ -0,0 +1,351 @@ +/* Copyright (c) 2009, 2024, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/** + @file row_comparison_decomposer.cc + + Implementation of row value comparison decomposition functions. Enabled if + `row_comparison_decompose_threshold` is set to larger than 0. + + @details + + The purpose of this operation is to optimize query performance and enhance + query flexibility. For example, by decomposing the row value comparison + (a, b) > (1, 2) into a > 1 OR (a = 1 AND b > 2), the database can optimize + and execute queries more efficiently. + + - Query optimization: + The decomposed query may be more suitable for the database's + query optimizer. For example, it can take advantage of indexes for more + efficient querying or apply query optimization rules more effectively. + + - Query flexibility: + The decomposed query may be easier to manipulate further, such as + filtering, grouping, or sorting. + + When the row_comparison_decompose_threshold option is enabled (>0), all + PTI_comp_op nodes will attempt to decompose row value comparisons. + This can provide more optimization opportunities for the database, + thereby improving query performance. + + All the decomposition rules as shown in + https://dev.mysql.com/doc/refman/8.0/en/comparison-operators.html +*/ + +#include "sql/comp_creator.h" +#include "sql/current_thd.h" +#include "sql/item.h" +#include "sql/item_cmpfunc.h" + +class THD; +enum RowValueCompareType { EQ, EQUAL, NE, LT, GT, LE, GE, OP_UNKNOWN }; + +static inline bool is_invalid_row_value_comparison(Item *left, Item *right, + uint *max_cols); +static inline Item *decompose_row_value_op_straight( + Parse_context *pc, Item *left, Item *right, + RowValueCompareType type); +static inline Item *decompose_row_value_op_combine( + Parse_context *pc, Item *left, Item *right, + RowValueCompareType type); +static inline Item *decompose_function(Parse_context *pc, + RowValueCompareType type, + bool is_recursive, bool is_last, + Item *left, Item *right); + +static inline RowValueCompareType get_type(const char *sym) { + if (strcmp(sym, "=") == 0) { + return RowValueCompareType::EQ; + } else if (strcmp(sym, "<=>") == 0) { + return RowValueCompareType::EQUAL; + } else if (strcmp(sym, "<>") == 0) { + return RowValueCompareType::NE; + } else if (strcmp(sym, "<") == 0) { + return RowValueCompareType::LT; + } else if (strcmp(sym, ">") == 0) { + return RowValueCompareType::GT; + } else if (strcmp(sym, "<=") == 0) { + return RowValueCompareType::LE; + } else if (strcmp(sym, ">=") == 0) { + return RowValueCompareType::GE; + } + assert(0); + return RowValueCompareType::OP_UNKNOWN; +} + +using decomposer_func = Item *(*)(Parse_context *, Item *, Item *, + RowValueCompareType); +static std::vector decomposer_func_factory = { + {// RowValueCompareType::EQ, + decompose_row_value_op_straight}, + {// RowValueCompareType::EQUAL, + decompose_row_value_op_straight}, + {// RowValueCompareType::NE, + decompose_row_value_op_straight}, + {// RowValueCompareType::LT, + decompose_row_value_op_combine}, + {// RowValueCompareType::GT, + decompose_row_value_op_combine}, + {// RowValueCompareType::LE, + decompose_row_value_op_combine}, + {// RowValueCompareType::GE, + decompose_row_value_op_combine}, +}; + +/** + This function set is used to create the comparison function for the + corresponding decomposed element in the row value comparison, as the following + format: + comparator_func_factory[RowValueCompareType][0] for NOT last element, + comparator_func_factory[RowValueCompareType][1] for the last element. + Note that only GE and LE need to handle the last element differently. + (left `func` right) is decomposed into: + (left[0] `func_without_eq` right[0]) OR ... OR + (left[0] = right[0] and .. and left[n-1] `func_without_eq` right[n-1]) OR + (left[0] = right[0] and .. left[n] `func` right[n]) +*/ +static std::vector> comparator_func_factory = { + {&eq_creator, &eq_creator}, // RowValueCompareType::EQ + {&equal_creator, &equal_creator}, // RowValueCompareType::EQUAL + {&ne_creator, &ne_creator}, // RowValueCompareType::NE + {<_creator, <_creator}, // RowValueCompareType::LT + {>_creator, >_creator}, // RowValueCompareType::GT + {<_creator, &le_creator}, // RowValueCompareType::LE + {>_creator, &ge_creator}, // RowValueCompareType::GE +}; + +static inline Item *decompose_function(Parse_context *pc, + RowValueCompareType type, + bool is_recursive, bool is_last, + Item *left, Item *right) { + if (is_recursive) { + return decomposer_func_factory[type](pc, left, right, type); + } else { + return comparator_func_factory[type][is_last]->create(left, right); + } +} + +/** + Check if this LEFT item tree and RIGHT item tree are invalid for row value + decomposition recursively. + Return true if any mismatch is found or the number of columns exceeds the + maximum limit. +*/ +static inline bool is_invalid_row_value_comparison(Item *left, Item *right, uint *max_cols) { + if (left->type() != Item::ROW_ITEM || right->type() != Item::ROW_ITEM || + left->cols() != right->cols() || + left->cols() <= 1 || left->cols() > *max_cols) { + return true; + } + + for (uint i = 0; i < left->cols(); i++) { + if (left->element_index(i)->type() == Item::ROW_ITEM && + is_invalid_row_value_comparison(left->element_index(i), + right->element_index(i), + max_cols)) { + return true; + } else { + if (*max_cols == 0) { + return true; + } + *max_cols = *max_cols - 1; + } + } + return false; +} + +/** + Decompose row value comparison into multiple single value comparisons. + Respectively, we handle different comparison operators for row values + differently. For `=`, `<=>`: + - (a, b) = (x, y) is decomposed into a = x AND b = y. + This situation is handled in `decompose_row_value_op_straight()`. + + For `<>`: + - (a, b) <> (x, y) is decomposed into a <> x OR b <> y. + This situation is handled in `decompose_row_value_op_straight()`. + + For `>`, `<`: + - (a, b) > (x, y) is decomposed into a > x OR (a = x AND b > y). + same as `<`. + This situation is handled in `decompose_row_value_op_combine()`. + We need to build the new equal function for the prefix elements. + + For `>=`, `<=`: + - (a, b) >= (x, y) is decomposed into a > x OR (a = x AND b >= y). + same as `<=`. + This situation is handled in `decompose_row_value_op_combine()`. + We also need to build the new equal function as above, + but differently, we must seperate the equal operator from the given + function until the last element. + + Note that the first element of the row value is handled differently + according to + https://dev.mysql.com/doc/refman/8.0/en/comparison-operators.html + + @param pc The parse context. + @param left The left operand of the comparison. + @param right The right operand of the comparison. + @param func The comparison function. + @return The decomposed comparison. +*/ +Item *decompose_row_comparison(Parse_context *pc, Item *left, Item *right, + chooser_compare_func_creator func) { + DBUG_TRACE; + assert(left->type() == Item::ROW_ITEM && right->type() == Item::ROW_ITEM); + + uint max_allowed_cols = + current_thd->variables.row_comparison_decompose_threshold; + assert(max_allowed_cols > 0); + + // Do a detailed check to see if the row value comparison can be decomposed. + if (is_invalid_row_value_comparison(left, right, &max_allowed_cols)) { + return nullptr; + } + + Item *res = nullptr; + + RowValueCompareType type = get_type((*func)(false)->symbol(false)); + assert(type != RowValueCompareType::OP_UNKNOWN); + + res = decompose_function(pc, type, true, false, left, right); + + return res; +} + +static inline Item *decompose_row_value_op_straight(Parse_context *pc, + Item *left, Item *right, + RowValueCompareType type) { + assert(type == RowValueCompareType::EQ || type == RowValueCompareType::NE || + type == RowValueCompareType::EQUAL); + + assert(left->type() == right->type() && left->cols() == right->cols()); + + Item_cond *result; + if (type != RowValueCompareType::NE) { + result = new (pc->mem_root) Item_cond_and; + } else { + result = new (pc->mem_root) Item_cond_or; + } + if (result == nullptr) { + return nullptr; + } + + uint cols = left->cols(); + for (uint i = 0; i < cols; i++) { + Item *item_i = nullptr; + + bool is_recursive = false; + // Recursively decompose row value comparison. + if (left->element_index(i)->type() == Item::ROW_ITEM) { + assert(right->element_index(i)->type() == Item::ROW_ITEM); + is_recursive = true; + } + + item_i = + decompose_function(pc, type, is_recursive, (i == cols - 1), + left->element_index(i), right->element_index(i)); + + if (item_i == nullptr) { + return nullptr; + } + result->add(item_i); + } + + return result; +} + +static inline Item *decompose_row_value_op_combine(Parse_context *pc, + Item *left, Item *right, + RowValueCompareType type) { + assert(type == RowValueCompareType::LT || type == RowValueCompareType::GT || + type == RowValueCompareType::LE || type == RowValueCompareType::GE); + + assert(left->type() == right->type() && left->cols() == right->cols()); + + Item_cond_or *result = new (pc->mem_root) Item_cond_or; + if (result == nullptr) { + return nullptr; + } + + Item_cond_and *and_prefix = nullptr; + uint cols = left->cols(); + for (uint i = 0; i < cols; i++) { + Item *item_i = nullptr; + Item *tuple_i = nullptr; + + bool is_recursive = false; + // Recursively decompose row value for comparison. + if (left->element_index(i)->type() == Item::ROW_ITEM) { + assert(right->element_index(i)->type() == Item::ROW_ITEM); + is_recursive = true; + } + + item_i = + decompose_function(pc, type, is_recursive, (i == cols - 1), + left->element_index(i), right->element_index(i)); + + if (item_i == nullptr) { + return nullptr; + } + + if (i == 0) { + tuple_i = item_i; + } else { + assert(and_prefix); + // must have a deep copy here. + tuple_i = and_prefix->copy_andor_structure(pc->thd); + if (tuple_i) { + down_cast(tuple_i)->add(item_i); + } + } + if (tuple_i == nullptr) { + return nullptr; + } + result->add(tuple_i); + + // Skip build and_prefix for the last element. + if (i == cols - 1) { + break; + } + // build new equal function for this `left[i] = right[i]`. + Item_func *eq_i = new (pc->mem_root) + Item_func_eq(left->element_index(i), right->element_index(i)); + // inheritate the previous and_prefix + Item_cond_and *new_prefix; + if (and_prefix == nullptr) { + new_prefix = new (pc->mem_root) Item_cond_and(); + } else { + new_prefix = + new (pc->mem_root) Item_cond_and(*and_prefix->argument_list()); + } + + if (new_prefix == nullptr || eq_i == nullptr) { + return nullptr; + } + + // update and_prefix with new equal function. + new_prefix->add(eq_i); + and_prefix = new_prefix; + } + return result; +} diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 6ada71fcc29..bd9ad85e13f 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -7518,4 +7518,11 @@ Sys_var_bool Sys_restrict_fk_on_non_standard_key( NON_PERSIST SESSION_VAR(restrict_fk_on_non_standard_key), CMD_LINE(OPT_ARG), DEFAULT(true), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(restrict_fk_on_non_standard_key_check), ON_UPDATE(nullptr)); -} // namespace \ No newline at end of file +} // namespace + +Sys_var_ulong Sys_row_comparison_decompose_threshold( + "row_comparison_decompose_threshold", + "Control the maximum number of values in a row to decompose for comparison.", + HINT_UPDATEABLE SESSION_VAR(row_comparison_decompose_threshold), + CMD_LINE(OPT_ARG), VALID_RANGE(0, 1000), DEFAULT(10), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(nullptr), ON_UPDATE(nullptr)); \ No newline at end of file diff --git a/sql/system_variables.h b/sql/system_variables.h index a248afc9558..30d3136c1df 100644 --- a/sql/system_variables.h +++ b/sql/system_variables.h @@ -455,6 +455,10 @@ struct System_variables { */ ulong terminology_use_previous; + /** + @sa Sys_row_comparison_decompose_threshold + */ + ulong row_comparison_decompose_threshold; /** @sa Sys_connection_memory_limit */ -- 2.19.1