Description:
Optimizer trace for the following query (found in subquery_sj.inc):
explain select t21.* from t21,t22 where t21.a = t22.a and t22.a in
(select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
shows that optimizer calculates lower cost with duplicate elimination than without for MaterializedScan and DuplicateWeedout.
That should not be possible since the duplicate elimination will have a cost. Here is the relevant part of optimizer trace for the query:
"rest_of_plan": [\
{\
"plan_prefix": [\
"`t11`",\
"`t12`",\
"`t22`"\
],\
"table": "`t21`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 26,\
"cost": 2.352,\
"chosen": true\
}\
]\
},\
"condition_filtering_pct": 10,\
"rows_for_plan": 0.676,\
"cost_for_plan": 14.227,\
"semijoin_strategy_choice": [\
{\
"strategy": "MaterializeScan",\
"recalculate_access_paths_and_cost": {\
"tables": [\
{\
"table": "`t22`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 2.6,\
"cost": 5.8101,\
"chosen": true\
}\
]\
}\
},\
{\
"table": "`t21`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 26,\
"cost": 1.338,\
"chosen": true\
}\
]\
}\
}\
]\
},\
"cost": 13.248,\
"rows": 0.676,\
"duplicate_tables_left": false,\
"chosen": true\
},\
{\
"strategy": "DuplicatesWeedout",\
"cost": 13.317,\
"rows": 0.676,\
"duplicate_tables_left": false,\
"chosen": false\
}\
],\
"sort_cost": 0.676,\
"new_cost_for_plan": 13.924,\
"chosen": true\
}\
]\
}\
]\
},\
As we can see, the cost of an ordinary join is 14.227 while the cost with MaterializedScan is 13.248 and DuplicateWeedout 13.317.
How to repeat:
CREATE TABLE t1 (
a int(11) NOT NULL,
b int(11) NOT NULL,
c datetime default NULL,
PRIMARY KEY (a),
KEY idx_bc (b,c)
);
INSERT INTO t1 VALUES
(406989,67,'2006-02-23 17:08:46'), (150078,67,'2005-10-26 11:17:45'),
(406993,67,'2006-02-27 11:20:57'), (245655,67,'2005-12-08 15:59:08'),
(406994,67,'2006-02-27 11:26:46'), (256,67,NULL),
(398341,67,'2006-02-20 04:48:44'), (254,67,NULL),(1120,67,NULL),
(406988,67,'2006-02-23 17:07:22'), (255,67,NULL),
(398340,67,'2006-02-20 04:38:53'),(406631,67,'2006-02-23 10:49:42'),
(245653,67,'2005-12-08 15:59:07'),(406992,67,'2006-02-24 16:47:18'),
(245654,67,'2005-12-08 15:59:08'),(406995,67,'2006-02-28 11:55:00'),
(127261,67,'2005-10-13 12:17:58'),(406991,67,'2006-02-24 16:42:32'),
(245652,67,'2005-12-08 15:58:27'),(398545,67,'2006-02-20 04:53:13'),
(154504,67,'2005-10-28 11:53:01'),(9199,67,NULL),(1,67,'2006-02-23 15:01:35'),
(223456,67,NULL),(4101,67,NULL),(1133,67,NULL),
(406990,67,'2006-02-23 18:01:45'),(148815,67,'2005-10-25 15:34:17'),
(148812,67,'2005-10-25 15:30:01'),(245651,67,'2005-12-08 15:58:27'),
(154503,67,'2005-10-28 11:52:38');
create table t11 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 asc;
create table t12 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 desc;
create table t21 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 asc;
create table t22 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 desc;
update t22 set c = '2005-12-08 15:58:27' where a = 255;
SET optimizer_trace="enabled=on";
SET optimizer_trace_max_mem_size=1000000;
explain select t21.* from t21,t22 where t21.a = t22.a and
t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
select * from information_schema.optimizer_trace into outfile "tmp.out";
drop table t1, t11, t12, t21, t22;
Description: Optimizer trace for the following query (found in subquery_sj.inc): explain select t21.* from t21,t22 where t21.a = t22.a and t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a; shows that optimizer calculates lower cost with duplicate elimination than without for MaterializedScan and DuplicateWeedout. That should not be possible since the duplicate elimination will have a cost. Here is the relevant part of optimizer trace for the query: "rest_of_plan": [\ {\ "plan_prefix": [\ "`t11`",\ "`t12`",\ "`t22`"\ ],\ "table": "`t21`",\ "best_access_path": {\ "considered_access_paths": [\ {\ "rows_to_scan": 26,\ "access_type": "scan",\ "using_join_cache": true,\ "buffers_needed": 1,\ "resulting_rows": 26,\ "cost": 2.352,\ "chosen": true\ }\ ]\ },\ "condition_filtering_pct": 10,\ "rows_for_plan": 0.676,\ "cost_for_plan": 14.227,\ "semijoin_strategy_choice": [\ {\ "strategy": "MaterializeScan",\ "recalculate_access_paths_and_cost": {\ "tables": [\ {\ "table": "`t22`",\ "best_access_path": {\ "considered_access_paths": [\ {\ "rows_to_scan": 26,\ "access_type": "scan",\ "using_join_cache": true,\ "buffers_needed": 1,\ "resulting_rows": 2.6,\ "cost": 5.8101,\ "chosen": true\ }\ ]\ }\ },\ {\ "table": "`t21`",\ "best_access_path": {\ "considered_access_paths": [\ {\ "rows_to_scan": 26,\ "access_type": "scan",\ "using_join_cache": true,\ "buffers_needed": 1,\ "resulting_rows": 26,\ "cost": 1.338,\ "chosen": true\ }\ ]\ }\ }\ ]\ },\ "cost": 13.248,\ "rows": 0.676,\ "duplicate_tables_left": false,\ "chosen": true\ },\ {\ "strategy": "DuplicatesWeedout",\ "cost": 13.317,\ "rows": 0.676,\ "duplicate_tables_left": false,\ "chosen": false\ }\ ],\ "sort_cost": 0.676,\ "new_cost_for_plan": 13.924,\ "chosen": true\ }\ ]\ }\ ]\ },\ As we can see, the cost of an ordinary join is 14.227 while the cost with MaterializedScan is 13.248 and DuplicateWeedout 13.317. How to repeat: CREATE TABLE t1 ( a int(11) NOT NULL, b int(11) NOT NULL, c datetime default NULL, PRIMARY KEY (a), KEY idx_bc (b,c) ); INSERT INTO t1 VALUES (406989,67,'2006-02-23 17:08:46'), (150078,67,'2005-10-26 11:17:45'), (406993,67,'2006-02-27 11:20:57'), (245655,67,'2005-12-08 15:59:08'), (406994,67,'2006-02-27 11:26:46'), (256,67,NULL), (398341,67,'2006-02-20 04:48:44'), (254,67,NULL),(1120,67,NULL), (406988,67,'2006-02-23 17:07:22'), (255,67,NULL), (398340,67,'2006-02-20 04:38:53'),(406631,67,'2006-02-23 10:49:42'), (245653,67,'2005-12-08 15:59:07'),(406992,67,'2006-02-24 16:47:18'), (245654,67,'2005-12-08 15:59:08'),(406995,67,'2006-02-28 11:55:00'), (127261,67,'2005-10-13 12:17:58'),(406991,67,'2006-02-24 16:42:32'), (245652,67,'2005-12-08 15:58:27'),(398545,67,'2006-02-20 04:53:13'), (154504,67,'2005-10-28 11:53:01'),(9199,67,NULL),(1,67,'2006-02-23 15:01:35'), (223456,67,NULL),(4101,67,NULL),(1133,67,NULL), (406990,67,'2006-02-23 18:01:45'),(148815,67,'2005-10-25 15:34:17'), (148812,67,'2005-10-25 15:30:01'),(245651,67,'2005-12-08 15:58:27'), (154503,67,'2005-10-28 11:52:38'); create table t11 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 asc; create table t12 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 desc; create table t21 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 asc; create table t22 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 desc; update t22 set c = '2005-12-08 15:58:27' where a = 255; SET optimizer_trace="enabled=on"; SET optimizer_trace_max_mem_size=1000000; explain select t21.* from t21,t22 where t21.a = t22.a and t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a; select * from information_schema.optimizer_trace into outfile "tmp.out"; drop table t1, t11, t12, t21, t22;