Description:
Optimizer trace for the following query (found in subquery_sj.inc):
explain select t21.* from t21,t22 where t21.a = t22.a and t22.a in
(select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
shows that optimizer calculates lower cost with duplicate elimination than without for MaterializedScan and DuplicateWeedout.
That should not be possible since the duplicate elimination will have a cost. Here is the relevant part of optimizer trace for the query:
"rest_of_plan": [\
{\
"plan_prefix": [\
"`t11`",\
"`t12`",\
"`t22`"\
],\
"table": "`t21`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 26,\
"cost": 2.352,\
"chosen": true\
}\
]\
},\
"condition_filtering_pct": 10,\
"rows_for_plan": 0.676,\
"cost_for_plan": 14.227,\
"semijoin_strategy_choice": [\
{\
"strategy": "MaterializeScan",\
"recalculate_access_paths_and_cost": {\
"tables": [\
{\
"table": "`t22`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 2.6,\
"cost": 5.8101,\
"chosen": true\
}\
]\
}\
},\
{\
"table": "`t21`",\
"best_access_path": {\
"considered_access_paths": [\
{\
"rows_to_scan": 26,\
"access_type": "scan",\
"using_join_cache": true,\
"buffers_needed": 1,\
"resulting_rows": 26,\
"cost": 1.338,\
"chosen": true\
}\
]\
}\
}\
]\
},\
"cost": 13.248,\
"rows": 0.676,\
"duplicate_tables_left": false,\
"chosen": true\
},\
{\
"strategy": "DuplicatesWeedout",\
"cost": 13.317,\
"rows": 0.676,\
"duplicate_tables_left": false,\
"chosen": false\
}\
],\
"sort_cost": 0.676,\
"new_cost_for_plan": 13.924,\
"chosen": true\
}\
]\
}\
]\
},\
As we can see, the cost of an ordinary join is 14.227 while the cost with MaterializedScan is 13.248 and DuplicateWeedout 13.317.
How to repeat:
CREATE TABLE t1 (
a int(11) NOT NULL,
b int(11) NOT NULL,
c datetime default NULL,
PRIMARY KEY (a),
KEY idx_bc (b,c)
);
INSERT INTO t1 VALUES
(406989,67,'2006-02-23 17:08:46'), (150078,67,'2005-10-26 11:17:45'),
(406993,67,'2006-02-27 11:20:57'), (245655,67,'2005-12-08 15:59:08'),
(406994,67,'2006-02-27 11:26:46'), (256,67,NULL),
(398341,67,'2006-02-20 04:48:44'), (254,67,NULL),(1120,67,NULL),
(406988,67,'2006-02-23 17:07:22'), (255,67,NULL),
(398340,67,'2006-02-20 04:38:53'),(406631,67,'2006-02-23 10:49:42'),
(245653,67,'2005-12-08 15:59:07'),(406992,67,'2006-02-24 16:47:18'),
(245654,67,'2005-12-08 15:59:08'),(406995,67,'2006-02-28 11:55:00'),
(127261,67,'2005-10-13 12:17:58'),(406991,67,'2006-02-24 16:42:32'),
(245652,67,'2005-12-08 15:58:27'),(398545,67,'2006-02-20 04:53:13'),
(154504,67,'2005-10-28 11:53:01'),(9199,67,NULL),(1,67,'2006-02-23 15:01:35'),
(223456,67,NULL),(4101,67,NULL),(1133,67,NULL),
(406990,67,'2006-02-23 18:01:45'),(148815,67,'2005-10-25 15:34:17'),
(148812,67,'2005-10-25 15:30:01'),(245651,67,'2005-12-08 15:58:27'),
(154503,67,'2005-10-28 11:52:38');
create table t11 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 asc;
create table t12 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 desc;
create table t21 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 asc;
create table t22 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 desc;
update t22 set c = '2005-12-08 15:58:27' where a = 255;
SET optimizer_trace="enabled=on";
SET optimizer_trace_max_mem_size=1000000;
explain select t21.* from t21,t22 where t21.a = t22.a and
t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
select * from information_schema.optimizer_trace into outfile "tmp.out";
drop table t1, t11, t12, t21, t22;