=== added file 'mysql-test/t/itntit.test' --- mysql-test/t/itntit.test 1970-01-01 00:00:00 +0000 +++ mysql-test/t/itntit.test 2010-02-24 13:53:16 +0000 @@ -0,0 +1,57 @@ +# This is to test how Firstmatch handles plan ot, it1, nt, it2 + +create table tc_nt(a int); +create table ta_ot(a int); +create table tb_it1(a int); +create table td_it2(a int); + +insert into ta_ot values(1),(2); +insert into tc_nt values(1),(2); +insert into tb_it1 values(1),(2); +insert into td_it2 values(1),(2); + +insert into tc_nt values(1); + +set optimizer_switch="default",optimizer_prune_level=0; + +# Use this to avoid any interference with join buffering, but it does +# not influence SELECT results anyway. +#set optimizer_join_cache_level=0; + +# The correct SELECT results +explain select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); +select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); +# this should be the same: +select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (1,2); + +# Default chosen strategy is materialization, but we want to test +# firstmatch, so we turn materialization off. We also request +# ot,it1,nt,it2 as plan. But then duplicate weedout is chosen, +# so we ask that it is given a high cost. Only then do we hit +# firstmatch with the desired plan, and see the bugs. +set optimizer_switch="semijoin=on,firstmatch=on,materialization=off,loosescan=off"; +set debug="+d,info,query,opt,optimizer_force_alphab_sorted_plan,optimizer_avoid_dups_weedout"; + +explain select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); +# this misses a row: +select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); + +# ok, so jumping to "ot" from "it2" is bad, because it misses rows from +# "nt"; so let's try jumping to "nt" instead: +set debug="+d,test_fix_jump_to_nt"; +explain select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); +# correct results now +select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); + +# but this is not a correct logic either, as seen here: + +insert into tb_it1 values(1); + +explain select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); +# extra rows: +select * from ta_ot,tc_nt where ta_ot.a=tc_nt.a and ta_ot.a in (select tb_it1.a from tb_it1, td_it2); + +# it could well be that (it1,nt,it2) will only work when we implement +# *two* firstmatch jumps: from td_it2 to tc_nt and from tb_it1 to ta_ot. + +drop table ta_ot,tb_it1,td_it2,tc_nt; === modified file 'sql/sql_select.cc' --- sql/sql_select.cc 2010-02-23 13:21:49 +0000 +++ sql/sql_select.cc 2010-02-24 13:59:07 +0000 @@ -264,6 +264,11 @@ Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); +#ifndef DBUG_OFF +const double ARTIFICIAL_LOW_COST= 1e-20; +const double ARTIFICIAL_HIGH_COST= 1/ARTIFICIAL_LOW_COST; +#endif + /** This handles SELECT with and without UNION. @@ -1333,16 +1338,46 @@ case SJ_OPT_FIRST_MATCH: { JOIN_TAB *j, *jump_to= tab-1; + bool sj_inner_tab_bounds_set= false; + bool test_fix_jump_to_nt= false; + DBUG_EXECUTE_IF("test_fix_jump_to_nt", + /* + Make the last table of the range jump back to the + last non-correlated table. + */ + test_fix_jump_to_nt= true;); for (j= tab; j != tab + pos->n_sj_tables; j++) { - if (!tab->emb_sj_nest) - jump_to= tab; + if (test_fix_jump_to_nt) + { + if (!j->emb_sj_nest) + { + /* non-correlated 'nt' table, jump back to it on first match */ + jump_to= j; + } + else + { + /* inner table, remember the interval of them */ + j->first_sj_inner_tab= tab; + j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; + sj_inner_tab_bounds_set= true; + } + } else { - j->first_sj_inner_tab= tab; - j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; + /* original code: jump back to "ot" */ + if (!tab->emb_sj_nest) /* this is never true as tab is a "it" */ + jump_to= tab; + else + { + /* inner table, remember the interval of them */ + j->first_sj_inner_tab= tab; + j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; + sj_inner_tab_bounds_set= true; + } } } + DBUG_ASSERT(sj_inner_tab_bounds_set); j[-1].do_firstmatch= jump_to; i += pos->n_sj_tables; break; @@ -7587,6 +7622,24 @@ advance_sj_state(join, remaining_tables, s, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); + /* This prefers a plan where tables are listed in alphabetical order */ + DBUG_EXECUTE_IF("optimizer_force_alphab_sorted_plan", + { + uint i= 1; + while (i < (idx + 1) && + (strcmp(join->positions[i-1].table->table->alias, + join->positions[i].table->table->alias) + == -1)) + i++; + if (i == (idx + 1)) + { + DBUG_PRINT("info", ("found alphabetically sorted" + " plan with %u tables", idx + 1)); + current_read_time= current_record_count= + ARTIFICIAL_LOW_COST; // prefer this plan + } + } + ); /* Expand only partial plans with lower cost than the best QEP so far */ if ((current_read_time + current_record_count / (double) TIME_FOR_COMPARE) >= join->best_read) @@ -13582,6 +13650,13 @@ one_lookup_cost; dups_cost += write_cost + full_lookup_cost; + DBUG_EXECUTE_IF("optimizer_avoid_dups_weedout", + { + /* This gives a high cost to this strategy: */ + dups_cost= prefix_rec_count= ARTIFICIAL_HIGH_COST; + sj_outer_fanout= 1; + } + ); /* Use the strategy if * it is cheaper then what we've had, or