diff --git a/mysql-test/r/bug115840.result b/mysql-test/r/bug115840.result new file mode 100644 index 00000000000..30c77f74ce7 --- /dev/null +++ b/mysql-test/r/bug115840.result @@ -0,0 +1,48 @@ +# +# Bug#115840: Incorrect Cost Display for Join with Materialized Subquery +# +SET @old_optimizer_switch= @@optimizer_switch; +SET optimizer_switch="firstmatch=off,duplicateweedout=off"; +drop table if exists t1; +CREATE TABLE t1 ( a INTEGER NOT NULL, b INTEGER NOT NULL ); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +EXPLAIN FORMAT=tree SELECT a FROM t1 WHERE a NOT IN ( SELECT b FROM t1 WHERE b > 2 ); +EXPLAIN +-> Nested loop antijoin (rows=3) + -> Table scan on t1 (rows=3) + -> Single-row index lookup on using (b=t1.a) (rows=1) + -> Materialize with deduplication (rows=3) + -> Filter: (t1.b is not null) (rows=3) + -> Filter: (t1.b > 2) (rows=3) + -> Table scan on t1 (rows=3) + +DROP TABLE t1; +CREATE TABLE t1 ( f1 TIME, f2 VARCHAR(1) ); +INSERT INTO t1 VALUES ('08:35:24', 'X'), ('14:51:13', 'S'), ('11:22:33', 'Q'); +drop table if exists t2; +CREATE TABLE t2 ( pk INTEGER ); +INSERT INTO t2 VALUES (1), (2); +ANALYZE TABLE t1, t2; +Table Op Msg_type Msg_text +test.t1 analyze status OK +test.t2 analyze status OK +EXPLAIN FORMAT=tree SELECT * FROM t1 WHERE EXISTS ( SELECT alias1.f2 FROM t2 LEFT JOIN t1 ON 1 LEFT JOIN t1 AS alias1 ON 1 ); +EXPLAIN +-> Inner hash join (no condition) (rows=3) + -> Table scan on t1 (rows=3) + -> Hash + -> Constant row from (rows=1) + -> Materialize with deduplication (rows=18) + -> Left hash join (no condition) (rows=18) + -> Left hash join (no condition) (rows=6) + -> Table scan on t2 (rows=2) + -> Hash + -> Table scan on t1 (rows=3) + -> Hash + -> Table scan on alias1 (rows=3) + +DROP TABLE t1, t2; +SET optimizer_switch= @old_optimizer_switch; diff --git a/mysql-test/t/bug115840.test b/mysql-test/t/bug115840.test new file mode 100644 index 00000000000..e9f2f97fdeb --- /dev/null +++ b/mysql-test/t/bug115840.test @@ -0,0 +1,33 @@ +--echo # +--echo # Bug#115840: Incorrect Cost Display for Join with Materialized Subquery +--echo # +--source include/elide_costs.inc + +SET @old_optimizer_switch= @@optimizer_switch; + +SET optimizer_switch="firstmatch=off,duplicateweedout=off"; +--disable_warnings +drop table if exists t1; +CREATE TABLE t1 ( a INTEGER NOT NULL, b INTEGER NOT NULL ); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +--enable_warnings +ANALYZE TABLE t1; + +--replace_regex $elide_costs +EXPLAIN FORMAT=tree SELECT a FROM t1 WHERE a NOT IN ( SELECT b FROM t1 WHERE b > 2 ); +DROP TABLE t1; + +CREATE TABLE t1 ( f1 TIME, f2 VARCHAR(1) ); +INSERT INTO t1 VALUES ('08:35:24', 'X'), ('14:51:13', 'S'), ('11:22:33', 'Q'); +--disable_warnings +drop table if exists t2; +CREATE TABLE t2 ( pk INTEGER ); +INSERT INTO t2 VALUES (1), (2); +ANALYZE TABLE t1, t2; +--enable_warnings + +--replace_regex $elide_costs +EXPLAIN FORMAT=tree SELECT * FROM t1 WHERE EXISTS ( SELECT alias1.f2 FROM t2 LEFT JOIN t1 ON 1 LEFT JOIN t1 AS alias1 ON 1 ); + +DROP TABLE t1, t2; +SET optimizer_switch= @old_optimizer_switch; diff --git a/sql/sql_executor.cc b/sql/sql_executor.cc index 50010a2d111..34f82794da9 100644 --- a/sql/sql_executor.cc +++ b/sql/sql_executor.cc @@ -1895,14 +1895,23 @@ void SetCostOnNestedLoopAccessPath(const Cost_model_server &cost_model, return; } + // If the type of the table is MATERIALIZE, we should use its table_path + // to estimate the number of rows after the join. (Bug#115840) + auto inner_output_rows = (inner->type == AccessPath::MATERIALIZE) + ? inner->materialize().table_path->num_output_rows() + : inner->num_output_rows(); + auto outer_output_rows = (outer->type == AccessPath::MATERIALIZE) + ? outer->materialize().table_path->num_output_rows() + : outer->num_output_rows(); + // Mirrors set_prefix_join_cost(), even though the cost calculation doesn't // make a lot of sense. double inner_expected_rows_before_filter = pos_inner->filter_effect > 0.0 - ? (inner->num_output_rows() / pos_inner->filter_effect) + ? (inner_output_rows / pos_inner->filter_effect) : 0.0; double joined_rows = - outer->num_output_rows() * inner_expected_rows_before_filter; + outer_output_rows * inner_expected_rows_before_filter; path->set_num_output_rows(joined_rows * pos_inner->filter_effect); path->cost = outer->cost + pos_inner->read_cost + cost_model.row_evaluate_cost(joined_rows); @@ -1923,9 +1932,17 @@ void SetCostOnHashJoinAccessPath(const Cost_model_server &cost_model, return; } + // Mirrors SetCostOnNestedLoopAccessPath(). + auto inner_output_rows = (inner->type == AccessPath::MATERIALIZE) + ? inner->materialize().table_path->num_output_rows() + : inner->num_output_rows(); + auto outer_output_rows = (outer->type == AccessPath::MATERIALIZE) + ? outer->materialize().table_path->num_output_rows() + : outer->num_output_rows(); + // Mirrors set_prefix_join_cost(), even though the cost calculation doesn't // make a lot of sense. - double joined_rows = outer->num_output_rows() * inner->num_output_rows(); + double joined_rows = outer_output_rows * inner_output_rows; path->set_num_output_rows(joined_rows * pos_outer->filter_effect); path->cost = inner->cost + pos_outer->read_cost + cost_model.row_evaluate_cost(joined_rows);