diff --git a/mysql-test/r/range_estimator.result b/mysql-test/r/range_estimator.result index 2d878b51c2e..f0f2df3433f 100644 --- a/mysql-test/r/range_estimator.result +++ b/mysql-test/r/range_estimator.result @@ -749,7 +749,7 @@ id select_type table partitions type possible_keys key key_len ref rows filtered Warnings: Note 1003 /* select#1 */ select `test`.`customers`.`id` AS `id`,`test`.`customers`.`modified` AS `modified`,`test`.`customers`.`custinfo` AS `custinfo` from `test`.`customers` where ((`test`.`customers`.`modified` = TIMESTAMP'2025-03-22 23:30:19') and json_contains(cast(json_extract(`custinfo`,_utf8mb4'$.zipcode') as unsigned array),json'[94507, 94582]')) DROP TABLE customers; -# col NULL, for D8 +# col NULL, for D8 and D9 CREATE TABLE tbl_int (col1 INT, key(col1)); INSERT INTO tbl_int VALUES (1), (2), (3), (3), (3), (3), (7), (8), (NULL), (NULL); ANALYZE TABLE tbl_int; @@ -778,6 +778,26 @@ id select_type table partitions type possible_keys key key_len ref rows filtered 1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index Warnings: Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` in (1,2,3)) or (`test`.`tbl_int`.`col1` >= 5)) -# col NULL, for D8 +# +# D9 adjust +# +SET @save_threshold=@@SESSION.range_estimation_adjust_threshold; +set range_estimation_adjust_threshold = 0.5; +set range_estimation = COMPATIBLE; +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 0) +set range_estimation = USE_STATISTICS_EQ_LIMIT; +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 0) +set range_estimation_adjust_threshold = @save_threshold; +# col NULL, for D8 and D9 DROP TABLE tbl_int; SET SESSION range_estimation = @savmode; diff --git a/mysql-test/t/range_estimator.test b/mysql-test/t/range_estimator.test index 894a6391334..d9ea39d7779 100644 --- a/mysql-test/t/range_estimator.test +++ b/mysql-test/t/range_estimator.test @@ -21,6 +21,7 @@ # D6 | # of columns | one or more | one # D7 | lob string | prefix key | implicit truncation # D8 | dive limit | sysvar | N/A +# D9 | adjust | sysvar | N/A SET @savmode=@@SESSION.range_estimation; @@ -269,7 +270,7 @@ WHERE DROP TABLE customers; ---echo # col NULL, for D8 +--echo # col NULL, for D8 and D9 CREATE TABLE tbl_int (col1 INT, key(col1)); INSERT INTO tbl_int VALUES (1), (2), (3), (3), (3), (3), (7), (8), (NULL), (NULL); ANALYZE TABLE tbl_int; @@ -294,7 +295,26 @@ SET SESSION DEBUG="-d,crash_records_in_range"; --echo # Expect "8" in column "rows" (6 + 2 = 8) EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 2, 3) or col1 >= 5; ---echo # col NULL, for D8 +--echo # +--echo # D9 adjust +--echo # + +SET @save_threshold=@@SESSION.range_estimation_adjust_threshold; +set range_estimation_adjust_threshold = 0.5; + +set range_estimation = COMPATIBLE; + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; + +set range_estimation = USE_STATISTICS_EQ_LIMIT; + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; + +set range_estimation_adjust_threshold = @save_threshold; + +--echo # col NULL, for D8 and D9 DROP TABLE tbl_int; SET SESSION range_estimation = @savmode; diff --git a/sql/handler.cc b/sql/handler.cc index b665dba985e..2e7c65b2766 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -6407,6 +6407,14 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, /* Can't scan one range => can't do MRR scan at all */ return HA_POS_ERROR; } + const ha_rows rows_limit = + table->file->stats.records * + table->in_use->variables.range_estimation_adjust_threshold; + if (rows_limit > 0 && rows > rows_limit && + table->range_estimator.use_statistics_adjust()) { + ha_rows adjusted_rows = table->range_estimator.records_in_range(range); + if (adjusted_rows != HA_POS_ERROR) rows = adjusted_rows; + } } total_rows += rows; } diff --git a/sql/range_optimizer/range_estimator.cc b/sql/range_optimizer/range_estimator.cc index aea232ec47c..2758d87f359 100644 --- a/sql/range_optimizer/range_estimator.cc +++ b/sql/range_optimizer/range_estimator.cc @@ -78,7 +78,8 @@ void Range_estimator::use_index(uint kr) { void Range_estimator::decide_estimation_method(const Range_settings &settings, KEY_MULTI_RANGE &range) { - selected_estimator = check_statistics(settings, range); + adjust_only = false; + selected_estimator = check_statistics(settings, range, adjust_only); if (selected_estimator != NUM_ESTIMATORS) { range.range_flag |= SKIP_RECORDS_IN_RANGE; @@ -87,7 +88,7 @@ void Range_estimator::decide_estimation_method(const Range_settings &settings, enum enum_range_estimator Range_estimator::check_statistics(const Range_settings &settings, - KEY_MULTI_RANGE &range) { + KEY_MULTI_RANGE &range, bool &adjust_only) { enum enum_range_estimator estimator = NUM_ESTIMATORS; if ((range.range_flag & (UNIQUE_RANGE | NULL_RANGE)) == UNIQUE_RANGE) { /* @@ -97,10 +98,11 @@ Range_estimator::check_statistics(const Range_settings &settings, "x IS NULL" may have more than 1 matching row. */ estimator = ONE_ROW_ESTIMATOR; - } else if ((settings.skip || settings.eq_range || settings.strict_range) && - settings.use_histogram && + } else if (settings.use_histogram && check_histogram_statistics(range)) { estimator = HISTOGRAM_ESTIMATOR; + adjust_only = + !(settings.skip || settings.eq_range || settings.strict_range); } else if (settings.eq_range && settings.use_index_statistics && check_index_statistics(range)) { /* diff --git a/sql/range_optimizer/range_estimator.h b/sql/range_optimizer/range_estimator.h index 2b552ae3ce1..a986929342e 100644 --- a/sql/range_optimizer/range_estimator.h +++ b/sql/range_optimizer/range_estimator.h @@ -96,7 +96,11 @@ class Range_estimator { @retval False No available estimator. */ bool use_statistics_only() const { - return selected_estimator != NUM_ESTIMATORS; + return selected_estimator != NUM_ESTIMATORS && !adjust_only; + } + + bool use_statistics_adjust() const { + return selected_estimator != NUM_ESTIMATORS && adjust_only; } /** @@ -121,11 +125,13 @@ class Range_estimator { @param settings Settings that control the seletion of estimators. @param range The range. + @param[out] adjust_only The selected estimator is only used as an adjustment. @return The selected estimator, or MAX_ESTIMATORS if none. */ enum enum_range_estimator - check_statistics(const Range_settings &settings, KEY_MULTI_RANGE &range); + check_statistics(const Range_settings &settings, KEY_MULTI_RANGE &range, + bool &adjust_only); bool check_index_statistics(KEY_MULTI_RANGE &range); ha_rows estimate_by_index_statistics(KEY_MULTI_RANGE &range); @@ -187,6 +193,8 @@ class Range_estimator { // Selected estimator. enum enum_range_estimator selected_estimator; + // The selected estimator is only used as a way to adjust. + bool adjust_only; }; void init_range_settings(Range_settings &settings, const RANGE_OPT_PARAM ¶m, diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 39bf763f986..151eb38c195 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -7573,6 +7573,15 @@ static Sys_var_enum Sys_range_estimation( range_estimation_policy, DEFAULT(ESTIMATE_USE_STATISTICS_EQ_LIMIT), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(nullptr), ON_UPDATE(nullptr)); +static Sys_var_double Sys_range_estimation_adjust_threshold( + "range_estimation_adjust_threshold", + "When index dive returns more rows than table size times threshold, and " + "histogram is available, it could be used to produce estimated rows instead. " + "Zero means turning off adjustment.", + HINT_UPDATEABLE SESSION_VAR(range_estimation_adjust_threshold), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1.0), DEFAULT(0), NO_MUTEX_GUARD, + NOT_IN_BINLOG, ON_CHECK(nullptr), ON_UPDATE(nullptr)); + /** Warn usage of restrict_fk_on_non_standard_key variable. When it is set to false, warning should include usage of non std keys may break replication diff --git a/sql/system_variables.h b/sql/system_variables.h index 07bf3c9110d..ba427eaaa2b 100644 --- a/sql/system_variables.h +++ b/sql/system_variables.h @@ -524,6 +524,8 @@ struct System_variables { /// @sa Sys_range_estimation ulong range_estimation; + /// @sa Sys_range_estimation_adjust_threshold + double range_estimation_adjust_threshold; }; static_assert(std::is_trivially_copyable::value);