diff --git a/mysql-test/r/skip_records_in_range.result b/mysql-test/r/skip_records_in_range.result index e9a622f52f3..69cec1d1b80 100644 --- a/mysql-test/r/skip_records_in_range.result +++ b/mysql-test/r/skip_records_in_range.result @@ -528,3 +528,19 @@ select * from v1; v3 DROP VIEW v1; DROP TABLE t1; +# +# Bug #117791: FORCE INDEX does not skip records_in_range() for 'x IS NULL' +# +CREATE TABLE t1 (c1 INT, KEY (c1)); +INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +set debug='+d,crash_records_in_range'; +EXPLAIN SELECT * FROM t1 FORCE INDEX(c1) WHERE c1 IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref c1 c1 5 const 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`c1` AS `c1` from `test`.`t1` FORCE INDEX (`c1`) where (`test`.`t1`.`c1` is null) +set debug='-d,crash_records_in_range'; +DROP TABLE t1; diff --git a/mysql-test/t/skip_records_in_range.test b/mysql-test/t/skip_records_in_range.test index 09da3de714f..b412e330b43 100644 --- a/mysql-test/t/skip_records_in_range.test +++ b/mysql-test/t/skip_records_in_range.test @@ -116,3 +116,19 @@ eval $query1; DROP VIEW v1; DROP TABLE t1; + +--echo # +--echo # Bug #117791: FORCE INDEX does not skip records_in_range() for 'x IS NULL' +--echo # + +CREATE TABLE t1 (c1 INT, KEY (c1)); +INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE t1; + +set debug='+d,crash_records_in_range'; + +EXPLAIN SELECT * FROM t1 FORCE INDEX(c1) WHERE c1 IS NULL; + +set debug='-d,crash_records_in_range'; + +DROP TABLE t1; diff --git a/sql/handler.cc b/sql/handler.cc index d7d947e481f..1f9847a8089 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -6404,26 +6404,19 @@ ha_rows handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, is actually 0, so the row estimate may be too high in this case. Also note: ranges of the form "x IS NULL" may have more than 1 matching row so records_in_range() is called for these. - 2) SKIP_RECORDS_IN_RANGE will be set when skip_records_in_range or - use_index_statistics are true. - Ranges of the form "x IS NULL" will not use index statistics - because the number of rows with this value are likely to be - very different than the values in the index statistics. - - Note: With SKIP_RECORDS_IN_RANGE, use Index statistics if: - a) Index statistics is available. - b) The range is an equality range but the index is either not - unique or all of the keyparts are not used. + 2) SKIP_RECORDS_IN_RANGE is set. + A user request to skip records_in_range() for this range, + either by using FORCE INDEX or by setting use_index_statistics, + was accepted. */ int keyparts_used = 0; if ((range.range_flag & UNIQUE_RANGE) && // 1) !(range.range_flag & NULL_RANGE)) rows = 1; /* there can be at most one row */ - else if (range.range_flag & SKIP_RECORDS_IN_RANGE && // 2) - !(range.range_flag & NULL_RANGE)) { - if ((range.range_flag & EQ_RANGE) && - (keyparts_used = std::popcount(range.start_key.keypart_map)) && - table->key_info[keyno].has_records_per_key(keyparts_used - 1)) { + else if (range.range_flag & SKIP_RECORDS_IN_RANGE) { // 2) + if (can_use_index_statistics(table, keyno, range.range_flag, + range.start_key.keypart_map, + &keyparts_used)) { rows = static_cast( table->key_info[keyno].records_per_key(keyparts_used - 1)); } else { @@ -9087,6 +9080,16 @@ bool is_index_access_error(int error) { return (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND); } +bool can_use_index_statistics(const TABLE *table, uint keyno, uint range_flag, + key_part_map keypart_map, int *keyparts_used) { + *keyparts_used = std::popcount(keypart_map); + return (range_flag & EQ_RANGE) && // 1) Equality range + !(range_flag & NULL_RANGE) && // 2) No NULL parts + *keyparts_used > 0 && // 3a) At least one keypart + table->key_info[keyno].has_records_per_key( + *keyparts_used - 1); // 3b) Statistics available +} + Xa_state_list::Xa_state_list(Xa_state_list::list &populated_by_tc) : m_underlying{populated_by_tc} {} diff --git a/sql/handler.h b/sql/handler.h index 810d2995256..faee58ea59e 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -7792,6 +7793,30 @@ std::pair commit_owned_gtids(THD *thd, bool all); bool set_tx_isolation(THD *thd, enum_tx_isolation tx_isolation, bool one_shot); bool is_index_access_error(int error); +/** + Check if index statistics can be used for an equality range. + + Index statistics are suitable for equality ranges when: + 1) It is an equality range (EQ_RANGE flag set) + 2) It contains no NULL parts (NULL_RANGE flag not set) + 3) Index statistics are available for the required keyparts + + Ranges of the form "x IS NULL" will not use index statistics because + the number of rows with NULL values are likely to be very different + than the values in the index statistics. + + @param table The table structure + @param keyno The index number + @param range_flag The range flags (EQ_RANGE, NULL_RANGE, etc.) + @param keypart_map Bitmap of keyparts used in the range + @param[out] keyparts_used Number of keyparts used (output parameter) + + @retval true Index statistics can be used for this range + @retval false Index statistics cannot be used for this range +*/ +bool can_use_index_statistics(const TABLE *table, uint keyno, uint range_flag, + key_part_map keypart_map, int *keyparts_used); + /* This class is used by INFORMATION_SCHEMA.FILES to read SE specific tablespace dynamic metadata. Some member like m_type and id, is not diff --git a/sql/range_optimizer/index_range_scan_plan.cc b/sql/range_optimizer/index_range_scan_plan.cc index c35cc113113..fda7c42ff84 100644 --- a/sql/range_optimizer/index_range_scan_plan.cc +++ b/sql/range_optimizer/index_range_scan_plan.cc @@ -516,13 +516,6 @@ static uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) { if (is_eq_range_pred) { range->range_flag = EQ_RANGE; - /* - Use statistics instead of index dives for estimates of rows in - this range if the user requested it - */ - if (param->use_index_statistics) - range->range_flag |= SKIP_RECORDS_IN_RANGE; - /* An equality range is a unique range (0 or 1 rows in the range) if the index is unique (1) and all keyparts are used (2). @@ -560,6 +553,27 @@ static uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) { seq->range_count++; seq->max_key_part = max(seq->max_key_part, key_tree->part); + /* + Use statistics instead of index dives for estimates of rows in + this range if the user has requested it (use_index_statistics) and + the range is suitable for index statistics. + + The can_use_index_statistics() function checks whether index statistics + are supportive for the range, which requires: + a) It is an equality range. + b) It contains no null part. + c) Index statistics are available. + + Ranges of the form "x IS NULL" will not use index statistics + because the number of rows with NULL values are likely to be + very different than the values in the index statistics. + */ + int keyparts_used = 0; + if (param->use_index_statistics && + can_use_index_statistics(param->table, seq->keyno, range->range_flag, + range->start_key.keypart_map, &keyparts_used)) + range->range_flag |= SKIP_RECORDS_IN_RANGE; + if (seq->skip_records_in_range) range->range_flag |= SKIP_RECORDS_IN_RANGE; return 0;