Bug #117037 index dive estimate wrong
Submitted: 23 Dec 2024 8:48
Reporter: jump mason Email Updates:
Status: Open Impact on me:
None 
Category:MySQL Server Severity:S3 (Non-critical)
Version:8.0 OS:Any
Assigned to: CPU Architecture:Any

[23 Dec 2024 8:48] jump mason
Description:
The optimizer estimate much more rows( maybe 1k) than it actually should be( 1 rows ).

It is caused by tree structure changing between the interval of getting path1 and path2 in index dive.

In the function btr_estimate_n_rows_in_range_low(),  slot1 and slot2 are expected to fall in different pages if the diverged or diverged_lot is true. But the exception may be broken, and then  the rows estimation would expand rapidly.

How to repeat:
1) add a debug_sync between path1 and path2 in btr_estimate_n_rows_in_range_low:

btr_estimate_n_rows_in_range_low()
  ...
  mtr_commit(&mtr);  // get path1 end
  DEBUG_SYNC(current_thd, "btr_estimate_n_rows_in_range_low_path1");
  mtr_start(&mtr);  // get path2 start
  ...

2) add log to tracking process:

btr_estimate_n_rows_in_range_on_level()
  ...
  mtr_commit(&mtr);  // get path1 end
  DEBUG_SYNC(current_thd, "btr_estimate_n_rows_in_range_low_path1");
  mtr_start(&mtr);  // get path2 start
  ...
  for (i = 0;; ++i) {
    ...  // n_rows eatimate
    fprintf(stderr, "\n diverge %lu, diverge lot %lu\n", diverged,diverged_lot);
    fprintf(stderr, "path1 deepth %lu: page %u, nth_rec %lu, totol_rec %lu\n", i, slot1->page_no, slot1->nth_rec, slot1->n_recs);
    fprintf(stderr, "path2 deepth %lu: page %u, nth_rec %lu, totol_rec %lu\n", i, slot2->page_no, slot2->nth_rec, slot2->n_recs);

    fprintf(stderr, "n_rows=%ld\n", n_rows);
  }
}

3) use the mtr case to repeat:

'''
--source include/have_debug_sync.inc

CREATE TABLE t1 (
    id INT,
    c1 CHAR(255),
    c2 CHAR(255),
    c3 CHAR(255),
    c4 CHAR(255),
    c5 CHAR(255),
    c6 CHAR(255),
    c7 CHAR(255),
    c8 CHAR(255),
    c9 CHAR(255),
    c10 CHAR(255),
    c11 CHAR(255),
    c12 CHAR(255),
    c13 CHAR(255),
    c14 CHAR(255),
    c15 CHAR(255),
    c16 CHAR(255),
	PRIMARY KEY pk(id,c1,c2,c3)
)engine = innodb;

delimiter $$;
CREATE PROCEDURE batch_insert(IN begin INT, IN end INT)
BEGIN
WHILE begin < end DO
insert into t1 values (end, 'data1_c1', 'data1_c2', 'data1_c3', 'data1_c4', 'data1_c5', 'data1_c6', 'data1_c7', 'data1_c8', 'data1_c9', 'data1_c10', 'data1_c11', 'data1_c12', 'data1_c13', 'data1_c14', 'data1_c15', 'data1_c16');
SET end = end - 1;
END WHILE;
END;
$$
delimiter ;$$

call batch_insert(3, 89);

--echo # test for spliting between path1 and path2
--connect(lookup,localhost,root,,)
--connection lookup

SET debug_sync='btr_estimate_n_rows_in_range_low_path1 WAIT_FOR data_insert';

set long_query_time=1000000;
set optimizer_trace_max_mem_size=163840000;
set optimizer_trace='enabled=on';

send select * from t1 where id=45;

--connection default
call batch_insert(-3, 3);
--sleep 1

SET debug_sync='now SIGNAL data_insert';

--connection lookup
reap;
select * from information_schema.optimizer_trace;

drop table t1;
drop procedure batch_insert;
set optimizer_trace=default;
set optimizer_trace_max_mem_size=default;
set long_query_time=default;
'''

you can see the dive estimate rows to 43( half of table ) 

and the error log describes the process causing this:

 diverge 1, diverge lot 0
path1 deepth 0: page 4, nth_rec 1, totol_rec 2
path2 deepth 0: page 4, nth_rec 2, totol_rec 3
n_rows=0

 diverge 1, diverge lot 1
path1 deepth 1: page 26, nth_rec 15, totol_rec 20
path2 deepth 1: page 26, nth_rec 14, totol_rec 19
n_rows=18

 diverge 1, diverge lot 1
path1 deepth 2: page 20, nth_rec 3, totol_rec 3
path2 deepth 2: page 20, nth_rec 4, totol_rec 3
n_rows=54

Suggested fix:
add a page_id check, in the  logical branch:
 else if (diverged && !diverged_lot) {
			/* If both slots point to the same page, but the two paths
			diverged. The tree must have changed between the dive for
			path1 and path2 at the beginning of this function. */
			if (slot1->page_no == slot2->page_no) {
				/* If the tree keeps changing even after a
				few attempts, then just return some arbitrary
				number. */
				if (nth_attempt >= rows_in_range_max_retries) {
					return(rows_in_range_arbitrary_ret_val);
				}

				const int64_t	ret =
					btr_estimate_n_rows_in_range_low(
						index, tuple1, mode1,
						tuple2, mode2, nth_attempt + 1);

				return(ret);
			}