diff --git a/mysql-test/include/range_estimator_basic.inc b/mysql-test/include/range_estimator_basic.inc new file mode 100644 index 00000000000..1e0f5fa66a0 --- /dev/null +++ b/mysql-test/include/range_estimator_basic.inc @@ -0,0 +1,99 @@ +# Basic forms of key ranges, col1 NULL + +--echo # Integer column, singleton histogram, with key, col1 NULL + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; +EXPLAIN SELECT * FROM tbl_int WHERE 0 < col1; + +--echo # Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 8; +EXPLAIN SELECT * FROM tbl_int WHERE 8 < col1; + +--echo # Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 0; +EXPLAIN SELECT * FROM tbl_int WHERE 0 > col1; + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 10; +EXPLAIN SELECT * FROM tbl_int WHERE 10 > col1; + +--echo # Expect "3" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; +EXPLAIN SELECT * FROM tbl_int WHERE 6 <= col1; + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= -100; +EXPLAIN SELECT * FROM tbl_int WHERE -100 <= col1; + +--echo # Expect "8" in column "rows" (7 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 != 8; +EXPLAIN SELECT * FROM tbl_int WHERE 8 != col1; +EXPLAIN SELECT * FROM tbl_int WHERE col1 <> 8; +EXPLAIN SELECT * FROM tbl_int WHERE 8 <> col1; + +--echo # Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 10; +EXPLAIN SELECT * FROM tbl_int WHERE 10 = col1; + +--echo # Expect "2" in column "rows" (D3 null range, standalone) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; + +--echo # Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; + +--echo # Expect "3" in column "rows" (D4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 BETWEEN 1 AND 3; + +--echo # Expect "6" in column "rows" (5 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT BETWEEN 1 AND 3; + +--echo # Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 3, 4, 5, 6, 7); + +--echo # Expect 7 in column "rows" (7 ranges, all execept the second subject to D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT IN (1, 3, 4, 5, 6, 7); + +--echo # D4 any form, col1 NULL + +--echo # Expect "4" in column "rows" (F1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 <= 4; + +--echo # Expect "3" in column "rows" (F2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 <= 4; + +--echo # Expect "3" in column "rows" (F3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 < 4; + +--echo # Expect "2" in column "rows" (F4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 < 4; + +--echo # Expect "3" in column "rows" (F5) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; + +--echo # Expect "7" in column "rows" (F6) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1; + +--echo # Expect "10" in column "rows" (F7) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 IS NOT NULL; + +--echo # Expect "8" in column "rows" (F8) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; + +--echo # Expect "7" in column "rows" (F11) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 8; + +--echo # Expect "8" in column "rows" (F12) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <= 8; + +--echo # Expect "9" in column "rows" (F13) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 < 8; + +--echo # Expect "10" in column "rows" (F14) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 <= 8; + +--echo # Expect "2" in column "rows" (F15) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; + +--echo # Expect "8" in column "rows" (multiple ranges: 2 + 3 + 3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL or col1 >= 1 and col1 <= 3 or col1 > 5 and col1 <= 8; diff --git a/mysql-test/r/range_estimator.result b/mysql-test/r/range_estimator.result new file mode 100644 index 00000000000..2d878b51c2e --- /dev/null +++ b/mysql-test/r/range_estimator.result @@ -0,0 +1,783 @@ +SET @savmode=@@SESSION.range_estimation; +SET SESSION DEBUG="+d,crash_records_in_range"; +set range_estimation = USE_STATISTICS_ONLY; +# +# Integer column, singleton histogram, with key asc +# +CREATE TABLE tbl_int (col1 INT, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +# Integer column, singleton histogram, with key, col1 NULL +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 0) +EXPLAIN SELECT * FROM tbl_int WHERE 0 < col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (0 < `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 < col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 < `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 0) +EXPLAIN SELECT * FROM tbl_int WHERE 0 > col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (0 > `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 10; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 10) +EXPLAIN SELECT * FROM tbl_int WHERE 10 > col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (10 > `test`.`tbl_int`.`col1`) +# Expect "3" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= 6) +EXPLAIN SELECT * FROM tbl_int WHERE 6 <= col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (6 <= `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= -100; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= (-(100))) +EXPLAIN SELECT * FROM tbl_int WHERE -100 <= col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((-(100)) <= `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" (7 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 != 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <> 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 != col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 <> `test`.`tbl_int`.`col1`) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <> 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <> 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 <> col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 <> `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 10; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` = 10) +EXPLAIN SELECT * FROM tbl_int WHERE 10 = col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` = 10) +# Expect "2" in column "rows" (D3 null range, standalone) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is null) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is not null) +# Expect "3" in column "rows" (D4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 BETWEEN 1 AND 3; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` between 1 and 3) +# Expect "6" in column "rows" (5 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT BETWEEN 1 AND 3; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` not between 1 and 3) +# Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 3, 4, 5, 6, 7); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` in (1,3,4,5,6,7)) +# Expect 7 in column "rows" (7 ranges, all execept the second subject to D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT IN (1, 3, 4, 5, 6, 7); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` not in (1,3,4,5,6,7)) +# D4 any form, col1 NULL +# Expect "4" in column "rows" (F1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 <= 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 4 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` <= 4)) +# Expect "3" in column "rows" (F2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 <= 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` > 1) and (`test`.`tbl_int`.`col1` <= 4)) +# Expect "3" in column "rows" (F3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 < 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` < 4)) +# Expect "2" in column "rows" (F4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 < 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` > 1) and (`test`.`tbl_int`.`col1` < 4)) +# Expect "3" in column "rows" (F5) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= 6) +# Expect "7" in column "rows" (F6) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 1) +# Expect "10" in column "rows" (F7) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL index col1 col1 5 NULL 10 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` is not null)) +# Expect "8" in column "rows" (F8) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is not null) +# Expect "7" in column "rows" (F11) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 8) +# Expect "8" in column "rows" (F12) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <= 8) +# Expect "9" in column "rows" (F13) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 < 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 9 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` < 8)) +# Expect "10" in column "rows" (F14) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL index col1 col1 5 NULL 10 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` <= 8)) +# Expect "2" in column "rows" (F15) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is null) +# Expect "8" in column "rows" (multiple ranges: 2 + 3 + 3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL or col1 >= 1 and col1 <= 3 or col1 > 5 and col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` <= 3)) or ((`test`.`tbl_int`.`col1` > 5) and (`test`.`tbl_int`.`col1` <= 8))) +DROP TABLE tbl_int; +# +# Integer column, singleton histogram, with key desc (D5) +# +CREATE TABLE tbl_int (col1 INT, key(col1 DESC)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +# Integer column, singleton histogram, with key, col1 NULL +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 0) +EXPLAIN SELECT * FROM tbl_int WHERE 0 < col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (0 < `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 < col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 < `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 0) +EXPLAIN SELECT * FROM tbl_int WHERE 0 > col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (0 > `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 10; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 10) +EXPLAIN SELECT * FROM tbl_int WHERE 10 > col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (10 > `test`.`tbl_int`.`col1`) +# Expect "3" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= 6) +EXPLAIN SELECT * FROM tbl_int WHERE 6 <= col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (6 <= `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= -100; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= (-(100))) +EXPLAIN SELECT * FROM tbl_int WHERE -100 <= col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((-(100)) <= `test`.`tbl_int`.`col1`) +# Expect "8" in column "rows" (7 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 != 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <> 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 != col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 <> `test`.`tbl_int`.`col1`) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <> 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <> 8) +EXPLAIN SELECT * FROM tbl_int WHERE 8 <> col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (8 <> `test`.`tbl_int`.`col1`) +# Expect "1" in column "rows" (D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 10; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` = 10) +EXPLAIN SELECT * FROM tbl_int WHERE 10 = col1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 1 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` = 10) +# Expect "2" in column "rows" (D3 null range, standalone) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is null) +# Expect "8" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is not null) +# Expect "3" in column "rows" (D4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 BETWEEN 1 AND 3; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` between 1 and 3) +# Expect "6" in column "rows" (5 + 1, D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT BETWEEN 1 AND 3; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` not between 1 and 3) +# Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 3, 4, 5, 6, 7); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` in (1,3,4,5,6,7)) +# Expect 7 in column "rows" (7 ranges, all execept the second subject to D2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 NOT IN (1, 3, 4, 5, 6, 7); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` not in (1,3,4,5,6,7)) +# D4 any form, col1 NULL +# Expect "4" in column "rows" (F1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 <= 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 4 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` <= 4)) +# Expect "3" in column "rows" (F2) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 <= 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` > 1) and (`test`.`tbl_int`.`col1` <= 4)) +# Expect "3" in column "rows" (F3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 1 and col1 < 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` < 4)) +# Expect "2" in column "rows" (F4) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1 and col1 < 4; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` > 1) and (`test`.`tbl_int`.`col1` < 4)) +# Expect "3" in column "rows" (F5) +EXPLAIN SELECT * FROM tbl_int WHERE col1 >= 6; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` >= 6) +# Expect "7" in column "rows" (F6) +EXPLAIN SELECT * FROM tbl_int WHERE col1 > 1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` > 1) +# Expect "10" in column "rows" (F7) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL index col1 col1 5 NULL 10 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` is not null)) +# Expect "8" in column "rows" (F8) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is not null) +# Expect "7" in column "rows" (F11) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 8) +# Expect "8" in column "rows" (F12) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <= 8) +# Expect "9" in column "rows" (F13) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 < 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 9 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` < 8)) +# Expect "10" in column "rows" (F14) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL OR col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL index col1 col1 5 NULL 10 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or (`test`.`tbl_int`.`col1` <= 8)) +# Expect "2" in column "rows" (F15) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 5 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` is null) +# Expect "8" in column "rows" (multiple ranges: 2 + 3 + 3) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IS NULL or col1 >= 1 and col1 <= 3 or col1 > 5 and col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` is null) or ((`test`.`tbl_int`.`col1` >= 1) and (`test`.`tbl_int`.`col1` <= 3)) or ((`test`.`tbl_int`.`col1` > 5) and (`test`.`tbl_int`.`col1` <= 8))) +DROP TABLE tbl_int; +# +# D4 any form, col1 NOT NULL +# +CREATE TABLE tbl_int (col1 INT NOT NULL, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +# Expect "7" in column "rows" (F9) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 4 NULL 7 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` < 8) +# Expect "8" in column "rows" (F10) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <= 8; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 4 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` <= 8) +DROP TABLE tbl_int; +# +# Type tests for coverage +# +# +# String column, singleton histogram, with key +# +CREATE TABLE tbl_varchar (col1 VARCHAR(255), key(col1)); +INSERT INTO tbl_varchar VALUES +("abcd"), ("🍣"), ("🍺"), ("eeeeeeeeee"), ("ef"), ("AG"), +("a very long string that is longer than 42 characters"), +("lorem ipsum"), (NULL), (NULL); +ANALYZE TABLE tbl_varchar UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_varchar histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_varchar; +Table Op Msg_type Msg_text +test.tbl_varchar analyze status OK +# Expect "3" in column "rows" +EXPLAIN SELECT * FROM tbl_varchar WHERE col1 > "b"; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_varchar NULL range col1 col1 1023 NULL 3 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_varchar`.`col1` AS `col1` from `test`.`tbl_varchar` where (`test`.`tbl_varchar`.`col1` > 'b') +DROP TABLE tbl_varchar; +# +# Double column, singleton histogram, with key +# +CREATE TABLE tbl_double (col1 DOUBLE, key(col1)); +INSERT INTO tbl_double VALUES (-1.1), (0.0), (1.1), (2.2), (3.3), (4.4), (5.5), (6.6), (NULL), (NULL); +ANALYZE TABLE tbl_double UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_double histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_double; +Table Op Msg_type Msg_text +test.tbl_double analyze status OK +# Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_double WHERE col1 > 0.0e0; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_double NULL range col1 col1 9 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_double`.`col1` AS `col1` from `test`.`tbl_double` where (`test`.`tbl_double`.`col1` > 0.0e0) +DROP TABLE tbl_double; +# +# Time column, singleton histogram, with key +# +CREATE TABLE tbl_time (col1 TIME, key(col1)); +INSERT INTO tbl_time VALUES +("-01:00:00"), ("00:00:00"), ("00:00:01"), ("00:01:00"), ("01:00:00"), +("01:01:00"), ("02:00:00"), ("03:00:00"), (NULL), (NULL); +ANALYZE TABLE tbl_time UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_time histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_time; +Table Op Msg_type Msg_text +test.tbl_time analyze status OK +# Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_time WHERE col1 > "00:00:00"; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_time NULL range col1 col1 4 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_time`.`col1` AS `col1` from `test`.`tbl_time` where (`test`.`tbl_time`.`col1` > TIME'00:00:00') +DROP TABLE tbl_time; +# +# Date column, singleton histogram, with key +# +CREATE TABLE tbl_date (col1 DATE, key(col1)); +INSERT INTO tbl_date VALUES +("1000-01-02"), ("9999-12-30"), ("2017-01-01"), ("2017-01-02"), ("2017-02-01"), +("2018-01-01"), ("2019-01-01"), ("3019-01-01"), (NULL), (NULL); +ANALYZE TABLE tbl_date UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_date histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_date; +Table Op Msg_type Msg_text +test.tbl_date analyze status OK +# Expect "5" in column "rows" +EXPLAIN SELECT * FROM tbl_date WHERE col1 > "2017-01-02"; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_date NULL range col1 col1 4 NULL 5 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_date`.`col1` AS `col1` from `test`.`tbl_date` where (`test`.`tbl_date`.`col1` > DATE'2017-01-02') +DROP TABLE tbl_date; +# +# Datetime column, singleton histogram, with key +# +CREATE TABLE tbl_datetime (col1 DATETIME(6), key(col1)); +INSERT INTO tbl_datetime VALUES +("1000-01-01 00:00:01"), ("9999-12-31 23:59:59.999998"), +("2017-01-01 00:00:00"), ("2017-01-01 00:00:00.000001"), +("2017-02-01 00:00:00"), ("2018-01-01 00:00:00.999999"), +("2018-01-01 00:00:01"), ("3019-01-01 10:10:10.101010"), (NULL), (NULL); +ANALYZE TABLE tbl_datetime UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_datetime histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_datetime; +Table Op Msg_type Msg_text +test.tbl_datetime analyze status OK +# Expect "4" in column "rows" +EXPLAIN SELECT * FROM tbl_datetime WHERE col1 > "2018-01-01 00:00:00"; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_datetime NULL range col1 col1 9 NULL 4 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_datetime`.`col1` AS `col1` from `test`.`tbl_datetime` where (`test`.`tbl_datetime`.`col1` > TIMESTAMP'2018-01-01 00:00:00') +DROP TABLE tbl_datetime; +# +# Decimal column, singleton histogram, with key +# +CREATE TABLE tbl_decimal (col1 DECIMAL(65, 30), key(col1)); +INSERT INTO tbl_decimal VALUES +(00000000000000000000000000000000000.000000000000000000000000000000), +(99999999999999999999999999999999999.999999999999999999999999999998), +(-99999999999999999999999999999999999.999999999999999999999999999998), +(1), (2), (3), (4), (-1), (NULL), (NULL); +ANALYZE TABLE tbl_decimal UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_decimal histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_decimal; +Table Op Msg_type Msg_text +test.tbl_decimal analyze status OK +# Expect "4" in column "rows" +EXPLAIN SELECT * FROM tbl_decimal WHERE col1 > 1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_decimal NULL range col1 col1 31 NULL 4 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_decimal`.`col1` AS `col1` from `test`.`tbl_decimal` where (`test`.`tbl_decimal`.`col1` > 1.000000000000000000000000000000) +DROP TABLE tbl_decimal; +# +# ENUM column, singleton histogram, with key +# Note that we only support equality/inequality operators for ENUM +# columns. +# +CREATE TABLE tbl_enum (col1 ENUM('red', 'black', 'blue', 'green'), key(col1)); +INSERT INTO tbl_enum VALUES ('red'), ('red'), ('black'), ('blue'), ('green'), +('green'), (NULL), (NULL), (NULL); +ANALYZE TABLE tbl_enum UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_enum histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_enum; +Table Op Msg_type Msg_text +test.tbl_enum analyze status OK +# Expect "2" in column "rows" +EXPLAIN SELECT * FROM tbl_enum WHERE col1 = 'red'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_enum NULL ref col1 col1 2 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_enum`.`col1` AS `col1` from `test`.`tbl_enum` where (`test`.`tbl_enum`.`col1` = 'red') +DROP TABLE tbl_enum; +# +# SET column, singleton histogram, with key +# Note that we only support equality/inequality operators for SET +# columns. +# +CREATE TABLE tbl_set (col1 SET('red', 'black', 'blue', 'green'), key(col1)); +INSERT INTO tbl_set VALUES ('red'), ('red,black'), ('black,green,blue'), +('black,green,blue'), ('black,green,blue'), +('green'), ('green,red'), ('red,green'), (NULL), +(NULL), (NULL); +ANALYZE TABLE tbl_set UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_set histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_set; +Table Op Msg_type Msg_text +test.tbl_set analyze status OK +# Expect "2" in column "rows" +EXPLAIN SELECT * FROM tbl_set WHERE col1 = 'red,green'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_set NULL ref col1 col1 2 const 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_set`.`col1` AS `col1` from `test`.`tbl_set` where (`test`.`tbl_set`.`col1` = 'red,green') +DROP TABLE tbl_set; +# +# D6 prefix key with lob string +# +create table t0 (c0 double, c1 mediumtext, key(c1(10))); +insert into t0 VALUES (NULL, 62448600), (NULL, NULL), (NULL, 1880594341); +ANALYZE TABLE t0; +Table Op Msg_type Msg_text +test.t0 analyze status OK +ANALYZE TABLE t0 update histogram on c1; +Table Op Msg_type Msg_text +test.t0 histogram status Histogram statistics created for column 'c1'. +EXPLAIN SELECT t0.c0 AS ref0 FROM t0 WHERE ("Z1") < (t0.c1); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t0 NULL range c1 c1 43 NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t0`.`c0` AS `ref0` from `test`.`t0` where ('Z1' < `test`.`t0`.`c1`) +DROP TABLE t0; +# +# D7 multiple columns +# +CREATE TABLE tbl_int (col1 INT, col2 INT, key(col1,col2)); +INSERT INTO tbl_int VALUES (1,1), (1,1), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (NULL,NULL), (NULL,NULL); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +# Expect "2" in column "rows" (10 * 0.2 * 1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 10 const,const 2 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1`,`test`.`tbl_int`.`col2` AS `col2` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col2` = 1) and (`test`.`tbl_int`.`col1` = 1)) +# Expect "2" in column "rows" (10 * 0.2 * 1 = 2, over-estimated) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 2; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 10 const,const 2 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1`,`test`.`tbl_int`.`col2` AS `col2` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col2` = 2) and (`test`.`tbl_int`.`col1` = 1)) +DROP TABLE tbl_int; +CREATE TABLE tbl_int (col1 INT, col2 INT, key(col1,col2)); +INSERT INTO tbl_int VALUES (1,1), (1,2), (1,3), (1,4), (2,1), (2,2), (3,1), (3,2), (NULL,NULL), (NULL,NULL); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col2 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col2'. +# Expect "1" in column "rows" (10 * 0.4 * max(0.3, 4/9) = 1.7778 = 1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL ref col1 col1 10 const,const 2 100.00 Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1`,`test`.`tbl_int`.`col2` AS `col2` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col2` = 1) and (`test`.`tbl_int`.`col1` = 1)) +# Expect "2" in column "rows" (10 * 0.4 * 0.5 = 2, under-estimated) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 >= 2; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 10 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1`,`test`.`tbl_int`.`col2` AS `col2` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` = 1) and (`test`.`tbl_int`.`col2` >= 2)) +DROP TABLE tbl_int; +CREATE TABLE customers (id BIGINT PRIMARY KEY, modified DATETIME, custinfo JSON); +INSERT INTO customers VALUES +(1, '2025-03-22 23:30:01', '{"user":"Jack","user_id":37,"zipcode":[94582,94536]}'), +(2, '2025-03-22 23:30:01', '{"user":"Jill","user_id":22,"zipcode":[94568,94507,94582]}'), +(3, '2025-03-22 23:30:19', '{"user":"Bob","user_id":31,"zipcode":[94477,94507]}'), +(4, '2025-03-22 23:30:19', '{"user":"Mary","user_id":72,"zipcode":[94536]}'), +(5, '2025-03-22 23:30:19', '{"user":"Ted","user_id":56,"zipcode":[94507,94582]}'); +ALTER TABLE customers ADD INDEX zips(modified, (CAST(custinfo->'$.zipcode' AS UNSIGNED ARRAY))); +ANALYZE TABLE customers; +Table Op Msg_type Msg_text +test.customers analyze status OK +ANALYZE TABLE customers UPDATE HISTOGRAM ON modified WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.customers histogram status Histogram statistics created for column 'modified'. +# Expect "2" in column "rows" (5 * 0.6 * 0.4 + 5 * 0.6 * 0.4) +EXPLAIN +SELECT * FROM customers +WHERE +modified = '2025-03-22 23:30:19' AND +JSON_CONTAINS(custinfo->'$.zipcode', CAST('[94507,94582]' AS JSON)); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE customers NULL range zips zips 15 NULL 2 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`customers`.`id` AS `id`,`test`.`customers`.`modified` AS `modified`,`test`.`customers`.`custinfo` AS `custinfo` from `test`.`customers` where ((`test`.`customers`.`modified` = TIMESTAMP'2025-03-22 23:30:19') and json_contains(cast(json_extract(`custinfo`,_utf8mb4'$.zipcode') as unsigned array),json'[94507, 94582]')) +DROP TABLE customers; +# col NULL, for D8 +CREATE TABLE tbl_int (col1 INT, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (3), (3), (3), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +Table Op Msg_type Msg_text +test.tbl_int analyze status OK +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +Table Op Msg_type Msg_text +test.tbl_int histogram status Histogram statistics created for column 'col1'. +# +# D8 dive limit +# +SET range_estimation = USE_STATISTICS_EQ_LIMIT; +SET @saved_limit = @@SESSION.eq_range_index_dive_limit; +set eq_range_index_dive_limit = 2; +# Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 2, 3); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 6 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where (`test`.`tbl_int`.`col1` in (1,2,3)) +SET eq_range_index_dive_limit = @saved_limit; +SET SESSION DEBUG="-d,crash_records_in_range"; +# Expect "8" in column "rows" (6 + 2 = 8) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 2, 3) or col1 >= 5; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE tbl_int NULL range col1 col1 5 NULL 8 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`tbl_int`.`col1` AS `col1` from `test`.`tbl_int` where ((`test`.`tbl_int`.`col1` in (1,2,3)) or (`test`.`tbl_int`.`col1` >= 5)) +# col NULL, for D8 +DROP TABLE tbl_int; +SET SESSION range_estimation = @savmode; diff --git a/mysql-test/t/range_estimator.test b/mysql-test/t/range_estimator.test new file mode 100644 index 00000000000..894a6391334 --- /dev/null +++ b/mysql-test/t/range_estimator.test @@ -0,0 +1,300 @@ +-- source include/have_debug.inc + +# Some tests are borrowed from histogram_singleton.test, however, key is added +# to produce key range. +# +# The implementation uses the same template selectivity function as before, +# there is no need to repeat each test. Integer column, singleton histogram +# tests are almost sufficient. A few tests for other types should be added for +# coverage. +# +# Since estimating key ranges differs from estimating post-scan filter in many +# aspects, specific cases are added to verify the differences: +# +# id | aspect | key range est | post-scan filter est +# ----|------------------|-----------------------|-------------------- +# D1 | check column | "rows" | "filtered" +# D2 | zero defense | one row | minimal selectivity +# D3 | null range | inlined or standalone | standalone +# D4 | 2-endpoint | any form | between, not between +# D5 | descending index | reversed range | N/A +# D6 | # of columns | one or more | one +# D7 | lob string | prefix key | implicit truncation +# D8 | dive limit | sysvar | N/A + +SET @savmode=@@SESSION.range_estimation; + +SET SESSION DEBUG="+d,crash_records_in_range"; + +set range_estimation = USE_STATISTICS_ONLY; + +--echo # +--echo # Integer column, singleton histogram, with key asc +--echo # + +CREATE TABLE tbl_int (col1 INT, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; + +--source include/range_estimator_basic.inc + +DROP TABLE tbl_int; + +--echo # +--echo # Integer column, singleton histogram, with key desc (D5) +--echo # + +CREATE TABLE tbl_int (col1 INT, key(col1 DESC)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; + +--source include/range_estimator_basic.inc + +DROP TABLE tbl_int; + +--echo # +--echo # D4 any form, col1 NOT NULL +--echo # + +# col1 NOT NULL +CREATE TABLE tbl_int (col1 INT NOT NULL, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; + +--echo # Expect "7" in column "rows" (F9) +EXPLAIN SELECT * FROM tbl_int WHERE col1 < 8; + +--echo # Expect "8" in column "rows" (F10) +EXPLAIN SELECT * FROM tbl_int WHERE col1 <= 8; + +# col1 NOT NULL +DROP TABLE tbl_int; + +--echo # +--echo # Type tests for coverage +--echo # + +--echo # +--echo # String column, singleton histogram, with key +--echo # +CREATE TABLE tbl_varchar (col1 VARCHAR(255), key(col1)); +INSERT INTO tbl_varchar VALUES + ("abcd"), ("🍣"), ("🍺"), ("eeeeeeeeee"), ("ef"), ("AG"), + ("a very long string that is longer than 42 characters"), + ("lorem ipsum"), (NULL), (NULL); +ANALYZE TABLE tbl_varchar UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_varchar; + +--echo # Expect "3" in column "rows" +EXPLAIN SELECT * FROM tbl_varchar WHERE col1 > "b"; + +DROP TABLE tbl_varchar; + +--echo # +--echo # Double column, singleton histogram, with key +--echo # +CREATE TABLE tbl_double (col1 DOUBLE, key(col1)); +INSERT INTO tbl_double VALUES (-1.1), (0.0), (1.1), (2.2), (3.3), (4.4), (5.5), (6.6), (NULL), (NULL); +ANALYZE TABLE tbl_double UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_double; + +--echo # Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_double WHERE col1 > 0.0e0; + +DROP TABLE tbl_double; + +--echo # +--echo # Time column, singleton histogram, with key +--echo # +CREATE TABLE tbl_time (col1 TIME, key(col1)); +INSERT INTO tbl_time VALUES + ("-01:00:00"), ("00:00:00"), ("00:00:01"), ("00:01:00"), ("01:00:00"), + ("01:01:00"), ("02:00:00"), ("03:00:00"), (NULL), (NULL); +ANALYZE TABLE tbl_time UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_time; + +--echo # Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_time WHERE col1 > "00:00:00"; + +DROP TABLE tbl_time; + +--echo # +--echo # Date column, singleton histogram, with key +--echo # +CREATE TABLE tbl_date (col1 DATE, key(col1)); +INSERT INTO tbl_date VALUES + ("1000-01-02"), ("9999-12-30"), ("2017-01-01"), ("2017-01-02"), ("2017-02-01"), + ("2018-01-01"), ("2019-01-01"), ("3019-01-01"), (NULL), (NULL); +ANALYZE TABLE tbl_date UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_date; + +--echo # Expect "5" in column "rows" +EXPLAIN SELECT * FROM tbl_date WHERE col1 > "2017-01-02"; + +DROP TABLE tbl_date; + +--echo # +--echo # Datetime column, singleton histogram, with key +--echo # +CREATE TABLE tbl_datetime (col1 DATETIME(6), key(col1)); +INSERT INTO tbl_datetime VALUES + ("1000-01-01 00:00:01"), ("9999-12-31 23:59:59.999998"), + ("2017-01-01 00:00:00"), ("2017-01-01 00:00:00.000001"), + ("2017-02-01 00:00:00"), ("2018-01-01 00:00:00.999999"), + ("2018-01-01 00:00:01"), ("3019-01-01 10:10:10.101010"), (NULL), (NULL); +ANALYZE TABLE tbl_datetime UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_datetime; + +--echo # Expect "4" in column "rows" +EXPLAIN SELECT * FROM tbl_datetime WHERE col1 > "2018-01-01 00:00:00"; + +DROP TABLE tbl_datetime; + +--echo # +--echo # Decimal column, singleton histogram, with key +--echo # +CREATE TABLE tbl_decimal (col1 DECIMAL(65, 30), key(col1)); +INSERT INTO tbl_decimal VALUES + (00000000000000000000000000000000000.000000000000000000000000000000), + (99999999999999999999999999999999999.999999999999999999999999999998), + (-99999999999999999999999999999999999.999999999999999999999999999998), + (1), (2), (3), (4), (-1), (NULL), (NULL); +ANALYZE TABLE tbl_decimal UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_decimal; + +--echo # Expect "4" in column "rows" +EXPLAIN SELECT * FROM tbl_decimal WHERE col1 > 1; + +DROP TABLE tbl_decimal; + +--echo # +--echo # ENUM column, singleton histogram, with key +--echo # Note that we only support equality/inequality operators for ENUM +--echo # columns. +--echo # +CREATE TABLE tbl_enum (col1 ENUM('red', 'black', 'blue', 'green'), key(col1)); +INSERT INTO tbl_enum VALUES ('red'), ('red'), ('black'), ('blue'), ('green'), + ('green'), (NULL), (NULL), (NULL); +ANALYZE TABLE tbl_enum UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_enum; + +--echo # Expect "2" in column "rows" +EXPLAIN SELECT * FROM tbl_enum WHERE col1 = 'red'; + +DROP TABLE tbl_enum; + +--echo # +--echo # SET column, singleton histogram, with key +--echo # Note that we only support equality/inequality operators for SET +--echo # columns. +--echo # +CREATE TABLE tbl_set (col1 SET('red', 'black', 'blue', 'green'), key(col1)); +INSERT INTO tbl_set VALUES ('red'), ('red,black'), ('black,green,blue'), + ('black,green,blue'), ('black,green,blue'), + ('green'), ('green,red'), ('red,green'), (NULL), + (NULL), (NULL); +ANALYZE TABLE tbl_set UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_set; + +--echo # Expect "2" in column "rows" +EXPLAIN SELECT * FROM tbl_set WHERE col1 = 'red,green'; + +DROP TABLE tbl_set; + +--echo # +--echo # D6 prefix key with lob string +--echo # + +create table t0 (c0 double, c1 mediumtext, key(c1(10))); +insert into t0 VALUES (NULL, 62448600), (NULL, NULL), (NULL, 1880594341); +ANALYZE TABLE t0; +ANALYZE TABLE t0 update histogram on c1; +EXPLAIN SELECT t0.c0 AS ref0 FROM t0 WHERE ("Z1") < (t0.c1); +DROP TABLE t0; + +--echo # +--echo # D7 multiple columns +--echo # + +# A multiple column index with only leading histogram +CREATE TABLE tbl_int (col1 INT, col2 INT, key(col1,col2)); +INSERT INTO tbl_int VALUES (1,1), (1,1), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (NULL,NULL), (NULL,NULL); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; + +--echo # Expect "2" in column "rows" (10 * 0.2 * 1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 1; + +--echo # Expect "2" in column "rows" (10 * 0.2 * 1 = 2, over-estimated) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 2; + +DROP TABLE tbl_int; + +# A multiple column index with full histograms +CREATE TABLE tbl_int (col1 INT, col2 INT, key(col1,col2)); +INSERT INTO tbl_int VALUES (1,1), (1,2), (1,3), (1,4), (2,1), (2,2), (3,1), (3,2), (NULL,NULL), (NULL,NULL); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col2 WITH 10 BUCKETS; + +--echo # Expect "1" in column "rows" (10 * 0.4 * max(0.3, 4/9) = 1.7778 = 1) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 = 1; + +--echo # Expect "2" in column "rows" (10 * 0.4 * 0.5 = 2, under-estimated) +EXPLAIN SELECT * FROM tbl_int WHERE col1 = 1 and col2 >= 2; + +DROP TABLE tbl_int; + +# A multiple column index with multi-valued key part, for coverage. +CREATE TABLE customers (id BIGINT PRIMARY KEY, modified DATETIME, custinfo JSON); +INSERT INTO customers VALUES + (1, '2025-03-22 23:30:01', '{"user":"Jack","user_id":37,"zipcode":[94582,94536]}'), + (2, '2025-03-22 23:30:01', '{"user":"Jill","user_id":22,"zipcode":[94568,94507,94582]}'), + (3, '2025-03-22 23:30:19', '{"user":"Bob","user_id":31,"zipcode":[94477,94507]}'), + (4, '2025-03-22 23:30:19', '{"user":"Mary","user_id":72,"zipcode":[94536]}'), + (5, '2025-03-22 23:30:19', '{"user":"Ted","user_id":56,"zipcode":[94507,94582]}'); +ALTER TABLE customers ADD INDEX zips(modified, (CAST(custinfo->'$.zipcode' AS UNSIGNED ARRAY))); +ANALYZE TABLE customers; +ANALYZE TABLE customers UPDATE HISTOGRAM ON modified WITH 10 BUCKETS; + +--echo # Expect "2" in column "rows" (5 * 0.6 * 0.4 + 5 * 0.6 * 0.4) +EXPLAIN +SELECT * FROM customers +WHERE + modified = '2025-03-22 23:30:19' AND + JSON_CONTAINS(custinfo->'$.zipcode', CAST('[94507,94582]' AS JSON)); + +DROP TABLE customers; + +--echo # col NULL, for D8 +CREATE TABLE tbl_int (col1 INT, key(col1)); +INSERT INTO tbl_int VALUES (1), (2), (3), (3), (3), (3), (7), (8), (NULL), (NULL); +ANALYZE TABLE tbl_int; +ANALYZE TABLE tbl_int UPDATE HISTOGRAM ON col1 WITH 10 BUCKETS; + +--echo # +--echo # D8 dive limit +--echo # + +SET range_estimation = USE_STATISTICS_EQ_LIMIT; + +SET @saved_limit = @@SESSION.eq_range_index_dive_limit; +set eq_range_index_dive_limit = 2; + +--echo # Expect "6" in column "rows" +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 2, 3); + +SET eq_range_index_dive_limit = @saved_limit; + +SET SESSION DEBUG="-d,crash_records_in_range"; + +--echo # Expect "8" in column "rows" (6 + 2 = 8) +EXPLAIN SELECT * FROM tbl_int WHERE col1 IN (1, 2, 3) or col1 >= 5; + +--echo # col NULL, for D8 +DROP TABLE tbl_int; + +SET SESSION range_estimation = @savmode; diff --git a/sql/range_optimizer/index_range_scan_plan.cc b/sql/range_optimizer/index_range_scan_plan.cc index 956ec7d9fcd..bef070acf95 100644 --- a/sql/range_optimizer/index_range_scan_plan.cc +++ b/sql/range_optimizer/index_range_scan_plan.cc @@ -242,7 +242,8 @@ void Sel_arg_range_sequence::stack_push_range(SEL_ARG *key_tree) { min/max key flags from the predicate we're about to add to stack[0]. */ - push_position->min_key_flag = key_tree->get_min_flag(); + push_position->min_key_flag = key_tree->get_min_flag() | + (!key_tree->is_ascending * DESC_FLAG); push_position->max_key_flag = key_tree->get_max_flag(); push_position->rkey_func_flag = key_tree->rkey_func_flag; } else { @@ -626,6 +627,7 @@ ha_rows check_quick_select(THD *thd, RANGE_OPT_PARAM *param, uint idx, uint range_count = 0; param->use_index_statistics = eq_ranges_exceeds_limit( tree, &range_count, thd->variables.eq_range_index_dive_limit); + param->range_estimation = thd->variables.range_estimation; *is_imerge_scan = true; *is_ror_scan = !(file->index_flags(keynr, 0, true) & HA_KEY_SCAN_NOT_ROR); diff --git a/sql/range_optimizer/range_estimator.cc b/sql/range_optimizer/range_estimator.cc index 23dc3b21d65..aea232ec47c 100644 --- a/sql/range_optimizer/range_estimator.cc +++ b/sql/range_optimizer/range_estimator.cc @@ -24,6 +24,7 @@ #include "my_base.h" #include "my_dbug.h" #include "sql/handler.h" +#include "sql/histograms/histogram.h" // Histogram #include "sql/range_optimizer/range_opt_param.h" #include "sql/sql_select.h" // actual_key_parts #include "sql/table.h" @@ -36,6 +37,26 @@ void init_range_settings(Range_settings &settings, const RANGE_OPT_PARAM ¶m, settings.eq_range = param.use_index_statistics; settings.strict_range = false; settings.use_index_statistics = param.use_index_statistics; + + switch (param.range_estimation) { + case ESTIMATE_USE_STATISTICS_EQ_LIMIT: + if (settings.eq_range) { + settings.use_histogram = true; + } + break; + case ESTIMATE_USE_STATISTICS: + settings.eq_range = true; + settings.strict_range = true; + settings.use_histogram = true; + break; + case ESTIMATE_USE_STATISTICS_ONLY: + settings.skip = true; + settings.eq_range = true; + settings.strict_range = true; + settings.use_histogram = true; + default: + break; + } } void Range_estimator::init(TABLE *tbl) { @@ -44,6 +65,15 @@ void Range_estimator::init(TABLE *tbl) { void Range_estimator::use_index(uint kr) { keyno = kr; + + const KEY *key_info = &table->key_info[keyno]; + assert(actual_key_parts(key_info) <= histogram_array.size()); + histogram_array.fill(nullptr); + for (uint part = 0; part < actual_key_parts(key_info); part++) { + Field *fld = key_info->key_part[part].field; + histogram_array[part] = table->find_histogram(fld->field_index()); + } + } void Range_estimator::decide_estimation_method(const Range_settings &settings, @@ -67,6 +97,10 @@ Range_estimator::check_statistics(const Range_settings &settings, "x IS NULL" may have more than 1 matching row. */ estimator = ONE_ROW_ESTIMATOR; + } else if ((settings.skip || settings.eq_range || settings.strict_range) && + settings.use_histogram && + check_histogram_statistics(range)) { + estimator = HISTOGRAM_ESTIMATOR; } else if (settings.eq_range && settings.use_index_statistics && check_index_statistics(range)) { /* @@ -97,11 +131,18 @@ Range_estimator::check_statistics(const Range_settings &settings, ha_rows Range_estimator::records_in_range(KEY_MULTI_RANGE &range) { assert(range.range_flag & SKIP_RECORDS_IN_RANGE); + double selectivity = 1.0; + ha_rows rows = HA_POS_ERROR; switch (selected_estimator) { case EQ_RANGE_ESTIMATOR: return estimate_by_index_statistics(range); case ONE_ROW_ESTIMATOR: return 1; + case HISTOGRAM_ESTIMATOR: + if (!get_range_selectivity(range, selectivity)) { + rows = std::max(1.0, std::round(table->file->stats.records * selectivity)); + } + return rows; default: assert(0); return HA_POS_ERROR; @@ -119,3 +160,121 @@ ha_rows Range_estimator::estimate_by_index_statistics(KEY_MULTI_RANGE &) { return static_cast( table->key_info[keyno].records_per_key(keyparts_used - 1)); } + +bool Range_estimator::check_histogram_statistics(KEY_MULTI_RANGE &range) { + /* + The histogram estimator is ready only when all are met: + + 1) It is not a geometry type, because there is no supportive statistics. + 2) Minimal statistics are ready. + */ + return + !((range.range_flag & GEOM_FLAG) || // 1) + !histogram_array[0]); // 2) +} + +bool Range_estimator::get_range_selectivity(const KEY_MULTI_RANGE &range, + double &selectivity) { + assert(!(range.range_flag & (GEOM_FLAG | SKIP_RANGE))); + selectivity = 1.0; + + uint fld_offset = 0; + key_part_map part_map = + (range.start_key.keypart_map | range.end_key.keypart_map); + + const KEY *key_info = &table->key_info[keyno]; + bool prev_is_null = false; + for (uint part = 0; part < actual_key_parts(key_info); part++) { + Field *fld = key_info->key_part[part].field; + if (fld->is_array()) { + /* + A Field_typed_array is translated into multiple equality min/max ranges, + one per each array element. It's accessed through the conversion field. + */ + fld = down_cast(fld)->get_conv_field(); + } + if (!(part_map & (1 << part))) break; + + /* + For a range supported by a given key, + (1) All parts except for the last are equal parts, + (2) The combined range flag effectively reflects only the last part, + which can be either equal or range. + */ + uint flag = + (part_map & (1 << (part + 1))) ? EQ_RANGE : range.range_flag; + assert(!(flag & EQ_RANGE) || + !(flag & (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX)) && + !memcmp(range.start_key.key + fld_offset, + range.end_key.key + fld_offset, + key_info->key_part[part].store_length)); + if ((flag & EQ_RANGE) && fld->is_nullable() && *range.start_key.key) + flag |= NULL_RANGE; + + const histograms::Histogram *histogram = histogram_array[part]; + if (!histogram && part == 0) { + // Histogram for the first key part is required. + assert(0); + return true; + } + + bool h_sel = false; + double histogram_selectivity = 1.0; + if (histogram) { + if (histogram->get_raw_range_selectivity( + fld, flag, range.start_key.key + fld_offset, + range.end_key.key + fld_offset, &histogram_selectivity)) + return true; + + h_sel = true; + if (part == 0) { + selectivity = histogram_selectivity; + } + } + + if (part > 0) { + bool rpk_sel = false; + double rpk_selectivity = 1.0; + + /* + Ranges of the form "x IS NULL" will not use index statistics + because the number of rows with this value are likely to be + very different than the values in the index statistics. + */ + if ((flag & EQ_RANGE) && !(flag & NULL_RANGE) && !prev_is_null && + key_info->has_records_per_key(part) && + key_info->has_records_per_key(part - 1)) { + rpk_sel = true; + rpk_selectivity = key_info->records_per_key(part) / + key_info->records_per_key(part - 1); + } + + bool has_f = false; + double f = 0; + // TODO add correlation + + if (unlikely(h_sel && has_f)) { + // formula-1, explcit f + selectivity = + f * selectivity + (1 - f) * selectivity * histogram_selectivity; + } else if (rpk_sel && + (!h_sel || + rpk_selectivity > histogram_selectivity)) { // (*) + // formula-2 + selectivity *= rpk_selectivity; + } else if (h_sel) { + // formula-1, f = 0 (unavailable) + selectivity *= histogram_selectivity; + } else { + // formula-1, f = 0, use magic number + selectivity *= + (flag & EQ_RANGE) ? COND_FILTER_EQUALITY : COND_FILTER_INEQUALITY; + } + } + + prev_is_null = (flag & NULL_RANGE); + fld_offset += key_info->key_part[part].store_length; + } + + return false; +} diff --git a/sql/range_optimizer/range_estimator.h b/sql/range_optimizer/range_estimator.h index dbdb820f9c6..2b552ae3ce1 100644 --- a/sql/range_optimizer/range_estimator.h +++ b/sql/range_optimizer/range_estimator.h @@ -28,6 +28,9 @@ class RANGE_OPT_PARAM; class TABLE; +namespace histograms { + class Histogram; +} /// Range estimator types. enum enum_range_estimator { @@ -40,6 +43,10 @@ enum enum_range_estimator { An estimator for equality non-null range. */ EQ_RANGE_ESTIMATOR, + /** + An estimator for any range. + */ + HISTOGRAM_ESTIMATOR, NUM_ESTIMATORS }; @@ -55,6 +62,8 @@ struct Range_settings { /// True: use index statistics for range estimation. bool use_index_statistics; + /// True: use histogram for range estimation. + bool use_histogram{false}; }; /** @@ -121,6 +130,46 @@ class Range_estimator { bool check_index_statistics(KEY_MULTI_RANGE &range); ha_rows estimate_by_index_statistics(KEY_MULTI_RANGE &range); + bool check_histogram_statistics(KEY_MULTI_RANGE &range); + /** + Get selectivity of a key range, based on pure statistics. + + Taking correlation into account gets better joint selectivity, which + is bigger than one assuming independency. Correlation could be + explicitly provided or implicitly represented in record_per_key. + + Typically the joint selectivity is obtained by formula-1: + + P(ab) = f * P(a) + (1 - f) * P(a) * P(b) + + With record_per_key, given that + + records_per_key[i] = rows * P(a) + records_per_key[i+1] = rows * P(ab) + + We have formula-2: + + P(ab) = P(a) * records_per_key[i+1] / records_per_key[i] + = P(a) * rpk_selectivity + + Note that although a histogram with explicit correlation factor (f) is + usually the better choice, f is not always available, while + rpk_selectivity is almost ready. + + Besides, we have an observation: + + (*) If f = 0 and rpk_selectivity > P(b), correlation should play a + significant role, consequently formula-2 should be chosen. + + @param range The range + @param[out] selectivity the calculated selectivity + + @retval false Success + @retval true Error + */ + bool get_range_selectivity(const KEY_MULTI_RANGE &range, + double &selectivity); + // The TABLE as statistics provider. const TABLE *table; @@ -130,6 +179,12 @@ class Range_estimator { // Key parts used for the current range. int keyparts_used; + static constexpr size_t MAX_KEY_PARTS = sizeof(key_part_map) * 8; + using Histogram_array = + std::array; + // A cache of histograms for key parts. + Histogram_array histogram_array; + // Selected estimator. enum enum_range_estimator selected_estimator; }; diff --git a/sql/range_optimizer/range_opt_param.h b/sql/range_optimizer/range_opt_param.h index a6d86a447db..e49697a3a01 100644 --- a/sql/range_optimizer/range_opt_param.h +++ b/sql/range_optimizer/range_opt_param.h @@ -75,6 +75,9 @@ class RANGE_OPT_PARAM { */ bool use_index_statistics; + /// How histogram is used for range estimation. @sa enum_range_estimation. + uint range_estimation; + /// Error handler for this param. Range_optimizer_error_handler error_handler; diff --git a/sql/range_optimizer/range_optimizer.cc b/sql/range_optimizer/range_optimizer.cc index 5d96eb1a954..1a5a84997fb 100644 --- a/sql/range_optimizer/range_optimizer.cc +++ b/sql/range_optimizer/range_optimizer.cc @@ -310,6 +310,7 @@ bool setup_range_optimizer_param(THD *thd, MEM_ROOT *return_mem_root, param->temp_mem_root = temp_mem_root; param->using_real_indexes = true; param->use_index_statistics = false; + param->range_estimation = ESTIMATE_COMPATIBLE; temp_mem_root->set_max_capacity(thd->variables.range_optimizer_max_mem_size); temp_mem_root->set_error_for_capacity_exceeded(true); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 7bc71357022..39bf763f986 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -7558,6 +7558,21 @@ static Sys_var_charptr Sys_debug_set_operations_secondary_overflow_at( NOT_IN_BINLOG, ON_CHECK(nullptr), ON_UPDATE(nullptr)); #endif +static const char *range_estimation_policy[] = + {"COMPATIBLE", "USE_STATISTICS_EQ_LIMIT", "USE_STATISTICS", + "USE_STATISTICS_ONLY", NullS}; +static Sys_var_enum Sys_range_estimation( + "range_estimation", + "Set key range estimation policy. COMPATIBLE - estimate as before. " + "USE_STATISTICS_EQ_LIMIT - use histogram, if available, rather than index " + "statistics, when eq_range_index_dive_limit is effective. USE_STATISTICS - " + "use statistics, if any is available, rather than index dive. " + "USE_STATISTICS_ONLY - avoid sampling totally even when there is no proper " + "statistics.", + HINT_UPDATEABLE SESSION_VAR(range_estimation), CMD_LINE(REQUIRED_ARG), + range_estimation_policy, DEFAULT(ESTIMATE_USE_STATISTICS_EQ_LIMIT), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(nullptr), ON_UPDATE(nullptr)); + /** Warn usage of restrict_fk_on_non_standard_key variable. When it is set to false, warning should include usage of non std keys may break replication diff --git a/sql/system_variables.h b/sql/system_variables.h index bbe74d85809..07bf3c9110d 100644 --- a/sql/system_variables.h +++ b/sql/system_variables.h @@ -95,6 +95,13 @@ enum use_secondary_engine { SECONDARY_ENGINE_FORCED = 2 }; +enum enum_range_estimation { + ESTIMATE_COMPATIBLE = 0, + ESTIMATE_USE_STATISTICS_EQ_LIMIT = 1, + ESTIMATE_USE_STATISTICS = 2, + ESTIMATE_USE_STATISTICS_ONLY = 3, +}; + /** Values for explain_format sysvar. @@ -514,6 +521,9 @@ struct System_variables { @sa Sys_restrict_fk_on_non_standard_key */ bool restrict_fk_on_non_standard_key; + + /// @sa Sys_range_estimation + ulong range_estimation; }; static_assert(std::is_trivially_copyable::value);