diff --git a/mysql-test/r/range_in_oversized_value.result b/mysql-test/r/range_in_oversized_value.result new file mode 100644 index 00000000000..4f029f28078 --- /dev/null +++ b/mysql-test/r/range_in_oversized_value.result @@ -0,0 +1,384 @@ +# +# Bug#118009: Full table scan instead of index range scan when IN() +# contains a value longer than VARCHAR column length +# +# Also covers Bug#118486 (same root cause) and Bug#119770 +# (prefix index variant). +# +# --------------------------------------------------------------- +# Test 1: Basic case - IN() with oversized value on PK +# --------------------------------------------------------------- +CREATE TABLE t1 ( +pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t1 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +# Must use range/index scan, not full table scan +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range PRIMARY PRIMARY 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where (`test`.`t1`.`pk` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t1 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +pk +abc +def +# Single oversized value - should not cause errors +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('this_value_is_way_too_long_for_varchar16'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL NULL no matching row in const table +Warnings: +Note 1003 /* select#1 */ select NULL AS `pk` from `test`.`t1` where multiple equal('this_value_is_way_too_long_for_varchar16', NULL) +# All oversized values +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('this_value_is_way_too_long_for_varchar16', +'another_oversized_value_here_xxxx'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL index PRIMARY PRIMARY 66 NULL 5 40.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where (`test`.`t1`.`pk` in ('this_value_is_way_too_long_for_varchar16','another_oversized_value_here_xxxx')) +DROP TABLE t1; +# --------------------------------------------------------------- +# Test 2: Bug#35169384 non-regression - collation contractions +# --------------------------------------------------------------- +# In utf8mb4_0900_ai_ci: 'ae'='æ', 'ss'='ß', SOFT HYPHEN is ignored. +# These results MUST be correct (not regressed by the fix). +CREATE TABLE t2 (x VARCHAR(1), KEY (x)) +DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t2 VALUES ('a'), ('b'), ('æ'), ('ß'), ('s'); +INSERT INTO t2 SELECT 'x' FROM t2; +INSERT INTO t2 SELECT 'x' FROM t2; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +# 'ae' matches 'æ', 'ss' matches 'ß', SOFT_HYPHEN+'b' matches 'b' +# Expected: b, ß, æ +EXPLAIN SELECT * FROM t2 +WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 NULL index x x 7 NULL 20 30.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t2`.`x` AS `x` from `test`.`t2` where (`test`.`t2`.`x` in ('ae','ss',(concat('­','b')))) +SELECT * FROM t2 +WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); +x +b +ß +æ +# Range predicates must still be abandoned (not just EQ_FUNC) +# x <= 'ae' should match 'a' and 'æ' (not just 'a') +EXPLAIN SELECT * FROM t2 WHERE x <= 'ae'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 NULL index x x 7 NULL 20 33.33 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t2`.`x` AS `x` from `test`.`t2` where (`test`.`t2`.`x` <= 'ae') +SELECT * FROM t2 WHERE x <= 'ae'; +x +a +æ +# x BETWEEN 'r' AND 'ss' should match 's' and 'ß' +EXPLAIN SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 NULL range x x 7 NULL 17 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t2`.`x` AS `x` from `test`.`t2` where (`test`.`t2`.`x` between 'r' and 'ss') +SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; +x +s +ß +# Mixed: valid value + oversized contraction value on small column +# Known limitation: 'æ' is missed because 'ae' (2 chars) exceeds +# VARCHAR(1) and is skipped, leaving only range for 'a'. The sort key +# for 'æ' differs from 'a' so it's not in the range. The filter can't +# recheck rows that were never scanned. This matches 8.0 behavior +# (Bug#35169384) and is acceptable for the Bug#118009 fix. +# Expected: a (known: æ is missed in this mixed contraction case) +EXPLAIN SELECT * FROM t2 WHERE x IN ('a', 'ae'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 NULL range x x 7 NULL 1 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t2`.`x` AS `x` from `test`.`t2` where (`test`.`t2`.`x` in ('a','ae')) +SELECT * FROM t2 WHERE x IN ('a', 'ae'); +x +a +DROP TABLE t2; +# --------------------------------------------------------------- +# Test 3: Oversized IN() value on secondary index +# --------------------------------------------------------------- +CREATE TABLE t3 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c VARCHAR(16) NOT NULL, +KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t3 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t3; +Table Op Msg_type Msg_text +test.t3 analyze status OK +# Must use index range scan on idx_c, not full table scan +EXPLAIN SELECT * FROM t3 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 NULL range idx_c idx_c 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t3`.`id` AS `id`,`test`.`t3`.`c` AS `c` from `test`.`t3` where (`test`.`t3`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t3 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id c +1 abc +2 def +DROP TABLE t3; +# --------------------------------------------------------------- +# Test 4: Binary collation not affected (already works correctly) +# --------------------------------------------------------------- +CREATE TABLE t4 ( +c VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +INSERT INTO t4 VALUES ('abc'), ('def'); +ANALYZE TABLE t4; +Table Op Msg_type Msg_text +test.t4 analyze status OK +# Binary collation: should use range scan (already worked before fix) +EXPLAIN SELECT * FROM t4 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 NULL index PRIMARY PRIMARY 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t4`.`c` AS `c` from `test`.`t4` where (`test`.`t4`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t4 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +c +abc +def +DROP TABLE t4; +# --------------------------------------------------------------- +# Test 5: Nullable column +# --------------------------------------------------------------- +CREATE TABLE t5 ( +c VARCHAR(16), +KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t5 VALUES ('abc'), ('def'), (NULL); +ANALYZE TABLE t5; +Table Op Msg_type Msg_text +test.t5 analyze status OK +EXPLAIN SELECT * FROM t5 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t5 NULL range idx_c idx_c 67 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t5`.`c` AS `c` from `test`.`t5` where (`test`.`t5`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t5 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +c +abc +def +DROP TABLE t5; +# --------------------------------------------------------------- +# Test 6: Multiple oversized values mixed with normal values +# --------------------------------------------------------------- +CREATE TABLE t6 ( +c VARCHAR(8) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t6 VALUES ('aaa'), ('bbb'), ('ccc'), ('ddd'), +('eee'), ('fff'), ('ggg'), ('hhh'), ('iii'), ('jjj'), +('kkk'), ('lll'), ('mmm'), ('nnn'), ('ooo'), ('ppp'); +ANALYZE TABLE t6; +Table Op Msg_type Msg_text +test.t6 analyze status OK +# Two normal + two oversized: must still find the normal values +EXPLAIN SELECT * FROM t6 +WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t6 NULL range PRIMARY PRIMARY 34 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t6`.`c` AS `c` from `test`.`t6` where (`test`.`t6`.`c` in ('aaa','oversized_value_one','ccc','oversized_value_two')) +SELECT * FROM t6 +WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); +c +aaa +ccc +DROP TABLE t6; +# --------------------------------------------------------------- +# Test 7: Prefix index with values exceeding column length +# --------------------------------------------------------------- +CREATE TABLE t7 ( +id BIGINT PRIMARY KEY AUTO_INCREMENT, +c VARCHAR(16) NOT NULL, +KEY idx_c_prefix (c(8)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t7 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t7; +Table Op Msg_type Msg_text +test.t7 analyze status OK +# 'abc' fits, oversized value exceeds VARCHAR(16) - must use range +EXPLAIN SELECT * FROM t7 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t7 NULL range idx_c_prefix idx_c_prefix 34 NULL 3 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t7`.`id` AS `id`,`test`.`t7`.`c` AS `c` from `test`.`t7` where (`test`.`t7`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t7 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id c +1 abc +2 def +DROP TABLE t7; +# --------------------------------------------------------------- +# Test 8: Bug#119770 - Prefix index, values exceed prefix but fit +# in the column. Must use range scan on the prefix index. +# --------------------------------------------------------------- +CREATE TABLE t8 ( +id BIGINT PRIMARY KEY AUTO_INCREMENT, +uniq_name VARCHAR(64) NOT NULL, +KEY idx_prefix (uniq_name(10)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t8 (uniq_name) VALUES +('short'), ('another'), ('testval'), +('abcdefghijklmnopqrstuvwxyz0123456789'); +ANALYZE TABLE t8; +Table Op Msg_type Msg_text +test.t8 analyze status OK +# Values exceed prefix(10) but fit in VARCHAR(64) - must use range +EXPLAIN SELECT * FROM t8 +WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t8 NULL range idx_prefix idx_prefix 42 NULL 3 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t8`.`id` AS `id`,`test`.`t8`.`uniq_name` AS `uniq_name` from `test`.`t8` where (`test`.`t8`.`uniq_name` in ('short','abcdefghijklmnopqrstuvwxyz0123456789','another')) +SELECT * FROM t8 +WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); +id uniq_name +1 short +2 another +4 abcdefghijklmnopqrstuvwxyz0123456789 +DROP TABLE t8; +# --------------------------------------------------------------- +# Test 9: Bug#119867 - OR across different indexed columns with +# oversized value must preserve index merge optimization. +# --------------------------------------------------------------- +CREATE TABLE t9 ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c1 VARCHAR(2), +c2 VARCHAR(2), +c3 VARCHAR(2), +KEY idx_c1 (c1), +KEY idx_c2 (c2), +KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t9 (c1, c2, c3) VALUES +('a','x','x'),('b','y','y'),('c','z','z'), +('d','a','a'),('e','b','b'),('f','c','c'), +('g','d','d'),('h','e','e'),('i','f','f'), +('j','g','g'),('k','h','h'),('l','i','i'); +ANALYZE TABLE t9; +Table Op Msg_type Msg_text +test.t9 analyze status OK +# Must use index_merge, not full table scan +EXPLAIN SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t9 NULL index_merge idx_c1,idx_c2,idx_c3 idx_c1,idx_c2 11,11 NULL 2 100.00 Using union(idx_c1,idx_c2); Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t9`.`id` AS `id`,`test`.`t9`.`c1` AS `c1`,`test`.`t9`.`c2` AS `c2`,`test`.`t9`.`c3` AS `c3` from `test`.`t9` where ((`test`.`t9`.`c1` = 'a') or (`test`.`t9`.`c2` = 'b') or (`test`.`t9`.`c3` = 'abc')) +# Oversized branch first: must still use index_merge +EXPLAIN SELECT * FROM t9 WHERE c3 = 'abc' OR c1 = 'a' OR c2 = 'b'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t9 NULL index_merge idx_c1,idx_c2,idx_c3 idx_c1,idx_c2 11,11 NULL 2 100.00 Using union(idx_c1,idx_c2); Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t9`.`id` AS `id`,`test`.`t9`.`c1` AS `c1`,`test`.`t9`.`c2` AS `c2`,`test`.`t9`.`c3` AS `c3` from `test`.`t9` where ((`test`.`t9`.`c3` = 'abc') or (`test`.`t9`.`c1` = 'a') or (`test`.`t9`.`c2` = 'b')) +# Verify correct results (only c1='a' and c2='b' match) +SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; +id c1 c2 c3 +1 a x x +5 e b b +DROP TABLE t9; +# --------------------------------------------------------------- +# Test 10: Bug#119867 - OR with multi-field multiple equality +# (equality propagation creates MULT_EQUAL across columns) +# --------------------------------------------------------------- +CREATE TABLE t10 ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c1 VARCHAR(2), +c2 VARCHAR(2), +c3 VARCHAR(2), +c4 VARCHAR(2), +KEY idx_c1 (c1), +KEY idx_c2 (c2), +KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t10 (c1, c2, c3, c4) VALUES +('a','x','x','x'),('b','y','y','y'),('c','z','z','z'), +('d','a','a','a'),('e','b','b','b'),('f','c','c','c'), +('g','d','d','d'),('h','e','e','e'),('i','f','f','f'), +('j','g','g','g'),('k','h','h','h'),('l','i','i','i'); +ANALYZE TABLE t10; +Table Op Msg_type Msg_text +test.t10 analyze status OK +# c4 = 'abc' AND c3 = c4 creates MULT_EQUAL('abc', c3, c4). +# 'abc' is oversized for both VARCHAR(2) columns. +# Must use index_merge on the non-oversized branches. +EXPLAIN SELECT * FROM t10 WHERE c1 = 'a' OR c2 = 'b' OR (c4 = 'abc' AND c3 = c4); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t10 NULL index_merge idx_c1,idx_c2,idx_c3 idx_c1,idx_c2 11,11 NULL 2 100.00 Using union(idx_c1,idx_c2); Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t10`.`id` AS `id`,`test`.`t10`.`c1` AS `c1`,`test`.`t10`.`c2` AS `c2`,`test`.`t10`.`c3` AS `c3`,`test`.`t10`.`c4` AS `c4` from `test`.`t10` where ((`test`.`t10`.`c1` = 'a') or (`test`.`t10`.`c2` = 'b') or ((`test`.`t10`.`c4` = 'abc') and (`test`.`t10`.`c3` = 'abc'))) +# Verify correct results (only c1='a' and c2='b' match) +SELECT * FROM t10 WHERE c1 = 'a' OR c2 = 'b' OR (c4 = 'abc' AND c3 = c4); +id c1 c2 c3 c4 +1 a x x x +5 e b b b +DROP TABLE t10; +# --------------------------------------------------------------- +# Test 11: View expansion - oversized detection works through +# views (conditions go through constant propagation). +# --------------------------------------------------------------- +CREATE TABLE t11 ( +pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t11 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t11; +Table Op Msg_type Msg_text +test.t11 analyze status OK +CREATE VIEW v11 AS SELECT * FROM t11; +# Through view: must use range scan, not full table scan +EXPLAIN SELECT * FROM v11 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t11 NULL range PRIMARY PRIMARY 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t11`.`pk` AS `pk` from `test`.`t11` where (`test`.`t11`.`pk` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM v11 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +pk +abc +def +# View with OR and oversized equality (Bug#119867 path) +CREATE TABLE t11b ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c1 VARCHAR(2), +c2 VARCHAR(2), +KEY idx_c1 (c1), +KEY idx_c2 (c2) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t11b (c1, c2) VALUES +('a','x'),('b','y'),('c','z'),('d','a'),('e','b'),('f','c'), +('g','d'),('h','e'),('i','f'),('j','g'),('k','h'),('l','i'); +ANALYZE TABLE t11b; +Table Op Msg_type Msg_text +test.t11b analyze status OK +CREATE VIEW v11b AS SELECT * FROM t11b; +# Through view: must use index_merge, not full table scan +EXPLAIN SELECT * FROM v11b WHERE c1 = 'a' OR c2 = 'abc'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t11b NULL range idx_c1,idx_c2 idx_c1 11 NULL 1 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t11b`.`id` AS `id`,`test`.`t11b`.`c1` AS `c1`,`test`.`t11b`.`c2` AS `c2` from `test`.`t11b` where ((`test`.`t11b`.`c1` = 'a') or (`test`.`t11b`.`c2` = 'abc')) +SELECT * FROM v11b WHERE c1 = 'a' OR c2 = 'abc'; +id c1 c2 +1 a x +DROP VIEW v11, v11b; +DROP TABLE t11, t11b; diff --git a/mysql-test/t/range_in_oversized_value.test b/mysql-test/t/range_in_oversized_value.test new file mode 100644 index 00000000000..3c43f7d264a --- /dev/null +++ b/mysql-test/t/range_in_oversized_value.test @@ -0,0 +1,341 @@ +--source include/elide_costs.inc + +--echo # +--echo # Bug#118009: Full table scan instead of index range scan when IN() +--echo # contains a value longer than VARCHAR column length +--echo # +--echo # Also covers Bug#118486 (same root cause) and Bug#119770 +--echo # (prefix index variant). +--echo # + +--echo # --------------------------------------------------------------- +--echo # Test 1: Basic case - IN() with oversized value on PK +--echo # --------------------------------------------------------------- + +CREATE TABLE t1 ( + pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t1 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t1; + +--echo # Must use range/index scan, not full table scan +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t1 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--echo # Single oversized value - should not cause errors +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('this_value_is_way_too_long_for_varchar16'); + +--echo # All oversized values +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('this_value_is_way_too_long_for_varchar16', + 'another_oversized_value_here_xxxx'); + +DROP TABLE t1; + +--echo # --------------------------------------------------------------- +--echo # Test 2: Bug#35169384 non-regression - collation contractions +--echo # --------------------------------------------------------------- + +--echo # In utf8mb4_0900_ai_ci: 'ae'='æ', 'ss'='ß', SOFT HYPHEN is ignored. +--echo # These results MUST be correct (not regressed by the fix). + +CREATE TABLE t2 (x VARCHAR(1), KEY (x)) + DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t2 VALUES ('a'), ('b'), ('æ'), ('ß'), ('s'); +INSERT INTO t2 SELECT 'x' FROM t2; +INSERT INTO t2 SELECT 'x' FROM t2; +ANALYZE TABLE t2; + +--echo # 'ae' matches 'æ', 'ss' matches 'ß', SOFT_HYPHEN+'b' matches 'b' +--echo # Expected: b, ß, æ +EXPLAIN SELECT * FROM t2 + WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); + +--sorted_result +SELECT * FROM t2 + WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); + +--echo # Range predicates must still be abandoned (not just EQ_FUNC) +--echo # x <= 'ae' should match 'a' and 'æ' (not just 'a') +EXPLAIN SELECT * FROM t2 WHERE x <= 'ae'; + +--sorted_result +SELECT * FROM t2 WHERE x <= 'ae'; + +--echo # x BETWEEN 'r' AND 'ss' should match 's' and 'ß' +EXPLAIN SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; + +--sorted_result +SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; + +--echo # Mixed: valid value + oversized contraction value on small column +--echo # Known limitation: 'æ' is missed because 'ae' (2 chars) exceeds +--echo # VARCHAR(1) and is skipped, leaving only range for 'a'. The sort key +--echo # for 'æ' differs from 'a' so it's not in the range. The filter can't +--echo # recheck rows that were never scanned. This matches 8.0 behavior +--echo # (Bug#35169384) and is acceptable for the Bug#118009 fix. +--echo # Expected: a (known: æ is missed in this mixed contraction case) +EXPLAIN SELECT * FROM t2 WHERE x IN ('a', 'ae'); + +--sorted_result +SELECT * FROM t2 WHERE x IN ('a', 'ae'); + +DROP TABLE t2; + +--echo # --------------------------------------------------------------- +--echo # Test 3: Oversized IN() value on secondary index +--echo # --------------------------------------------------------------- + +CREATE TABLE t3 ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c VARCHAR(16) NOT NULL, + KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t3 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t3; + +--echo # Must use index range scan on idx_c, not full table scan +EXPLAIN SELECT * FROM t3 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t3 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t3; + +--echo # --------------------------------------------------------------- +--echo # Test 4: Binary collation not affected (already works correctly) +--echo # --------------------------------------------------------------- + +CREATE TABLE t4 ( + c VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; + +INSERT INTO t4 VALUES ('abc'), ('def'); +ANALYZE TABLE t4; + +--echo # Binary collation: should use range scan (already worked before fix) +EXPLAIN SELECT * FROM t4 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t4 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t4; + +--echo # --------------------------------------------------------------- +--echo # Test 5: Nullable column +--echo # --------------------------------------------------------------- + +CREATE TABLE t5 ( + c VARCHAR(16), + KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t5 VALUES ('abc'), ('def'), (NULL); +ANALYZE TABLE t5; + +EXPLAIN SELECT * FROM t5 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t5 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t5; + +--echo # --------------------------------------------------------------- +--echo # Test 6: Multiple oversized values mixed with normal values +--echo # --------------------------------------------------------------- + +CREATE TABLE t6 ( + c VARCHAR(8) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t6 VALUES ('aaa'), ('bbb'), ('ccc'), ('ddd'), + ('eee'), ('fff'), ('ggg'), ('hhh'), ('iii'), ('jjj'), + ('kkk'), ('lll'), ('mmm'), ('nnn'), ('ooo'), ('ppp'); +ANALYZE TABLE t6; + +--echo # Two normal + two oversized: must still find the normal values +EXPLAIN SELECT * FROM t6 + WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); + +--sorted_result +SELECT * FROM t6 + WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); + +DROP TABLE t6; + +--echo # --------------------------------------------------------------- +--echo # Test 7: Prefix index with values exceeding column length +--echo # --------------------------------------------------------------- + +CREATE TABLE t7 ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + c VARCHAR(16) NOT NULL, + KEY idx_c_prefix (c(8)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t7 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t7; + +--echo # 'abc' fits, oversized value exceeds VARCHAR(16) - must use range +EXPLAIN SELECT * FROM t7 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t7 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t7; + +--echo # --------------------------------------------------------------- +--echo # Test 8: Bug#119770 - Prefix index, values exceed prefix but fit +--echo # in the column. Must use range scan on the prefix index. +--echo # --------------------------------------------------------------- + +CREATE TABLE t8 ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + uniq_name VARCHAR(64) NOT NULL, + KEY idx_prefix (uniq_name(10)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t8 (uniq_name) VALUES + ('short'), ('another'), ('testval'), + ('abcdefghijklmnopqrstuvwxyz0123456789'); +ANALYZE TABLE t8; + +--echo # Values exceed prefix(10) but fit in VARCHAR(64) - must use range +EXPLAIN SELECT * FROM t8 + WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); + +--sorted_result +SELECT * FROM t8 + WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); + +DROP TABLE t8; + +--echo # --------------------------------------------------------------- +--echo # Test 9: Bug#119867 - OR across different indexed columns with +--echo # oversized value must preserve index merge optimization. +--echo # --------------------------------------------------------------- + +CREATE TABLE t9 ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c1 VARCHAR(2), + c2 VARCHAR(2), + c3 VARCHAR(2), + KEY idx_c1 (c1), + KEY idx_c2 (c2), + KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t9 (c1, c2, c3) VALUES + ('a','x','x'),('b','y','y'),('c','z','z'), + ('d','a','a'),('e','b','b'),('f','c','c'), + ('g','d','d'),('h','e','e'),('i','f','f'), + ('j','g','g'),('k','h','h'),('l','i','i'); +ANALYZE TABLE t9; + +--echo # Must use index_merge, not full table scan +EXPLAIN SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; + +--echo # Oversized branch first: must still use index_merge +EXPLAIN SELECT * FROM t9 WHERE c3 = 'abc' OR c1 = 'a' OR c2 = 'b'; + +--echo # Verify correct results (only c1='a' and c2='b' match) +--sorted_result +SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; + +DROP TABLE t9; + +--echo # --------------------------------------------------------------- +--echo # Test 10: Bug#119867 - OR with multi-field multiple equality +--echo # (equality propagation creates MULT_EQUAL across columns) +--echo # --------------------------------------------------------------- + +CREATE TABLE t10 ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c1 VARCHAR(2), + c2 VARCHAR(2), + c3 VARCHAR(2), + c4 VARCHAR(2), + KEY idx_c1 (c1), + KEY idx_c2 (c2), + KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t10 (c1, c2, c3, c4) VALUES + ('a','x','x','x'),('b','y','y','y'),('c','z','z','z'), + ('d','a','a','a'),('e','b','b','b'),('f','c','c','c'), + ('g','d','d','d'),('h','e','e','e'),('i','f','f','f'), + ('j','g','g','g'),('k','h','h','h'),('l','i','i','i'); +ANALYZE TABLE t10; + +--echo # c4 = 'abc' AND c3 = c4 creates MULT_EQUAL('abc', c3, c4). +--echo # 'abc' is oversized for both VARCHAR(2) columns. +--echo # Must use index_merge on the non-oversized branches. +EXPLAIN SELECT * FROM t10 WHERE c1 = 'a' OR c2 = 'b' OR (c4 = 'abc' AND c3 = c4); + +--echo # Verify correct results (only c1='a' and c2='b' match) +--sorted_result +SELECT * FROM t10 WHERE c1 = 'a' OR c2 = 'b' OR (c4 = 'abc' AND c3 = c4); + +DROP TABLE t10; + +--echo # --------------------------------------------------------------- +--echo # Test 11: View expansion - oversized detection works through +--echo # views (conditions go through constant propagation). +--echo # --------------------------------------------------------------- + +CREATE TABLE t11 ( + pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t11 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t11; + +CREATE VIEW v11 AS SELECT * FROM t11; + +--echo # Through view: must use range scan, not full table scan +EXPLAIN SELECT * FROM v11 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM v11 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--echo # View with OR and oversized equality (Bug#119867 path) +CREATE TABLE t11b ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c1 VARCHAR(2), + c2 VARCHAR(2), + KEY idx_c1 (c1), + KEY idx_c2 (c2) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t11b (c1, c2) VALUES + ('a','x'),('b','y'),('c','z'),('d','a'),('e','b'),('f','c'), + ('g','d'),('h','e'),('i','f'),('j','g'),('k','h'),('l','i'); +ANALYZE TABLE t11b; + +CREATE VIEW v11b AS SELECT * FROM t11b; + +--echo # Through view: must use index_merge, not full table scan +EXPLAIN SELECT * FROM v11b WHERE c1 = 'a' OR c2 = 'abc'; + +--sorted_result +SELECT * FROM v11b WHERE c1 = 'a' OR c2 = 'abc'; + +DROP VIEW v11, v11b; +DROP TABLE t11, t11b; diff --git a/sql/range_optimizer/range_analysis.cc b/sql/range_optimizer/range_analysis.cc index 5e8344cfc8b..20b90540968 100644 --- a/sql/range_optimizer/range_analysis.cc +++ b/sql/range_optimizer/range_analysis.cc @@ -26,7 +26,6 @@ #include #include "field_types.h" -#include "m_ctype.h" #include "memory_debugging.h" #include "mf_wcomp.h" #include "my_alloc.h" @@ -36,6 +35,7 @@ #include "my_dbug.h" #include "my_inttypes.h" #include "my_table_map.h" +#include "mysql/strings/m_ctype.h" #include "mysql/udf_registration_types.h" #include "mysql_com.h" #include "mysqld_error.h" @@ -43,6 +43,7 @@ #include "sql/current_thd.h" #include "sql/derror.h" #include "sql/field.h" +#include "sql/field_common_properties.h" #include "sql/handler.h" #include "sql/item.h" #include "sql/item_cmpfunc.h" @@ -167,6 +168,54 @@ static SEL_TREE *get_ne_mm_tree(THD *thd, RANGE_OPT_PARAM *param, @param op The 'in' operator itself. @param is_negated If true, the operator is NOT IN, otherwise IN. */ + +/** + Try to extract the Field and constant value from a multiple equality. + + Constant propagation rewrites simple col = const to MULT_EQUAL during + optimization. Subquery conditions for instance are not rewritten, + but that narrow case is out of scope for the oversized-value check. + + @param func The predicate to extract from. + @param[out] out_field Set to the Field on success. + @param[out] out_value Set to the constant value on success. + + @return false on success (field and value extracted), true on failure. +*/ +static bool get_eq_field_and_value(const Item_func *func, + const Field **out_field, Item **out_value) { + if (func->functype() == Item_func::MULT_EQUAL_FUNC) { + auto *item_equal = down_cast(func); + Item *value = item_equal->const_arg(); + if (value != nullptr) { + auto it = item_equal->get_fields().begin(); + if (it == item_equal->get_fields().end()) return true; + *out_field = it->field; + *out_value = value; + return false; + } + } + return true; +} + +/** + Check whether a string value exceeds the column's character capacity. + No row can match such a value, so callers can safely skip it instead + of letting the nullptr poison the tree via tree_or(valid, nullptr). + + @param field The column to check against. + @param value The value to check. + + @return true if the value exceeds the column's character capacity. +*/ +static bool is_oversized_string_for_field(const Field *field, Item *value) { + if (!is_string_type(field->type()) || value->result_type() != STRING_RESULT) + return false; + String buf; + String *s = value->val_str(&buf); + return s != nullptr && s->numchars() > field->char_length(); +} + static SEL_TREE *get_func_mm_tree_from_in_predicate( THD *thd, RANGE_OPT_PARAM *param, table_map prev_tables, table_map read_tables, bool remove_jump_scans, Item *predicand, @@ -365,16 +414,28 @@ static SEL_TREE *get_func_mm_tree_from_in_predicate( if (predicand->type() == Item::FIELD_ITEM) { // The expression is () IN (...) Field *field = down_cast(predicand)->field; - SEL_TREE *tree = - get_mm_parts(thd, param, prev_tables, read_tables, op, field, - Item_func::EQ_FUNC, op->arguments()[1]); - if (tree) { - Item **arg, **end; - for (arg = op->arguments() + 2, end = arg + op->argument_count() - 2; - arg < end; arg++) { - tree = tree_or(param, remove_jump_scans, tree, - get_mm_parts(thd, param, prev_tables, read_tables, op, - field, Item_func::EQ_FUNC, *arg)); + SEL_TREE *tree = nullptr; + for (Item **arg = op->arguments() + 1, + **end = arg + op->argument_count() - 1; + arg < end; arg++) { + if (SEL_TREE *val_tree = + get_mm_parts(thd, param, prev_tables, read_tables, op, field, + Item_func::EQ_FUNC, *arg); + val_tree == nullptr) { + /* + Bug#118009: An oversized string poisons the tree via + tree_or(valid, nullptr). Skip it -- no row can match. We check + against the field's char_length(), not the key_part's prefix + clone. For non-string types, nullptr is genuine "always true" + so we bail out. + */ + if (is_oversized_string_for_field(field, *arg)) continue; + return nullptr; + } else { + tree = tree == nullptr + ? val_tree + : tree_or(param, remove_jump_scans, tree, val_tree); + if (tree == nullptr) return nullptr; } } return tree; @@ -775,7 +836,7 @@ static SEL_TREE *get_full_func_mm_tree(THD *thd, RANGE_OPT_PARAM *param, if (!((ref_tables | item_field->table_ref->map()) & param_comp)) ftree = get_func_mm_tree(thd, param, prev_tables, read_tables, remove_jump_scans, predicand, op, value, inv); - Item_equal *item_equal = item_field->item_equal; + Item_equal *item_equal = item_field->multi_equality(); if (item_equal != nullptr) { for (Item_field &item : item_equal->get_fields()) { Field *f = item.field; @@ -860,6 +921,21 @@ SEL_TREE *get_mm_tree(THD *thd, RANGE_OPT_PARAM *param, table_map prev_tables, SEL_TREE *new_tree = get_mm_tree(thd, param, prev_tables, read_tables, current_table, remove_jump_scans, &item); if (param->has_errors()) return nullptr; + /* + Bug#119867: An oversized equality branch poisons the tree via + tree_or(valid, nullptr). Skip it -- no row can match. This must + run before the first-branch assignment below to avoid poisoning + all subsequent branches. + */ + if (functype != Item_func::COND_AND_FUNC && new_tree == nullptr && + item.type() == Item::FUNC_ITEM) { + const Field *eq_field; + Item *eq_value; + if (!get_eq_field_and_value(down_cast(&item), &eq_field, + &eq_value) && + is_oversized_string_for_field(eq_field, eq_value)) + continue; + } if (first) { tree = new_tree; first = false; @@ -878,7 +954,7 @@ SEL_TREE *get_mm_tree(THD *thd, RANGE_OPT_PARAM *param, table_map prev_tables, dbug_print_tree("tree_returned", tree, param); return tree; } - if (cond->const_item() && !cond->is_expensive()) { + if (cond->const_item() && !cond->cost().IsExpensive()) { const SEL_TREE::Type type = cond->val_int() ? SEL_TREE::ALWAYS : SEL_TREE::IMPOSSIBLE; SEL_TREE *tree = new (param->temp_mem_root) @@ -1051,6 +1127,7 @@ static bool is_spatial_operator(Item_func::Functype op_type) { switch (op_type) { case Item_func::SP_EQUALS_FUNC: case Item_func::SP_DISJOINT_FUNC: + case Item_func::SP_DISTANCE_FUNC: case Item_func::SP_INTERSECTS_FUNC: case Item_func::SP_TOUCHES_FUNC: case Item_func::SP_CROSSES_FUNC: @@ -1127,6 +1204,14 @@ static SEL_TREE *get_mm_parts(THD *thd, RANGE_OPT_PARAM *param, tree->set_key(key_part->key, sel_add(tree->release_key(key_part->key), sel_root)); tree->keys_map.set_bit(key_part->key); + // A range constructed for a multi-valued index is never exact. + // So it needs the filter to be placed on top of the range access. + if (param->using_real_indexes) { + int index = param->real_keynr[key_part->key]; + if (param->table->key_info[index].flags & HA_MULTI_VALUED_KEY) { + tree->inexact = true; + } + } } } @@ -1213,12 +1298,28 @@ static bool save_value_and_handle_conversion( thd->variables.sql_mode = orig_sql_mode; switch (err) { + case TYPE_OK: + return false; case TYPE_NOTE_TRUNCATED: - case TYPE_WARN_TRUNCATED: + // Insignificant truncation (trailing zero/space). Use it as an inexact + // range predicate. *inexact = true; - [[fallthrough]]; - case TYPE_OK: return false; + case TYPE_WARN_TRUNCATED: + // Truncation of possibly significant parts. We may still be able to use + // it as a range predicate, but we need a filter to make sure we don't + // return too many rows. + *inexact = true; + // Use the truncated value in the range predicate, unless it's a string + // with a non-binary collation with a non-trivial strnxfrm function. For + // example, if c is a VARCHAR(1) column with the utf8mb4_0900_ai_ci + // collation, c <= 'ss' should match the value 'ß', but if we truncate it + // to c <= 's' to fit in the column type, it will not match 'ß'. For such + // predicates, we assume the predicate is always true, and let a filter + // decide the outcome. + return is_string_type(field->type()) && + !my_binary_compare(field->charset()) && + use_strnxfrm(field->charset()); case TYPE_WARN_INVALID_STRING: /* An invalid string does not produce any rows when used with @@ -1230,8 +1331,7 @@ static bool save_value_and_handle_conversion( } /* For other operations on invalid strings, we assume that the range - predicate is always true and let evaluate_join_record() decide - the outcome. + predicate is always true and let a filter decide the outcome. */ *inexact = true; return true; @@ -1244,7 +1344,7 @@ static bool save_value_and_handle_conversion( instead of always false. Because of this, we assume that the range predicate is always true instead of always false and let - evaluate_join_record() decide the outcome. + a filter decide the outcome. */ *inexact = true; return true; @@ -1564,7 +1664,17 @@ static SEL_ROOT *get_mm_leaf(THD *thd, RANGE_OPT_PARAM *param, Item *cond_func, down_cast(field)->geom_type = save_geom_type; } - if (always_true_or_false) goto end; + if (always_true_or_false) { + /* + Bug#119770: For EQ_FUNC on a prefix key, the truncated value is + a valid prefix lookup. Fall through with inexact=true so the + filter rechecks full equality. + */ + if (type == Item_func::EQ_FUNC && (key_part->flag & HA_PART_KEY_SEG)) + *inexact = true; + else + goto end; + } } /*