diff --git a/mysql-test/r/range_in_oversized_value.result b/mysql-test/r/range_in_oversized_value.result new file mode 100644 index 00000000000..9f148c6b480 --- /dev/null +++ b/mysql-test/r/range_in_oversized_value.result @@ -0,0 +1,277 @@ +# +# Bug#118009: Full table scan instead of index range scan when IN() +# contains a value longer than VARCHAR column length +# +# Also covers Bug#118486 (same root cause) and Bug#119770 +# (prefix index variant). +# +# --------------------------------------------------------------- +# Test 1: Basic case - IN() with oversized value on PK +# --------------------------------------------------------------- +CREATE TABLE t1 ( +pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t1 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +# Must use range/index scan, not full table scan +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range PRIMARY PRIMARY 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where (`test`.`t1`.`pk` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t1 +WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +pk +abc +def +# Single oversized value - should not cause errors +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('this_value_is_way_too_long_for_varchar16'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL NULL no matching row in const table +Warnings: +Note 1003 /* select#1 */ select NULL AS `pk` from `test`.`t1` where multiple equal('this_value_is_way_too_long_for_varchar16', NULL) +# All oversized values +EXPLAIN SELECT * FROM t1 +WHERE pk IN ('this_value_is_way_too_long_for_varchar16', +'another_oversized_value_here_xxxx'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL index PRIMARY PRIMARY 66 NULL 5 40.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where (`test`.`t1`.`pk` in ('this_value_is_way_too_long_for_varchar16','another_oversized_value_here_xxxx')) +DROP TABLE t1; +# --------------------------------------------------------------- +# Test 2: Bug#35169384 non-regression - collation contractions +# --------------------------------------------------------------- +# In utf8mb4_0900_ai_ci: 'ae'='æ', 'ss'='ß', SOFT HYPHEN is ignored. +# These results MUST be correct (not regressed by the fix). +CREATE TABLE t2 (x VARCHAR(1), KEY (x)) +DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t2 VALUES ('a'), ('b'), ('æ'), ('ß'), ('s'); +INSERT INTO t2 SELECT 'x' FROM t2; +INSERT INTO t2 SELECT 'x' FROM t2; +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status OK +# 'ae' matches 'æ', 'ss' matches 'ß', SOFT_HYPHEN+'b' matches 'b' +# Expected: b, ß, æ +SELECT * FROM t2 +WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); +x +b +ß +æ +# Range predicates must still be abandoned (not just EQ_FUNC) +# x <= 'ae' should match 'a' and 'æ' (not just 'a') +SELECT * FROM t2 WHERE x <= 'ae'; +x +a +æ +# x BETWEEN 'r' AND 'ss' should match 's' and 'ß' +SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; +x +s +ß +# Mixed: valid value + oversized contraction value on small column +# Known limitation: 'æ' is missed because 'ae' (2 chars) exceeds +# VARCHAR(1) and is skipped, leaving only range for 'a'. The sort key +# for 'æ' differs from 'a' so it's not in the range. The filter can't +# recheck rows that were never scanned. This matches 8.0 behavior +# (Bug#35169384) and is acceptable for the Bug#118009 fix. +# Expected: a (known: æ is missed in this mixed contraction case) +SELECT * FROM t2 WHERE x IN ('a', 'ae'); +x +a +DROP TABLE t2; +# --------------------------------------------------------------- +# Test 3: Oversized IN() value on secondary index +# --------------------------------------------------------------- +CREATE TABLE t3 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c VARCHAR(16) NOT NULL, +KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t3 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t3; +Table Op Msg_type Msg_text +test.t3 analyze status OK +# Must use index range scan on idx_c, not full table scan +EXPLAIN SELECT * FROM t3 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t3 NULL range idx_c idx_c 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t3`.`id` AS `id`,`test`.`t3`.`c` AS `c` from `test`.`t3` where (`test`.`t3`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t3 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id c +1 abc +2 def +DROP TABLE t3; +# --------------------------------------------------------------- +# Test 4: Binary collation not affected (already works correctly) +# --------------------------------------------------------------- +CREATE TABLE t4 ( +c VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; +INSERT INTO t4 VALUES ('abc'), ('def'); +ANALYZE TABLE t4; +Table Op Msg_type Msg_text +test.t4 analyze status OK +# Binary collation: should use range scan (already worked before fix) +EXPLAIN SELECT * FROM t4 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 NULL index PRIMARY PRIMARY 66 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t4`.`c` AS `c` from `test`.`t4` where (`test`.`t4`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t4 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +c +abc +def +DROP TABLE t4; +# --------------------------------------------------------------- +# Test 5: Nullable column +# --------------------------------------------------------------- +CREATE TABLE t5 ( +c VARCHAR(16), +KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t5 VALUES ('abc'), ('def'), (NULL); +ANALYZE TABLE t5; +Table Op Msg_type Msg_text +test.t5 analyze status OK +EXPLAIN SELECT * FROM t5 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t5 NULL range idx_c idx_c 67 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t5`.`c` AS `c` from `test`.`t5` where (`test`.`t5`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t5 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +c +abc +def +DROP TABLE t5; +# --------------------------------------------------------------- +# Test 6: Multiple oversized values mixed with normal values +# --------------------------------------------------------------- +CREATE TABLE t6 ( +c VARCHAR(8) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t6 VALUES ('aaa'), ('bbb'), ('ccc'), ('ddd'), +('eee'), ('fff'), ('ggg'), ('hhh'), ('iii'), ('jjj'), +('kkk'), ('lll'), ('mmm'), ('nnn'), ('ooo'), ('ppp'); +ANALYZE TABLE t6; +Table Op Msg_type Msg_text +test.t6 analyze status OK +# Two normal + two oversized: must still find the normal values +EXPLAIN SELECT * FROM t6 +WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t6 NULL range PRIMARY PRIMARY 34 NULL 2 100.00 Using where; Using index +Warnings: +Note 1003 /* select#1 */ select `test`.`t6`.`c` AS `c` from `test`.`t6` where (`test`.`t6`.`c` in ('aaa','oversized_value_one','ccc','oversized_value_two')) +SELECT * FROM t6 +WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); +c +aaa +ccc +DROP TABLE t6; +# --------------------------------------------------------------- +# Test 7: Prefix index with values exceeding column length +# --------------------------------------------------------------- +CREATE TABLE t7 ( +id BIGINT PRIMARY KEY AUTO_INCREMENT, +c VARCHAR(16) NOT NULL, +KEY idx_c_prefix (c(8)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t7 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t7; +Table Op Msg_type Msg_text +test.t7 analyze status OK +# 'abc' fits, oversized value exceeds VARCHAR(16) - must use range +EXPLAIN SELECT * FROM t7 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t7 NULL range idx_c_prefix idx_c_prefix 34 NULL 3 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t7`.`id` AS `id`,`test`.`t7`.`c` AS `c` from `test`.`t7` where (`test`.`t7`.`c` in ('abc','this_value_is_way_too_long_for_varchar16','def')) +SELECT * FROM t7 +WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); +id c +1 abc +2 def +DROP TABLE t7; +# --------------------------------------------------------------- +# Test 8: Bug#119770 - Prefix index, values exceed prefix but fit +# in the column. Must use range scan on the prefix index. +# --------------------------------------------------------------- +CREATE TABLE t8 ( +id BIGINT PRIMARY KEY AUTO_INCREMENT, +uniq_name VARCHAR(64) NOT NULL, +KEY idx_prefix (uniq_name(10)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t8 (uniq_name) VALUES +('short'), ('another'), ('testval'), +('abcdefghijklmnopqrstuvwxyz0123456789'); +ANALYZE TABLE t8; +Table Op Msg_type Msg_text +test.t8 analyze status OK +# Values exceed prefix(10) but fit in VARCHAR(64) - must use range +EXPLAIN SELECT * FROM t8 +WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t8 NULL range idx_prefix idx_prefix 42 NULL 3 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t8`.`id` AS `id`,`test`.`t8`.`uniq_name` AS `uniq_name` from `test`.`t8` where (`test`.`t8`.`uniq_name` in ('short','abcdefghijklmnopqrstuvwxyz0123456789','another')) +SELECT * FROM t8 +WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); +id uniq_name +1 short +2 another +4 abcdefghijklmnopqrstuvwxyz0123456789 +DROP TABLE t8; +# --------------------------------------------------------------- +# Test 9: Bug#119867 - OR across different indexed columns with +# oversized value must preserve index merge optimization. +# --------------------------------------------------------------- +CREATE TABLE t9 ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +c1 VARCHAR(2), +c2 VARCHAR(2), +c3 VARCHAR(2), +KEY idx_c1 (c1), +KEY idx_c2 (c2), +KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t9 (c1, c2, c3) VALUES +('a','x','x'),('b','y','y'),('c','z','z'), +('d','a','a'),('e','b','b'),('f','c','c'), +('g','d','d'),('h','e','e'),('i','f','f'), +('j','g','g'),('k','h','h'),('l','i','i'); +ANALYZE TABLE t9; +Table Op Msg_type Msg_text +test.t9 analyze status OK +# Must use index_merge, not full table scan +EXPLAIN SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t9 NULL index_merge idx_c1,idx_c2,idx_c3 idx_c1,idx_c2 11,11 NULL 2 100.00 Using union(idx_c1,idx_c2); Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t9`.`id` AS `id`,`test`.`t9`.`c1` AS `c1`,`test`.`t9`.`c2` AS `c2`,`test`.`t9`.`c3` AS `c3` from `test`.`t9` where ((`test`.`t9`.`c1` = 'a') or (`test`.`t9`.`c2` = 'b') or (`test`.`t9`.`c3` = 'abc')) +# Oversized branch first: must still use index_merge +EXPLAIN SELECT * FROM t9 WHERE c3 = 'abc' OR c1 = 'a' OR c2 = 'b'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t9 NULL index_merge idx_c1,idx_c2,idx_c3 idx_c1,idx_c2 11,11 NULL 2 100.00 Using union(idx_c1,idx_c2); Using where +Warnings: +Note 1003 /* select#1 */ select `test`.`t9`.`id` AS `id`,`test`.`t9`.`c1` AS `c1`,`test`.`t9`.`c2` AS `c2`,`test`.`t9`.`c3` AS `c3` from `test`.`t9` where ((`test`.`t9`.`c3` = 'abc') or (`test`.`t9`.`c1` = 'a') or (`test`.`t9`.`c2` = 'b')) +# Verify correct results (only c1='a' and c2='b' match) +SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; +id c1 c2 c3 +1 a x x +5 e b b +DROP TABLE t9; diff --git a/mysql-test/t/range_in_oversized_value.test b/mysql-test/t/range_in_oversized_value.test new file mode 100644 index 00000000000..52ef9f0db7d --- /dev/null +++ b/mysql-test/t/range_in_oversized_value.test @@ -0,0 +1,251 @@ +--source include/elide_costs.inc + +--echo # +--echo # Bug#118009: Full table scan instead of index range scan when IN() +--echo # contains a value longer than VARCHAR column length +--echo # +--echo # Also covers Bug#118486 (same root cause) and Bug#119770 +--echo # (prefix index variant). +--echo # + +--echo # --------------------------------------------------------------- +--echo # Test 1: Basic case - IN() with oversized value on PK +--echo # --------------------------------------------------------------- + +CREATE TABLE t1 ( + pk VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t1 VALUES ('abc'), ('def'), ('ghi'), ('jkl'), ('mno'); +ANALYZE TABLE t1; + +--echo # Must use range/index scan, not full table scan +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t1 + WHERE pk IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--echo # Single oversized value - should not cause errors +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('this_value_is_way_too_long_for_varchar16'); + +--echo # All oversized values +EXPLAIN SELECT * FROM t1 + WHERE pk IN ('this_value_is_way_too_long_for_varchar16', + 'another_oversized_value_here_xxxx'); + +DROP TABLE t1; + +--echo # --------------------------------------------------------------- +--echo # Test 2: Bug#35169384 non-regression - collation contractions +--echo # --------------------------------------------------------------- + +--echo # In utf8mb4_0900_ai_ci: 'ae'='æ', 'ss'='ß', SOFT HYPHEN is ignored. +--echo # These results MUST be correct (not regressed by the fix). + +CREATE TABLE t2 (x VARCHAR(1), KEY (x)) + DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; +INSERT INTO t2 VALUES ('a'), ('b'), ('æ'), ('ß'), ('s'); +INSERT INTO t2 SELECT 'x' FROM t2; +INSERT INTO t2 SELECT 'x' FROM t2; +ANALYZE TABLE t2; + +--echo # 'ae' matches 'æ', 'ss' matches 'ß', SOFT_HYPHEN+'b' matches 'b' +--echo # Expected: b, ß, æ +--sorted_result +SELECT * FROM t2 + WHERE x IN ('ae', 'ss', CONCAT(_utf16 x'00AD', 'b')); + +--echo # Range predicates must still be abandoned (not just EQ_FUNC) +--echo # x <= 'ae' should match 'a' and 'æ' (not just 'a') +--sorted_result +SELECT * FROM t2 WHERE x <= 'ae'; + +--echo # x BETWEEN 'r' AND 'ss' should match 's' and 'ß' +--sorted_result +SELECT * FROM t2 WHERE x BETWEEN 'r' AND 'ss'; + +--echo # Mixed: valid value + oversized contraction value on small column +--echo # Known limitation: 'æ' is missed because 'ae' (2 chars) exceeds +--echo # VARCHAR(1) and is skipped, leaving only range for 'a'. The sort key +--echo # for 'æ' differs from 'a' so it's not in the range. The filter can't +--echo # recheck rows that were never scanned. This matches 8.0 behavior +--echo # (Bug#35169384) and is acceptable for the Bug#118009 fix. +--echo # Expected: a (known: æ is missed in this mixed contraction case) +--sorted_result +SELECT * FROM t2 WHERE x IN ('a', 'ae'); + +DROP TABLE t2; + +--echo # --------------------------------------------------------------- +--echo # Test 3: Oversized IN() value on secondary index +--echo # --------------------------------------------------------------- + +CREATE TABLE t3 ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c VARCHAR(16) NOT NULL, + KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t3 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t3; + +--echo # Must use index range scan on idx_c, not full table scan +EXPLAIN SELECT * FROM t3 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t3 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t3; + +--echo # --------------------------------------------------------------- +--echo # Test 4: Binary collation not affected (already works correctly) +--echo # --------------------------------------------------------------- + +CREATE TABLE t4 ( + c VARCHAR(16) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; + +INSERT INTO t4 VALUES ('abc'), ('def'); +ANALYZE TABLE t4; + +--echo # Binary collation: should use range scan (already worked before fix) +EXPLAIN SELECT * FROM t4 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t4 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t4; + +--echo # --------------------------------------------------------------- +--echo # Test 5: Nullable column +--echo # --------------------------------------------------------------- + +CREATE TABLE t5 ( + c VARCHAR(16), + KEY idx_c (c) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t5 VALUES ('abc'), ('def'), (NULL); +ANALYZE TABLE t5; + +EXPLAIN SELECT * FROM t5 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t5 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t5; + +--echo # --------------------------------------------------------------- +--echo # Test 6: Multiple oversized values mixed with normal values +--echo # --------------------------------------------------------------- + +CREATE TABLE t6 ( + c VARCHAR(8) NOT NULL PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t6 VALUES ('aaa'), ('bbb'), ('ccc'), ('ddd'), + ('eee'), ('fff'), ('ggg'), ('hhh'), ('iii'), ('jjj'), + ('kkk'), ('lll'), ('mmm'), ('nnn'), ('ooo'), ('ppp'); +ANALYZE TABLE t6; + +--echo # Two normal + two oversized: must still find the normal values +EXPLAIN SELECT * FROM t6 + WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); + +--sorted_result +SELECT * FROM t6 + WHERE c IN ('aaa', 'oversized_value_one', 'ccc', 'oversized_value_two'); + +DROP TABLE t6; + +--echo # --------------------------------------------------------------- +--echo # Test 7: Prefix index with values exceeding column length +--echo # --------------------------------------------------------------- + +CREATE TABLE t7 ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + c VARCHAR(16) NOT NULL, + KEY idx_c_prefix (c(8)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t7 (c) VALUES ('abc'), ('def'), ('ghi'); +ANALYZE TABLE t7; + +--echo # 'abc' fits, oversized value exceeds VARCHAR(16) - must use range +EXPLAIN SELECT * FROM t7 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +--sorted_result +SELECT * FROM t7 + WHERE c IN ('abc', 'this_value_is_way_too_long_for_varchar16', 'def'); + +DROP TABLE t7; + +--echo # --------------------------------------------------------------- +--echo # Test 8: Bug#119770 - Prefix index, values exceed prefix but fit +--echo # in the column. Must use range scan on the prefix index. +--echo # --------------------------------------------------------------- + +CREATE TABLE t8 ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + uniq_name VARCHAR(64) NOT NULL, + KEY idx_prefix (uniq_name(10)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t8 (uniq_name) VALUES + ('short'), ('another'), ('testval'), + ('abcdefghijklmnopqrstuvwxyz0123456789'); +ANALYZE TABLE t8; + +--echo # Values exceed prefix(10) but fit in VARCHAR(64) - must use range +EXPLAIN SELECT * FROM t8 + WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); + +--sorted_result +SELECT * FROM t8 + WHERE uniq_name IN ('short', 'abcdefghijklmnopqrstuvwxyz0123456789', 'another'); + +DROP TABLE t8; + +--echo # --------------------------------------------------------------- +--echo # Test 9: Bug#119867 - OR across different indexed columns with +--echo # oversized value must preserve index merge optimization. +--echo # --------------------------------------------------------------- + +CREATE TABLE t9 ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + c1 VARCHAR(2), + c2 VARCHAR(2), + c3 VARCHAR(2), + KEY idx_c1 (c1), + KEY idx_c2 (c2), + KEY idx_c3 (c3) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; + +INSERT INTO t9 (c1, c2, c3) VALUES + ('a','x','x'),('b','y','y'),('c','z','z'), + ('d','a','a'),('e','b','b'),('f','c','c'), + ('g','d','d'),('h','e','e'),('i','f','f'), + ('j','g','g'),('k','h','h'),('l','i','i'); +ANALYZE TABLE t9; + +--echo # Must use index_merge, not full table scan +EXPLAIN SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; + +--echo # Oversized branch first: must still use index_merge +EXPLAIN SELECT * FROM t9 WHERE c3 = 'abc' OR c1 = 'a' OR c2 = 'b'; + +--echo # Verify correct results (only c1='a' and c2='b' match) +--sorted_result +SELECT * FROM t9 WHERE c1 = 'a' OR c2 = 'b' OR c3 = 'abc'; + +DROP TABLE t9; diff --git a/sql/range_optimizer/range_analysis.cc b/sql/range_optimizer/range_analysis.cc index d2c98133078..7ad0f8d3466 100644 --- a/sql/range_optimizer/range_analysis.cc +++ b/sql/range_optimizer/range_analysis.cc @@ -168,6 +168,70 @@ static SEL_TREE *get_ne_mm_tree(THD *thd, RANGE_OPT_PARAM *param, @param op The 'in' operator itself. @param is_negated If true, the operator is NOT IN, otherwise IN. */ + +/** + Extract the Field and constant value from an equality predicate. + + Handles EQ_FUNC/EQUAL_FUNC (col = 'val') and MULT_EQUAL_FUNC + (multiple equal('val', col)) which is produced by the optimizer's + equality propagation rewrite. + + @return true if field and value were successfully extracted. +*/ +static bool get_eq_field_and_value(Item_func *func, Field **out_field, + Item **out_value) { + if ((func->functype() == Item_func::EQ_FUNC || + func->functype() == Item_func::EQUAL_FUNC) && + func->argument_count() == 2) { + Item *left = func->arguments()[0]->real_item(); + Item *right = func->arguments()[1]; + if (left->type() == Item::FIELD_ITEM && right->const_item()) { + *out_field = down_cast(left)->field; + *out_value = right; + return true; + } + if (right->real_item()->type() == Item::FIELD_ITEM && + left->const_item()) { + *out_field = down_cast(right->real_item())->field; + *out_value = func->arguments()[0]; + return true; + } + } else if (func->functype() == Item_func::MULT_EQUAL_FUNC) { + Item_equal *item_equal = down_cast(func); + *out_value = item_equal->const_arg(); + if (*out_value != nullptr) { + for (Item_field &f : item_equal->get_fields()) { + *out_field = f.field; + return true; + } + } + } + return false; +} + +/** + Check whether a string value exceeds the column's character capacity. + + Used by Bug#118009 (IN() loop) and Bug#119867 (OR loop) to detect + oversized string values that the range optimizer rejects as "always true" + for strnxfrm collations (Bug#35169384). No row can match such a value, + so the caller can safely skip it instead of letting the nullptr from + get_mm_parts/get_mm_leaf poison the tree via tree_or(valid, nullptr). + + @note UCA contractions (e.g. 'ae' = U+00E6) can make an N-char value + match an M-char stored value where N > M. This check does not account + for contractions, so a contraction match may be missed when the value + is skipped. This matches MySQL 8.0 behavior and is acceptable — see + the test file for details. +*/ +static bool is_overlong_string_for_field(Field *field, Item *value) { + if (!is_string_type(field->type()) || value->result_type() != STRING_RESULT) + return false; + String buf; + String *s = value->val_str(&buf); + return s != nullptr && s->numchars() > field->char_length(); +} + static SEL_TREE *get_func_mm_tree_from_in_predicate( THD *thd, RANGE_OPT_PARAM *param, table_map prev_tables, table_map read_tables, bool remove_jump_scans, Item *predicand, @@ -366,17 +430,32 @@ static SEL_TREE *get_func_mm_tree_from_in_predicate( if (predicand->type() == Item::FIELD_ITEM) { // The expression is () IN (...) Field *field = down_cast(predicand)->field; - SEL_TREE *tree = - get_mm_parts(thd, param, prev_tables, read_tables, op, field, - Item_func::EQ_FUNC, op->arguments()[1]); - if (tree) { - Item **arg, **end; - for (arg = op->arguments() + 2, end = arg + op->argument_count() - 2; - arg < end; arg++) { - tree = tree_or(param, remove_jump_scans, tree, - get_mm_parts(thd, param, prev_tables, read_tables, op, - field, Item_func::EQ_FUNC, *arg)); + SEL_TREE *tree = nullptr; + Item **arg, **end; + for (arg = op->arguments() + 1, end = arg + op->argument_count() - 1; + arg < end; arg++) { + SEL_TREE *val_tree = + get_mm_parts(thd, param, prev_tables, read_tables, op, field, + Item_func::EQ_FUNC, *arg); + if (val_tree == nullptr) { + /* + Bug#118009: An overlong string in an IN() list poisons the tree + via tree_or(valid, nullptr)->nullptr. Skip it — no row can match + a value exceeding the column's character capacity. The filter + rechecks the full IN() condition. + + We check against the table field's char_length(), not the + key_part's prefix clone — skipping a value that fits the column + but exceeds a prefix would miss matching rows. + + For non-string cases (e.g. type mismatch CHAR vs INT), nullptr + is a genuine "always true" so we must bail out (return nullptr). + */ + if (is_overlong_string_for_field(field, *arg)) continue; + return nullptr; } + tree = tree == nullptr ? val_tree + : tree_or(param, remove_jump_scans, tree, val_tree); } return tree; } @@ -861,6 +940,37 @@ SEL_TREE *get_mm_tree(THD *thd, RANGE_OPT_PARAM *param, table_map prev_tables, SEL_TREE *new_tree = get_mm_tree(thd, param, prev_tables, read_tables, current_table, remove_jump_scans, &item); if (param->has_errors()) return nullptr; + /* + Bug#119867: When an OR branch like "col = 'oversized'" returns + nullptr because the value was truncated for a strnxfrm collation + (Bug#35169384), tree_or(valid, nullptr) destroys the valid tree. + If the branch is an equality with a string value exceeding the + column's character capacity, skip it — no row can match. + The WHERE filter rechecks the full OR condition. + + This check must run before the first-branch assignment below, + otherwise an oversized first branch sets tree=nullptr and poisons + all subsequent valid branches via tree_or(nullptr, valid)->nullptr. + + Equality predicates appear as EQ_FUNC (col = 'val') or + MULT_EQUAL_FUNC (multiple equal('val', col)) after the + optimizer's equality propagation rewrite. + + Known limitation: UCA contractions (e.g. 'ae'='æ') can make + an N-char value match an M-char stored value (N>M). When such + a value is skipped here, the contraction match is missed. This + matches MySQL 8.0 behavior where the truncated value's sort key + also failed to match the contraction character. + */ + if (functype != Item_func::COND_AND_FUNC && + new_tree == nullptr && item.type() == Item::FUNC_ITEM) { + Field *eq_field; + Item *eq_value; + if (get_eq_field_and_value(down_cast(&item), + &eq_field, &eq_value) && + is_overlong_string_for_field(eq_field, eq_value)) + continue; + } if (first) { tree = new_tree; first = false; @@ -1589,7 +1699,20 @@ static SEL_ROOT *get_mm_leaf(THD *thd, RANGE_OPT_PARAM *param, Item *cond_func, down_cast(field)->geom_type = save_geom_type; } - if (always_true_or_false) goto end; + if (always_true_or_false) { + /* + Bug#119770: For EQ_FUNC on a prefix key (HA_PART_KEY_SEG), the + truncated value is a valid prefix lookup key — the prefix index + stores truncated values by design. Any row whose full column value + equals the search value must share the same prefix, so scanning + for it will find all candidates. Fall through to create a point + range with inexact=true so the WHERE filter rechecks full equality. + */ + if (type == Item_func::EQ_FUNC && (key_part->flag & HA_PART_KEY_SEG)) + *inexact = true; + else + goto end; + } } /*