Description:
Backward scan partition table use multi-valued indexes contains prefix index, and more than one index column after the prefix index column may lost some result.
This problem may occur in RefIterator<true> and ReverseIndexRangeScanIterator under optimizer_switch = index_condition_pushdown=off.
The reason for this problem is Filed_varstring::cmp_max and Field_blob::cmp_max
used in key_rec_cmp, not accurate calculation of the character lengths that should be compared.
How to repeat:
create table ttb2(
c1 int primary key,
c2 varchar(6),
c3 varchar(6),
c4 varchar(6),
c5 varchar(6),
index(c2, c3(3), c4)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
partition by range(c1)
(partition p0 values less than (100),
partition p1 values less than (200));
insert into ttb2 values
(1, 'aa', 'aaaxxx', 'aa', '11111'),
(2, 'aa', 'aaafff', null, '11111');
insert into ttb2 values
(110, 'aa', 'aaa', 'aa', '111111'),
(111, 'aa', 'aaammm', 'aa', '11111');
mysql>select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)
mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa');
+-----+------+------+------+--------+
| c1 | c2 | c3 | c4 | c5 |
+-----+------+------+------+--------+
| 110 | aa | aaa | aa | 111111 |
+-----+------+------+------+--------+
1 row in set (0.00 sec)
mysql> alter table ttb2 modify column c3 blob;
mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)
Suggested fix:
this is a fix diff on mysql-8.0.35:
diff --git a/sql/field.cc b/sql/field.cc
index 357036d9ca7..59b1eeeb42f 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -6682,6 +6682,40 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
return diff;
}
+int Field_varstring::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const {
+ uint a_length, b_length;
+ uint local_char_a_length, local_char_b_length;
+ int diff;
+
+ if (length_bytes == 1) {
+ a_length = (uint)*a_ptr;
+ b_length = (uint)*b_ptr;
+ } else {
+ a_length = uint2korr(a_ptr);
+ b_length = uint2korr(b_ptr);
+ }
+ if (field_charset->mbmaxlen != 1) {
+ local_char_a_length = my_charpos(field_charset, a_ptr + length_bytes,
+ a_ptr + length_bytes + a_length,
+ max_len / field_charset->mbmaxlen);
+ local_char_b_length = my_charpos(field_charset, b_ptr + length_bytes,
+ b_ptr + length_bytes + b_length,
+ max_len / field_charset->mbmaxlen);
+ } else {
+ local_char_a_length = max_len;
+ local_char_b_length = max_len;
+ }
+ a_length = std::min(a_length, local_char_a_length);
+ b_length = std::min(b_length, local_char_b_length);
+
+ diff = field_charset->coll->strnncollsp(field_charset, a_ptr + length_bytes,
+ a_length, b_ptr + length_bytes,
+ b_length);
+ return diff;
+}
+
+
/**
@note
varstring and blob keys are ALWAYS stored with a 2 byte length prefix
@@ -7181,6 +7215,28 @@ int Field_blob::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
return Field_blob::cmp(blob1, a_len, blob2, b_len);
}
+int Field_blob::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const {
+ uint local_char_a_length;
+ uint local_char_b_length;
+ const uchar *blob1 = get_blob_data(a_ptr + packlength);
+ const uchar *blob2 = get_blob_data(b_ptr + packlength);
+ uint a_len = get_length(a_ptr);
+ uint b_len = get_length(b_ptr);
+ if (field_charset->mbmaxlen != 1) {
+ local_char_a_length = my_charpos(field_charset, blob1, blob1 + a_len,
+ max_len / field_charset->mbmaxlen);
+ local_char_b_length = my_charpos(field_charset, blob2, blob2 + b_len,
+ max_len / field_charset->mbmaxlen);
+ } else {
+ local_char_a_length = max_len;
+ local_char_b_length = max_len;
+ }
+ a_len = std::min(a_len, local_char_a_length);
+ b_len = std::min(b_len, local_char_b_length);
+ return Field_blob::cmp(blob1, a_len, blob2, b_len);
+}
+
int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr,
uint32 max_length) const {
const uchar *a = get_blob_data(a_ptr + packlength);
diff --git a/sql/field.h b/sql/field.h
index 14484e060ea..7da98f2a8e6 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1178,6 +1178,10 @@ class Field {
uint max_len [[maybe_unused]]) const {
return cmp(a, b);
}
+ virtual int cmp_prefix_key_rec(const uchar *a, const uchar *b,
+ uint max_len) const {
+ return cmp_max(a, b, max_len);
+ }
virtual int cmp(const uchar *, const uchar *) const = 0;
virtual int cmp_binary(const uchar *a, const uchar *b,
uint32 max_length [[maybe_unused]] = ~0L) const {
@@ -3533,6 +3537,9 @@ class Field_varstring : public Field_longstr {
String *val_str(String *, String *) const override;
my_decimal *val_decimal(my_decimal *) const final;
int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+ int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const final;
+
int cmp(const uchar *a, const uchar *b) const final {
return cmp_max(a, b, ~0U);
}
@@ -3713,6 +3720,9 @@ class Field_blob : public Field_longstr {
String *val_str(String *, String *) const override;
my_decimal *val_decimal(my_decimal *) const override;
int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+ int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const final;
+
int cmp(const uchar *a, const uchar *b) const final {
return cmp_max(a, b, ~0U);
}
diff --git a/sql/key.cc b/sql/key.cc
index 11ea5f44f47..def95de9076 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -643,10 +643,18 @@ int key_rec_cmp(KEY **key, uchar *first_rec, uchar *second_rec) {
max length. The exceptions are the BLOB and VARCHAR field types
that take the max length into account.
*/
- if ((result =
- field->cmp_max(field->field_ptr() + first_diff,
- field->field_ptr() + sec_diff, key_part->length)))
- return (sort_order < 0) ? -result : result;
+ if (key_part->key_part_flag & HA_PART_KEY_SEG) {
+ result = field->cmp_prefix_key_rec(field->field_ptr() + first_diff,
+ field->field_ptr() + sec_diff,
+ key_part->length);
+
+ } else {
+ result =
+ field->cmp_max(field->field_ptr() + first_diff,
+ field->field_ptr() + sec_diff, key_part->length);
+ }
+ if (result) return (sort_order < 0) ? -result : result;
+
next_loop:
key_part++;
key_part_num++;