Description:
Backward scan partition table use multi-valued indexes contains prefix index, and more than one index column after the prefix index column may lost some result.
This problem may occur in RefIterator<true> and ReverseIndexRangeScanIterator under optimizer_switch = index_condition_pushdown=off.
The reason for this problem is Filed_varstring::cmp_max and Field_blob::cmp_max
used in key_rec_cmp, not accurate calculation of the character lengths that should be compared.
How to repeat:
create table ttb2(
c1 int primary key,
c2 varchar(6),
c3 varchar(6),
c4 varchar(6),
c5 varchar(6),
index(c2, c3(3), c4)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
partition by range(c1)
(partition p0 values less than (100),
partition p1 values less than (200));
insert into ttb2 values
(1, 'aa', 'aaaxxx', 'aa', '11111'),
(2, 'aa', 'aaafff', null, '11111');
insert into ttb2 values
(110, 'aa', 'aaa', 'aa', '111111'),
(111, 'aa', 'aaammm', 'aa', '11111');
mysql>select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)
mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa');
+-----+------+------+------+--------+
| c1 | c2 | c3 | c4 | c5 |
+-----+------+------+------+--------+
| 110 | aa | aaa | aa | 111111 |
+-----+------+------+------+--------+
1 row in set (0.00 sec)
mysql> alter table ttb2 modify column c3 blob;
mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)
Suggested fix:
this is a fix diff on mysql-8.0.35:
diff --git a/sql/field.cc b/sql/field.cc
index 357036d9ca7..59b1eeeb42f 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -6682,6 +6682,40 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
return diff;
}
+int Field_varstring::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const {
+ uint a_length, b_length;
+ uint local_char_a_length, local_char_b_length;
+ int diff;
+
+ if (length_bytes == 1) {
+ a_length = (uint)*a_ptr;
+ b_length = (uint)*b_ptr;
+ } else {
+ a_length = uint2korr(a_ptr);
+ b_length = uint2korr(b_ptr);
+ }
+ if (field_charset->mbmaxlen != 1) {
+ local_char_a_length = my_charpos(field_charset, a_ptr + length_bytes,
+ a_ptr + length_bytes + a_length,
+ max_len / field_charset->mbmaxlen);
+ local_char_b_length = my_charpos(field_charset, b_ptr + length_bytes,
+ b_ptr + length_bytes + b_length,
+ max_len / field_charset->mbmaxlen);
+ } else {
+ local_char_a_length = max_len;
+ local_char_b_length = max_len;
+ }
+ a_length = std::min(a_length, local_char_a_length);
+ b_length = std::min(b_length, local_char_b_length);
+
+ diff = field_charset->coll->strnncollsp(field_charset, a_ptr + length_bytes,
+ a_length, b_ptr + length_bytes,
+ b_length);
+ return diff;
+}
+
+
/**
@note
varstring and blob keys are ALWAYS stored with a 2 byte length prefix
@@ -7181,6 +7215,28 @@ int Field_blob::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
return Field_blob::cmp(blob1, a_len, blob2, b_len);
}
+int Field_blob::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const {
+ uint local_char_a_length;
+ uint local_char_b_length;
+ const uchar *blob1 = get_blob_data(a_ptr + packlength);
+ const uchar *blob2 = get_blob_data(b_ptr + packlength);
+ uint a_len = get_length(a_ptr);
+ uint b_len = get_length(b_ptr);
+ if (field_charset->mbmaxlen != 1) {
+ local_char_a_length = my_charpos(field_charset, blob1, blob1 + a_len,
+ max_len / field_charset->mbmaxlen);
+ local_char_b_length = my_charpos(field_charset, blob2, blob2 + b_len,
+ max_len / field_charset->mbmaxlen);
+ } else {
+ local_char_a_length = max_len;
+ local_char_b_length = max_len;
+ }
+ a_len = std::min(a_len, local_char_a_length);
+ b_len = std::min(b_len, local_char_b_length);
+ return Field_blob::cmp(blob1, a_len, blob2, b_len);
+}
+
int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr,
uint32 max_length) const {
const uchar *a = get_blob_data(a_ptr + packlength);
diff --git a/sql/field.h b/sql/field.h
index 14484e060ea..7da98f2a8e6 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1178,6 +1178,10 @@ class Field {
uint max_len [[maybe_unused]]) const {
return cmp(a, b);
}
+ virtual int cmp_prefix_key_rec(const uchar *a, const uchar *b,
+ uint max_len) const {
+ return cmp_max(a, b, max_len);
+ }
virtual int cmp(const uchar *, const uchar *) const = 0;
virtual int cmp_binary(const uchar *a, const uchar *b,
uint32 max_length [[maybe_unused]] = ~0L) const {
@@ -3533,6 +3537,9 @@ class Field_varstring : public Field_longstr {
String *val_str(String *, String *) const override;
my_decimal *val_decimal(my_decimal *) const final;
int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+ int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const final;
+
int cmp(const uchar *a, const uchar *b) const final {
return cmp_max(a, b, ~0U);
}
@@ -3713,6 +3720,9 @@ class Field_blob : public Field_longstr {
String *val_str(String *, String *) const override;
my_decimal *val_decimal(my_decimal *) const override;
int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+ int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+ uint max_len) const final;
+
int cmp(const uchar *a, const uchar *b) const final {
return cmp_max(a, b, ~0U);
}
diff --git a/sql/key.cc b/sql/key.cc
index 11ea5f44f47..def95de9076 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -643,10 +643,18 @@ int key_rec_cmp(KEY **key, uchar *first_rec, uchar *second_rec) {
max length. The exceptions are the BLOB and VARCHAR field types
that take the max length into account.
*/
- if ((result =
- field->cmp_max(field->field_ptr() + first_diff,
- field->field_ptr() + sec_diff, key_part->length)))
- return (sort_order < 0) ? -result : result;
+ if (key_part->key_part_flag & HA_PART_KEY_SEG) {
+ result = field->cmp_prefix_key_rec(field->field_ptr() + first_diff,
+ field->field_ptr() + sec_diff,
+ key_part->length);
+
+ } else {
+ result =
+ field->cmp_max(field->field_ptr() + first_diff,
+ field->field_ptr() + sec_diff, key_part->length);
+ }
+ if (result) return (sort_order < 0) ? -result : result;
+
next_loop:
key_part++;
key_part_num++;
Description: Backward scan partition table use multi-valued indexes contains prefix index, and more than one index column after the prefix index column may lost some result. This problem may occur in RefIterator<true> and ReverseIndexRangeScanIterator under optimizer_switch = index_condition_pushdown=off. The reason for this problem is Filed_varstring::cmp_max and Field_blob::cmp_max used in key_rec_cmp, not accurate calculation of the character lengths that should be compared. How to repeat: create table ttb2( c1 int primary key, c2 varchar(6), c3 varchar(6), c4 varchar(6), c5 varchar(6), index(c2, c3(3), c4) ) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci partition by range(c1) (partition p0 values less than (100), partition p1 values less than (200)); insert into ttb2 values (1, 'aa', 'aaaxxx', 'aa', '11111'), (2, 'aa', 'aaafff', null, '11111'); insert into ttb2 values (110, 'aa', 'aaa', 'aa', '111111'), (111, 'aa', 'aaammm', 'aa', '11111'); mysql>select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc; Empty set (0.01 sec) mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa'); +-----+------+------+------+--------+ | c1 | c2 | c3 | c4 | c5 | +-----+------+------+------+--------+ | 110 | aa | aaa | aa | 111111 | +-----+------+------+------+--------+ 1 row in set (0.00 sec) mysql> alter table ttb2 modify column c3 blob; mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc; Empty set (0.01 sec) Suggested fix: this is a fix diff on mysql-8.0.35: diff --git a/sql/field.cc b/sql/field.cc index 357036d9ca7..59b1eeeb42f 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -6682,6 +6682,40 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr, return diff; } +int Field_varstring::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr, + uint max_len) const { + uint a_length, b_length; + uint local_char_a_length, local_char_b_length; + int diff; + + if (length_bytes == 1) { + a_length = (uint)*a_ptr; + b_length = (uint)*b_ptr; + } else { + a_length = uint2korr(a_ptr); + b_length = uint2korr(b_ptr); + } + if (field_charset->mbmaxlen != 1) { + local_char_a_length = my_charpos(field_charset, a_ptr + length_bytes, + a_ptr + length_bytes + a_length, + max_len / field_charset->mbmaxlen); + local_char_b_length = my_charpos(field_charset, b_ptr + length_bytes, + b_ptr + length_bytes + b_length, + max_len / field_charset->mbmaxlen); + } else { + local_char_a_length = max_len; + local_char_b_length = max_len; + } + a_length = std::min(a_length, local_char_a_length); + b_length = std::min(b_length, local_char_b_length); + + diff = field_charset->coll->strnncollsp(field_charset, a_ptr + length_bytes, + a_length, b_ptr + length_bytes, + b_length); + return diff; +} + + /** @note varstring and blob keys are ALWAYS stored with a 2 byte length prefix @@ -7181,6 +7215,28 @@ int Field_blob::cmp_max(const uchar *a_ptr, const uchar *b_ptr, return Field_blob::cmp(blob1, a_len, blob2, b_len); } +int Field_blob::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr, + uint max_len) const { + uint local_char_a_length; + uint local_char_b_length; + const uchar *blob1 = get_blob_data(a_ptr + packlength); + const uchar *blob2 = get_blob_data(b_ptr + packlength); + uint a_len = get_length(a_ptr); + uint b_len = get_length(b_ptr); + if (field_charset->mbmaxlen != 1) { + local_char_a_length = my_charpos(field_charset, blob1, blob1 + a_len, + max_len / field_charset->mbmaxlen); + local_char_b_length = my_charpos(field_charset, blob2, blob2 + b_len, + max_len / field_charset->mbmaxlen); + } else { + local_char_a_length = max_len; + local_char_b_length = max_len; + } + a_len = std::min(a_len, local_char_a_length); + b_len = std::min(b_len, local_char_b_length); + return Field_blob::cmp(blob1, a_len, blob2, b_len); +} + int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr, uint32 max_length) const { const uchar *a = get_blob_data(a_ptr + packlength); diff --git a/sql/field.h b/sql/field.h index 14484e060ea..7da98f2a8e6 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1178,6 +1178,10 @@ class Field { uint max_len [[maybe_unused]]) const { return cmp(a, b); } + virtual int cmp_prefix_key_rec(const uchar *a, const uchar *b, + uint max_len) const { + return cmp_max(a, b, max_len); + } virtual int cmp(const uchar *, const uchar *) const = 0; virtual int cmp_binary(const uchar *a, const uchar *b, uint32 max_length [[maybe_unused]] = ~0L) const { @@ -3533,6 +3537,9 @@ class Field_varstring : public Field_longstr { String *val_str(String *, String *) const override; my_decimal *val_decimal(my_decimal *) const final; int cmp_max(const uchar *, const uchar *, uint max_length) const final; + int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr, + uint max_len) const final; + int cmp(const uchar *a, const uchar *b) const final { return cmp_max(a, b, ~0U); } @@ -3713,6 +3720,9 @@ class Field_blob : public Field_longstr { String *val_str(String *, String *) const override; my_decimal *val_decimal(my_decimal *) const override; int cmp_max(const uchar *, const uchar *, uint max_length) const final; + int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr, + uint max_len) const final; + int cmp(const uchar *a, const uchar *b) const final { return cmp_max(a, b, ~0U); } diff --git a/sql/key.cc b/sql/key.cc index 11ea5f44f47..def95de9076 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -643,10 +643,18 @@ int key_rec_cmp(KEY **key, uchar *first_rec, uchar *second_rec) { max length. The exceptions are the BLOB and VARCHAR field types that take the max length into account. */ - if ((result = - field->cmp_max(field->field_ptr() + first_diff, - field->field_ptr() + sec_diff, key_part->length))) - return (sort_order < 0) ? -result : result; + if (key_part->key_part_flag & HA_PART_KEY_SEG) { + result = field->cmp_prefix_key_rec(field->field_ptr() + first_diff, + field->field_ptr() + sec_diff, + key_part->length); + + } else { + result = + field->cmp_max(field->field_ptr() + first_diff, + field->field_ptr() + sec_diff, key_part->length); + } + if (result) return (sort_order < 0) ? -result : result; + next_loop: key_part++; key_part_num++;