Bug #114783 Backward scan partition table with multi-valued Indexes may lost some result.
Submitted: 25 Apr 2024 11:32 Modified: 29 Apr 2024 5:40
Reporter: deng yl (OCA) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S2 (Serious)
Version:mysql-8.0.35, 8.0.36 OS:Linux
Assigned to: CPU Architecture:Any
Tags: Backward scan partition table with multi-valued Indexes may lost some result.

[25 Apr 2024 11:32] deng yl
Description:
  Backward scan partition table use multi-valued indexes contains prefix index, and more than one index column after the prefix index column may lost some result.

  This problem may occur in RefIterator<true> and ReverseIndexRangeScanIterator under optimizer_switch = index_condition_pushdown=off.
  
  The reason for this problem is Filed_varstring::cmp_max and Field_blob::cmp_max
used in key_rec_cmp, not accurate calculation of the character lengths that should be compared.
  

How to repeat:
create table ttb2(
c1 int primary key, 
c2 varchar(6), 
c3 varchar(6), 
c4 varchar(6), 
c5 varchar(6),
index(c2, c3(3), c4)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci 
partition by range(c1) 
(partition p0 values less than (100), 
partition p1 values less than (200));

insert into ttb2 values
(1, 'aa', 'aaaxxx', 'aa', '11111'),
(2, 'aa', 'aaafff', null, '11111');

insert into ttb2 values
(110, 'aa', 'aaa', 'aa', '111111'),
(111, 'aa', 'aaammm', 'aa', '11111');

mysql>select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)

mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa');
+-----+------+------+------+--------+
| c1  | c2   | c3   | c4   | c5     |
+-----+------+------+------+--------+
| 110 | aa   | aaa  | aa   | 111111 |
+-----+------+------+------+--------+
1 row in set (0.00 sec)

mysql>  alter table ttb2 modify column c3 blob;

mysql> select * from ttb2 force index(c2) where (c2 = 'aa') and (c3 = 'aaa') and (c4 = 'aa') order by c2 desc, c3 desc, c4 desc, c1 desc;
Empty set (0.01 sec)

Suggested fix:
this is a fix diff on mysql-8.0.35:

diff --git a/sql/field.cc b/sql/field.cc
index 357036d9ca7..59b1eeeb42f 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -6682,6 +6682,40 @@ int Field_varstring::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
   return diff;
 }
 
+int Field_varstring::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+                                        uint max_len) const {
+  uint a_length, b_length;
+  uint local_char_a_length, local_char_b_length;
+  int diff;
+
+  if (length_bytes == 1) {
+    a_length = (uint)*a_ptr;
+    b_length = (uint)*b_ptr;
+  } else {
+    a_length = uint2korr(a_ptr);
+    b_length = uint2korr(b_ptr);
+  }
+  if (field_charset->mbmaxlen != 1) {
+    local_char_a_length = my_charpos(field_charset, a_ptr + length_bytes,
+                                     a_ptr + length_bytes + a_length,
+                                     max_len / field_charset->mbmaxlen);
+    local_char_b_length = my_charpos(field_charset, b_ptr + length_bytes,
+                                     b_ptr + length_bytes + b_length,
+                                     max_len / field_charset->mbmaxlen);
+  } else {
+    local_char_a_length = max_len;
+    local_char_b_length = max_len;
+  }
+  a_length = std::min(a_length, local_char_a_length);
+  b_length = std::min(b_length, local_char_b_length);
+
+  diff = field_charset->coll->strnncollsp(field_charset, a_ptr + length_bytes,
+                                          a_length, b_ptr + length_bytes,
+                                          b_length);
+  return diff;
+}
+
+
 /**
   @note
     varstring and blob keys are ALWAYS stored with a 2 byte length prefix
@@ -7181,6 +7215,28 @@ int Field_blob::cmp_max(const uchar *a_ptr, const uchar *b_ptr,
   return Field_blob::cmp(blob1, a_len, blob2, b_len);
 }
 
+int Field_blob::cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+                                   uint max_len) const {
+  uint local_char_a_length;
+  uint local_char_b_length;
+  const uchar *blob1 = get_blob_data(a_ptr + packlength);
+  const uchar *blob2 = get_blob_data(b_ptr + packlength);
+  uint a_len = get_length(a_ptr);
+  uint b_len = get_length(b_ptr);
+  if (field_charset->mbmaxlen != 1) {
+    local_char_a_length = my_charpos(field_charset, blob1, blob1 + a_len,
+                                     max_len / field_charset->mbmaxlen);
+    local_char_b_length = my_charpos(field_charset, blob2, blob2 + b_len,
+                                     max_len / field_charset->mbmaxlen);
+  } else {
+    local_char_a_length = max_len;
+    local_char_b_length = max_len;
+  }
+  a_len = std::min(a_len, local_char_a_length);
+  b_len = std::min(b_len, local_char_b_length);
+  return Field_blob::cmp(blob1, a_len, blob2, b_len);
+}
+
 int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr,
                            uint32 max_length) const {
   const uchar *a = get_blob_data(a_ptr + packlength);
diff --git a/sql/field.h b/sql/field.h
index 14484e060ea..7da98f2a8e6 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1178,6 +1178,10 @@ class Field {
                       uint max_len [[maybe_unused]]) const {
     return cmp(a, b);
   }
+  virtual int cmp_prefix_key_rec(const uchar *a, const uchar *b,
+                                 uint max_len) const {
+    return cmp_max(a, b, max_len);
+  }
   virtual int cmp(const uchar *, const uchar *) const = 0;
   virtual int cmp_binary(const uchar *a, const uchar *b,
                          uint32 max_length [[maybe_unused]] = ~0L) const {
@@ -3533,6 +3537,9 @@ class Field_varstring : public Field_longstr {
   String *val_str(String *, String *) const override;
   my_decimal *val_decimal(my_decimal *) const final;
   int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+  int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+                         uint max_len) const final;
+
   int cmp(const uchar *a, const uchar *b) const final {
     return cmp_max(a, b, ~0U);
   }
@@ -3713,6 +3720,9 @@ class Field_blob : public Field_longstr {
   String *val_str(String *, String *) const override;
   my_decimal *val_decimal(my_decimal *) const override;
   int cmp_max(const uchar *, const uchar *, uint max_length) const final;
+  int cmp_prefix_key_rec(const uchar *a_ptr, const uchar *b_ptr,
+                         uint max_len) const final;
+
   int cmp(const uchar *a, const uchar *b) const final {
     return cmp_max(a, b, ~0U);
   }
diff --git a/sql/key.cc b/sql/key.cc
index 11ea5f44f47..def95de9076 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -643,10 +643,18 @@ int key_rec_cmp(KEY **key, uchar *first_rec, uchar *second_rec) {
         max length. The exceptions are the BLOB and VARCHAR field types
         that take the max length into account.
       */
-      if ((result =
-               field->cmp_max(field->field_ptr() + first_diff,
-                              field->field_ptr() + sec_diff, key_part->length)))
-        return (sort_order < 0) ? -result : result;
+      if (key_part->key_part_flag & HA_PART_KEY_SEG) {
+        result = field->cmp_prefix_key_rec(field->field_ptr() + first_diff,
+                                           field->field_ptr() + sec_diff,
+                                           key_part->length);
+
+      } else {
+        result =
+            field->cmp_max(field->field_ptr() + first_diff,
+                           field->field_ptr() + sec_diff, key_part->length);
+      }
+      if (result) return (sort_order < 0) ? -result : result;
+
     next_loop:
       key_part++;
       key_part_num++;
[29 Apr 2024 5:40] MySQL Verification Team
Hello deng yl,

Thank you for the report and test case.

regards,
Umesh