MySQL Bugs: #114795: Arithmetic overflow when calculating the length of used fields involving blob

Bug #114795	Arithmetic overflow when calculating the length of used fields involving blob
Submitted:	26 Apr 2024 10:05	Modified:	26 Apr 2024 10:26
Reporter:	Hope Lee (OCA)	Email Updates:
Status:	Verified	Impact on me:	None
Category:	MySQL Server: Optimizer	Severity:	S3 (Non-critical)
Version:	8.0	OS:	Any
Assigned to:		CPU Architecture:	Any

Description:
When calculating the length of used fields involving blob type in the function calc_used_field_length(), it uses the average length of records minus the unused pack length of row records as blob length in the following codes:

```c++
 void calc_used_field_length(TABLE *table, bool needs_rowid,
                              uint *p_used_fieldlength) {
    uint null_fields, blobs, fields, rec_length;
    Field **f_ptr, *field;
    uint uneven_bit_fields;
    MY_BITMAP *read_set = table->read_set;

    uneven_bit_fields = null_fields = blobs = fields = rec_length = 0;
    for (f_ptr = table->field; (field = *f_ptr); f_ptr++) {
      if (bitmap_is_set(read_set, field->field_index())) {
        fields++;
        rec_length += field->pack_length();
        if (field->is_flag_set(BLOB_FLAG) || field->is_array()) blobs++;
        if (!field->is_flag_set(NOT_NULL_FLAG)) null_fields++;
        if (field->type() == MYSQL_TYPE_BIT && ((Field_bit *)field)->bit_len)
          uneven_bit_fields++;
      }
    }
    if (null_fields || uneven_bit_fields)
      rec_length += (table->s->null_fields + 7) / 8;
    if (table->is_nullable()) rec_length += sizeof(bool);
    if (blobs) {
      uint blob_length = (uint)(table->file->stats.mean_rec_length -
                                (table->s->reclength - rec_length));
      rec_length += max<uint>(4U, blob_length);
    }

    if (needs_rowid) {
      rec_length += table->file->ref_length;
      fields++;
    }

    *p_used_fieldlength = rec_length;
  }
```

The average length of records could be petite, while the pack length of unused fields could be larger. This will cause a small unsigned integer minus a big unsigned integer, an arithmetic overflow.

How to repeat:
CREATE TABLE t1 (
  `id` varchar(5),
  `def1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def2` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def3` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def4` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def5` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def6` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def7` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def8` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def9` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def10` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def11` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def12` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def13` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def14` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def15` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def16` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def17` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def18` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def19` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
  `def20` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci
);

DELIMITER //

CREATE PROCEDURE insert_data(IN loop_count int)
BEGIN
  SET @n = 1;
  REPEAT
    INSERT INTO t1(id, def1) VALUES ('aaa', 'wdwdkafvllxlldad'), ('bbb', 'duwucuuwiuww'), ('ccc', 'dwjnvnviwiwiq');
    SET @n = @n + 1;
  UNTIL @n > loop_count
  END REPEAT;
END;
//

delimiter ;

CALL insert_data(200);

CREATE TABLE t2 SELECT * FROM t1;

ANALYZE TABLE t1, t2;
SELECT t1.id, t1.def1, t2.id, t2.def2 FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t1.def1 = t2.def1;

Set a breakpoint to the function calc_used_field_length(). And see:

```gdb
(gdb) list
2005      }
2006      if (null_fields || uneven_bit_fields)
2007        rec_length += (table->s->null_fields + 7) / 8;
2008      if (table->is_nullable()) rec_length += sizeof(bool);
2009      if (blobs) {
2010        uint blob_length = (uint)(table->file->stats.mean_rec_length -
2011                                  (table->s->reclength - rec_length));
2012        rec_length += max<uint>(4U, blob_length);
2013      }
2014
(gdb) p table->file->stats.mean_rec_length
$242 = 109
(gdb) p table->s->reclength
$243 = 224
(gdb) p rec_length
$244 = 34
(gdb) n
2011                                  (table->s->reclength - rec_length));
(gdb)
2010        uint blob_length = (uint)(table->file->stats.mean_rec_length -
(gdb)
2012        rec_length += max<uint>(4U, blob_length);
(gdb) p blob_length
$245 = 4294967215
```

Suggested fix:
If the average length of records is smaller than the pack length of unused fields, it means the actual length of the blob is not very large. Just set it to 4 according to the current logic.

Bugfix Arithmetic overflow when calculating the length of used fields involving blob (*) I confirm the code being submitted is offered under the terms of the OCA, and that I am authorized to contribute it.

Contribution: 0001-Bugfix-Arithmetic-overflow-when-calculating-the-leng.patch (application/octet-stream, text), 1.21 KiB.

Hi Mr. Lee,

Thank you for your bug report.

We have ran your test case and we have debugged the code when SELECT was running,  as you instructed.

We managed to repeat the results that you reported.

This is now a verified bug for the versions 8.0 and higher.

Also, thank you very much for your patch.