Description:
When calculating the length of used fields involving blob type in the function calc_used_field_length(), it uses the average length of records minus the unused pack length of row records as blob length in the following codes:
```c++
void calc_used_field_length(TABLE *table, bool needs_rowid,
uint *p_used_fieldlength) {
uint null_fields, blobs, fields, rec_length;
Field **f_ptr, *field;
uint uneven_bit_fields;
MY_BITMAP *read_set = table->read_set;
uneven_bit_fields = null_fields = blobs = fields = rec_length = 0;
for (f_ptr = table->field; (field = *f_ptr); f_ptr++) {
if (bitmap_is_set(read_set, field->field_index())) {
fields++;
rec_length += field->pack_length();
if (field->is_flag_set(BLOB_FLAG) || field->is_array()) blobs++;
if (!field->is_flag_set(NOT_NULL_FLAG)) null_fields++;
if (field->type() == MYSQL_TYPE_BIT && ((Field_bit *)field)->bit_len)
uneven_bit_fields++;
}
}
if (null_fields || uneven_bit_fields)
rec_length += (table->s->null_fields + 7) / 8;
if (table->is_nullable()) rec_length += sizeof(bool);
if (blobs) {
uint blob_length = (uint)(table->file->stats.mean_rec_length -
(table->s->reclength - rec_length));
rec_length += max<uint>(4U, blob_length);
}
if (needs_rowid) {
rec_length += table->file->ref_length;
fields++;
}
*p_used_fieldlength = rec_length;
}
```
The average length of records could be petite, while the pack length of unused fields could be larger. This will cause a small unsigned integer minus a big unsigned integer, an arithmetic overflow.
How to repeat:
CREATE TABLE t1 (
`id` varchar(5),
`def1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def2` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def3` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def4` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def5` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def6` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def7` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def8` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def9` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def10` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def11` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def12` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def13` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def14` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def15` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def16` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def17` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def18` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def19` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def20` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci
);
DELIMITER //
CREATE PROCEDURE insert_data(IN loop_count int)
BEGIN
SET @n = 1;
REPEAT
INSERT INTO t1(id, def1) VALUES ('aaa', 'wdwdkafvllxlldad'), ('bbb', 'duwucuuwiuww'), ('ccc', 'dwjnvnviwiwiq');
SET @n = @n + 1;
UNTIL @n > loop_count
END REPEAT;
END;
//
delimiter ;
CALL insert_data(200);
CREATE TABLE t2 SELECT * FROM t1;
ANALYZE TABLE t1, t2;
SELECT t1.id, t1.def1, t2.id, t2.def2 FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t1.def1 = t2.def1;
Set a breakpoint to the function calc_used_field_length(). And see:
```gdb
(gdb) list
2005 }
2006 if (null_fields || uneven_bit_fields)
2007 rec_length += (table->s->null_fields + 7) / 8;
2008 if (table->is_nullable()) rec_length += sizeof(bool);
2009 if (blobs) {
2010 uint blob_length = (uint)(table->file->stats.mean_rec_length -
2011 (table->s->reclength - rec_length));
2012 rec_length += max<uint>(4U, blob_length);
2013 }
2014
(gdb) p table->file->stats.mean_rec_length
$242 = 109
(gdb) p table->s->reclength
$243 = 224
(gdb) p rec_length
$244 = 34
(gdb) n
2011 (table->s->reclength - rec_length));
(gdb)
2010 uint blob_length = (uint)(table->file->stats.mean_rec_length -
(gdb)
2012 rec_length += max<uint>(4U, blob_length);
(gdb) p blob_length
$245 = 4294967215
```
Suggested fix:
If the average length of records is smaller than the pack length of unused fields, it means the actual length of the blob is not very large. Just set it to 4 according to the current logic.