Description:
When calculating the length of used fields involving blob type in the function calc_used_field_length(), it uses the average length of records minus the unused pack length of row records as blob length in the following codes:
```c++
void calc_used_field_length(TABLE *table, bool needs_rowid,
uint *p_used_fieldlength) {
uint null_fields, blobs, fields, rec_length;
Field **f_ptr, *field;
uint uneven_bit_fields;
MY_BITMAP *read_set = table->read_set;
uneven_bit_fields = null_fields = blobs = fields = rec_length = 0;
for (f_ptr = table->field; (field = *f_ptr); f_ptr++) {
if (bitmap_is_set(read_set, field->field_index())) {
fields++;
rec_length += field->pack_length();
if (field->is_flag_set(BLOB_FLAG) || field->is_array()) blobs++;
if (!field->is_flag_set(NOT_NULL_FLAG)) null_fields++;
if (field->type() == MYSQL_TYPE_BIT && ((Field_bit *)field)->bit_len)
uneven_bit_fields++;
}
}
if (null_fields || uneven_bit_fields)
rec_length += (table->s->null_fields + 7) / 8;
if (table->is_nullable()) rec_length += sizeof(bool);
if (blobs) {
uint blob_length = (uint)(table->file->stats.mean_rec_length -
(table->s->reclength - rec_length));
rec_length += max<uint>(4U, blob_length);
}
if (needs_rowid) {
rec_length += table->file->ref_length;
fields++;
}
*p_used_fieldlength = rec_length;
}
```
The average length of records could be petite, while the pack length of unused fields could be larger. This will cause a small unsigned integer minus a big unsigned integer, an arithmetic overflow.
How to repeat:
CREATE TABLE t1 (
`id` varchar(5),
`def1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def2` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def3` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def4` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def5` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def6` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def7` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def8` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def9` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def10` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def11` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def12` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def13` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def14` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def15` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def16` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def17` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def18` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def19` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
`def20` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci
);
DELIMITER //
CREATE PROCEDURE insert_data(IN loop_count int)
BEGIN
SET @n = 1;
REPEAT
INSERT INTO t1(id, def1) VALUES ('aaa', 'wdwdkafvllxlldad'), ('bbb', 'duwucuuwiuww'), ('ccc', 'dwjnvnviwiwiq');
SET @n = @n + 1;
UNTIL @n > loop_count
END REPEAT;
END;
//
delimiter ;
CALL insert_data(200);
CREATE TABLE t2 SELECT * FROM t1;
ANALYZE TABLE t1, t2;
SELECT t1.id, t1.def1, t2.id, t2.def2 FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t1.def1 = t2.def1;
Set a breakpoint to the function calc_used_field_length(). And see:
```gdb
(gdb) list
2005 }
2006 if (null_fields || uneven_bit_fields)
2007 rec_length += (table->s->null_fields + 7) / 8;
2008 if (table->is_nullable()) rec_length += sizeof(bool);
2009 if (blobs) {
2010 uint blob_length = (uint)(table->file->stats.mean_rec_length -
2011 (table->s->reclength - rec_length));
2012 rec_length += max<uint>(4U, blob_length);
2013 }
2014
(gdb) p table->file->stats.mean_rec_length
$242 = 109
(gdb) p table->s->reclength
$243 = 224
(gdb) p rec_length
$244 = 34
(gdb) n
2011 (table->s->reclength - rec_length));
(gdb)
2010 uint blob_length = (uint)(table->file->stats.mean_rec_length -
(gdb)
2012 rec_length += max<uint>(4U, blob_length);
(gdb) p blob_length
$245 = 4294967215
```
Suggested fix:
If the average length of records is smaller than the pack length of unused fields, it means the actual length of the blob is not very large. Just set it to 4 according to the current logic.
Description: When calculating the length of used fields involving blob type in the function calc_used_field_length(), it uses the average length of records minus the unused pack length of row records as blob length in the following codes: ```c++ void calc_used_field_length(TABLE *table, bool needs_rowid, uint *p_used_fieldlength) { uint null_fields, blobs, fields, rec_length; Field **f_ptr, *field; uint uneven_bit_fields; MY_BITMAP *read_set = table->read_set; uneven_bit_fields = null_fields = blobs = fields = rec_length = 0; for (f_ptr = table->field; (field = *f_ptr); f_ptr++) { if (bitmap_is_set(read_set, field->field_index())) { fields++; rec_length += field->pack_length(); if (field->is_flag_set(BLOB_FLAG) || field->is_array()) blobs++; if (!field->is_flag_set(NOT_NULL_FLAG)) null_fields++; if (field->type() == MYSQL_TYPE_BIT && ((Field_bit *)field)->bit_len) uneven_bit_fields++; } } if (null_fields || uneven_bit_fields) rec_length += (table->s->null_fields + 7) / 8; if (table->is_nullable()) rec_length += sizeof(bool); if (blobs) { uint blob_length = (uint)(table->file->stats.mean_rec_length - (table->s->reclength - rec_length)); rec_length += max<uint>(4U, blob_length); } if (needs_rowid) { rec_length += table->file->ref_length; fields++; } *p_used_fieldlength = rec_length; } ``` The average length of records could be petite, while the pack length of unused fields could be larger. This will cause a small unsigned integer minus a big unsigned integer, an arithmetic overflow. How to repeat: CREATE TABLE t1 ( `id` varchar(5), `def1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def2` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def3` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def4` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def5` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def6` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def7` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def8` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def9` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def10` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def11` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def12` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def13` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def14` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def15` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def16` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def17` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def18` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def19` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, `def20` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci ); DELIMITER // CREATE PROCEDURE insert_data(IN loop_count int) BEGIN SET @n = 1; REPEAT INSERT INTO t1(id, def1) VALUES ('aaa', 'wdwdkafvllxlldad'), ('bbb', 'duwucuuwiuww'), ('ccc', 'dwjnvnviwiwiq'); SET @n = @n + 1; UNTIL @n > loop_count END REPEAT; END; // delimiter ; CALL insert_data(200); CREATE TABLE t2 SELECT * FROM t1; ANALYZE TABLE t1, t2; SELECT t1.id, t1.def1, t2.id, t2.def2 FROM t1 LEFT JOIN t2 ON t1.id = t2.id AND t1.def1 = t2.def1; Set a breakpoint to the function calc_used_field_length(). And see: ```gdb (gdb) list 2005 } 2006 if (null_fields || uneven_bit_fields) 2007 rec_length += (table->s->null_fields + 7) / 8; 2008 if (table->is_nullable()) rec_length += sizeof(bool); 2009 if (blobs) { 2010 uint blob_length = (uint)(table->file->stats.mean_rec_length - 2011 (table->s->reclength - rec_length)); 2012 rec_length += max<uint>(4U, blob_length); 2013 } 2014 (gdb) p table->file->stats.mean_rec_length $242 = 109 (gdb) p table->s->reclength $243 = 224 (gdb) p rec_length $244 = 34 (gdb) n 2011 (table->s->reclength - rec_length)); (gdb) 2010 uint blob_length = (uint)(table->file->stats.mean_rec_length - (gdb) 2012 rec_length += max<uint>(4U, blob_length); (gdb) p blob_length $245 = 4294967215 ``` Suggested fix: If the average length of records is smaller than the pack length of unused fields, it means the actual length of the blob is not very large. Just set it to 4 according to the current logic.