MySQL Bugs: #116492: The utf8mb4 character set has worse performance

Bug #116492	The utf8mb4 character set has worse performance
Submitted:	28 Oct 2024 11:47	Modified:	6 Nov 2024 13:45
Reporter:	Xizhe Zhang (OCA)	Email Updates:
Status:	Verified	Impact on me:	None
Category:	MySQL Server: Charsets	Severity:	S5 (Performance)
Version:	8.0, 8.0.40	OS:	Any
Assigned to:		CPU Architecture:	Any

Description:
MySQL 8.0 now uses the utf8mb4 character set by default and utf8mb4_0900_ai_ci as the default Collation. However, the performance of utf8mb4_0900_ai_ci is not satisfactory. 

The number of instructions in the function 'my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1, Mb_wc_utf8mb4>' is nearly 80% higher than that in the function 'my_strnncollsp_utf8mb3', and the execution time is increased by 75%. This ultimately causes the execution time of statements in the subsequent cases to increase by 25%.

How to repeat:
-- 1. Create Table
CREATE TABLE `t_mb3` (
  `a` varchar(50) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3;

CREATE TABLE `t_mb4` (
  `a` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- 2. Create Procedure
DELIMITER |
CREATE PROCEDURE foo_mb3(num int)
BEGIN
    DECLARE i int;
    SET i=1;
    WHILE(i<=num)DO
        INSERT INTO t_mb3 (a) VALUES (repeat('abc', 15));
        SET i=i+1;
    END WHILE;
END|

CREATE PROCEDURE foo_mb4(num int)
BEGIN
    DECLARE i int;
    SET i=1;
    WHILE(i<=num)DO
        INSERT INTO t_mb4 (a) VALUES (repeat('abc', 15));
        SET i=i+1;
    END WHILE;
END|
DELIMITER ;

-- 3. Insert Data
CALL foo_mb3(1000000);
CALL foo_mb4(1000000);

-- 4. Scan with VARCHAR condition, execution time increased by 25%
mysql> select count(*) from t_mb3 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.79 sec)

mysql> select count(*) from t_mb4 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (1.02 sec)

-- 5. If I use utf8mb4_general_ci, the execution time will be restored
mysql> alter table t_mb4 modify `a` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL;

mysql> select count(*) from t_mb4 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.81 sec)

Suggested fix:
I used Intel PT to sample the execution process of the statements, and got the following statistics on function execution time and number of instructions:

scan_mb3=# select count(*) as count, avg(elapsed_time) as avg_elapsed_time, avg(insn_count) as avg_insn_count from calls_view where symbol = 'my_strnncollsp_utf8mb3';
 count |   avg_elapsed_time   |    avg_insn_count
-------+----------------------+-----------------------
 74048 | 535.1534545159896283 | 2598.8805504537597234
(1 row)

scan_mb4=# select count(*) as count, avg(elapsed_time) as avg_elapsed_time, avg(insn_count) as avg_insn_count from calls_view where symbol = 'my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1, Mb_wc_utf8mb4>';
 count |   avg_elapsed_time   |    avg_insn_count
-------+----------------------+-----------------------
 67509 | 938.1227688160097172 | 4709.8754388303781718
(1 row)

You can see that the execution time is increased by 75%, and the instruction count is increased by 80%.

I further looked at the raw data to determine where the increased number of instructions was consumed. Below are the instruction counts for one iteration of 'my_strnncollsp_utf8mb3' and 'my_strnncoll_uca' respectively.

utf8mb3:

static int my_strnncollsp_utf8mb3(const CHARSET_INFO *cs, const uchar *s,
                                  size_t slen, const uchar *t, size_t tlen) {
  ...
  /* 1. condition check, 6 instructions */
  while (s < se && t < te) {
    /* 2. parse character from byte stream，12 instructions */
    s_res = my_mb_wc_utf8mb3(&s_wc, s, se);
    t_res = my_mb_wc_utf8mb3(&t_wc, t, te);

    /* 3. if branch, 2 instructions */
    if (s_res <= 0 || t_res <= 0) { ... }

    /* 4. get the weight of character，34 instructions */
    my_tosort_unicode(uni_plane, &s_wc, cs->state);
    my_tosort_unicode(uni_plane, &t_wc, cs->state);

    /* 5. if branch, and increase s and t, 5 instructions */
    if (s_wc != t_wc) {
      return s_wc > t_wc ? 1 : -1;
    }
    s += s_res;
    t += t_res;
  }
  ...
}

utf8mb4:

template <class Scanner, int LEVELS_FOR_COMPARE, class Mb_wc>
static int my_strnncoll_uca(const CHARSET_INFO *cs, const Mb_wc mb_wc,
                            const uchar *s, size_t slen, const uchar *t,
                            size_t tlen, bool t_is_prefix) {
  ...
  do {
    { /* Expanded from s_res = sscanner.next(); */
      { /* Expanded from int res = next_raw(); */

        /* 1. related to UCA 9.0.0，6 instructions for two strings */
        int remain_weight = more_weight();
        if (remain_weight >= 0) return remain_weight;

        do {
          my_wc_t wc = 0;

          /* 2. parse character from byte stream, 12 instructions for two strings */
          int mblen = mb_wc(&wc, sbeg, send);
          if (mblen <= 0) { ... }

          /* 3. increase sbeg, 4 instructions for two strings */
          sbeg += mblen;

          /* 4. unnecessary check for this case, 6 instructions for two strings */
          if (my_uca_have_contractions(uca)) { ... }

          /* 5. get the weight of character，36 instructions for two strings */
          uint page = wc >> 8;
          uint code = wc & 0xFF;
          const uint16 *wpage = uca->weights[page];
          if (!wpage) return next_implicit(wc);
          wbeg = UCA900_WEIGHT_ADDR(wpage, weight_lv, code);
          wbeg_stride = UCA900_DISTANCE_BETWEEN_WEIGHTS;
          num_of_ce_left = UCA900_NUM_OF_CE(wpage, code);
        } while (!wbeg[0]); /* 6. unnecessary check for this case, 6 instructions for two strings */

        /* 7. related to UCA 9.0.0, 14 instructions for two strings */
        uint16 rtn = *wbeg;
        wbeg += wbeg_stride;
        --num_of_ce_left;
        res = rtn
      }

      /* 8. unnecessary check for this case, 10 instructions for two strings */
      Coll_param *param = cs->coll_param;
      if (res > 0 && param) { ... }
      s_res = res;
    }
  
    /* The instruction count has been included above */
    t_res = tscanner.next();
  } while (s_res == t_res && s_res >= 0 &&
           sscanner.get_weight_level() == current_lv &&
           tscanner.get_weight_level() == current_lv); /* 9. condition check, 9 instructions */
  ...
}

Collation utf8mb4_0900_ai_ci introduces some checks, which complicates the originally simple logic and leads to performance degradation. These checks can be done before the loop starts, rather than doing it for every character.

Hello Xizhe Zhang,

Thank you for the report and test case.
I quickly attempted to reproduce in 8.0.40 release build with default configuration but not seeing the reported issue. Please share exact version you have used, if it is a source build then cmake options used for the build along with configuration file if it is not on default settings.

--
BugNumber=116492
rm -rf $BugNumber/
bin/mysqld --no-defaults --initialize-insecure --basedir=$PWD --datadir=$PWD/$BugNumber --log-error-verbosity=3
bin/mysqld_safe --no-defaults --mysqld-version='' --basedir=$PWD --datadir=$PWD/$BugNumber --core-file --socket=/tmp/mysql.sock  --port=3306 --log-error=$PWD/$BugNumber/log.err --mysqlx-port=33330 --mysqlx-socket=/tmp/mysql_x_ushastry.sock --log-error-verbosity=3  --secure-file-priv="" --local-infile=1  2>&1 &

cat 116492.pl

#!/usr/bin/perl

for(my $i=1; $i <= 1000000; $i++) {
  my $txt = 'abc' x 15;
  print "$txt\n";
}

perl 116492.pl > 116492.txt

head 116492.txt
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc
abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc

wc -l 116492.txt
1000000 116492.txt

bin/mysql -uroot -S/tmp/mysql.sock
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 7
Server version: 8.0.40 MySQL Community Server - GPL

Copyright (c) 2000, 2024, Oracle and/or its affiliates.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> create database test;
Query OK, 1 row affected (0.00 sec)

mysql> use test;
Database changed
mysql>
mysql> CREATE TABLE `t_mb3` (
    ->   `a` varchar(50) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci DEFAULT NULL
    -> ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3;
Query OK, 0 rows affected, 3 warnings (0.02 sec)

mysql> show warnings;
+---------+------+---------------------------------------------------------------------------------------------------------------------------------------------------+
| Level   | Code | Message                                                                                                                                           |
+---------+------+---------------------------------------------------------------------------------------------------------------------------------------------------+
| Warning | 1287 | 'utf8mb3' is deprecated and will be removed in a future release. Please use utf8mb4 instead                                                       |
| Warning | 3778 | 'utf8mb3_general_ci' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. |
| Warning | 1287 | 'utf8mb3' is deprecated and will be removed in a future release. Please use utf8mb4 instead                                                       |
+---------+------+---------------------------------------------------------------------------------------------------------------------------------------------------+
3 rows in set (0.00 sec)

mysql>
mysql> CREATE TABLE `t_mb4` (
    ->   `a` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL
    -> ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
Query OK, 0 rows affected (0.01 sec)

mysql> show warnings;
Empty set (0.00 sec)
Database changed
mysql> load data local infile '/export/home/tmp/ushastry/mysql-8.0.40/116492.txt' into table t_mb3 fields terminated by ',';
Query OK, 1000000 rows affected (4.09 sec)
Records: 1000000  Deleted: 0  Skipped: 0  Warnings: 0

mysql> load data local infile '/export/home/tmp/ushastry/mysql-8.0.40/116492.txt' into table t_mb4 fields terminated by ',';
Query OK, 1000000 rows affected (5.00 sec)
Records: 1000000  Deleted: 0  Skipped: 0  Warnings: 0

-- ran some 10+ times and it is same

mysql> select count(*) from t_mb3 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.69 sec)

mysql> select count(*) from t_mb4 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.64 sec)

regards,
Umesh

Thank you for your prompt reply.

I checked my cmake options and I think this problem should not be related to the cmake options. From the code's logic, it can be seen that utf8mb4_0900_ai_ci has more branches, so the executable binary file has more instructions, which is within expectations.

I currently have two machines, one is the machine where the bug was first reported, using Intel Xeon Platinum 8163 CPU, base frequency 2.7GHz. My other machine uses Intel Xeon Platinum 8369B CPU, base frequency 3.5GHz. The second CPU is better and has a higher base frequency. I measured the results on this machine, and utf8mb4 is 20% worse instead of the previous 25%:

mysql> select count(*) from t_mb3 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.59 sec)

mysql> select count(*) from t_mb4 where a=repeat('abc',15);
+----------+
| count(*) |
+----------+
|  1000000 |
+----------+
1 row in set (0.70 sec)

So I think the difference in execution time is related to the Generation of the CPU or the base frequency. The better the CPU, the less obvious the performance regression caused by the number of instructions. But even so, I see that your results show that utf8mb3 performs better than utf8mb4, but the difference is smaller.

I also used perf to check the hot spots during the statement execution, It can be clearly seen that utf8mb4 has higher CPU consumption:

# utf8mb3
+   28.44%  mysqld   mysqld              [.] my_strnncollsp_utf8mb3
+   10.53%  mysqld   mysqld              [.] buf::Block_hint::buffer_fix_block_if_still_valid
+    6.97%  mysqld   mysqld              [.] row_search_mvcc
+    3.28%  mysqld   mysqld              [.] memo_slot_release

# utf8mb4
+   46.09%  mysqld   mysqld               [.] my_strnncoll_uca<uca_scanner_900<Mb_wc_utf8mb4, 1>, 1, Mb_wc_utf8mb4>
+    8.10%  mysqld   mysqld               [.] buf::Block_hint::buffer_fix_block_if_still_valid
+    5.24%  mysqld   mysqld               [.] row_search_mvcc
+    2.69%  mysqld   mysqld               [.] memo_slot_release

Thank you for the details, let me re-run this and get back to you if anything further needed.

regards,
Umesh

Thank you once again.
Verified as described.

regards,
Umesh