Description:
The result mismatch with below test. When set @@collation_connection='utf8mb4_unicode_ci'; the group by result mismatch as should have two groups.
The result is right in 8.0 .
mysql> show variables like "%collation%";
+----------------------+--------------------+
| Variable_name | Value |
+----------------------+--------------------+
| collation_connection | utf8mb4_unicode_ci |
| collation_database | utf8_general_ci |
| collation_server | utf8_general_ci |
+----------------------+--------------------+
CREATE TABLE `data_user_test` ( `user_id` int(11) NOT NULL, `country` varchar(20) NOT NULL DEFAULT '', `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP , `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP , PRIMARY KEY (`user_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
mysql> insert into data_user_test values(1, '中国', now(), now()), (2, '日本', now(), now()), (3,'中国', now(), now()), (4, '韩国', now(), now());
Query OK, 4 rows affected (0.00 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql> SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;
+--------------+----------------------------+
| country_type | count(distinct du.user_id) |
+--------------+----------------------------+
| 国内 | 2 |
| 国外 | 2 |
+--------------+----------------------------+
mysql> set @@collation_connection='utf8mb4_unicode_ci';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;
+--------------+----------------------------+
| country_type | count(distinct du.user_id) |
+--------------+----------------------------+
| 国内 | 1 |
| 国外 | 1 |
| 国内 | 1 |
| 国外 | 1 |
+--------------+----------------------------+
mysql> set @@collation_connection='utf8_unicode_ci';
mysql> SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;
+--------------+----------------------------+
| country_type | count(distinct du.user_id) |
+--------------+----------------------------+
| 国内 | 1 |
| 国外 | 1 |
| 国内 | 1 |
| 国外 | 1 |
+--------------+----------------------------+
How to repeat:
t/unicode_result_mismatch.test
CREATE TABLE `data_user_test` ( `user_id` int(11) NOT NULL, `country` varchar(20) NOT NULL DEFAULT '', `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP , `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP , PRIMARY KEY (`user_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
insert into data_user_test values(1, '中国', now(), now()), (2, '日本', now(), now()), (3,'中国', now(), now()), (4, '韩国', now(), now());
SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;
set @@collation_connection='utf8mb4_unicode_ci';
SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;
set @@collation_connection='utf8_unicode_ci';
SELECT IF(country='中国', '国内', '国外') country_type, count(distinct du.user_id) from data_user_test du group by 1;