Bug #98239 member state error when alter column while other member commit transaction later
Submitted: 15 Jan 2020 13:49 Modified: 28 Feb 2020 11:39
Reporter: hao lu (OCA) Email Updates:
Status: Not a Bug Impact on me:
None 
Category:MySQL Server: Group Replication Severity:S3 (Non-critical)
Version:8.0.19 OS:Ubuntu (ubuntu 1804 desktop x64)
Assigned to: CPU Architecture:Any
Tags: group replication; member state error;

[15 Jan 2020 13:49] hao lu
Description:
I set up a three-node multipoint write mgr cluster. And I create a simple table and confirm that all members are writable.

I start a transaction and insert one row into the table but not commit at node2. After that I modify the table at node1. At this moment, the table at node1 and node3 are modified, and the table at node2 is not modified yet. 

After commit transaction at node2, the table at node2 is modified, but node1 & node3 are in error state and removed from the group replication.

How to repeat:
###############################################
Step 1: prepare replication cluster environment.
###############################################

[node1.cnf]
pid-file        = /hdd-dir/data/mysqldata/data_3316/mysqld.pid
socket          = /hdd-dir/data/mysqldata/data_3316/mysqld.sock
datadir         = /hdd-dir/data/mysqldata/data_3316
log-error       = /var/log/mysql/error_3316.log
port            = 3316
mysqlx          = 0

#each is unique for local_address
group_replication_local_address= "127.0.0.1:33016"
group_replication_group_seeds= "127.0.0.1:33016,127.0.0.1:33017,127.0.0.1:33018"

#each is unique for server_id
server_id=2
gtid_mode=ON
enforce_gtid_consistency=ON
binlog_checksum=NONE

log_bin=binlog
log_slave_updates=ON
binlog_format=ROW
master_info_repository=TABLE
relay_log_info_repository=TABLE

plugin_load_add=group_replication.so
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=off
group_replication_bootstrap_group=off

[node2.cnf]
pid-file        = /hdd-dir/data/mysqldata/data_3317/mysqld.pid
socket          = /hdd-dir/data/mysqldata/data_3317/mysqld.sock
datadir         = /hdd-dir/data/mysqldata/data_3317
log-error       = /var/log/mysql/error_3317.log
port            = 3317
mysqlx          = 0

#each is unique for local_address
group_replication_local_address= "127.0.0.1:33017"
group_replication_group_seeds= "127.0.0.1:33016,127.0.0.1:33017,127.0.0.1:33018"

#each is unique for server_id
server_id=2
gtid_mode=ON
enforce_gtid_consistency=ON
binlog_checksum=NONE

log_bin=binlog
log_slave_updates=ON
binlog_format=ROW
master_info_repository=TABLE
relay_log_info_repository=TABLE

plugin_load_add=group_replication.so
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=off
group_replication_bootstrap_group=off

[node3.cnf]
pid-file        = /hdd-dir/data/mysqldata/data_3318/mysqld.pid
socket          = /hdd-dir/data/mysqldata/data_3318/mysqld.sock
datadir         = /hdd-dir/data/mysqldata/data_3318
log-error       = /var/log/mysql/error_3318.log
port            = 3318
mysqlx          = 0

#each is unique for local_address
group_replication_local_address= "127.0.0.1:33018"
group_replication_group_seeds= "127.0.0.1:33016,127.0.0.1:33017,127.0.0.1:33018"

#each is unique for server_id
server_id=3
gtid_mode=ON
enforce_gtid_consistency=ON
binlog_checksum=NONE

log_bin=binlog
log_slave_updates=ON
binlog_format=ROW
master_info_repository=TABLE
relay_log_info_repository=TABLE

plugin_load_add=group_replication.so
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=off
group_replication_bootstrap_group=off

#### connect to node1 ####

mysql -uroot -h127.0.0.1 -P3316

mysql> SET SQL_LOG_BIN=0;
mysql> CREATE USER rpl_user@'%' IDENTIFIED BY 'password';
mysql> GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';
mysql> GRANT BACKUP_ADMIN ON *.* TO rpl_user@'%';
mysql> FLUSH PRIVILEGES;
mysql> SET SQL_LOG_BIN=1;
mysql>CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';

mysql> reset master;
mysql> SET GLOBAL group_replication_single_primary_mode=FALSE;
mysql> SET GLOBAL group_replication_enforce_update_everywhere_checks=TRUE;
mysql> SET GLOBAL group_replication_bootstrap_group=on;
mysql> START GROUP_REPLICATION;
mysql> SET GLOBAL group_replication_bootstrap_group=off;
mysql> set global group_replication_recovery_get_public_key=ON;

#### connect to node2 ####

mysql -uroot -h127.0.0.1 -P3317

mysql> SET SQL_LOG_BIN=0;
mysql> CREATE USER rpl_user@'%' IDENTIFIED BY 'password';
mysql> GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';
mysql> GRANT BACKUP_ADMIN ON *.* TO rpl_user@'%';
mysql> FLUSH PRIVILEGES;
mysql> SET SQL_LOG_BIN=1;
mysql>CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';

mysql> reset master;
mysql> SET GLOBAL group_replication_single_primary_mode=FALSE;
mysql> SET GLOBAL group_replication_enforce_update_everywhere_checks=TRUE;
mysql> START GROUP_REPLICATION;
mysql> set global group_replication_recovery_get_public_key=ON;

#### connect to node3 ####

mysql -uroot -h127.0.0.1 -P3318

mysql> SET SQL_LOG_BIN=0;
mysql> CREATE USER rpl_user@'%' IDENTIFIED BY 'password';
mysql> GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';
mysql> GRANT BACKUP_ADMIN ON *.* TO rpl_user@'%';
mysql> FLUSH PRIVILEGES;
mysql> SET SQL_LOG_BIN=1;
mysql>CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';

mysql> reset master;
mysql> SET GLOBAL group_replication_single_primary_mode=FALSE;
mysql> SET GLOBAL group_replication_enforce_update_everywhere_checks=TRUE;
mysql> START GROUP_REPLICATION;
mysql> set global group_replication_recovery_get_public_key=ON;

#### check status ####

mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST           | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| group_replication_applier | 16174457-3795-11ea-a60f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3318 | ONLINE       | PRIMARY     | 8.0.19         |
| group_replication_applier | d153ef7c-3794-11ea-921f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3316 | ONLINE       | PRIMARY     | 8.0.19         |
| group_replication_applier | ebb30f20-3794-11ea-8f4c-e454e8ab5528 | greatlh-OptiPlex-5060 |        3317 | ONLINE       | PRIMARY     | 8.0.19         |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+

###############################################
Step 2: recurrent error
###############################################

#### connect node1 and create table ####

mysql -uroot -h127.0.0.1 -P3316
mysql> use test;
Database changed
mysql> create table t1 (c1 int primary key, c2 varchar(10));
Query OK, 0 rows affected (0.50 sec)

#### connect node2 and start transaction but not commit ####

mysql -uroot -h127.0.0.1 -P3317
mysql> use test;
Database changed
mysql> begin;
Query OK, 0 rows affected (0.00 sec)

mysql> insert into t1 values (1, 'abcd');
Query OK, 1 row affected (0.00 sec)

#### alter table at node1 ####

mysql> alter table t1 change c2 c3 varchar(11);
Query OK, 0 rows affected (0.28 sec)
Records: 0  Duplicates: 0  Warnings: 0

#### show binlog events and table at node3 ####

mysql> show binlog events;
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+
| Log_name      | Pos  | Event_type     | Server_id | End_log_pos | Info                                                                          |
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+
| binlog.000001 |    4 | Format_desc    |         3 |         124 | Server ver: 8.0.19, Binlog ver: 4                                             |
| binlog.000001 |  124 | Previous_gtids |         3 |         151 |                                                                               |
| binlog.000001 |  151 | Gtid           |         2 |         233 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:1'             |
| binlog.000001 |  233 | Query          |         2 |         295 | BEGIN                                                                         |
| binlog.000001 |  295 | View_change    |         2 |         394 | view_id=15790948149569500:1                                                   |
| binlog.000001 |  394 | Query          |         2 |         462 | COMMIT                                                                        |
| binlog.000001 |  462 | Gtid           |         2 |         544 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:2'             |
| binlog.000001 |  544 | Query          |         2 |         606 | BEGIN                                                                         |
| binlog.000001 |  606 | View_change    |         2 |         745 | view_id=15790948149569500:2                                                   |
| binlog.000001 |  745 | Query          |         2 |         813 | COMMIT                                                                        |
| binlog.000001 |  813 | Gtid           |         2 |         895 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:3'             |
| binlog.000001 |  895 | Query          |         2 |         957 | BEGIN                                                                         |
| binlog.000001 |  957 | View_change    |         2 |        1096 | view_id=15790948149569500:3                                                   |
| binlog.000001 | 1096 | Query          |         2 |        1164 | COMMIT                                                                        |
| binlog.000001 | 1164 | Gtid           |         2 |        1244 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:4'             |
| binlog.000001 | 1244 | Query          |         2 |        1353 | create database test /* xid=33 */                                             |
| binlog.000001 | 1353 | Gtid           |         2 |        1433 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:5'             |
| binlog.000001 | 1433 | Query          |         2 |        1574 | use `test`; create table t1 (c1 int primary key, c2 varchar(10)) /* xid=38 */ |
| binlog.000001 | 1574 | Gtid           |         2 |        1654 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:6'             |
| binlog.000001 | 1654 | Query          |         2 |        1782 | use `test`; alter table t1 change c2 c3 varchar(11) /* xid=39 */              |
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+
mysql> show create table t1;
+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table                                                                                                                                                      |
+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| t1    | CREATE TABLE `t1` (
  `c1` int NOT NULL,
  `c3` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`c1`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci |
+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)

#### show binlog events at node2 ####

mysql> show binlog events;
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+
| Log_name      | Pos  | Event_type     | Server_id | End_log_pos | Info                                                                          |
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+
| binlog.000001 |    4 | Format_desc    |         2 |         124 | Server ver: 8.0.19, Binlog ver: 4                                             |
| binlog.000001 |  124 | Previous_gtids |         2 |         151 |                                                                               |
| binlog.000001 |  151 | Gtid           |         2 |         233 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:1'             |
| binlog.000001 |  233 | Query          |         2 |         295 | BEGIN                                                                         |
| binlog.000001 |  295 | View_change    |         2 |         394 | view_id=15790948149569500:1                                                   |
| binlog.000001 |  394 | Query          |         2 |         462 | COMMIT                                                                        |
| binlog.000001 |  462 | Gtid           |         2 |         544 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:2'             |
| binlog.000001 |  544 | Query          |         2 |         606 | BEGIN                                                                         |
| binlog.000001 |  606 | View_change    |         2 |         745 | view_id=15790948149569500:2                                                   |
| binlog.000001 |  745 | Query          |         2 |         813 | COMMIT                                                                        |
| binlog.000001 |  813 | Gtid           |         2 |         895 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:3'             |
| binlog.000001 |  895 | Query          |         2 |         957 | BEGIN                                                                         |
| binlog.000001 |  957 | View_change    |         2 |        1096 | view_id=15790948149569500:3                                                   |
| binlog.000001 | 1096 | Query          |         2 |        1164 | COMMIT                                                                        |
| binlog.000001 | 1164 | Gtid           |         2 |        1244 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:4'             |
| binlog.000001 | 1244 | Query          |         2 |        1353 | create database test /* xid=35 */                                             |
| binlog.000001 | 1353 | Gtid           |         2 |        1433 | SET @@SESSION.GTID_NEXT= 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:5'             |
| binlog.000001 | 1433 | Query          |         2 |        1574 | use `test`; create table t1 (c1 int primary key, c2 varchar(10)) /* xid=40 */ |
+---------------+------+----------------+-----------+-------------+-------------------------------------------------------------------------------+

# we can see that the 'alter table' is blocked
mysql> show processlist;
+----+-----------------+-----------------+------+---------+------+---------------------------------+-----------------------------------------+
| Id | User            | Host            | db   | Command | Time | State                           | Info                                    |
+----+-----------------+-----------------+------+---------+------+---------------------------------+-----------------------------------------+
|  4 | event_scheduler | localhost       | NULL | Daemon  | 1035 | Waiting on empty queue          | NULL                                    |
|  9 | system user     |                 | NULL | Connect |  690 | waiting for handler commit      | Group replication applier module        |
| 12 | system user     |                 | test | Query   |  211 | Waiting for table metadata lock | alter table t1 change c2 c3 varchar(11) |
| 24 | root            | localhost:36788 | test | Query   |    0 | starting                        | show processlist                        |
+----+-----------------+-----------------+------+---------+------+---------------------------------+-----------------------------------------+
4 rows in set (0.00 sec)

#### check status before commit ####

mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST           | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| group_replication_applier | 16174457-3795-11ea-a60f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3318 | ONLINE       | PRIMARY     | 8.0.19         |
| group_replication_applier | d153ef7c-3794-11ea-921f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3316 | ONLINE       | PRIMARY     | 8.0.19         |
| group_replication_applier | ebb30f20-3794-11ea-8f4c-e454e8ab5528 | greatlh-OptiPlex-5060 |        3317 | ONLINE       | PRIMARY     | 8.0.19         |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
3 rows in set (0.00 sec)

#### finally commit transaction at node2 and check status ####

# only has one node in MGR.
mysql> commit;
Query OK, 0 rows affected (0.02 sec)

mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST           | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| group_replication_applier | ebb30f20-3794-11ea-8f4c-e454e8ab5528 | greatlh-OptiPlex-5060 |        3317 | ONLINE       | PRIMARY     | 8.0.19         |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
1 row in set (0.01 sec)

#### node1 and node 3 are in error status ####

mysql -uroot -h127.0.0.1 -P3316
mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST           | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| group_replication_applier | d153ef7c-3794-11ea-921f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3316 | ERROR        |             | 8.0.19         |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
1 row in set (0.00 sec)

mysql -uroot -h127.0.0.1 -P3318
mysql> select * from performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| CHANNEL_NAME              | MEMBER_ID                            | MEMBER_HOST           | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
| group_replication_applier | 16174457-3795-11ea-a60f-e454e8ab5528 | greatlh-OptiPlex-5060 |        3318 | ERROR        |             | 8.0.19         |
+---------------------------+--------------------------------------+-----------------------+-------------+--------------+-------------+----------------+
1 row in set (0.01 sec)

###############################################
error log at node1
###############################################

2020-01-15T13:40:07.909706Z 12 [ERROR] [MY-013146] [Repl] Slave SQL for channel 'group_replication_applier': Column 1 of table 'test.t1' cannot be converted from type 'varchar(40(bytes))' to type 'varchar(44(bytes) utf8mb4)', Error_code: MY-013146
2020-01-15T13:40:07.909733Z 12 [ERROR] [MY-011451] [Repl] Plugin group_replication reported: 'The applier thread execution was aborted. Unable to process more transactions, this member will now leave the group.'
2020-01-15T13:40:07.909743Z 12 [ERROR] [MY-010586] [Repl] Error running query, slave SQL thread aborted. Fix the problem, and restart the slave SQL thread with "SLAVE START". We stopped at log 'FIRST' position 0
2020-01-15T13:40:07.909754Z 9 [ERROR] [MY-011452] [Repl] Plugin group_replication reported: 'Fatal error during execution on the Applier process of Group Replication. The server will now leave the group.'
2020-01-15T13:40:07.909789Z 9 [ERROR] [MY-011712] [Repl] Plugin group_replication reported: 'The server was automatically set into read only mode after an error was detected.'

Suggested fix:
Report error either for DML or DDL, prevent into error status.
[26 Feb 2020 5:16] MySQL Verification Team
Hi,

Thanks for the test case. Verified as described. Now, I can't say it is a bug or a designed behavior but preventing the erroneous state would be preferred so I'm verifying this as a bug but our group replication dev team might decide otherwise.

kind regards
Bogdan
[28 Feb 2020 11:39] Nuno Carvalho
Posted by developer:
 
Thank you for the bug report, this case is listed on Group Replication limitations:
https://dev.mysql.com/doc/refman/8.0/en/group-replication-limitations.html

Concurrent DDL versus DML Operations.  Concurrent data definition statements and data manipulation statements executing against the same object but on different servers is not supported when using multi-primary mode. During execution of Data Definition Language (DDL) statements on an object, executing concurrent Data Manipulation Language (DML) on the same object but on a different server instance has the risk of conflicting DDL executing on different instances not being detected.

DDL and DML operations on the same table need to be executed on the same server.

Best regards,
Nuno Carvalho