Description:
When in AFTER consistency mode, it one node become UNREACHABLE, transaction cannot commit success until the unreachable expel from group_replication cluster.
In this behavior, any backup node failover, the cluster will have some time that cannot provide RW service, which maybe an accient in real database system.
How to repeat:
first, add below cnf and test file.
$ cat mysql-test/suite/group_replication/t/gr_after_hang.cnf
!include ../my.cnf
[mysqld.1]
[mysqld.2]
[mysqld.3]
[ENV]
SERVER_MYPORT_3= @mysqld.3.port
SERVER_MYSOCK_3= @mysqld.3.socket
$ cat mysql-test/suite/group_replication/t/gr_after_hang.test
--source include/have_group_replication_plugin.inc
--let $rpl_skip_group_replication_start= 1
--let $rpl_server_count= 3
--let $rpl_group_replication_single_primary_mode=1
--source include/group_replication.inc
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
--source include/start_and_bootstrap_group_replication.inc
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
--source include/start_group_replication.inc
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
--source include/start_group_replication.inc
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SELECT * FROM performance_schema.replication_group_members;
CREATE TABLE t1 (a INT PRIMARY KEY , b INT);
--exec date
INSERT INTO t1 VALUES (1,1);
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
SET PERSIST group_replication_local_address=@@group_replication_local_address;
--echo [server3 shutdown]
--let $_server_id= `SELECT @@server_id`
--let $expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
--let $mysqld_pid_file=`SELECT @@GLOBAL.pid_file`
--source include/expect_crash.inc
exec kill -9 `cat $mysqld_pid_file`;
connect (conn1,127.0.0.1,root,,test,$SERVER_MYPORT_1,);
connect (conn2,127.0.0.1,root,,test,$SERVER_MYPORT_1,);
connection conn1;
--echo [connect conn1]
--let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_group_members WHERE MEMBER_STATE='UNREACHABLE'
--source include/wait_condition.inc
SELECT * FROM performance_schema.replication_group_members;
send INSERT INTO t1 VALUES (2,2);
sleep 5;
connection conn2;
--echo [connect conn2]
show processlist;
connection conn1;
--echo [connect conn1]
reap;
--exec date
And then run the testcase with command:
mysql-test/mtr gr_after_hang --nocheck-testcases
==============================================================================
TEST NAME RESULT TIME (ms) COMMENT
------------------------------------------------------------------------------
include/group_replication.inc [rpl_server_count=3]
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection server1]
[connection server1]
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
include/start_and_bootstrap_group_replication.inc
[connection server2]
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
include/start_group_replication.inc
[connection server3]
SET GLOBAL group_replication_member_expel_timeout=30;
SET GLOBAL group_replication_consistency='AFTER';
include/start_group_replication.inc
[connection server1]
SELECT * FROM performance_schema.replication_group_members;
CHANNEL_NAME MEMBER_ID MEMBER_HOST MEMBER_PORT MEMBER_STATE MEMBER_ROLE MEMBER_VERSION
group_replication_applier 2c4f2d31-03f7-11ec-9dec-c8f7507e5048 127.0.0.1 13002 ONLINE SECONDARY 8.0.25
group_replication_applier 2c89f332-03f7-11ec-abbe-c8f7507e5048 127.0.0.1 13004 ONLINE SECONDARY 8.0.25
group_replication_applier 2c963be8-03f7-11ec-ba4a-c8f7507e5048 127.0.0.1 13000 ONLINE PRIMARY 8.0.25
CREATE TABLE t1 (a INT PRIMARY KEY , b INT);
Mon Aug 23 12:47:59 GMT 2021
INSERT INTO t1 VALUES (1,1);
[connection server3]
SET PERSIST group_replication_local_address=@@group_replication_local_address;
[server3 shutdown]
[connect conn1]
SELECT * FROM performance_schema.replication_group_members;
CHANNEL_NAME MEMBER_ID MEMBER_HOST MEMBER_PORT MEMBER_STATE MEMBER_ROLE MEMBER_VERSION
group_replication_applier 2c4f2d31-03f7-11ec-9dec-c8f7507e5048 127.0.0.1 13002 ONLINE SECONDARY 8.0.25
group_replication_applier 2c89f332-03f7-11ec-abbe-c8f7507e5048 127.0.0.1 13004 UNREACHABLE SECONDARY 8.0.25
group_replication_applier 2c963be8-03f7-11ec-ba4a-c8f7507e5048 127.0.0.1 13000 ONLINE PRIMARY 8.0.25
INSERT INTO t1 VALUES (2,2);
[connect conn2]
show processlist;
Id User Host db Command Time State Info Time_ms Rows_sent Rows_examined
5 event_scheduler localhost NULL Daemon 19 Waiting on empty queue NULL 18855 0 0
8 root localhost test Sleep 19 NULL 18718 1 1
10 root localhost:43354 test Sleep 19 NULL 18392 1 1
11 root localhost:43356 test Sleep 19 NULL 18531 1 1
12 root localhost:43358 test Sleep 8 NULL 7570 0 0
13 root localhost:43364 test Sleep 19 NULL 18207 1 1
14 root localhost:43366 test Sleep 19 NULL 18402 1 1
17 system user NULL Connect 18 waiting for handler commit Group replication applier module 18200 0 0
20 system user NULL Query 18 Slave has read all relay log; waiting for more updates NULL 8968 0 0
33 root localhost:43416 test Query 5 waiting for handler commit INSERT INTO t1 VALUES (2,2) 5001 0 0
34 root localhost:43418 test Query 0 init show processlist 0 0 0
[connect conn1]
Mon Aug 23 12:48:33 GMT 2021