Description:
a node crash in a mgr cluster during doing the "alter table ... import tablespace", when the node restart and join the mgr cluster, its Gtid_executed & binlog is inconsistent.
How to repeat:
1.I add debug code:
```
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -11302,6 +11302,9 @@ bool Sql_cmd_discard_import_tablespace::mysql_discard_or_import_tablespace(
++pa_id;
}
} else {
+ if (DBUG_EVALUATE_IF("import_tablespace_failed", true, false)) {
+ sleep(3600);
+ }
assert(m_alter_info->partition_names.elements == 0);
// We get here when we have imported a table tablespace, or all
// partition tablespaces. In this case, we remove SDIs from all
```
2.the mtr script:
```
################################################################################
#
# steps:
# 0. Initialization:
# - The test requires three servers: M1, M2, and M3.
# 1. ALTER TABLE IMPORT ... TABLESPACE ON M1
# 2. M3 APPLY THE BINLOG, when ALTER TABLE IMPORT ... TABLESPACE , the server M3 was killed
# 3. M3 start and join the group replicaton. then, M3 should have applied binlog (alter table import tablespace ...) agin.
# however, in step 2, the gtid(ALTER TABLE IMPORT ... TABLESPACE) was record in table mysql.gtid_executed,
# the binlog (ALTER TABLE IMPORT ... TABLESPACE) was not write into binlog file.
# 4.Conlusion: the ddl (ALTER TABLE IMPORT ... TABLESPACE) was not atomicity lead some serious problem.
################################################################################
--source include/have_group_replication_plugin.inc
--source include/force_restart.inc
--let $rpl_skip_group_replication_start= 0
--let $rpl_server_count= 3
--source include/group_replication.inc
let $MYSQLD_DATADIR = `SELECT @@datadir`;
CREATE TABLE t1 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32));
INSERT INTO t1 VALUES ('Test Data -1', 'Test Data -2', 'Test Data -3');
--sleep 5
FLUSH table t1 for export;
--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back
--copy_file $MYSQLD_DATADIR/test/t1.cfg $MYSQLD_DATADIR/t1.cfg_back
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
let $MYSQLD_DATADIR = `SELECT @@datadir`;
--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
let $MYSQLD_DATADIR = `SELECT @@datadir`;
--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
unlock tables;
DROP TABLE t1;
CREATE TABLE t2 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32));
ALTER TABLE t2 DISCARD TABLESPACE;
let $MYSQLD_DATADIR = `SELECT @@datadir`;
--move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd
--move_file $MYSQLD_DATADIR/t1.cfg_back $MYSQLD_DATADIR/test/t2.cfg
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
let $MYSQLD_DATADIR = `SELECT @@datadir`;
--move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
let $MYSQLD_DATADIR = `SELECT @@datadir`;
--move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd
set global debug='+d,import_tablespace_failed';
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
ALTER TABLE t2 IMPORT TABLESPACE;
--sleep 5
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--let $_server_id= `SELECT @@server_id`
--let $expect_file_name_s3= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
--let $mysqld_pid_file=`SELECT @@GLOBAL.pid_file`
--source include/expect_crash.inc
exec kill -9 `cat $mysqld_pid_file`;
--source include/start_mysqld.inc
set global debug='-d,import_tablespace_failed';
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
show master status;
--let $s1_gtid= `SELECT @@gtid_executed`
select * from t2;
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
eval SELECT WAIT_FOR_EXECUTED_GTID_SET('$s1_gtid', 60);
--error 1814
select * from t2;
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
DROP TABLE t2;
--source include/group_replication_end.inc
```
3.the result script:
```
[connection server1]
CREATE TABLE t1 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32));
INSERT INTO t1 VALUES ('Test Data -1', 'Test Data -2', 'Test Data -3');
FLUSH table t1 for export;
[connection server2]
[connection server3]
[connection server1]
unlock tables;
DROP TABLE t1;
CREATE TABLE t2 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32));
ALTER TABLE t2 DISCARD TABLESPACE;
[connection server2]
[connection server3]
set global debug='+d,import_tablespace_failed';
[connection server1]
ALTER TABLE t2 IMPORT TABLESPACE;
[connection server3]
# restart
set global debug='-d,import_tablespace_failed';
[connection server1]
show master status;
File Position Binlog_Do_DB Binlog_Ignore_DB Executed_Gtid_Set
server-binary-log.000001 2705 e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9
select * from t2;
c1 c2 c3
Test Data -1 Test Data -2 Test Data -3
[connection server3]
SELECT WAIT_FOR_EXECUTED_GTID_SET('e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9', 60);
WAIT_FOR_EXECUTED_GTID_SET('e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9', 60)
0
select * from t2;
ERROR HY000: Tablespace has been discarded for table 't2'
[connection server1]
DROP TABLE t2;
[connection server3]
[connection server1]
include/group_replication_end.inc
```
Suggested fix:
I do not have good suggestion.
Description: a node crash in a mgr cluster during doing the "alter table ... import tablespace", when the node restart and join the mgr cluster, its Gtid_executed & binlog is inconsistent. How to repeat: 1.I add debug code: ``` --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -11302,6 +11302,9 @@ bool Sql_cmd_discard_import_tablespace::mysql_discard_or_import_tablespace( ++pa_id; } } else { + if (DBUG_EVALUATE_IF("import_tablespace_failed", true, false)) { + sleep(3600); + } assert(m_alter_info->partition_names.elements == 0); // We get here when we have imported a table tablespace, or all // partition tablespaces. In this case, we remove SDIs from all ``` 2.the mtr script: ``` ################################################################################ # # steps: # 0. Initialization: # - The test requires three servers: M1, M2, and M3. # 1. ALTER TABLE IMPORT ... TABLESPACE ON M1 # 2. M3 APPLY THE BINLOG, when ALTER TABLE IMPORT ... TABLESPACE , the server M3 was killed # 3. M3 start and join the group replicaton. then, M3 should have applied binlog (alter table import tablespace ...) agin. # however, in step 2, the gtid(ALTER TABLE IMPORT ... TABLESPACE) was record in table mysql.gtid_executed, # the binlog (ALTER TABLE IMPORT ... TABLESPACE) was not write into binlog file. # 4.Conlusion: the ddl (ALTER TABLE IMPORT ... TABLESPACE) was not atomicity lead some serious problem. ################################################################################ --source include/have_group_replication_plugin.inc --source include/force_restart.inc --let $rpl_skip_group_replication_start= 0 --let $rpl_server_count= 3 --source include/group_replication.inc let $MYSQLD_DATADIR = `SELECT @@datadir`; CREATE TABLE t1 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32)); INSERT INTO t1 VALUES ('Test Data -1', 'Test Data -2', 'Test Data -3'); --sleep 5 FLUSH table t1 for export; --copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back --copy_file $MYSQLD_DATADIR/test/t1.cfg $MYSQLD_DATADIR/t1.cfg_back --let $rpl_connection_name= server2 --source include/rpl_connection.inc let $MYSQLD_DATADIR = `SELECT @@datadir`; --copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back --let $rpl_connection_name= server3 --source include/rpl_connection.inc let $MYSQLD_DATADIR = `SELECT @@datadir`; --copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/t1.ibd_back --let $rpl_connection_name= server1 --source include/rpl_connection.inc unlock tables; DROP TABLE t1; CREATE TABLE t2 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32)); ALTER TABLE t2 DISCARD TABLESPACE; let $MYSQLD_DATADIR = `SELECT @@datadir`; --move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd --move_file $MYSQLD_DATADIR/t1.cfg_back $MYSQLD_DATADIR/test/t2.cfg --let $rpl_connection_name= server2 --source include/rpl_connection.inc let $MYSQLD_DATADIR = `SELECT @@datadir`; --move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd --let $rpl_connection_name= server3 --source include/rpl_connection.inc let $MYSQLD_DATADIR = `SELECT @@datadir`; --move_file $MYSQLD_DATADIR/t1.ibd_back $MYSQLD_DATADIR/test/t2.ibd set global debug='+d,import_tablespace_failed'; --let $rpl_connection_name= server1 --source include/rpl_connection.inc ALTER TABLE t2 IMPORT TABLESPACE; --sleep 5 --let $rpl_connection_name= server3 --source include/rpl_connection.inc --let $_server_id= `SELECT @@server_id` --let $expect_file_name_s3= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect --let $mysqld_pid_file=`SELECT @@GLOBAL.pid_file` --source include/expect_crash.inc exec kill -9 `cat $mysqld_pid_file`; --source include/start_mysqld.inc set global debug='-d,import_tablespace_failed'; --let $rpl_connection_name= server1 --source include/rpl_connection.inc show master status; --let $s1_gtid= `SELECT @@gtid_executed` select * from t2; --let $rpl_connection_name= server3 --source include/rpl_connection.inc eval SELECT WAIT_FOR_EXECUTED_GTID_SET('$s1_gtid', 60); --error 1814 select * from t2; --let $rpl_connection_name= server1 --source include/rpl_connection.inc DROP TABLE t2; --source include/group_replication_end.inc ``` 3.the result script: ``` [connection server1] CREATE TABLE t1 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32)); INSERT INTO t1 VALUES ('Test Data -1', 'Test Data -2', 'Test Data -3'); FLUSH table t1 for export; [connection server2] [connection server3] [connection server1] unlock tables; DROP TABLE t1; CREATE TABLE t2 (c1 VARCHAR(32) primary key, c2 VARCHAR(32), c3 VARCHAR(32)); ALTER TABLE t2 DISCARD TABLESPACE; [connection server2] [connection server3] set global debug='+d,import_tablespace_failed'; [connection server1] ALTER TABLE t2 IMPORT TABLESPACE; [connection server3] # restart set global debug='-d,import_tablespace_failed'; [connection server1] show master status; File Position Binlog_Do_DB Binlog_Ignore_DB Executed_Gtid_Set server-binary-log.000001 2705 e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9 select * from t2; c1 c2 c3 Test Data -1 Test Data -2 Test Data -3 [connection server3] SELECT WAIT_FOR_EXECUTED_GTID_SET('e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9', 60); WAIT_FOR_EXECUTED_GTID_SET('e8b953e5-2236-11ee-9ad1-2cf05daaf60d:1-9', 60) 0 select * from t2; ERROR HY000: Tablespace has been discarded for table 't2' [connection server1] DROP TABLE t2; [connection server3] [connection server1] include/group_replication_end.inc ``` Suggested fix: I do not have good suggestion.