Description:
When InnoDB background purge thread truncate some undo logs, the GLOBAL and BACKUP MDL lock is held by it and never released. The user thread will get stuck if try to execute SQLs that need these two MDL locks, such as
1. FLUSH TABLES WITH READ LOCK
2. LOCK INSTANCE FOR BACKUP
3. SET GLBOAL read_only = on
How to repeat:
Below is the recurrence testcase, actually I just copied the previous part of innodb_undo.truncate mtr testcase, and added 2 lines.
#
# WL#6965: Truncate UNDO logs.
#
--source include/not_valgrind.inc
--source include/have_debug.inc
# The test is shutting down the server to force undo truncation.
--source include/big_test.inc
SET GLOBAL innodb_undo_tablespaces=2;
SET GLOBAL innodb_rollback_segments=1;
SET GLOBAL innodb_undo_log_truncate=1;
SET GLOBAL innodb_max_undo_log_size=10485760;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
SET GLOBAL innodb_purge_stop_now=ON;
SET GLOBAL innodb_stats_auto_recalc=OFF;
SHOW VARIABLES LIKE 'innodb_undo_tablespaces';
SHOW VARIABLES LIKE 'innodb_rollback_segments';
################################################################################
# Test-case will test following scenarios.
#
# 1. Perform enough DML action so that undo tablespace sizes grow beyond
# set threshold and then wait and see if it is being truncated.
#
################################################################################
#-----------------------------------------------------------------------------
#
# create test-bed
#
let MYSQLD_DATADIR = `select @@datadir`;
#-----------------------------------------------------------------------------
#
# 1. Perform enough DML action so that undo tablespace sizes grow beyond
# set threshold and then wait and see if it is being truncated.
#
# Storing the initial sizes of the tablespaces before DML action
let CHECKFILE = $MYSQLTEST_VARDIR/tmp/check.txt;
perl;
($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$ini1)
= stat("$ENV{MYSQLD_DATADIR}/undo_001");
($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$ini2)
= stat("$ENV{MYSQLD_DATADIR}/undo_002");
open(OUT, ">$ENV{CHECKFILE}") || die;
print OUT "let \$ini1= $ini1;\n";
print OUT "let \$ini2= $ini2;\n";
close(OUT);
EOF
--source $CHECKFILE
# Start of the first transaction that performs sufficient DML action
--connect(con1,localhost,root,,test)
--connection con1
SHOW VARIABLES LIKE 'innodb_rollback_segments';
create table t1(
keyc int,
c1 char(255),
c2 char(255),
c3 char(255),
c4 char(255),
c5 char(255),
c6 char(255),
primary key(keyc)) engine = innodb;
delimiter |;
CREATE PROCEDURE populate_t1()
BEGIN
DECLARE i INT DEFAULT 1;
while (i <= 20000) DO
insert into t1 values (i, 'a', 'b', 'c', 'd', 'e', 'f' );
SET i = i + 1;
END WHILE;
END |
delimiter ;|
begin;
call populate_t1();
delete from t1 where keyc < 10000;
update t1 set c1 = 'mysql' where keyc > 10000;
update t1 set c2 = 'mysql' where keyc > 10000;
update t1 set c3= 'mysql' where keyc > 10000;
update t1 set c4= 'mysql' where keyc > 10000;
update t1 set c5= 'mysql' where keyc > 10000;
update t1 set c6= 'mysql' where keyc > 10000;
# Starting a new transaction for both tablespaces to get utilized
--connect(con2,localhost,root,,test)
--connection con2
create table t2 (
keyc int,
c1 char(255),
c2 char(255),
c3 char(255),
c4 char(255),
c5 char(255),
c6 char(255),
primary key(keyc)) engine = innodb;
delimiter |;
CREATE PROCEDURE populate_t2()
BEGIN
DECLARE i INT DEFAULT 1;
while (i <= 20000) DO
insert into t2 values (i, 'a', 'b', 'c', 'd', 'e', 'f' );
SET i = i + 1;
END WHILE;
END |
delimiter ;|
begin;
call populate_t2();
delete from t2 where keyc < 10000;
update t2 set c1 = 'mysql' where keyc > 10000;
update t2 set c2 = 'mysql' where keyc > 10000;
update t2 set c3= 'mysql' where keyc > 10000;
update t2 set c4= 'mysql' where keyc > 10000;
update t2 set c5= 'mysql' where keyc > 10000;
update t2 set c6= 'mysql' where keyc > 10000;
--connection con2
commit;
--connection con1
commit;
--connection default
# Cleanup
drop PROCEDURE populate_t1;
drop PROCEDURE populate_t2;
drop table t1, t2;
--disconnect con1
--disconnect con2
# Creating a file check.txt to record information about tablespace sizes
let CHECKFILE = $MYSQLTEST_VARDIR/tmp/check.txt;
# Storing the sizes of both tablespaces before server shutdown
perl;
($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size1)
= stat("$ENV{MYSQLD_DATADIR}/undo_001");
($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size2)
= stat("$ENV{MYSQLD_DATADIR}/undo_002");
open(OUT, ">$ENV{CHECKFILE}") || die;
print OUT "let \$size1= $size1;\n";
print OUT "let \$size2= $size2;\n";
print OUT "expr \$before= \$size1 + \$size2;\n";
close(OUT);
EOF
SET GLOBAL innodb_purge_run_now=ON;
# Give the purge thread some time to do undo truncate
sleep 10;
FLUSH TABLES WITH READ LOCK; // will stuck here
When the testcase get stuck, we can get the MDL info easily from performance_schema.metadata_locks, and we can see that thread/innodb/srv_purge_thread hold the INTENTION_EXCLUSIVE type GLOAL and BACKUP MDL lock with TRANSACTION duration.
mysql> show processlist;
+----+-----------------+-----------------+------+---------+------+------------------------------+-----------------------------+
| Id | User | Host | db | Command | Time | State | Info |
+----+-----------------+-----------------+------+---------+------+------------------------------+-----------------------------+
| 4 | event_scheduler | localhost | NULL | Daemon | 185 | Waiting on empty queue | NULL |
| 9 | root | localhost | test | Query | 46 | Waiting for global read lock | FLUSH TABLES WITH READ LOCK |
| 11 | root | localhost:48654 | NULL | Query | 0 | starting | show processlist |
+----+-----------------+-----------------+------+---------+------+------------------------------+-----------------------------+
3 rows in set (0.00 sec)
mysql> select * from performance_schema.metadata_locks;
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+------------------------+-----------------+----------------+
| OBJECT_TYPE | OBJECT_SCHEMA | OBJECT_NAME | COLUMN_NAME | OBJECT_INSTANCE_BEGIN | LOCK_TYPE | LOCK_DURATION | LOCK_STATUS | SOURCE | OWNER_THREAD_ID | OWNER_EVENT_ID |
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+------------------------+-----------------+----------------+
| GLOBAL | NULL | NULL | NULL | 140181295967536 | INTENTION_EXCLUSIVE | TRANSACTION | GRANTED | dictionary_impl.cc:416 | 34 | 1213371 |
| BACKUP LOCK | NULL | NULL | NULL | 140181293968896 | INTENTION_EXCLUSIVE | TRANSACTION | GRANTED | dictionary_impl.cc:421 | 34 | 1213374 |
| GLOBAL | NULL | NULL | NULL | 140180690164416 | SHARED | EXPLICIT | PENDING | lock.cc:1013 | 47 | 27691 |
| TABLE | performance_schema | metadata_locks | NULL | 140180619626736 | SHARED_READ | TRANSACTION | GRANTED | sql_parse.cc:5744 | 49 | 10014 |
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+------------------------+-----------------+----------------+
4 rows in set (0.00 sec)
mysql> select * from performance_schema.threads where thread_id = 34\G
*************************** 1. row ***************************
THREAD_ID: 34
NAME: thread/innodb/srv_purge_thread
TYPE: BACKGROUND
PROCESSLIST_ID: NULL
PROCESSLIST_USER: NULL
PROCESSLIST_HOST: NULL
PROCESSLIST_DB: NULL
PROCESSLIST_COMMAND: NULL
PROCESSLIST_TIME: 207
PROCESSLIST_STATE: NULL
PROCESSLIST_INFO: NULL
PARENT_THREAD_ID: NULL
ROLE: NULL
INSTRUMENTED: YES
HISTORY: YES
CONNECTION_TYPE: NULL
THREAD_OS_ID: 65003
RESOURCE_GROUP: SYS_default
1 row in set (0.00 sec)
Suggested fix:
From the code level, in the `trx_undo_truncate_tablespace()` fucntion, the invoking of `dd::acquire_exclusive_tablespace_mdl()` will get the GLOBAL and BAKCUP mdl lock with MDL_TRANSACTION duration while try to get the EXPLICT mdl lock of undo log to be truncated. But after that, these two transaction duration MDL locks are not released (dd_release_mdl() only release EXPLICT MDL lock).
The proposed fix is to call `dd::commit_or_rollback_tablespace_change()` to release TRANSACTION duration MDL locks in `trx_undo_truncate_tablespace()`, just like what we did in `resume_alter_encrypt_tablespace()`.