Description:
We have noticed multiple slave hung issues on mysql 5.7.26 version. The sql thread hung in "init" state while executing insert command. There are no other transactions or any queries running on the instance. There is only one open table and it is in use by the hung INSERT query. Here, stopping replication slave using "stop slave" didnt work as the mysql is waiting for the replication sql_thread to exit. The only solution is to restart mysqld instance.
mysql> select * from information_schema.innodb_trx;
Empty set (0.00 sec)
mysql> show open tables where in_use>0;
+----------+------------+--------+-------------+
| Database | Table | In_use | Name_locked |
+----------+------------+--------+-------------+
| appsdb | table_name | 1 | 0 |
+----------+------------+--------+-------------+
1 row in set (0.00 sec)
mysql> show processlist;
+-----------+-----------------+-----------------+---------+-------------+----------+---------------------------------------------------------------+-----------------------------------+
| Id | User | Host | db | Command | Time | State | Info |
+-----------+-----------------+-----------------+---------+-------------+----------+---------------------------------------------------------------+-----------------------------------+
| 1 | event_scheduler | localhost | NULL | Daemon | 14647728 | Waiting on empty queue | NULL |
| 146 | system user | | NULL | Connect | 14647715 | Waiting for master to send event | NULL |
| 147 | system user | | appsdb | Connect | 8636 | init | INSERT INTO `table_name` (col1, |
| 149999962 | sysadmin | localhost | NULL | Sleep | 2 | | NULL |
| 150050466 | dev_user | 10.1.1.29:80998 | NULL | Binlog Dump | 267 | Master has sent all binlog to slave; waiting for more updates | NULL |
| 150050469 | sysadmin | localhost | appsdb | Query | 0 | starting | show processlist |
+-----------+-----------------+-----------------+---------+-------------+----------+---------------------------------------------------------------+-----------------------------------+
6 rows in set (0.00 sec)
From trace we can see that the command entered innodb and stuck in waiting for asynchrounous rollback.
nanosleep(libpthread.so.0),os_thread_sleep(os0thread.cc:279),wait(trx0trx.h:1564),TrxInInnoDB::enter(trx0trx.h:1564),TrxInInnoDB::enter(trx0trx.ic:213),TrxInInnoDB(trx0trx.h:1369),ha_innobase::store_lock(trx0trx.h:1369),ha_innopart::store_lock(ha_innopart.cc:4208),get_lock_data(lock.cc:741),mysql_lock_tables(lock.cc:741),lock_tables(sql_base.cc:6695),Sql_cmd_insert::mysql_insert(sql_insert.cc:617),Sql_cmd_insert::execute(sql_insert.cc:3118),mysql_execute_command(sql_parse.cc:3625),mysql_parse(sql_parse.cc:5727),Query_log_event::do_apply_event(log_event.cc:4778),Log_event::apply_event(log_event.cc:3473),apply_event_and_update_pos(rpl_slave.cc:4734),exec_relay_log_event(rpl_slave.cc:5272),handle_slave_sql(rpl_slave.cc:5272),pfs_spawn_thread(pfs.cc:2190),start_thread(libpthread.so.0),clone(libc.so.6)
Thread 104 (Thread 0x2b6f1dd91700 (LWP 21338)):
#0 0x00002b6d89b7e96d in nanosleep () from /lib64/libpthread.so.0
#1 0x0000000001043950 in os_thread_sleep (tm=<optimized out>) at /local/mysqlsql/storage/innobase/os/os0thread.cc:279
#2 0x0000000000fbeb45 in wait (trx=0x2b6d96c1d6e8) at /local/mysqlsql/storage/innobase/include/trx0trx.h:1564
#3 TrxInInnoDB::enter (trx=trx@entry=0x2b6d96c1d6e8, disable=false) at /local/mysqlsql/storage/innobase/include/trx0trx.h:1484
#4 0x0000000000fbf83e in TrxInInnoDB::enter (trx=trx@entry=0x2b6d96c1d6e8, disable=false) at /local/mysqlsql/storage/innobase/include/trx0trx.ic:213
#5 0x0000000000fc04a0 in TrxInInnoDB (disable=false, trx=0x2b6d96c1d6e8, this=<synthetic pointer>) at /local/mysqlsql/storage/innobase/include/trx0trx.h:1369
#6 ha_innobase::store_lock (this=this@entry=0x2b7aa8100430, thd=thd@entry=0x2b7a70ca3800, to=to@entry=0x2b7a718943b8, lock_type=lock_type@entry=TL_WRITE_CONCURRENT_INSERT) at /local/mysqlsql/storage/innobase/handler/ha_innodb.cc:16463
#7 0x0000000000fde4d4 in ha_innopart::store_lock (this=0x2b7aa8100430, thd=0x2b7a70ca3800, to=0x2b7a718943b8, lock_type=TL_WRITE_CONCURRENT_INSERT) at /local/mysqlsql/storage/innobase/handler/ha_innopart.cc:4208
#8 0x0000000000e1f3b0 in get_lock_data (flags=<optimized out>, count=<optimized out>, table_ptr=<optimized out>, thd=<optimized out>) at /local/mysqlsql/sql/lock.cc:741
#9 mysql_lock_tables (thd=thd@entry=0x2b7a70ca3800, tables=0x2b7aa9bc8c58, count=<optimized out>, flags=<optimized out>) at /local/mysqlsql/sql/lock.cc:320
#10 0x0000000000ca2425 in lock_tables (thd=thd@entry=0x2b7a70ca3800, tables=tables@entry=0x2b7aa9bc8030, count=<optimized out>, flags=flags@entry=0) at /local/mysqlsql/sql/sql_base.cc:6695
#11 0x0000000000e7774c in Sql_cmd_insert::mysql_insert (this=this@entry=0x2b7aa9bc8ba8, thd=thd@entry=0x2b7a70ca3800, table_list=table_list@entry=0x2b7aa9bc8030) at /local/mysqlsql/sql/sql_insert.cc:617
#12 0x0000000000e786c2 in Sql_cmd_insert::execute (this=0x2b7aa9bc8ba8, thd=0x2b7a70ca3800) at /local/mysqlsql/sql/sql_insert.cc:3118
#13 0x0000000000cfaa41 in mysql_execute_command (thd=thd@entry=0x2b7a70ca3800, first_level=first_level@entry=true) at /local/mysqlsql/sql/sql_parse.cc:3625
#14 0x0000000000cfe5fd in mysql_parse (thd=0x2b7a70ca3800, parser_state=parser_state@entry=0x2b6f1dd905f0) at /local/mysqlsql/sql/sql_parse.cc:5727
#15 0x0000000000ea50f6 in Query_log_event::do_apply_event (this=0x2b7a79beb020, rli=0x2b6d8ac1f800, query_arg=0x2b7a79844dac "INSERT INTO `table_name` (..) VALUES (.."..., q_len_arg=<optimized out>) at /local/mysqlsql/sql/log_event.cc:4778
#16 0x0000000000ea3605 in Log_event::apply_event (this=this@entry=0x2b7a79beb020, rli=rli@entry=0x2b6d8ac1f800) at /local/mysqlsql/sql/log_event.cc:3473
#17 0x0000000000ee7fc2 in apply_event_and_update_pos (ptr_ev=ptr_ev@entry=0x2b6f1dd90958, thd=thd@entry=0x2b7a70ca3800, rli=rli@entry=0x2b6d8ac1f800) at /local/mysqlsql/sql/rpl_slave.cc:4734
#18 0x0000000000ef2ccf in exec_relay_log_event (rli=0x2b6d8ac1f800, thd=0x2b7a70ca3800) at /local/mysqlsql/sql/rpl_slave.cc:5272
#19 handle_slave_sql (arg=arg@entry=0x2b7a045ed800) at /local/mysqlsql/sql/rpl_slave.cc:7463
#20 0x00000000012ead59 in pfs_spawn_thread (arg=0x2b7a6fbe11e0) at /local/mysqlsql/storage/perfschema/pfs.cc:2190
#21 0x00002b6d89b77dc5 in start_thread () from /lib64/libpthread.so.0
#22 0x00002b6d89e82c9d in clone () from /lib64/libc.so.6
/* Wait for the asynchronous rollback to complete, if it is in progress */
static void wait(trx_t* trx)
{
ut_ad(trx_mutex_own(trx));
ulint loop_count = 0;
/* start with optimistic sleep time - 20 micro seconds. */
ulint sleep_time = 20;
while (is_forced_rollback(trx)) {
/* Wait for the async rollback to complete */
trx_mutex_exit(trx);
loop_count++;
/* If the wait is long, don't hog the cpu. */
if (loop_count < 100) {
/* 20 microseconds */
sleep_time = 20;
} else if (loop_count < 1000) {
/* 1 millisecond */
sleep_time = 1000;
} else {
/* 100 milliseconds */
sleep_time = 100000;
}
os_thread_sleep(sleep_time);
trx_mutex_enter(trx);
}
}
There was similar issues reported in below bug on few other mysql 5.7 minors by my colleague.
https://bugs.mysql.com/bug.php?id=92975
How to repeat:
Above hung query is regular query executed multiple times on the same instance. However the issue appears occasionally. Exact repo steps are not available at the moment.
Suggested fix:
restart MySQL