Description:
[Warning] InnoDB: Cannot rename file ./test/t1.ibd (space id 78102), retried 1000 times. There are either pending IOs or flushes or the file is being extended.
InnoDB: Assertion failure in thread 140414061098752 in file handler0alter.cc line 7763
/mysql57/bin/mysqld(my_print_stacktrace+0x2c)[0xf4cdbc]
/mysql57/bin/mysqld(handle_fatal_signal+0x4b6)[0x7edce6]
/lib64/libpthread.so.0(+0xf620)[0x7f32030f1620]
/lib64/libc.so.6(gsignal+0x37)[0x7f3201ef2277]
/lib64/libc.so.6(abort+0x148)[0x7f3201ef3968]
/mysql57/bin/mysqld[0x7b5607]
/mysql570/bin/mysqld(_ZN11ha_innobase26commit_inplace_alter_tableEP5TABLEP18Alter_inplace_infob+0x1747)[0xfb5b47]
/u01/mysql57_20210630/bin/mysqld(_Z17mysql_alter_tableP3THDPKcS2_P24st_ha_create_informationP10TABLE_LISTP10Alter_info+0x356e)[0xd5142e]
/mysql57/bin/mysqld(_ZN19Sql_cmd_alter_table7executeEP3THD+0x5a0)[0xe7f980]
/mysql57/bin/mysqld(_Z21mysql_execute_commandP3THDb+0x8b2)[0xce2d02]
/mysql57/bin/mysqld(_Z11mysql_parseP3THDP12Parser_state+0x3ad)[0xce765d]
/mysql57/bin/mysqld(_Z16dispatch_commandP3THDPK8COM_DATA19enum_server_command+0xb5f)[0xce826f]
/mysql57/bin/mysqld(_Z21do_threadpool_commandP12connection_tP8COM_DATA19enum_server_command+0x6e)[0x13504ce]
/mysql57/bin/mysqld(_Z31threadpool_process_request_unixP3THDP12connection_tPi+0x157)[0x13506f7]
/mysql57/bin/mysqld[0x1350cac]
/mysql57/bin/mysqld[0x13516a5]
/mysql57/bin/mysqld(pfs_spawn_thread+0x1b1)[0x12c3321]
/lib64/libpthread.so.0(+0x7e25)[0x7f32030e9e25]
/lib64/libc.so.6(clone+0x6d)[0x7f3201fb9f1d]
How to repeat:
Occasionally occur in production system.
Suggested fix:
See attached patch.
the root cause by analyzing core file.
Deadlock happens here:
thread#1(reanme thread) waits for thread#2(io thread)
thread#2(io thread) waits for thread#3(flush thread)
thread#3(flush thread) waits for thread#1(rename thread)
The whole system hangs because thread#1 holds dict_sys->mutex during reaming tablespace!!!
let's say renamed table space id is 71538.
thread#1: the crashed thread fails to waiting for all io of (71538) to complete.
#5 0x00000000007b5607 in ut_dbg_assertion_failed (
expr=expr@entry=0x178e20a "error == DB_SUCCESS",
file=file@entry=0x178e082 "handler/handler0alter.cc", line=line@entry=7763)
at ut/ut0dbg.cc:75
#6 0x0000000000fb5b47 in commit_cache_rebuild (ctx=0x7f2e26ac18a0)
at handler/handler0alter.cc:7763
#7 ha_innobase::commit_inplace_alter_table (this=0x7f2e26cbb030,
altered_table=<optimized out>, ha_alter_info=<optimized out>, commit=<optimized out>)
at handler/handler0alter.cc:8682
#8 0x0000000000d5142e in mysql_inplace_alter_table (target_mdl_request=0x7f2e73af0ea0,
alter_ctx=0x7f2e73af19f0, inplace_supported=HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE,
ha_alter_info=0x7f2e73af1460, altered_table=0x7f2e26bdf820, table=0x7f2e26b17a20,
table_list=0x7f2e1a196278, thd=0x7f2e1a2bd000) at sql_table.cc:7890
#9 mysql_alter_table (thd=thd@entry=0x7f2e1a2bd000, new_db=<optimized out>,
new_name=<optimized out>, create_info=create_info@entry=0x7f2e73af25c0,
table_list=table_list@entry=0x7f2e1a196278, alter_info=alter_info@entry=0x7f2e73af2510)
at sql_table.cc:10210
#10 0x0000000000e7f980 in Sql_cmd_alter_table::execute (this=<optimized out>,
thd=0x7f2e1a2bd000) at sql_alter.cc:340
#11 0x0000000000ce2d02 in mysql_execute_command (thd=thd@entry=0x7f2e1a2bd000,
first_level=first_level@entry=true) at sql_parse.cc:5320
#12 0x0000000000ce765d in mysql_parse (thd=thd@entry=0x7f2e1a2bd000,
parser_state=parser_state@entry=0x7f2e73af3c90) at sql_parse.cc:6122
#13 0x0000000000ce826f in dispatch_command (thd=thd@entry=0x7f2e1a2bd000,
com_data=com_data@entry=0x7f31f81e2630, command=command@entry=COM_QUERY)
at sql_parse.cc:1647
#14 0x00000000013504ce in do_threadpool_command (connection=connection@entry=0x7f31f81e25e0,
com_data=com_data@entry=0x7f31f81e2630, command=COM_QUERY) at threadpool_unix.cc:2553
#15 0x00000000013506f7 in threadpool_process_request_unix (thd=0x7f2e1a2bd000,
connection=connection@entry=0x7f31f81e25e0, skip_io=skip_io@entry=0x7f2e73af4440)
at threadpool_unix.cc:1913
#16 0x0000000001350cac in handle_event (connection=connection@entry=0x7f31f81e25e0,
thdstack=thdstack@entry=0x7f2e73af45d0, this_thread=0x7f2e26a80020)
at threadpool_unix.cc:1989
thread#2: finished one page(71538,207774), but waiting for another page(71538,196609)'s lock, because of change buffer merge.
#0 0x00007f32030ed9b5 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x0000000001013b61 in wait (this=<optimized out>) at os/os0event.cc:179
#2 wait_low (reset_sig_count=<optimized out>, this=0x7f310c337498) at os/os0event.cc:366
#3 os_event_wait_low (event=0x7f310c337498, reset_sig_count=<optimized out>)
at os/os0event.cc:611
#4 0x00000000010cb832 in sync_array_wait_event (arr=arr@entry=0x7f32016728f8,
cell=@0x7f2e86bfa7e0: 0x7f31f28ee818) at sync/sync0arr.cc:483
#5 0x00000000010ce01d in rw_lock_x_lock_func (lock=0x7f30d9ba3760, pass=pass@entry=0,
file_name=file_name@entry=0x17907e0 "ibuf/ibuf0ibuf.cc", line=line@entry=4494)
at sync/sync0rw.cc:820
#6 0x0000000001157e65 in pfs_rw_lock_x_lock_func (lock=lock@entry=0x7f30d9ba3760,
file_name=file_name@entry=0x17907e0 "ibuf/ibuf0ibuf.cc", line=line@entry=4494, pass=0)
at include/sync0rw.ic:719
#7 0x000000000115e24a in buf_page_get_gen (page_id=..., page_size=...,
rw_latch=rw_latch@entry=2, guess=<optimized out>, guess@entry=0x0,
mode=mode@entry=10, file=<optimized out>, line=4494, mtr=0x7f2e86bfbf00,
dirty_with_no_latch=dirty_with_no_latch@entry=false) at buf/buf0buf.cc:4610
#8 0x0000000000fc5bc3 in ibuf_bitmap_get_map_page_func (page_id=..., page_size=...,
file=<optimized out>, line=<optimized out>, mtr=<optimized out>)
at ibuf/ibuf0ibuf.cc:818
#9 0x0000000000fd0b7e in ibuf_merge_or_delete_for_page (
block=block@entry=0x7f31e2055f30, page_id=...,
page_size=page_size@entry=0x7f31e2055f40,
update_ibuf_bitmap=update_ibuf_bitmap@entry=1) at ibuf/ibuf0ibuf.cc:4494
#10 0x0000000001160e8f in buf_page_io_complete (bpage=0x7f31e2055f30,
evict=evict@entry=false) at buf/buf0buf.cc:5806
#11 0x00000000011d292d in fil_aio_wait (segment=segment@entry=4) at fil/fil0fil.cc:5951
#12 0x00000000010bedd8 in io_handler_thread (arg=<optimized out>) at srv/srv0start.cc:321
#13 0x00007f32030e9e25 in start_thread () from /lib64/libpthread.so.0
#14 0x00007f3201fb9f1d in clone () from /lib64/libc.so.6
thread#3: try to flush page(71538, 196609) and wait for rename to finish.
but hold page(71538,196609) lock, because double write buffer flushes page in batch.
p buf_dblwr->buf_block_arr[56].id
$74 = {m_space = 71538, m_page_no = 196609, m_fold = 1998854003}
#0 0x00007f32030f0e8d in nanosleep () from /lib64/libpthread.so.0
#1 0x0000000001014320 in os_thread_sleep (tm=tm@entry=20000) at os/os0thread.cc:287
#2 0x00000000011daa57 in fil_mutex_enter_and_prepare_for_io (space_id=71538)
at fil/fil0fil.cc:1079
#3 0x00000000011dae5a in fil_io (type=..., sync=<optimized out>, page_id=..., page_size=...,
byte_offset=byte_offset@entry=0, len=16384, buf=0x7f3117aa8000, message=0x7f311609d260)
at fil/fil0fil.cc:5645
#4 0x0000000001167019 in buf_dblwr_write_block_to_datafile (bpage=0x7f311609d260,
sync=sync@entry=false) at buf/buf0dblwr.cc:931
#5 0x0000000001169c44 in buf_dblwr_flush_buffered_writes () at buf/buf0dblwr.cc:1077
#6 0x000000000116a1e4 in buf_dblwr_add_to_batch (bpage=bpage@entry=0x7f31af1dd930)
at buf/buf0dblwr.cc:1158
#7 0x0000000001176537 in buf_flush_write_block_low (sync=false, flush_type=BUF_FLUSH_LIST,
bpage=0x7f31af1dd930) at buf/buf0flu.cc:1114
#8 buf_flush_page (buf_pool=<optimized out>, bpage=0x7f31af1dd930,
flush_type=<optimized out>, sync=<optimized out>) at buf/buf0flu.cc:1245
#9 0x0000000001177227 in buf_flush_try_neighbors (page_id=...,
flush_type=flush_type@entry=BUF_FLUSH_LIST, n_flushed=n_flushed@entry=220,
n_to_flush=n_to_flush@entry=402) at buf/buf0flu.cc:1469
#10 0x000000000117850c in buf_flush_page_and_try_neighbors (flush_type=BUF_FLUSH_LIST,
count=<synthetic pointer>, n_to_flush=<optimized out>, bpage=<optimized out>)
at buf/buf0flu.cc:1541
#11 buf_do_flush_list_batch (buf_pool=buf_pool@entry=0x7f31f34a3958, min_n=min_n@entry=402,
lsn_limit=lsn_limit@entry=18446744073709551615) at buf/buf0flu.cc:1795
#12 0x000000000117a841 in buf_flush_batch (lsn_limit=18446744073709551615, min_n=402,
flush_type=BUF_FLUSH_LIST, buf_pool=<optimized out>) at buf/buf0flu.cc:1873
#13 buf_flush_do_batch (type=BUF_FLUSH_LIST, n_processed=0x7f30a724fd30,
lsn_limit=18446744073709551615, min_n=402, buf_pool=<optimized out>) at buf/buf0flu.cc:2033
#14 pc_flush_slot () at buf/buf0flu.cc:2891
#15 0x000000000117c005 in buf_flush_page_cleaner_coordinator (arg=<optimized out>)
at buf/buf0flu.cc:3316
#16 0x00007f32030e9e25 in start_thread () from /lib64/libpthread.so.0
#17 0x00007f3201fb9f1d in clone () from /lib64/libc.so.6