Description:
Hi, in our production environment, dropping a very large table (120 GB) is very slow (about 10 s), even though the buffer pool is only 2 GB, the adaptive hash index is off, and we create a hard link to the .ibd file beforehand.
I ran pstack against mysqld; the stack below shows it spends the whole time in dict_drop_index_tree. dict_drop_index_tree reads the first page of every extent and puts the extent on the free list, so it takes a long time.
So, when dropping a table, can we skip dict_drop_index_tree when the table is file-per-table, since the whole .ibd file is removed anyway?
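A back-of-envelope estimate of the cost (assuming the default 16 KB page size, i.e. 1 MB extents, and taking the behaviour described above at face value): a 120 GB tablespace has about 120 * 1024 = 122,880 extents. If the first page of each extent has to be read from disk synchronously, and the storage sustains roughly 10,000 random reads per second, that alone is on the order of 12 seconds, which matches the ~10 s we observe.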
#0 0x00007f774aa6d133 in pread64 () from /lib64/libpthread.so.0
#1 0x0000000000fed465 in os_file_io(IORequest const&, int, void*, unsigned long, unsigned long, dberr_t*) ()
#2 0x0000000000fede39 in os_file_read_page(IORequest&, int, void*, unsigned long, unsigned long, unsigned long*, bool, trx_t*) ()
#3 0x0000000000feff0a in os_aio_func(IORequest&, unsigned long, char const*, pfs_os_file_t, void*, unsigned long, unsigned long, bool, fil_node_t*, void*, unsigned long, trx_t*, bool) ()
#4 0x00000000011a99f9 in _fil_io(IORequest const&, bool, page_id_t const&, page_size_t const&, unsigned long, unsigned long, void*, void*, trx_t*, bool) ()
#5 0x000000000115d436 in buf_read_page_low(dberr_t*, bool, unsigned long, unsigned long, page_id_t const&, page_size_t const&, bool, trx_t*, bool) ()
#6 0x000000000115dfc7 in buf_read_page(page_id_t const&, page_size_t const&, trx_t*) ()
#7 0x0000000001131883 in buf_page_get_gen(page_id_t const&, page_size_t const&, unsigned long, buf_block_t*, unsigned long, char const*, unsigned long, mtr_t*, bool, dberr_t*) ()
#8 0x00000000011080e5 in btr_free_if_exists(page_id_t const&, page_size_t const&, unsigned long, mtr_t*) ()
#9 0x0000000001162d14 in dict_drop_index_tree(unsigned char*, btr_pcur_t*, mtr_t*) ()
#10 0x0000000001086c66 in row_upd_clust_step(upd_node_t*, que_thr_t*) ()
#11 0x0000000001088019 in row_upd(upd_node_t*, que_thr_t*) ()
#12 0x0000000001088163 in row_upd_step(que_thr_t*) ()
#13 0x00000000010133f8 in que_run_threads(que_thr_t*) ()
#14 0x0000000001013f12 in que_eval_sql(pars_info_t*, char const*, unsigned long, trx_t*) ()
#15 0x00000000010526fc in row_drop_table_for_mysql(char const*, trx_t*, bool, bool, dict_table_t*) ()
#16 0x0000000000f71467 in ha_innobase::delete_table(char const*) ()
#17 0x000000000082cf5f in ha_delete_table(THD*, handlerton*, char const*, char const*, char const*, bool) ()
#18 0x0000000000cfe855 in mysql_rm_table_no_locks(THD*, TABLE_LIST*, bool, bool, bool, bool) ()
#19 0x0000000000cff741 in mysql_rm_table(THD*, TABLE_LIST*, char, char) ()
#20 0x0000000000c92de0 in mysql_execute_command(THD*, bool) ()
#21 0x0000000000c9621d in mysql_parse(THD*, Parser_state*, bool) ()
#22 0x0000000000c97219 in dispatch_command(THD*, COM_DATA const*, enum_server_command) ()
#23 0x0000000000c988f3 in tp_do_command(THD*, tp_do_command_vars*) ()
#24 0x0000000000d3efa8 in worker_process_request(THD*, connection_t*, worker_thread_t*, bool*) ()
#25 0x0000000000d3fc31 in worker_handle_event(worker_thread_t*, connection_t*) ()
#26 0x0000000000d40dd9 in worker_main(void*) ()
#27 0x0000000000ed766f in pfs_spawn_thread ()
#28 0x00007f774aa65aa1 in start_thread () from /lib64/libpthread.so.0
#29 0x00007f77493adc4d in clone () from /lib64/libc.so.6
How to repeat:
Set the buffer pool size to 2G.
Use sysbench to create a table with 500,000,000 rows.
Drop the table.
Suggested fix:
In storage/innobase/row/row0upd.cc, change
################
if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
        ut_ad(!dict_index_is_online_ddl(index));
        dict_drop_index_tree(
                btr_pcur_get_rec(pcur), pcur, &mtr);
################
to
if (/* this SYS_INDEXES record is deleted as part of a DROP TABLE */
    /* && the index lives in a file-per-table tablespace */) {
        /* do nothing: the .ibd file will be deleted anyway */
} else if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
        ut_ad(!dict_index_is_online_ddl(index));
        dict_drop_index_tree(
                btr_pcur_get_rec(pcur), pcur, &mtr);
################
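For illustration only, here is a rough sketch of the shape such a change could take; it is not a tested patch. The helper rec_space_is_file_per_table() is hypothetical and does not exist in the InnoDB source; a real patch would have to read the SPACE field of the SYS_INDEXES record and check the tablespace flags to decide whether the .ibd file is about to be deleted anyway:
################
/* Hypothetical sketch only, not a tested patch; in row_upd_clust_step(),
storage/innobase/row/row0upd.cc. */

if (node->is_delete && node->table->id == DICT_INDEXES_ID) {

        ut_ad(!dict_index_is_online_ddl(index));

        /* rec_space_is_file_per_table() is a made-up helper: it would
        read the SPACE field (DICT_FLD__SYS_INDEXES__SPACE) from the
        SYS_INDEXES record and check the tablespace flags to see
        whether this is a file-per-table tablespace that the ongoing
        DROP TABLE will unlink anyway. */
        if (!rec_space_is_file_per_table(btr_pcur_get_rec(pcur))) {
                dict_drop_index_tree(
                        btr_pcur_get_rec(pcur), pcur, &mtr);
        }
        /* else: skip freeing the index tree extent by extent; deleting
        the .ibd file reclaims all of that space at once. */
}
################
One caveat with this approach: a delete from SYS_INDEXES also happens for operations other than DROP TABLE (for example DROP INDEX or a rebuilding ALTER TABLE), so the check would have to make sure the index tree is only left un-freed when the whole tablespace file is really going to be removed.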