| Bug #52580 | Crash in ha_innobase::open on executing INSERT with concurrent ALTER TABLE | ||
|---|---|---|---|
| Submitted: | 4 Apr 2010 1:17 | Modified: | 15 Jun 2010 14:57 |
| Reporter: | Elena Stepanova | Email Updates: | |
| Status: | Closed | Impact on me: | |
| Category: | MySQL Server: InnoDB storage engine | Severity: | S1 (Critical) |
| Version: | 5.5.4-m3 | OS: | Any |
| Assigned to: | Jimmy Yang | CPU Architecture: | Any |
| Tags: | regression | ||
[4 Apr 2010 1:22]
Elena Stepanova
mysql-stress-test-based test
Attachment: bug52580_stress_test.tar.gz (application/x-gzip, text), 1.83 KiB.
[6 Apr 2010 11:34]
Konstantin Osipov
I can't repeat the bug locally on either mysql-trunk-runtime-exp or mysql-trunk-iplus-test. The backtrace analysis suggests that bug is not in the server:
the crashed connection has a valid metadata lock (as expected), and the
ALTER statement is rightfully blocked on that metadata lock.
TABLE_SHARE of the crashed connection is valid:
(gdb) p thd->mdl_context.m_tickets
$3 = {<I_P_List_null_counter> = {<No data fields>}, first = 0x80f08a0}
(gdb) p thd->mdl_context.m_tickets.first
$4 = (MDL_ticket *) 0x80f08a0
(gdb) p *thd->mdl_context.m_tickets.first
$5 = {next_in_context = 0x0, prev_in_context = 0x80f7dc8,
next_in_lock = 0x8111290, prev_in_lock = 0x80f2f50,
m_type = MDL_SHARED_WRITE, m_ctx = 0x80f7dc8, m_lock = 0x80f2d70}
(gdb) p *thd->mdl_context.m_tickets.first->m_lock
$6 = {_vptr.MDL_lock = 0xa771f0, key = {m_length = 10, m_db_name_length = 4,
m_ptr = "\001test\000tb0", '\0' <repeats 131 times>, "\002\000 '\177\000\000\001", '\0' <repeats 120 times>, "\020\000\000\000\000\000\000\000TCP/IP (34)", '\0' <repeats 45 times>, "ðò\220\000\000\000\000\000Ðü\220\000\000\000\000\000`ý\220\000\000\000\000\000Pý\220\000\000\000\000\000àü\220\000\000\000\000\000Ð÷\220\000\000\000\000\000Ðø\220\000\000\000"}, cached_object = 0x811d570,
cached_object_release_hook = 0x62ef50 <table_share_release_hook(void*)>,
m_rwlock = {m_prlock = {__rw_lock = {__status = 0, __spinlock = 1},
__rw_readers = 0, __rw_writer = 0x0, __rw_read_waiting = 0x90f890,
__rw_write_waiting = 0x90f990, __rw_kind = 0, __rw_pshared = 0},
m_psi = 0x2aaaac22a658}, m_granted = {
m_list = {<I_P_List_null_counter> = {<No data fields>},
first = 0x80f08a0}, m_bitmap = 16 '\020'}, m_waiting = {
m_list = {<I_P_List_null_counter> = {<No data fields>},
first = 0x80e3160}, m_bitmap = 32 ' '}, m_ref_usage = 71,
m_ref_release = 71, m_is_destroyed = false}
(gdb) p *thd->mdl_context.m_tickets.first->m_lock->m_cached_object
There is no member or method named m_cached_object.
(gdb) p *thd->mdl_context.m_tickets.first->m_lock->cached_object
Attempt to dereference a generic pointer.
(gdb) p thd->mdl_context.m_tickets.first->m_lock->cached_object
$7 = (void *) 0x811d570
(gdb) p (TABLE_SHARE*)(thd->mdl_context.m_tickets.first->m_lock->cached_object)
$8 = (TABLE_SHARE *) 0x811d570
(gdb) p *(TABLE_SHARE*)(thd->mdl_context.m_tickets.first->m_lock->cached_object)
$9 = {table_category = TABLE_CATEGORY_USER, name_hash = {key_offset = 0,
key_length = 0, blength = 0, records = 0, flags = 0, array = {
buffer = 0x0, elements = 0, max_element = 0, alloc_increment = 0,
size_of_element = 0}, get_key = 0, free = 0, charset = 0x0}, mem_root = {
free = 0x80f7230, used = 0x811d560, pre_alloc = 0x0, min_malloc = 32,
block_size = 992, block_num = 7, first_block_usage = 1,
error_handler = 0x5843f0 <sql_alloc_error_handler>}, keynames = {
count = 1, name = 0x0, type_names = 0x811d918, type_lengths = 0x0},
fieldnames = {count = 1, name = 0x0, type_names = 0x811d908,
type_lengths = 0x0}, intervals = 0x0, LOCK_ha_data = {m_mutex = {mutex = {
__m_reserved = 0, __m_count = 0, __m_owner = 0x0, __m_kind = 3,
__m_lock = {__status = 0, __spinlock = 0}}, spins = 8, rng_state = 1},
m_psi = 0x2aaaac214150}, next = 0x0, prev = 0x0,
used_tables = {<I_P_List_null_counter> = {<No data fields>},
first = 0x805c370},
free_tables = {<I_P_List_null_counter> = {<No data fields>}, first = 0x0},
field = 0x811d8f8, found_next_number_field = 0x0, timestamp_field = 0x0,
key_info = 0x80f7240, blob_field = 0x0,
default_values = 0x80f72d0 "ÿ", ' ' <repeats 199 times>..., comment = {
str = 0x811d8f0 "", length = 0}, table_charset = 0xed59c0, all_set = {
bitmap = 0x80f75d8, n_bits = 1, last_word_mask = 4294967294,
last_word_ptr = 0x80f75d8, mutex = 0x0}, table_cache_key = {
str = 0x811d8c0 "test", length = 9}, db = {str = 0x811d8c0 "test",
length = 4}, table_name = {str = 0x811d8c5 "tb0", length = 3}, path = {
str = 0x811d8d0 "./test/tb0", length = 10}, normalized_path = {
str = 0x811d8d0 "./test/tb0", length = 10}, connect_string = {
str = 0x811d8e8 "", length = 0}, keys_in_use = {map = 1},
keys_for_keyread = {map = 1}, min_rows = 0, max_rows = 0,
avg_row_length = 0, raid_chunksize = 0, mysql_version = 50504,
timestamp_offset = 0, reclength = 256, db_plugin = 0x7b5f1e0,
row_type = ROW_TYPE_DEFAULT, tmp_table = NO_TMP_TABLE,
unused1 = HA_CHOICE_UNDEF, unused2 = HA_CHOICE_UNDEF, version = 1,
ref_count = 3, open_count = 0, blob_ptr_size = 8, key_block_size = 0,
null_bytes = 1, last_null_bit_pos = 2, fields = 1, rec_buff_length = 264,
keys = 1, key_parts = 1, max_key_length = 257, max_unique_length = 0,
total_key_length = 256, uniques = 0, null_fields = 1, blob_fields = 0,
timestamp_field_offset = 0, varchar_fields = 0, db_create_options = 8,
db_options_in_use = 8, db_record_offset = 1, raid_type = 0, raid_chunks = 0,
rowid_field_offset = 0, primary_key = 64, next_number_index = 0,
next_number_key_offset = 0, next_number_keypart = 0, error = 0,
open_errno = 0, errarg = 0, column_bitmap_size = 4, frm_version = 10 '\n',
null_field_first = true, system = false, crypted = false,
db_low_byte_first = true, crashed = false, is_view = false,
table_map_id = 204, table_map_version = 18446744073709551615,
cached_row_logging_check = 1, auto_partitioned = false,
partition_info = 0x0, partition_info_len = 0,
partition_info_buffer_size = 0, part_state = 0x0, part_state_len = 0,
---Type <return> to continue, or q <return> to quit---
default_part_db_type = 0x0, table_field_def_cache = 0x0, ha_data = 0x0,
ha_data_destroy = 0, m_psi = 0x0}
[6 Apr 2010 11:34]
Konstantin Osipov
Other active threads:
(gdb) thread 2
[Switching to thread 2 (process 9066)]#0 0x0000003ba880d3b7 in fsync ()
from /lib64/libpthread.so.0
(gdb) bt
#0 0x0000003ba880d3b7 in fsync () from /lib64/libpthread.so.0
#1 0x00000000007e8c45 in os_file_flush_func (file=9) at os/os0file.c:2041
#2 0x000000000089c73a in fil_flush (space_id=<value optimized out>)
at ./include/os0file.ic:372
#3 0x00000000007db3de in log_write_up_to (lsn=<value optimized out>,
wait=512, flush_to_disk=1) at log/log0log.c:1512
#4 0x0000000000845911 in trx_commit_complete_for_mysql (trx=0x2aaab0005238)
at trx/trx0trx.c:1607
#5 0x00000000007be3c4 in innobase_commit (hton=<value optimized out>,
thd=0x80e5410, all=false) at handler/ha_innodb.cc:2686
#6 0x00000000006ee11a in ha_commit_trans (thd=0x80e5410, all=false)
at handler.cc:1258
#7 0x00000000007952a5 in trans_commit_stmt (thd=0x80e5410)
at transaction.cc:237
#8 0x00000000005f9dcb in dispatch_command (command=COM_QUERY, thd=0x80e5410,
packet=0x0, packet_length=135364368) at sql_parse.cc:1375
#9 0x00000000005ed9b8 in do_handle_one_connection (
thd_arg=<value optimized out>) at sql_connect.cc:1173
#10 0x00000000005eddba in handle_one_connection (arg=<value optimized out>)
at sql_connect.cc:1112
#11 0x000000000090eb17 in pfs_spawn_thread (arg=<value optimized out>)
at pfs.cc:1011
#12 0x0000003ba88062f7 in start_thread () from /lib64/libpthread.so.0
#13 0x0000003ba80ce85d in clone () from /lib64/libc.so.6
(gdb) thread 3
[Switching to thread 3 (process 9041)]#0 0x0000003ba880a697 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) bt
#0 0x0000003ba880a697 in pthread_cond_timedwait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
#1 0x00000000007917f0 in MDL_context::timed_wait (this=0x811dde8,
abs_timeout=0x46508af0) at ../include/mysql/psi/mysql_thread.h:1092
#2 0x00000000007939a9 in MDL_context::acquire_lock_impl (this=0x811dde8,
mdl_request=0x80d9720, lock_wait_timeout=<value optimized out>)
at mdl.cc:1555
#3 0x0000000000793bee in MDL_context::acquire_locks (this=0x811dde8,
mdl_requests=<value optimized out>, lock_wait_timeout=31536000)
at mdl.cc:1654
#4 0x0000000000633698 in open_tables (thd=0x811dd20, start=0x46508ec8,
counter=0x46508edc, flags=32, prelocking_strategy=0x4650ab90)
at sql_base.cc:4443
#5 0x00000000006339ff in open_and_lock_tables (thd=0x811de94,
tables=0x80d93d0, derived=false, flags=4294967295,
prelocking_strategy=0x811de58) at sql_base.cc:5217
#6 0x000000000070422d in mysql_alter_table (thd=0x811dd20,
new_db=0x80d98d8 "test", new_name=0x0, create_info=0x4650bc70,
table_list=0x80d93d0, alter_info=0x4650bd50, order_num=0, order=0x0,
ignore=false) at sql_table.cc:6558
#7 0x00000000005f58fd in mysql_execute_command (thd=0x811dd20)
at sql_parse.cc:2915
#8 0x00000000005f9c45 in mysql_parse (thd=0x811dd20,
inBuf=0x80d92b0 "ALTER TABLE tb0 MODIFY f1 CHAR(255) CHARACTER SET latin1", length=56, found_semicolon=0x4650d060) at sql_parse.cc:5871
#9 0x00000000005fa6b4 in dispatch_command (command=COM_QUERY, thd=0x811dd20,
packet=0x80d1281 "ALTER TABLE tb0 MODIFY f1 CHAR(255) CHARACTER SET latin1", packet_length=<value optimized out>) at sql_parse.cc:1023
#10 0x00000000005ed9b8 in do_handle_one_connection (
thd_arg=<value optimized out>) at sql_connect.cc:1173
#11 0x00000000005eddba in handle_one_connection (arg=<value optimized out>)
at sql_connect.cc:1112
#12 0x000000000090eb17 in pfs_spawn_thread (arg=<value optimized out>)
at pfs.cc:1011
#13 0x0000003ba88062f7 in start_thread () from /lib64/libpthread.so.0
#14 0x0000003ba80ce85d in clone () from /lib64/libc.so.6
As you can see from the trace, the ALTER thread is blocked on a metadata lock.
[6 Apr 2010 12:08]
Konstantin Osipov
16:01 < kostja> marko: hi 16:01 < marko> Hi Kostja! 16:01 < kostja> so my guess is that innodb code is to blame for bug 52580 and 16:02 < kostja> and Bug#52431 16:02 < kostja> what happens is: 16:02 < kostja> ha_innobase::open is calling 16:04 < kostja> innobase_build_index_translation 16:04 < kostja> this function, is accessing INNOBASE_SHARE, in particular: 16:04 < marko> 52431 sure looks like InnoDB bug. The culprit would be Jimmy Yang, who implemented the translation table 16:04 < kostja> index_mapping = share->idx_trans_tbl.index_mapping; 16:05 < kostja> while the other connection is doing: 16:05 < kostja> #0 0x0000003ba880d3b7 in fsync () from /lib64/libpthread.so.0 16:05 < kostja> #1 0x00000000007e8c45 in os_file_flush_func (file=9) at os/os0file.c:2041 16:05 < kostja> #2 0x000000000089c73a in fil_flush (space_id=<value optimized out>) at ./include/os0file.ic:372 16:05 < kostja> #3 0x00000000007db3de in log_write_up_to (lsn=<value optimized out>, wait=512, flush_to_disk=1) at log/log0log.c:1512 16:05 < kostja> #4 0x0000000000845911 in trx_commit_complete_for_mysql (trx=0x2aaab0005238) at trx/trx0trx.c:1607 16:05 < kostja> #5 0x00000000007be3c4 in innobase_commit (hton=<value optimized out>, thd=0x80e5410, all=false) at handler/ha_innodb.cc:2686 16:05 < kostja> #6 0x00000000006ee11a in ha_commit_trans (thd=0x80e5410, all=false) at handler.cc:1258 16:05 < kostja> #7 0x00000000007952a5 in trans_commit_stmt (thd=0x80e5410) at transaction.cc:237 16:05 < kostja> #8 0x00000000005f9dcb in dispatch_command (command=COM_QUERY, thd=0x80e5410, 16:05 < kostja> which I believe somehow changes INNOBASE_SHARE. 16:06 < kostja> previously it didn't crash because the concurrency was much lower, since LOCK_open was held around ha_innobase::open 16:06 < kostja> now we no longer keep lock_open around it and a race has surfaced 16:06 < marko> Sounds plausible. 16:06 < marko> Can you post that to one of the tickets and assign it to Jimmy Yang?
[7 Apr 2010 0:55]
Jimmy Yang
A recent change in bug #51557 removed the protection on ha_innobase::open with mutex LOCK_open protection: === modified file 'sql/sql_base.cc' --- a/sql/sql_base.cc 2010-03-19 10:39:08 +0000 +++ b/sql/sql_base.cc 2010-03-19 10:54:59 +0000 @@ -2914,9 +2914,11 @@ bool open_table(THD *thd, TABLE_LIST *ta while (table_cache_count > table_cache_size && unused_tables) free_cache_entry(unused_tables); + mysql_mutex_unlock(&LOCK_open); + /* make a new table */ if (!(table=(TABLE*) my_malloc(sizeof(*table),MYF(MY_WME)))) - goto err_unlock2; + DBUG_RETURN(TRUE); error= open_table_from_share(thd, share, alias, (uint) (HA_OPEN_KEYFILE | This subsequently cause race condition in ha_innobase::open for building index translation table, which do not use mutex protection as there is already such LOCK_open protection. Fix this by adding mutex protection for index translation table construction.
[14 May 2010 17:45]
Paul DuBois
Not in any released version. No changelog entry needed.
[15 Jun 2010 8:21]
Bugs System
Pushed into 5.5.5-m3 (revid:alik@sun.com-20100615080459-smuswd9ooeywcxuc) (version source revid:mmakela@bk-internal.mysql.com-20100415070122-1nxji8ym4mao13ao) (merge vers: 5.1.47) (pib:16)
[15 Jun 2010 8:38]
Bugs System
Pushed into mysql-next-mr (revid:alik@sun.com-20100615080558-cw01bzdqr1bdmmec) (version source revid:mmakela@bk-internal.mysql.com-20100415070122-1nxji8ym4mao13ao) (pib:16)

Description: Two threads execute simple insert INSERT INTO tb0 (f1) VALUES ('insert'); Another thread alters the table: ALTER TABLE tb0 MODIFY f1 CHAR(255) CHARACTER SET latin2; ALTER TABLE tb0 MODIFY f1 CHAR(255) CHARACTER SET latin1; Server crashes on the concurrent flow: #0 0x0000003ba880b132 in pthread_kill () from /lib64/libpthread.so.0 #1 0x00000000005df45e in handle_segfault (sig=11) at mysqld.cc:2751 #2 <signal handler called> #3 ha_innobase::open (this=0x1c1fedf0, name=0x1c149b90 "./test/tb0", mode=<value optimized out>, test_if_locked=<value optimized out>) at handler/ha_innodb.cc:3382 #4 0x00000000006e94ef in handler::ha_open (this=0x1c1c7433, table_arg=<value optimized out>, name=0x1c149b90 "./test/tb0", mode=2, test_if_locked=24) at handler.cc:2093 #5 0x0000000000637c8a in open_table_from_share (thd=0x1c1c46c0, share=0x1c149830, alias=<value optimized out>, db_stat=0, prgflag=<value optimized out>, ha_open_flags=0, outparam=0x1c1f1e80, is_create_table=false) at table.cc:1916 #6 0x0000000000630697 in open_table (thd=0x1c1c46c0, table_list=0x1c21d158, mem_root=0x464888b0, ot_ctx=0x464888f0, flags=0) at sql_base.cc:2911 #7 0x0000000000632e3b in open_tables (thd=0x1c1c46c0, start=0x464889b8, counter=0x464889cc, flags=0, prelocking_strategy=0x46488bc0) at sql_base.cc:4247 #8 0x00000000006338cf in open_and_lock_tables (thd=0x1c1c7433, tables=0x1c21d158, derived=true, flags=0, prelocking_strategy=0x18) at sql_base.cc:5217 #9 0x000000000066ccd6 in mysql_insert (thd=0x1c1c46c0, table_list=0x1c21d158, fields=@0x1c1c6a08, values_list=@0x1c1c6a50, update_fields=@0x1c1c6a38, update_values=@0x1c1c6a20, duplic=DUP_ERROR, ignore=false) at mysql_priv.h:1608 #10 0x00000000005f60ed in mysql_execute_command (thd=0x1c1c46c0) at sql_parse.cc:3243 #11 0x00000000005f9b15 in mysql_parse (thd=0x1c1c46c0, inBuf=0x1c21d060 "INSERT INTO tb0 (f1) VALUES ('insert')", length=38, found_semicolon=0x4648b060) at sql_parse.cc:5871 #12 0x00000000005fa584 in dispatch_command (command=COM_QUERY, thd=0x1c1c46c0, packet=0x1c215031 "INSERT INTO tb0 (f1) VALUES ('insert')", packet_length=<value optimized out>) at sql_parse.cc:1023 #13 0x00000000005ed888 in do_handle_one_connection (thd_arg=<value optimized out>) at sql_connect.cc:1173 #14 0x00000000005edc8a in handle_one_connection (arg=<value optimized out>) at sql_connect.cc:1112 #15 0x000000000090e017 in pfs_spawn_thread (arg=<value optimized out>) at pfs.cc:1011 #16 0x0000003ba88062f7 in start_thread () from /lib64/libpthread.so.0 #17 0x0000003ba80ce85d in clone () from /lib64/libc.so.6 Could not reproduce on 5.5.3-m3. How to repeat: - unpack the attached archive in <mysql-basedir>/mysql-test. It should create 'stress_test_basedir' folder; - on Linux/Solaris, run as perl ./stress_test_basedir/run.pl