| Bug #30364 | Falcon: running falcon_concurrent_blob_updates leads to assertion | ||
|---|---|---|---|
| Submitted: | 10 Aug 2007 19:06 | Modified: | 16 Aug 2007 18:13 |
| Reporter: | Hakan Küçükyılmaz | Email Updates: | |
| Status: | Closed | Impact on me: | |
| Category: | MySQL Server: Falcon storage engine | Severity: | S3 (Non-critical) |
| Version: | 6.0.2 | OS: | Linux (64-bit) |
| Assigned to: | Kevin Lewis | CPU Architecture: | Any |
[11 Aug 2007 17:56]
MySQL Verification Team
Thank you for the bug report.
070811 14:53:18 [ERROR] /home/miguel/dbs/6.0/libexec/mysqld: Sort aborted
Program received signal SIGILL, Illegal instruction.
[Switching to Thread 1186838848 (LWP 16235)]
0x00002af8c8be8fdd in raise () from /lib64/libpthread.so.0
(gdb) bt full
#0 0x00002af8c8be8fdd in raise () from /lib64/libpthread.so.0
No symbol table info available.
#1 0x00000000009393c0 in Error::debugBreak () at Error.cpp:92
No locals.
#2 0x00000000009394d4 in Error::error (string=0xd7a028 "assertion failed at line %d in file %s\n") at Error.cpp:69
buffer = "assertion failed at line 508 in file Record.cpp\n\000\221�F", '\0' <repeats 44 times>, "�\220�F\000\000\000\000�\220�F\000\000\000\000�\220�F\000\000\000\000�\220�F\000\000\000\000\b\221�F", '\0' <repeats 12 times>, "\020\221�F\000\000\000\000�\210\217", '\0' <repeats 29 times>, "\220�j��*", '\0' <repeats 42 times>, "0�\000��*\000\000\001", '\0' <repeats 14 times>
args = {{gp_offset = 24, fp_offset = 48, overflow_arg_area = 0x46bd91d0, reg_save_area = 0x46bd9100}}
#3 0x000000000093956b in Error::assertionFailed (fileName=0xd7f2e0 "Record.cpp", line=508) at Error.cpp:76
No locals.
#4 0x0000000000970357 in Record::release (this=0x2af8c9f594e8) at Record.cpp:508
No locals.
#5 0x00000000008e6419 in StorageDatabase::nextRow (this=0x2af8c9dfa1c8, storageTable=0x2af8c9c79788, recordNumber=894,
lockForUpdate=true) at StorageDatabase.cpp:278
exception = (SQLException &) @0x2aaab000ea50: {_vptr.SQLException = 0xd83050}
storageConnection = (class StorageConnection *) 0x2af8c9ed6070
connection = (class Connection *) 0x2af8c9ed6108
table = (class Table *) 0x2af8c9e52108
transaction = (class Transaction *) 0x2af8c9f55c10
candidate = (class Record *) 0x2af8c9f594e8
record = (class Record *) 0x0
#6 0x00000000008ebc21 in StorageTable::next (this=0x2af8c9c79788, recordNumber=0, lockForUpdate=true) at StorageTable.cpp:126
ret = 0
#7 0x00000000008df65e in StorageInterface::rnd_next (this=0x1d65eb8, buf=0x1d660f8 "�K*��") at ha_falcon.cpp:480
_db_func_ = 0xd3536f "mysql_update"
_db_file_ = 0xd35146 "sql_update.cc"
_db_level_ = 5
_db_framep_ = (char **) 0x46bd9340
#8 0x00000000007d0865 in rr_sequential (info=0x46bd9560) at records.cc:363
tmp = 0
#9 0x00000000007686c7 in mysql_update (thd=0x1fa1f38, table_list=0x203ee18, fields=@0x1fa3a40, values=@0x1fa3e18, conds=0x0,
order_num=0, order=0x0, limit=18446744073709551615, handle_duplicates=DUP_ERROR, ignore=false) at sql_update.cc:558
using_limit = false
---Type <return> to continue, or q <return> to quit---
safe_update = false
used_key_is_modified = false
transactional_table = true
will_batch = false
can_compare_record = true
res = 0
error = 0
loc_error = 1186830016
used_index = 64
dup_key_found = 0
need_sort = true
want_privilege = 0
table_count = 1
updated = 0
found = 0
old_covering_keys = {map = 0}
table = (TABLE *) 0x1f9d5f8
select = (SQL_SELECT *) 0x0
info = {table = 0x1f9d5f8, file = 0x1d65eb8, forms = 0x46bd9560, read_record = 0x7d07ea <rr_sequential(st_read_record*)>,
thd = 0x1fa1f38, select = 0x0, cache_records = 0, ref_length = 4, struct_length = 0, reclength = 0, rec_cache_size = 0,
error_offset = 0, index = 0, ref_pos = 0x0, record = 0x1d660f8 "�K*��", rec_buf = 0x0, cache = 0x0, cache_pos = 0x0, cache_end = 0x0,
read_positions = 0x0, io_cache = 0x0, print_error = true, ignore_not_found_rows = false}
select_lex = (SELECT_LEX *) 0x1fa3938
need_reopen = false
id = 33174896
all_fields = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x1253380, last = 0x46bd9630,
elements = 0}, <No data fields>}
_db_func_ = 0xcdcb10 "mysql_execute_command"
_db_file_ = 0xcdc4b0 "sql_parse.cc"
_db_level_ = 4
_db_framep_ = (char **) 0x1fa2da0
__PRETTY_FUNCTION__ = "int mysql_update(THD*, TABLE_LIST*, List<Item>&, List<Item>&, COND*, uint, ORDER*, ha_rows, enum_duplicates, bool)"
#10 0x00000000006cbc90 in mysql_execute_command (thd=0x1fa1f38) at sql_parse.cc:2592
---Type <return> to continue, or q <return> to quit---
res = false
need_start_waiting = false
up_result = 0
lex = (LEX *) 0x1fa34d0
select_lex = (SELECT_LEX *) 0x1fa3938
first_table = (TABLE_LIST *) 0x203ee18
all_tables = (TABLE_LIST *) 0x203ee18
unit = (SELECT_LEX_UNIT *) 0x1fa3570
_db_func_ = 0xcdd2a0 "mysql_parse"
_db_file_ = 0xcdc4b0 "sql_parse.cc"
_db_level_ = 3
_db_framep_ = (char **) 0x20034d0
__PRETTY_FUNCTION__ = "bool mysql_execute_command(THD*)"
#11 0x00000000006d1e57 in mysql_parse (thd=0x1fa1f38, inBuf=0x203ed58 "UPDATE t1 SET c3 = md5(c3)", length=26,
found_semicolon=0x46bdaee0) at sql_parse.cc:5381
lex = (LEX *) 0x1fa34d0
lip = {m_thd = 0x1fa1f38, yylineno = 1, yytoklen = 2, yylval = 0x46bd8fd0,
ptr = 0x203ed73 '\217' <repeats 93 times>, "\2003%\001", tok_start = 0x203ed73 '\217' <repeats 93 times>, "\2003%\001",
tok_end = 0x203ed73 '\217' <repeats 93 times>, "\2003%\001", end_of_query = 0x203ed72 "", tok_start_prev = 0x203ed72 "",
buf = 0x203ed58 "UPDATE t1 SET c3 = md5(c3)", next_state = MY_LEX_END, found_semicolon = 0x0, ignore_space = false,
stmt_prepare_mode = false}
err = 0
_db_func_ = 0xcdd31b "dispatch_command"
_db_file_ = 0xcdc4b0 "sql_parse.cc"
_db_level_ = 2
_db_framep_ = (char **) 0x3
__PRETTY_FUNCTION__ = "void mysql_parse(THD*, const char*, uint, const char**)"
#12 0x00000000006d2b91 in dispatch_command (command=COM_QUERY, thd=0x1fa1f38, packet=0x2aaab00008e9 "UPDATE t1 SET c3 = md5(c3)",
packet_length=27) at sql_parse.cc:907
packet_end = 0x203ed72 ""
format = 0xcdd32f "%.*b"
found_semicolon = 0x0
net = (NET *) 0x1fa2008
error = false
---Type <return> to continue, or q <return> to quit---
_db_func_ = 0xddd1b4 "?func"
_db_file_ = 0xddd1ba "?file"
_db_level_ = 1
_db_framep_ = (char **) 0x46bdb000
#13 0x00000000006d3f38 in do_command (thd=0x1fa1f38) at sql_parse.cc:669
packet = 0x2aaab00008e8 "\003UPDATE t1 SET c3 = md5(c3)"
packet_length = 27
net = (NET *) 0x1fa2008
command = COM_QUERY
_db_func_ = 0xddd1b4 "?func"
_db_file_ = 0xddd1ba "?file"
_db_level_ = 1
_db_framep_ = (char **) 0x46bdb0a0
#14 0x00000000006c20e6 in handle_one_connection (arg=0x1fa1f38) at sql_connect.cc:1091
net = (NET *) 0x1fa2008
thd = (class THD *) 0x1fa1f38
launch_time = 0
#15 0x00002af8c8be109e in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#16 0x00002af8c99764cd in clone () from /lib64/libc.so.6
No symbol table info available.
#17 0x0000000000000000 in ?? ()
No symbol table info available.
(gdb)
[14 Aug 2007 4:39]
Kevin Lewis
I was able to compile and execute falcon_concurrent_blob_updates on Windows by creating a Visual Studio solution and changing the .c file in several ways. Now I can reproduce this bug in Windows. The ASSERT happens after about 10 concurrent threads have started up. There are two call stacks. One is the release()at StorageDatabase.cpp:278 in the function StorageDatabase::nextRow. It is always associated with a call to fetchForUpdate which fails with an error code that the "Record has changed since last read in table". The SQL statement is an Update. The transaction is RepeatableRead. The ASSERT can also happen at the release() called from fetchForUpdate(), Table.cpp, line 3085. The state is 'WasActive' indicating that the thread just waited on another transaction. In both cases the ASSERT fails because the RecordVersion referenced has been released and the memory is no longer reliable. I am adding some code to gather more info and isolate the reason fro the premature record destruction.
[15 Aug 2007 3:40]
Kevin Lewis
This bug is about an ASSERT(useCount > 0); in Record::release().
This would happen consistently when running falcon_concurrent_blob_updates with at least 10 concurrent threads.
I converted this test program to Windows and repeated it dozens of times while adding debug code to track the history of each Record object.
A record was being released while there still was a candidate pointer to it in either Table::fetchForUpdate or one of the three functions that call it. It is only seen with a high level of concurrency. Table::fetchForUpdate is intended to release the refCount of the candidate sent in if NULL is returned. But nothing is released if an exception occurs. Normally, the candiate record reference would then be released in the catch() code of the caller. But if fetchForUpdate() waited on a transaction, it will release the original candidate and fetch a new one. If this new one is not the same record, and it causes a deadlock, then an exception is thrown and the caller will release the wrong record.
The solution is to release before throwing an exception, and then not release in the catch{} code of the caller when fetchForUpdate was called.
This test no longer hits the assert in Record::release(). But it does hit an assert that was hit several times in the past. This is;
ASSERT(!bdb->syncObject.isLocked()); in Cache::fakePage() line 275. This was reported at one point in both Bug#30032 and Bug#28174. Each time, the bug morphed into some other problem and this ASSERT was lost sight of, or not repeated exactly. Now that many of the other problems have been fixed, I can reproduce this readily with 5 or more threads and less than 5 minutes run time.
I recommend that we open a new bug to track this fakePage assert separately and not let the bug morph into something else.
[16 Aug 2007 18:13]
Kevin Lewis
I assume Hakan cannot reproduce this bug either, since he asks me to close the bug. The new bug associated with the fakePage assert is Bug#30463

Description: Running falcon_concurrent_blob_updates leads to an assertion. How to repeat: Run falcon_concurrent_blob_updates with 15 threads like ./falcon_concurrent_blob_updates -r540 -s1 -n15 Bugcheck: assertion failed at line 508 in file Record.cpp Program received signal SIGILL, Illegal instruction. [Switching to Thread 1118099792 (LWP 31822)] 0x00002b22607d8fad in raise () from /lib/libpthread.so.0 (gdb) (gdb) bt #0 0x00002b22607d8fad in raise () from /lib/libpthread.so.0 #1 0x00000000008ec784 in Error::debugBreak () at Error.cpp:92 #2 0x00000000008ec898 in Error::error (string=0xc75130 "assertion failed at line %d in file %s\n") at Error.cpp:69 #3 0x00000000008ec92f in Error::assertionFailed (fileName=0xc7a140 "Record.cpp", line=508) at Error.cpp:76 #4 0x000000000092058f in Record::release (this=0x2aaab23b68c0) at Record.cpp:508 #5 0x000000000089e131 in StorageDatabase::nextRow (this=0x2b22620441c8, storageTable=0x2b2261e9eff8, recordNumber=2093, lockForUpdate=true) at StorageDatabase.cpp:278 #6 0x00000000008a3939 in StorageTable::next (this=0x2b2261e9eff8, recordNumber=0, lockForUpdate=true) at StorageTable.cpp:126 #7 0x000000000088c64a in StorageInterface::rnd_next (this=0x1320268, buf=0x13204a8 "��*@�") at ha_falcon.cpp:480 #8 0x00000000007b5c40 in find_all_keys (param=0x42a4b0a0, select=0x0, sort_keys=0x130ed48, buffpek_pointers=0x42a4ae00, tempfile=0x42a4af50, indexfile=0x0) at filesort.cc:516 #9 0x00000000007b6843 in filesort (thd=0x12d3aa8, table=0x142c978, sortorder=0x12787d8, s_length=1, select=0x0, max_rows=18446744073709551615, sort_positions=true, examined_rows=0x42a4b620) at filesort.cc:233 #10 0x000000000074c03e in mysql_delete (thd=0x12d3aa8, table_list=0x1277ff0, conds=0x0, order=0x12d5648, limit=10, options=0, reset_auto_increment=false) at sql_delete.cc:189 #11 0x00000000006ada25 in mysql_execute_command (thd=0x12d3aa8) at sql_parse.cc:2829 #12 0x00000000006b3103 in mysql_parse (thd=0x12d3aa8, inBuf=0x1277f18 "DELETE FROM t1 ORDER BY c1 * rand(8) LIMIT 10", length=45, found_semicolon=0x42a4cf00) at sql_parse.cc:5381 #13 0x00000000006b3e3d in dispatch_command (command=COM_QUERY, thd=0x12d3aa8, packet=0x1466f09 "DELETE FROM t1 ORDER BY c1 * rand(8) LIMIT 10", packet_length=46) at sql_parse.cc:907 #14 0x00000000006b51e4 in do_command (thd=0x12d3aa8) at sql_parse.cc:669 #15 0x00000000006a3392 in handle_one_connection (arg=0x12d3aa8) at sql_connect.cc:1091 #16 0x00002b22607d1225 in start_thread () from /lib/libpthread.so.0 #17 0x00002b226159f33d in clone () from /lib/libc.so.6