Bug #22153 Crash recovery failure
Submitted: 9 Sep 2006 5:44 Modified: 7 Oct 2008 12:18
Reporter: Georg Richter Email Updates:
Status: Can't repeat Impact on me:
None 
Category:MySQL Server: Falcon storage engine Severity:S1 (Critical)
Version:5.2 OS:Linux (Linux)
Assigned to: CPU Architecture:Any
Tags: F_RECOVERY

[9 Sep 2006 5:44] Georg Richter
Description:
 If I force the server to go down abruptly,
then start it again, the database is
corrupt -- attempts to update it cause
crashes.

0 0x00002aaaab05143a in raise () from /lib64/tls/libc.so.6
1 0x0000000000918f40 in Error::debugBreak () at Error.cpp:77
0000002 0x0000000000919059 in Error::error (
    string=0xbae680 "read error on page %d of \"%s\": %s (%d)") at Error.cpp:54
0000003 0x00000000009262a9 in IO::readPage (this=0x2aaaabc45d00, bdb=0x2aaaad73bd18)
    at IO.cpp:153
0000004 0x000000000097902e in Cache::fetchPage (this=0x2aaaabc46118, dbb=0x2aaaabc45cf8,
    pageNumber=251664, pageType=PAGE_data_overflow, lockType=Exclusive) at Cache.cpp:154
0000005 0x0000000000912fa0 in Dbb::fetchPage (this=0x2aaaabc45cf8, pageNumber=251664,
    pageType=PAGE_data_overflow, lockType=Exclusive) at Dbb.cpp:180
0000006 0x0000000000988080 in DataPage::deleteOverflowPages (this=0x2aaaadcf0000,
    dbb=0x2aaaabc45cf8, overflowPageNumber=251664, transId=500005) at DataPage.cpp:296
0000007 0x0000000000988259 in DataPage::updateRecord (this=0x2aaaadcf0000,
    section=0x2aaaabc50178, lineNumber=161, stream=0x400bf600, transId=500005,
    earlyWrite=false) at DataPage.cpp:42
0000008 0x00000000009406ad in Section::updateRecord (this=0x2aaaabc50178, recordNumber=205781,
    stream=0x400bf600, transId=500005, earlyWrite=false) at Section.cpp:431
0000009 0x000000000091324e in Dbb::updateRecord (this=0x2aaaabc45cf8, sectionId=43,
    recordId=205781, stream=0x400bf600, transId=500005, earlyWrite=false) at Dbb.cpp:265
0000010 0x000000000094c1bb in SRLData::redo (this=0x400bfbb8) at SRLData.cpp:69
0000011 0x000000000094651b in SerialLog::recover (this=0x2aaaad7568d0) at SerialLog.cpp:264
0000012 0x000000000090e6ed in Database::openDatabase (this=0x2aaaad6f1b10,
    filename=0x400c0020 "/home/patg/mysql-build/mysql-5.0-jstar/mysql-test/var/master-data/test.ndb") at Database.cpp:568
0000013 0x0000000000984961 in Connection::getDatabase (this=0x2aaaabc45ac0,
    dbName=0x2aaaabc4582c "test",
    dbFileName=0x400c0020 "/home/patg/mysql-build/mysql-5.0-jstar/mysql-test/var/master-data/test.ndb", threads=0x2aaaabc45878) at Connection.cpp:1625
0000014 0x0000000000984b9f in Connection::openDatabase (this=0x2aaaabc45ac0,
    dbName=0x2aaaabc4582c "test", filename=0x2aaaabc45854 "./test.ndb",
    account=0xbb35e7 "mysql", password=0xbb35e7 "mysql", address=0x0, parent=0x2aaaabc45878)
    at Connection.cpp:906
0000015 0x000000000095843d in StorageDatabase::getOpenConnection (this=0x2aaaabc45450)
    at StorageDatabase.cpp:176
0000016 0x000000000095696d in StorageConnection::connect (this=0x2aaaabc45350)
    at StorageConnection.cpp:197
0000017 0x0000000000957608 in StorageConnection::getStorageConnection (
    path=0x2aaaabc45208 "./test", mySqlThread=0xf78830, createFlag=OpenDatabase)
    at StorageConnection.cpp:163

0000018 0x000000000095785d in getStorageConnection (path=0x2aaaabc45208 "./test",
    mySqlThread=0xf78830, create=OpenDatabase) at StorageConnection.cpp:49
---Type <return> to continue, or q <return> to quit---
0000019 0x00000000007687b1 in NfsStorageTable::open (this=0xf65608, name=0x400c03f0 "./test/t",
    mode=2, test_if_locked=2) at NfsStorageTable.cpp:159
0000020 0x00000000006cf8dc in handler::ha_open (this=0xf65608, name=0x400c03f0 "./test/t",
    mode=2, test_if_locked=2) at handler.cc:1413
0000021 0x00000000006474ba in openfrm (thd=0xf78830, name=0x400c0c10 "./test/t",
    alias=0xfa0590 "t", db_stat=8231, prgflag=44, ha_open_flags=0, outparam=0xf66a10)
    at table.cc:913
0000022 0x000000000063fbd3 in open_unireg_entry (thd=0xf78830, entry=0xf66a10,
    db=0xf69dc0 "test", name=0xfa0590 "t", alias=0xfa0590 "t", table_desc=0x400c1c50,
    mem_root=0x400c1a60) at sql_base.cc:1775
0000023 0x0000000000641542 in open_table (thd=0xf78830, table_list=0x400c1c50,
    mem_root=0x400c1a60, refresh=0x400c1a97, flags=0) at sql_base.cc:1307
0000024 0x0000000000641cd4 in open_tables (thd=0xf78830, start=0x400c1af0, counter=0x400c1b0c,
    flags=0) at sql_base.cc:2053
0000025 0x00000000006421ab in open_normal_and_derived_tables (thd=0xf78830, tables=0x400c1c50,
    flags=0) at sql_base.cc:2410
0000026 0x000000000070f7b9 in mysqld_list_fields (thd=0xf78830, table_list=0x400c1c50,
    wild=0xfa0598 "") at sql_show.cc:569
0000027 0x000000000061724c in dispatch_command (command=COM_FIELD_LIST, thd=0xf78830,
    packet=0xf98553 "", packet_length=3) at sql_parse.cc:1842

0000028 0x00000000006181e1 in do_command (thd=0xf78830) at sql_parse.cc:1515
0000029 0x00000000006185c9 in handle_one_connection (arg=0xf78830) at sql_parse.cc:1158
0000030 0x00002aaaab8e3fa5 in start_thread () from /lib64/tls/libpthread.so.0
0000031 0x00002aaaab0dfcb2 in clone () from /lib64/tls/libc.so.6
0000032 0x0000000000000000 in ?? ()
0000033 0x0000000000000000 in ?? ()
0000034 0x0000000000000000 in ?? ()

How to repeat:
 How to repeat:

I used SUSE 10.0, I used three terminal programs
(consoles).

On console 0, start the mysqld server.

On console 1, say:
su root
ps -A | grep mysqld
... This tells me the process number of mysqld

On console 0000002, say:

/* mysql as usual, i.e. start mysql client, 'use', etc. */

delimiter //
drop table t//
drop procedure p//

CREATE TABLE t (s1 char(100), s2 blob, s3 int(11) default NULL) ENGINE=JStar//

CREATE PROCEDURE p()
begin
  declare v int default 1;
  while v < 500000 do
    insert into t values ('a',repeat('b',1000),v);
    set v = v + 1;
 end while;
end//

CALL p()//

/*
  At this point there are 500,000 rows in table t,
  so the following UPDATE statement will take a
  long time.
*/

update t set s1 = concat(s1,'x')//

/* Wait for a few seconds. */

On console 1, say:
kill -9 X
where X = process number of mysqld.
This simulates a crash, but a fairly mild one,
since the operating-system buffers aren't cleared.

On console 0, start up mysqld again.
It's possible that failure will occur here,
but usually the server survives startup.

On console 0000002, say:
update t set s1 = concat(s1,'x')//
It should crash within a few seconds.

I tested this several times, the server never survives
till the end of the UPDATE.
I have tested the same way with InnoDB, no problem.
[13 Sep 2006 6:59] Hakan Küçükyılmaz
Verified on Linux 32-bit change set 1.2287, 2006-09-12.

Backtrace:

[New Thread 1301773232 (LWP 25083)]
Recovering database...
first recovery block is 500056
last recovery block is 500060
recovery read block is 500044
Bugcheck: assertion failed at line 356 in file SerialLog.cpp

Memory
Records

Program received signal SIGILL, Illegal instruction.
[Switching to Thread 1261628336 (LWP 25081)]
0xffffe410 in __kernel_vsyscall ()
(gdb) bt
#0  0xffffe410 in __kernel_vsyscall ()
#1  0x40181541 in raise () from /lib/tls/libc.so.6
#2  0x084d83be in Error::debugBreak () at Error.cpp:93
#3  0x084d841b in Error::error (string=0x871a83c "assertion failed at line %d in file %s\n")
    at Error.cpp:70
#4  0x084d84ab in Error::assertionFailed (fileName=0x8728fb5 "SerialLog.cpp", line=356)
    at Error.cpp:77
#5  0x0855eec3 in SerialLog::recover (this=0x8f5c3a8) at SerialLog.cpp:356
#6  0x08514f91 in Database::openDatabase (this=0x8f3c920,
    filename=0x4b32c5b0 "/data/mysql-sap/test.ndb") at Database.cpp:585
#7  0x0850c996 in Connection::getDatabase (this=0x8f3c7f8, dbName=0x8f15c4c "test",
    dbFileName=0x4b32c5b0 "/data/mysql-sap/test.ndb", threads=0x8f14680)
    at Connection.cpp:1641
#8  0x0850cbad in Connection::openDatabase (this=0x8f3c7f8, dbName=0x8f15c4c "test",
    filename=0x8ef96cc "./test.ndb", account=0x871bb1b "mysql", password=0x871bb1b "mysql",
    address=0x0, parent=0x8f14680) at Connection.cpp:922
#9  0x084e4af2 in StorageDatabase::getOpenConnection (this=0x8f323b0)
    at StorageDatabase.cpp:185
#10 0x084e318f in StorageConnection::connect (this=0x8f14640) at StorageConnection.cpp:219
#11 0x084e3cd5 in StorageConnection::getStorageConnection (path=0x8ef8718 "./test",
    mySqlThread=0x0, createFlag=OpenDatabase) at StorageConnection.cpp:182
#12 0x084e3ed9 in getStorageConnection (path=0x8ef8718 "./test", mySqlThread=0x0,
    create=OpenDatabase) at StorageConnection.cpp:65
#13 0x084db815 in NfsStorageTable::open (this=0x8f3c438, name=0x8f335ef "./test/t", mode=2,
    test_if_locked=2) at ha_falcon.cpp:179
#14 0x0832b103 in handler::ha_open (this=0x8f3c438, table_arg=0x8f31d28,
    name=0x8f335ef "./test/t", mode=2, test_if_locked=2) at handler.cc:1447
#15 0x0828ec41 in open_table_from_share (thd=0x8f30708, share=0x8f33398, alias=0x8f3e558 "t",
    db_stat=39, prgflag=44, ha_open_flags=0, outparam=0x8f31d28, is_create_table=false)
    at table.cc:1522
#16 0x0828911b in open_unireg_entry (thd=0x8f30708, entry=0x8f31d28, table_list=0x8f3e560,
    alias=0x8f3e558 "t", cache_key=0x4b32d202 "test", cache_key_length=7, mem_root=0x4b32d310)
    at sql_base.cc:2660
#17 0x0828ada2 in open_table (thd=0x8f30708, table_list=0x8f3e560, mem_root=0x4b32d310,
    refresh=0x4b32d347, flags=0) at sql_base.cc:2058
#18 0x0828b5a8 in open_tables (thd=0x8f30708, start=0x4b32d594, counter=0x4b32d528, flags=0)
    at sql_base.cc:2908
#19 0x082cb37a in mysql_update (thd=0x8f30708, table_list=0x8f3e560, fields=@0x8f30a70,
    values=@0x8f30c44, conds=0x0, order_num=0, order=0x0, limit=18446744073709551615,
    handle_duplicates=DUP_ERROR, ignore=false) at sql_update.cc:144
#20 0x08256c5b in mysql_execute_command (thd=0x8f30708) at sql_parse.cc:3326
#21 0x0825cf36 in mysql_parse (thd=0x8f30708,
    inBuf=0x8f3e4d8 "update t set s1 = concat(s1,'x')", length=32) at sql_parse.cc:6103
#22 0x0825d934 in dispatch_command (command=COM_QUERY, thd=0x8f30708,
    packet=0x8f36479 "update t set s1 = concat(s1,'x')", packet_length=33)
    at sql_parse.cc:1830
#23 0x0825ec6f in do_command (thd=0x8f30708) at sql_parse.cc:1614
#24 0x0825f071 in handle_one_connection (arg=0x8f30708) at sql_parse.cc:1228
#25 0x40283297 in start_thread () from /lib/tls/libpthread.so.0
#26 0x4021837e in clone () from /lib/tls/libc.so.6
#27 0x4b32ebb0 in ?? ()
[3 Oct 2006 3:42] Hakan Küçükyılmaz
Does work now on Linux 32-bit change set 1.2362, 2006-10-02.

After killing mysqld and restarting:

[05:22] root@test>update t set s1 = concat(s1,'x')//
ERROR 2006 (HY000): MySQL server has gone away
No connection. Trying to reconnect...
Connection id:    1
Current database: test

Query OK, 499999 rows affected (1 min 9.40 sec)

[05:24] root@test>select count(*) from t//
+----------+
| count(*) |
+----------+
|   499999 |
+----------+
1 row in set (1.91 sec)

Regards, Hakan
Rows matched: 499999  Changed: 499999  Warnings: 0
[25 Oct 2006 13:46] Peter Gulutzan
(Sorry for my delay noticing that this was in "need feedback" state.
Remember that the bugs that I reported via Mantis all have Georg's
name on them, so I don't get alerted when state changes.)

I did the test again, this time with 32-bit Ubuntu 6.10.
After killing and restarting mysqld, I see that the
recovery has still failed, but in a different way:
"select distinct s1 from t" returns 2 rows. That shows
that some rows, but not all have been affected by the UPDATE.
[26 Oct 2006 12:20] Hakan Küçükyılmaz
Hi Peter,

I can't repeat on Linux 32-bit, change set 1.2344, 2006-10-26.

I inserted 500000 rows like described. I run the first UPDATE and killed mysqld

[14:16] root@test>update t set s1 = concat(s1,'x')//
ERROR 2013 (HY000): Lost connection to MySQL server during query
5.1.13-beta-debug

Restarted mysqld again:

[14:06] root@test>update t set s1 = concat(s1,'x');
ERROR 2006 (HY000): MySQL server has gone away
No connection. Trying to reconnect...
Connection id:    1
Current database: test

Query OK, 499999 rows affected (37.52 sec)
Rows matched: 499999  Changed: 499999  Warnings: 0

5.1.13-beta-debug
[14:18] root@test>select distinct s1 from t;
+------+
| s1   |
+------+
| ax   |
+------+
1 row in set (6.94 sec)

Maybe the autocommit setting has an influence?

Regards, Hakan.
[27 Oct 2006 19:07] Patrick Galbraith
This is the backtrace I get:

#0  0x00002aaaab16643a in raise () from /lib64/tls/libc.so.6
#1  0x000000000085b198 in Error::debugBreak () at Error.cpp:93
#2  0x000000000085b2b1 in Error::error (string=0xbba508 "read error on page %d of \"%s\": %s (%d)") at Error.cpp:70
#3  0x0000000000870f1b in IO::readPage (this=0x2aaaab7cebd0, bdb=0x2aaaac22ddf8) at IO.cpp:169
#4  0x00000000008d84f1 in Cache::fetchPage (this=0x2aaaab7cf010, dbb=0x2aaaab7cebc8, pageNumber=145484, pageType=PAGE_data_overflow,
    lockType=Exclusive) at Cache.cpp:207
#5  0x0000000000852066 in Dbb::fetchPage (this=0x2aaaab7cebc8, pageNumber=145484, pageType=PAGE_data_overflow, lockType=Exclusive)
    at Dbb.cpp:196
#6  0x00000000008db750 in DataPage::deleteOverflowPages (this=0x2aaaac7a7000, dbb=0x2aaaab7cebc8, overflowPageNumber=145484,
    transId=500006) at DataPage.cpp:318
#7  0x00000000008db929 in DataPage::updateRecord (this=0x2aaaac7a7000, section=0x2aaaab7d4e50, lineNumber=2, stream=0x4007e390,
    transId=500006, earlyWrite=false) at DataPage.cpp:58
#8  0x00000000008a5b0e in Section::updateRecord (this=0x2aaaab7d4e50, recordNumber=438955, stream=0x4007e390, transId=500006,
    earlyWrite=false) at Section.cpp:478
#9  0x000000000085451c in Dbb::updateRecord (this=0x2aaaab7cebc8, sectionId=47, recordId=438955, stream=0x4007e390, transId=500006,
    earlyWrite=false) at Dbb.cpp:288
#10 0x00000000008c14bc in SRLUpdateRecords::redo (this=0x4007ee88) at SRLUpdateRecords.cpp:104
#11 0x00000000008ab665 in SerialLog::recover (this=0x2aaaac2355a0) at SerialLog.cpp:319
#12 0x000000000084eeef in Database::openDatabase (this=0x2aaaac1d0048,
    filename=0x4007f260 "/home/patg/mysql-build/mysql-5.1-falcon-clean/mysql-test/var/master-data/test.ndb") at Database.cpp:596
#13 0x0000000000842760 in Connection::getDatabase (this=0x2aaaab7ce980, dbName=0x2aaaab7ce6b4 "test",
    dbFileName=0x4007f260 "/home/patg/mysql-build/mysql-5.1-falcon-clean/mysql-test/var/master-data/test.ndb", threads=0x2aaaab7ce710)
    at Connection.cpp:1642
#14 0x00000000008429e5 in Connection::openDatabase (this=0x2aaaab7ce980, dbName=0x2aaaab7ce6b4 "test",
    filename=0x2aaaab7ce6e4 "./test.ndb", account=0xbaf723 "mysql", password=0xbaf723 "mysql", address=0x0, parent=0x2aaaab7ce710)
    at Connection.cpp:922
#15 0x0000000000817009 in StorageDatabase::getOpenConnection (this=0x2aaaab7ce2d0) at StorageDatabase.cpp:186
#16 0x0000000000812f37 in StorageConnection::connect (this=0x2aaaab7ce1b8) at StorageConnection.cpp:225
#17 0x0000000000813a69 in StorageConnection::getStorageConnection (path=0xf73010 "./test", mySqlThread=0x0, createFlag=OpenDatabase)
    at StorageConnection.cpp:188
#18 0x0000000000813cd3 in getStorageConnection (path=0xf73010 "./test", mySqlThread=0x0, create=OpenDatabase) at StorageConnection.cpp:66
#19 0x000000000080ff8c in NfsStorageTable::open (this=0xfea968, name=0xfc47a0 "./test/t", mode=2, test_if_locked=2) at ha_falcon.cpp:210
#20 0x000000000074e1ee in handler::ha_open (this=0xfea968, table_arg=0xfe98c8, name=0xfc47a0 "./test/t", mode=2, test_if_locked=2)
    at handler.cc:1530
#21 0x0000000000699121 in open_table_from_share (thd=0xfd67d8, share=0xfc4438, alias=0xfc48a0 "t", db_stat=39, prgflag=44,
    ha_open_flags=0, outparam=0xfe98c8, is_create_table=false) at table.cc:1540
#22 0x0000000000692700 in open_unireg_entry (thd=0xfd67d8, entry=0xfe98c8, table_list=0x40080c20, alias=0xfc48a0 "t",
    cache_key=0x400802e0 "test", cache_key_length=7, mem_root=0x40080430, flags=0) at sql_base.cc:2662
#23 0x000000000069465b in open_table (thd=0xfd67d8, table_list=0x40080c20, mem_root=0x40080430, refresh=0x4008048f, flags=0)
    at sql_base.cc:2047
#24 0x0000000000694f69 in open_tables (thd=0xfd67d8, start=0x400804e0, counter=0x4008051c, flags=0) at sql_base.cc:2909
#25 0x0000000000695392 in open_normal_and_derived_tables (thd=0xfd67d8, tables=0x40080c20, flags=0) at sql_base.cc:3241
[16 Nov 2006 14:25] Patrick Galbraith
I have tested this on windows xp (running mysqld in visual studio)

1. Create the table and procedure as described above in terminal 1
2. Run the update command in terminal 1
3. Use task manager to kill mysqld.exe
4. Restart mysqld in visual studio
5. Run the update command in terminal 2

I did not observe a crash. I will need to re-verify on a Linux box and see what I end up with.
[17 Nov 2006 19:09] Patrick Galbraith
This doesn't fail for me on Linux any more:

window 1:

mysql> CREATE PROCEDURE p()
    -> begin
    ->   declare v int default 1;
    ->   while v < 500000 do
    ->     insert into t values ('a',repeat('b',1000),v);
    ->     set v = v + 1;
    ->  end while;
    -> end//
Query OK, 0 rows affected (0.16 sec)

mysql>
mysql> CALL p()//
Query OK, 1 row affected (3 min 54.54 sec)

mysql> update t set s1 = concat(s1,'x')//

Window 3 (5 seconds)

patg@govinda:/my/mysql-5.1-falcon-clean2> ps aux|grep mysqld
patg     13751  0.1  7.1  82352 72896 pts/2    S    11:52   0:08 gdb -q
-nw -i mi -tty /dev/pts/13 --cd=/home/patg/workspace/falcon-clean3
--command=.gdbinit
/home/patg/mysql-build/mysql-5.1-falcon-clean3/sql/mysqld
patg     13767  2.6 11.6 575516 118872 ?       Sl   11:52   2:37
/home/patg/mysql-build/mysql-5.1-falcon-clean3/sql/mysqld
patg     14749  0.0  0.0   3824   764 pts/9    S+   13:32   0:00 grep
mysqld
patg@govinda:/my/mysql-5.1-falcon-clean2> kill -9 13767
patg@govinda:/my/mysql-5.1-falcon-clean2>  

Window 1 (immediate):
    
ERROR 2013 (HY000): Lost connection to MySQL server during query
mysql>

Window 2 (restart mysqld in debugger):

mysql> update t set s1 = concat(s1,'x');
ERROR 2006 (HY000): MySQL server has gone away
No connection. Trying to reconnect...
Connection id:    1
Current database: test

Query OK, 499999 rows affected (18.82 sec)
Rows matched: 499999  Changed: 499999  Warnings: 0

mysql> select distinct s1 from t;
+------+
| s1   |
+------+
| ax   |
+------+
1 row in set (4.46 sec)
[21 Nov 2006 21:24] Peter Gulutzan
I tried the same test again, this time with an index on columns s1,s3.
That is, I said:

"
...
delimiter //
drop table t//
drop procedure p//

CREATE TABLE t (s1 char(100), s2 blob, s3 int(11) default NULL) ENGINE=Falcon//

CREATE PROCEDURE p()
begin
  declare v int default 1;
  while v < 500000 do
    insert into t values ('a',repeat('b',1000),v);
    set v = v + 1;
 end while;
end//

CALL p()//

CREATE INDEX i ON t (s1,s3)//

/*
  At this point there are 500,000 rows in table t,
  so the following UPDATE statement will take a
  long time.
*/

update t set s1 = concat(s1,'x')//

/* Wait for a few seconds. */

On console 1, say:
kill -9 X
where X = process number of mysqld.
...
"

This caused a crash -- once. But on subsequent attempts
of the same thing, no crash. So I don't think this is
really resolved, but if it's set to "Can't repeat" that
is understandable.
[23 Nov 2006 13:46] Peter Gulutzan
The original test is almost okay now, I could not make it fail again.

So I made a different test. This one seems more reliable:
I tried it three times, and I got a corrupted database three times.

How to repeat:

I used SUSE 10.0, I used three terminal programs
(consoles).

On console 0, start the mysqld server.

On console 1, say:
su root
ps -A | grep mysqld
... This tells me the process number of mysqld

On console 0000002, say:

/* mysql as usual, i.e. start mysql client, 'use', etc. */

delimiter //
create database d1//
use d1//
set @@autocommit=0//
create procedure pe () begin declare v int; create table if not exists te (s1 int, s2 blob) engine=falcon; create index ie on te (s1); set v = 0; while v < 100000 do insert into te values (v,repeat('a',rand()*1000)); update te set s2 = concat(s2,v); update te set s1 = s1 - 1; delete from te where s1 = 999999; set v = v + 1; end while; drop table te; end//
call pe()//

/* Wait for a few seconds. */

On console 1, say:
kill -9 X
where X = process number of mysqld.
This simulates a crash, but a fairly mild one,
since the operating-system buffers aren't cleared.

On console 2, say:
select count(*) from te//
It should crash within a few seconds.

I tested this three times, the server never survives
till the end of the UPDATE.

This time I did not test the same way with InnoDB.
[27 Nov 2006 15:41] Jim Starkey
This was fixed over the weekend.
[27 Nov 2006 22:47] Hakan Küçükyılmaz
Tried as described in Peter Gulutzan's comment of 23th November on Linux 32-bit with change set 1.2367, 2006-11-26.

It crashes for me:

[23:45] root@d1>select count(*) from te//
ERROR 2006 (HY000): MySQL server has gone away
No connection. Trying to reconnect...
Connection id:    1
Current database: d1

ERROR 2013 (HY000): Lost connection to MySQL server during query

Regards, Hakan
[28 Nov 2006 0:23] Hakan Küçükyılmaz
Platform: Linux 32-bit Pentium M
OS: Suse 10.0
gcc: gcc (GCC) 4.0.2 20050901 (prerelease) (SUSE Linux)
glibc: glibc-2.3.5

Cloned 5.1-falcon with:
hakan@lu0008:~/work/mysql/> bk clone hakank@bk-internal.mysql.com:/home/bk/mysql-5.1-falcon mysql-5.1-falcon

hakan@lu0008:~/work/mysql> cd mysql-5.1-falcon 

Build with:
hakan@lu0008:~/work/mysql/mysql-5.1-falcon> BUILD/compile-pentium-debug-max-no-ndb --prefix=/usr/local/mysql-5.1-debug

Installed with:
hakan@lu0008:~/work/mysql/mysql-5.1-falcon> sudo make install

On console 1 we start mysqld as root with help of mysqld_safe:
lu0008:~ # mysqld_safe &

We determine mysqld's PID:

lu0008:~ # ps aux | grep mysql
root     31542  0.8  0.1   2712  1304 pts/1    S    01:05   0:00 /bin/sh /usr/local/mysql/bin/mysqld_safe
mysql    31599  1.6  2.0  31040 21252 pts/1    Sl   01:05   0:00 /usr/local/mysql-5.1-debug/libexec/mysqld --basedir=/usr/local/mysql --datadir=/data/mysql-sap --user=mysql --pid-file=/data/mysql-sap/lu0008.pid --log-error=/data/mysql-sap/lu0008.err --socket=/tmp/mysql.sock --port=3306

The PID in question is 31599. We prepare the kill -9, but not execute it yet:
lu0008:~ # kill -9 31599

On Console 2 we start mysql client as normal non-root user:
hakan@lu0008:~> mysql -uroot
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 5.1.13-falcon-alpha-debug Source distribution

Type 'help;' or '\h' for help. Type '\c' to clear the buffer.
[01:16] root@(none)>

On the mysql client we execute follwing commands:
drop database if exists d1;
drop procedure if exists d1.pe;

delimiter //
create database d1//
use d1//
set @@autocommit=0//
create procedure pe ()
begin
  declare v int;
  create table if not exists te (s1 int, s2 blob) engine=falcon;
  create index ie on te (s1);
  set v = 0;
  while v < 100000 do
    insert into te values (v,repeat('a',rand()*1000));
    update te set s2 = concat(s2,v);
    update te set s1 = s1 - 1;
    delete from te where s1 = 999999;
    set v = v + 1;
  end while;
  drop table te;
end//
call pe()//

After calling pe() we wait for around 10 seconds.

On console 1 we execute the ready waiting kill -9:
lu0008:~ # kill -9 31599
lu0008:~ # /usr/local/mysql/bin/mysqld_safe: line 418: 31599 Killed                  nohup /usr/local/mysql-5.1-debug/libexec/mysqld --basedir=/usr/local/mysql --datadir=/data/mysql-sap --user=mysql --pid-file=/data/mysql-sap/lu0008.pid --log-error=/data/mysql-sap/lu0008.err --socket=/tmp/mysql.sock --port=3306 >>/data/mysql-sap/lu0008.err 2>&1

Number of processes running now: 0
061128 01:07:05  mysqld restarted

We see that mysqld get's restarted.

On console 2 we try:
[01:21] root@(none)>use d1;
Database changed

[01:21] root@d1>select count(*) from te;
ERROR 2013 (HY000): Lost connection to MySQL server during query

We see a 2013 Lost connection. We shutdown mysqld and restart it in gdb to get a backtrace:

(gdb) bt
#0  0xffffe410 in __kernel_vsyscall ()
#1  0x40182541 in raise () from /lib/tls/libc.so.6
#2  0x0844a0d2 in Error::debugBreak () at Error.cpp:93
#3  0x0844a12f in Error::error (string=0x8751014 "page %d wrong page type, expected %d got %d\n")
    at Error.cpp:70
#4  0x084be047 in Cache::fetchPage (this=0x402f9e14, dbb=0x402f9af4, pageNumber=103,
    pageType=PAGE_section_index, lockType=Exclusive) at Cache.cpp:224
#5  0x08440086 in Dbb::fetchPage (this=0x402f9af4, pageNumber=103, pageType=PAGE_section_index,
    lockType=Exclusive) at Dbb.cpp:195
#6  0x0848ae7e in Section::deleteSectionLevel (dbb=0x402f9af4, pageNumber=98, transId=0) at Section.cpp:796
#7  0x0848b05b in Section::deleteSection (dbb=0x402f9af4, sectionId=43, transId=0) at Section.cpp:777
#8  0x084404c9 in Dbb::deleteSection (this=0x402f9af4, sectionId=43, transId=0) at Dbb.cpp:469
#9  0x084378d1 in Database::deleteSection (this=0x4155d028, sectionId=43, transaction=0x0) at Database.cpp:1447
#10 0x084a5b7e in SRLDropTable::redo (this=0x41559f18) at SRLDropTable.cpp:81
#11 0x08491696 in SerialLog::recover (this=0x402faa1c) at SerialLog.cpp:325
#12 0x0843d341 in Database::openDatabase (this=0x4155d028, filename=0x4155a300 "/data/mysql-sap/d1.fts")
    at Database.cpp:597
#13 0x08431ba9 in Connection::getDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    dbFileName=0x4155a300 "/data/mysql-sap/d1.fts", threads=0x402f97f0) at Connection.cpp:1643
#14 0x08431dd5 in Connection::openDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    filename=0x402f97d0 "./d1.fts", account=0x873f4eb "mysql", password=0x873f4eb "mysql", address=0x0,
    parent=0x402f97f0) at Connection.cpp:923
#15 0x084076cd in StorageDatabase::getOpenConnection (this=0x402f95b4) at StorageDatabase.cpp:190
#16 0x08404621 in StorageConnection::connect (this=0x402f94fc) at StorageConnection.cpp:111
#17 0x08408c92 in StorageHandler::getStorageConnection (this=0x402f9028, path=0x891bf70 "./d1",
    mySqlThread=0x0, createFlag=OpenDatabase) at StorageHandler.cpp:123
#18 0x08401911 in NfsStorageTable::open (this=0x8963140, name=0x894a2f8 "./d1/te", mode=2, test_if_locked=2)
    at ha_falcon.cpp:266

Regards, Hakan
[29 Nov 2006 20:59] Jim Starkey
Split the physical and logical components of index recovery into separate
recovery passes.  All physical recovery (bucket split images) are now 
done during pass2 (physical) while logical add/drop node are done during
the third (logical or redo) pass.
[29 Nov 2006 23:25] Hakan Küçükyılmaz
Runs fine now on Linux 32-bit, change set 1.2373.1.2, 2006-11-29.

After "kill -9 pid_of_mysqld":

[00:16] root@d1>select count(*) from te//
+----------+
| count(*) |
+----------+
|        0 |
+----------+
1 row in set (0.00 sec)

Regards, Hakan
[30 Nov 2006 0:47] Peter Gulutzan
I tried again with the latest version,
ChangeSet@1.2373.1.2, 2006-11-29 16:35:33-05:00

After I restart the server and try to do anything
with the table, e.g. "select count(*) from te", I
still see a crash.
[30 Nov 2006 9:32] Hakan Küçükyılmaz
Verified on Linux 32-bit change set 1.2373.1.2, 2006-11-29.

After restarting mysqld, the database hits an assertion. I get following backtrace:

Program received signal SIGILL, Illegal instruction.
[Switching to Thread 1096141744 (LWP 19674)]
0xffffe410 in __kernel_vsyscall ()
(gdb) bt
#0  0xffffe410 in __kernel_vsyscall ()
#1  0x40182541 in raise () from /lib/tls/libc.so.6
#2  0x0844a8ea in Error::debugBreak () at Error.cpp:93
#3  0x0844a947 in Error::error (string=0x8747d58 "assertion failed at line %d in file %s\n") at Error.cpp:70
#4  0x0844a9d7 in Error::assertionFailed (fileName=0x874e215 "SerialLog.cpp", line=361) at Error.cpp:77
#5  0x084925bb in SerialLog::recover (this=0x402faa1c) at SerialLog.cpp:361
#6  0x0843dac9 in Database::openDatabase (this=0x4155d028, filename=0x4155a300 "/data/mysql-sap/d1.fts")
    at Database.cpp:597
#7  0x08432331 in Connection::getDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    dbFileName=0x4155a300 "/data/mysql-sap/d1.fts", threads=0x402f97f0) at Connection.cpp:1643
#8  0x0843255d in Connection::openDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    filename=0x402f97d0 "./d1.fts", account=0x874046b "mysql", password=0x874046b "mysql", address=0x0,
    parent=0x402f97f0) at Connection.cpp:923
#9  0x084079ad in StorageDatabase::getOpenConnection (this=0x402f95b4) at StorageDatabase.cpp:190
#10 0x08404901 in StorageConnection::connect (this=0x402f94fc) at StorageConnection.cpp:111
#11 0x08408f86 in StorageHandler::getStorageConnection (this=0x402f9028, path=0x891cf70 "./d1", mySqlThread=0x0,
    createFlag=OpenDatabase) at StorageHandler.cpp:123
#12 0x08401bf1 in NfsStorageTable::open (this=0x8964140, name=0x894b2f8 "./d1/te", mode=2, test_if_locked=2)
    at ha_falcon.cpp:266
#13 0x0834eb4d in handler::ha_open (this=0x8964140, table_arg=0x89636a0, name=0x894b2f8 "./d1/te", mode=2,
    test_if_locked=2) at handler.cc:1523
#14 0x082ad5ab in open_table_from_share (thd=0x8949a50, share=0x894b0a0, alias=0x8971240 "te", db_stat=39,
    prgflag=44, ha_open_flags=0, outparam=0x89636a0, is_create_table=false) at table.cc:1553
#15 0x082a7721 in open_unireg_entry (thd=0x8949a50, entry=0x89636a0, table_list=0x8971248, alias=0x8971240 "te",
    cache_key=0x4155afae "d1", cache_key_length=6, mem_root=0x4155b0c0, flags=0) at sql_base.cc:2667
#16 0x082a93c0 in open_table (thd=0x8949a50, table_list=0x8971248, mem_root=0x4155b0c0, refresh=0x4155b0f7, flags=0)
    at sql_base.cc:2052
#17 0x082a9bdc in open_tables (thd=0x8949a50, start=0x4155b154, counter=0x4155b144, flags=0) at sql_base.cc:2914
#18 0x082aa018 in open_and_lock_tables (thd=0x8949a50, tables=0x8971248) at sql_base.cc:3205
#19 0x0826ff8e in execute_sqlcom_select (thd=0x8949a50, all_tables=0x8971248) at sql_parse.cc:5186
#20 0x08275b2c in mysql_execute_command (thd=0x8949a50) at sql_parse.cc:2635
#21 0x0827d6b4 in mysql_parse (thd=0x8949a50, inBuf=0x89710a0 "select count(*) from te", length=23)
    at sql_parse.cc:6036
#22 0x0827e05e in dispatch_command (command=COM_QUERY, thd=0x8949a50, packet=0x8969041 "select count(*) from te",
    packet_length=24) at sql_parse.cc:1835
#23 0x0827f38d in do_command (thd=0x8949a50) at sql_parse.cc:1619
#24 0x0827f83d in handle_one_connection (arg=0x8949a50) at sql_parse.cc:1234
#25 0x40284297 in start_thread () from /lib/tls/libpthread.so.0
#26 0x4021937e in clone () from /lib/tls/libc.so.6
#27 0x4155cbb0 in ?? ()

Regards, Hakan
[30 Nov 2006 17:53] Jim Starkey
The most recently reported problem is fixed, however, other than being a recovery problem, it had nothing to do with the original.  The original problem was a generic problem with index recovery.  The latest incarnation, however, was a boundary problem where recovery succeeded, but exactly filled the serial log window (I suspect that something truncated the log file).
[30 Nov 2006 21:04] Hakan Küçükyılmaz
Still hitting assertion. Using Linux 32-bit change set 1.2381, 2006-11-30.

[21:58] root@d1>select count(*) from te//
ERROR 2006 (HY000): MySQL server has gone away
No connection. Trying to reconnect...
Connection id:    1
Current database: d1

ERROR 2013 (HY000): Lost connection to MySQL server during query

Backtrace is:
Program received signal SIGILL, Illegal instruction.
[Switching to Thread 1096141744 (LWP 31171)]
0xffffe410 in __kernel_vsyscall ()
(gdb) bt
#0  0xffffe410 in __kernel_vsyscall ()
#1  0x40182541 in raise () from /lib/tls/libc.so.6
#2  0x0844a91a in Error::debugBreak () at Error.cpp:93
#3  0x0844a977 in Error::error (string=0x8749974 "read error on page %d of \"%s\": %s (%d)") at Error.cpp:70
#4  0x0845e6f1 in IO::readPage (this=0x402f9af8, bdb=0x41590fe4) at IO.cpp:169
#5  0x084bedbf in Cache::fetchPage (this=0x402f9e14, dbb=0x402f9af4, pageNumber=105, pageType=PAGE_btree,
    lockType=Exclusive) at Cache.cpp:207
#6  0x0844083e in Dbb::fetchPage (this=0x402f9af4, pageNumber=105, pageType=PAGE_btree, lockType=Exclusive)
    at Dbb.cpp:195
#7  0x0845a16a in IndexRootPage::redoIndexPage (dbb=0x402f9af4, pageNumber=101, parentPage=105, level=0,
    priorPage=0, nextPage=98, length=2042, data=0x428da59f "") at IndexRootPage.cpp:741
#8  0x084a73f0 in SRLIndexPage::pass2 (this=0x41558888) at SRLIndexPage.cpp:70
#9  0x08492299 in SerialLog::recover (this=0x402faa1c) at SerialLog.cpp:304
#10 0x0843daf9 in Database::openDatabase (this=0x4155d028, filename=0x41558b70 "/data/mysql-sap/d1.fts")
    at Database.cpp:597
#11 0x08432361 in Connection::getDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    dbFileName=0x41558b70 "/data/mysql-sap/d1.fts", threads=0x402f97f0) at Connection.cpp:1643
#12 0x0843258d in Connection::openDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    filename=0x402f97d0 "./d1.fts", account=0x874050b "mysql", password=0x874050b "mysql", address=0x0,
    parent=0x402f97f0) at Connection.cpp:923
#13 0x084079dd in StorageDatabase::getOpenConnection (this=0x402f95b4) at StorageDatabase.cpp:190
#14 0x08404931 in StorageConnection::connect (this=0x402f94fc) at StorageConnection.cpp:111
#15 0x08408fb6 in StorageHandler::getStorageConnection (this=0x402f9028, path=0x891cf70 "./d1", mySqlThread=0x0,
    createFlag=OpenDatabase) at StorageHandler.cpp:123
#16 0x08401c21 in NfsStorageTable::open (this=0x896e6d8, name=0x896e160 "./d1/te", mode=2, test_if_locked=2)
    at ha_falcon.cpp:266
#17 0x0834eb7d in handler::ha_open (this=0x896e6d8, table_arg=0x896d850, name=0x896e160 "./d1/te", mode=2,
    test_if_locked=2) at handler.cc:1523
#18 0x082ad5db in open_table_from_share (thd=0x8949a50, share=0x896df08, alias=0x897a810 "te", db_stat=39,
    prgflag=44, ha_open_flags=0, outparam=0x896d850, is_create_table=false) at table.cc:1553
#19 0x082a7751 in open_unireg_entry (thd=0x8949a50, entry=0x896d850, table_list=0x897a818, alias=0x897a810 "te",
    cache_key=0x4155981e "d1", cache_key_length=6, mem_root=0x4155b410, flags=0) at sql_base.cc:2667
#20 0x082a93f0 in open_table (thd=0x8949a50, table_list=0x897a818, mem_root=0x4155b410, refresh=0x41559933, flags=0)
    at sql_base.cc:2052
#21 0x082a987c in open_ltable (thd=0x8949a50, table_list=0x897a818, lock_type=TL_WRITE_ALLOW_READ)
    at sql_base.cc:3108
#22 0x0836c0e6 in mysql_alter_table (thd=0x8949a50, new_db=0x897a9c0 "d1", new_name=0x897a7f0 "te",
    lex_create_info=0x4155aa78, table_list=0x897a818, fields=@0x4155ab38, keys=@0x898144c, order_num=0, order=0x0,
    ignore=false, alter_info=0x4155aaf4, do_send_ok=true) at sql_table.cc:5340
#23 0x08275023 in mysql_create_index (thd=0x8949a50, table_list=0x897a818, keys=@0x898144c) at sql_parse.cc:7239
#24 0x08276bcf in mysql_execute_command (thd=0x8949a50) at sql_parse.cc:3044
#25 0x083d1b65 in sp_instr_stmt::exec_core (this=0x897aa10, thd=0x8949a50, nextp=0x4155b484) at sp_head.cc:2556
#26 0x083d191f in sp_lex_keeper::reset_lex_and_exec_core (this=0x897aa38, thd=0x8949a50, nextp=0x4155b484,
    open_tables=false, instr=0x897aa10) at sp_head.cc:2425
#27 0x083d56ad in sp_instr_stmt::execute (this=0x897aa10, thd=0x8949a50, nextp=0x4155b484) at sp_head.cc:2507
#28 0x083cf4cf in sp_head::execute (this=0x8979f88, thd=0x8949a50) at sp_head.cc:1095
#29 0x083d3e1b in sp_head::execute_procedure (this=0x8979f88, thd=0x8949a50, args=0x8949fa4) at sp_head.cc:1726
#30 0x0827bb2d in mysql_execute_command (thd=0x8949a50) at sql_parse.cc:4623
#31 0x0827d6e4 in mysql_parse (thd=0x8949a50, inBuf=0x89710a0 "call pe()", length=9) at sql_parse.cc:6036
#32 0x0827e08e in dispatch_command (command=COM_QUERY, thd=0x8949a50, packet=0x8969041 "call pe()",
    packet_length=10) at sql_parse.cc:1835
#33 0x0827f3bd in do_command (thd=0x8949a50) at sql_parse.cc:1619
#34 0x0827f86d in handle_one_connection (arg=0x8949a50) at sql_parse.cc:1234
#35 0x40284297 in start_thread () from /lib/tls/libpthread.so.0
#36 0x4021937e in clone () from /lib/tls/libc.so.6
#37 0x4155cbb0 in ?? ()

Frame 4 is the intersting frame:
(gdb) f 4
#4  0x0845e6f1 in IO::readPage (this=0x402f9af8, bdb=0x41590fe4) at IO.cpp:169
169                                     bdb->pageNumber, (const char*) fileName, strerror (errno), errno);
(gdb) l
164
165             if (length != pageSize)
166                     {
167                     declareFatalError();
168                     FATAL ("read error on page %d of \"%s\": %s (%d)",
169                                     bdb->pageNumber, (const char*) fileName, strerror (errno), errno);
170                     }
171
172             ++reads;
173     }
(gdb) p length
$1 = 0
(gdb) p pageSize
$2 = 4096

Regards, Hakan
[1 Dec 2006 22:06] Hakan Küçükyılmaz
Tried on Linux 32-bit with latest change set 1.2384, 2006-12-01.

I hit following assertion:

Program received signal SIGILL, Illegal instruction.
[Switching to Thread 1096141744 (LWP 13760)]
0xffffe410 in __kernel_vsyscall ()
(gdb) bt
#0  0xffffe410 in __kernel_vsyscall ()
#1  0x40182541 in raise () from /lib/tls/libc.so.6
#2  0x0844ac6a in Error::debugBreak () at Error.cpp:93
#3  0x0844acc7 in Error::error (string=0x874a0d4 "read error on page %d of \"%s\": %s (%d)") at Error.cpp:70
#4  0x0845ea55 in IO::readPage (this=0x402f9af8, bdb=0x41590fe4) at IO.cpp:169
#5  0x084bf3c3 in Cache::fetchPage (this=0x402f9e14, dbb=0x402f9af4, pageNumber=105, pageType=PAGE_btree,
    lockType=Exclusive) at Cache.cpp:207
#6  0x08440a62 in Dbb::fetchPage (this=0x402f9af4, pageNumber=105, pageType=PAGE_btree, lockType=Exclusive)
    at Dbb.cpp:195
#7  0x0845a4e6 in IndexRootPage::redoIndexPage (dbb=0x402f9af4, pageNumber=101, parentPage=105, level=0,
    priorPage=0, nextPage=98, length=2042, data=0x428da8cf "") at IndexRootPage.cpp:741
#8  0x084a76e8 in SRLIndexPage::pass2 (this=0x41559ffc) at SRLIndexPage.cpp:70
#9  0x084925a3 in SerialLog::recover (this=0x402faa1c) at SerialLog.cpp:304
#10 0x0843dd09 in Database::openDatabase (this=0x4155d028, filename=0x4155a300 "/data/mysql-sap/d1.fts")
    at Database.cpp:599
#11 0x08432571 in Connection::getDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    dbFileName=0x4155a300 "/data/mysql-sap/d1.fts", threads=0x402f97f0) at Connection.cpp:1643
#12 0x0843279d in Connection::openDatabase (this=0x402f99a4, dbName=0x402f97b4 "d1",
    filename=0x402f97d0 "./d1.fts", account=0x8740c6b "mysql", password=0x8740c6b "mysql", address=0x0,
    parent=0x402f97f0) at Connection.cpp:923
#13 0x08407bed in StorageDatabase::getOpenConnection (this=0x402f95b4) at StorageDatabase.cpp:190
#14 0x08404b41 in StorageConnection::connect (this=0x402f94fc) at StorageConnection.cpp:111
#15 0x084091c6 in StorageHandler::getStorageConnection (this=0x402f9028, path=0x891df70 "./d1", mySqlThread=0x0,
    createFlag=OpenDatabase) at StorageHandler.cpp:123
#16 0x08401e31 in NfsStorageTable::open (this=0x8965140, name=0x894c2f8 "./d1/te", mode=2, test_if_locked=2)
    at ha_falcon.cpp:266
#17 0x0834ed8d in handler::ha_open (this=0x8965140, table_arg=0x89646a0, name=0x894c2f8 "./d1/te", mode=2,
    test_if_locked=2) at handler.cc:1523
#18 0x082ad7eb in open_table_from_share (thd=0x894aa50, share=0x894c0a0, alias=0x8972240 "te", db_stat=39,
    prgflag=44, ha_open_flags=0, outparam=0x89646a0, is_create_table=false) at table.cc:1553
#19 0x082a7961 in open_unireg_entry (thd=0x894aa50, entry=0x89646a0, table_list=0x8972248, alias=0x8972240 "te",
    cache_key=0x4155afae "d1", cache_key_length=6, mem_root=0x4155b0c0, flags=0) at sql_base.cc:2667
#20 0x082a9600 in open_table (thd=0x894aa50, table_list=0x8972248, mem_root=0x4155b0c0, refresh=0x4155b0f7, flags=0)
    at sql_base.cc:2052
#21 0x082a9e1c in open_tables (thd=0x894aa50, start=0x4155b154, counter=0x4155b144, flags=0) at sql_base.cc:2914
#22 0x082aa258 in open_and_lock_tables (thd=0x894aa50, tables=0x8972248) at sql_base.cc:3205
#23 0x082701ce in execute_sqlcom_select (thd=0x894aa50, all_tables=0x8972248) at sql_parse.cc:5186
#24 0x08275d6c in mysql_execute_command (thd=0x894aa50) at sql_parse.cc:2635
#25 0x0827d8f4 in mysql_parse (thd=0x894aa50, inBuf=0x89720a0 "select count(*) from te", length=23)
    at sql_parse.cc:6036
#26 0x0827e29e in dispatch_command (command=COM_QUERY, thd=0x894aa50, packet=0x896a041 "select count(*) from te",
    packet_length=24) at sql_parse.cc:1835
#27 0x0827f5cd in do_command (thd=0x894aa50) at sql_parse.cc:1619
#28 0x0827fa7d in handle_one_connection (arg=0x894aa50) at sql_parse.cc:1234
#29 0x40284297 in start_thread () from /lib/tls/libpthread.so.0
#30 0x4021937e in clone () from /lib/tls/libc.so.6
#31 0x4155cbb0 in ?? ()

Regards, Hakan
[6 Dec 2006 22:38] Peter Gulutzan
Trying it again today, with
changeset ChangeSet@1.2390.1.1 2006-12-06 14:51:27-06:00,
I still saw a crash two times out of three.
On the third time, 'select count(*) from te' was okay,
but then 'drop table te' crashed.
[7 Dec 2006 11:12] Hakan Küçükyılmaz
Jim Starkey pushed a fix for this recovery issue. He also stated that there are one or two open issues, which he has to fix before this test can be tested again.

Regards, Hakan
[13 Dec 2006 21:00] Peter Gulutzan
The new test case is:

/*
   You'll need six consoles:
   1 for mysqld
   4 for mysql client connections (T1,T2,T3,T4)
   1 for 'kill -9' statement.
*/

On T1, do:

delimiter //
use test//
delete from mysql.proc//
delete from mysql.event//
drop database d54//
set @@autocommit=0//
create database d54//
use d54//
drop table tz1//
drop table tz2//
drop table tz3//
drop procedure pz//
drop function fz//

create function fz () returns varchar(1000) begin   declare v int default 0; declare vc varchar(1000); declare hexchar char(2); declare rand1 int; set vc='';  set rand1=rand()*100; if rand1 mod 2 = 1 then set rand1 = rand1 - 1; end if; while v < rand()*100 do  set hexchar=hex(rand()*255); while length(hexchar)<2 do set hexchar=concat('0',hexchar); end while;   set vc = concat(vc,hexchar);     set v = v + 1;     end while; return vc;  end//

create table tz1
(s1 blob,
s2 varchar(1000),
s3 int,
s4 varchar(1000) character set ucs2,
s5 varbinary(1000),
s6 tinyint)
engine=falcon
partition by list(s6)
(partition p0 values in (0),
partition p1 values in (1),
partition p2 values in (2),
partition p3 values in (3),
partition p4 values in (4),
partition p5 values in (5))//
create table tz2 like tz1//

create trigger tz1_bi before insert on tz1 for each row insert into tz2 values (new.s1,new.s2,new.s3,new.s4,new.s5,new.s6)//
create trigger tz1_bu before update on tz1 for each row update tz2 set s1=fz(), s2=fz() where s3 = new.s3//
create trigger tz1_bd before delete on tz1 for each row delete from tz2 where s3 = old.s3//
create unique index iz1 on tz1 (s3,s6)//
create index iz2 on tz2 (s4(300),s3,s2(300),s5(100))//
create procedure pz ()
begin
declare v int default 0;
declare rand1 int;
declare rand2 int;
declare fz1 varchar(1000);
declare fz2 varchar(1000);
declare fz4 varchar(1000);
declare fz5 varchar(1000);
set @@autocommit=0;
while v < 100000 do
set rand1 = (connection_id()*10000) + (rand()*9999);
set rand2 = rand()*21;
select v,rand1,rand2;
set fz1=repeat(fz(),100); set fz2=fz(); set fz4=fz(); set fz5=fz();
set @v=0;
if rand2 = 3 then
if (select count(*) from tz1 where s3 = rand1) <> 0 then
delete from tz1 where s3 = rand1;
end if;
insert into tz1 values (fz1,fz2,rand1,fz4,fz5,rand1 mod 5);
select s3 into @v from tz1 where s3 = rand1 and s1=fz1 and s2=fz2 and s4=fz4 and s5=fz5;
if @v <> rand1 then select 'insert. @v <> rand1',@v,rand1; set v = 100000; end if;
end if;
if rand2 = 5 then
update tz1 set s1 = fz4 where s3 >= connection_id()*10000 and s3 < (connection_id()+1)*10000;
end if;
/* if rand2 = 7 then rollback; end if; */
/* if rand2 = 11 then commit; end if; */
if rand2 = 13 then
if (select count(*) from tz1 where s3 = rand1) = 0 then
insert into tz1 values (fz1,fz1,rand1,fz4,fz5,rand2 mod 5);
end if;
update tz1 set s1=null, s2=null, s4=null, s5=null where s3 = rand1;
select s3 into @v from tz1 where s3 = rand1 and s1 is null and s2 is null and s4 is null and s5 is null;
if @v <> rand1 then select 'update. @v <> rand1',@v,rand1; set v = 100000; end if;
end if;
if rand2 = 17 then delete from tz1 where s3 = rand1 and s1 is null; end if;
if rand2 = 19 then
begin
declare continue handler for sqlexception begin end;
create table tz3 as select * from tz2;
drop table tz3;
end;
end if;
set v = v + 1;
end while;
end//
delimiter ;

On T1 and T2 and T3 and T4, do:
call pz();

All four connections will display, on each iteration:
'select v,rand1,rand2;'. For this purpose, only 'v'
is important.

Wait till v > 500 on all four connections.

Use 'kill -9' to kill the server, as described before.

Restart mysqld.

On T1, say:
select count(s2) from tz1;
select count(s2) from tz2;
create table tz3 as select * from tz2;
drop table tz3;
call pz();

For me, there's always a crash before the 'call pz();'.
[18 Dec 2006 21:47] Peter Gulutzan
The following is already being handled, via email.
I add it here as a comment in case it's worth trying again someday.

Here's another test case. I consider it "new" because:
- The only statements inside the loop are INSERT or
  UPDATE or COMMIT, there is no DDL, there is no ROLLBACK
- Instead of the usual symptom (i.e. crash) I see:
  database open failed: corrupted serial log
  database open failed: corrupted serial log

How to repeat:
I have only tried this once, it's a rather tedious test.
As usual, I have a bunch of connections on my 64-bit
SUSE 10 machine: 1 for mysqld, 1 for 'kill', 7 for clients.

On T1, say:
use d56
delimiter //
set @@autocommit=0//
create table t (s1 int, s2 varchar(100) character set ucs2) engine=falcon//
create unique index i on t (s1,s2)//
create procedure p ()
begin
  declare v int default 0;
  declare v2 int;
  while v < 1000000 do
    if v mod 1000 = 0 then select v; commit; end if;
    set v2 = v * 10 + connection_id();
    start transaction;
    insert into t values (v2,repeat(0x00,rand()*100));
    update t set s2 = repeat(0x01,rand()*100) where s1 = v2 - 10;
    set v = v + 1;
    end while;
  end//
call p()//

(Hmm, that "start transaction;" statement is my error, I
had intended to match the start-transaction and end-transaction
statements. Oh, well.)

On T2+T3+T4+T5+T6+T7, say:
use d56
call p();

Wait till all connections display at least '1000' (well, in
fact  I see that T7 still says '0', I didn't give it much time).
Kill mysqld.
Restart mysqld.

On T1, say:
select count(*) from t//

Now on T1 the message appears:
ERROR 1030 (HY000): Got error 157 from storage engine
And on mysqld the two-line message appears:
database open failed: corrupted serial log
database open failed: corrupted serial log

Incidentally, because of bug#24970
"Falcon: error 1020 not handled", I have to use
"set v2 = v * 10 + connection_id();" so that every
connection is inserting different unique-key values.
This avoids conflict, which weakens the test.
[22 Dec 2006 22:45] Patrick Galbraith
works for me now

ChangeSet@1.2376, 2006-12-22 16:32:04-05:00, jas@rowvwade. +1 -0

mysql> CREATE PROCEDURE p()
    -> begin
    ->   declare v int default 1;
    ->   while v < 500000 do
    ->     insert into t values ('a',repeat('b',1000),v);
    ->     set v = v + 1;
    ->  end while;
    -> end//
Query OK, 0 rows affected (0.02 sec)

mysql> call p()//
Query OK, 1 row affected (2 min 14.10 sec)

mysql> update t set s1 = concat(s1,'x')//
ERROR 2013 (HY000): Lost connection to MySQL server during query
mysql> \q
Bye
[patg@buffy benchmarks]$ screen -x patg1
[detached]
[patg@buffy benchmarks]$ screen -x patg2
[detached]
[patg@buffy benchmarks]$ mysql -u root test
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1 to server version: 5.1.14-falcon-alpha-debug-log

Type 'help;' or '\h' for help. Type '\c' to clear the buffer.

mysql> update t set s1 = concat(s1,'x')//
    -> ;
ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '/' at line 1
mysql> update t set s1 = concat(s1,'x');
Query OK, 499999 rows affected (24.04 sec)
Rows matched: 499999  Changed: 499999  Warnings: 0
[26 Dec 2006 20:39] Jim Starkey
No longer reproducable.  Hurray!
[28 Dec 2006 21:54] Hakan Küçükyılmaz
Peter Gulutzan's test from 13th December runs fine with 8 connections on my laptop. However, on a multiple CPU machine the test hangs after ~3 min.

Regards, Hakan
[29 Jan 2007 0:00] Bugs System
No feedback was provided for this bug for over a month, so it is
being suspended automatically. If you are able to provide the
information that was originally requested, please do so and change
the status of the bug back to "Open".