Bug #20760 mysqld causes the core dump during ndb_restore operation
Submitted: 28 Jun 2006 20:48 Modified: 11 Jul 2006 17:10
Reporter: Nikolay Grishakin Email Updates:
Status: Can't repeat Impact on me:
None 
Category:MySQL Cluster: Cluster (NDB) storage engine Severity:S1 (Critical)
Version:5.1 OS:Linux (Linux)
Assigned to: Nikolay Grishakin CPU Architecture:Any

[28 Jun 2006 20:48] Nikolay Grishakin
Description:
Core produced running ndb_util test. I see the pattern with #20742. If you do a lot operations like stop, restart, start cluster nodes and after that trying to do something with database core is being generated.

Errors are (from /home/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/log/mysqltest-time) :
Restore: Failed to restore table: cluster/def/NDB$BLOB_2_3 ... Exiting
sh: line 1: 29480 Aborted                 (core dumped) /home/ndbdev/ngrishakin/mysql-5.1/storage/nd
b/tools/ndb_restore --no-defaults --ndb-connectstring="localhost:9310" -p 8 -b 1 -n 1 -m -r --print
--print_meta /home/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/ndbcluster-9310/BACKUP/BACKUP-1 >>/hom
e/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/log/ndb_testrun.log
mysqltest: In included file "./include/ndb_restore_master.inc": At line 6: command "$NDB_TOOLS_DIR/n
db_restore --no-defaults --ndb-connectstring="localhost:$NDBCLUSTER_PORT" -p 8 -b $the_backup_id -n
1 -m -r --print --print_meta $NDB_BACKUP_DIR/BACKUP/BACKUP-$the_backup_id >> $NDB_TOOLS_OUTPUT" fail
ed

Back Trace:

[ndbdev@ndb15 mysql-test]$ gdb ../sql/mysqld ./var/master-data/core.29321
GNU gdb Red Hat Linux (6.3.0.0-1.84rh)
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu"...Using host libthread_db library "/lib64/li
read_db.so.1".

Core was generated by `/home/ndbdev/ngrishakin/mysql-5.1/sql/mysqld --no-defaults --console --bas
r='.
Program terminated with signal 6, Aborted.
Reading symbols from /usr/lib64/libz.so.1...done.
Loaded symbols for /usr/lib64/libz.so.1
Reading symbols from /lib64/libdl.so.2...done.
Loaded symbols for /lib64/libdl.so.2
Reading symbols from /lib64/libpthread.so.0...done.
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /lib64/libcrypt.so.1...done.
Loaded symbols for /lib64/libcrypt.so.1
Reading symbols from /lib64/libnsl.so.1...done.
Loaded symbols for /lib64/libnsl.so.1
Reading symbols from /lib64/libm.so.6...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libnss_files.so.2...done.
Loaded symbols for /lib64/libnss_files.so.2
#0  0x00000033b8c096a7 in pthread_kill () from /lib64/libpthread.so.0
(gdb) bt
#0  0x00000033b8c096a7 in pthread_kill () from /lib64/libpthread.so.0
#1  0x0000000000819930 in write_core (sig=6) at stacktrace.c:220
#2  0x00000000006444a4 in handle_segfault (sig=6) at mysqld.cc:2175
#3  <signal handler called>
#4  0x00000033b832f280 in raise () from /lib64/libc.so.6
#5  0x00000033b8330750 in abort () from /lib64/libc.so.6
#6  0x00000033b83282e6 in __assert_fail () from /lib64/libc.so.6
#7  0x0000000000a90868 in NdbEventBuffer::deleteUsedEventOperations (this=0x1d98270)
    at NdbEventOperationImpl.cpp:1258
#8  0x0000000000a90c66 in NdbEventBuffer::nextEvent (this=0x1d98270)
    at NdbEventOperationImpl.cpp:1227
#9  0x0000000000a66f3f in Ndb::nextEvent (this=0x1d957c0) at Ndb.cpp:1479
#10 0x00000000007b6c45 in ndb_binlog_thread_func (arg=0x0) at ha_ndbcluster_binlog.cc:3571
#11 0x00000033b8c0697c in start_thread () from /lib64/libpthread.so.0
#12 0x00000033b83c992e in clone () from /lib64/libc.so.6
#13 0x0000000000000000 in ?? ()
(gdb)

How to repeat:
Test case to reproduce:

-- source include/have_ndb.inc

--disable_warnings
DROP DATABASE IF EXISTS util;
DROP TABLE IF EXISTS util.t1;
--enable_warnings

CREATE DATABASE util;

CREATE LOGFILE GROUP lg
ADD UNDOFILE 'undofile.dat'
INITIAL_SIZE 16M
UNDO_BUFFER_SIZE = 1M
ENGINE=NDB;

CREATE TABLESPACE ts
ADD DATAFILE 'datafile.dat'
USE LOGFILE GROUP lg
INITIAL_SIZE 12M
ENGINE NDB;

CREATE TABLE util.t1
(a1 INT NOT NULL PRIMARY KEY, a2 CHAR(50)) TABLESPACE ts STORAGE DISK ENGINE=NDB;

let $j= 500;
--disable_query_log
while ($j)
{
  eval INSERT INTO util.t1 VALUES ($j, "aaaaaaaaaaaa$j");
  dec $j;
}
--enable_query_log
SELECT COUNT(*) FROM util.t1;

--echo ******** Restart each node *****************

--echo **** Restart node 2 and check status *****
--echo ndb_mgm <id=2> restart
--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute="2 restart"
--sleep 20

--echo ndb_mgm ALL STATUS
--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute="ALL STATUS"

--echo **** Restart node 1 and check status *****
--echo ndb_mgm <id=1> restart
--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute="1 restart"
--sleep 20

--echo ndb_mgm ALL STATUS
--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute="ALL STATUS"

############### Backup and restore ##################################

-- source include/ndb_backup.inc

DROP TABLE util.t1;

ALTER TABLESPACE ts
DROP DATAFILE 'datafile.dat'
ENGINE = NDB;

DROP TABLESPACE ts
ENGINE = NDB;

# Bug 20742
#DROP LOGFILE GROUP lg
#ENGINE = NDB;

-- source include/ndb_restore_master.inc #core generated !!!!!!!!!!!!!!

SELECT COUNT(*) FROM util.t1;

############# CleanUp ##################################################

DROP TABLE util.t1;
DROP DATABASE util;

ALTER TABLESPACE ts
DROP DATAFILE 'datafile.dat'
ENGINE = NDB;

DROP TABLESPACE ts
ENGINE = NDB;

# Bug 20742
#DROP LOGFILE GROUP lg
#ENGINE = NDB;

--echo ndb_mgm SHUTDOWN
--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute=shutdown
[28 Jun 2006 20:55] Nikolay Grishakin
Core dump, log files and test case "ndb_util_ts.test" are copied to:
ndbdev@ndbmaster.mysql.com:/bugs/bug20760
[29 Jun 2006 12:23] Jonas Oreland
My guess would be that this would crash a mysqld
1) create a table with blobs and populate it with some rows (with blob larger than 256 bytes)
2) take a backup
3) restore backup (data only)

here i would assume that mysqld would core as it should get puzzled about the strange event that is comming in.

can you test this minimal testcase ?

/Jonas
[29 Jun 2006 21:22] Nikolay Grishakin
Jonas, I did the following test:
-- source include/have_ndb.inc
--disable_warnings
DROP DATABASE IF EXISTS util;
DROP TABLE IF EXISTS util.t1;
--enable_warnings
CREATE DATABASE util;

CREATE LOGFILE GROUP lg
ADD UNDOFILE 'undofile.dat'
INITIAL_SIZE 16M
UNDO_BUFFER_SIZE = 1M
ENGINE=NDB;

CREATE TABLESPACE ts
ADD DATAFILE 'datafile.dat'
USE LOGFILE GROUP lg
INITIAL_SIZE 12M
ENGINE NDB;

CREATE TABLE util.t1
(a1 INT NOT NULL PRIMARY KEY, a2 BLOB) TABLESPACE ts STORAGE DISK ENGINE=NDB;

set @blb = repeat('a', 300);
let $j= 500;
--disable_query_log
while ($j)
{
  eval INSERT INTO util.t1 VALUES ($j, @blb);
  dec $j;
}
--enable_query_log
SELECT COUNT(*) FROM util.t1;
-- source include/ndb_backup.inc
DROP TABLE util.t1;
-- source include/ndb_restore_master.inc

SELECT COUNT(*) FROM util.t1;

--exec $NDB_MGM localhost:$NDBCLUSTER_PORT --execute=shutdown

and got the following error:
Errors are (from /home/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/log/mysqltest-time) :
Restore: Failed to restore table: cluster/def/NDB$BLOB_2_3 ... Exiting
sh: line 1: 17782 Aborted                 (core dumped) /home/ndbdev/ngrishakin/mysql-5.1/storage/nd
b/tools/ndb_restore --no-defaults --ndb-connectstring="localhost:9310" -p 8 -b 1 -n 1 -m -r --print
--print_meta /home/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/ndbcluster-9310/BACKUP/BACKUP-1 >>/hom
e/ndbdev/ngrishakin/mysql-5.1/mysql-test/var/log/ndb_testrun.log
mysqltest: In included file "./include/ndb_restore_master.inc": At line 6: command "$NDB_TOOLS_DIR/n
db_restore --no-defaults --ndb-connectstring="localhost:$NDBCLUSTER_PORT" -p 8 -b $the_backup_id -n
1 -m -r --print --print_meta $NDB_BACKUP_DIR/BACKUP/BACKUP-$the_backup_id >> $NDB_TOOLS_OUTPUT" fail

but I cannot find a core file. It's not in ./mysql-test/var/master-data or
master1-data.
[29 Jun 2006 21:51] Nikolay Grishakin
Jonas, found the core. It was under ./mysql_test/ and coused by ndb_restore.
Here is a back trace. Seems like this is another bug.

[ndbdev@ndb15 mysql-test]$ gdb ../storage/ndb/tools/ndb_restore core.18539
GNU gdb Red Hat Linux (6.3.0.0-1.84rh)
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu"...Using host libthread_db library "/lib64/libt
read_db.so.1".

Core was generated by `/home/ndbdev/ngrishakin/mysql-5.1/storage/ndb/tools/ndb_restore --no-default
 -'.
Program terminated with signal 6, Aborted.
Reading symbols from /lib64/libpthread.so.0...done.
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /usr/lib64/libz.so.1...done.
Loaded symbols for /usr/lib64/libz.so.1
Reading symbols from /lib64/libcrypt.so.1...done.
Loaded symbols for /lib64/libcrypt.so.1
Reading symbols from /lib64/libnsl.so.1...done.
Loaded symbols for /lib64/libnsl.so.1
Reading symbols from /lib64/libm.so.6...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libnss_files.so.2...done.
Loaded symbols for /lib64/libnss_files.so.2
#0  0x00000033b832f280 in raise () from /lib64/libc.so.6
(gdb) bt
#0  0x00000033b832f280 in raise () from /lib64/libc.so.6
#1  0x00000033b8330750 in abort () from /lib64/libc.so.6
#2  0x0000000000454135 in exitHandler (code=1) at restore/restore_main.cpp:439
#3  0x00000000004546dd in main (argc=1, argv=0x79d6c0) at restore/restore_main.cpp:551
[29 Jun 2006 22:34] Nikolay Grishakin
mysqld causes the core dump during ndb_restore operation
[11 Jul 2006 17:10] Nikolay Grishakin
Don't see the core from mysqld anymore with latest mysql 5.1. 
core caused bu ndb_restore only. Changing status to "Cannot repeat".