Description:
Hi,
I received a mysqld core today while running TPC-B replication testing. This test has been running for a long, long time without issues.
Program terminated with signal 11, Segmentation fault.
#0 0x0000003907e0b142 in pthread_kill () from /lib64/libpthread.so.0
(gdb) bt
#0 0x0000003907e0b142 in pthread_kill () from /lib64/libpthread.so.0
#1 0x00000000006298d1 in handle_segfault (sig=11) at mysqld.cc:2497
#2 <signal handler called>
#3 0x000000000091d85c in NdbScanOperation::receiver_delivered (this=0x3ca3fa0, tRec=0x3ca4910)
at NdbScanOperation.cpp:1222
#4 0x000000000090f699 in Ndb::handleReceivedSignal (this=0x3ca3fa0, aSignal=0x3ca4910, ptr=0x0)
at Ndbif.cpp:364
#5 0x00000000009ae5f6 in execute (callbackObj=0x3b09940, header=0x3ca4910,
prio=<value optimized out>, theData=0x3b491bc, ptr=0x12) at TransporterFacade.cpp:305
#6 0x000000000097ecc9 in TransporterRegistry::unpack (this=0x3b2b1d0, readPtr=0x3b491ac,
sizeOfData=0, remoteNodeId=3, state=<value optimized out>) at Packer.cpp:115
#7 0x000000000097c966 in TransporterRegistry::get_tcp_data (this=0x3b2b1d0, t=0x3b48dc0)
at ../../../../../storage/ndb/include/util/Bitmask.hpp:198
#8 0x000000000097ca2b in TransporterRegistry::performReceive (this=0x3b2b1d0)
at TransporterRegistry.cpp:1023
#9 0x000000000090e68b in Ndb::waitCompletedTransactions (this=0x3ddb650, aMilliSecondsToWait=360000,
noOfEventsToWaitFor=<value optimized out>, poll_guard=<value optimized out>) at Ndbif.cpp:1285
#10 0x000000000090ee50 in Ndb::poll_trans (this=0x3ddb650, aMillisecondNumber=<value optimized out>,
minNoOfEventsToWakeup=<value optimized out>, pg=0x40e0a3a0) at Ndbif.cpp:1348
#11 0x000000000090ef05 in Ndb::sendPollNdb (this=<value optimized out>, aMillisecondNumber=360000,
minNoOfEventsToWakeup=<value optimized out>, forceSend=<value optimized out>) at Ndbif.cpp:1328
#12 0x00000000009145a2 in NdbTransaction::executeNoBlobs (this=0x3e1ebb0,
aTypeOfExec=<value optimized out>, abortOption=<value optimized out>, forceSend=1)
at NdbTransaction.cpp:585
#13 0x0000000000914af0 in NdbTransaction::execute (this=0x3e1ebb0,
aTypeOfExec=NdbTransaction::NoCommit, abortOption=<value optimized out>, forceSend=1)
at ../../../../storage/ndb/include/ndbapi/NdbOperation.hpp:1546
#14 0x00000000007bbb7f in ha_ndbcluster::pk_read (this=0x2aaaac185940, key=<value optimized out>,
key_len=<value optimized out>, buf=<value optimized out>, part_id=1) at ha_ndbcluster.cc:2181
#15 0x00000000007ca893 in ha_ndbcluster::read_range_first_to_buf (this=0x2aaaac185940,
start_key=0x40e0a5a0, end_key=0x0, desc=false, sorted=false, buf=0x2aaaac1918e0 "\uffff
at ha_ndbcluster.cc:4521
#16 0x00000000007ca973 in ha_ndbcluster::index_last (this=0x3ca3fa0, buf=0x3ca4910 "D3\"\0219")
at ha_ndbcluster.cc:4465
#17 0x000000000071e491 in stat_print (thd=0x3ca3fa0, type=0x3ca4910 "D3\"\0219", type_len=0,
file=0x3ca48f0 "\020I\uffff\003", file_len=4294967295, status=0x0, status_len=0) at handler.cc:4370
#18 0x0000000000000000 in ?? ()
(gdb) f 0
#0 0x0000003907e0b142 in pthread_kill () from /lib64/libpthread.so.0
(gdb) f 1
#1 0x00000000006298d1 in handle_segfault (sig=11) at mysqld.cc:2497
2497 fprintf(stderr, "\n\
(gdb) l
2492 the documentation for your distribution on how to do that.\n");
2493 #endif
2494
2495 if (locked_in_memory)
2496 {
2497 fprintf(stderr, "\n\
2498 The \"--memlock\" argument, which was enabled, uses system calls that are\n\
2499 unreliable and unstable on some operating systems and operating-system\n\
2500 versions (notably, some versions of Linux). This crash could be due to use\n\
2501 of those buggy OS calls. You should consider whether you really need the\n\
(gdb) f 2
#2 <signal handler called>
(gdb) l
2502 \"--memlock\" parameter and/or consult the OS distributer about \"mlockall\"\n\
2503 bugs.\n");
2504 }
2505
2506 #ifdef HAVE_WRITE_CORE
2507 if (test_flags & TEST_CORE_ON_SIGNAL)
2508 {
2509 fprintf(stderr, "Writing a core file\n");
2510 fflush(stderr);
2511 my_write_core(sig);
(gdb) f 3
#3 0x000000000091d85c in NdbScanOperation::receiver_delivered (this=0x3ca3fa0, tRec=0x3ca4910)
at NdbScanOperation.cpp:1222
1222 m_sent_receivers_count = last;
(gdb) l
1217 if(idx != last){
1218 NdbReceiver * move = m_sent_receivers[last];
1219 m_sent_receivers[idx] = move;
1220 move->m_list_index = idx;
1221 }
1222 m_sent_receivers_count = last;
1223
1224 last = m_conf_receivers_count;
1225 m_conf_receivers[last] = tRec;
1226 m_conf_receivers_count = last + 1;
(gdb) f 4
#4 0x000000000090f699 in Ndb::handleReceivedSignal (this=0x3ca3fa0, aSignal=0x3ca4910, ptr=0x0)
at Ndbif.cpp:364
364 switch (tSignalNumber){
(gdb) l
359
360 All signals received by the API requires the first data word to be such
361 an id to the receiving object.
362 */
363
364 switch (tSignalNumber){
365 case GSN_TCKEYCONF:
366 {
367 tFirstDataPtr = int2void(tFirstData);
368 if (tFirstDataPtr == 0) goto InvalidSignal;
(gdb) f 5
#5 0x00000000009ae5f6 in execute (callbackObj=0x3b09940, header=0x3ca4910,
prio=<value optimized out>, theData=0x3b491bc, ptr=0x12) at TransporterFacade.cpp:305
305 switch (gsn){
(gdb) l
300 * We handle it immediately here.
301 */
302 ClusterMgr * clusterMgr = theFacade->theClusterMgr;
303 const Uint32 gsn = header->theVerId_signalNumber;
304
305 switch (gsn){
306 case GSN_API_REGREQ:
307 clusterMgr->execAPI_REGREQ(theData);
308 break;
309
How to repeat:
ACRT conf=clst-rep clone=6.3