Bug #5591 Crash on SHOW TABLE STATUS FROM
Submitted: 15 Sep 2004 11:37 Modified: 21 Sep 2004 11:00
Reporter: Anton Fedorov Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Cluster: Cluster (NDB) storage engine Severity:S1 (Critical)
Version:4.1.5-gamma-nightly-20040915 OS:Linux (Debian Linux)
Assigned to: Tomas Ulin CPU Architecture:Any

[15 Sep 2004 11:37] Anton Fedorov
Description:
4.1.4 gamma failed to create a table with an auto-increment column and a primary key, so I compiled the latest available snapshot.

Structure of cluster: 5 computers:
  5 MGM
  5 API
  5*3 data nodes
3 replicas.

Server crashes on 
  SHOW TABLE STATUS FROM `voipfone`

backtrace:
0x81648dd handle_segfault + 669
0x4003d825 _end + 935343637
0x82313fc _ZN3Ndb15checkFailedNodeEv + 76
0x822f4d9 _ZN3Ndb16startTransactionEjPKcj + 57
0x81f2dd6 _Z24ndb_get_table_statisticsP3NdbPKcPyS3_ + 38
0x81edcc5 _ZN13ha_ndbcluster14records_updateEv + 117
0x81f0aa8 _ZN13ha_ndbcluster4infoEj + 56
0x8203b2a _Z25mysqld_extend_show_tablesP3THDPKcS2_ + 3098
0x8179104 _Z21mysql_execute_commandP3THD + 5028
0x817da23 _Z11mysql_parseP3THDPcj + 211
0x8176bd4 _Z16dispatch_command19enum_server_commandP3THDPcj + 1028
0x8176786 _Z10do_commandP3THD + 134
0x8175f38 handle_one_connection + 872
0x40037e51 _end + 935320641
0x402826ca _end + 937723066

Status of cluster:
NDB> show
Cluster Configuration
---------------------
[ndbd(NDB)]     15 node(s)
id=11   (Version: 3.5.1, Nodegroup: 0)
id=35   (Version: 3.5.1, Nodegroup: 4)
id=34   (Version: 3.5.1, Nodegroup: 4)
id=33   (Version: 3.5.1, Nodegroup: 4)
id=12   (Version: 3.5.1, Nodegroup: 0)
id=13   (Version: 3.5.1, Nodegroup: 0)
id=14   (Version: 3.5.1, Nodegroup: 1, Master)
id=15   (Version: 3.5.1, Nodegroup: 1)
id=21   (Version: 3.5.1, Nodegroup: 1)
id=32   (Version: 3.5.1, Nodegroup: 3)
id=22   (Version: 3.5.1, Nodegroup: 2)
id=23   (Version: 3.5.1, Nodegroup: 2)
id=24   (Version: 3.5.1, Nodegroup: 2)
id=25   (Version: 3.5.1, Nodegroup: 3)
id=31   (Version: 3.5.1, Nodegroup: 3)

[ndb_mgmd(MGM)] 5 node(s)
id=1   (Version: 3.5.1)
id=2    @127.0.0.1  (Version: 3.5.1)
id=3   (Version: 3.5.1)
id=4   (Version: 3.5.1)
id=5   (Version: 3.5.1)

[mysqld(API)]   5 node(s)
id=41 (not connected, accepting connect from vf1)
id=42   (Version: 3.5.1)
id=43 (not connected, accepting connect from vf3)
id=44 (not connected, accepting connect from lbmaster)
id=45 (not connected, accepting connect from lbslave)

How to repeat:
Cluster config:
[COMPUTER DEFAULT]

[DB DEFAULT]
NoOfReplicas: 3

[API DEFAULT]
ArbitrationRank: 2

[MGM DEFAULT]
ArbitrationRank: 1

[TCP DEFAULT]
PortNumber: 28002

[COMPUTER]
Id:1
HostName: vf1

[COMPUTER]
Id:2
HostName: vf2

[COMPUTER]
Id:3
HostName: vf3

[COMPUTER]
Id:4
HostName: lbmaster

[COMPUTER]
Id:5
HostName: lbslave

[MGM]
Id:1
ExecuteOnComputer: 1
PortNumber: 2200

[MGM]
Id:2
ExecuteOnComputer: 2
PortNumber: 2200

[MGM]
Id:3
ExecuteOnComputer: 3
PortNumber: 2200

[MGM]
Id:4
ExecuteOnComputer: 4
PortNumber: 2200

[MGM]
Id:5
ExecuteOnComputer: 5
PortNumber: 2200

[DB]
Id: 11
ExecuteOnComputer: 1
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB1/

[DB]
Id: 21
ExecuteOnComputer: 1
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB2/

[DB]
Id: 31
ExecuteOnComputer: 1
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB3/

[DB]
Id: 12
ExecuteOnComputer: 2
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB1/

[DB]
Id: 22
ExecuteOnComputer: 2
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB2/

[DB]
Id: 32
ExecuteOnComputer: 2
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB3/
[DB]
Id: 13
ExecuteOnComputer: 3
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB1/

[DB]
Id: 23
ExecuteOnComputer: 3
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB2/

[DB]
Id: 33
ExecuteOnComputer: 3
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB3/

[DB]
Id: 14
ExecuteOnComputer: 4
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB1/

[DB]
Id: 24
ExecuteOnComputer: 4
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB2/

[DB]
Id: 34
ExecuteOnComputer: 4
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB3/

[DB]
Id: 15
ExecuteOnComputer: 5
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB1/

[DB]
Id: 25
ExecuteOnComputer: 5
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB2/

[DB]
Id: 35
ExecuteOnComputer: 5
FileSystemPath: /voipfone/var/lib/mysql-cluster/nodeDB3/

[API]
Id: 41
ExecuteOnComputer: 1

[API]
Id: 42
ExecuteOnComputer: 2

[API]
Id: 43
ExecuteOnComputer: 3

[API]
Id: 44
ExecuteOnComputer: 4

[API]
Id: 45
ExecuteOnComputer: 5

DB structure:
***
CREATE TABLE `balance` (
  `id` int(10) unsigned NOT NULL default '0',
  `time` datetime default NULL,
  `money` double default NULL,
  `immoney` double default NULL,
  `freemins` int(10) unsigned default NULL
) ENGINE=NDB;

CREATE TABLE `limits` (
  `balanceId` int(10) unsigned NOT NULL default '0',
  `yearmon` char(6) default NULL,
  `lmt` int(10) unsigned default NULL
) ENGINE=NDB;

CREATE TABLE `paymentlog` (
  `balanceId` int(10) unsigned default NULL,
  `whenDate` datetime default NULL,
  `fromVC` varchar(20) default NULL,
  `toVC` varchar(20) default NULL,
  `event` enum('INCOMING','OUTGOING') default NULL,
  `freemins` int(10) unsigned default NULL,
  `money` double default NULL
) ENGINE=NDB;

CREATE TABLE `price` (
  `id` int(10) unsigned NOT NULL default '0',
  `prefix` varchar(20) default NULL,
  `cost` double NOT NULL default '0',
  `freemins` int(11) NOT NULL default '0'
) ENGINE=NDB;

CREATE TABLE `reseller` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `ownPrice` tinyint(1) default NULL,
  `priceId` int(10) unsigned NOT NULL default '0',
  `balanceId` int(10) unsigned NOT NULL default '0',
  PRIMARY KEY  (`id`)
) ENGINE=NDB;

CREATE TABLE `user` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `phone` varchar(10) NOT NULL default '',
  `priceId` int(10) unsigned NOT NULL default '0',
  `resellerId` int(10) unsigned default NULL,
  `parentId` int(10) unsigned NOT NULL default '0',
  `balanceId` int(10) unsigned default NULL,
  `accountType` varchar(50) default NULL,
  PRIMARY KEY  (`id`)
) ENGINE=NDB;

Ndb.cfg:
nodeid=X;host=lbmaster:2200;host=lbslave:2200;host=vf1:2200;host=vf2:2200;host=vf3:2200
  /where X is correct node id/
[15 Sep 2004 11:52] Anton Fedorov
If only one of the 5 management nodes is running, and Ndb.cfg is configured to use only that one node, there is no crash.
[16 Sep 2004 8:06] Magnus Blåudd
Hi,

this is an impressive cluster you have set up. Nice.

To use 5 MGM's is not so common and it's very important that they use _exactly_ the same config.ini and that they are listed in the same order in ndb.cfg. Please make sure this is the case and restart all of them. Then restart all NDB nodes and the MySQL Servers.

Your backtrace indicates the crash occurs in Ndb::checkFailedNode, we have reviewed that function and added some debug printouts. Does any NDB node crash when you run the query? If so please send tracefiles.

To further diagnose the problem, do you think you could help us by running a debug compiled version? 
Please replace the function Ndb::checkFailedNode with the one below and debug compile.

<mysql_src_path>/ndb/src/ndbapi/Ndblist.cpp 
>>
/**
 * Debug-instrumented variant of Ndb::checkFailedNode.
 *
 * Walks the first theNoOfDBnodes entries of theDBnodes. For every node id
 * whose the_release_ind entry equals 1, it releases each NdbConnection on
 * that node's theConnectionArray list and then resets the entry to 0.
 *
 * The DBUG_PRINT / DBUG_ASSERT calls exist to expose a corrupted node
 * count or node id in the debug trace before the id is used as an index.
 */
void
Ndb::checkFailedNode()
{
  DBUG_ENTER("Ndb::checkFailedNode");
  DBUG_PRINT("enter", ("theNoOfDBnodes: %d", theNoOfDBnodes));

  // Sanity check on the loop bound before iterating over theDBnodes.
  DBUG_ASSERT(theNoOfDBnodes < MAX_NDB_NODES);
  for (int i = 0; i < theNoOfDBnodes; i++){
    const NodeId node_id = theDBnodes[i];
    // Trace every (index, node id) pair so a bad entry is visible in the log.
    DBUG_PRINT("info", ("i: %d, node_id: %d", i, node_id));
    
    // node_id indexes the_release_ind and theConnectionArray below;
    // NOTE(review): presumably both are sized MAX_NDB_NODES -- confirm.
    DBUG_ASSERT(node_id < MAX_NDB_NODES);    
    // NOTE(review): a value of 1 presumably marks a node whose connections
    // must be released (e.g. after node failure) -- confirm against the setter.
    if (the_release_ind[node_id] == 1){

      /**
       * Release all connections in idle list (for node)
       */
      NdbConnection * tNdbCon = theConnectionArray[node_id];
      // Detach the list head first, then walk the detached list.
      theConnectionArray[node_id] = NULL;
      while (tNdbCon != NULL) {
        NdbConnection* tempNdbCon = tNdbCon;
        // Advance before handing the current element to releaseNdbCon.
        tNdbCon = tNdbCon->next();
        releaseNdbCon(tempNdbCon);
      }
      // Clear the indicator once this node's list has been released.
      the_release_ind[node_id] = 0;
    }
  }
  DBUG_VOID_RETURN;
}
<<

Start the mysqld with the option "--debug=F:L:d:t:i:O", then please send us the tracefile.

Best regards
Magnus
[16 Sep 2004 9:59] Anton Fedorov
All nodes have exactly the same config -- after making any change on lb1, I copy the file with scp to the other servers, so the configs stay in sync.

I have replaced this function, recompiled with debug on all servers:
./configure --with-ndbcluster --without-innodb --without-debug  --without-docs --without-bench  --with-extra-charsets=all --prefix=/voipfone --enable-thread-safe-client --enable-assembler --libexecdir=/voipfone/bin

Then I started the MGM nodes in the order lb1[4]->lb2[5]->vf1[1]->vf2[2]->vf3[3].
Then I started all the DB nodes at the same time.
After all nodes had started and joined their node groups, I started the API node on vf2[42].

I logged in with phpMyAdmin and ran the SQL for table creation -- all tables were created fine.
Then I tried to run the SQL "SHOW TABLE STATUS FROM `voipfone`" -- and mysqld crashed.

After that I shut down all nodes with killall ndbd ndb_mgmd. [Some DB nodes did not stop right away, but a bit later.]

Then I collected all the logs for you and posted them in the Files section {logs-16-sep-2004-0.tar.bz1}.

Do you need the CORE files of the crashed nodes? They are about 600mb uncompressed :) -- compressed with bzip2 -9 they are 877880 bytes.

Good luck :)
If you need any more info/help -- i'm fully yours.
[17 Sep 2004 7:06] Anton Fedorov
Some additional tested configurations:

2 replicas, 1 MGM, 2 nodes, 2 APIs, only one API connected.
Crash only in mysqld.

1 replica, 1 MGM, 2 nodes, 2 APIs, only one API connected.
Crash only in mysqld.

1 replica, 1 MGM, 1 node, 2 APIs, only one API connected.
No crash, but "SHOW TABLE STATUS FROM `voipfone`" sometimes returns an incorrect value in "ROWS".

1 replica, 1 MGM, 1 node, 1 API
No crash, but "SHOW TABLE STATUS FROM `voipfone`" sometimes returns an incorrect value in "ROWS".

I think I should switch to a "2 replicas via binary log" scheme =(
[19 Sep 2004 12:21] Anton Fedorov
I have built the 20040919 snapshot.
A simple config (2 replicas, 2 MGMs, 2 nodes, 3 APIs with one connected) works without a crash.
But if I shut down the MGM that was started first, the cluster becomes unavailable =(
I thought all nodes were connected in a star topology, including to the MGMs.
So, is there no failsafe way?

Looks fine:

 >Ndb::checkFailedNode     
 | enter: theNoOfDBnodes: 2
 | info: i: 0, node_id: 14 
 | info: i: 1, node_id: 15 
 <Ndb::checkFailedNode     

Shutdown 2nd node:

>Ndb::checkFailedNode     
| enter: theNoOfDBnodes: 2
| info: i: 0, node_id: 14 
| info: i: 1, node_id: 0  
<Ndb::checkFailedNode
[19 Sep 2004 12:22] Anton Fedorov
"Full cluster" 5*15*5 -- 1st request for SHOW TABLE STATUS fine, second -- crash.

>Ndb::checkFailedNode       
| enter: theNoOfDBnodes: 15 
| info: i: 0, node_id: 11   
| info: i: 1, node_id: 0    
| info: i: 2, node_id: 3    
| info: i: 3, node_id: 65535
Assertion `node_id < 49' failed.

No node crashed at any time while the cluster was running.

Log around the problem (shortened a bit, all in thread 229384):

Input of command:
   sql_parse.cc:  1297: >dispatch_command
   sql_parse.cc:  1456: | query: SHOW TABLE STATUS FROM `voipfone`
   sql_parse.cc:  4025: | >mysql_parse
   sql_parse.cc:  3865: | | >mysql_init_query
   sql_parse.cc:  3903: | | <mysql_init_query
   sql_cache.cc:   921: | | >Query_cache::send_result_to_client
   sql_cache.cc:  1106: | | <Query_cache::send_result_to_client
   sql_parse.cc:  1863: | | >mysql_execute_command
   sql_parse.cc:  3605: | | | >check_access
   sql_parse.cc:  3607: | | | | enter: want_access: 1  master_access: 2097151
   sql_parse.cc:  3640: | | | <check_access
    sql_show.cc:   466: | | | >mysqld_extend_show_tables
      mf_pack.c:   281: | | | | >unpack_dirname
      mf_pack.c:   318: | | | | <unpack_dirname
    protocol.cc:   498: | | | | >send_fields
    protocol.cc:   596: | | | | <send_fields
    sql_show.cc:   373: | | | | >mysql_find_files
    mf_fn_ext.c:    40: | | | | | >fn_ext
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: '.'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: '..'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'balance.frm'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'limits.frm'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'paymentlog.frm'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'price.frm'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'reseller.frm'
    mf_fn_ext.c:    41: | | | | | | mfunkt: name: 'user.frm'
    mf_fn_ext.c:    53: | | | | | <fn_ext
    sql_show.cc:   446: | | | | | info: found: 6 files
       my_lib.c:    91: | | | | | <my_dirend
    sql_show.cc:   448: | | | | <mysql_find_files

Success story of getting info {table "balance"}:
    sql_base.cc:  1585: | | | | >open_ltable
        lock.cc:    89: | | | | | >mysql_lock_tables
        lock.cc:   172: | | | | | | >lock_external
 ha_ndbcluster.cc:2689: | | | | | | | >external_lock
 ha_ndbcluster.cc:3539: | | | | | | | | >check_ndb_connection
 ha_ndbcluster.cc:3550: | | | | | | | | <check_ndb_connection
 ha_ndbcluster.cc:2700: | | | | | | | | enter: transaction.thd_ndb->lock_count: 0
 ha_ndbcluster.cc:2704: | | | | | | | | info: lock_type != F_UNLCK
 ha_ndbcluster.cc:2713: | | | | | | | | trans: Starting transaction stmt
        Ndb.cpp:   313: | | | | | | | | >Ndb::startTransaction
    Ndblist.cpp:    32: | | | | | | | | | >Ndb::checkFailedNode
    Ndblist.cpp:    33: | | | | | | | | | | enter: theNoOfDBnodes: 15
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 0, node_id: 11
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 1, node_id: 12
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 2, node_id: 13
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 3, node_id: 14
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 4, node_id: 15
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 5, node_id: 21
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 6, node_id: 22
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 7, node_id: 23
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 8, node_id: 24
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 9, node_id: 25
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 10, node_id: 31
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 11, node_id: 32
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 12, node_id: 33
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 13, node_id: 34
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 14, node_id: 35
    Ndblist.cpp:    56: | | | | | | | | | <Ndb::checkFailedNode
        Ndb.cpp:   330: | | | | | | | | <Ndb::startTransaction
        Ndb.cpp:   424: | | | | | | | | >Ndb::startTransactionLocal
        Ndb.cpp:   425: | | | | | | | | | enter: nodeid: 0
        Ndb.cpp:   454: | | | | | | | | | exit: transaction id: 26
        Ndb.cpp:   455: | | | | | | | | <Ndb::startTransactionLocal
 ha_ndbcluster.cc:2804: | | | | | | | <external_lock
        lock.cc:   199: | | | | | | <lock_external
     thr_lock.c:   829: | | | | | | >thr_multi_lock
     thr_lock.c:   893: | | | | | | <thr_multi_lock
        lock.cc:   164: | | | | | <mysql_lock_tables
    sql_base.cc:  1619: | | | | <open_ltable
 ha_ndbcluster.cc:2374: | | | | >info
 ha_ndbcluster.cc:2375: | | | | | enter: flag: 22
 ha_ndbcluster.cc:2380: | | | | | info: HA_STATUS_NO_LOCK
 ha_ndbcluster.cc:2382: | | | | | info: HA_STATUS_TIME
 ha_ndbcluster.cc:2387: | | | | | info: HA_STATUS_VARIABLE
 ha_ndbcluster.cc: 171: | | | | | >ha_ndbcluster::records_update
 ha_ndbcluster.cc:4034: | | | | | | >ndb_get_table_statistics
 ha_ndbcluster.cc:4035: | | | | | | | enter: table: balance
        Ndb.cpp:   313: | | | | | | | >Ndb::startTransaction
    Ndblist.cpp:    32: | | | | | | | | >Ndb::checkFailedNode
    Ndblist.cpp:    33: | | | | | | | | | enter: theNoOfDBnodes: 15
    Ndblist.cpp:    56: | | | | | | | | <Ndb::checkFailedNode
        Ndb.cpp:   330: | | | | | | | <Ndb::startTransaction
        Ndb.cpp:   424: | | | | | | | >Ndb::startTransactionLocal
        Ndb.cpp:   425: | | | | | | | | enter: nodeid: 0
        Ndb.cpp:   454: | | | | | | | | exit: transaction id: 27
        Ndb.cpp:   455: | | | | | | | <Ndb::startTransactionLocal
        Ndb.cpp:   347: | | | | | | | >Ndb::hupp
    Ndblist.cpp:    32: | | | | | | | | >Ndb::checkFailedNode
    Ndblist.cpp:    33: | | | | | | | | | enter: theNoOfDBnodes: 15
    Ndblist.cpp:    56: | | | | | | | | <Ndb::checkFailedNode
        Ndb.cpp:   424: | | | | | | | | >Ndb::startTransactionLocal
        Ndb.cpp:   425: | | | | | | | | | enter: nodeid: 11
        Ndb.cpp:   454: | | | | | | | | | exit: transaction id: 28
        Ndb.cpp:   455: | | | | | | | | <Ndb::startTransactionLocal
        Ndb.cpp:   371: | | | | | | | <Ndb::hupp
        Ndb.cpp:   467: | | | | | | | >Ndb::closeTransaction
        Ndb.cpp:   467: | | | | | | | | >Ndb::closeTransaction
        Ndb.cpp:   541: | | | | | | | | <Ndb::closeTransaction
        Ndb.cpp:   541: | | | | | | | <Ndb::closeTransaction
 ha_ndbcluster.cc:4079: | | | | | | | exit: records: 0 commits: 0
 ha_ndbcluster.cc:4080: | | | | | | <ndb_get_table_statistics
 ha_ndbcluster.cc: 189: | | | | | <ha_ndbcluster::records_update
 ha_ndbcluster.cc:2397: | | | | <info
    sql_base.cc:   366: | | | | >close_thread_tables
        lock.cc:   205: | | | | | >mysql_unlock_tables
     thr_lock.c:   901: | | | | | | >thr_multi_unlock
     thr_lock.c:   792: | | | | | | | <thr_unlock
     thr_lock.c:   919: | | | | | | <thr_multi_unlock
        lock.cc:   369: | | | | | | >unlock_external
 ha_ndbcluster.cc:2689: | | | | | | | >external_lock
 ha_ndbcluster.cc:3539: | | | | | | | | >check_ndb_connection
 ha_ndbcluster.cc:3550: | | | | | | | | <check_ndb_connection
 ha_ndbcluster.cc:2700: | | | | | | | | enter: transaction.thd_ndb->lock_count: 1
 ha_ndbcluster.cc:2773: | | | | | | | | info: lock_type == F_UNLCK
 ha_ndbcluster.cc:2776: | | | | | | | | trans: Last external_lock
 ha_ndbcluster.cc:2786: | | | | | | | | trans: ending non-updating transaction
        Ndb.cpp:   467: | | | | | | | | >Ndb::closeTransaction
        Ndb.cpp:   541: | | | | | | | | <Ndb::closeTransaction
 ha_ndbcluster.cc:2792: | | | | | | | | warning: m_active_trans != NULL
 ha_ndbcluster.cc:2804: | | | | | | | <external_lock
        lock.cc:   384: | | | | | | <unlock_external
        lock.cc:   211: | | | | | <mysql_unlock_tables
    sql_base.cc:   400: | | | | | info: thd->open_tables=0x8614de8
    sql_base.cc:   425: | | | | | >close_thread_table
 ha_ndbcluster.cc:2598: | | | | | | >reset
 ha_ndbcluster.cc:2600: | | | | | | <reset
    sql_base.cc:   461: | | | | | <close_thread_table
    sql_base.cc:   418: | | | | <close_thread_tables

Then 'limits' is fine too, and it fails on 'paymentlog':
    sql_base.cc:  1585: | | | | >open_ltable
    sql_base.cc:   796: | | | | | >open_table
         hash.c:   144: | | | | | | >hash_search
         hash.c:   156: | | | | | | | exit: found key at 4
         hash.c:   158: | | | | | | <hash_search
    sql_base.cc:   950: | | | | | <open_table
        lock.cc:    89: | | | | | >mysql_lock_tables
    my_malloc.c:    30: | | | | | | >my_malloc
    my_malloc.c:    31: | | | | | | | my: size: 24  my_flags: 0
    my_malloc.c:    47: | | | | | | | exit: ptr: 0x8614638
    my_malloc.c:    48: | | | | | | <my_malloc
 ha_ndbcluster.cc:2628: | | | | | | >store_lock
 ha_ndbcluster.cc:2653: | | | | | | | exit: lock_type: 1
 ha_ndbcluster.cc:2655: | | | | | | <store_lock
        lock.cc:   172: | | | | | | >lock_external
 ha_ndbcluster.cc:2689: | | | | | | | >external_lock
 ha_ndbcluster.cc:3539: | | | | | | | | >check_ndb_connection
 ha_ndbcluster.cc:3550: | | | | | | | | <check_ndb_connection
 ha_ndbcluster.cc:2700: | | | | | | | | enter: transaction.thd_ndb->lock_count: 0
 ha_ndbcluster.cc:2704: | | | | | | | | info: lock_type != F_UNLCK
 ha_ndbcluster.cc:2713: | | | | | | | | trans: Starting transaction stmt
         Ndb.cpp:  313: | | | | | | | | >Ndb::startTransaction
     Ndblist.cpp:   32: | | | | | | | | | >Ndb::checkFailedNode
     Ndblist.cpp:   33: | | | | | | | | | | enter: theNoOfDBnodes: 15
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 0, node_id: 11
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 1, node_id: 12
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 2, node_id: 13
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 3, node_id: 14
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 4, node_id: 15
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 5, node_id: 21
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 6, node_id: 22
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 7, node_id: 23
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 8, node_id: 24
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 9, node_id: 25
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 10, node_id: 31
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 11, node_id: 32
     Ndblist.cpp:   38: | | | | | | | | | | info: i: 12, node_id: 33
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 13, node_id: 34
    Ndblist.cpp:    38: | | | | | | | | | | info: i: 14, node_id: 35
    Ndblist.cpp:    56: | | | | | | | | | <Ndb::checkFailedNode
        Ndb.cpp:   330: | | | | | | | | <Ndb::startTransaction
        Ndb.cpp:   424: | | | | | | | | >Ndb::startTransactionLocal
        Ndb.cpp:   425: | | | | | | | | | enter: nodeid: 0
        Ndb.cpp:   454: | | | | | | | | | exit: transaction id: 32
        Ndb.cpp:   455: | | | | | | | | <Ndb::startTransactionLocal
 ha_ndbcluster.cc: 230: | | | | | | | | >ha_ndbcluster::no_uncommitted_rows_reset
 ha_ndbcluster.cc: 233: | | | | | | | | <ha_ndbcluster::no_uncommitted_rows_reset
 ha_ndbcluster.cc: 202: | | | | | | | | >ha_ndbcluster::no_uncommitted_rows_init
 ha_ndbcluster.cc: 212: | | | | | | | | | info: id=8, no_uncommitted_rows_count=0
 ha_ndbcluster.cc: 214: | | | | | | | | <ha_ndbcluster::no_uncommitted_rows_init
 ha_ndbcluster.cc:2804: | | | | | | | <external_lock
        lock.cc:   199: | | | | | | <lock_external
     thr_lock.c:   829: | | | | | | >thr_multi_lock
     thr_lock.c:   830: | | | | | | | lock: data: 0x8614648  count: 1
     thr_lock.c:   430: | | | | | | | >thr_lock
     thr_lock.c:   439: | | | | | | | | lock: data: 0x8640c70  thread:229384  lock: 0x8641238  type: 1
     thr_lock.c:   600: | | | | | | | <thr_lock
     thr_lock.c:   893: | | | | | | <thr_multi_lock
        lock.cc:   164: | | | | | <mysql_lock_tables
    sql_base.cc:  1619: | | | | <open_ltable
 ha_ndbcluster.cc:2374: | | | | >info
 ha_ndbcluster.cc:2375: | | | | | enter: flag: 22
 ha_ndbcluster.cc:2380: | | | | | info: HA_STATUS_NO_LOCK
 ha_ndbcluster.cc:2382: | | | | | info: HA_STATUS_TIME
 ha_ndbcluster.cc:2387: | | | | | info: HA_STATUS_VARIABLE
 ha_ndbcluster.cc: 171: | | | | | >ha_ndbcluster::records_update
 ha_ndbcluster.cc: 175: | | | | | | info: id=8, no_uncommitted_rows_count=0
 ha_ndbcluster.cc:4034: | | | | | | >ndb_get_table_statistics
 ha_ndbcluster.cc:4035: | | | | | | | enter: table: paymentlog
        Ndb.cpp:   313: | | | | | | | >Ndb::startTransaction
    Ndblist.cpp:    32: | | | | | | | | >Ndb::checkFailedNode
    Ndblist.cpp:    33: | | | | | | | | | enter: theNoOfDBnodes: 15
    Ndblist.cpp:    38: | | | | | | | | | info: i: 0, node_id: 11
    Ndblist.cpp:    38: | | | | | | | | | info: i: 1, node_id: 0
    Ndblist.cpp:    38: | | | | | | | | | info: i: 2, node_id: 3
    Ndblist.cpp:    38: | | | | | | | | | info: i: 3, node_id: 65535
mysqld: Ndblist.cpp:40: void Ndb::checkFailedNode(): Assertion `node_id < 49' failed.

I can't guess where the memory gets corrupted :/
I will try to debug around it.
[19 Sep 2004 14:13] Anton Fedorov
OK, I ran it under valgrind and saw:

==10543== Thread 8:                                                             
==10543== Invalid read of size 4                                                
==10543==    at 0x81FDE30: ha_ndbcluster::no_uncommitted_rows_init(THD*) (ha_ndbcluster.cc:205)
==10543==    by 0x8204EFC: ha_ndbcluster::external_lock(THD*, int) (ha_ndbcluster.cc:2769)
==10543==    by 0x8160FE0: lock_external(THD*, st_table**, unsigned) (lock.cc:183)
==10543==    by 0x8160DFB: mysql_lock_tables(THD*, st_table**, unsigned) (lock.cc:116)
==10543==  Address 0x1BC9A128 is 8 bytes inside a block of size 167 free'd      
==10543==    at 0x1B905460: free (vg_replace_malloc.c:153)                      
==10543==    by 0x8276BEC: Ndb_local_table_info::destroy(Ndb_local_table_info*) (DictCache.cpp:37)
==10543==    by 0x826FE2E: NdbDictionaryImpl::~NdbDictionaryImpl() (NdbDictionaryImpl.cpp:605)
==10543==    by 0x8257E94: Ndb::~Ndb() (Ndbinit.cpp:220)

==10543== Thread 8:                                                             
==10543== Invalid read of size 4                                                
==10543==    at 0x81FDCC4: ha_ndbcluster::records_update() (ha_ndbcluster.cc:173)
==10543==    by 0x8203E46: ha_ndbcluster::info(unsigned) (ha_ndbcluster.cc:2388)
==10543==    by 0x821B604: mysqld_extend_show_tables(THD*, char const*, char const*) (sql_show.cc:533)
==10543==    by 0x817E4B7: mysql_execute_command(THD*) (sql_string.h:85)        
==10543==  Address 0x1BC9A124 is 4 bytes inside a block of size 167 free'd      
==10543==    at 0x1B905460: free (vg_replace_malloc.c:153)                      
==10543==    by 0x8276BEC: Ndb_local_table_info::destroy(Ndb_local_table_info*) (DictCache.cpp:37)
==10543==    by 0x826FE2E: NdbDictionaryImpl::~NdbDictionaryImpl() (NdbDictionaryImpl.cpp:605)
==10543==    by 0x8257E94: Ndb::~Ndb() (Ndbinit.cpp:220)

==10543== Thread 8:                                                             
==10543== Invalid read of size 4                                                
==10543==    at 0x81FDCDA: ha_ndbcluster::records_update() (ha_ndbcluster.cc:176)
==10543==    by 0x8203E46: ha_ndbcluster::info(unsigned) (ha_ndbcluster.cc:2388)
==10543==    by 0x821B604: mysqld_extend_show_tables(THD*, char const*, char const*) (sql_show.cc:533)
==10543==    by 0x817E4B7: mysql_execute_command(THD*) (sql_string.h:85)        
==10543==  Address 0x1BC9A12C is 12 bytes inside a block of size 167 free'd     
==10543==    at 0x1B905460: free (vg_replace_malloc.c:153)                      
==10543==    by 0x8276BEC: Ndb_local_table_info::destroy(Ndb_local_table_info*) (DictCache.cpp:37)
==10543==    by 0x826FE2E: NdbDictionaryImpl::~NdbDictionaryImpl() (NdbDictionaryImpl.cpp:605)
==10543==    by 0x8257E94: Ndb::~Ndb() (Ndbinit.cpp:220)

==10543== Thread 8:                                                             
==10543== Invalid read of size 4                                                
==10543==    at 0x81FDD30: ha_ndbcluster::records_update() (ha_ndbcluster.cc:188)
==10543==    by 0x8203E46: ha_ndbcluster::info(unsigned) (ha_ndbcluster.cc:2388)
==10543==    by 0x821B604: mysqld_extend_show_tables(THD*, char const*, char const*) (sql_show.cc:533)
==10543==    by 0x817E4B7: mysql_execute_command(THD*) (sql_string.h:85)        
==10543==  Address 0x1BC9A12C is 12 bytes inside a block of size 167 free'd     
==10543==    at 0x1B905460: free (vg_replace_malloc.c:153)                      
==10543==    by 0x8276BEC: Ndb_local_table_info::destroy(Ndb_local_table_info*) (DictCache.cpp:37)
==10543==    by 0x826FE2E: NdbDictionaryImpl::~NdbDictionaryImpl() (NdbDictionaryImpl.cpp:605)
==10543==    by 0x8257E94: Ndb::~Ndb() (Ndbinit.cpp:220)

......

==10543== Thread 8:
==10543== Conditional jump or move depends on uninitialised value(s)
==10543==    at 0x81FDE36: ha_ndbcluster::no_uncommitted_rows_init(THD*) (ha_ndbcluster.cc:205)
==10543==    by 0x8204EFC: ha_ndbcluster::external_lock(THD*, int) (ha_ndbcluster.cc:2769)
==10543==    by 0x8160FE0: lock_external(THD*, st_table**, unsigned) (lock.cc:183)
==10543==    by 0x8160DFB: mysql_lock_tables(THD*, st_table**, unsigned) (lock.cc:116)
[19 Sep 2004 18:53] Anton Fedorov
Problem fixed.

Patch:

diff -urN mysql-4.1.5-gamma-nightly-20040919/sql/ha_ndbcluster.h mysql-4.1.5-gamma-nightly-20040919-data/sql/ha_ndbcluster.h
--- mysql-4.1.5-gamma-nightly-20040919/sql/ha_ndbcluster.h  2004-09-19 04:57:41.000000000 +0100
+++ mysql-4.1.5-gamma-nightly-20040919-data/sql/ha_ndbcluster.h 2004-09-19 19:34:25.000000000 +0100
@@ -233,7 +233,9 @@
   NdbResultSet *m_active_cursor;
   Ndb *m_ndb;
   void *m_table;
-  void *m_table_info;
+// DataCompBoy: can't use cache for this object, it removed as soon as
+//              connection closed for some reason
+//  void *m_table_info;
   char m_dbname[FN_HEADLEN];
   //char m_schemaname[FN_HEADLEN];
   char m_tabname[FN_HEADLEN];

diff -urN mysql-4.1.5-gamma-nightly-20040919/sql/ha_ndbcluster.cc mysql-4.1.5-gamma-nightly-20040919-data/sql/ha_ndbcluster.cc
--- mysql-4.1.5-gamma-nightly-20040919/sql/ha_ndbcluster.cc 2004-09-19 04:57:41.000000000 +0100
+++ mysql-4.1.5-gamma-nightly-20040919-data/sql/ha_ndbcluster.cc    2004-09-19 19:42:45.000000000 +0100
@@ -169,7 +169,10 @@
 void ha_ndbcluster::records_update()
 {
   DBUG_ENTER("ha_ndbcluster::records_update");
-  struct Ndb_table_local_info *info= (struct Ndb_table_local_info *)m_table_info;
+  struct Ndb_table_local_info *info;
+  m_ndb->getDictionary()->getTable(m_tabname, (void**)&info);
   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
              ((const NDBTAB *)m_table)->getTableId(),
              info->no_uncommitted_rows_count));
@@ -200,7 +203,8 @@
 void ha_ndbcluster::no_uncommitted_rows_init(THD *thd)
 {
   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_init");
-  struct Ndb_table_local_info *info= (struct Ndb_table_local_info *)m_table_info;
+  struct Ndb_table_local_info *info;
+  m_ndb->getDictionary()->getTable(m_tabname, (void**)&info);
   Thd_ndb *thd_ndb= (Thd_ndb *)thd->transaction.thd_ndb;
   if (info->last_count != thd_ndb->count)
   {
@@ -217,7 +221,8 @@
 void ha_ndbcluster::no_uncommitted_rows_update(int c)
 {
   DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
-  struct Ndb_table_local_info *info= (struct Ndb_table_local_info *)m_table_info;
+  struct Ndb_table_local_info *info;
+  m_ndb->getDictionary()->getTable(m_tabname, (void**)&info);
   info->no_uncommitted_rows_count+= c;
   DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
              ((const NDBTAB *)m_table)->getTableId(),
@@ -603,7 +608,7 @@
   DBUG_ENTER("get_metadata");
   DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

-  if (!(tab= dict->getTable(m_tabname, &m_table_info)))
+  if (!(tab= dict->getTable(m_tabname)))
     ERR_RETURN(dict->getNdbError());
   DBUG_PRINT("info", ("Table schema version: %d", tab->getObjectVersion()));

@@ -3357,7 +3362,7 @@
   m_active_cursor(NULL),
   m_ndb(NULL),
   m_table(NULL),
-  m_table_info(NULL),
+//  m_table_info(NULL),
   m_table_flags(HA_REC_NOT_IN_SEQ |
        HA_NULL_IN_KEY |
        HA_AUTO_PART_KEY |
[20 Sep 2004 3:27] Anton Fedorov
It may be better to re-assign m_table too:
m_table = (void*)m_ndb->getDictionary()->getTable(m_tabname, (void**)&info);

But I see no problems so far with my patch as shown above.
[21 Sep 2004 11:00] Tomas Ulin
Thank you for your bug report. This issue has been committed to our
source repository of that product and will be incorporated into the
next release.

If necessary, you can access the source repository and build the latest
available version, including the bugfix, yourself. More information 
about accessing the source trees is available at
    http://www.mysql.com/doc/en/Installing_source_tree.html