Bug #25997 Valgrind reports leak in event code in mysqld during handling of node failure (NF) and cluster failure (CF)
Submitted: 1 Feb 2007 1:18 Modified: 8 Feb 2007 7:42
Reporter: Tomas Ulin Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Cluster: Cluster (NDB) storage engine Severity:S3 (Non-critical)
Version:5.1.15 OS:
Assigned to: Tomas Ulin CPU Architecture:Any

[1 Feb 2007 1:18] Tomas Ulin
Description:
VALGRIND: '48 bytes in 1 blocks are indirectly lost in loss record 5 of 8'
    COUNT: 2
    FUNCTION: malloc    FILES:    master.err master1.err
    TESTS:    ndb_alter_table:ndb_binlog_discover
    STACK: at 0x4A20859: malloc (vg_replace_malloc.c:149)
             by 0x9348BF: EventBufData_list::add_gci_op(EventBufData_list::Gci_op) (NdbEventOperationImpl.cpp:2607)
             by 0x93A9A6: NdbEventBuffer::insertDataL(NdbEventOperationImpl*, SubTableData const*, LinearSectionPtr*) (NdbEventOperationImpl.hpp:264)
             by 0x93B194: NdbEventBuffer::insert_event(NdbEventOperationImpl*, SubTableData&, LinearSectionPtr*, unsigned&) (NdbEventOperationImpl.cpp:1619)
             by 0x93B2F6: NdbEventBuffer::report_node_failure(unsigned) (NdbEventOperationImpl.cpp:1688)
             by 0x91F270: Ndb::report_node_failure_completed(unsigned) (Ndbif.cpp:273)
             by 0x91F37D: Ndb::statusMessage(void*, unsigned, bool, bool) (Ndbif.cpp:238)
             by 0x992FB2: TransporterFacade::ReportNodeFailureComplete(unsigned short) (TransporterFacade.cpp:836)
             by 0x994D32: ClusterMgr::execNF_COMPLETEREP(unsigned const*) (ClusterMgr.cpp:462)
             by 0x9965C0: ClusterMgr::reportNodeFailed(unsigned short, bool) (ClusterMgr.cpp:547)
             by 0x967A7B: TransporterRegistry::report_disconnect(unsigned short, int) (TransporterRegistry.cpp:1027)
             by 0x997B89: TCP_Transporter::doReceive() (Transporter.hpp:162)
             by 0x96A01A: TransporterRegistry::performReceive() (TransporterRegistry.cpp:809)
             by 0x991A10: TransporterFacade::threadMainReceive() (TransporterFacade.cpp:513)
             by 0x991A38: runReceiveResponse_C (TransporterFacade.cpp:485)
             by 0x983059: ndb_thread_wrapper (NdbThread.c:81)

VALGRIND: '80 (32 direct, 48 indirect) bytes in 1 blocks are definitely lost in loss record 4 of 8'
    COUNT: 2
    FUNCTION: malloc    FILES:    master.err master1.err
    TESTS:    ndb_alter_table:ndb_binlog_discover
    STACK: at 0x4A20859: malloc (vg_replace_malloc.c:149)
             by 0x9346E9: EventBufData_list::move_gci_ops(EventBufData_list*, unsigned long long) (NdbEventOperationImpl.cpp:2655)
             by 0x938911: EventBufData_list::append_list(EventBufData_list*, unsigned long long) (NdbEventOperationImpl.cpp:2579)
             by 0x938BD3: NdbEventBuffer::execSUB_GCP_COMPLETE_REP(SubGcpCompleteRep const*) (NdbEventOperationImpl.cpp:1514)
             by 0x93B6A4: NdbEventBuffer::completeClusterFailed() (NdbEventOperationImpl.cpp:1781)
             by 0x91F2A8: Ndb::report_node_failure_completed(unsigned) (Ndbif.cpp:278)
             by 0x91F37D: Ndb::statusMessage(void*, unsigned, bool, bool) (Ndbif.cpp:238)
             by 0x992FB2: TransporterFacade::ReportNodeFailureComplete(unsigned short) (TransporterFacade.cpp:836)
             by 0x994D32: ClusterMgr::execNF_COMPLETEREP(unsigned const*) (ClusterMgr.cpp:462)
             by 0x9965C0: ClusterMgr::reportNodeFailed(unsigned short, bool) (ClusterMgr.cpp:547)
             by 0x967A7B: TransporterRegistry::report_disconnect(unsigned short, int) (TransporterRegistry.cpp:1027)
             by 0x997B89: TCP_Transporter::doReceive() (Transporter.hpp:162)
             by 0x96A01A: TransporterRegistry::performReceive() (TransporterRegistry.cpp:809)
             by 0x991A10: TransporterFacade::threadMainReceive() (TransporterFacade.cpp:513)
             by 0x991A38: runReceiveResponse_C (TransporterFacade.cpp:485)
             by 0x983059: ndb_thread_wrapper (NdbThread.c:81)

How to repeat:
Run the affected tests (ndb_alter_table, ndb_binlog_discover) under Valgrind in a push build.

Suggested fix:
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2007/01/31 08:23:19+07:00 tomas@poseidon.mysql.com 
#   (recommit from 5.1 main)
#   valgrind leak
#   - no injected events if operation is not connected
# 
# storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
#   2007/01/31 08:23:11+07:00 tomas@poseidon.mysql.com +29 -27
#   (recommit from 5.1 main)
#   valgrind leak
#   - no injected events if operation is not connected
# 
diff -Nru a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp
--- a/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp	2007-02-01 08:13:49 +07:00
+++ b/storage/ndb/src/ndbapi/NdbEventOperationImpl.cpp	2007-02-01 08:13:49 +07:00
@@ -1611,17 +1611,24 @@
                              Uint32 &oid_ref)
 {
   NdbEventOperationImpl *dropped_ev_op = m_dropped_ev_op;
+  DBUG_PRINT("info", ("gci: %u", data.gci));
   do
   {
     do
     {
-      oid_ref = impl->m_oid;
-      insertDataL(impl, &data, ptr);
+      if (impl->m_node_bit_mask.get(0u))
+      {
+        oid_ref = impl->m_oid;
+        insertDataL(impl, &data, ptr);
+      }
       NdbEventOperationImpl* blob_op = impl->theBlobOpList;
       while (blob_op != NULL)
       {
-        oid_ref = blob_op->m_oid;
-        insertDataL(blob_op, &data, ptr);
+        if (blob_op->m_node_bit_mask.get(0u))
+        {
+          oid_ref = blob_op->m_oid;
+          insertDataL(blob_op, &data, ptr);
+        }
         blob_op = blob_op->m_next;
       }
     } while((impl = impl->m_next));
@@ -1806,6 +1813,7 @@
     switch (operation)
     {
     case NdbDictionary::Event::_TE_NODE_FAILURE:
+      DBUG_ASSERT(op->m_node_bit_mask.get(0u) != 0);
       op->m_node_bit_mask.clear(SubTableData::getNdbdNodeId(ri));
       DBUG_PRINT("info",
                  ("_TE_NODE_FAILURE: m_ref_count: %u for op: %p id: %u",
@@ -1821,29 +1829,23 @@
       DBUG_RETURN_EVENT(0);
       break;
     case NdbDictionary::Event::_TE_CLUSTER_FAILURE:
-      if (op->m_node_bit_mask.get(0))
-      {
-        op->m_node_bit_mask.clear();
-        DBUG_ASSERT(op->m_ref_count > 0);
-        // remove kernel reference
-        // added in execute_nolock
-        op->m_ref_count--;
-        DBUG_PRINT("info", ("_TE_CLUSTER_FAILURE: m_ref_count: %u for op: %p",
-                            op->m_ref_count, op));
-        if (op->theMainOp)
-        {
-          DBUG_ASSERT(op->m_ref_count == 0);
-          DBUG_ASSERT(op->theMainOp->m_ref_count > 0);
-          // remove blob reference in main op
-          // added in execute_no_lock
-          op->theMainOp->m_ref_count--;
-          DBUG_PRINT("info", ("m_ref_count: %u for op: %p",
-                              op->theMainOp->m_ref_count, op->theMainOp));
-        }
-      }
-      else
-      {
-        DBUG_ASSERT(op->m_node_bit_mask.isclear() != 0);
+      DBUG_ASSERT(op->m_node_bit_mask.get(0u) != 0);
+      op->m_node_bit_mask.clear();
+      DBUG_ASSERT(op->m_ref_count > 0);
+      // remove kernel reference
+      // added in execute_nolock
+      op->m_ref_count--;
+      DBUG_PRINT("info", ("_TE_CLUSTER_FAILURE: m_ref_count: %u for op: %p",
+                          op->m_ref_count, op));
+      if (op->theMainOp)
+      {
+        DBUG_ASSERT(op->m_ref_count == 0);
+        DBUG_ASSERT(op->theMainOp->m_ref_count > 0);
+        // remove blob reference in main op
+        // added in execute_no_lock
+        op->theMainOp->m_ref_count--;
+        DBUG_PRINT("info", ("m_ref_count: %u for op: %p",
+                            op->theMainOp->m_ref_count, op->theMainOp));
       }
       break;
     case NdbDictionary::Event::_TE_STOP:
[7 Feb 2007 16:50] Tomas Ulin
pushed to 5.1.16
[8 Feb 2007 7:42] Jon Stephens
Thank you for your bug report. A fix for this issue has been committed to our source repository for that product and will be incorporated into the next release.

If necessary, you can access the source repository and build the latest available version, including the bug fix. More information about accessing the source trees is available at

    http://dev.mysql.com/doc/en/installing-source.html

Documented bugfix in 5.1.16 changelog.