Bug #10894 NDBD cored on start up in Slave Cluster
Submitted: 26 May 2005 22:57 Modified: 27 May 2005 2:22
Reporter: Jonathan Miller Email Updates:
Status: Not a Bug Impact on me:
None 
Category:MySQL Cluster: Cluster (NDB) storage engine Severity:S2 (Serious)
Version:5.1.0 OS:Linux (Linux)
Assigned to: Tomas Ulin CPU Architecture:Any

[26 May 2005 22:57] Jonathan Miller
Description:
While starting the Slave Cluster, the second NDBD process aborted and dumped core.

ST:

(gdb) bt
#0  0x40106da1 in kill () from /lib/libc.so.6
#1  0x40037f4a in pthread_kill () from /lib/libpthread.so.0
#2  0x400382f9 in raise () from /lib/libpthread.so.0
#3  0x40106b7a in raise () from /lib/libc.so.6
#4  0x40107d95 in abort () from /lib/libc.so.6
#5  0x081e717f in NdbShutdown (type=136502079, restartType=NRT_Default)
    at Emulator.cpp:233
#6  0x081e7a09 in Configuration::fetch_configuration() (this=0x827dbf0)
    at Configuration.cpp:228
#7  0x0809e696 in main (argc=4, argv=0x40204160) at main.cpp:80
#8  0x400f562d in __libc_start_main () from /lib/libc.so.6

frame 7:
(gdb) frame 7
#7  0x0809e696 in main (argc=4, argv=0x40204160) at main.cpp:80
80          theConfig->fetch_configuration();
(gdb) list
75
76        { // Do configuration
77      #ifndef NDB_WIN32
78              signal(SIGPIPE, SIG_IGN);
79      #endif
80          theConfig->fetch_configuration();
81        }
82
83        my_setwd(NdbConfig_get_path(0), MYF(0));

Frame 6
(gdb) frame 6
#6  0x081e7a09 in Configuration::fetch_configuration() (this=0x827dbf0)
    at Configuration.cpp:228
228                   "Unable to alloc node id", m_config_retriever->getErrorString());
(gdb) l
223
224       globalData.ownId = cr.allocNodeId(2 /*retry*/,3 /*delay*/);
225
226       if(globalData.ownId == 0){
227         ERROR_SET(fatal, ERR_INVALID_CONFIG,
228                   "Unable to alloc node id", m_config_retriever->getErrorString());
229       }
230
231       ndb_mgm_configuration * p = cr.getConfig();
232       if(p == 0){
(gdb)

Frame5:
(gdb) frame 5
#5  0x081e717f in NdbShutdown (type=136502079, restartType=NRT_Default)
    at Emulator.cpp:233
233           abort();
(gdb) l
228
229         if(type != NST_Normal && type != NST_Restart){
230           ndbout << "Error handler shutdown completed - " << exitAbort << endl;
231     #if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
232           signal(6, SIG_DFL);
233           abort();
234     #else
235           exit(-1);
236     #endif
237         }

(gdb) frame 4
#4  0x40107d95 in abort () from /lib/libc.so.6
(gdb) l
80          theConfig->fetch_configuration();
81        }
82
83        my_setwd(NdbConfig_get_path(0), MYF(0));
84
85        if (theConfig->getDaemonMode()) {
86          // Become a daemon
87          char *lockfile= NdbConfig_PidFileName(globalData.ownId);
88          char *logfile=  NdbConfig_StdoutFileName(globalData.ownId);
89          NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);
(gdb)

#3  0x40106b7a in raise () from /lib/libc.so.6
(gdb) list
90
91          if (NdbDaemon_Make(lockfile, logfile, 0) == -1) {
92            ndbout << "Cannot become daemon: " << NdbDaemon_ErrorText << endl;
93            return 1;
94          }
95        }
96
97      #ifndef NDB_WIN32
98        for(pid_t child = fork(); child != 0; child = fork()){
99          /**
(gdb)

#2  0x400382f9 in raise () from /lib/libpthread.so.0
(gdb) l
100          * Parent
101          */
102         catchsigs(true);
103
104         int status = 0;
105         while(waitpid(child, &status, 0) != child);
106         if(WIFEXITED(status)){
107           switch(WEXITSTATUS(status)){
108           case NRT_Default:
109             g_eventLogger.info("Angel shutting down");

#1  0x40037f4a in pthread_kill () from /lib/libpthread.so.0
(gdb) list
110             exit(0);
111             break;
112           case NRT_NoStart_Restart:
113             theConfig->setInitialStart(false);
114             globalData.theRestartFlag = initial_state;
115             break;
116           case NRT_NoStart_InitialStart:
117             theConfig->setInitialStart(true);
118             globalData.theRestartFlag = initial_state;
119             break;
(gdb)
(gdb) bt full
#0  0x40106da1 in kill () from /lib/libc.so.6
No symbol table info available.
#1  0x40037f4a in pthread_kill () from /lib/libpthread.so.0
No symbol table info available.
#2  0x400382f9 in raise () from /lib/libpthread.so.0
No symbol table info available.
#3  0x40106b7a in raise () from /lib/libc.so.6
No symbol table info available.
#4  0x40107d95 in abort () from /lib/libc.so.6
No symbol table info available.
#5  0x081e717f in NdbShutdown (type=136502079, restartType=NRT_Default)
    at Emulator.cpp:233
        restart = 24
        shutting = 0x0
#6  0x081e7a09 in Configuration::fetch_configuration() (this=0x827dbf0)
    at Configuration.cpp:228
        cr = (ConfigRetriever &) @0x827e020: {errorString = {m_chr = 0x0,
    m_len = 81}, latestErrorType = CR_ERROR, _ownNodeId = 0, m_version = 327936,
  m_node_type = 0, m_handle = 0x0}
        p = (ndb_mgm_configuration *) 0x827e050
        iter = {m_sectionNo = 0, m_typeOfSection = 268435456, m_config = {
    m_cfg = @0x827dbf0, m_currentSection = 3221223956}}
#7  0x0809e696 in main (argc=4, argv=0x40204160) at main.cpp:80
        theConfig = (Configuration *) 0x827dbf0
#8  0x400f562d in __libc_start_main () from /lib/libc.so.6

core.8225 located @ ndb.mysql.com:/home/jonathan.

How to repeat:
Start the NDBD processes one after another in quick succession.
[27 May 2005 2:22] Stewart Smith
This is expected behaviour for certain types of builds.

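The code snippet below (from NdbShutdown() in Emulator.cpp) shows that when VM_TRACE is enabled we abort(); otherwise we just exit(). The snippet is the watchdog branch; the error-handler branch seen in frame 5 of the backtrace (Emulator.cpp:229-237) follows the same pattern.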

Verified on debug and non-debug builds: only the debug build aborts and leaves a core file.

#if defined VM_TRACE && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
    exitAbort = "aborting";
#else
    exitAbort = "exiting";
#endif
    
    if(type == NST_Watchdog){
      /**
       * Very serious, don't attempt to free, just die!!
       */
      ndbout << "Watchdog shutdown completed - " << exitAbort << endl;
#if defined VM_TRACE && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
      signal(6, SIG_DFL);
      abort();
#else
      exit(-1);
#endif
    }