MySQL Bugs: #51054: mysqld segfault after binlog enabled

Bug #51054	mysqld segfault after binlog enabled
Submitted:	10 Feb 2010 10:15	Modified:	9 Sep 2010 17:06
Reporter:	Pascal Schmiel	Email Updates:
Status:	No Feedback	Impact on me:	None
Category:	MySQL Server: Replication	Severity:	S1 (Critical)
Version:	5.1.37, 5.5.x, next-mr-merge	OS:	Linux (RHEL5)
Assigned to:	Assigned Account	CPU Architecture:	Any

Description:
We're running a high-traffic Webservice. There were no problems in the past but since we have enabled the binlog and changed some binlog related settings mysqld is segfaulting frequently.
The new settings are:

log-bin         = /var/lib/mysql/binlog/mysql-bin
server-id       = 10
sync_binlog     = 1
log_slave_updates
innodb_flush_log_at_trx_commit  = 1
binlog-format = MIXED
max_binlog_size = 500M
expire_logs_days        = 10
innodb_support_xa                       = 1
innodb_locks_unsafe_for_binlog          = 0

Here are some informations about the system:

- Linux db 2.6.18-164.11.1.el5 #1 SMP Wed Jan 6 13:26:04 EST 2010 x86_64 x86_64 x86_64 GNU/Linux

- Used filesystem for DB is ext3 with "noatime" mount option. Binary logs go to seperate ext3 on RAID1

Here is the mysqld.log output:

100210  3:02:48 - mysqld got signal 11 ;                                                                                                                                                                  
blabla                                                                                                                                                                                       

key_buffer_size=536870912
read_buffer_size=131072  
max_used_connections=450 
max_threads=500          
threads_connected=150    
It is possible that mysqld could use up to 
key_buffer_size + (read_buffer_size + sort_buffer_size)*max_threads = 1617373 K
bytes of memory                                                                
Hope that's ok; if not, decrease some variables in the equation.               

thd: 0x2ab1ccd545d0
Attempting backtrace. You can use the following information to find out
where mysqld died. If you see no messages after this, something went   
terribly wrong...                                                      
stack_bottom = 0x62cc7f20 thread_stack 0x40000                         
/usr/sbin/mysqld(my_print_stacktrace+0x2e)[0x8496fe]                   
/usr/sbin/mysqld(handle_segfault+0x322)[0x5be9e2]                      
/lib64/libpthread.so.0[0x356f80e930]                                   
/usr/sbin/mysqld(lock_sec_rec_cons_read_sees+0x42)[0x7805b2]           
/usr/sbin/mysqld(row_search_for_mysql+0x16a5)[0x7b7255]                
/usr/sbin/mysqld(_ZN11ha_innobase13general_fetchEPhjj+0x79)[0x72f539]  
/usr/sbin/mysqld(_ZN7handler21read_multi_range_nextEPP18st_key_multi_range+0x29)[0x6958f9]
/usr/sbin/mysqld(_ZN18QUICK_RANGE_SELECT8get_nextEv+0x167)[0x682567]                      
/usr/sbin/mysqld(_ZN26QUICK_ROR_INTERSECT_SELECT8get_nextEv+0x29)[0x682289]               
/usr/sbin/mysqld[0x691d01]                                                                
/usr/sbin/mysqld(_Z10sub_selectP4JOINP13st_join_tableb+0x56)[0x622446]                    
/usr/sbin/mysqld[0x628f50]                                                                
/usr/sbin/mysqld(_ZN4JOIN4execEv+0x90c)[0x63505c]                                         
/usr/sbin/mysqld(_Z12mysql_selectP3THDPPP4ItemP10TABLE_LISTjR4ListIS1_ES2_jP8st_orderSB_S2_SB_yP13select_resultP18st_select_lex_unitP13st_select_lex+0x16e)[0x636aae]
/usr/sbin/mysqld(_Z13handle_selectP3THDP6st_lexP13select_resultm+0x169)[0x6373c9]                                                                                    
/usr/sbin/mysqld[0x5c95d4]                                                                                                                                           
/usr/sbin/mysqld(_Z21mysql_execute_commandP3THD+0x4c1)[0x5cbc81]                                                                                                     
/usr/sbin/mysqld(_Z11mysql_parseP3THDPKcjPS2_+0x1f1)[0x5d1161]                                                                                                       
/usr/sbin/mysqld(_Z16dispatch_command19enum_server_commandP3THDPcj+0xfd0)[0x5d2150]                                                                                  
/usr/sbin/mysqld(_Z10do_commandP3THD+0xe4)[0x5d26e4]                                                                                                                 
/usr/sbin/mysqld(handle_one_connection+0x6f0)[0x5c5930]                                                                                                              
/lib64/libpthread.so.0[0x356f806617]                                                                                                                                 
/lib64/libc.so.6(clone+0x6d)[0x356f0d3c2d]                                                                                                                           
Trying to get some variables.                                                                                                                                        
Some pointers may be invalid and cause the dump to abort...                                                                                                          
thd->query at 0x3ad87d50 = select count(this_.id) as y0_ from im_contact this_ inner join im_profile profiledto1_ on this_.friend=profiledto1_.userId where this_.user=4040934 and this_.status=1 and this_.friend in (select s_.uid as y0_ from session_log s_ where s_.`validto`>'2010-02-10 03:02:47.0') and profiledto1_.memberStatus<>0                                                                                            
thd->thread_id=430893                                                                                                                                                                                               
thd->killed=NOT_KILLED                                                                                                                                                                                              
The manual page at http://dev.mysql.com/doc/mysql/en/crashing.html contains                                                                                                                                         
information that should help you find out what is causing the crash.                                                                                                                                                
mysqld: my_new.cc:51: int __cxa_pure_virtual(): Assertion `! "Aborted: pure virtual method called."' failed.                                                                                                        
mysqld: my_new.cc:51: int __cxa_pure_virtual(): Assertion `! "Aborted: pure virtual method called."' failed.                                                                                                        
Fatal signal 6 while backtracing                                                                                                                                                                                    
100210 03:02:51 mysqld_safe Number of processes running now: 0                                                                                                                                                      
100210 03:02:51 mysqld_safe mysqld restarted                                                                                                                                                                        

In addition to the segfaulting we have much more deadlocks since the new settings are enabled, but i don't think it's related to the segfault.

How to repeat:
N/A

I am not sure how crash while execuing SELECT query can be related to binlog being enabled... Anyway, please, try to repeat with a newer version, 5.1.43. In case of the same problem, please, send your my.cnf file content and EXPLAIN results for that crashing SELECT.

I'm not sure if the SELECT query is the cause of the crash because it is executed about 20000 times / hour.
nevertheless i will try an update asap.

this looks alot like bug #39022 to me!

we can now reproduce the bug. it occures when executing this query:

DELETE FROM users.session_log WHERE (validto IS NULL or validto < '2010-02-11 00:00:00' ) AND timestamp not like '2010-02-11%;

it is not reproducable in our development-system, i think because there are no deadlocks.

Thank you for the feedback.

As this really looks like bug #39022 which is fixed in 5.1.45 please upgrade to current version 5.1.47 and inform us if problem still exists.

No feedback was provided for this bug for over a month, so it is
being suspended automatically. If you are able to provide the
information that was originally requested, please do so and change
the status of the bug back to "Open".

Re-opening the bug as something similar is happening in pb.

Please, take a look at http://tinyurl.com/2eaojup

Alfranio,

thank you for the feedback. Set to "Verified" as we have this crash repeated and documented on our boxes.

HOW TO REPEAT
-------------
. Get mysql-next-mr.crash-safe.

. run:

#!/bin/bash

for i in `seq 1 300`; do
  ./mtr --parallel=auto --mem rpl_mixed_crash_safe rpl_stm_crash_safe rpl_row_crash_safe 2>&1 | tee output.log
  res=`grep fail output.log`
  if [ "!$res" != "!" ]; then
    exit $?
  fi
done

The test cases crash the slave periodically and verify if the safely recovery.
Sporadically, after a crash, there will be:

mysqld: /home/acorreia/workspace.sun/repository.mysql.new/bzrwork/wl-2775/mysql-next-mr-rpl-merge.crash-safe.2775/mysys/my_new.cc:51: int __cxa_pure_virtual(): Assertion `! "Aborted: pure virtual method called."' failed.
100830  4:56:00 [Note] Plugin 'FEDERATED' is disabled.
InnoDB: The InnoDB memory heap is disabled
InnoDB: Mutexes and rw_locks use GCC atomic builtins
InnoDB: Compressed tables use zlib 1.2.3.3
100830  4:56:00  InnoDB: highest supported file format is Barracuda.
InnoDB: The log sequence number in ibdata files does not match
InnoDB: the log sequence number in the ib_logfiles!
100830  4:56:00  InnoDB: Database was not shut down normally!
InnoDB: Starting crash recovery.
InnoDB: Reading tablespace information from the .ibd files...
InnoDB: Restoring possible half-written data pages from the doublewrite
InnoDB: buffer...
InnoDB: 1 transaction(s) which must be rolled back or cleaned up
InnoDB: in total 4 row operations to undo
InnoDB: Trx id counter is 2900
InnoDB: Last MySQL binlog file position 0 636, file name ./slave-bin.000018
100830  4:56:00 InnoDB 1.2.0 started; log sequence number 1703595
100830  4:56:00 [Note] Recovering after a crash using slave-bin
100830  4:56:00 [Note] Starting crash recovery...
100830  4:56:00 [Note] Crash recovery finished.
InnoDB: Starting in background the rollback of uncommitted transactions
100830  4:56:00  InnoDB: Rolling back trx with id 2713, 4 rows to undo

Hi all,

After some tests, I've figured out that the new failure is not related to Innodb. We have the following stacktrace:

/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(__cxa_pure_virtual+0x6f)[0xa3d523]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z23get_cached_table_accessP22st_grant_internal_infoPKcS2_+0x73)[0x54416d]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z11check_grantP3THDmP10TABLE_LISTbjb+0x18c)[0x546dcc]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z18check_table_accessP3THDmP10TABLE_LISTbjb+0x238)[0x5abc78]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z21mysql_execute_commandP3THD+0x740)[0x5ad8be]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z11mysql_parseP3THDPcjP12Parser_state+0x1c9)[0x5b4dad]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z16dispatch_command19enum_server_commandP3THDPcj+0x9cf)[0x5b620b]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z10do_commandP3THD+0x227)[0x5b7603]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(_Z24do_handle_one_connectionP3THD+0x135)[0x686553]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(handle_one_connection+0x2d)[0x686619]
/export/home2/tmp/ac230274/mysql-5.6.99-m5/sql/mysqld(pfs_spawn_thread+0x92)[0x9f5c2e]
/lib64/libpthread.so.0[0x33d20062f7]
/lib64/libc.so.6(clone+0x6d)[0x33d14d1b6d]
Fatal signal 11 while backtracing 

The problem stems from the fact that destructor of both ACL_internal_schema_access and ACL_internal_table_access are called while exiting the server.

Specifically, the test case explicitly crashes the server by calling abort() that calls

  handle_segfault() --> exit(1)

While processing exit(1), associated handlers/callbacks are executed and destroy
the ACL_internal_schema_access and ACL_internal_table_access. Meanwhile, another
thread is able to start processing a statement and fails because the objects
ACL_internal_schema_access and ACL_internal_table_access do not exist.

I've made the following changes to the code to get the stacktrace:

int __cxa_pure_virtual()
{
DBUG_ENTER("__cxa_pure_virtual");
fprintf(stderr,"Aborted: pure virtual method called.\n");
fflush(stderr);
void *addrs[128];
char **strings= NULL;
int n= backtrace(addrs, array_elements(addrs));
strings= backtrace_symbols(addrs, n);
backtrace_symbols_fd(addrs, n, fileno(stderr));
fflush(stderr);
DBUG_RETURN(1);
}

mysql-test/suite/rpl/t/rpl_*_crash_safe-slave.opt
--skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --relay-log-recovery=1