Bug #57230 STOP SLAVE hangs if executed in parallel with a group of events
Submitted: 4 Oct 2010 21:46 Modified: 8 Oct 2010 14:59
Reporter: Elena Stepanova Email Updates:
Status: Won't fix Impact on me:
None 
Category:MySQL Server: Replication Severity:S3 (Non-critical)
Version:5.1 OS:Any
Assigned to: Daogang Qu CPU Architecture:Any

[4 Oct 2010 21:46] Elena Stepanova
Description:
If slave is stopped while SQL thread is executing a long group of events, STOP SLAVE command hangs for what seems to be forever.

Could not reproduce on 5.5 -- the same test case results in a different error, see bug#57225

All threads bt:

Using host libthread_db library "/lib64/libthread_db.so.1".
[Thread debugging using libthread_db enabled]
[New Thread 47276647829696 (LWP 2332)]
[New Thread 1083464000 (LWP 2353)]
[New Thread 1082665280 (LWP 2350)]
[New Thread 1082399040 (LWP 2335)]
0x00002aff738f66e2 in select () from /lib64/libc.so.6

Thread 4 (Thread 1082399040 (LWP 2335)):
#0  0x00002aff731cb8e8 in do_sigwait () from /lib64/libpthread.so.0
#1  0x00002aff731cb98d in sigwait () from /lib64/libpthread.so.0
#2  0x00000000005d61d9 in signal_hand (arg=<value optimized out>) at mysqld.cc:2811
#3  0x00002aff731c4143 in start_thread () from /lib64/libpthread.so.0
#4  0x00002aff738fc8cd in clone () from /lib64/libc.so.6
#5  0x0000000000000000 in ?? ()

Thread 3 (Thread 1082665280 (LWP 2350)):
#0  0x00002aff731c83b7 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00000000008e316c in safe_cond_timedwait (cond=0xd18a20, mp=0xd18988, abstime=0xffffffffffffff92, file=0xa381d3 "slave.cc", line=535)
    at thr_mutex.c:275
#2  0x00000000007350cb in terminate_slave_thread (thd=0xd56640, term_lock=0xd18988, term_cond=0xd18a20, slave_running=0xd19198,
    skip_lock=true) at slave.cc:535
#3  0x00000000007352d5 in terminate_slave_threads (mi=0xd15eb0, thread_mask=3, skip_lock=176) at slave.cc:426
#4  0x0000000000740e31 in stop_slave (thd=0xd21900, mi=0xd15eb0, net_report=true) at sql_repl.cc:968
#5  0x00000000005ee6e9 in mysql_execute_command (thd=0xd21900) at sql_parse.cc:2887
#6  0x00000000005ef2c0 in mysql_parse (thd=0xd21900, rawbuf=0xd34c60 "STOP SLAVE", length=10, found_semicolon=0x40881e50)
    at sql_parse.cc:6051
#7  0x00000000005f05b1 in dispatch_command (command=COM_QUERY, thd=0xd21900, packet=0xd2cc31 "STOP SLAVE",
    packet_length=<value optimized out>) at sql_parse.cc:1260
#8  0x00000000005f15b2 in do_command (thd=0xd21900) at sql_parse.cc:888
#9  0x00000000005e0713 in handle_one_connection (arg=<value optimized out>) at sql_connect.cc:1136
#10 0x00002aff731c4143 in start_thread () from /lib64/libpthread.so.0
#11 0x00002aff738fc8cd in clone () from /lib64/libc.so.6
#12 0x0000000000000000 in ?? ()

Thread 2 (Thread 1083464000 (LWP 2353)):
#0  0x00002aff731c81c6 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00000000008e327d in safe_cond_wait (cond=0xd17ce0, mp=0xd17738, file=0xa48fc7 "log.cc", line=4916) at thr_mutex.c:237
#2  0x0000000000684ae7 in MYSQL_BIN_LOG::wait_for_update (this=0xd17730, thd=0xd56640, is_slave=<value optimized out>) at log.cc:4916
#3  0x000000000073cdc2 in next_event (rli=0xd172a0) at slave.cc:4269
#4  0x000000000073d364 in exec_relay_log_event (thd=0xd56640, rli=0xd172a0) at slave.cc:2260
#5  0x000000000073e2a3 in handle_slave_sql (arg=<value optimized out>) at slave.cc:3077
#6  0x00002aff731c4143 in start_thread () from /lib64/libpthread.so.0
#7  0x00002aff738fc8cd in clone () from /lib64/libc.so.6
#8  0x0000000000000000 in ?? ()

Thread 1 (Thread 47276647829696 (LWP 2332)):
#0  0x00002aff738f66e2 in select () from /lib64/libc.so.6
#1  0x00000000005dad5e in handle_connections_sockets (arg=<value optimized out>) at mysqld.cc:5108
#2  0x00000000005dd056 in main (argc=<value optimized out>, argv=<value optimized out>) at mysqld.cc:4592
#0  0x00002aff738f66e2 in select () from /lib64/libc.so.6

How to repeat:
--source include/master-slave.inc
--source include/have_binlog_format_row.inc

--perl

open(O_FILE, ">> $ENV{MYSQLTEST_VARDIR}/tb.txt") or die("can't open
\"$ENV{MYSQLTEST_VARDIR}/tb.txt\": $!");

for my $count ( 1..100000 )
{
        my $num = int(rand(9000000000))+1000000000;
        my $str = 'Test name';
        my $year = 1900 + ((localtime)[5]) - int(rand(100));
        print O_FILE ("$num\t$str\t$year\n");
}
close(O_FILE);

EOF

--disconnect master1
--disconnect slave1

--connection master

CREATE TABLE t_celosia_ddl_no_partitions (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
                user_num BIGINT UNSIGNED,
                name TINYTEXT NOT NULL,
                year YEAR,
                modified TIME DEFAULT NULL,
                KEY user_num (user_num),
                INDEX name (name(255))) ENGINE = MyISAM;

CREATE TABLE t_celosia_log (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
                table_name VARCHAR(32),
                update_time DATETIME NOT NULL,
                operation VARCHAR(32),
                connection INT,
                KEY update_time (update_time)) ENGINE = MyISAM;

CREATE TRIGGER ins_log_t_celosia_ddl_no_partitions AFTER INSERT ON
t_celosia_ddl_no_partitions FOR EACH ROW INSERT INTO t_celosia_log VALUES
( NULL, 't_celosia_ddl_no_partitions', NOW(), CONCAT('INSERT ' ,new.id), CONNECTION_ID()
);

--sync_slave_with_master

--connection master
eval LOAD DATA LOCAL INFILE '$MYSQLTEST_VARDIR/tb.txt'
INTO TABLE t_celosia_ddl_no_partitions (user_num, name, year);

--connection slave
let $show_statement= SHOW PROCESSLIST;
let $field= State;
let $condition= = 'Reading event from the relay log';
--source include/wait_show_condition.inc
--echo # Stopping slave...
STOP SLAVE;

# End of test case
[8 Oct 2010 15:00] Omer Barnir
Problem addressed in 5.5