Bug #43753 hang with pool-of-threads during sysbench
Submitted: 19 Mar 2009 16:50 Modified: 27 Apr 2009 12:29
Reporter: Mark Callaghan Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server Severity:S3 (Non-critical)
Version:6.0.9 OS:Linux
Assigned to: Magne Mæhre CPU Architecture:Any
Tags: hang, pool-of-threads

[19 Mar 2009 16:50] Mark Callaghan
Description:
I use the term 'hang' loosely as I can connect to MySQL and run commands. But sysbench gets stuck waiting for query responses. The hang is intermittent.

This is for:
* mysql 6.0.9 alpha
* linux 2.6
* my.cnf has thread_pool_size=20, thread_handling=pool-of-threads
* run sysbench readonly for 16 concurrent users
* server has 16 cores
* sysbench and mysqld run on the same server

mysql> show processlist;
+----+------+-------------------+------+---------+------+-------+------------------+
| Id | User | Host              | db   | Command | Time | State | Info             |
+----+------+-------------------+------+---------+------+-------+------------------+
| 10 | root | 10.13.154.1:54152 | test | Sleep   |  441 | NULL  | NULL             | 
| 11 | root | 10.13.154.1:54153 | test | Sleep   |  441 | NULL  | NULL             | 
| 12 | root | 10.13.154.1:54154 | test | Sleep   |  441 | NULL  | NULL             | 
| 13 | root | 10.13.154.1:54155 | test | Sleep   |  486 | NULL  | NULL             | 
| 14 | root | 10.13.154.1:54156 | test | Sleep   |  441 | NULL  | NULL             | 
| 15 | root | 10.13.154.1:54157 | test | Sleep   |  441 | NULL  | NULL             | 
| 16 | root | 10.13.154.1:54158 | test | Sleep   |  441 | NULL  | NULL             | 
| 17 | root | 10.13.154.1:54159 | test | Sleep   |  441 | NULL  | NULL             | 
| 18 | root | 10.13.154.1:54160 | test | Sleep   |  441 | NULL  | NULL             | 
| 19 | root | 10.13.154.1:54161 | test | Sleep   |  441 | NULL  | NULL             | 
| 20 | root | 10.13.154.1:54162 | test | Sleep   |  441 | NULL  | NULL             | 
| 21 | root | 10.13.154.1:54163 | test | Sleep   |  453 | NULL  | NULL             | 
| 22 | root | 10.13.154.1:54164 | test | Sleep   |  441 | NULL  | NULL             | 
| 23 | root | 10.13.154.1:54165 | test | Sleep   |  441 | NULL  | NULL             | 
| 24 | root | 10.13.154.1:54166 | test | Sleep   |  441 | NULL  | NULL             | 
| 25 | root | 10.13.154.1:54167 | test | Sleep   |  441 | NULL  | NULL             | 
| 29 | root | 10.13.154.1:52895 | NULL | Query   |    0 | NULL  | show processlist | 
+----+------+-------------------+------+---------+------+-------+------------------+

Flattened gdb thread stacks for mysqld:
30,pthread_cond_wait@@GLIBC_2.3.2,os_event_wait_low,os_aio_simulated_handle,fil_aio_wait,io_handler_thread,start_thread,clone,??
29,pthread_cond_wait@@GLIBC_2.3.2,os_event_wait_low,os_aio_simulated_handle,fil_aio_wait,io_handler_thread,start_thread,clone,??
28,pthread_cond_wait@@GLIBC_2.3.2,os_event_wait_low,os_aio_simulated_handle,fil_aio_wait,io_handler_thread,start_thread,clone,??
27,pthread_cond_wait@@GLIBC_2.3.2,os_event_wait_low,os_aio_simulated_handle,fil_aio_wait,io_handler_thread,start_thread,clone,??
26,select,os_thread_sleep,srv_lock_timeout_and_monitor_thread,start_thread,clone,??
25,select,os_thread_sleep,srv_error_monitor_thread,start_thread,clone,??
24,pthread_cond_wait@@GLIBC_2.3.2,os_event_wait_low,srv_master_thread,start_thread,clone,??
23,pthread_cond_timedwait@@GLIBC_2.3.2,ma_checkpoint_background,start_thread,clone,??
22,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
21,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
20,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
19,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
18,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
17,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
16,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
15,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
14,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
13,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
12,syscall,epoll_wait,epoll_dispatch,event_base_loop,libevent_thread_proc,start_thread,clone,??
11,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
10,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
9,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
8,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
7,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
6,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
5,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
4,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
3,__lll_mutex_lock_wait,_L_mutex_lock_214,pthread_mutex_lock,libevent_thread_proc,start_thread,clone,??
2,do_sigwait,sigwait,signal_hand,start_thread,clone,??
1,select,handle_connections_sockets,main,select

How to repeat:
see above

Suggested fix:
see above
[20 Mar 2009 7:12] Sveta Smirnova
Thank you for the report.

This is a duplicate of bug #34399.
[20 Mar 2009 13:17] Mark Callaghan
In the referenced bug, connections cannot be made to mysqld during the hang. In my case they can and I have provided SHOW PROCESSLIST output. I don't think this is the same symptom -- although it may be the same problem.
[24 Mar 2009 10:39] Sveta Smirnova
Thank you for the feedback.

I re-tested on Linux and could not repeat it. I think I will leave it in duplicate status until bug #34399 is fixed, and it would be good if you could check again after it is fixed.
[20 Apr 2009 11:06] Magne Mæhre
This bug is probably not related to #34399, as only "read-only" transactions are used. #34399 is due to a combination of having fewer threads than active sessions, multi-statement transactions, and the need for exclusive locks on rows.
[26 Apr 2009 13:36] Mark Callaghan
More background on this:
* it happened once out of 10+ runs, but I haven't used it much since then
* 6.0 uses an old version of libevent
* data set used is sysbench with 2M rows

But if you can't reproduce this then you should close it for now and we can refer to it in the future if it shows up again.
[27 Apr 2009 12:29] Magne Mæhre
I've done a long series of test runs over the weekend without hitting this one, so I'm closing it for now (as suggested by the reporter).