Bug #37223 PURGE MASTER LOGS command hangs in cluster replication
Submitted: 5 Jun 2008 14:18 Modified: 28 Oct 2009 6:48
Reporter: Matthew Montgomery Email Updates:
Status: Can't repeat Impact on me:
None 
Category:MySQL Server: Replication Severity:S1 (Critical)
Version:mysql-5.1.23-telco-6.2.12 OS:Any
Assigned to: Daogang Qu CPU Architecture:Any

[5 Jun 2008 14:18] Matthew Montgomery
Description:
From CORE produced by killing hung server

Thread 6 (process 2881):
#0 0x00002aec88427478 in __lll_mutex_lock_wait () from ./lib/libpthread.so.0
#1 0x00002aec88423792 in _L_mutex_lock_51 () from ./lib/libpthread.so.0
#2 0x00002aec884230f3 in pthread_mutex_lock () from ./lib/libpthread.so.0
#3 0x00000000006858f6 in MYSQL_BIN_LOG::write ()
#4 0x00000000005db3ac in THD::binlog_query ()
#5 0x000000000067b970 in mysql_delete ()
#6 0x00000000005fdc26 in mysql_execute_command ()
#7 0x0000000000600ab2 in mysql_parse ()
#8 0x0000000000773667 in run_query ()
#9 0x000000000077411f in ndbcluster_binlog_func ()
#10 0x00000000006d018e in ha_binlog_index_purge_file ()
#11 0x00000000006832d5 in MYSQL_BIN_LOG::purge_logs ()
#12 0x0000000000714e53 in purge_master_logs ()
#13 0x00000000005fbc30 in mysql_execute_command ()
#14 0x0000000000600ab2 in mysql_parse ()
#15 0x0000000000600f16 in dispatch_command ()
#16 0x0000000000602116 in do_command ()
#17 0x00000000005f374c in handle_one_connection ()
#18 0x00002aec88421027 in start_thread () from ./lib/libpthread.so.0
#19 0x00002aec88eaa12e in clone () from ./lib/libc.so.6
#20 0x0000000000000000 in ?? ()

purge_logs calls
ndbcluster_binlog_index_purge_file
which does the following:

static int
ndbcluster_binlog_index_purge_file(THD *thd, const char *file)
{
if (!ndb_binlog_running || thd->slave_thread)
return 0;

DBUG_ENTER("ndbcluster_binlog_index_purge_file");
DBUG_PRINT("enter", ("file: %s", file));

char buf[1024];
char *end= strmov(strmov(strmov(buf,
"DELETE FROM "
NDB_REP_DB "." NDB_REP_TABLE
" WHERE File='"), file), "'");

run_query(thd, buf, end, NULL, TRUE);

DBUG_RETURN(0);
}
and run_query sets thd->options&= ~OPTION_BIN_LOG;
In
bool MYSQL_BIN_LOG::write(Log_event *event_info)

pthread_mutex_lock(&LOCK_log);

...
if ((thd && !(thd->options & OPTION_BIN_LOG)) ||

How to repeat:
SESSION 1:
mysql> SHOW GLOBAL VARIABLES LIKE '%binlog%';
+-------------------------------------+----------------------+
| Variable_name | Value |
+-------------------------------------+----------------------+
| binlog_cache_size | 32768 |
| binlog_format | MIXED |
| innodb_locks_unsafe_for_binlog | OFF |
| max_binlog_cache_size | 18446744073709551615 |
| max_binlog_size | 1073741824 |
| ndb_report_thresh_binlog_epoch_slip | 3 |
| ndb_report_thresh_binlog_mem_usage | 10 |
| sync_binlog | 0 |
+-------------------------------------+----------------------+
8 rows in set (0.00 sec)

mysql> show binary logs;
+------------------+------------+
| Log_name | File_size |
+------------------+------------+
| mysql-bin.000032 | 66970751 |
| mysql-bin.000033 | 370224704 |
| mysql-bin.000034 | 738592973 |
| mysql-bin.000035 | 1073748988 |
| mysql-bin.000036 | 1073743181 |
| mysql-bin.000037 | 76879440 |
| mysql-bin.000038 | 912070932 |
| mysql-bin.000039 | 885985162 |
| mysql-bin.000040 | 3347467 |
| mysql-bin.000041 | 185 |
| mysql-bin.000042 | 125 |
| mysql-bin.000043 | 185 |
| mysql-bin.000044 | 67551160 |
| mysql-bin.000045 | 538879322 |
| mysql-bin.000046 | 347287455 |
| mysql-bin.000047 | 185 |
| mysql-bin.000048 | 1074465498 |
| mysql-bin.000049 | 1074237401 |
| mysql-bin.000050 | 1074004320 |
| mysql-bin.000051 | 1074264678 |
| mysql-bin.000052 | 446305687 |
| mysql-bin.000053 | 48991212 |
| mysql-bin.000054 | 577237803 |
| mysql-bin.000055 | 53595262 |
+------------------+------------+
24 rows in set (0.00 sec)

mysql> purge master logs to 'mysql-bin.000054';
Hang

SESSION 2:
mysql> show binary logs;
Hang

SESSION 3:
mysql> show processlist;
+--------+-------------+-------------------+------+---------+--------+----------------------------------+------------------------------------------------------------------------------------------------------+
| Id | User | Host | db | Command | Time | State | Info |
+--------+-------------+-------------------+------+---------+--------+----------------------------------+------------------------------------------------------------------------------------------------------+
| 1 | system user | | | Daemon | 106 | Processing events | NULL |
| 203485 | system user | | NULL | Connect | 101316 | Waiting for master to send event | NULL |
| 203486 | system user | | RIP2 | Connect | 83 | update | INSERT INTO RIP2.RIP2_USER_ADDRESS(IDUSER, ADDRESS1, ADDRESS2, TOWN, MAIL) VALUES(<...>)|
| 666447 | root | localhost | RIP2 | Query | 42 | NULL | show binary logs |
| 666881 | root | localhost:53523 | NULL | Query | 106 | NULL | SHOW MASTER LOGS |
| 668016 | root | localhost | RIP2 | Query | 0 | NULL | show processlist |
| 668053 | <user> | <IP>:28011 | RIP2 | Sleep | 1 | | NULL |
+--------+-------------+-------------------+------+---------+--------+----------------------------------+------------------------------------------------------------------------------------------------------+
7 rows in set (0.00 sec)

SESSION 1:
mysql> purge master logs to 'mysql-bin.000054';
Query aborted by Ctrl+C

Suggested fix:
It looks like a mutex is taken before OPTION_BIN_LOG is checked.
In this case the delete done by ndbcluster_binlog_func should
not be logged, but it still seems to try and acquire the mutex.

Something is not right here.

Moving the
if ((thd && !(thd->options & OPTION_BIN_LOG))
check before mutex is taken would solve the problem.
[16 Apr 2009 12:59] Jonathan Miller
Left for replication team to Triage
[22 Oct 2009 3:35] Daogang Qu
The bug can't be repeated. Please review the following test:

mysql> SHOW GLOBAL VARIABLES LIKE '%binlog%';
+-------------------------------------+------------+
| Variable_name                       | Value      |
+-------------------------------------+------------+
| binlog_cache_size                   | 32768      |
| binlog_format                       | ROW        |
| max_binlog_cache_size               | 4294963200 |
| max_binlog_size                     | 1073741824 |
| ndb_log_binlog_index                | ON         |
| ndb_report_thresh_binlog_epoch_slip | 3          |
| ndb_report_thresh_binlog_mem_usage  | 10         |
| sync_binlog                         | 0          |
+-------------------------------------+------------+
8 rows in set (0.00 sec)

......

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000002 |      1452 |
| master-bin.000003 |       584 |
| master-bin.000004 |       150 |
| master-bin.000005 |       801 |
| master-bin.000006 |       367 |
| master-bin.000007 |       367 |
| master-bin.000008 |       367 |
| master-bin.000009 |       367 |
| master-bin.000010 |       106 |
+-------------------+-----------+
9 rows in set (0.00 sec)

mysql> INSERT INTO t1 SET a = 2;
Query OK, 1 row affected (0.00 sec)

mysql> flush logs;
Query OK, 0 rows affected (0.01 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000002 |      1452 |
| master-bin.000003 |       584 |
| master-bin.000004 |       150 |
| master-bin.000005 |       801 |
| master-bin.000006 |       367 |
| master-bin.000007 |       367 |
| master-bin.000008 |       367 |
| master-bin.000009 |       367 |
| master-bin.000010 |       367 |
| master-bin.000011 |       106 |
+-------------------+-----------+
10 rows in set (0.00 sec)

mysql> purge master logs to 'master-bin.000010';
Query OK, 0 rows affected (0.00 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000010 |       367 |
| master-bin.000011 |       106 |
+-------------------+-----------+
2 rows in set (0.00 sec)

mysql>
mysql> 
mysql> purge master logs to 'master-bin.000011';
Query OK, 0 rows affected (0.00 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000011 |       106 |
+-------------------+-----------+
1 row in set (0.00 sec)

mysql>
mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000011 |       106 |
| master-bin.000012 |       265 |
| master-bin.000013 |       265 |
| master-bin.000014 |       265 |
| master-bin.000015 |       265 |
| master-bin.000016 |       265 |
| master-bin.000017 |       265 |
| master-bin.000018 |       265 |
| master-bin.000019 |       265 |
| master-bin.000020 |       265 |
| master-bin.000021 |       265 |
| master-bin.000022 |       265 |
| master-bin.000023 |       265 |
| master-bin.000024 |       265 |
| master-bin.000025 |       265 |
| master-bin.000026 |       265 |
| master-bin.000027 |       106 |
+-------------------+-----------+
17 rows in set (0.00 sec)

mysql> 
mysql> 
mysql> 
mysql> purge master logs to 'master-bin.000022';
Query OK, 0 rows affected (0.00 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000022 |       265 |
| master-bin.000023 |       265 |
| master-bin.000024 |       265 |
| master-bin.000025 |       265 |
| master-bin.000026 |       265 |
| master-bin.000027 |       106 |
+-------------------+-----------+
6 rows in set (0.00 sec)

mysql> purge master logs to 'master-bin.000026';
Query OK, 0 rows affected (0.01 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000026 |       265 |
| master-bin.000027 |       106 |
+-------------------+-----------+
2 rows in set (0.00 sec)

mysql> purge master logs to 'master-bin.000027';
Query OK, 0 rows affected (0.01 sec)

mysql> show binary logs;
+-------------------+-----------+
| Log_name          | File_size |
+-------------------+-----------+
| master-bin.000027 |       106 |
+-------------------+-----------+
1 row in set (0.00 sec)

mysql>
[22 Oct 2009 6:06] Daogang Qu
The above test was done on mysql-5.1-telco-6.2 and mysql-5.1-bugteam.
[28 Oct 2009 6:43] Daogang Qu
The bug still can't be reproduced on newest version of mysql-5.1-telco-6.2 and mysql-5.1-bugteam base on very large binlogs. Every binlog eats about 1.3G in test result as followings:

show binary logs;
Log_name        File_size
master-bin.000031       1350150
master-bin.000032       1350150
master-bin.000033       1350150
master-bin.000034       1350150
master-bin.000035       1350150
master-bin.000036       1350150
master-bin.000037       1350150
master-bin.000038       1350150
master-bin.000039       1350150
master-bin.000040       1350150
master-bin.000041       1350150
master-bin.000042       1350150
master-bin.000043       1350150
master-bin.000044       1350150
master-bin.000045       1350150
master-bin.000046       1350150
master-bin.000047       1350150
master-bin.000048       1350150
master-bin.000049       1350150
master-bin.000050       1350150
master-bin.000051       1350150
master-bin.000052       1350150
master-bin.000053       1350150
master-bin.000054       1350150
master-bin.000055       106
purge master logs to 'master-bin.000054';
show binary logs;
Log_name        File_size
master-bin.000054       1350150
master-bin.000055       106

The above test result is produced by the test case below.

Test case:
source include/master-slave.inc;
source include/have_binlog_format_statement.inc;

CREATE TABLE t1( a TEXT, b TEXT );

--disable_query_log
#let $count=10000;

let $topcount=30;
while ($topcount)
{
  let $count=1;
  while ($count)
  {
    eval INSERT INTO t1 (a,b) VALUES( REPEAT( 'a', 6 ), REPEAT( 'b', 6 ));
    dec $count;
  }
  flush logs;
  dec $topcount;
}

show binary logs;
purge master logs to 'master-bin.000031';
show binary logs;

let $topcount=24;
while ($topcount)
{
  let $count=10000;
  while ($count)
  {
    eval INSERT INTO t1 (a,b) VALUES( REPEAT( 'a', 50000 ), REPEAT( 'b', 50000 ));
    dec $count;
  }
  flush logs;
  dec $topcount;
}

--enable_query_log

show binary logs;

purge master logs to 'master-bin.000054';

show binary logs;

drop table t1;
sync_slave_with_master;
[28 Oct 2009 6:48] Daogang Qu
Hi Matthew,
If you think the above binlog's size is not enough large to reproduce the problem, Please update the above test case for generating more large binlogs. If the problem can be reproduced base on the updated test case. Please append it. Thanks!