Bug #71433 recreate+analyze OPTIMIZE TABLE T and online ALTER TABLE T may deadlock
Submitted: 20 Jan 2014 19:10 Modified: 25 Sep 2014 11:49
Reporter: Laurynas Biveinis (OCA) Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Locking Severity:S2 (Serious)
Version:5.6 OS:Any
Assigned to: CPU Architecture:Any
Tags: alter, locking, mdl, Optimize, TokuDB

[20 Jan 2014 19:10] Laurynas Biveinis
Description:
Credit shared with Rich Prohaska and Joel Epstein.

If two threads OPTIMIZE and ALTER the same TABLE at the same time, the following deadlock is possible:

1) ALTER TABLE thread has a TL_READ lock on the table, and a MDL_SHARED_UPGRADABLE metadata lock, which it wants to upgrade to MDL_EXCLUSIVE, but cannot, because

2) OPTIMIZE TABLE thread has a MDL_SHARED_WRITE metadata lock. It wants to lock table TL_WRITE but cannot because of TL_READ above.

How to repeat:
The testcase is not of GA quality, but it shows the issue.

=== modified file 'sql/sql_admin.cc'
--- sql/sql_admin.cc	2013-10-30 23:22:43 +0000
+++ sql/sql_admin.cc	2014-01-20 18:27:25 +0000
@@ -800,6 +800,7 @@
       if (!result_code) // recreation went ok
       {
         DEBUG_SYNC(thd, "ha_admin_open_ltable");
+        dbug_stop_in_open_and_lock_tables= true;
         table->mdl_request.set_type(MDL_SHARED_WRITE);
         if (!open_temporary_tables(thd, table) &&
             (table->table= open_n_lock_single_table(thd, table, lock_type, 0)))

=== modified file 'sql/sql_base.cc'
--- sql/sql_base.cc	2013-10-30 23:22:43 +0000
+++ sql/sql_base.cc	2014-01-20 18:29:21 +0000
@@ -5604,6 +5604,8 @@
   @retval TRUE   Error
 */
 
+bool dbug_stop_in_open_and_lock_tables= false;
+
 bool open_and_lock_tables(THD *thd, TABLE_LIST *tables,
                           bool derived, uint flags,
                           Prelocking_strategy *prelocking_strategy)
@@ -5616,6 +5618,9 @@
   if (open_tables(thd, &tables, &counter, flags, prelocking_strategy))
     goto err;
 
+  if (tables && dbug_stop_in_open_and_lock_tables)
+    DEBUG_SYNC(thd, "open_and_lock_tables_after_mdl");
+
   DBUG_EXECUTE_IF("sleep_open_and_lock_after_open", {
                   const char *old_proc_info= thd->proc_info;
                   thd->proc_info= "DBUG sleep";

=== modified file 'sql/sql_base.h'
--- sql/sql_base.h	2013-08-20 09:15:15 +0000
+++ sql/sql_base.h	2014-01-20 18:24:11 +0000
@@ -307,6 +307,8 @@
 extern Field *view_ref_found;
 extern HASH table_def_cache;
 
+extern bool dbug_stop_in_open_and_lock_tables;
+
 /**
   clean/setup table fields and map.
 

--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/count_sessions.inc
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;

# ha_admin_try_alter
SET DEBUG_SYNC="ha_admin_open_ltable SIGNAL optimize_ready_1 WAIT_FOR continue_optimize";
SET DEBUG_SYNC="open_and_lock_tables_after_mdl SIGNAL optimize_ready_2 WAIT_FOR finish_optimize";
send OPTIMIZE TABLE t1;

--connect (con1,localhost,root,,)

SET DEBUG_SYNC="now WAIT_FOR optimize_ready_1";
SET DEBUG_SYNC="alter_table_inplace_after_lock_downgrade SIGNAL alter_ready WAIT_FOR finish_alter";
send ALTER TABLE t1 ADD INDEX b(b), ALGORITHM=INPLACE, LOCK=NONE;

--connect (con2,localhost,root,,)

SET DEBUG_SYNC="now WAIT_FOR alter_ready";
SET DEBUG_SYNC="now SIGNAL continue_optimize";
SET DEBUG_SYNC="now WAIT_FOR optimize_ready_2";

SET DEBUG_SYNC="now SIGNAL finish_alter";
SELECT SLEEP(1);
SET DEBUG_SYNC="now SIGNAL finish_optimize";

--disconnect con2

--connection con1
reap;

--disconnect con1
--connection default
reap;
DROP TABLE t1;
--source include/wait_until_count_sessions.inc

Suggested fix:
don't know
[20 Jan 2014 19:12] Laurynas Biveinis
The deadlocked threads on the testcase:

(gdb) bt
#0  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238
#1  0x0000000000aad433 in safe_cond_timedwait (cond=0x7f8a800008c8, mp=0x7f8a7400cd98, abstime=0x7f8a8d9f5fe0, file=0xfe6d30 "/home/laurynas/percona/src/5.6/include/mysql/psi/mysql_thread.h", line=1188)
    at /home/laurynas/percona/src/5.6/mysys/thr_mutex.c:278
#2  0x0000000000aaa8c6 in inline_mysql_cond_timedwait (that=0x7f8a800008c8, mutex=0x7f8a7400cd98, abstime=0x7f8a8d9f5fe0, src_file=0xfe6d70 "/home/laurynas/percona/src/5.6/mysys/thr_lock.c", 
    src_line=462) at /home/laurynas/percona/src/5.6/include/mysql/psi/mysql_thread.h:1188
#3  0x0000000000aaaef9 in wait_for_lock (wait=0x7f8a7400ce28, data=0x7f8a80045448, in_wait_list=0 '\000', lock_wait_timeout=31536000) at /home/laurynas/percona/src/5.6/mysys/thr_lock.c:462
#4  0x0000000000aab70c in thr_lock (data=0x7f8a80045448, owner=0x2978c88, lock_type=TL_WRITE, lock_wait_timeout=31536000) at /home/laurynas/percona/src/5.6/mysys/thr_lock.c:787
#5  0x0000000000aabff9 in thr_multi_lock (data=0x7f8a80052ed0, count=1, owner=0x2978c88, lock_wait_timeout=31536000) at /home/laurynas/percona/src/5.6/mysys/thr_lock.c:1060
#6  0x000000000093d044 in mysql_lock_tables (thd=0x2977dc0, tables=0x7f8a80006170, count=1, flags=0) at /home/laurynas/percona/src/5.6/sql/lock.cc:321
#7  0x0000000000762d84 in lock_tables (thd=0x2977dc0, tables=0x7f8a800050e8, count=1, flags=0) at /home/laurynas/percona/src/5.6/sql/sql_base.cc:5867
#8  0x00000000007625e4 in open_and_lock_tables (thd=0x2977dc0, tables=0x7f8a800050e8, derived=false, flags=0, prelocking_strategy=0x7f8a8d9f6300) at /home/laurynas/percona/src/5.6/sql/sql_base.cc:5630
#9  0x000000000076201e in open_n_lock_single_table (thd=0x2977dc0, table_l=0x7f8a800050e8, lock_type=TL_WRITE, flags=0, prelocking_strategy=0x7f8a8d9f6300)
    at /home/laurynas/percona/src/5.6/sql/sql_base.cc:5468
#10 0x000000000085a0a5 in open_n_lock_single_table (thd=0x2977dc0, table_l=0x7f8a800050e8, lock_type=TL_WRITE, flags=0) at /home/laurynas/percona/src/5.6/sql/sql_base.h:463
#11 0x000000000098a86c in mysql_admin_table(THD *, TABLE_LIST *, HA_CHECK_OPT *, const char *, thr_lock_type, bool, bool, uint, int (*)(THD *, TABLE_LIST *, HA_CHECK_OPT *), struct {...}, int (*)(THD *, TABLE_LIST *)) (thd=0x2977dc0, tables=0x7f8a800050e8, check_opt=0x297ada0, operator_name=0xfab108 "optimize", lock_type=TL_WRITE, open_for_modify=false, repair_table_use_frm=false, extra_open_options=0, 
    prepare_func=0x0, operator_func=(int (handler::*)(handler * const, THD *, HA_CHECK_OPT *)) 0x62f2de <handler::ha_optimize(THD*, st_ha_check_opt*)>, view_operator_func=0x0)
    at /home/laurynas/percona/src/5.6/sql/sql_admin.cc:806
#12 0x000000000098b937 in Sql_cmd_optimize_table::execute (this=0x7f8a80005650, thd=0x2977dc0) at /home/laurynas/percona/src/5.6/sql/sql_admin.cc:1110
#13 0x00000000007d314c in mysql_execute_command (thd=0x2977dc0) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:4936
#14 0x00000000007d6270 in mysql_parse (thd=0x2977dc0, rawbuf=0x7f8a80005010 "OPTIMIZE TABLE t1", length=17, parser_state=0x7f8a8d9f8e40) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:6235
#15 0x00000000007c96ea in dispatch_command (command=COM_QUERY, thd=0x2977dc0, packet=0x2a4ddf1 "", packet_length=17) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:1334
#16 0x00000000007c883a in do_command (thd=0x2977dc0) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:1036
#17 0x000000000078f4b8 in do_handle_one_connection (thd_arg=0x2977dc0) at /home/laurynas/percona/src/5.6/sql/sql_connect.cc:982
#18 0x000000000078efce in handle_one_connection (arg=0x2977dc0) at /home/laurynas/percona/src/5.6/sql/sql_connect.cc:898
#19 0x0000000000b7e552 in pfs_spawn_thread (arg=0x29c8480) at /home/laurynas/percona/src/5.6/storage/perfschema/pfs.cc:1858
#20 0x00007f8a9a09ef6e in start_thread (arg=0x7f8a8d9fa700) at pthread_create.c:311
#21 0x00007f8a995ab9cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

(gdb) bt
#0  pthread_cond_timedwait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:238
#1  0x0000000000aad433 in safe_cond_timedwait (cond=0x2a52088, mp=0x2a52018, abstime=0x7f8a8d9b4570, file=0xef5580 "/home/laurynas/percona/src/5.6/include/mysql/psi/mysql_thread.h", line=1188)
    at /home/laurynas/percona/src/5.6/mysys/thr_mutex.c:278
#2  0x00000000007047fe in inline_mysql_cond_timedwait (that=0x2a52088, mutex=0x2a52018, abstime=0x7f8a8d9b4570, src_file=0xef57c0 "/home/laurynas/percona/src/5.6/sql/mdl.cc", src_line=1327)
    at /home/laurynas/percona/src/5.6/include/mysql/psi/mysql_thread.h:1188
#3  0x0000000000705791 in MDL_wait::timed_wait (this=0x2a52018, owner=0x2a51f30, abs_timeout=0x7f8a8d9b4570, set_status_on_timeout=false, 
    wait_state_name=0x14fddd0 <MDL_key::m_namespace_to_wait_state_name+48>) at /home/laurynas/percona/src/5.6/sql/mdl.cc:1327
#4  0x0000000000706c40 in MDL_context::acquire_lock (this=0x2a52018, mdl_request=0x7f8a8d9b4600, lock_wait_timeout=31536000) at /home/laurynas/percona/src/5.6/sql/mdl.cc:2246
#5  0x00000000007072a8 in MDL_context::upgrade_shared_lock (this=0x2a52018, mdl_ticket=0x7f8a74008240, new_type=MDL_EXCLUSIVE, lock_wait_timeout=31536000)
    at /home/laurynas/percona/src/5.6/sql/mdl.cc:2428
#6  0x000000000075c5ee in wait_while_table_is_used (thd=0x2a51f30, table=0x7f8a7400a590, function=HA_EXTRA_PREPARE_FOR_RENAME) at /home/laurynas/percona/src/5.6/sql/sql_base.cc:2200
#7  0x000000000083fa0e in mysql_inplace_alter_table (thd=0x2a51f30, table_list=0x7f8a74004d98, table=0x7f8a7400a590, altered_table=0x7f8a74010d30, ha_alter_info=0x7f8a8d9b4a00, 
    inplace_supported=HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE, target_mdl_request=0x7f8a8d9b4a90, alter_ctx=0x7f8a8d9b55a0) at /home/laurynas/percona/src/5.6/sql/sql_table.cc:6515
#8  0x0000000000843b2f in mysql_alter_table (thd=0x2a51f30, new_db=0x7f8a740052f8 "test", new_name=0x0, create_info=0x7f8a8d9b61a0, table_list=0x7f8a74004d98, alter_info=0x7f8a8d9b6110, order_num=0, 
    order=0x0, ignore=false) at /home/laurynas/percona/src/5.6/sql/sql_table.cc:8200
#9  0x000000000098cd1e in Sql_cmd_alter_table::execute (this=0x7f8a740053d0, thd=0x2a51f30) at /home/laurynas/percona/src/5.6/sql/sql_alter.cc:313
#10 0x00000000007d314c in mysql_execute_command (thd=0x2a51f30) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:4936
#11 0x00000000007d6270 in mysql_parse (thd=0x2a51f30, rawbuf=0x7f8a74004c70 "ALTER TABLE t1 ADD INDEX b(b), ALGORITHM=INPLACE, LOCK=NONE", length=59, parser_state=0x7f8a8d9b7e40)
    at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:6235
#12 0x00000000007c96ea in dispatch_command (command=COM_QUERY, thd=0x2a51f30, packet=0x2a55cf1 "ALTER TABLE t1 ADD INDEX b(b), ALGORITHM=INPLACE, LOCK=NONE", packet_length=59)
    at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:1334
#13 0x00000000007c883a in do_command (thd=0x2a51f30) at /home/laurynas/percona/src/5.6/sql/sql_parse.cc:1036
#14 0x000000000078f4b8 in do_handle_one_connection (thd_arg=0x2a51f30) at /home/laurynas/percona/src/5.6/sql/sql_connect.cc:982
#15 0x000000000078efce in handle_one_connection (arg=0x2a51f30) at /home/laurynas/percona/src/5.6/sql/sql_connect.cc:898
#16 0x0000000000b7e552 in pfs_spawn_thread (arg=0x29c90e0) at /home/laurynas/percona/src/5.6/storage/perfschema/pfs.cc:1858
#17 0x00007f8a9a09ef6e in start_thread (arg=0x7f8a8d9b9700) at pthread_create.c:311
#18 0x00007f8a995ab9cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
[21 Jan 2014 14:49] MySQL Verification Team
Hello Laurynas,

Thank you for the bug report and test case.
Verified as described.

IMHO - if you use --lock-wait-timeout=10 then it should timeout with "1205: Lock wait timeout exceeded; try restarting transaction" 

Thanks,
Umesh
[21 Jan 2014 14:51] MySQL Verification Team
Stacktrace etc details..

Attachment: 71433.txt (text/plain), 22.12 KiB.

[28 Aug 2014 17:39] Paul DuBois
Noted in 5.6.21, 5.7.5 changelogs.

A simultaneous OPTIMIZE TABLE and online ALTER TABLE on the same
InnoDB table could result in deadlock.
[25 Sep 2014 11:49] Laurynas Biveinis
revno: 6080
committer: Nisha Gopalakrishnan <nisha.gopalakrishnan@oracle.com>
branch nick: mysql-5.6-18110156
timestamp: Wed 2014-07-16 22:19:12 +0530
message:
  BUG#18110156: RECREATE+ANALYZE OPTIMIZE TABLE T AND ONLINE
                ALTER TABLE T MAY DEADLOCK
  
  Analysis:
  --------
  OPTIMIZE TABLE and ONLINE ALTER TABLE on the same table
  using INNODB storage engine may cause a deadlock.
  
  OPTIMIZE TABLE for INNODB tables is mapped to recreate
  and analyze of the table. The deadlock occurs because
  InnoDB:
  
  1) Tries to support "online" ALTER TABLE which at some
     point tries to upgrade SU metadata lock to X lock,
     while holding TL_READ thr_lock lock.
  2) The analyze phase of the OPTIMIZE TABLE statement,
     acquires SW metadata lock (which is compatible with
     SU lock) and then tries to acquire "strong" TL_WRITE
     lock, which is not compatible with TL_READ.
     I.e. there is a discrepancy between "weak" MDL and
     "strong" thr_lock requested.
  
  The problem does not occur for other statements and other
  phases of OPTIMIZE TABLE, since they either acquire strong
  metadata locks which are not compatible with SU lock or
  downgrade the request to acquire TL_WRITE, to request for
  TL_WRITE_ALLOW_WRITE lock, which is compatible with TL_READ.
  
  
  Fix:
  ----
  For the normal ANALYZE TABLE operation, the locks requested
  are MDL_SHARED_READ and TL_READ_NO_INSERT locks.
  In case of OPTIMIZE TABLE operation, after recreate, the
  table is closed and locks are released. The table is re-opened
  and locks are re-acquired for the analyze phase. Since the
  analyze operation does not involve changing the data or
  metadata, a MDL_SHARED_READ and TL_READ_NO_INSERT locks
  are requested instead of MDL_SHARED_WRITE and TL_WRITE
  locks(same as the normal ANALYZE TABLE operation).
  This prevents the deadlock reported.