Bug #53921 Wrong locks for SELECTs used stored functions may lead to broken SBR
Submitted: 23 May 2010 5:32 Modified: 17 Dec 2010 22:43
Reporter: Dmitry Lenev Email Updates:
Status: Won't fix Impact on me:
None 
Category:MySQL Server: Locking Severity:S3 (Non-critical)
Version:5.1.47-bzr OS:Any
Assigned to: CPU Architecture:Any

[23 May 2010 5:32] Dmitry Lenev
Description:
Concurrent execution of stored functions (or triggers) which use SELECT INTO statements and INSERTs into tables being selected from may result in wrong binary log order in statement/mixed modes and in broken replication as result. Investigation shows that problem is caused by the fact that for such SELECTs we take too weak TL_READ locks (e.g. unlike subselects in DML which take TL_READ_NO_INSERT). Also after investigation I think that stored functions which use SELECTs in a form of cursors are also affected.

This problem should not exist in 5.5 series of server thanks that the fix for bug #46947 also fixes this problem.

I have already reported this problem as bug #42134 which was closed as unrepeatable due to fix for "Bug #39843 DELETE requires write access to
table in subquery in where clause". Unfortunately, it seems that patch
bug #39843 has not solved problem in cases when SELECT used in stored
function is not a subquery in some expression.

How to repeat:
To simplify repeating the problem let us introduce a debug sync point
(without repeatability of problem depends on timing).

=== modified file 'sql/sql_base.cc'
--- sql/sql_base.cc     2010-05-16 14:37:44 +0000
+++ sql/sql_base.cc     2010-05-23 04:43:40 +0000
@@ -5389,6 +5389,8 @@ int lock_tables(THD *thd, TABLE_LIST *ta
       DBUG_RETURN(-1);
     }
 
+    DEBUG_SYNC(thd, "after_lock_tables_takes_lock");
+
     if (thd->lex->requires_prelocking() &&
         thd->lex->sql_command != SQLCOM_LOCK_TABLES)
     {

Run the following script for mysqltest tool and inspect its results:

--source include/have_debug_sync.inc
--source include/have_binlog_format_mixed_or_statement.inc

connect (con1, localhost, root,,);
connection default;

# Check that concurrent INSERT is enabled
select @@global.concurrent_insert;
# Should return:
# @@global.concurrent_insert
# 1

set debug_sync= "RESET";
create table t1(i int) engine=myisam;
create table t2(j int) engine=myisam;
delimiter |;
create function f1() returns int
begin
declare j int;
select count(*) from t1 into j;
insert into t2 values (j);
return j;
end|
delimiter ;|

set debug_sync='after_lock_tables_takes_lock SIGNAL parked WAIT_FOR go';
--send select f1();

--echo # connection 'con1'
connection con1;
set debug_sync='now WAIT_FOR parked';
insert into t1 values (1);
set debug_sync='now SIGNAL go';

--echo # connection 'default'
connection default;
--reap
# Returns:
# f1()
# 0

select * from t2;
# Returns:
# j
# 0

show binlog events;
# Returns:
# Log_name        Pos     Event_type      Server_id       End_log_pos     Info
# master-bin.000001       4       Format_desc     1       106     Server ver: 5.1.47-debug-log, Binlog ver: 4
# master-bin.000001       106     Query   1       205     use `test`; create table t1(i int) engine=myisam
# master-bin.000001       205     Query   1       304     use `test`; create table t2(j int) engine=myisam
# master-bin.000001       304     Query   1       526     use `test`; CREATE DEFINER=`root`@`localhost` FUNCTION `f1`() RETURNS int(11)
begin
declare j int;
select count(*) from t1 into j;
insert into t2 values (j);
return j;
end
# master-bin.000001       526     Query   1       614     use `test`; insert into t1 values (1)
# master-bin.000001       614     Query   1       697     use `test`; SELECT `test`.`f1`()

Notice that order of INSERT and SELECT f1() events in binary log contradicts to contents of table t2. 

Suggested fix:
Use TL_READ_DEFAULT (translated to TL_READ_NO_INSERT when SBR is on) as lock type for all SELECTs which are used in stored functions/triggers.
[23 May 2010 5:45] Dmitry Lenev
Note the fact that we set wrong lock type for such SELECTs in parser/SQL-layer means that InnoDB tables are also affected by similar issues. See script below:

--source include/have_innodb.inc
--source include/have_binlog_format_mixed_or_statement.inc

connect (con1, localhost, root,,);
connection default;

# The problem arises only in REPEATABLE-READ mode.
select @@session.tx_isolation;
# Returns:
# @@session.tx_isolation
# REPEATABLE-READ

create table t1(i int) engine=innodb;
create table t2(j int) engine=innodb;
delimiter |;
create function f1() returns int
begin
declare j int;
select count(*) from t1 into j;
insert into t2 values (j);
return j;
end|
delimiter ;|

--echo # connection 'con1'
connection con1;
begin;
insert into t1 values (1);

--echo # connection 'default'
connection default;
begin;
select f1();
# Returns:
# f1()
# 0

--echo # connection 'con1'
connection con1;
commit;

--echo # connection 'default'
connection default;
commit;

select * from t2;
# Returns:
# j
# 0

show binlog events;
# Returns:
# Log_name        Pos     Event_type      Server_id       End_log_pos     Info
# master-bin.000001       4       Format_desc     1       106     Server ver: 5.1.47-debug-log, Binlog ver: 4
# master-bin.000001       106     Query   1       205     use `test`; create table t1(i int) engine=innodb
# master-bin.000001       205     Query   1       304     use `test`; create table t2(j int) engine=innodb
# master-bin.000001       304     Query   1       526     use `test`; CREATE DEFINER=`root`@`localhost` FUNCTION `f1`() RETURNS int(11)
begin
declare j int;
select count(*) from t1 into j;
insert into t2 values (j);
return j;
end
# master-bin.000001       526     Query   1       594     BEGIN
# master-bin.000001       594     Query   1       682     use `test`; insert into t1 values (1)
# master-bin.000001       682     Xid     1       709     COMMIT /* xid=17 */
# master-bin.000001       709     Query   1       777     BEGIN
# master-bin.000001       777     Query   1       860     use `test`; SELECT `test`.`f1`()
# master-bin.000001       860     Xid     1       887     COMMIT /* xid=19 */

Notice that order of events in binary log contradicts contents of t2 table.
[25 May 2010 7:40] Sveta Smirnova
Thank you for the report.

Verified as described.
[17 Dec 2010 22:43] Omer Barnir
Issue is fixed in 5.5 and will not be back ported