Bug #71070 kill of truncate table will lead to binary log written while rows remains
Submitted: 4 Dec 2013 4:05 Modified: 14 May 2014 15:44
Reporter: Guangpu Feng Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Replication Severity:S2 (Serious)
Version:5.6.14,5.5.18, 5.6.15 OS:Any
Assigned to: CPU Architecture:Any
Tags: binlog, truncate

[4 Dec 2013 4:05] Guangpu Feng
Description:
open_table will return TRUE when thd->killed is set, thus killing a "truncate table" query before the function *open_and_lock_tables* is called in Truncate_statement::handler_truncate will lead to handler_truncate return TRUE without executing table->file->ha_truncate().

while in Truncate_statement::truncate_table, whether to log binlog is decided by the return value of Truncate_statement::handler_truncate:

   error= handler_truncate(thd, table_ref, FALSE);

   binlog_stmt= !error || error != HA_ERR_WRONG_COMMAND;

  /* DDL is logged in statement format, regardless of binlog format. */
  if (binlog_stmt)
    error|= write_bin_log(thd, !error, thd->query(), thd->query_length());

the following is the bt:

#0  open_table (thd=0x78c11480, table_list=0x2ba5dc004c30, mem_root=0x2ba5d807e3d0, ot_ctx=0x2ba5d807e410) at sql/sql_base.cc:2683
#1  0x000000000054d5c1 in open_and_process_table (thd=0x78c11480, start=0x2ba5d807e498, counter=0x2ba5d807e4bc, flags=130, prelocking_strategy=0x2ba5d807e5f0)
    at sql/sql_base.cc:4482
#2  open_tables (thd=0x78c11480, start=0x2ba5d807e498, counter=0x2ba5d807e4bc, flags=130, prelocking_strategy=0x2ba5d807e5f0)
    at sql/sql_base.cc:4930
#3  0x000000000054e194 in open_and_lock_tables (thd=<value optimized out>, tables=0x2ba5dc004c30, derived=false, flags=130,
    prelocking_strategy=<value optimized out>) at sql/sql_base.cc:5527
#4  0x00000000007a93f9 in open_and_lock_tables (this=<value optimized out>, thd=0x78c11480, table_ref=0x2ba5dc004c30, is_tmp_table=<value optimized out>)
    at sql/sql_base.h:475
#5  Truncate_statement::handler_truncate (this=<value optimized out>, thd=0x78c11480, table_ref=0x2ba5dc004c30, is_tmp_table=<value optimized out>)
    at sql/sql_truncate.cc:226
#6  0x00000000007a9c20 in Truncate_statement::truncate_table (this=0x2ba5dc005160, thd=0x78c11480, table_ref=0x2ba5dc004c30)
    at sql/sql_truncate.cc:475
#7  0x00000000007a9cde in Truncate_statement::execute (this=0x2ba5dc005160, thd=0x78c11480) at sql/sql_truncate.cc:529
#8  0x0000000000587a1e in mysql_execute_command (thd=0x78c11480) at sql/sql_parse.cc:4813
#9  0x000000000058a094 in mysql_parse (thd=0x78c11480, rawbuf=<value optimized out>, length=11, parser_state=0x2ba5d807fbd0)
    at sql/sql_parse.cc:6047
#10 0x000000000058bfce in dispatch_command (command=COM_QUERY, thd=0x78c11480, packet=0x2ba5d807fcb8 "", packet_length=11)
    at sql/sql_parse.cc:1273
#11 0x000000000062a4bd in do_handle_one_connection (thd_arg=<value optimized out>) at sql/sql_connect.cc:1409
#12 0x000000000062a60c in handle_one_connection (arg=0x78c6aa10) at sql/sql_connect.cc:1315
#13 0x000000381b407851 in start_thread () from /lib64/libpthread.so.0
#14 0x000000381b0e767d in clone () from /lib64/libc.so.6

How to repeat:
>> test case

$cat truncate_bug.test

source include/have_binlog_format_row_or_statement.inc;

connection default;
show processlist;
use test;
create table t1(id int auto_increment primary key, a int, b int) engine=innodb;
insert into t1(a, b) values(1,2),(2,4),(3,6),(4,8),(5,10);
set debug_sync = "before_open_and_lock_tables signal truncate_before_lock wait_for kill_finish";
send truncate t1; 

  connect(con1,localhost,root,,);
  connection con1;
  set debug_sync = "now wait_for truncate_before_lock";
  set debug_sync = "after_kill signal kill_finish";
  send kill query 2;

connection default;
--ERROR 1317
reap;

connection con1;
reap;
# ignore the first binlog
show binlog events in 'master-bin.000001';                                                                                                                           
select * from t1; 

drop table t1; 

>> debug sync points added(code based on Perconaer server 5.5.18):

Index: sql/sql_parse.cc
===================================================================
--- sql/sql_parse.cc    (revision 4589)
+++ sql/sql_parse.cc    (working copy)
@@ -4109,6 +4109,7 @@
       goto error;
     }
     sql_kill(thd, (ulong)it->val_int(), lex->type & ONLY_KILL_QUERY);
+    DEBUG_SYNC(thd, "after_kill");
     break;
   }
 #ifndef NO_EMBEDDED_ACCESS_CHECKS
Index: sql/sql_truncate.cc
===================================================================
--- sql/sql_truncate.cc (revision 4589)
+++ sql/sql_truncate.cc (working copy)
@@ -222,6 +222,8 @@
     table_ref->mdl_request.ticket= NULL;
   }
 
+  DEBUG_SYNC(thd, "before_open_and_lock_tables");
+
   /* Open the table as it will handle some required preparations. */
   if (open_and_lock_tables(thd, table_ref, FALSE, flags))
     DBUG_RETURN(1);

Suggested fix:
return HA_ERR_WRONG_COMMAND instead when *open_and_lock_tables* failed in Truncate_statement::handler_truncate
[4 Dec 2013 6:13] Guangpu Feng
this bug will break replication
[4 Dec 2013 14:58] qinglin zhang
HA_ERR_WRONG_COMMAND may mislead people, so you can offer another errorno instead
[12 Dec 2013 11:21] Umesh Shastry
Hello Guangpu,

Thank you for the bug report and contribution.

Thanks,
Umesh
[14 May 2014 15:44] Jon Stephens
This issue is fixed in MySQL 5.5.38, 5.6.19, and 5.7.5. 

Documented fix in the corresponding changelogs as follows:

        When used on a table employing a transaction storage engine, a
        failed TRUNCATE TABLE was still written to the binary log and
        thus replayed on the slave. This could lead to inconsistency
        when the master retained data that was removed on the slave.

        Now in such cases TRUNCATE TABLE is logged only when it
        completes successfully.

Closed.
[31 May 2014 14:05] Laurynas Biveinis
$ bzr log -r 4620
------------------------------------------------------------
revno: 4620
committer: Sujatha Sivakumar <sujatha.sivakumar@oracle.com>
branch nick: Bug17942050_mysql-5.5
timestamp: Tue 2014-04-15 15:17:25 +0530
message:
  Bug#17942050:KILL OF TRUNCATE TABLE WILL LEAD TO BINARY LOG
  WRITTEN WHILE ROWS REMAINS
  
  Problem:
  ========
  When truncate table fails while using transactional based
  engines even though the operation errors out we still
  continue and log it to binlog. Because of this master has
  data but the truncate will be written to binary log which
  will cause inconsistency.
  
  Analysis:
  ========
  Truncate table can happen either through drop and create of
  table or by deleting rows. In the second case the existing
  code is written in such a way that even if an error occurs
  the truncate statement will always be binlogged. Which is not
  correct.
  
  Binlogging of TRUNCATE TABLE statement should check whether
  truncate is executed "transactionally or not". If the table
  is transaction based we log the TRUNCATE TABLE only on
  successful completion.
  
  If table is non transactional there are possibilities that on
  error we could have partial changes done hence in such cases
  we do log in spite of errors as some of the lines might have
  been removed, so the statement has to be sent to slave.
  
  Fix:
  ===
  Using table handler whether truncate table is being executed
  in transaction based mode or not is identified and statement
  is binlogged accordingly.
[28 Jul 2014 9:37] zhang simon
error= table_ref->table->file->ha_truncate();
if (error)
{
  .....
}
DBUG_RETURN(TRUNCATE_OK);

in a situation such as raw disk
row_truncate_table_for_mysql will return DB_ERROR
and convert_error_code_to_mysql will return -1.

then DBUG_RETURN(TRUNCATE_OK) incorrectly.

Suggested fix:

=== modified file 'sql/sql_truncate.cc'
--- sql/sql_truncate.cc 2014-04-15 09:47:25 +0000
+++ sql/sql_truncate.cc 2014-07-28 09:02:20 +0000
@@ -243,7 +243,7 @@
       DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);

   error= table_ref->table->file->ha_truncate();
-  if (error)
+  if (error != 0)
   {
     table_ref->table->file->print_error(error, MYF(0));
     /*
@@ -252,7 +252,7 @@
       donot binlog the statment. Only in non transactional engine we binlog
       inspite of errors.
      */
-    if (error == HA_ERR_WRONG_COMMAND ||
+    if (error == HA_ERR_WRONG_COMMAND || error == -1
         table_ref->table->file->has_transactions())
       DBUG_RETURN(TRUNCATE_FAILED_SKIP_BINLOG);
     else