commit 7a9aa1fc762d1f10198a6859b05adf28b3f152e9 Author: Vlad Lesin Date: Fri Aug 4 08:34:20 2017 +0300 Fix bug 85371 / 1667307 (Memory leak in multi-source replication when binlog_rows_query_log_events=1) Bug description: 1) When binlog_rows_query_log_events is enabled, Rows_query_log_event is created, written to binary log on master prior Rows_log_event. 2) Slave is reading this event from relay log and applying it, see Rows_query_log_event::do_apply_event(). When the event is applied the event itself is stored in Relay_log_info::rows_query_ev: int Rows_query_log_event::do_apply_event(Relay_log_info const *rli) { DBUG_ENTER("Rows_query_log_event::do_apply_event"); DBUG_ASSERT(rli->info_thd == thd); /* Set query for writing Rows_query log event into binlog later.*/ thd->set_query(m_rows_query, strlen(m_rows_query)); DBUG_ASSERT(rli->rows_query_ev == NULL); const_cast(rli)->rows_query_ev= this; /* Tell worker not to free the event */ worker= NULL; DBUG_RETURN(0); } 3) Usually Relay_log_info::rows_query_ev is removed after Rows_log_event execution with the following stack trace: at ./sql/log_event.h:3795 at ./sql/rpl_rli.cc:1777 at ./sql/log_event.cc:11175 at ./sql/rpl_slave.cc:4750 But if we look into Rows_log_event::do_apply_event() the function rows_event_stmt_cleanup() is called at the end: int Rows_log_event::do_apply_event(Relay_log_info const *rli) { ... end: if (get_flags(STMT_END_F)) { if((error= rows_event_stmt_cleanup(rli, thd))) { ... } ... } ... } 4) But when the statement must be skipped because it has been already applied from another channel, Rows_log_event::do_apply_event() exits before rows_event_stmt_cleanup() call. See the following code: int Rows_log_event::do_apply_event(Relay_log_info const *rli) { ... enum_gtid_statement_status state= gtid_pre_statement_checks(thd); if (state == GTID_STATEMENT_EXECUTE) { if (gtid_pre_statement_post_implicit_commit_checks(thd)) state= GTID_STATEMENT_CANCEL; } if (state == GTID_STATEMENT_CANCEL) { uint error= thd->get_stmt_da()->mysql_errno(); DBUG_ASSERT(error != 0); rli->report(ERROR_LEVEL, error, "Error executing row event: '%s'", thd->get_stmt_da()->message_text()); thd->is_slave_error= 1; DBUG_RETURN(-1); } else if (state == GTID_STATEMENT_SKIP) DBUG_RETURN(0); ... end: if (get_flags(STMT_END_F)) { if((error= rows_event_stmt_cleanup(rli, thd))) { ... } ... } ... } So in this case rli->rows_query_ev will not be deleted. In debug build such case will lead to crash due to the corresponding assert in Rows_query_log_event::do_apply_event(): int Rows_query_log_event::do_apply_event(Relay_log_info const *rli) { ... DBUG_ASSERT(rli->rows_query_ev == NULL); ... } But for release build there will be just memory leak. How to fix: delete Rows_query_log_event instance if the next Rows_log_event instance read from relay log must be skipped. diff --git a/mysql-test/suite/rpl_gtid/r/bug85371.result b/mysql-test/suite/rpl_gtid/r/bug85371.result new file mode 100644 index 00000000000..a6d289a527f --- /dev/null +++ b/mysql-test/suite/rpl_gtid/r/bug85371.result @@ -0,0 +1,31 @@ +include/rpl_init.inc [topology=1->3,3->2,1->2] +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information. +### Create some binlog events on server_1 +CREATE TABLE t1(a INT); +INSERT INTO t1 (a) VALUES (1), (2), (3), (4), (5); +INSERT INTO t1 (a) VALUES (6), (7), (8), (9), (10); +DROP TABLE t1; +### sync the server_3 with server_1 +[connection server_1] +include/sync_slave_sql_with_master.inc [FOR CHANNEL 'channel_1'] +### sync the server_2 with server_3 +[connection server_3] +include/sync_slave_sql_with_master.inc [FOR CHANNEL 'channel_3'] +### sync the server_2 with server_1 +### here crash must take place if the bug is not fixed +[connection server_1] +include/sync_slave_sql_with_master.inc [FOR CHANNEL 'channel_1'] +### finish replication +[connection server_1] +include/rpl_end.inc +RESET SLAVE ALL FOR CHANNEL 'channel_1'; +RESET SLAVE ALL FOR CHANNEL 'channel_3'; +RESET SLAVE ALL FOR CHANNEL 'channel_1'; diff --git a/mysql-test/suite/rpl_gtid/t/bug85371.cnf b/mysql-test/suite/rpl_gtid/t/bug85371.cnf new file mode 100644 index 00000000000..becc4afeb14 --- /dev/null +++ b/mysql-test/suite/rpl_gtid/t/bug85371.cnf @@ -0,0 +1,16 @@ +!include ../my.cnf + +[mysqld.1] +binlog-rows-query-log-events= ON + +[mysqld.2] +binlog-rows-query-log-events= ON + +[mysqld.3] +binlog-rows-query-log-events= ON + +[ENV] +SERVER_MYPORT_2= @mysqld.2.port +SERVER_MYPORT_3= @mysqld.3.port +SERVER_MYSOCK_3= @mysqld.3.socket + diff --git a/mysql-test/suite/rpl_gtid/t/bug85371.test b/mysql-test/suite/rpl_gtid/t/bug85371.test new file mode 100644 index 00000000000..40f8877f1d8 --- /dev/null +++ b/mysql-test/suite/rpl_gtid/t/bug85371.test @@ -0,0 +1,50 @@ +# +# In the case if the bug is not fixed the test will crash on assert +# in Rows_query_log_event::do_apply_event() on debug build. +# +--source include/have_binlog_format_row.inc +--source include/not_group_replication_plugin.inc + +--let $rpl_topology= 1->3,3->2,1->2 +--let $rpl_multi_source= 1 +--let $use_gtids= 1 +--source include/rpl_init.inc + +--echo ### Create some binlog events on server_1 +--connection server_1 +CREATE TABLE t1(a INT); +INSERT INTO t1 (a) VALUES (1), (2), (3), (4), (5); +INSERT INTO t1 (a) VALUES (6), (7), (8), (9), (10); +DROP TABLE t1; + +--echo ### sync the server_3 with server_1 +--let $rpl_connection_name= server_1 +--source include/rpl_connection.inc + +--let $rpl_channel_name= 'channel_1' +--let $sync_slave_connection= server_3 +--source include/sync_slave_sql_with_master.inc + +--echo ### sync the server_2 with server_3 +--let $rpl_connection_name= server_3 +--source include/rpl_connection.inc + +--let $rpl_channel_name= 'channel_3' +--let $sync_slave_connection=server_2 +--source include/sync_slave_sql_with_master.inc + +--echo ### sync the server_2 with server_1 +--echo ### here crash must take place if the bug is not fixed +--let $rpl_connection_name= server_1 +--source include/rpl_connection.inc + +--let $rpl_channel_name= 'channel_1' +--let $sync_slave_connection= server_2 +--source include/sync_slave_sql_with_master.inc + +--echo ### finish replication +--let $rpl_connection_name= server_1 +--source include/rpl_connection.inc + +--let $rpl_skip_sync= 1 +--source include/rpl_end.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index e3f1d77a25f..feccac17eaf 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -9676,7 +9676,21 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) DBUG_RETURN(-1); } else if (state == GTID_STATEMENT_SKIP) + { + if (rli->rows_query_ev) + { + /* + thd->m_query_string now points to the data from + rli->rows_query_ev->m_rows_query + (see Rows_query_log_event::do_apply_event()), don't let it point + to unallocated memory, reset query string first + */ + thd->reset_query(); + delete rli->rows_query_ev; + const_cast(rli)->rows_query_ev= nullptr; + } DBUG_RETURN(0); + } /* The current statement is just about to begin and diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 2a934a99295..c8bd2314d0f 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1626,9 +1626,15 @@ void Relay_log_info::cleanup_context(THD *thd, bool error) } if (rows_query_ev) { + /* + thd->m_query_string now points to the data from + rli->rows_query_ev->m_rows_query + (see Rows_query_log_event::do_apply_event()), don't let it point + to unallocated memory, reset query string first + */ + info_thd->reset_query(); delete rows_query_ev; rows_query_ev= NULL; - info_thd->reset_query(); } m_table_map.clear_tables(); slave_close_thread_tables(thd);