Description:
Hi,
when a replica logs a slow query which was not run in a specific schema after a query run in a specific schema, the slow query log file contains a "use ;" which is not a valid command. See How to repeat for details.
I would expect the slow query log file on the replica to not contain a "use ;" line (like on the primary, see How to repeat).
How to repeat for 8.4.0, but the same happens in 8.0.37 (needs tuning the part of the script extracting the slow log entry as 8.4.0 logs the Quit command and 8.0.37 does not).
I found this via code analysis while working on Bug#106645. See Suggested fix for a solution.
Many thanks for looking into this,
Jean-François Gagné
How to repeat:
# Cannot use "deploy replication" because 8.4 introduced incompatibilities with dbdeployer,
# so we are creative for creating a replication test environment.
dbdeployer deploy multiple 8.4.0 -c slow_query_log_file=slow.log -c slow_query_log=ON
./n1 <<< "RESET BINARY LOGS AND GTIDS" # Replace by RESET MASTER for 8.0.
./n1 <<< "CREATE USER 'repl'@'%' IDENTIFIED BY 'password'"
./n1 -u root <<< "GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%'"
port=$(./n1 -N <<< "select @@global.port")
sql="change replication source to SOURCE_HOST='127.0.0.1', SOURCE_PORT=$port"
sql="$sql, SOURCE_USER='repl', SOURCE_PASSWORD='password', SOURCE_SSL=1"
sql="$sql; start replica"
./n2 <<< "$sql"
# Enable slow query log on the replica.
./n2 <<< "stop replica; set global long_query_time = 0, log_slow_replica_statements = ON; start replica"
# Run the right sequence of command, and check the slow query log files on primary and replcia.
# The primary file does not contain a "use ..." line, and the replica contains a "use ;" line.
./n1 <<< "set session long_query_time = 0; create database test_jfg"
./n1 test_jfg <<< "set session long_query_time = 0; create table t1(id int)"
./n1 <<< "set session long_query_time = 0; create table test_jfg.t2(id int)"
sleep 1; \
echo; echo --Primary:; tail -n 10 node1/data/slow.log | head -n 5; \
echo; echo --Replica:; tail -n 6 node2/data/slow.log
--Primary:
# Time: 2024-06-03T17:24:40.868854Z
# User@Host: msandbox[msandbox] @ localhost [] Id: 18
# Query_time: 0.000014 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0
SET timestamp=1717435480;
set session long_query_time = 0;
--Replica:
# Time: 2024-06-03T17:24:40.872084Z
# User@Host: skip-grants user[] @ [] Id: 20
# Query_time: 0.001329 Lock_time: 0.000006 Rows_sent: 0 Rows_examined: 0
use ;
SET timestamp=1717435480;
create table test_jfg.t2(id int);
Suggested fix:
I think below could be a fix.
$ git diff
diff --git a/sql/log.cc b/sql/log.cc
index bcfd73a25a5..1c93269e1dc 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -802,7 +802,7 @@ bool File_query_log::write_slow(THD *thd, ulonglong current_utime,
goto err; /* purecov: inspected */
}
- if (thd->db().str && strcmp(thd->db().str, db)) { // Database changed
+ if (thd->db().str && thd->db().length > 0 && strcmp(thd->db().str, db)) { // Database changed
if (my_b_printf(&log_file, "use %s;\n", thd->db().str) == (uint)-1)
goto err;
my_stpcpy(db, thd->db().str);
This fix would also remove "use test_jfg;" on the replica in below (after running commands in How to repeat), but this might be what we want as this line is not in the slow query log file on the primary.
./n1 test_jfg <<< "set session long_query_time = 0; create table t3(id int)"
sleep 1; \
echo; echo --Primary:; tail -n 10 node1/data/slow.log | head -n 5; \
echo; echo --Replica:; tail -n 6 node2/data/slow.log
--Primary:
# Time: 2024-06-03T17:25:01.741787Z
# User@Host: msandbox[msandbox] @ localhost [] Id: 19
# Query_time: 0.004345 Lock_time: 0.000009 Rows_sent: 0 Rows_examined: 0
SET timestamp=1717435501;
create table t3(id int);
--Replica:
# Time: 2024-06-03T17:25:01.745169Z
# User@Host: skip-grants user[] @ [] Id: 20
# Query_time: 0.003110 Lock_time: 0.000011 Rows_sent: 0 Rows_examined: 0
use test_jfg;
SET timestamp=1717435501;
create table t3(id int);