Bug #79784 update setup_instruments do not affect the global mutex/rwlock/cond
Submitted: 28 Dec 2015 6:50 Modified: 21 Jan 2016 15:06
Reporter: zhang yingqiang (OCA) Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Performance Schema Severity:S3 (Non-critical)
Version:5.6.24 5.7.10 OS:Any
Assigned to: Marc ALFF CPU Architecture:Any
Tags: Performance Schema; global mutex;

[28 Dec 2015 6:50] zhang yingqiang
Description:
Switch on instruments of performance schema dynamically by update table 'performance_schema.setup_instruments' without startup parameter '--performance-schema-instrument' or my.cnf, do not affect the global mutex/rwlock/cond.

If we startup with "performance_schema=1 and performance_schema_instrument = '%=off' ", mysqld will set that->m_psi to NULL because klass->m_enabled is false in INIT_BODY_V1. The call stack is my_thread_global_reinit/init_thread_environment/... -> inline_mysql_mutex_init/inline_mysql_rwlock_init/...-> init_mutex_v1/init_rwlock_v1/... -> INIT_BODY_V1 .

If we switch on instruments by update table 'performance_schema.setup_instruments' after that, the function table_setup_instruments::update_row_values do not create the m_psi for these global mutex/rwlock that already created, and we cannot monitor these  waits in the consumer tables such as mutex_instances or events_waits_summary_by_thread_by_event_name etc.

How to repeat:
We take LOCK_uuid_generator as example.
The debug sync patch:

diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 0e44416..9a810f3 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -4289,6 +4289,7 @@ String *Item_func_uuid::val_str(String *str)
   THD *thd= current_thd;
 
   mysql_mutex_lock(&LOCK_uuid_generator);
+  DEBUG_SYNC_C("after_LOCK_uuid_generator");
   if (! uuid_time) /* first UUID() call. initializing data */
   {
     ulong tmp=sql_rnd_with_mutex();

the mtr script ptach:

diff --git a/mysql-test/t/ps_global-master.opt b/mysql-test/t/ps_global-master.opt
new file mode 100644
index 0000000..b74e03b
--- /dev/null
+++ b/mysql-test/t/ps_global-master.opt
@@ -0,0 +1 @@
+--performance-schema-instrument='%=off'
diff --git a/mysql-test/t/ps_global.test b/mysql-test/t/ps_global.test
new file mode 100644
index 0000000..e74cb3a
--- /dev/null
+++ b/mysql-test/t/ps_global.test
@@ -0,0 +1,27 @@
+--source include/have_debug_sync.inc
+update performance_schema.setup_instruments set ENABLED='yes', TIMED='yes';
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+set DEBUG_SYNC='after_LOCK_uuid_generator WAIT_FOR continue';
+--send select uuid();
+
+connection con2;
+--send select uuid();
+
+
+connection default;
+--sleep 1
+show processlist;
+# expect LOCK_uuid_generator
+select * from performance_schema.events_waits_current order by THREAD_ID desc limit 1;
+set DEBUG_SYNC='now signal continue'; 
+
+update performance_schema.setup_instruments set ENABLED='no', TIMED='no';
+
+connection con2;
+--reap
+
+connection con1;
+--reap

Suggested fix:
In my opinion, we should initialize the mysql_mutex_t::m_psi if performance_schema=1 (even if performance_schema_instrument = '%=off'). For performance reasons, we could do this to the global mutex/rwlock/cond.

diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
index 072bb9f..3ccea69 100644
--- a/storage/perfschema/pfs.cc
+++ b/storage/perfschema/pfs.cc
@@ -1477,7 +1477,7 @@ static void register_socket_v1(const char *category,
   klass= find_##T##_class(KEY);                                             \
   if (unlikely(klass == NULL))                                              \
     return NULL;                                                            \
-  if (! klass->m_enabled)                                                   \
+  if (! pfs_param.m_enabled)                                                \
     return NULL;                                                            \
   pfs= create_##T(klass, ID);                                               \
   return reinterpret_cast<PSI_##T *> (pfs)
diff --git a/storage/perfschema/pfs_server.cc b/storage/perfschema/pfs_server.cc
index 093f772..b5d9406 100644
--- a/storage/perfschema/pfs_server.cc
+++ b/storage/perfschema/pfs_server.cc
@@ -37,6 +37,7 @@
 #include "pfs_account.h"
 #include "pfs_defaults.h"
 #include "pfs_digest.h"
+#include "pfs_server.h"
 
 PFS_global_param pfs_param;
[8 Jan 2016 12:36] MySQL Verification Team
Hello zhang yingqiang,

Thank you for the report and contribution.

Thanks,
Umesh
[8 Jan 2016 12:36] MySQL Verification Team
-- 5.7.10 src build

-- 
[umshastr@hod03]/export/umesh/server/source/bugs/79784/mysql-5.7.10/mysql-test: ./mtr ps_global
Logging: ./mtr  ps_global
MySQL Version 5.7.10
Checking supported features...
 - SSL connections supported
 - binaries are debug compiled
Collecting tests...
Removing old var directory...
Creating var directory '/export/umesh/server/source/bugs/79784/mysql-5.7.10/mysql-test/var'...
Installing system database...

==============================================================================

TEST                                      RESULT   TIME (ms) or COMMENT
--------------------------------------------------------------------------

worker[1] Using MTR_BUILD_THREAD 300, with reserved ports 13000..13009
update performance_schema.setup_instruments set ENABLED='yes', TIMED='yes';
set DEBUG_SYNC='after_LOCK_uuid_generator WAIT_FOR continue';
select uuid();;
select uuid();;
show processlist;
Id      User    Host    db      Command Time    State   Info
3       root    localhost       test    Query   0       starting        show processlist
4       root    localhost       test    Query   1       debug sync point: after_LOCK_uuid_generator     select uuid()
5       root    localhost       test    Query   1       executing       select uuid()
select * from performance_schema.events_waits_current order by THREAD_ID desc limit 1;
THREAD_ID       EVENT_ID        END_EVENT_ID    EVENT_NAME      SOURCE  TIMER_START     TIMER_END       TIMER_WAIT      SPINS   OBJECT_SCHEMA   OBJECT_NAME     INDEX_NAME      OBJECT_TYPE     OBJECT_INSTANCE_BEGIN     NESTING_EVENT_ID        NESTING_EVENT_TYPE      OPERATION       NUMBER_OF_BYTES FLAGS
26      23      23      wait/synch/mutex/sql/THD::LOCK_query_plan       sql_class.h:1636        660349105508    660349290788    185280  NULL    NULL    NULL    NULL    NULL    139879367317872 22STAGE   lock    NULL    NULL
set DEBUG_SYNC='now signal continue';
update performance_schema.setup_instruments set ENABLED='no', TIMED='no';
uuid()
1c77f3e1-b601-11e5-9b4d-0010e05f3e06
uuid()
1c77f370-b601-11e5-9b4d-0010e05f3e06
main.ps_global                           [ pass ]   1019
--------------------------------------------------------------------------
The servers were restarted 0 times
Spent 1.019 of 7 seconds executing testcases

Completed: All 1 tests were successful.

-- with suggested patch

[umshastr@hod03]/export/umesh/server/source/bugs/79784/mysql-5.7.10/mysql-test: ./mtr ps_global
Logging: ./mtr  ps_global
MySQL Version 5.7.10
Checking supported features...
 - SSL connections supported
 - binaries are debug compiled
Collecting tests...
Checking leftover processes...
Removing old var directory...
Creating var directory '/export/umesh/server/source/bugs/79784/mysql-5.7.10/mysql-test/var'...
Installing system database...

==============================================================================

TEST                                      RESULT   TIME (ms) or COMMENT
--------------------------------------------------------------------------

worker[1] Using MTR_BUILD_THREAD 300, with reserved ports 13000..13009
update performance_schema.setup_instruments set ENABLED='yes', TIMED='yes';
set DEBUG_SYNC='after_LOCK_uuid_generator WAIT_FOR continue';
select uuid();;
select uuid();;
show processlist;
Id      User    Host    db      Command Time    State   Info
3       root    localhost       test    Query   0       starting        show processlist
4       root    localhost       test    Query   1       debug sync point: after_LOCK_uuid_generator     select uuid()
5       root    localhost       test    Query   1       executing       select uuid()
select * from performance_schema.events_waits_current order by THREAD_ID desc limit 1;
THREAD_ID       EVENT_ID        END_EVENT_ID    EVENT_NAME      SOURCE  TIMER_START     TIMER_END       TIMER_WAIT      SPINS   OBJECT_SCHEMA   OBJECT_NAME     INDEX_NAME      OBJECT_TYPE     OBJECT_INSTANCE_BEGIN     NESTING_EVENT_ID        NESTING_EVENT_TYPE      OPERATION       NUMBER_OF_BYTES FLAGS
26      42      NULL    wait/synch/mutex/sql/LOCK_uuid_generator        item_strfunc.cc:5089    683659714216    1686893934440   1003234220224   NULL    NULL    NULL    NULL    NULL    46456480 41       STAGE   lock    NULL    NULL
set DEBUG_SYNC='now signal continue';
update performance_schema.setup_instruments set ENABLED='no', TIMED='no';
uuid()
cf009d16-b603-11e5-97ba-0010e05f3e06
uuid()
cf009c73-b603-11e5-97ba-0010e05f3e06
main.ps_global                           [ pass ]   1023
--------------------------------------------------------------------------
The servers were restarted 0 times
Spent 1.023 of 8 seconds executing testcases

Completed: All 1 tests were successful.
[21 Jan 2016 15:06] Paul DuBois
Noted in 5.7.12, 5.8.0 changelogs.

Some Performance Schema global instruments, if not enabled at server
startup, could be in a state where it was not possible to enable them
at runtime. This restriction has been lifted. Affected instruments
include mutex, rwlock, cond, and socket instances.

Thanks to Zhang Yingqiang for the patch.