From 65edbf7ab3a51a65146eb05f11607cbeb9c948be Mon Sep 17 00:00:00 2001 From: Krunal Bauskar Date: Wed, 5 Aug 2020 13:49:56 +0800 Subject: [PATCH] Optimal memory-barrier for performance-schema counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - performance-schema is filled with counters to track different aspects.   Most of these counters use default memory-order that is sequential-consistent. - Given these are tracking counters and not meant to synchronize threads, memory order of these counters could be switched over to relaxed. --- include/my_atomic.h | 50 +++++++++++++++++++ storage/perfschema/ha_perfschema.cc | 5 +-- storage/perfschema/pfs_account.h | 4 +- storage/perfschema/pfs_digest.h | 6 +-- storage/perfschema/pfs_engine_table.h | 4 +- storage/perfschema/pfs_global.h | 11 ++--- storage/perfschema/pfs_histogram.h | 4 +- storage/perfschema/pfs_host.h | 4 +- storage/perfschema/pfs_instr_class.cc | 40 ++++++++--------- storage/perfschema/pfs_instr_class.h | 6 +-- storage/perfschema/pfs_stat.h | 22 +++++----- storage/perfschema/pfs_user.h | 4 +- .../perfschema/service_pfs_notification.cc | 6 +-- 13 files changed, 106 insertions(+), 60 deletions(-) diff --git a/include/my_atomic.h b/include/my_atomic.h index ce8927816f6..dc713fd96d8 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -60,4 +60,54 @@ static inline int my_yield_processor() { #endif +#include <atomic> +#ifdef __powerpc__ +#define CACHE_LINE_SIZE 128 +#else +#define CACHE_LINE_SIZE 64 +#endif /* __powerpc__ */ + +/** + Atomic counter padded with CACHE_LINE_SIZE - sizeof(std::atomic<T>) bytes. + Every access, including the implicit conversion to T and the read done by + the copy constructor, uses std::memory_order_relaxed: updates are atomic + but impose no ordering on other memory accesses, so this type must not be + used to synchronize threads. +*/ +template <typename T> +class atomic_counter_t { + private: + char m_pad[CACHE_LINE_SIZE - sizeof(std::atomic<T>)]; + std::atomic<T> m_counter; + + public: + atomic_counter_t(T n) : m_counter(n) {} + atomic_counter_t() {} + + atomic_counter_t(const atomic_counter_t &rhs) : m_counter(rhs.load()) {} + + T fetch_add(T n) { return m_counter.fetch_add(n, std::memory_order_relaxed); } + T fetch_sub(T n) { return m_counter.fetch_sub(n, std::memory_order_relaxed); } 
+ T fetch_or(T n) { return m_counter.fetch_or(n, std::memory_order_relaxed); } + + T add(T n) { return fetch_add(n); } + T sub(T n) { return fetch_sub(n); } + T load() const { return m_counter.load(std::memory_order_relaxed); } + void store(T n) { m_counter.store(n, std::memory_order_relaxed); } + + T operator++(int) { return add(1); } + T operator--(int) { return sub(1); } + T operator++() { return add(1) + 1; } + T operator--() { return sub(1) - 1; } + T operator+=(T n) { return add(n) + n; } + T operator-=(T n) { return sub(n) - n; } + + operator T() const { return load(); } + + T operator=(T n) { + store(n); + return n; + } +}; + #endif /* MY_ATOMIC_INCLUDED */ diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc index 4c5504f05f8..d2ba99b9228 100644 --- a/storage/perfschema/ha_perfschema.cc +++ b/storage/perfschema/ha_perfschema.cc @@ -27,7 +27,6 @@ @file storage/perfschema/ha_perfschema.cc Performance schema storage engine (implementation). 
*/ -#include #include "lex_string.h" #include "lf.h" @@ -1283,11 +1282,11 @@ static bool pfs_show_status(handlerton *, THD *thd, stat_print_fn *print, } static void inc_ref_count(PFS_engine_table_share *share) { - std::atomic_fetch_add(&share->m_ref_count, 1); + ++share->m_ref_count; } static void dec_ref_count(PFS_engine_table_share *share) { - std::atomic_fetch_sub(&share->m_ref_count, 1); + --share->m_ref_count; } static int compare_database_names(const char *name1, const char *name2) { diff --git a/storage/perfschema/pfs_account.h b/storage/perfschema/pfs_account.h index 936466197a0..50cdac38373 100644 --- a/storage/perfschema/pfs_account.h +++ b/storage/perfschema/pfs_account.h @@ -30,7 +30,7 @@ */ #include -#include +#include "my_atomic.h" #include "lf.h" #include "my_hostname.h" /* HOSTNAME_LENGTH */ @@ -128,7 +128,7 @@ struct PFS_ALIGNED PFS_account : PFS_connection_slice { ulonglong m_disconnected_count; private: - std::atomic m_refcount; + atomic_counter_t m_refcount; /** Per account memory aggregated statistics. diff --git a/storage/perfschema/pfs_digest.h b/storage/perfschema/pfs_digest.h index 1196d83048c..15013f4889f 100644 --- a/storage/perfschema/pfs_digest.h +++ b/storage/perfschema/pfs_digest.h @@ -29,7 +29,7 @@ */ #include -#include +#include "my_atomic.h" #include "lf.h" #include "my_inttypes.h" @@ -79,9 +79,9 @@ struct PFS_ALIGNED PFS_statements_digest_stat { /** Query sample seen timestamp.*/ ulonglong m_query_sample_seen; /** Query sample timer wait.*/ - std::atomic m_query_sample_timer_wait; + atomic_counter_t m_query_sample_timer_wait; /** Query sample reference count. 
*/ - std::atomic m_query_sample_refs; + atomic_counter_t m_query_sample_refs; /** First and last seen timestamps.*/ ulonglong m_first_seen; diff --git a/storage/perfschema/pfs_engine_table.h b/storage/perfschema/pfs_engine_table.h index 55bfff31946..1251469d445 100644 --- a/storage/perfschema/pfs_engine_table.h +++ b/storage/perfschema/pfs_engine_table.h @@ -26,9 +26,9 @@ #include #include #include -#include #include +#include "my_atomic.h" #include "my_base.h" #include "my_compiler.h" #include "my_dbug.h" @@ -397,7 +397,7 @@ struct PFS_engine_table_share { /* Interface to be implemented by plugin who adds its own table in PFS. */ PFS_engine_table_proxy m_st_table; /* Number of table objects using this share currently. */ - std::atomic m_ref_count; + atomic_counter_t m_ref_count; /* is marked to be deleted? */ bool m_in_purgatory; }; diff --git a/storage/perfschema/pfs_global.h b/storage/perfschema/pfs_global.h index d77dc35545e..41fafe883ae 100644 --- a/storage/perfschema/pfs_global.h +++ b/storage/perfschema/pfs_global.h @@ -25,7 +25,7 @@ #include "my_config.h" -#include +#include "my_atomic.h" #include #ifdef HAVE_SYS_SOCKET_H @@ -72,8 +72,7 @@ extern bool pfs_initialized; This is for performance, for variables accessed very frequently. */ struct PFS_cacheline_atomic_uint32 { - std::atomic m_u32; - char m_full_cache_line[PFS_CACHE_LINE_SIZE - sizeof(std::atomic)]; + atomic_counter_t m_u32; PFS_cacheline_atomic_uint32() : m_u32(0) {} }; @@ -83,8 +82,7 @@ struct PFS_cacheline_atomic_uint32 { This is for performance, for variables accessed very frequently. */ struct PFS_cacheline_atomic_uint64 { - std::atomic m_u64; - char m_full_cache_line[PFS_CACHE_LINE_SIZE - sizeof(std::atomic)]; + atomic_counter_t m_u64; PFS_cacheline_atomic_uint64() : m_u64(0) {} }; @@ -94,8 +92,7 @@ struct PFS_cacheline_atomic_uint64 { This is for performance, for variables accessed very frequently. 
*/ struct PFS_cacheline_atomic_size_t { - std::atomic m_size_t; - char m_full_cache_line[PFS_CACHE_LINE_SIZE - sizeof(std::atomic)]; + atomic_counter_t m_size_t; PFS_cacheline_atomic_size_t() : m_size_t(0) {} }; diff --git a/storage/perfschema/pfs_histogram.h b/storage/perfschema/pfs_histogram.h index 241ff1d3752..b11affeb151 100644 --- a/storage/perfschema/pfs_histogram.h +++ b/storage/perfschema/pfs_histogram.h @@ -23,7 +23,7 @@ #ifndef PFS_HISTOGRAM_H #define PFS_HISTOGRAM_H -#include +#include "my_atomic.h" #include "my_compiler.h" #include "my_inttypes.h" @@ -43,7 +43,7 @@ struct PFS_histogram { ulonglong read_bucket(uint bucket_index) { return m_bucket[bucket_index]; } private: - std::atomic m_bucket[NUMBER_OF_BUCKETS]; + atomic_counter_t m_bucket[NUMBER_OF_BUCKETS]; }; struct PFS_histogram_timers { diff --git a/storage/perfschema/pfs_host.h b/storage/perfschema/pfs_host.h index a4e1aa7235f..7106beeb7ab 100644 --- a/storage/perfschema/pfs_host.h +++ b/storage/perfschema/pfs_host.h @@ -30,7 +30,7 @@ */ #include -#include +#include "my_atomic.h" #include "lf.h" #include "my_hostname.h" /* HOSTNAME_LENGTH */ @@ -117,7 +117,7 @@ struct PFS_ALIGNED PFS_host : PFS_connection_slice { ulonglong m_disconnected_count; private: - std::atomic m_refcount; + atomic_counter_t m_refcount; /** Per host memory aggregated statistics. diff --git a/storage/perfschema/pfs_instr_class.cc b/storage/perfschema/pfs_instr_class.cc index b5dc2fb0af4..b1c46575fe4 100644 --- a/storage/perfschema/pfs_instr_class.cc +++ b/storage/perfschema/pfs_instr_class.cc @@ -29,8 +29,8 @@ #include #include -#include +#include "my_atomic.h" #include "lex_string.h" #include "lf.h" #include "my_dbug.h" @@ -68,7 +68,7 @@ bool pfs_enabled = true; Incremented when a shared library is being unloaded, decremented when the performance schema is finished processing the event. 
*/ -std::atomic pfs_unload_plugin_ref_count(0); +atomic_counter_t pfs_unload_plugin_ref_count(0); /** PFS_INSTRUMENT option settings array @@ -88,12 +88,12 @@ static void init_instr_class(PFS_instr_class *klass, const char *name, - the performance schema initialization - a plugin initialization */ -static std::atomic mutex_class_dirty_count{0}; -static std::atomic mutex_class_allocated_count{0}; -static std::atomic rwlock_class_dirty_count{0}; -static std::atomic rwlock_class_allocated_count{0}; -static std::atomic cond_class_dirty_count{0}; -static std::atomic cond_class_allocated_count{0}; +static atomic_counter_t mutex_class_dirty_count{0}; +static atomic_counter_t mutex_class_allocated_count{0}; +static atomic_counter_t rwlock_class_dirty_count{0}; +static atomic_counter_t rwlock_class_allocated_count{0}; +static atomic_counter_t cond_class_dirty_count{0}; +static atomic_counter_t cond_class_allocated_count{0}; /** Size of the mutex class array. @sa mutex_class_array */ ulong mutex_class_max = 0; @@ -156,8 +156,8 @@ PFS_cond_class *cond_class_array = nullptr; - the performance schema initialization - a plugin initialization */ -static std::atomic thread_class_dirty_count{0}; -static std::atomic thread_class_allocated_count{0}; +static atomic_counter_t thread_class_dirty_count{0}; +static atomic_counter_t thread_class_allocated_count{0}; static PFS_thread_class *thread_class_array = nullptr; @@ -187,28 +187,28 @@ LF_HASH table_share_hash; /** True if table_share_hash is initialized. 
*/ static bool table_share_hash_inited = false; -static std::atomic file_class_dirty_count{0}; -static std::atomic file_class_allocated_count{0}; +static atomic_counter_t file_class_dirty_count{0}; +static atomic_counter_t file_class_allocated_count{0}; PFS_file_class *file_class_array = nullptr; -static std::atomic stage_class_dirty_count{0}; -static std::atomic stage_class_allocated_count{0}; +static atomic_counter_t stage_class_dirty_count{0}; +static atomic_counter_t stage_class_allocated_count{0}; static PFS_stage_class *stage_class_array = nullptr; -static std::atomic statement_class_dirty_count{0}; -static std::atomic statement_class_allocated_count{0}; +static atomic_counter_t statement_class_dirty_count{0}; +static atomic_counter_t statement_class_allocated_count{0}; static PFS_statement_class *statement_class_array = nullptr; -static std::atomic socket_class_dirty_count{0}; -static std::atomic socket_class_allocated_count{0}; +static atomic_counter_t socket_class_dirty_count{0}; +static atomic_counter_t socket_class_allocated_count{0}; static PFS_socket_class *socket_class_array = nullptr; -static std::atomic memory_class_dirty_count{0}; -static std::atomic memory_class_allocated_count{0}; +static atomic_counter_t memory_class_dirty_count{0}; +static atomic_counter_t memory_class_allocated_count{0}; static std::atomic memory_class_array{nullptr}; diff --git a/storage/perfschema/pfs_instr_class.h b/storage/perfschema/pfs_instr_class.h index 5d9d50cd581..059d313d0f9 100644 --- a/storage/perfschema/pfs_instr_class.h +++ b/storage/perfschema/pfs_instr_class.h @@ -26,8 +26,8 @@ #include "my_config.h" #include -#include +#include "my_atomic.h" #include "lf.h" #include "my_compiler.h" #include "my_dbug.h" @@ -74,7 +74,7 @@ class PFS_opaque_container_page; extern bool pfs_enabled; /** Global ref count for plugin and component events. 
*/ -extern std::atomic pfs_unload_plugin_ref_count; +extern atomic_counter_t pfs_unload_plugin_ref_count; /** Key, naming a synch instrument (mutex, rwlock, cond). */ typedef unsigned int PFS_sync_key; @@ -381,7 +381,7 @@ struct PFS_ALIGNED PFS_table_share { private: /** Number of opened table handles. */ - std::atomic m_refcount; + atomic_counter_t m_refcount; /** Table locks statistics. */ std::atomic m_race_lock_stat; /** Table indexes stats. */ diff --git a/storage/perfschema/pfs_stat.h b/storage/perfschema/pfs_stat.h index cb89c30a4e2..84e7c76b18b 100644 --- a/storage/perfschema/pfs_stat.h +++ b/storage/perfschema/pfs_stat.h @@ -24,8 +24,8 @@ #define PFS_STAT_H #include -#include +#include "my_atomic.h" #include "my_dbug.h" #include "my_sys.h" #include "my_systime.h" @@ -1055,16 +1055,16 @@ void memory_full_aggregate(const F *from, T *stat1, T *stat2) { } struct PFS_memory_shared_stat { - std::atomic m_used; - std::atomic m_alloc_count; - std::atomic m_free_count; - std::atomic m_alloc_size; - std::atomic m_free_size; - - std::atomic m_alloc_count_capacity; - std::atomic m_free_count_capacity; - std::atomic m_alloc_size_capacity; - std::atomic m_free_size_capacity; + atomic_counter_t m_used; + atomic_counter_t m_alloc_count; + atomic_counter_t m_free_count; + atomic_counter_t m_alloc_size; + atomic_counter_t m_free_size; + + atomic_counter_t m_alloc_count_capacity; + atomic_counter_t m_free_count_capacity; + atomic_counter_t m_alloc_size_capacity; + atomic_counter_t m_free_size_capacity; inline void reset(void) { m_used = false; diff --git a/storage/perfschema/pfs_user.h b/storage/perfschema/pfs_user.h index 2a62de3fc1f..c757e56776d 100644 --- a/storage/perfschema/pfs_user.h +++ b/storage/perfschema/pfs_user.h @@ -30,8 +30,8 @@ */ #include -#include +#include "my_atomic.h" #include "lf.h" #include "my_inttypes.h" #include "mysql_com.h" @@ -116,7 +116,7 @@ struct PFS_ALIGNED PFS_user : public PFS_connection_slice { ulonglong m_disconnected_count; private: - 
std::atomic m_refcount; + atomic_counter_t m_refcount; /** Per user memory aggregated statistics. diff --git a/storage/perfschema/service_pfs_notification.cc b/storage/perfschema/service_pfs_notification.cc index f05906cec36..b681da3cb64 100644 --- a/storage/perfschema/service_pfs_notification.cc +++ b/storage/perfschema/service_pfs_notification.cc @@ -30,8 +30,8 @@ #include #include #include -#include +#include "my_atomic.h" #include "my_systime.h" // my_sleep() #include "pfs_thread_provider.h" #include "storage/perfschema/pfs_server.h" @@ -79,7 +79,7 @@ struct PFS_notification_node { /** True if can be unregistered. */ bool m_use_ref_count; /** Reference count with high bit as enabled flag. */ - std::atomic m_refs; + atomic_counter_t m_refs; /** Next registration. */ std::atomic m_next; /** Bitmap of registered callbacks. */ @@ -295,7 +295,7 @@ struct PFS_notification_registry { static const std::uint32_t FREE_MASK = 0x80000000; std::atomic m_head; - std::atomic m_count; + atomic_counter_t m_count; }; /**