Group Replication: process the certification info garbage collection in chunks.

Certifier::garbage_collect() used to walk the whole certification info while
holding LOCK_certification_info. This patch processes it in chunks, releasing
the mutex and sleeping between chunks so that transactions can interleave.

Two plugin variables control the loop:
  certification_loop_sleep_time  sleep between chunks, in microseconds
  certification_loop_chunk_size  entries examined per chunk

The mutex is released between chunks, so the iterator is never carried across
the unlocked window: the current key is saved while the mutex is held and the
iterator is looked up again after re-locking.

diff -ru orig-mysql-8.0.30/plugin/group_replication/include/plugin.h mysql-8.0.30/plugin/group_replication/include/plugin.h
--- orig-mysql-8.0.30/plugin/group_replication/include/plugin.h	2022-07-06 21:36:34.000000000 +0000
+++ mysql-8.0.30/plugin/group_replication/include/plugin.h	2022-09-09 18:42:03.244087272 +0000
@@ -236,6 +236,8 @@
 const char *get_view_change_uuid_var();
 ulong get_exit_state_action_var();
 ulong get_flow_control_mode_var();
+ulong get_certification_loop_sleep_time_var();
+ulong get_certification_loop_chunk_size_var();
 long get_flow_control_certifier_threshold_var();
 long get_flow_control_applier_threshold_var();
 long get_flow_control_min_quota_var();
diff -ru orig-mysql-8.0.30/plugin/group_replication/include/plugin_variables.h mysql-8.0.30/plugin/group_replication/include/plugin_variables.h
--- orig-mysql-8.0.30/plugin/group_replication/include/plugin_variables.h	2022-07-06 21:36:34.000000000 +0000
+++ mysql-8.0.30/plugin/group_replication/include/plugin_variables.h	2022-09-09 18:42:03.244087272 +0000
@@ -204,6 +204,18 @@
 #define MIN_COMPRESSION_THRESHOLD 0
   ulong compression_threshold_var;
 
+  /* Sleep between two chunks of the certification loop, in microseconds. */
+#define DEFAULT_CERTIFICATION_LOOP_SLEEP_TIME 2000
+#define MAX_CERTIFICATION_LOOP_SLEEP_TIME 1000000
+#define MIN_CERTIFICATION_LOOP_SLEEP_TIME 0
+  ulong certification_loop_sleep_time_var;
+
+  /* Number of entries examined per chunk of the certification loop. */
+#define DEFAULT_CERTIFICATION_LOOP_CHUNK_SIZE 10000
+#define MAX_CERTIFICATION_LOOP_CHUNK_SIZE UINT_MAX32
+#define MIN_CERTIFICATION_LOOP_CHUNK_SIZE 100
+  ulong certification_loop_chunk_size_var;
+
 #define DEFAULT_GTID_ASSIGNMENT_BLOCK_SIZE 1000000
 #define MIN_GTID_ASSIGNMENT_BLOCK_SIZE 1
 #define MAX_GTID_ASSIGNMENT_BLOCK_SIZE GNO_END
diff -ru orig-mysql-8.0.30/plugin/group_replication/src/certifier.cc mysql-8.0.30/plugin/group_replication/src/certifier.cc
--- orig-mysql-8.0.30/plugin/group_replication/src/certifier.cc	2022-07-06 21:36:34.000000000 +0000
+++ mysql-8.0.30/plugin/group_replication/src/certifier.cc	2022-09-09 18:53:53.295516839 +0000
@@ -1198,7 +1198,12 @@
   DBUG_EXECUTE_IF("group_replication_do_not_clear_certification_database",
                   { return; };);
 
-  mysql_mutex_lock(&LOCK_certification_info);
+  ulong i;
+  /*
+    The goal of the following loop is to avoid blocking transactions for too
+    long on servers that have a high rate of trx. Processing 1M GTIDs in the
+    original code blocked the transaction processing for about 1s.
+  */
 
   /*
     When a transaction "t" is applied to all group members and for all
@@ -1207,23 +1212,42 @@
     precedes them), then "t" is stable and can be removed from
     the certification info.
   */
+
+  /* Start time, in units of 100ns. */
+  ulonglong starttime = my_getsystime();
+
+  mysql_mutex_lock(&LOCK_certification_info);
+
   Certification_info::iterator it = certification_info.begin();
-  stable_gtid_set_lock->wrlock();
-  while (it != certification_info.end()) {
-    if (it->second->is_subset_not_equals(stable_gtid_set)) {
-      if (it->second->unlink() == 0) delete it->second;
-      certification_info.erase(it++);
-    } else
-      ++it;
-  }
-  stable_gtid_set_lock->unlock();
 
-  /*
-    We need to update parallel applier indexes since we do not know
-    what write sets were purged, which may cause transactions
-    last committed to be incorrectly computed.
-  */
-  increment_parallel_applier_sequence_number(true);
+  while (1) {
+    stable_gtid_set_lock->wrlock();
+
+    /* Needs to increase the rate if it takes too long, add a chunk every 5s */
+    ulonglong rate_multiplier = (my_getsystime() - starttime) / 50000000 + 1;
+
+    for (i = 0; i < get_certification_loop_chunk_size_var()*rate_multiplier; i++) {
+      if (it == certification_info.end()) {
+        break;
+      }
+      if (it->second->is_subset_not_equals(stable_gtid_set)) {
+        if (it->second->unlink() == 0) {
+          delete it->second;
+        }
+        certification_info.erase(it++);
+      } else {
+        ++it;
+      }
+    } /* for loop */
+
+    stable_gtid_set_lock->unlock();
+
+    /*
+      We need to update parallel applier indexes since we do not know
+      what write sets were purged, which may cause transactions
+      last committed to be incorrectly computed.
+    */
+    increment_parallel_applier_sequence_number(true);
 
 #if !defined(NDEBUG)
   /*
@@ -1239,8 +1263,35 @@
   }
 #endif
 
-  mysql_mutex_unlock(&LOCK_certification_info);
+    /*
+      Decide whether we are done, and remember where to resume, while
+      LOCK_certification_info is still held: once it is released other
+      threads may modify certification_info, which may invalidate "it".
+    */
+    const bool done = (it == certification_info.end());
+    Certification_info::key_type resume_key;
+    if (!done) {
+      resume_key = it->first;
+    }
+
+    mysql_mutex_unlock(&LOCK_certification_info);
+    if (done) {
+      break;
+    }
+
+    /* Give waiting transactions a chance to run before the next chunk. */
+    my_sleep(get_certification_loop_sleep_time_var());
+
+    /* We are going to loop back, so take the mutex again. */
+    mysql_mutex_lock(&LOCK_certification_info);
+    /*
+      Re-acquire a valid iterator. If the entry is gone, find() yields
+      end() and the loop finishes on its next pass; entries that were not
+      visited are left for the next garbage collection round.
+    */
+    it = certification_info.find(resume_key);
+  } /* while loop */
 
   /*
     Applier channel received set does only contain the GTIDs of the
     remote (committed by other members) transactions. On the long
diff -ru orig-mysql-8.0.30/plugin/group_replication/src/plugin.cc mysql-8.0.30/plugin/group_replication/src/plugin.cc
--- orig-mysql-8.0.30/plugin/group_replication/src/plugin.cc	2022-07-06 21:36:34.000000000 +0000
+++ mysql-8.0.30/plugin/group_replication/src/plugin.cc	2022-09-09 18:49:27.234237840 +0000
@@ -293,6 +293,10 @@
 
 ulong get_flow_control_mode_var() { return ov.flow_control_mode_var; }
 
+ulong get_certification_loop_sleep_time_var() { return ov.certification_loop_sleep_time_var; }
+
+ulong get_certification_loop_chunk_size_var() { return ov.certification_loop_chunk_size_var; }
+
 long get_flow_control_certifier_threshold_var() {
   return ov.flow_control_certifier_threshold_var;
 }
@@ -4541,6 +4545,37 @@
 );
 
 static MYSQL_SYSVAR_ULONG(
+    certification_loop_sleep_time, /* name */
+    ov.certification_loop_sleep_time_var, /* var */
+    PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NODEFAULT |
+        PLUGIN_VAR_PERSIST_AS_READ_ONLY, /* optional var | no set default */
+    "The sleep time, in microseconds, in the certification "
+    "loop to allow transactions to interleave. "
+    "Default: 2000.",
+    nullptr, /* check func. */
+    nullptr, /* update func. */
+    DEFAULT_CERTIFICATION_LOOP_SLEEP_TIME, /* default */
+    MIN_CERTIFICATION_LOOP_SLEEP_TIME, /* min */
+    MAX_CERTIFICATION_LOOP_SLEEP_TIME, /* max */
+    0 /* block */
+);
+
+static MYSQL_SYSVAR_ULONG(
+    certification_loop_chunk_size, /* name */
+    ov.certification_loop_chunk_size_var, /* var */
+    PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NODEFAULT |
+        PLUGIN_VAR_PERSIST_AS_READ_ONLY, /* optional var | no set default */
+    "The chunk size of the certification loop. "
+    "Default: 10000.",
+    nullptr, /* check func. */
+    nullptr, /* update func. */
+    DEFAULT_CERTIFICATION_LOOP_CHUNK_SIZE, /* default */
+    MIN_CERTIFICATION_LOOP_CHUNK_SIZE, /* min */
+    MAX_CERTIFICATION_LOOP_CHUNK_SIZE, /* max */
+    0 /* block */
+);
+
+static MYSQL_SYSVAR_ULONG(
     compression_threshold,        /* name */
     ov.compression_threshold_var, /* var */
     PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NODEFAULT |
@@ -5139,6 +5174,8 @@
     MYSQL_SYSVAR(components_stop_timeout),
     MYSQL_SYSVAR(allow_local_lower_version_join),
     MYSQL_SYSVAR(auto_increment_increment),
+    MYSQL_SYSVAR(certification_loop_sleep_time),
+    MYSQL_SYSVAR(certification_loop_chunk_size),
     MYSQL_SYSVAR(compression_threshold),
     MYSQL_SYSVAR(communication_max_message_size),
     MYSQL_SYSVAR(gtid_assignment_block_size),