From 9aae54765e8e3c4d5daed59354465c4abe00d098 Mon Sep 17 00:00:00 2001 From: Venkatesh Prasad Date: Thu, 3 Aug 2023 13:13:59 +0530 Subject: [PATCH] Bug #109668: XCom connection stalled forever in read() syscall over network https://bugs.mysql.com/bug.php?id=109668 Description ----------- GR suffered from problems caused by the security probes and network scanner processes connecting to the group replication communication port. This usually is not a problem, but poses a serious threat when another member tries to join the cluster by initiating a connection to the member which is affected by external processes using the port dedicated for group communication for longer durations. On such activities by external processes, the SSL enabled server stalled forever on the SSL_accept() call waiting for handshake data. Below is the stacktrace: Thread 55 (Thread 0x7f7bb77ff700 (LWP 2198598)): #0 in read () #1 in sock_read () #2 in BIO_read () #3 in ssl23_read_bytes () #4 in ssl23_get_client_hello () #5 in ssl23_accept () #6 in xcom_tcp_server_startup(Xcom_network_provider*) () When the server stalled in the above path forever, it prevented other members from joining the cluster resulting in the following messages on the joiner server's logs. [ERROR] [MY-011640] [Repl] Plugin group_replication reported: 'Timeout on wait for view after joining group' [ERROR] [MY-011735] [Repl] Plugin group_replication reported: '[GCS] The member is already leaving or joining a group.' Solution -------- This patch adds two new variables 1. group_replication_xcom_ssl_socket_timeout It is a file-descriptor level timeout in seconds for both accept() and SSL_accept() calls when group replication is listening on the xcom port. When set to a valid value, say for example 5 seconds, both accept() and SSL_accept() return after 5 seconds. The default value has been set to 0 (waits infinitely) for backward compatibility. This variable is effective only when GR is configured with SSL. 2. 
group_replication_xcom_ssl_accept_retries It defines the number of retries to be performed before closing the socket. For each retry the server thread calls SSL_accept() with timeout defined by the group_replication_xcom_ssl_socket_timeout for the SSL handshake process once the connection has been accepted by the first accept() call. The default value has been set to 10. This variable is effective only when GR is configured with SSL. Note: - Both of the above variables are dynamically configurable, but will become effective only on START GROUP_REPLICATION. - This patch is only for the Linux systems. --- .../include/start_proc_in_background.inc | 71 ++++++++++ mysql-test/include/wait_proc_to_finish.inc | 38 +++++ .../r/gr_persist_only_variables.result | 12 +- .../r/gr_persist_variables.result | 12 +- .../r/gr_set_option_during_stop.result | 6 + ...r_show_global_and_session_variables.result | 4 +- .../r/gr_ssl_socket_timeout.result | 50 +++++++ .../r/gr_variables_default_values.result | 8 +- .../r/gr_variables_privileges.result | 6 + .../t/gr_persist_only_variables.test | 2 +- .../t/gr_set_option_during_stop.test | 2 + .../gr_show_global_and_session_variables.test | 2 +- .../t/gr_ssl_socket_timeout.test | 130 ++++++++++++++++++ .../t/gr_variables_default_values.test | 19 ++- plugin/group_replication/include/plugin.h | 2 + .../include/plugin_variables.h | 10 ++ .../src/bindings/xcom/gcs_xcom_interface.cc | 12 ++ .../xcom/network/xcom_network_provider.cc | 14 ++ .../xcom_network_provider_native_lib.cc | 46 +++++++ .../src/bindings/xcom/xcom/task_os.h | 2 + plugin/group_replication/src/plugin.cc | 60 ++++++++ 21 files changed, 493 insertions(+), 15 deletions(-) create mode 100644 mysql-test/include/start_proc_in_background.inc create mode 100644 mysql-test/include/wait_proc_to_finish.inc create mode 100644 mysql-test/suite/group_replication/r/gr_ssl_socket_timeout.result create mode 100644 mysql-test/suite/group_replication/t/gr_ssl_socket_timeout.test diff --git 
a/mysql-test/include/start_proc_in_background.inc b/mysql-test/include/start_proc_in_background.inc new file mode 100644 index 000000000000..4f9e69b7e639 --- /dev/null +++ b/mysql-test/include/start_proc_in_background.inc @@ -0,0 +1,71 @@ +# ==== Purpose ==== +# +# Start process in background. Optionally store its PID in the file +# +# This functionality cannot be achieved by mysqltest built-in --exec_in_background +# because we need to start it like: +# cmd param1 param2 > output.log & echo $! > cmd.pid +# +# The way to pass $! through Perl to mysqltest binary is to escape $! like \$! +# This will prevent an attempt to resolve $! variable on Perl layer, +# but will pass down \$! to mysqltest. +# mysqltest handles \$! literally (recognizes that $ has been escaped +# and adds escape character to the constructed command), so we end up with: +# cmd param1 param2 > output.log & echo \$! > cmd.pid +# +# ==== Usage ==== +# +# --let $command = process_to_be_executed +# [--let $command_opt = opt1 opt2 ...] +# [--let $output_file = output_file] +# [--let $pid_file = pid_file] +# [--let $redirect_stderr = 0 | 1 ] +# --source include/start_proc_in_background.inc +# +# Parameters: +# $command +# Process to be executed in background. +# +# $command_opt +# Options to be passed to the process via command line at its startup. +# +# $output_file +# Redirect process output to this file. +# +# $pid_file +# Store started process PID in this file. +# + + +if (!$command) +{ + --die ERROR IN TEST: 'command' parameter not specified +} + +--let $line = $command $command_opt + +if ($output_file) +{ + if ($redirect_stderr == 1) + { + --let $line = $line 2> $output_file + } + if ($redirect_stderr == 0) + { + --let $line = $line > $output_file + } +} + +--let $line = $line & + +if ($pid_file) +{ + --let $line = $line echo \$! 
> $pid_file +} + +--let _LINE = $line +--perl + my $cmd= $ENV{'_LINE'}; + system("$cmd"); +EOF + diff --git a/mysql-test/include/wait_proc_to_finish.inc b/mysql-test/include/wait_proc_to_finish.inc new file mode 100644 index 000000000000..7e9f08b4dfe0 --- /dev/null +++ b/mysql-test/include/wait_proc_to_finish.inc @@ -0,0 +1,38 @@ +# ==== Purpose ==== +# +# Wait for process to finish. +# +# This script can be used as standalone, but most probably +# will be used together with start_proc_in_background.inc +# +# ==== Usage ==== +# +# --let $pid_file = pid_file +# --source include/wait_proc_to_finish.inc +# +# Parameters: +# $pid_file +# File containing PID of the process. +# + +if (!$pid_file) +{ + --die ERROR IN TEST: 'pid_file' parameter specified +} + +--let _PID_FILE = $pid_file + +perl; + use Errno; + use strict; + my $pid_file = $ENV{'_PID_FILE'}; + open(my $f, '<', $pid_file) or die "OPENING $pid_file: $!\n"; + my $pid = do { local($/); <$f> }; + close($f); + my $not_present = (!kill(0,$pid) && $! == Errno::ESRCH); + while (!$not_present) { + sleep(1); + $not_present = (!kill(0,$pid) && $! 
== Errno::ESRCH); + } +EOF + diff --git a/mysql-test/suite/group_replication/r/gr_persist_only_variables.result b/mysql-test/suite/group_replication/r/gr_persist_only_variables.result index ca73f6148d7a..698e332a93f5 100644 --- a/mysql-test/suite/group_replication/r/gr_persist_only_variables.result +++ b/mysql-test/suite/group_replication/r/gr_persist_only_variables.result @@ -81,8 +81,10 @@ SET PERSIST_ONLY group_replication_tls_source = @@GLOBAL.group_replication_tls_s SET PERSIST_ONLY group_replication_transaction_size_limit = @@GLOBAL.group_replication_transaction_size_limit; SET PERSIST_ONLY group_replication_unreachable_majority_timeout = @@GLOBAL.group_replication_unreachable_majority_timeout; SET PERSIST_ONLY group_replication_view_change_uuid = @@GLOBAL.group_replication_view_change_uuid; +SET PERSIST_ONLY group_replication_xcom_ssl_accept_retries = @@GLOBAL.group_replication_xcom_ssl_accept_retries; +SET PERSIST_ONLY group_replication_xcom_ssl_socket_timeout = @@GLOBAL.group_replication_xcom_ssl_socket_timeout; -include/assert.inc ['Expect 60 persisted variables.'] +include/assert.inc ['Expect 62 persisted variables.'] ############################################################ # 2. 
Restart server, it must bootstrap the group and preserve @@ -91,9 +93,9 @@ include/assert.inc ['Expect 60 persisted variables.'] include/rpl_reconnect.inc include/gr_wait_for_member_state.inc -include/assert.inc ['Expect 60 persisted variables in persisted_variables table.'] -include/assert.inc ['Expect 59 variables which last value was set through SET PERSIST.'] -include/assert.inc ['Expect 50 persisted variables with matching persisted and global values.'] +include/assert.inc ['Expect 62 persisted variables in persisted_variables table.'] +include/assert.inc ['Expect 61 variables which last value was set through SET PERSIST.'] +include/assert.inc ['Expect 52 persisted variables with matching persisted and global values.'] ############################################################ # 3. Test RESET PERSIST IF EXISTS. @@ -158,6 +160,8 @@ RESET PERSIST IF EXISTS group_replication_tls_source; RESET PERSIST IF EXISTS group_replication_transaction_size_limit; RESET PERSIST IF EXISTS group_replication_unreachable_majority_timeout; RESET PERSIST IF EXISTS group_replication_view_change_uuid; +RESET PERSIST IF EXISTS group_replication_xcom_ssl_accept_retries; +RESET PERSIST IF EXISTS group_replication_xcom_ssl_socket_timeout; include/assert.inc ['Expect 0 persisted variables.'] diff --git a/mysql-test/suite/group_replication/r/gr_persist_variables.result b/mysql-test/suite/group_replication/r/gr_persist_variables.result index 33d72b347f7c..3ad1c52533a1 100644 --- a/mysql-test/suite/group_replication/r/gr_persist_variables.result +++ b/mysql-test/suite/group_replication/r/gr_persist_variables.result @@ -85,8 +85,10 @@ SET PERSIST group_replication_tls_source = @@GLOBAL.group_replication_tls_source SET PERSIST group_replication_transaction_size_limit = @@GLOBAL.group_replication_transaction_size_limit; SET PERSIST group_replication_unreachable_majority_timeout = @@GLOBAL.group_replication_unreachable_majority_timeout; SET PERSIST group_replication_view_change_uuid = 
@@GLOBAL.group_replication_view_change_uuid; +SET PERSIST group_replication_xcom_ssl_accept_retries = @@GLOBAL.group_replication_xcom_ssl_accept_retries; +SET PERSIST group_replication_xcom_ssl_socket_timeout = @@GLOBAL.group_replication_xcom_ssl_socket_timeout; -include/assert.inc ['Expect 60 persisted variables.'] +include/assert.inc ['Expect 62 persisted variables.'] ############################################################ # 2. Restart server, it must bootstrap the group and preserve @@ -95,9 +97,9 @@ include/assert.inc ['Expect 60 persisted variables.'] include/rpl_reconnect.inc include/gr_wait_for_member_state.inc -include/assert.inc ['Expect 60 persisted variables in persisted_variables table.'] -include/assert.inc ['Expect 59 variables which last value was set through SET PERSIST.'] -include/assert.inc ['Expect 59 variables which last value was set through SET PERSIST is equal to its global value.'] +include/assert.inc ['Expect 62 persisted variables in persisted_variables table.'] +include/assert.inc ['Expect 61 variables which last value was set through SET PERSIST.'] +include/assert.inc ['Expect 61 variables which last value was set through SET PERSIST is equal to its global value.'] ############################################################ # 3. Test RESET PERSIST. 
@@ -162,6 +164,8 @@ RESET PERSIST group_replication_tls_source; RESET PERSIST group_replication_transaction_size_limit; RESET PERSIST group_replication_unreachable_majority_timeout; RESET PERSIST group_replication_view_change_uuid; +RESET PERSIST group_replication_xcom_ssl_accept_retries; +RESET PERSIST group_replication_xcom_ssl_socket_timeout; include/assert.inc ['Expect 0 persisted variables.'] diff --git a/mysql-test/suite/group_replication/r/gr_set_option_during_stop.result b/mysql-test/suite/group_replication/r/gr_set_option_during_stop.result index 3e63d5fb10b2..cae96ad7dd91 100644 --- a/mysql-test/suite/group_replication/r/gr_set_option_during_stop.result +++ b/mysql-test/suite/group_replication/r/gr_set_option_during_stop.result @@ -41,6 +41,8 @@ WHERE VARIABLE_NAME LIKE 'group_replication_%' AND VARIABLE_NAME != 'group_replication_start_on_boot' AND VARIABLE_NAME != 'group_replication_tls_source' AND VARIABLE_NAME != 'group_replication_transaction_size_limit' + AND VARIABLE_NAME != 'group_replication_xcom_ssl_socket_timeout' + AND VARIABLE_NAME != 'group_replication_xcom_ssl_accept_retries' ORDER BY VARIABLE_NAME; SET SESSION sql_log_bin = 1; SET @value= @@GLOBAL.group_replication_advertise_recovery_endpoints; @@ -216,6 +218,10 @@ SET @value= @@GLOBAL.group_replication_tls_source; SET @@GLOBAL.group_replication_tls_source= @value; SET @value= @@GLOBAL.group_replication_transaction_size_limit; SET @@GLOBAL.group_replication_transaction_size_limit= @value; +SET @value= @@GLOBAL.group_replication_xcom_ssl_accept_retries; +SET @@GLOBAL.group_replication_xcom_ssl_accept_retries= @value; +SET @value= @@GLOBAL.group_replication_xcom_ssl_socket_timeout; +SET @@GLOBAL.group_replication_xcom_ssl_socket_timeout= @value; ############################################################ # 5. Validate that we did test all Group Replication options. 
[connection server1] diff --git a/mysql-test/suite/group_replication/r/gr_show_global_and_session_variables.result b/mysql-test/suite/group_replication/r/gr_show_global_and_session_variables.result index 1e9d6c4687f6..23485d516049 100644 --- a/mysql-test/suite/group_replication/r/gr_show_global_and_session_variables.result +++ b/mysql-test/suite/group_replication/r/gr_show_global_and_session_variables.result @@ -7,8 +7,8 @@ Note #### Storing MySQL user name or password information in the connection meta include/start_and_bootstrap_group_replication.inc include/stop_group_replication.inc -# Test#1: Basic check that there are 61 GR variables. -include/assert.inc [There are 61 GR variables at present.] +# Test#1: Basic check that there are 63 GR variables. +include/assert.inc [There are 63 GR variables at present.] # Test#2: Verify group replication related variables at GLOBAL scope. SET @@SESSION.group_replication_allow_local_lower_version_join= 1; diff --git a/mysql-test/suite/group_replication/r/gr_ssl_socket_timeout.result b/mysql-test/suite/group_replication/r/gr_ssl_socket_timeout.result new file mode 100644 index 000000000000..e026626cfee7 --- /dev/null +++ b/mysql-test/suite/group_replication/r/gr_ssl_socket_timeout.result @@ -0,0 +1,50 @@ +include/group_replication.inc +Warnings: +Note #### Sending passwords in plain text without SSL/TLS is extremely insecure. +Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information. +[connection server1] + +############################################################ +# 1. Start one member with GCS SSL enabled. 
+[connection server1] +SET @group_replication_ssl_mode_save= @@GLOBAL.group_replication_ssl_mode; +SET GLOBAL group_replication_ssl_mode= REQUIRED; +SET @group_replication_xcom_ssl_socket_timeout_save= @@GLOBAL.group_replication_xcom_ssl_socket_timeout; +SET @group_replication_xcom_ssl_accept_retries_save= @@GLOBAL.group_replication_xcom_ssl_accept_retries; +SET GLOBAL group_replication_xcom_ssl_socket_timeout= 3; +SET GLOBAL group_replication_xcom_ssl_accept_retries= 3; +include/start_and_bootstrap_group_replication.inc +Occurrences of 'Group communication SSL configuration: group_replication_ssl_mode: "REQUIRED"' in the input file: 1 + +############################################################ +# 2. Start the second member with GCS SSL enabled, the member +# will be able to join the group. +[connection server2] +SET @group_replication_ssl_mode_save= @@GLOBAL.group_replication_ssl_mode; +SET GLOBAL group_replication_ssl_mode= REQUIRED; +include/start_group_replication.inc +include/rpl_gr_wait_for_number_of_members.inc +Occurrences of 'Group communication SSL configuration: group_replication_ssl_mode: "REQUIRED"' in the input file: 1 + +############################################################ +# 3. Verify that any connection on group_replication +# communication port is aborted by the server after the +# timout configured by the group_replication_xcom_ssl_socket_timeout. 
+include/stop_group_replication.inc +SET @group_replication_communication_debug_options_save = @@GLOBAL.group_replication_communication_debug_options; +SET GLOBAL group_replication_communication_debug_options= "XCOM_DEBUG_BASIC"; +START GROUP_REPLICATION; +SET @@GLOBAL.group_replication_communication_debug_options= @group_replication_communication_debug_options_save; +include/assert_grep.inc [Assert that the mysql connection has been ended by the server] +include/assert_grep.inc [Assert that message about aborting the connection has been logged to GCS_DEBUG_TRACE file] +ERROR 2013 (HY000): Lost connection to MySQL server at 'reading initial communication packet', system error: 2 + +############################################################ +# 4. Clean up. +[connection server1] +SET GLOBAL group_replication_ssl_mode= @group_replication_ssl_mode_save; +SET GLOBAL group_replication_xcom_ssl_socket_timeout= @group_replication_xcom_ssl_socket_timeout_save; +SET GLOBAL group_replication_xcom_ssl_accept_retries= @group_replication_xcom_ssl_accept_retries_save; +[connection server2] +SET GLOBAL group_replication_ssl_mode= @group_replication_ssl_mode_save; +include/group_replication_end.inc diff --git a/mysql-test/suite/group_replication/r/gr_variables_default_values.result b/mysql-test/suite/group_replication/r/gr_variables_default_values.result index b63b23e08e12..740589b60f2a 100644 --- a/mysql-test/suite/group_replication/r/gr_variables_default_values.result +++ b/mysql-test/suite/group_replication/r/gr_variables_default_values.result @@ -26,9 +26,9 @@ include/stop_group_replication.inc # # Test Unit#1 # Set global/session group replication variables to default. -# Curently there are 61 group replication variables. +# Curently there are 63 group replication variables. # -include/assert.inc [There are 61 GR variables at present.] +include/assert.inc [There are 63 GR variables at present.] 
SET @@GLOBAL.group_replication_auto_increment_increment= default; ERROR 42000: Variable 'group_replication_auto_increment_increment' can't be set to the value of 'DEFAULT' SET @@GLOBAL.group_replication_compression_threshold= default; @@ -84,6 +84,8 @@ SET @@GLOBAL.group_replication_advertise_recovery_endpoints = default; SET @@GLOBAL.group_replication_view_change_uuid= default; SET @@GLOBAL.group_replication_communication_stack = default; SET @@GLOBAL.group_replication_paxos_single_leader = default; +SET @@GLOBAL.group_replication_xcom_ssl_socket_timeout = default; +SET @@GLOBAL.group_replication_xcom_ssl_accept_retries = default; SET @@SESSION.group_replication_consistency= default; # # Test Unit#2 @@ -134,6 +136,8 @@ include/assert.inc [Default group_replication_advertise_recovery_endpoints is "D include/assert.inc [Default group_replication_view_change_uuid is "AUTOMATIC"] include/assert.inc [Default group_replication_communication_stack is XCom] include/assert.inc [Default group_replication_paxos_single_leader is 0] +include/assert.inc [Default group_replication_xcom_ssl_socket_timeout is 0] +include/assert.inc [Default group_replication_xcom_ssl_accept_retries is 10] # # Clean up # diff --git a/mysql-test/suite/group_replication/r/gr_variables_privileges.result b/mysql-test/suite/group_replication/r/gr_variables_privileges.result index f52a5b4357ad..1129ea692c3b 100644 --- a/mysql-test/suite/group_replication/r/gr_variables_privileges.result +++ b/mysql-test/suite/group_replication/r/gr_variables_privileges.result @@ -147,6 +147,10 @@ SET GLOBAL group_replication_unreachable_majority_timeout = @@GLOBAL.group_repli ERROR 42000: Access denied; you need (at least one of) the SUPER or SYSTEM_VARIABLES_ADMIN privilege(s) for this operation SET GLOBAL group_replication_view_change_uuid = @@GLOBAL.group_replication_view_change_uuid; ERROR 42000: Access denied; you need (at least one of) the SUPER or SYSTEM_VARIABLES_ADMIN privilege(s) for this operation +SET GLOBAL 
group_replication_xcom_ssl_accept_retries = @@GLOBAL.group_replication_xcom_ssl_accept_retries; +ERROR 42000: Access denied; you need (at least one of) the SUPER or SYSTEM_VARIABLES_ADMIN privilege(s) for this operation +SET GLOBAL group_replication_xcom_ssl_socket_timeout = @@GLOBAL.group_replication_xcom_ssl_socket_timeout; +ERROR 42000: Access denied; you need (at least one of) the SUPER or SYSTEM_VARIABLES_ADMIN privilege(s) for this operation # Like most system variables, setting the session value for # group_replication_consistency requires no special privileges. @@ -227,6 +231,8 @@ SET GLOBAL group_replication_tls_source = @@GLOBAL.group_replication_tls_source; SET GLOBAL group_replication_transaction_size_limit = @@GLOBAL.group_replication_transaction_size_limit; SET GLOBAL group_replication_unreachable_majority_timeout = @@GLOBAL.group_replication_unreachable_majority_timeout; SET GLOBAL group_replication_view_change_uuid = @@GLOBAL.group_replication_view_change_uuid; +SET GLOBAL group_replication_xcom_ssl_accept_retries = @@GLOBAL.group_replication_xcom_ssl_accept_retries; +SET GLOBAL group_replication_xcom_ssl_socket_timeout = @@GLOBAL.group_replication_xcom_ssl_socket_timeout; ############################################################ # 4. Grant GROUP_REPLICATION_ADMIN and verify setting diff --git a/mysql-test/suite/group_replication/t/gr_persist_only_variables.test b/mysql-test/suite/group_replication/t/gr_persist_only_variables.test index 176f76f621b7..3d2270cbb0b0 100644 --- a/mysql-test/suite/group_replication/t/gr_persist_only_variables.test +++ b/mysql-test/suite/group_replication/t/gr_persist_only_variables.test @@ -91,7 +91,7 @@ while ( $varid <= $countvars ) --source include/assert.inc # TODO: Update this once Bug#27322592 is FIXED. ---let $persisted_variables = 50 +--let $persisted_variables = 52 --let $assert_text= 'Expect $persisted_variables persisted variables with matching persisted and global values.' 
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.variables_info vi JOIN performance_schema.persisted_variables pv JOIN performance_schema.global_variables gv ON vi.variable_name=pv.variable_name AND vi.variable_name=gv.variable_name AND pv.variable_value=gv.variable_value WHERE vi.variable_source="PERSISTED", count, 1] = $persisted_variables --source include/assert.inc diff --git a/mysql-test/suite/group_replication/t/gr_set_option_during_stop.test b/mysql-test/suite/group_replication/t/gr_set_option_during_stop.test index 79bd4bf23e1e..34d52e4d31ea 100644 --- a/mysql-test/suite/group_replication/t/gr_set_option_during_stop.test +++ b/mysql-test/suite/group_replication/t/gr_set_option_during_stop.test @@ -74,6 +74,8 @@ INSERT INTO gr_options_that_cannot_be_change (name) AND VARIABLE_NAME != 'group_replication_start_on_boot' AND VARIABLE_NAME != 'group_replication_tls_source' AND VARIABLE_NAME != 'group_replication_transaction_size_limit' + AND VARIABLE_NAME != 'group_replication_xcom_ssl_socket_timeout' + AND VARIABLE_NAME != 'group_replication_xcom_ssl_accept_retries' ORDER BY VARIABLE_NAME; SET SESSION sql_log_bin = 1; --let $gr_options_that_cannot_be_change_count= `SELECT COUNT(*) FROM gr_options_that_cannot_be_change;` diff --git a/mysql-test/suite/group_replication/t/gr_show_global_and_session_variables.test b/mysql-test/suite/group_replication/t/gr_show_global_and_session_variables.test index bc94d30aac77..29e534d64cd3 100644 --- a/mysql-test/suite/group_replication/t/gr_show_global_and_session_variables.test +++ b/mysql-test/suite/group_replication/t/gr_show_global_and_session_variables.test @@ -30,7 +30,7 @@ --source include/start_and_bootstrap_group_replication.inc --source include/stop_group_replication.inc ---let $gr_var_count= 61 +--let $gr_var_count= 63 --echo --echo # Test#1: Basic check that there are $gr_var_count GR variables. 
diff --git a/mysql-test/suite/group_replication/t/gr_ssl_socket_timeout.test b/mysql-test/suite/group_replication/t/gr_ssl_socket_timeout.test new file mode 100644 index 000000000000..c83f52cf6d3d --- /dev/null +++ b/mysql-test/suite/group_replication/t/gr_ssl_socket_timeout.test @@ -0,0 +1,130 @@ +################################################################################ +# This test verifies that any unintended connection on group_replication +# communication port is aborted by the server after the timout configured by +# the group_replication_xcom_ssl_socket_timeout. +# +# Test: +# 0. The test requires two servers: M1 and M2. +# 1. Enable group_replication_ssl_mode = REQUIRED on both members and start GR. +# 2. With both members ONLINE, stop GR on M2. +# 3. Initiate a connection on the GR communication port of M1 as a background +# process. +# 4. Start GR on M2. +# 5. Verify that START GR will be successful, after the server aborting the +# connection. +# 6. Cleanup +################################################################################ + +--source include/have_group_replication_xcom_communication_stack.inc +--source include/have_group_replication_plugin.inc +--let $rpl_skip_group_replication_start= 1 +--source include/group_replication.inc + + +--echo +--echo ############################################################ +--echo # 1. Start one member with GCS SSL enabled. 
+--let $rpl_connection_name= server1 +--source include/rpl_connection.inc +SET @group_replication_ssl_mode_save= @@GLOBAL.group_replication_ssl_mode; +SET GLOBAL group_replication_ssl_mode= REQUIRED; + +# Set the group_replication_xcom_ssl_socket_timeout and group_replication_xcom_ssl_accept_retries +SET @group_replication_xcom_ssl_socket_timeout_save= @@GLOBAL.group_replication_xcom_ssl_socket_timeout; +SET @group_replication_xcom_ssl_accept_retries_save= @@GLOBAL.group_replication_xcom_ssl_accept_retries; + +SET GLOBAL group_replication_xcom_ssl_socket_timeout= 3; +SET GLOBAL group_replication_xcom_ssl_accept_retries= 3; + +# Bootstrap and start group replication +--source include/start_and_bootstrap_group_replication.inc + +# Verify that GR was started with group_replication_ssl_mode = REQUIRED +--let $grep_file= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let $grep_pattern= Group communication SSL configuration: group_replication_ssl_mode: "REQUIRED" +--let $grep_output= print_count +--source include/grep_pattern.inc + +--echo +--echo ############################################################ +--echo # 2. Start the second member with GCS SSL enabled, the member +--echo # will be able to join the group. 
+--let $rpl_connection_name= server2 +--source include/rpl_connection.inc +--disable_query_log +--eval SET GLOBAL group_replication_group_name= '$group_replication_group_name' +--enable_query_log + +SET @group_replication_ssl_mode_save= @@GLOBAL.group_replication_ssl_mode; +SET GLOBAL group_replication_ssl_mode= REQUIRED; +--source include/start_group_replication.inc + +--let $group_replication_number_of_members= 2 +--source include/gr_wait_for_number_of_members.inc + +--let $grep_file= $MYSQLTEST_VARDIR/log/mysqld.2.err +--let $grep_pattern= Group communication SSL configuration: group_replication_ssl_mode: "REQUIRED" +--let $grep_output= print_count +--source include/grep_pattern.inc + +--echo +--echo ############################################################ +--echo # 3. Verify that any connection on group_replication +--echo # communication port is aborted by the server after the +--echo # timout configured by the group_replication_xcom_ssl_socket_timeout. + +# STOP GR on server2 +--source include/stop_group_replication.inc + +# Connect to GR communication port on server1. For the purpose of testing, we +# use mysql client here. 
+--connection server1 +SET @group_replication_communication_debug_options_save = @@GLOBAL.group_replication_communication_debug_options; +SET GLOBAL group_replication_communication_debug_options= "XCOM_DEBUG_BASIC"; +--let $gr_port= `SELECT SUBSTRING(@@group_replication_local_address, LOCATE(':',@@group_replication_local_address) + 1)` +--let $command= $MYSQL +--let $command_opt= --user=root --host=127.0.0.1 --port=$gr_port +--let $output_file= $MYSQLTEST_VARDIR/tmp/mysql_output +--let $pid_file= $MYSQLTEST_VARDIR/tmp/mysql_pid +--let $redirect_stderr= 1 +--source include/start_proc_in_background.inc + +--connection server2 +START GROUP_REPLICATION; + +--connection server1 +SET @@GLOBAL.group_replication_communication_debug_options= @group_replication_communication_debug_options_save; +--source include/wait_proc_to_finish.inc + +# Assert that mysql command has failed +--let $assert_text= Assert that the mysql connection has been ended by the server +--let $assert_select= Lost connection to MySQL server at \'reading initial communication packet\' +--let $assert_file= $output_file +--let $assert_count= 1 +--source include/assert_grep.inc + +# Assert that message about aborting the connection has been logged to GCS_DEBUG_TRACE file +--let $assert_text= Assert that message about aborting the connection has been logged to GCS_DEBUG_TRACE file +--let $assert_select= SSL_accept did receive any data on fd .* despite waiting for 12 seconds in total, aborting the connection. +--let $assert_file= $MYSQLTEST_VARDIR/mysqld.1/data/GCS_DEBUG_TRACE +--let $assert_count= 1 +--source include/assert_grep.inc +--exec cat $output_file + +--echo +--echo ############################################################ +--echo # 4. Clean up. 
+--let $rpl_connection_name= server1 +--source include/rpl_connection.inc +SET GLOBAL group_replication_ssl_mode= @group_replication_ssl_mode_save; +SET GLOBAL group_replication_xcom_ssl_socket_timeout= @group_replication_xcom_ssl_socket_timeout_save; +SET GLOBAL group_replication_xcom_ssl_accept_retries= @group_replication_xcom_ssl_accept_retries_save; + +--let $rpl_connection_name= server2 +--source include/rpl_connection.inc +SET GLOBAL group_replication_ssl_mode= @group_replication_ssl_mode_save; + +--remove_file $pid_file +--remove_file $output_file +--remove_file $MYSQLTEST_VARDIR/mysqld.1/data/GCS_DEBUG_TRACE +--source include/group_replication_end.inc diff --git a/mysql-test/suite/group_replication/t/gr_variables_default_values.test b/mysql-test/suite/group_replication/t/gr_variables_default_values.test index ca7f38c39994..32f7747c30bc 100644 --- a/mysql-test/suite/group_replication/t/gr_variables_default_values.test +++ b/mysql-test/suite/group_replication/t/gr_variables_default_values.test @@ -90,8 +90,11 @@ SET @@GLOBAL.group_replication_communication_max_message_size= default; --let $saved_gr_communication_stack = `SELECT @@GLOBAL.group_replication_communication_stack;` --let $saved_gr_paxos_single_leader = `SELECT @@GLOBAL.group_replication_paxos_single_leader;` +--let $saved_gr_xcom_ssl_socket_timeout = `SELECT @@GLOBAL.group_replication_xcom_ssl_socket_timeout;` +--let $saved_gr_xcom_ssl_accept_retries = `SELECT @@GLOBAL.group_replication_xcom_ssl_accept_retries;` + # Total number of GR variables. 
---let $total_gr_vars= 61 +--let $total_gr_vars= 63 --echo # --echo # Test Unit#1 @@ -162,6 +165,8 @@ SET @@GLOBAL.group_replication_advertise_recovery_endpoints = default; SET @@GLOBAL.group_replication_view_change_uuid= default; SET @@GLOBAL.group_replication_communication_stack = default; SET @@GLOBAL.group_replication_paxos_single_leader = default; +SET @@GLOBAL.group_replication_xcom_ssl_socket_timeout = default; +SET @@GLOBAL.group_replication_xcom_ssl_accept_retries = default; # Session variables SET @@SESSION.group_replication_consistency= default; @@ -396,6 +401,15 @@ SET @@SESSION.group_replication_consistency= default; --let $assert_cond= "[SELECT @@GLOBAL.group_replication_paxos_single_leader]" = 0 --source include/assert.inc +# group_replication_xcom_ssl_socket_timeout +--let $assert_text= Default group_replication_xcom_ssl_socket_timeout is 0 +--let $assert_cond= "[SELECT @@GLOBAL.group_replication_xcom_ssl_socket_timeout]" = 0 +--source include/assert.inc + +# group_replication_xcom_ssl_accept_retries +--let $assert_text= Default group_replication_xcom_ssl_accept_retries is 10 +--let $assert_cond= "[SELECT @@GLOBAL.group_replication_xcom_ssl_accept_retries]" = 10 +--source include/assert.inc --echo # --echo # Clean up @@ -453,6 +467,9 @@ SET @@SESSION.group_replication_consistency= default; --eval SET @@GLOBAL.group_replication_view_change_uuid= "$saved_gr_view_change_uuid" --eval SET @@GLOBAL.group_replication_communication_stack= "$saved_gr_communication_stack" --eval SET @@GLOBAL.group_replication_paxos_single_leader= $saved_gr_paxos_single_leader +--eval SET @@GLOBAL.group_replication_xcom_ssl_socket_timeout= $saved_gr_xcom_ssl_socket_timeout +--eval SET @@GLOBAL.group_replication_xcom_ssl_accept_retries= $saved_gr_xcom_ssl_accept_retries + --enable_query_log --let $total_gr_vars= diff --git a/plugin/group_replication/include/plugin.h b/plugin/group_replication/include/plugin.h index e3fd54f28044..781918508fa8 100644 --- 
a/plugin/group_replication/include/plugin.h +++ b/plugin/group_replication/include/plugin.h @@ -239,6 +239,8 @@ const char *get_group_name_var(); const char *get_view_change_uuid_var(); ulong get_exit_state_action_var(); ulong get_flow_control_mode_var(); +ulong get_xcom_ssl_socket_timeout_var(); /* timeout in seconds */ +ulong get_xcom_ssl_accept_retries_var(); /* SSL_accept() retry count */ long get_flow_control_certifier_threshold_var(); long get_flow_control_applier_threshold_var(); long get_flow_control_min_quota_var(); diff --git a/plugin/group_replication/include/plugin_variables.h b/plugin/group_replication/include/plugin_variables.h index 28c010904f29..e731521dc9a5 100644 --- a/plugin/group_replication/include/plugin_variables.h +++ b/plugin/group_replication/include/plugin_variables.h @@ -204,6 +204,16 @@ struct plugin_options_variables { #define MIN_COMPRESSION_THRESHOLD 0 ulong compression_threshold_var; +#define DEFAULT_XCOM_SSL_SOCKET_TIMEOUT 0 /* seconds; 0 waits indefinitely */ +#define MAX_XCOM_SSL_SOCKET_TIMEOUT UINT_MAX32 +#define MIN_XCOM_SSL_SOCKET_TIMEOUT 0 + ulong xcom_ssl_socket_timeout_var; /* group_replication_xcom_ssl_socket_timeout */ + +#define DEFAULT_XCOM_SSL_ACCEPT_RETRIES 10 +#define MAX_XCOM_SSL_ACCEPT_RETRIES UINT_MAX32 +#define MIN_XCOM_SSL_ACCEPT_RETRIES 0 + ulong xcom_ssl_accept_retries_var; /* group_replication_xcom_ssl_accept_retries */ + #define DEFAULT_GTID_ASSIGNMENT_BLOCK_SIZE 1000000 #define MIN_GTID_ASSIGNMENT_BLOCK_SIZE 1 #define MAX_GTID_ASSIGNMENT_BLOCK_SIZE GNO_END diff --git a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc index 72d42a0ddf33..00d0a4cb04e3 100644 --- a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc +++ b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_interface.cc @@ -52,6 +52,8 @@ using std::map; using std::string; using std::vector; +unsigned long xcom_ssl_socket_timeout; /* seconds; assigned in initialize_xcom() */ +unsigned long xcom_ssl_accept_retries; /* assigned in initialize_xcom() */ extern uint32_t get_my_xcom_id(); Gcs_interface *Gcs_xcom_interface::interface_reference_singleton = nullptr; @@ -1057,6
+1059,16 @@ bool Gcs_xcom_interface::initialize_xcom( const std::string *tls_ciphersuites = interface_params.get_parameter("tls_ciphersuites"); + const std::string *xcom_ssl_socket_timeout_str = + interface_params.get_parameter("xcom_ssl_socket_timeout"); + const std::string *xcom_ssl_accept_retries_str = + interface_params.get_parameter("xcom_ssl_accept_retries"); + /* An absent or empty parameter yields 0; strtoul covers the full range. */ + xcom_ssl_socket_timeout = !xcom_ssl_socket_timeout_str ? 0UL : + strtoul(xcom_ssl_socket_timeout_str->c_str(), nullptr, 10); + xcom_ssl_accept_retries = !xcom_ssl_accept_retries_str ? 0UL : + strtoul(xcom_ssl_accept_retries_str->c_str(), nullptr, 10); + ssl_parameters ssl_configuration = { ssl_mode_int, server_key_file ? server_key_file->c_str() : nullptr, diff --git a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider.cc b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider.cc index de65a63dc9e5..29da03b29735 100644 --- a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider.cc +++ b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider.cc @@ -37,6 +37,9 @@ #include "xcom/xcom_base.h" #include "xcom/xcom_transport.h" +extern unsigned long xcom_ssl_accept_retries; +extern unsigned long xcom_ssl_socket_timeout; + void xcom_tcp_server_startup(Xcom_network_provider *net_provider) { xcom_port port = net_provider->get_port(); @@ -98,6 +101,8 @@ void xcom_tcp_server_startup(Xcom_network_provider *net_provider) { { int ret_ssl; int err; + unsigned long no_of_retries = xcom_ssl_accept_retries; + ERR_clear_error(); ret_ssl = SSL_accept(new_incoming_connection->ssl_fd); err = SSL_get_error(new_incoming_connection->ssl_fd, ret_ssl); @@ -107,6 +112,15 @@ void xcom_tcp_server_startup(Xcom_network_provider *net_provider) { if (err != SSL_ERROR_WANT_READ && err != SSL_ERROR_WANT_WRITE) { break; } + if (no_of_retries == 0) { + G_DEBUG( + "SSL_accept did receive any data on fd %d despite waiting " + "for %lu seconds in total, aborting the
connection.", + new_incoming_connection->fd, + xcom_ssl_socket_timeout * (xcom_ssl_accept_retries + 1)); + break; + } + --no_of_retries; SET_OS_ERR(0); G_DEBUG("acceptor learner accept SSL retry fd %d", diff --git a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider_native_lib.cc b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider_native_lib.cc index b30560a5afdc..ca188c9a7b84 100644 --- a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider_native_lib.cc +++ b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/network/xcom_network_provider_native_lib.cc @@ -33,6 +33,8 @@ reserved. #include "xcom/xcom_base.h" #include "xcom/xcom_transport.h" +extern unsigned long xcom_ssl_socket_timeout; + #ifdef WIN32 // In OpenSSL before 1.1.0, we need this first. #include @@ -168,6 +170,28 @@ result Xcom_network_provider_library::create_server_socket() { close_open_connection(&cd); return fd; } + + /* + This code sets the socket level timeout defined by + group_replication_xcom_ssl_socket_timeout. + */ + struct timeval timeout; + timeout.tv_sec = xcom_ssl_socket_timeout; + timeout.tv_usec = 0; + SET_OS_ERR(0); + if (setsockopt(fd.val, SOL_SOCKET, SOCK_OPT_RECVTIMEOUT, &timeout, + sizeof(timeout)) < 0) { + fd.funerr = to_errno(GET_OS_ERR); + G_MESSAGE( + "Unable to set socket options " + "(socket=%d, errno=%d)!", + fd.val, to_errno(GET_OS_ERR)); + + connection_descriptor cd; + cd.fd = fd.val; + close_open_connection(&cd); + return fd; + } } return fd; } @@ -199,6 +223,28 @@ result Xcom_network_provider_library::create_server_socket_v4() { return fd; } } + + /* + This code sets the socket level timeout defined by + group_replication_xcom_ssl_socket_timeout. 
+ */ + struct timeval timeout; + timeout.tv_sec = xcom_ssl_socket_timeout; + timeout.tv_usec = 0; + SET_OS_ERR(0); + if (setsockopt(fd.val, SOL_SOCKET, SOCK_OPT_RECVTIMEOUT, &timeout, + sizeof(timeout)) < 0) { + fd.funerr = to_errno(GET_OS_ERR); + G_MESSAGE( + "Unable to set socket options " + "(socket=%d, errno=%d)!", + fd.val, to_errno(GET_OS_ERR)); + + connection_descriptor cd; + cd.fd = fd.val; + close_open_connection(&cd); + return fd; + } return fd; } /* purecov: end */ diff --git a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_os.h b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_os.h index 9af18927fc81..aa37c15d698e 100644 --- a/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_os.h +++ b/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task_os.h @@ -95,6 +95,8 @@ static inline int is_socket_error(int x) { return x == SOCKET_ERROR || x < 0; } #define SOCK_ECONNRESET ECONNRESET #define SOCK_ERRNO task_errno #define SOCK_OPT_REUSEADDR SO_REUSEADDR +#define SOCK_OPT_RECVTIMEOUT SO_RCVTIMEO +#define SOCK_OPT_SENDTIMEOUT SO_SNDTIMEO #define GET_OS_ERR errno #define SET_OS_ERR(x) errno = (x) #define CLOSESOCKET(x) close(x) diff --git a/plugin/group_replication/src/plugin.cc b/plugin/group_replication/src/plugin.cc index 15e10f8c5e87..1a0fcafc4170 100644 --- a/plugin/group_replication/src/plugin.cc +++ b/plugin/group_replication/src/plugin.cc @@ -305,6 +305,14 @@ ulong get_exit_state_action_var() { return ov.exit_state_action_var; } ulong get_flow_control_mode_var() { return ov.flow_control_mode_var; } +ulong get_xcom_ssl_socket_timeout_var() { + return ov.xcom_ssl_socket_timeout_var; +} + +ulong get_xcom_ssl_accept_retries_var() { + return ov.xcom_ssl_accept_retries_var; +} + long get_flow_control_certifier_threshold_var() { return ov.flow_control_certifier_threshold_var; } @@ -2515,6 +2523,9 @@ int build_gcs_parameters(Gcs_interface_parameters &gcs_module_parameters) { std::string ssl_crlpath("");
std::string tls_version(""); std::string ssl_fips_mode(""); + std::string xcom_ssl_socket_timeout(""); + std::string xcom_ssl_accept_retries(""); + if (xcom_comm_protocol == XCOM_PROTOCOL) { ssl_key.append(sv.ssl_key ? sv.ssl_key : ""); ssl_cert.append(sv.ssl_cert ? sv.ssl_cert : ""); @@ -2525,6 +2536,17 @@ int build_gcs_parameters(Gcs_interface_parameters &gcs_module_parameters) { ssl_crlpath.append(sv.ssl_crlpath ? sv.ssl_crlpath : ""); tls_version.append(sv.tls_version ? sv.tls_version : ""); ssl_fips_mode.append(ov.ssl_fips_mode_values[sv.ssl_fips_mode]); + + if (ov.xcom_ssl_socket_timeout_var > 0) { + xcom_ssl_socket_timeout.append( + std::to_string(ov.xcom_ssl_socket_timeout_var)); + } + + if (ov.xcom_ssl_accept_retries_var > 0) { + xcom_ssl_accept_retries.append( + std::to_string(ov.xcom_ssl_accept_retries_var)); + } + } else if (xcom_comm_protocol == MYSQL_PROTOCOL) { ssl_key.append(ov.recovery_ssl_key_var ? ov.recovery_ssl_key_var : ""); ssl_cert.append(ov.recovery_ssl_cert_var ? ov.recovery_ssl_cert_var : ""); @@ -2555,6 +2577,11 @@ int build_gcs_parameters(Gcs_interface_parameters &gcs_module_parameters) { gcs_module_parameters.add_parameter("cipher", ssl_cipher); gcs_module_parameters.add_parameter("tls_version", tls_version); + gcs_module_parameters.add_parameter("xcom_ssl_socket_timeout", + xcom_ssl_socket_timeout); + gcs_module_parameters.add_parameter("xcom_ssl_accept_retries", + xcom_ssl_accept_retries); + bool is_ciphersuites_null = xcom_comm_protocol == XCOM_PROTOCOL ? sv.tls_ciphersuites == nullptr @@ -4669,6 +4696,37 @@ static MYSQL_SYSVAR_ULONG( 0 /* block */ ); +static MYSQL_SYSVAR_ULONG( + xcom_ssl_socket_timeout, /* name */ + ov.xcom_ssl_socket_timeout_var, /* var */ + PLUGIN_VAR_OPCMDARG | + PLUGIN_VAR_PERSIST_AS_READ_ONLY, /* optional var | no set default */ + "The timeout in seconds for the socket used for SSL Handshake on xcom port. " + "Default: 0.", + nullptr, /* check func. */ + nullptr, /* update func.
*/ + DEFAULT_XCOM_SSL_SOCKET_TIMEOUT, /* default */ + MIN_XCOM_SSL_SOCKET_TIMEOUT, /* min */ + MAX_XCOM_SSL_SOCKET_TIMEOUT, /* max */ + 0 /* block */ +); + +static MYSQL_SYSVAR_ULONG( + xcom_ssl_accept_retries, /* name */ + ov.xcom_ssl_accept_retries_var, /* var */ + PLUGIN_VAR_OPCMDARG | + PLUGIN_VAR_PERSIST_AS_READ_ONLY, /* optional var | no set default */ + "Number of retries to be performed before closing the socket listening on " + "the xcom port. " + "Default: 10.", + nullptr, /* check func. */ + nullptr, /* update func. */ + DEFAULT_XCOM_SSL_ACCEPT_RETRIES, /* default */ + MIN_XCOM_SSL_ACCEPT_RETRIES, /* min */ + MAX_XCOM_SSL_ACCEPT_RETRIES, /* max */ + 0 /* block */ +); + static MYSQL_SYSVAR_ULONG( compression_threshold, /* name */ ov.compression_threshold_var, /* var */ @@ -5302,6 +5360,8 @@ static SYS_VAR *group_replication_system_vars[] = { MYSQL_SYSVAR(view_change_uuid), MYSQL_SYSVAR(communication_stack), MYSQL_SYSVAR(paxos_single_leader), + MYSQL_SYSVAR(xcom_ssl_socket_timeout), + MYSQL_SYSVAR(xcom_ssl_accept_retries), nullptr, };