commit 4f060a38c26d770baf2f65e99c94d53f3a08425b Author: yuxianjiang Date: Wed Jan 6 11:16:40 2021 +0800 [bugfix]issue#349 deadlock transaction hang up when connection is broken Problem: When deadlock detection is turned off, transactions can hold locks and end up in a deadlock. If the transactions' clients disconnect accidentally, the transactions will not roll back and will hold their locks until the lock wait times out. This causes subsequent transactions to block on the held locks. Also, if a transaction is blocked by another transaction and its client disconnects accidentally, this transaction will not release its locks until the lock wait times out, no matter how deadlock checking is configured. The deadlock hang-up can be solved by turning deadlock checking on. Solution: In the lock wait timeout monitor thread, we add a check on the socket connection status of the waiting thread. If the connection is broken, the monitor marks the transaction as unusable and signals the suspended lock-wait thread as if the lock wait had timed out. diff --git a/include/violite.h b/include/violite.h index 3ec407f68..89e74c15e 100644 --- a/include/violite.h +++ b/include/violite.h @@ -177,6 +177,7 @@ int vio_getnameinfo(const struct sockaddr *sa, char *hostname, size_t hostname_size, char *port, size_t port_size, int flags); +int vio_socket_alive(Vio *vio); #ifdef HAVE_OPENSSL #include diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result index 2003ae02b..7877c3dba 100644 --- a/mysql-test/suite/sys_vars/r/all_vars.result +++ b/mysql-test/suite/sys_vars/r/all_vars.result @@ -17,6 +17,8 @@ ALL_STACK_PATH ALL_STACK_PATH CDB_KILL_IDLE_TRANS_TIMEOUT CDB_KILL_IDLE_TRANS_TIMEOUT +CDB_LOCK_CONNECT_DETECT_ENABLED +CDB_LOCK_CONNECT_DETECT_ENABLED CDB_SHOW_IPK_INFO CDB_SHOW_IPK_INFO CDB_SQL_MODE_FIXUP_ENABLED diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9098c6e53..6b0ff0dd6 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -397,6 +397,7 @@ my_bool cdb_enable_lock_statistics= false; ulong cdb_ignore_filename_length= 0; my_bool cdb_tencentroot_slow_log_enabled = FALSE; my_bool 
cdb_sql_mode_fixup_enabled = TRUE; +my_bool cdb_lock_connect_check_enabled = TRUE; /** Total memory used in server layer, pfs layer and innodb layer. diff --git a/sql/mysqld.h b/sql/mysqld.h index e80f6c97d..b145af475 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -406,6 +406,7 @@ extern ulonglong cdb_optimize_large_trans_binlog_last_affected_rows_threshold; extern ulonglong cdb_optimize_large_trans_binlog_aver_affected_rows_threshold; extern my_bool cdb_tencentroot_slow_log_enabled; extern my_bool cdb_sql_mode_fixup_enabled; +extern my_bool cdb_lock_connect_check_enabled; /* per-thread sync/async io stats */ extern void update_thread_stats(int type, ulonglong size= 0); diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 68ffd63b5..adc334a9c 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -967,4 +967,22 @@ bool thd_connection_alive(THD *thd) return false; } +bool thd_connection_alive2(THD *thd) +{ + NET *net= thd->get_protocol_classic()->get_net(); + if (!net) + return true; + + if (net->error) + return false; + + if (!net->vio) + return true; + + if (net->vio->mysql_socket.fd != INVALID_SOCKET) + return vio_socket_alive(net->vio); + + return true; +} + #endif /* EMBEDDED_LIBRARY */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 3ab3aca95..b7e4594bb 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -6491,3 +6491,10 @@ static Sys_var_mybool Sys_cdb_sql_mode_fixup_enabled( GLOBAL_VAR(cdb_sql_mode_fixup_enabled), CMD_LINE(OPT_ARG), DEFAULT(TRUE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(NULL), ON_UPDATE(NULL)); + +static Sys_var_mybool Sys_cdb_lock_connect_detect_enabled( + "cdb_lock_connect_detect_enabled", "Enable connection check when transaction " + "is blocked by row lock. 
Default is TRUE", + GLOBAL_VAR(cdb_lock_connect_check_enabled), CMD_LINE(OPT_ARG), + DEFAULT(TRUE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(NULL), ON_UPDATE(NULL)); diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index cfa0e6516..00b8eba23 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1302,6 +1302,8 @@ struct trx_t { Committed on DD tables */ #endif /* UNIV_DEBUG */ ulint magic_n; + + bool connect_broken; }; /** diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index aac5b1e38..d3f57c499 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -550,9 +550,9 @@ lock_wait_suspend_thread( return; } - if (lock_wait_timeout < 100000000 + if (trx->connect_broken || (lock_wait_timeout < 100000000 && wait_time > (double) lock_wait_timeout - && !trx_is_high_priority(trx)) { + && !trx_is_high_priority(trx))) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; @@ -597,6 +597,7 @@ lock_wait_release_thread_if_suspended( } } +bool thd_connection_alive2(THD *thd); /*********************************************************************//** Check if the thread lock wait has timed out. Release its locks if the wait has actually timed out. */ @@ -621,6 +622,19 @@ lock_wait_check_and_cancel( trx = thr_get_trx(slot->thr); + /* Try to detect trx's connection is alive. If connection + is broken, signal timeout and quit. + */ + ut_ad(trx->connect_broken == false); + if (cdb_lock_connect_check_enabled) { + if (trx->mysql_thd && !thd_connection_alive2(trx->mysql_thd)) { + wait_time = slot->wait_timeout + 1; + trx->connect_broken = true; + ib::error() << "Transaction's connect is broken. 
Aborting by " + "timeout."; + } + } + if (trx_is_interrupted(trx) || (slot->wait_timeout < 100000000 && (wait_time > (double) slot->wait_timeout diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index a1c27db15..c0e6c9a2b 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -214,6 +214,8 @@ trx_init( trx->hot_update_status = HOT_UPDATE_STATUS_NONE; trx->is_point_update = false; + + trx->connect_broken = false; } /** For managing the life-cycle of the trx_t instance that we get @@ -481,6 +483,8 @@ trx_create_low() trx_free(). */ ut_a(trx->mod_tables.size() == 0); + trx->connect_broken = false; + return(trx); } diff --git a/vio/viosocket.c b/vio/viosocket.c index 1aa57ddb2..29ab82043 100644 --- a/vio/viosocket.c +++ b/vio/viosocket.c @@ -1203,3 +1203,33 @@ int vio_getnameinfo(const struct sockaddr *sa, port, port_size, flags); } + +int vio_socket_alive(Vio *vio) +{ + int r = 0; + char buf[16]; + int sock = vio->mysql_socket.fd; + int err = 0; + r = recv(sock, buf, sizeof(buf), MSG_PEEK | MSG_DONTWAIT); + if (r == -1) + { + err = errno; + if (err == EAGAIN || err == EWOULDBLOCK) + return TRUE; + + return FALSE; + } + else if (r == 0) + { + //When a stream socket peer has performed an orderly shutdown, the + //return value will be 0 (the traditional "end-of-file" return). + //This means that vio socket has been close + return FALSE; + } + else + { + return TRUE; + } + + return TRUE; +}