Bug #9678 | Client library hangs after network communication failiure | ||
---|---|---|---|
Submitted: | 6 Apr 2005 13:51 | Modified: | 13 Oct 2006 18:12 |
Reporter: | Andy Hobbs | Email Updates: | |
Status: | Closed | Impact on me: | |
Category: | MySQL Server: C API (client library) | Severity: | S2 (Serious) |
Version: | 4.1.10a | OS: | Linux (Linux 2.4) |
Assigned to: | CPU Architecture: | Any |
[6 Apr 2005 13:51]
Andy Hobbs
[6 Apr 2005 13:54]
Andy Hobbs
libmysql_r/viosocket.c
Attachment: viosocket.c (text/plain), 14.14 KiB.
[6 Apr 2005 13:56]
Andy Hobbs
I have added the vioselect.c which I have modified as an example
[6 Jun 2005 14:11]
Marcelo Fernandez
hi, i tested out the patch with mysql 4.1.12, though it didn't worked as i expected, so i modified it a bit and now it worked just fine for me, which is to timeout if the server won't answer: int vio_read(Vio * vio, gptr buf, int size) { int r; int len; fd_set read_set; struct timeval timeout; int sockError; int sockFlags; int selectFDs; DBUG_ENTER("vio_read"); DBUG_PRINT("enter", ("sd=%d, buf=%p, size=%d", vio->sd, buf, size)); #ifdef __WIN__ r = recv(vio->sd, buf, size,0); #else errno=0; /* For linux */ /* Query the socket it's self as the vio->fcntl_flags may say */ /* we are blocking when we are not (During connection) and */ /* Select will not work correctly on a NON BLOCKING socket*/ sockFlags = fcntl(vio->sd, F_GETFL, 0); if (sockFlags & O_NONBLOCK) { r = read(vio->sd, buf, size); } else { FD_ZERO(&read_set); FD_SET(vio->sd, &read_set); timeout.tv_sec = 2; timeout.tv_usec = 0; /* Try reading from it using select */ if ((selectFDs = select(vio->sd+1, &read_set, NULL, NULL, &timeout)) == -1) { /* select error */ r = -1; } else if (selectFDs > 0 && (FD_ISSET(vio->sd, &read_set))) { len = sizeof(sockError); getsockopt(vio->sd, SOL_SOCKET, SO_ERROR, &sockError, &len); r = 0; if (sockError != 0) { errno = sockError; r = - 1; } else { r = read(vio->sd, buf, size); } } else /* We timed out */ { r = -1; } } #endif /* __WIN__ */ #ifndef DBUG_OFF if (r < 0) { DBUG_PRINT("vio_error", ("Got error %d during read",errno)); } #endif /* DBUG_OFF */ DBUG_PRINT("exit", ("%d", r)); DBUG_RETURN(r); }
[6 Jun 2005 17:00]
Marcelo Fernandez
hi again, i improved a bit more the fix, it now timeout's according to the value from MYSQL_OPT_READ_TIMEOUT, which i think is more correct, here's the diff against the 4.1.12 sources: diff -rc mysql-4.1.12/client/mysql.cc mysql-4.1.12patched/client/mysql.cc *** mysql-4.1.12/client/mysql.cc Fri May 13 08:32:36 2005 --- mysql-4.1.12patched/client/mysql.cc Mon Jun 6 13:45:23 2005 *************** *** 2770,2775 **** --- 2770,2785 ---- mysql_options(&mysql,MYSQL_OPT_CONNECT_TIMEOUT, (char*) &timeout); } + { + uint timeout=30; + mysql_options(&mysql,MYSQL_OPT_READ_TIMEOUT, + (char*) &timeout); + } + { + uint timeout=30; + mysql_options(&mysql,MYSQL_OPT_WRITE_TIMEOUT, + (char*) &timeout); + } if (opt_compress) mysql_options(&mysql,MYSQL_OPT_COMPRESS,NullS); if (opt_secure_auth) diff -rc mysql-4.1.12/include/violite.h mysql-4.1.12patched/include/violite.h *** mysql-4.1.12/include/violite.h Fri May 13 08:32:21 2005 --- mysql-4.1.12patched/include/violite.h Mon Jun 6 11:44:50 2005 *************** *** 175,180 **** --- 175,182 ---- struct sockaddr_in remote; /* Remote internet address */ enum enum_vio_type type; /* Type of connection */ char desc[30]; /* String description */ + int read_timeout; /* Read timeout, in seconds */ + int write_timeout; /* Write timeout, in seconds */ #ifdef HAVE_VIO /* function pointers. They are similar for socket/SSL/whatever */ void (*viodelete)(Vio*); diff -rc mysql-4.1.12/vio/viosocket.c mysql-4.1.12patched/vio/viosocket.c *** mysql-4.1.12/vio/viosocket.c Fri May 13 08:32:01 2005 --- mysql-4.1.12patched/vio/viosocket.c Mon Jun 6 13:47:18 2005 *************** *** 31,54 **** int vio_read(Vio * vio, gptr buf, int size) { ! int r; ! DBUG_ENTER("vio_read"); ! DBUG_PRINT("enter", ("sd=%d, buf=%p, size=%d", vio->sd, buf, size)); #ifdef __WIN__ ! r = recv(vio->sd, buf, size,0); #else ! errno=0; /* For linux */ ! r = read(vio->sd, buf, size); #endif /* __WIN__ */ #ifndef DBUG_OFF ! if (r < 0) ! { ! DBUG_PRINT("vio_error", ("Got error %d during read",errno)); ! } #endif /* DBUG_OFF */ ! DBUG_PRINT("exit", ("%d", r)); ! DBUG_RETURN(r); } --- 31,94 ---- int vio_read(Vio * vio, gptr buf, int size) { ! int r; ! int len; ! fd_set read_set; ! struct timeval timeout; ! int sockFlags; ! int selectFDs; ! ! DBUG_ENTER("vio_read"); ! DBUG_PRINT("enter", ("sd=%d, buf=%p, size=%d", vio->sd, buf, size)); #ifdef __WIN__ ! r = recv(vio->sd, buf, size,0); #else ! errno=0; /* For linux */ ! ! /* Query the socket it's self as the vio->fcntl_flags may say */ ! /* we are blocking when we are not (During connection) and */ ! /* Select will not work correctly on a NON BLOCKING socket*/ ! sockFlags = fcntl(vio->sd, F_GETFL, 0); ! ! if (sockFlags & O_NONBLOCK) ! { ! r = read(vio->sd, buf, size); ! } ! else ! { ! FD_ZERO(&read_set); ! FD_SET(vio->sd, &read_set); ! ! timeout.tv_sec = vio->read_timeout; ! timeout.tv_usec = 0; ! ! /* Try reading from it using select */ ! if ((selectFDs = select(vio->sd+1, &read_set, NULL, NULL, &timeout)) == -1) ! { ! /* select error */ ! r = -1; ! } ! else ! if (selectFDs > 0 && (FD_ISSET(vio->sd, &read_set))) ! { ! r = read(vio->sd, buf, size); ! } ! else /* We timed out */ ! { ! r = -1; ! } ! } ! #endif /* __WIN__ */ #ifndef DBUG_OFF ! if (r < 0) ! { ! DBUG_PRINT("vio_error", ("Got error %d during read",errno)); ! } #endif /* DBUG_OFF */ ! DBUG_PRINT("exit", ("%d", r)); ! DBUG_RETURN(r); } *************** *** 317,331 **** } ! void vio_timeout(Vio *vio __attribute__((unused)), ! uint which __attribute__((unused)), ! uint timeout __attribute__((unused))) { #ifdef __WIN__ ulong wait_timeout= (ulong) timeout * 1000; (void) setsockopt(vio->sd, SOL_SOCKET, which ? SO_SNDTIMEO : SO_RCVTIMEO, (char*) &wait_timeout, sizeof(wait_timeout)); #endif /* __WIN__ */ } --- 357,378 ---- } ! void vio_timeout(Vio *vio, uint which, uint timeout) { #ifdef __WIN__ ulong wait_timeout= (ulong) timeout * 1000; (void) setsockopt(vio->sd, SOL_SOCKET, which ? SO_SNDTIMEO : SO_RCVTIMEO, (char*) &wait_timeout, sizeof(wait_timeout)); + #else + if (which) + { + vio->write_timeout = timeout; + } + else + { + vio->read_timeout = timeout; + } #endif /* __WIN__ */ }
[8 Jun 2005 8:27]
Andy Hobbs
Hi, I have applied this to 4.1.12 myself as well (I couldn't use the diff out put attached as a patch, I had to manually apply the changes myself?) and it works fine. Thanks for this, will it make it in to a 4.x.x release or will it be in 5.x.x? Andy
[3 May 2006 17:06]
Matthew Lord
Hi Kostja, Any rough ETA on this? The customer is asking for one. Thanks! -matt-
[4 Aug 2006 11:53]
Konstantin Osipov
Hello Andy, thank you for your bug report and the submitted patch. We, however, can not apply it as is due to the reasons that Monty outlined in the comment for Bug#4143: - adding an extra select() per every call of vio_read is an overhead that we would not like to have - this select() will not spot all communication failures anyway What we can (and will) do is proper implementation of vio_timeout on all operating systems that support socket timeouts. Currently vio_timeout works only on Windows: we will make sure it also works on Linux, FreeBSD and other opertating systems whose TCP/IP stacks support options SO_SNDTIMEO, SO_RCVTIMEO. When this functionality is in place, you'll be able to reduce the wait time to a a value specified in mysql_options() at library intialization. Please let me know what you think about this, and thank you for your interest in MySQL.
[6 Aug 2006 11:03]
Bugs System
A patch for this bug has been committed. After review, it may be pushed to the relevant source trees for release in the next version. You can access the patch from: http://lists.mysql.com/commits/10081 ChangeSet@1.2530, 2006-08-06 15:03:03+04:00, kroki@moonlight.intranet +2 -0 BUG#9678: Client library hangs after network communication failure Socket timeouts in client library were used only on Windows. The solution is to use socket timeouts in client library on all systems were they are supported. No test case is provided because it is impossible to simulate network failure in current test suit.
[14 Aug 2006 16:01]
Bugs System
A patch for this bug has been committed. After review, it may be pushed to the relevant source trees for release in the next version. You can access the patch from: http://lists.mysql.com/commits/10376 ChangeSet@1.2530, 2006-08-14 20:01:19+04:00, kroki@moonlight.intranet +2 -0 BUG#9678: Client library hangs after network communication failure Socket timeouts in client library were used only on Windows. The solution is to use socket timeouts in client library on all systems were they are supported. No test case is provided because it is impossible to simulate network failure in current test suit.
[14 Aug 2006 18:38]
Konstantin Osipov
The patch submitted for 4.1 is ok to push. Please set back to in progress when done.
[14 Aug 2006 18:42]
Tomash Brechko
Pushed to 4.1.22.
[14 Aug 2006 19:32]
Tomash Brechko
Setting back to 'In progress', because patch for 5.1 will be different.
[21 Aug 2006 17:33]
Tomash Brechko
Bug#4143, bug#5449 and bug#11774 are duplicated of this bug, as all of them call to implementation of socket timeouts.
[21 Aug 2006 18:04]
Tomash Brechko
The patch pushed to 4.1.22 implements socket timeouts on POSIX platforms. You may set client timeouts with MYSQL_OPT_READ_TIMEOUT and MYSQL_OPT_WRITE_TIMEOUT options for mysql_options().
[21 Aug 2006 19:03]
Vladimir Shebordaev
Posix says "that not all implementations allow this option to be set". So those attempts to set socket I/O timeout options may seldom be ingored while the constants are still defined 'cause Posix compliant application should return those option values to the application.
[21 Aug 2006 19:27]
Tomash Brechko
The comment above is correct, let me say that we are trying to utilize socket timeouts if they are supported. We set them without checking if the operation succeded (and even do nothing if SO_SNDTIMEO or SO_RCVTIMEO is not defined). As Monty said in comment for bug#4143, there's no good alternative.
[2 Sep 2006 9:23]
Timothy Smith
Pushed to 5.0 (will be in 5.0.25) TODO: needs different patch to be merged into 5.1
[4 Sep 2006 16:58]
MC Brown
An entry for this bug has been added to the changelog for 5.0.25.
[13 Sep 2006 8:20]
Tomash Brechko
Merged into 5.1.12.
[13 Sep 2006 16:42]
Paul DuBois
Noted in 5.1.12 changelog.
[15 Sep 2006 7:13]
Domas Mituzas
Reopening this bug - this has to be in 4.0 too, according to EOL.
[18 Sep 2006 14:41]
Konstantin Osipov
Tomash, please downport the patch to 4.0. Thanks.
[18 Sep 2006 18:03]
Bugs System
A patch for this bug has been committed. After review, it may be pushed to the relevant source trees for release in the next version. You can access the patch from: http://lists.mysql.com/commits/12157 ChangeSet@1.2194, 2006-09-18 22:02:06+04:00, kroki@moonlight.intranet +4 -0 BUG#9678: Client library hangs after network communication failure (back-port to 4.0) Socket timeouts in client library were used only on Windows. Additionally, in 4.0 write operations erroneously set read timeout. The solution is to use socket timeouts in client library on all systems were they are supported, and to differentiate between read and write timeouts. No test case is provided because it is impossible to simulate network failure in current test suite.
[20 Sep 2006 16:11]
Konstantin Osipov
Approved by email.
[13 Oct 2006 11:39]
Tomash Brechko
Pushed to 4.0.
[13 Oct 2006 18:12]
Paul DuBois
Noted in 4.0.28 changelog.
[21 Nov 2006 23:38]
James Day
This might also fix bug #6070 and bug #11897. I've asked for feedback on them.