Bug #1690 UDFs are being called redundantly and causes heavy delays
Submitted: 28 Oct 2003 14:49 Modified: 16 Nov 2003 7:28
Reporter: Ondra Zizka Email Updates:
Status: Not a Bug Impact on me:
None 
Category:MySQL Server: User-defined functions ( UDF ) Severity:S2 (Serious)
Version:3.23.55 OS:Microsoft Windows (Win32)
Assigned to: Sergei Golubchik CPU Architecture:Any

[28 Oct 2003 14:49] Ondra Zizka
Description:
I'm not really sure if this concerns UDF only, but in UDF I could check it by writing a file. See:
I've rewritten a udf_example.cc to be usuable under win32/MSVS.NET, but I recognized the result to be useless. Not because of my foult, but because MySQL calls it N-times redundantly on N-th call, that means, when I perform 1000th Query like 
  "UPDATE ip2dns3 SET dns2 = DNSREV(ip) WHERE dns = '-' LIMIT 1"
, the function is called 1001 times (I'm not sure about this maths, anyway...). Combined with the fact that gethostbyaddr() takes few seconds, my DNSREV() turns to be good-for-nothing.

My idea is that DNSREV() should be called 1-times per the query above.

How to repeat:
Write any UDF and log the calls to it. Eg, take this: 

====================================================================
/****************************************************************************
** return hostname for an IP number.
** The functions can take as arguments a string "xxx.xxx.xxx.xxx" or
** four numbers.
****************************************************************************/

// INIT
TESTMYSQLDLL2_API my_bool dnsrev_init(UDF_INIT *initid, UDF_ARGS *args, char *message){
	if(FILE* fp = fopen("c:/mysql_dns.log","a")){
		fputs("init ",fp); fflush(fp); fclose(fp); }
  if (args->arg_count == 1)
    args->arg_type[0] = STRING_RESULT;
  else if (args->arg_count == 4)
    args->arg_type[0] = args->arg_type[1] = args->arg_type[2] = args->arg_type[3] = INT_RESULT;
	else{ strcpy(message, "Wrong # of arguments REVERSE_LOOKUP - must be 1 string or 4 ints."); return 1; }
  initid->max_length=128;
  initid->maybe_null=1;
	
	WSADATA wsaData; if(0 != WSAStartup(MAKEWORD(1,1), &wsaData)){
		strcpy(message, "Can't initiate WinSock :("); return 1; }

  return 0;
}
// DEINIT
TESTMYSQLDLL2_API void dnsrev_deinit(UDF_INIT *initid){
	if(FILE* fp = fopen("c:/mysql_dns.log","a")){
		fputs(" deinit\n",fp); fflush(fp); fclose(fp); }
  WSACleanup();
}

// DNSREV
TESTMYSQLDLL2_API char *dnsrev(UDF_INIT *initid, UDF_ARGS *args, char *result,
		     unsigned long *res_length, char *null_value, char *error){
	
	if(FILE* fp = fopen("c:/mysql_dns.log","a")){
		fputs(".",fp); fflush(fp); fclose(fp); }

  if (args->arg_count == 4){
    if (!args->args[0] || !args->args[1] ||!args->args[2] ||!args->args[3]){ *null_value=1; return 0; }
    sprintf(result,"%d.%d.%d.%d",
	    (int) *((__int64*) args->args[0]),
	    (int) *((__int64*) args->args[1]),
	    (int) *((__int64*) args->args[2]),
	    (int) *((__int64*) args->args[3]));
  }
  else{						// string argument
		if(!args->args[0]){ *null_value=1; return 0; } // Return NULL for NULL values
    uint length = args->lengths[0];
    if(length >= (uint) *res_length-1)
      length = (uint) *res_length;
    memcpy(result,args->args[0],length);
    result[length]=0;
  }

  unsigned long taddr = inet_addr(result);
  if(taddr == (unsigned long) -1L){
		*null_value=1; return 0;
		//strcpy(result,"Chyba v inet_addr()"); return result;
	}
  struct hostent *hp;
	hp = gethostbyaddr((char*) &taddr, sizeof(taddr), AF_INET);
  if(!hp){
		*null_value=1; return 0;
		//strcpy(result,"Chyba v gethostbyaddr()"); return result;
	}
	strcpy(result,hp->h_name);
  *res_length = (ulong)strlen(result);
  return result;
}
====================================================================

-- Now compile and register in MySQL:
CREATE FUNCTION reverse_lookup RETURNS STRING SONAME "fce.dll"

-- Now create any suitable table. I used this:
CREATE TABLE `ip2dns3` (
  `ip` varchar(50) NOT NULL default '',
  `dns` varchar(255) NOT NULL default '',
  `updated` date NOT NULL default '0000-00-00',
  `dns2` varchar(255) NOT NULL default '',
  PRIMARY KEY  (`ip`(16))
) TYPE=MyISAM PACK_KEYS=1;

INSERT INTO `ip2dns3` VALUES ('211.4.229.126', 'zz2001300562127.userreverse.dion.ne.jp', '2003-10-28', '');
INSERT INTO `ip2dns3` VALUES ('194.228.93.120', 'zvsaps.zabreh.indos.cz', '2003-10-26', '');
INSERT INTO `ip2dns3` VALUES ('81.6.37.81', '-', '2003-10-28', '');
INSERT INTO `ip2dns3` VALUES ('213.137.115.191', '-', '2003-10-26', '');
INSERT INTO `ip2dns3` VALUES ('216.69.56.1', 'zone56-1.kctcs.edu', '2003-10-28', '');
INSERT INTO `ip2dns3` VALUES ('213.142.128.62', 'zoltar.teledomenet.gr', '2003-10-26', '');
INSERT INTO `ip2dns3` VALUES ('66.154.97.29', '-', '2003-10-28', '');
INSERT INTO `ip2dns3` VALUES ('212.47.30.98', 'zizkova.starnet.cz', '2003-10-26', '');
INSERT INTO `ip2dns3` VALUES ('62.229.37.69', 'zizka.elsatnet.cz', '2001-03-28', '');
INSERT INTO `ip2dns3` VALUES ('213.191.138.42', 'zg05-041.dialin.iskon.hr', '2003-10-28', '');
### Add few other IP's to get the bug surely 'work'

#############
-- Now execute this: 
UPDATE ip2dns3 SET dns2 = DNSREV(ip) WHERE dns = '-' LIMIT 1
-- And once more
UPDATE ip2dns3 SET dns2 = DNSREV(ip) WHERE dns = '-' LIMIT 1
-- And more
UPDATE ip2dns3 SET dns2 = DNSREV(ip) WHERE dns = '-' LIMIT 1

# If you would watch the c:/mysql_dns.log, you would se this:
init . deinit
init .. deinit
init ... deinit
# and so on...

# What is weird is that the number of dots (calls to dnsrev()) sometimes seems to be infinite, the dnsrev_deinit() is called only after KILLing the thread.
init .......................................................... deinit

Suggested fix:
Now, guys, get to work :)
My suggestion to the folks who have the same problem is to add an INTEGER argument determining whether the gethostbyaddr() (or any other long-time process) will be called or not:

# SQL 
UPDATE ip2dns3 SET dns2 = DNSREV(ip, dns='-') WHERE dns = '-' LIMIT 1

// code: 
  // ...
  if( *((__int64*) args->args[1]) ){
    { *null_value=1; return 0; } // return NULL, this call is WORTHLESS anyway... 
  }
  // ...

And, I hope this is not a duplicate, this took me about an hour co create :)

Ondra Žižka
ondra.zizka.cz
[28 Oct 2003 16:16] Ondra Zizka
Now I've tried my workaround with the second argument. It doesn't work, because the rows where UDF is called are the ones that passed WHERE contition, thus it will be allways true.

Well then, I ask: What is the sense of passing over other convenient rows, and even if it has no system (at least, a system clear to me)...

Here I add the log the query generated:
- Dot means a UDF has been called
- 1st braces contains IP adress (PRIMARY KEY)
- 2nd braces contain the 2nd parameter (bool)
- + means that gethostbyaddr() was called

init .(10.72.128.30)  +.(62.24.73.136)  +.(65.192.195.15)  +.(65.192.195.16)  +.(129.187.254.47)  +.(192.92.126.162)  +.(193.15.53.2)  +.(194.102.200.14)  +.(194.213.48.50)  +.(194.213.208.75)  +.(194.228.135.14)  + deinit
init  .(10.72.128.30)+   .(62.24.73.136)+   .(65.192.195.15)+   .(65.192.195.16)+   .(129.187.254.47)+   .(192.92.126.162)+   .(193.15.53.2)+   .(194.102.200.14)+   .(194.213.48.50)+   .(194.213.208.75)+   .(194.228.135.14)+   .(194.228.148.23)+   deinit
init  .(10.72.128.30)(1)+   .(62.24.73.136)(1)+   .(65.192.195.15)(1)+   .(65.192.195.16)(1)+   .(129.187.254.47)(1)+   .(192.92.126.162)(1)+   .(193.15.53.2)(1)+   .(194.102.200.14)(1)+   .(194.213.48.50)(1)+   .(194.213.208.75)(1)+   .(194.228.135.14)(1)+   .(194.228.148.23)(1)+   .(195.47.108.147)(1)+   .(195.110.172.132)(1)+   .(200.253.207.60)(1)+   .(203.130.212.9)(1)+   .(209.69.41.2)(1)+   .(209.247.40.107)(1)+   .(212.11.100.28)(1)+   deinit

Note that every further query seems to skip several rows and return other row. Sometimes I suspect MySQL to do this to not let me bore during long autumn nights... :((

Ondra
[15 Nov 2003 13:36] Sergei Golubchik
This is not a bug.

Here, what happens:

mysql> UPDATE ip2dns3 SET dns2 = metaphon(ip) WHERE dns = '-' LIMIT 1;
init ...deinit
Query OK, 0 rows affected (0.00 sec)
Rows matched: 3  Changed: 0  Warnings: 0

as you see I used metaphon() function from udf_example.cc in mysql distribution (where I added fprintf(stderr, ...))

There are 3 rows with dns='-'
thus "rows matched" is 3 and metaphon udf was called three times.

Limit applies to changed rows, with LIMIT 1 you specify that no more than one row should be changed - indeed you see "Changed: 0"

It is because when the new value of the dns2 field is exactly the same as its old value, the row is not changed, limit does not apply, but udf is called.

To get the desired effect you may modify your UPDATE by

   WHERE dns='-' AND dns2=''

to avoid resolving ip's that were already resolved
[15 Nov 2003 21:12] Ondra Zizka
I'm not really convinced that this
-------->>
Limit applies to changed rows, with LIMIT 1 you specify that no more than one
row should be changed - indeed you see "Changed: 0".
It is because when the new value of the dns2 field is exactly the same as its
old value, the row is not changed, limit does not apply, but udf is called.
--------<<
really hits my problem. Look, if I have some rows like
  123.45.67.89  |  somebody.somehost.com
  123.45.67.90  |  -  
  123.45.67.91  |  -  
and I do 
  UPDATE ip2dns3 SET dns2 = metaphon(ip) WHERE dns = '-' LIMIT 1,
the only rows matched are that one which aren't still resolved.... UH?

Man, you got it! I really should blush :)  So I should mark not-tried ip's as '' , as the result of my dns_resolve() is '-' if unresolved. Wow that's something I really didn't realize. Sorry for the report. But, anyway, it could be a good demonstration of this feature, could be included in help?

Thanks for enlightenment! :) O.Z.
[16 Nov 2003 7:28] Sergei Golubchik
oops, I just found that the behavior I described is valid for 3.23.x branch only. Since 4.0 LIMIT indeed applies - as you expected - to the number of rows *matched* not *changed*. And it is how UPDATE with LIMIT is documented.

Still, it doesn't change anything for you - whatever the LIMIT applies to, you still wouldn't like to resolve the same ip twice :)