Description:
When calling an aggregate UDF on a select with group by and with rollup, xxx_init() is only called once (where the UDF can allocate the data structures it needs for the aggregate calculation). The xxx_add, xxx_clear and xxx() functions are then called with multiple UDF_INIT * values, each with identical *ptr values as set by the single call to xxx_init(). Once the query is complete xxx_deinit() is called only once.
It seems that xxx_init() should be called once every time an aggregation set is to be calculated (e.g. for each unique UDF_INIT *), followed by clear/add/xxx() calls and a final deinit().
Here is an example:
CREATE TABLE `tmp1` (
`gid` varchar(2) default NULL,
`xval` double default NULL,
`yval` double default NULL,
`gid2` varchar(1) default NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
select * from tmp1;
gid xval yval gid2
a 1 4 b
a 2 5 b
a 4 19 a
a 22 21 b
b 1 3 b
b 3 7 b
b 5 9 b
b 7 12 a
b 8 18 b
b 23 24 a
b 12 33 a
create aggregate function udf_count returns real soname 'ronUDFs.dll';
select gid, count(xval), udf_count(xval) from tmp1 group by gid;
gid count(xval) udf_count(xval)
a 4 4
b 7 7
THIS IS OK - CALLING SEQUENCE:
udf_count_init() initid 073894B4 internal id 111
udf_count_clear() initid 073894B4 internal id 111 done
udf_count_add() initid 073894B4 internal id 111 added x 1 attribute xval count 1
udf_count_add() initid 073894B4 internal id 111 added x 2 attribute xval count 2
udf_count_add() initid 073894B4 internal id 111 added x 4 attribute xval count 3
udf_count_add() initid 073894B4 internal id 111 added x 22 attribute xval count 4
udf_count() initid 073894B4 internal id 111 count 4
udf_count_clear() initid 073894B4 internal id 111 done
udf_count_add() initid 073894B4 internal id 111 added x 23 attribute xval count 1
udf_count_add() initid 073894B4 internal id 111 added x 8 attribute xval count 2
udf_count_add() initid 073894B4 internal id 111 added x 7 attribute xval count 3
udf_count_add() initid 073894B4 internal id 111 added x 5 attribute xval count 4
udf_count_add() initid 073894B4 internal id 111 added x 3 attribute xval count 5
udf_count_add() initid 073894B4 internal id 111 added x 1 attribute xval count 6
udf_count_add() initid 073894B4 internal id 111 added x 12 attribute xval count 7
udf_count() initid 073894B4 internal id 111 count 7
udf_count_deinit() initid 073894B4 internal id 111 closing logfile
select gid, count(xval), udf_count(xval) from tmp1 group by gid with rollup;
gid count(xval) udf_count(xval)
a 4 7
b 7 14
NULL 11 14
THIS IS WRONG - CALLING SEQUENCE:
udf_count_init() initid 073894BC internal id 382
udf_count_clear() initid 073894BC internal id 382 done
udf_count_add() initid 073894BC internal id 382 added x 1 attribute xval count 1
udf_count_clear() initid 074B88CC internal id 382 done
udf_count_add() initid 074B88CC internal id 382 added x 1 attribute xval count 1
udf_count_add() initid 073894BC internal id 382 added x 2 attribute xval count 2
udf_count_add() initid 074B88CC internal id 382 added x 2 attribute xval count 3
udf_count_add() initid 073894BC internal id 382 added x 4 attribute xval count 4
udf_count_add() initid 074B88CC internal id 382 added x 4 attribute xval count 5
udf_count_add() initid 073894BC internal id 382 added x 22 attribute xval count 6
udf_count_add() initid 074B88CC internal id 382 added x 22 attribute xval count 7
udf_count() initid 073894BC internal id 382 count 7
udf_count_clear() initid 073894BC internal id 382 done
udf_count_add() initid 073894BC internal id 382 added x 23 attribute xval count 1
udf_count_add() initid 074B88CC internal id 382 added x 23 attribute xval count 2
udf_count_add() initid 073894BC internal id 382 added x 8 attribute xval count 3
udf_count_add() initid 074B88CC internal id 382 added x 8 attribute xval count 4
udf_count_add() initid 073894BC internal id 382 added x 7 attribute xval count 5
udf_count_add() initid 074B88CC internal id 382 added x 7 attribute xval count 6
udf_count_add() initid 073894BC internal id 382 added x 5 attribute xval count 7
udf_count_add() initid 074B88CC internal id 382 added x 5 attribute xval count 8
udf_count_add() initid 073894BC internal id 382 added x 3 attribute xval count 9
udf_count_add() initid 074B88CC internal id 382 added x 3 attribute xval count 10
udf_count_add() initid 073894BC internal id 382 added x 1 attribute xval count 11
udf_count_add() initid 074B88CC internal id 382 added x 1 attribute xval count 12
udf_count_add() initid 073894BC internal id 382 added x 12 attribute xval count 13
udf_count_add() initid 074B88CC internal id 382 added x 12 attribute xval count 14
udf_count() initid 073894BC internal id 382 count 14
udf_count() initid 074B88CC internal id 382 count 14
udf_count_deinit() initid 073894BC internal id 382 closing logfile
Note that the xxx_clear, xxx_add and xxx() functions are called with two different initid pointers, with the apparent expectation that the UDF will maintain separate data structures for each one, but the xxx_init() and xxx_deinit() functions were only called once with the first UDF_INIT *.
How to repeat:
Use examples above.
UDF code:
#include <iostream>
#include <fstream>
#ifdef STANDARD
#include <stdio.h>
#include <string.h>
#ifdef __WIN__
typedef unsigned __int64 ulonglong;
typedef __int64 longlong;
#else
typedef unsigned long long ulonglong;
typedef long long longlong;
#endif /*__WIN__*/
#else
#include <my_global.h>
#include <my_sys.h>
#endif
#include <mysql.h>
#include <m_ctype.h>
#include <m_string.h>
#define BUFFERSIZE 1024
using namespace std;
extern "C" {
my_bool udf_count_init( UDF_INIT* initid, UDF_ARGS* args, char* message );
void udf_count_deinit( UDF_INIT* initid );
void udf_count_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
void udf_count_clear( UDF_INIT* initid, char* is_null, char* is_error );
void udf_count_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
double udf_count( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char *error );
}
struct data
{
unsigned long long count;
int id;
ofstream logfile;
};
my_bool udf_count_init( UDF_INIT* initid, UDF_ARGS* args, char* message )
{
if (args->arg_count < 1 || args->arg_count>2)
{
strcpy(message,"wrong number of arguments: udf_count() requires one or two arguments");
return 1;
}
if (args->arg_type[0]!= REAL_RESULT)
{
strcpy(message,"udf_count() requires a real for parameter 1");
return 1;
}
if (args->arg_count>1 && (args->arg_type[1]!=INT_RESULT))
{
strcpy(message,"udf_count() requires an int as parameter 2");
return 1;
}
initid->decimals=2;
if (args->arg_count==2 && (*((ulong*)args->args[1])<=16))
{
initid->decimals=*((ulong*)args->args[1]);
}
// regression_data *buffer = (regression_data *) malloc(sizeof(struct regression_data));
data *buffer = new data;
if (buffer == NULL) {
strcpy(message, "Cannot allocate memory for data");
return 1;
}
buffer->count = 0;
/* Seed the random-number generator with current time so that
* the numbers will be different every time we run.
*/
srand( (unsigned)time( NULL ) );
buffer->id = rand();
try {
buffer->logfile.open("c:\\ronUDF.log", ios::out | ios::app | ios::binary);
if (!buffer->logfile.is_open()) {
throw "Can't open logfile for writing";
}
} catch (char * str) {
strcpy(message, str);
return 1;
}
initid->maybe_null = 1;
initid->max_length = 32;
//initid->const_item = 0;
initid->ptr = (char*)buffer;
buffer->logfile << "udf_count_init() initid " << initid << " internal id " << buffer->id << endl;
return 0;
}
void udf_count_deinit( UDF_INIT* initid )
{
if (initid != NULL && initid->ptr != NULL) {
data *buffer = (data*)initid->ptr;
if (buffer->logfile.is_open()) {
buffer->logfile << "udf_count_deinit() initid " << initid << " internal id " << buffer->id << " closing logfile" << endl;
buffer->logfile.close();
}
// free(buffer);
delete buffer;
initid->ptr = NULL;
}
}
void udf_count_reset( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
{
data *buffer = (data*)initid->ptr;
buffer->logfile << "udf_count_reset() " << buffer->id << endl;
udf_count_clear(initid, is_null, is_error);
udf_count_add( initid, args, is_null, is_error );
}
void udf_count_clear( UDF_INIT* initid, char* is_null, char* is_error )
{
data *buffer = (data*)initid->ptr;
buffer->count = 0;
*is_null = 0;
*is_error = 0;
buffer->logfile << "udf_count_clear() initid " << initid << " internal id " << buffer->id << " done" << endl;
}
void udf_count_add( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
{
data *buffer = (data*)initid->ptr;
if (args->args[0]!=NULL)
{
double x = *((double*)args->args[0]);
buffer->count++;
buffer->logfile << "udf_count_add() initid " << initid << " internal id " << buffer->id << " added x " << x << " attribute " << args->attributes[0] << " count " << buffer->count << endl;
}
}
double udf_count( UDF_INIT* initid, UDF_ARGS* args, char* is_null, char* is_error )
{
data* buffer = (data*)initid->ptr;
if (buffer == NULL) {
*is_null = 1;
*is_error = 1;
return 0;
}
if (buffer->count == 0) {
*is_null = 1;
buffer->logfile << "udf_count() " << buffer->id << " count was 0" << endl;
return 0;
}
if (is_error && *is_error != 0)
{
*is_null = 1;
buffer->logfile << "udf_count() " << buffer->id << " is_error was not 0" << endl;
return 0.0;
}
buffer->logfile << "udf_count() initid " << initid << " internal id " << buffer->id << " count " << buffer->count << endl;
*is_null=0;
return buffer->count;
}
Suggested fix:
Call udf xxx_init() / xxx_deinit() once per unique UDF_INIT * needed for the aggregation calculations.