Description:
Using a stored function to qualify a query is very much slower than the usage of a intermediate variable (1,7s against 0,01s !!).
The point is:
why is the combination
set @a=somefunc(...); select ... where field=@a;
fast and
select ... where field=somefunc(...);
slow?
mysql> select version();
+------------------+
| version() |
+------------------+
| 5.0.24a-standard |
+------------------+
1 row in set (0.00 sec)
mysql> set @a=fSeqBefore(5,'2007-01-01');
Query OK, 0 rows affected (0.01 sec)
mysql> select isolate from tIsolates where isolate=@a;
+---------+
| isolate |
+---------+
| 3050 |
+---------+
1 row in set (0.00 sec)
mysql> select isolate from tIsolates where isolate=fSeqBefore(5,'2007-01-01');
+---------+
| isolate |
+---------+
| 3050 |
+---------+
1 row in set (1.70 sec)
How to repeat:
Table tIsolates:
CREATE TABLE tIsolates (
isolate VARCHAR (20) NOT NULL PRIMARY KEY, #name or ID of isolate
prevIsol VARCHAR (20) NOT NULL DEFAULT '', #link to previous isolate
centerID TINYINT UNSIGNED NOT NULL DEFAULT 5, #center that performed analysis
patID INTEGER NULL, #-> tPatients (we have 'anonymous' isolates)
iso_typeID TINYINT UNSIGNED NOT NULL DEFAULT 0, #type of isolate e.g. blood
sampling_date DATE NULL, #date when sample was taken
senderID TINYINT UNSIGNED NOT NULL DEFAULT 0, #center or physician that sent in the isolate
arrival_date DATE NOT NULL, #date of arrival at center
annotations VARCHAR (254) NOT NULL DEFAULT '',
path_findings VARCHAR (160) NOT NULL DEFAULT '', #url to findings/documents
updID CHAR(4) NOT NULL,
updated TIMESTAMP,
KEY I_date (sampling_date),
FOREIGN KEY (centerID) REFERENCES sCenters(centerID) ON DELETE RESTRICT,
FOREIGN KEY (patID) REFERENCES tPatients(patID) ON DELETE CASCADE,
FOREIGN KEY (iso_typeID) REFERENCES sIsolateTypes(iso_typeID) ON DELETE RESTRICT,
FOREIGN KEY (senderID) REFERENCES sCenters(centerID) ON DELETE RESTRICT
) Engine=InnoDB COMMENT='Isolate (sample) taken from patient';
CREATE TABLE tRawSequences (
seqID INTEGER AUTO_INCREMENT PRIMARY KEY, #for derived tables
isolate VARCHAR (20) NOT NULL DEFAULT '', #-> isolates
contains VARCHAR (8) NOT NULL, #one of pro, rt, int, pro/rt ...
seq_name VARCHAR (20) NOT NULL DEFAULT '', #additional qualifyer
fasta_date DATE NULL, #creation of fasta (NOT sampling date)
fasta_header VARCHAR (80) NOT NULL DEFAULT '', #header of fasta file that contained the nt_seq
nt_seq TEXT NOT NULL, #nucletide sequence
nt_crc INTEGER UNSIGNED NOT NULL DEFAULT 0, #CRC32 value of uppercase nt_seq
species CHAR (4) NOT NULL, #'HIV','HBV','HCV'...
type CHAR (2) NOT NULL, #'1','2','A','B','C','D','E','F','G'
subtype VARCHAR (20) NOT NULL DEFAULT '', #subtype e.g. CRF03_AB
updID CHAR(4) NOT NULL,
updated TIMESTAMP,
UNIQUE KEY LOG_PK_seq (isolate,contains,seq_name),
FOREIGN KEY (isolate) REFERENCES tIsolates(isolate) ON DELETE RESTRICT
) Engine=InnoDB COMMENT='Raw NT sequences';
I'm sure the foreign keys are not necessary to reproduce the behaviour.
Function fSeqBefore:
DELIMITER //
DROP FUNCTION IF EXISTS fSeqBefore
//
CREATE FUNCTION fSeqBefore(i_patID INTEGER, i_date DATE) RETURNS INTEGER
BEGIN
DECLARE iso INTEGER;
DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET iso = 0;
SELECT i.isolate INTO iso
FROM tIsolates i INNER JOIN tRawSequences r ON (i.isolate=r.isolate)
WHERE i.patID=i_patID AND i.sampling_date <= i_date
ORDER BY i.sampling_date DESC LIMIT 1;
RETURN iso;
END;
//
DELIMITER ;