Description:
When the results of a select statement are inserted into a table instead of being returned to the
client the query is much slower. The difference in execution time depends on the size of the source table.
It appears as if a table scan is being used with the insert into.
Outout from how to repeat is attached.
How to repeat:
-- Test case to show performance problem when using insert into table select...
-- The select statement takes a fraction of a second when executed by itself.
-- However when we attempt to insert the results into a table it is much slower.
-- Using a 40,000,000 row table the times differ by a factor of 100.
drop database if exists insert_select_perf_bug;
create database insert_select_perf_bug;
use insert_select_perf_bug;
-- create some noddy tables used to populate test table
create table zTo9 (col1 int(10));
insert into zTo9 values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
create table zTo9999 (col1 int(10));
insert into zTo9999 select a.col1 * 1000 + b.col1 * 100 + c.col1 * 10 + d.col1 from zTo9 a, zTo9 b, zTo9 c, zTo9 d;
CREATE TABLE `dss_TestStreamNo1` (
`dsStreamInstId` int(10) unsigned NOT NULL,
`dsFirstTime` int(10) unsigned NOT NULL,
`dsc_testCounter1` smallint(5) unsigned default NULL,
`dsc_testCounter2` smallint(5) unsigned default NULL,
PRIMARY KEY (`dsStreamInstId`,`dsFirstTime`)
)
ENGINE=MyISAM
;
-- Insert maxId * maxtimestamp rows into table
select @maxIds := 50;
select @maxtimestamp := 10000;
select @halftimestamp := @maxtimestamp / 2;
insert into dss_TestStreamNo1 select a.col1, b.col1, 0, 0 from zTo9999 a, zTo9999 b where a.col1 < @maxIds and b.col1 < @maxtimestamp;
show variables;
flush status;
-- simple select - should return @maxIds rows
explain select dsStreamInstId, max(dsFirstTime) as dsFirstTime from dss_TestStreamNo1 where dsFirstTime <= @halftimestamp group by dsStreamInstId;
show status;
flush status;
-- simple select - should return @maxIds rows
select dsStreamInstId, max(dsFirstTime) as dsFirstTime from dss_TestStreamNo1 where dsFirstTime <= @halftimestamp group by dsStreamInstId;
show status;
flush status;
-- select into a table. this takes much longer, appears to be doing a table scan.
create table datatable2 select dsStreamInstId, max(dsFirstTime) as dsFirstTime from dss_TestStreamNo1 where dsFirstTime <= @halftimestamp group by dsStreamInstId;
show status;
flush status;
-- select into outfile is OK.
select dsStreamInstId, max(dsFirstTime) as dsFirstTime from dss_TestStreamNo1 where dsFirstTime <= @halftimestamp group by dsStreamInstId into outfile 'mydata' ;
show status;