Description:
This is a simplified example of a query that TSBS, Time Series Benchmark Suite, runs. I have work in progress to get TSBS running for MySQL.
For the test case, there is a table "cpu" that has an index on (id, ts) and a range scan is done with predicates on id and ts for queries like:
select * from cpu where ts > ? and ts < ? and id in (1)
select * from cpu where ts > ? and ts < ? and id in (1, 3)
When the last part of the query is changed to a subquery like:
... and id in (select tags_id from tags where h = ?)
Then a range scan is still done on cpu, but the index predicate (via keylen in explain output) is limited to the first index column (id) unless I add a unique index on tags.h. I assume the benefit of that unique index is to allow the subquery to be executed during optimization.
For the real query I can't add a unique index on tags.h. Therefore, the query plans that I get ends up scanning too much from the cpu table.
How to repeat:
Part 1 -> this does a single table query on cpu to show that MySQL can use predicates on both index columns whether or not the in-list for tags_id is limited to one or more than one value...
drop table if exists tags;
drop table if exists cpu;
create table tags (hostname varchar(256), id int primary key);
create table cpu (tags_id int not null, ts int not null);
insert into tags(hostname, id) values ('h1', 1), ('h2', 2), ('h3', 3), ('h4', 4), ('h5', 5);
insert into cpu(tags_id, ts) values (1,1), (1,2), (1,3), (1,4), (1,5);
insert into cpu(tags_id, ts) values (2,1), (2,2), (2,3), (2,4), (2,5);
insert into cpu(tags_id, ts) values (3,1), (3,2), (3,3), (3,4), (3,5);
insert into cpu(tags_id, ts) values (4,1), (4,2), (4,3), (4,4), (4,5);
create index x on tags(hostname);
analyze table tags;
create index id_ts on cpu(tags_id, ts);
analyze table cpu;
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1)\G
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1,3)\G
alter table tags drop index x;
create unique index x on tags(hostname);
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1)\G
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1,3)\G
And an example plan is shows that keylen=8, so predicates are used for both indexed columns (equality for id, range for ts)
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: cpu
partitions: NULL
type: range
possible_keys: id_ts
key: id_ts
key_len: 8
ref: NULL
rows: 2
filtered: 100.00
Extra: Using where; Using index
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: cpu
partitions: NULL
type: range
possible_keys: id_ts
key: id_ts
key_len: 8
ref: NULL
rows: 4
filtered: 100.00
Extra: Using where; Using index
Part 2 -> this does the real query and the only way to get keylen=8 is to add a unique index on tags.h, but that won't work for the real application.
drop table if exists tags;
drop table if exists cpu;
create table tags (hostname varchar(256), id int primary key);
create table cpu (tags_id int not null, ts int not null);
insert into tags(hostname, id) values ('h1', 1), ('h2', 2), ('h3', 3), ('h4', 4), ('h5', 5);
insert into cpu(tags_id, ts) values (1,1), (1,2), (1,3), (1,4), (1,5);
insert into cpu(tags_id, ts) values (2,1), (2,2), (2,3), (2,4), (2,5);
insert into cpu(tags_id, ts) values (3,1), (3,2), (3,3), (3,4), (3,5);
insert into cpu(tags_id, ts) values (4,1), (4,2), (4,3), (4,4), (4,5);
create index x on tags(hostname);
analyze table tags;
create index id_ts on cpu(tags_id, ts);
analyze table cpu;
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1')\G
set optimizer_trace="enabled=on";
select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1');
select * from information_schema.optimizer_trace\G
set optimizer_trace="enabled=off";
alter table tags drop index x;
create unique index x on tags(hostname);
analyze table tags;
explain select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1')\G
set optimizer_trace="enabled=on";
select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1');
select * from information_schema.optimizer_trace\G
set optimizer_trace="enabled=off";
And this is the plan when there isn't a unique index and keylen=4
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: tags
partitions: NULL
type: ref
possible_keys: PRIMARY,x
key: x
key_len: 259
ref: const
rows: 1
filtered: 100.00
Extra: Using index
*************************** 2. row ***************************
id: 1
select_type: SIMPLE
table: cpu
partitions: NULL
type: ref
possible_keys: id_ts
key: id_ts
key_len: 4
ref: t.tags.id
rows: 5
filtered: 11.11
Extra: Using where; Using index
Description: This is a simplified example of a query that TSBS, Time Series Benchmark Suite, runs. I have work in progress to get TSBS running for MySQL. For the test case, there is a table "cpu" that has an index on (id, ts) and a range scan is done with predicates on id and ts for queries like: select * from cpu where ts > ? and ts < ? and id in (1) select * from cpu where ts > ? and ts < ? and id in (1, 3) When the last part of the query is changed to a subquery like: ... and id in (select tags_id from tags where h = ?) Then a range scan is still done on cpu, but the index predicate (via keylen in explain output) is limited to the first index column (id) unless I add a unique index on tags.h. I assume the benefit of that unique index is to allow the subquery to be executed during optimization. For the real query I can't add a unique index on tags.h. Therefore, the query plans that I get ends up scanning too much from the cpu table. How to repeat: Part 1 -> this does a single table query on cpu to show that MySQL can use predicates on both index columns whether or not the in-list for tags_id is limited to one or more than one value... drop table if exists tags; drop table if exists cpu; create table tags (hostname varchar(256), id int primary key); create table cpu (tags_id int not null, ts int not null); insert into tags(hostname, id) values ('h1', 1), ('h2', 2), ('h3', 3), ('h4', 4), ('h5', 5); insert into cpu(tags_id, ts) values (1,1), (1,2), (1,3), (1,4), (1,5); insert into cpu(tags_id, ts) values (2,1), (2,2), (2,3), (2,4), (2,5); insert into cpu(tags_id, ts) values (3,1), (3,2), (3,3), (3,4), (3,5); insert into cpu(tags_id, ts) values (4,1), (4,2), (4,3), (4,4), (4,5); create index x on tags(hostname); analyze table tags; create index id_ts on cpu(tags_id, ts); analyze table cpu; explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1)\G explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1,3)\G alter table tags drop index x; create unique index x on tags(hostname); explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1)\G explain select * from cpu where ts > 1 and ts < 4 and tags_id in (1,3)\G And an example plan is shows that keylen=8, so predicates are used for both indexed columns (equality for id, range for ts) *************************** 1. row *************************** id: 1 select_type: SIMPLE table: cpu partitions: NULL type: range possible_keys: id_ts key: id_ts key_len: 8 ref: NULL rows: 2 filtered: 100.00 Extra: Using where; Using index *************************** 1. row *************************** id: 1 select_type: SIMPLE table: cpu partitions: NULL type: range possible_keys: id_ts key: id_ts key_len: 8 ref: NULL rows: 4 filtered: 100.00 Extra: Using where; Using index Part 2 -> this does the real query and the only way to get keylen=8 is to add a unique index on tags.h, but that won't work for the real application. drop table if exists tags; drop table if exists cpu; create table tags (hostname varchar(256), id int primary key); create table cpu (tags_id int not null, ts int not null); insert into tags(hostname, id) values ('h1', 1), ('h2', 2), ('h3', 3), ('h4', 4), ('h5', 5); insert into cpu(tags_id, ts) values (1,1), (1,2), (1,3), (1,4), (1,5); insert into cpu(tags_id, ts) values (2,1), (2,2), (2,3), (2,4), (2,5); insert into cpu(tags_id, ts) values (3,1), (3,2), (3,3), (3,4), (3,5); insert into cpu(tags_id, ts) values (4,1), (4,2), (4,3), (4,4), (4,5); create index x on tags(hostname); analyze table tags; create index id_ts on cpu(tags_id, ts); analyze table cpu; explain select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1')\G set optimizer_trace="enabled=on"; select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1'); select * from information_schema.optimizer_trace\G set optimizer_trace="enabled=off"; alter table tags drop index x; create unique index x on tags(hostname); analyze table tags; explain select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1')\G set optimizer_trace="enabled=on"; select * from cpu where ts > 1 and ts < 4 and tags_id in (select id from tags where hostname = 'h1'); select * from information_schema.optimizer_trace\G set optimizer_trace="enabled=off"; And this is the plan when there isn't a unique index and keylen=4 *************************** 1. row *************************** id: 1 select_type: SIMPLE table: tags partitions: NULL type: ref possible_keys: PRIMARY,x key: x key_len: 259 ref: const rows: 1 filtered: 100.00 Extra: Using index *************************** 2. row *************************** id: 1 select_type: SIMPLE table: cpu partitions: NULL type: ref possible_keys: id_ts key: id_ts key_len: 4 ref: t.tags.id rows: 5 filtered: 11.11 Extra: Using where; Using index