Bug #60023 No Loose Index Scan for GROUP BY / DISTINCT on InnoDB partitioned table
Submitted: 8 Feb 2011 21:45
Reporter: Rene' Cannao' Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S5 (Performance)
Version:5.5.8 , 5.1.51 OS:Any
Assigned to: CPU Architecture:Any
Tags: distinct, GROUP BY, partitioning

[8 Feb 2011 21:45] Rene' Cannao'
Description:
On very simple GROUP BY / DISTINCT queries, the optimizer doesn't perform a Loose Index Scan if the table is partitioned and uses InnoDB as storage engine.
On MyISAM partitioned table works fine.

Example query:
SELECT DISTINCT indexed_column FROM tablename;

If the same table is partitioned and uses InnoDB , in EXPLAIN the join type passes from "range" to "index", and the "extra" field from "Using index for group-by" to "Using index".

How to repeat:
Test case:

CREATE TABLE tbl1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a INT NOT NULL, KEY(a)) ENGINE=InnoDB;
INSERT INTO tbl1(a) VALUES (1), (2), (3), (4);
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;
INSERT INTO tbl1(a) SELECT a FROM tbl1;

OPTIMIZE TABLE tbl1;
EXPLAIN SELECT DISTINCT a FROM tbl1;
SELECT DISTINCT a FROM tbl1;
EXPLAIN SELECT a FROM tbl1 GROUP BY a;
SELECT a FROM tbl1 GROUP BY a;

mysql> EXPLAIN SELECT DISTINCT a FROM tbl1;
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    3 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

mysql> SELECT DISTINCT a FROM tbl1;
+---+
| a |
+---+
| 1 |
| 2 |
| 3 |
| 4 |
+---+
4 rows in set (0.00 sec)

mysql> EXPLAIN SELECT a FROM tbl1 GROUP BY a;
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    3 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

mysql> SELECT a FROM tbl1 GROUP BY a;
+---+
| a |
+---+
| 1 |
| 2 |
| 3 |
| 4 |
+---+
4 rows in set (0.00 sec)

ALTER TABLE tbl1  PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (32768), PARTITION p1 VALUES LESS THAN MAXVALUE);
OPTIMIZE TABLE tbl1;

mysql> EXPLAIN SELECT DISTINCT a FROM tbl1;
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows  | Extra       |
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
|  1 | SIMPLE      | tbl1  | index | NULL          | a    | 4       | NULL | 65158 | Using index |
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
1 row in set (0.00 sec)

mysql> SELECT DISTINCT a FROM tbl1;
+---+
| a |
+---+
| 1 |
| 2 |
| 3 |
| 4 |
+---+
4 rows in set (0.03 sec)

mysql> EXPLAIN SELECT a FROM tbl1 GROUP BY a;
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows  | Extra       |
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
|  1 | SIMPLE      | tbl1  | index | NULL          | a    | 4       | NULL | 65158 | Using index |
+----+-------------+-------+-------+---------------+------+---------+------+-------+-------------+
1 row in set (0.00 sec)

mysql> SELECT a FROM tbl1 GROUP BY a;
+---+
| a |
+---+
| 1 |
| 2 |
| 3 |
| 4 |
+---+
4 rows in set (0.03 sec)

ALTER TABLE tbl1 ENGINE=MyISAM;

mysql> EXPLAIN SELECT a FROM tbl1 GROUP BY a;
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    6 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT DISTINCT a FROM tbl1;
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    6 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

Suggested fix:
Implement Loose Index Scan also on InnoDB partitioned tables.
[8 Feb 2011 21:48] Rene' Cannao'
So far verified on 5.1.51 and 5.5.8 .

I think is quite similar/related to bug #50939 .
[8 Feb 2013 8:45] Valeriy Kravchuk
This looks fixed in 5.6.10:

...
mysql> select version();
+-----------+
| version() |
+-----------+
| 5.6.10    |
+-----------+
1 row in set (0.00 sec)

mysql> ALTER TABLE tbl1  PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (32768), PARTITION p1 VALUES LESS THAN MAXVALUE);
Query OK, 65536 rows affected (5.98 sec)
Records: 65536  Duplicates: 0  Warnings: 0

mysql> OPTIMIZE TABLE tbl1;
+-----------+----------+----------+-------------------------------------------------------------------+
| Table     | Op       | Msg_type | Msg_text                                                          |
+-----------+----------+----------+-------------------------------------------------------------------+
| test.tbl1 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| test.tbl1 | optimize | status   | OK                                                                |
+-----------+----------+----------+-------------------------------------------------------------------+
2 rows in set (6.93 sec)

mysql> EXPLAIN SELECT a FROM tbl1 GROUP BY a;                                   +----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    9 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT DISTINCT a FROM tbl1;
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | range | NULL          | a    | 4       | NULL |    9 | Using index for group-by |
+----+-------------+-------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

mysql> EXPLAIN PARTITIONS SELECT DISTINCT a FROM tbl1;
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+--------------------------+
| id | select_type | table | partitions | type  | possible_keys | key  | key_len | ref  | rows | Extra                    |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+--------------------------+
|  1 | SIMPLE      | tbl1  | p0,p1      | range | NULL          | a    | 4       | NULL |    9 | Using index for group-by |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+--------------------------+
1 row in set (0.00 sec)

Why status is wrong then?
[22 May 2014 12:13] Arnaud Adant
Valeriy : I think it is a statistics issue.

workaround : 

1. analyze table

2. select sleep(30); flush tables;

Here is an example :

drop table if exists tp;
CREATE TABLE tp (
   id bigint NOT NULL AUTO_INCREMENT,
   c1 int not null,
   created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (id,created_at),
  KEY  c1 (c1)
  ) ENGINE=InnoDB
  PARTITION BY RANGE ( UNIX_TIMESTAMP(created_at) ) (
  PARTITION p0 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-07 00:00:00') ),
  PARTITION p1 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-08 00:00:00') ),
  PARTITION p2 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-09 00:00:00') ),
  PARTITION p3 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-10 00:00:00') ),
  PARTITION p4 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-11 00:00:00') ),
  PARTITION p5 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-12 00:00:00') ),
  PARTITION p6 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-13 00:00:00') ),
  PARTITION p7 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-14 00:00:00') ),
  PARTITION p8 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-15 00:00:00') ),
  PARTITION p9 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-16 00:00:00') ),
  PARTITION p10 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-17 00:00:00') ),
  PARTITION p11 VALUES LESS THAN ( UNIX_TIMESTAMP('2014-05-18 00:00:00') ),
  PARTITION p12 VALUES LESS THAN (MAXVALUE)
  ); 
truncate table tp; 
insert into tp(c1) values(2);
insert into tp(c1) values(1);

replace into tp(created_at, c1) 
select date_sub('2014-05-18', interval 10*rand() day), floor(rand()*11) from tp t1, tp t2, tp t3,
tp t4, tp t5, tp t6, tp t7,
tp t8, tp t9, tp t10, tp t11,
tp t12, tp t13, tp t14, tp t15;

explain partitions select distinct c1 from tp\G

analyze table tp;

explain partitions select distinct c1 from tp\G

alter table tp remove partitioning;
analyze table tp;
explain partitions select distinct c1 from tp\G

flush tables;

explain partitions select distinct c1 from tp\G