Bug #60702 GROUP BY orders when used inside a UNION query, though it should not
Submitted: 30 Mar 2011 23:25 Modified: 31 Mar 2011 3:26
Reporter: Alex Bolenok Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S5 (Performance)
Version:5.1.57 OS:Any
Assigned to: CPU Architecture:Any

[30 Mar 2011 23:25] Alex Bolenok
Description:
In a UNION of two queries, one of both of them containing a GROUP BY, the results of GROUP BY are ordered, which is shown as "filesort" in the query plan, despite the fact that UNION does not preserve the order of individual queries.

This can not be turned off using ORDER BY NULL, because the optimizer throws it away as described here: http://dev.mysql.com/doc/refman/5.1/en/union.html

The can only be worked around by adding a LIMIT large enough to each query, along with ORDER BY NULL

How to repeat:
CREATE TABLE filler (
        id INT NOT NULL PRIMARY KEY AUTO_INCREMENT
) ENGINE=Memory;

CREATE TABLE grouping (
        id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
        value1 INT NOT NULL,
        value2 INT NOT NULL
) ENGINE=InnoDB;

DELIMITER $$

CREATE PROCEDURE prc_filler(cnt INT)
BEGIN
        DECLARE _cnt INT;
        SET _cnt = 1;
        WHILE _cnt <= cnt DO
                INSERT
                INTO    filler
                SELECT  _cnt;
                SET _cnt = _cnt + 1;
        END WHILE;
END
$$

DELIMITER ;

START TRANSACTION;
CALL prc_filler(100000);
COMMIT;

INSERT
INTO    grouping (value1, value2)
SELECT  CEILING(RAND(20110330) * 300000),
        CEILING(RAND(20110330 << 1) * 300000)
FROM    filler
CROSS JOIN
        (
        SELECT  id
        FROM    filler
        LIMIT 30
        ) q;

EXPLAIN
SELECT  value1 AS value
FROM    grouping
GROUP BY
        value1
UNION
SELECT  value2 AS value
FROM    grouping
GROUP BY
        value2
LIMIT 10;

id      select_type     table   type    possible_keys   key     key_len ref rows    Extra
1       PRIMARY grouping        ALL     NULL    NULL    NULL    NULL    3000279 Using temporary; Using filesort
2       UNION   grouping        ALL     NULL    NULL    NULL    NULL    3000279 Using temporary; Using filesort
NULL    UNION RESULT    <union1,2>      ALL     NULL    NULL    NULL    NULL NULL

EXPLAIN
(
SELECT  value1 AS value
FROM    grouping
GROUP BY
        value1
ORDER BY
        NULL
)
UNION
(
SELECT  value2 AS value
FROM    grouping
GROUP BY
        value2
ORDER BY
        NULL
)
LIMIT 10;

id      select_type     table   type    possible_keys   key     key_len ref
rows    Extra
1       PRIMARY grouping        ALL     NULL    NULL    NULL    NULL    3000279
Using temporary; Using filesort
2       UNION   grouping        ALL     NULL    NULL    NULL    NULL    3000279
Using temporary; Using filesort
NULL    UNION RESULT    <union1,2>      ALL     NULL    NULL    NULL    NULL
NULL

EXPLAIN
(
SELECT  value1 AS value
FROM    grouping
GROUP BY
        value1
ORDER BY
        NULL
LIMIT 10000000000
)
UNION
(
SELECT  value2 AS value
FROM    grouping
GROUP BY
        value2
ORDER BY
        NULL
LIMIT 10000000000
)
LIMIT 10;

id      select_type     table   type    possible_keys   key     key_len ref
rows    Extra
1       PRIMARY grouping        ALL     NULL    NULL    NULL    NULL    3000279
Using temporary
2       UNION   grouping        ALL     NULL    NULL    NULL    NULL    3000279
Using temporary
NULL    UNION RESULT    <union1,2>      ALL     NULL    NULL    NULL    NULL
NULL

Suggested fix:
Fix the optimizer so that GROUP BY does not order when used in a query which is a part of a UNION
[31 Mar 2011 3:26] Valeriy Kravchuk
Verified with current mysql-5.1 from bzr:

macbook-pro:5.1 openxs$ bin/mysql -uroot test
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 5.1.57-debug Source distribution

Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
This software comes with ABSOLUTELY NO WARRANTY. This is free software,
and you are welcome to modify and redistribute it under the GPL v2 license

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> CREATE TABLE filler (
    ->         id INT NOT NULL PRIMARY KEY AUTO_INCREMENT
    -> ) ENGINE=Memory;
Query OK, 0 rows affected (0.11 sec)

mysql> CREATE TABLE grouping (
    ->         id INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
    ->         value1 INT NOT NULL,
    ->         value2 INT NOT NULL
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected (0.17 sec)

mysql> delimiter $$
mysql> CREATE PROCEDURE prc_filler(cnt INT)
    -> BEGIN
    ->         DECLARE _cnt INT;
    ->         SET _cnt = 1;
    ->         WHILE _cnt <= cnt DO
    ->                 INSERT
    ->                 INTO    filler
    ->                 SELECT  _cnt;
    ->                 SET _cnt = _cnt + 1;
    ->         END WHILE;
    -> END
    -> $$
Query OK, 0 rows affected (0.02 sec)

mysql> delimiter ;
mysql> START TRANSACTION;
Query OK, 0 rows affected (0.00 sec)

mysql> CALL prc_filler(100000);
Query OK, 1 row affected (23.53 sec)

mysql> COMMIT;
Query OK, 0 rows affected (0.00 sec)

mysql> INSERT
    -> INTO    grouping (value1, value2)
    -> SELECT  CEILING(RAND(20110330) * 300000),
    ->         CEILING(RAND(20110330 << 1) * 300000)
    -> FROM    filler
    -> CROSS JOIN
    ->         (
    ->         SELECT  id
    ->         FROM    filler
    ->         LIMIT 30
    ->         ) q;
Query OK, 3000000 rows affected (1 min 32.91 sec)
Records: 3000000  Duplicates: 0  Warnings: 0

mysql> EXPLAIN
    -> SELECT  value1 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value1
    -> UNION
    -> SELECT  value2 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value2
    -> LIMIT 10;
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
| id | select_type  | table      | type | possible_keys | key  | key_len | ref  | rows    | Extra                           |
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
|  1 | PRIMARY      | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary; Using filesort |
|  2 | UNION        | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary; Using filesort |
| NULL | UNION RESULT | <union1,2> | ALL  | NULL          | NULL | NULL    | NULL |    NULL |                                 |
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
3 rows in set (0.00 sec)

mysql> EXPLAIN
    -> (
    -> SELECT  value1 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value1
    -> ORDER BY
    ->         NULL
    -> )
    -> UNION
    -> (
    -> SELECT  value2 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value2
    -> ORDER BY
    ->         NULL
    -> )
    -> LIMIT 10;
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
| id | select_type  | table      | type | possible_keys | key  | key_len | ref  | rows    | Extra                           |
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
|  1 | PRIMARY      | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary; Using filesort |
|  2 | UNION        | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary; Using filesort |
| NULL | UNION RESULT | <union1,2> | ALL  | NULL          | NULL | NULL    | NULL |    NULL |                                 |
+----+--------------+------------+------+---------------+------+---------+------+---------+---------------------------------+
3 rows in set (0.01 sec)

mysql> EXPLAIN
    -> (
    -> SELECT  value1 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value1
    -> ORDER BY
    ->         NULL
    -> LIMIT 10000000000
    -> )
    -> UNION
    -> (
    -> SELECT  value2 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value2
    -> ORDER BY
    ->         NULL
    -> LIMIT 10000000000
    -> )
    -> LIMIT 10;
+----+--------------+------------+------+---------------+------+---------+------+---------+-----------------+
| id | select_type  | table      | type | possible_keys | key  | key_len | ref  | rows    | Extra           |
+----+--------------+------------+------+---------------+------+---------+------+---------+-----------------+
|  1 | PRIMARY      | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary |
|  2 | UNION        | grouping   | ALL  | NULL          | NULL | NULL    | NULL | 3000422 | Using temporary |
| NULL | UNION RESULT | <union1,2> | ALL  | NULL          | NULL | NULL    | NULL |    NULL |                 |
+----+--------------+------------+------+---------------+------+---------+------+---------+-----------------+
3 rows in set (0.00 sec)

I think this is more about unexpected (wrong) result than performance:

mysql> (
    -> SELECT  value1 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value1
    -> ORDER BY
    ->         NULL
    -> LIMIT 10000000000
    -> )
    -> UNION
    -> (
    -> SELECT  value2 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value2
    -> ORDER BY
    ->         NULL
    -> LIMIT 10000000000
    -> )
    -> LIMIT 10;
+--------+
| value  |
+--------+
|  12462 |
| 205466 |
|  89941 |
| 133309 |
|  96722 |
|  83683 |
| 128249 |
|  90196 |
|  66232 |
|  60571 |
+--------+
10 rows in set (22.26 sec)

mysql> (
    -> SELECT  value1 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value1
    -> ORDER BY
    ->         NULL
    -> )
    -> UNION
    -> (
    -> SELECT  value2 AS value
    -> FROM    grouping
    -> GROUP BY
    ->         value2
    -> ORDER BY
    ->         NULL
    -> )
    -> LIMIT 10;
+-------+
| value |
+-------+
|     1 |
|     2 |
|     3 |
|     4 |
|     5 |
|     6 |
|     7 |
|     8 |
|     9 |
|    10 |
+-------+
10 rows in set (23.26 sec)