Bug #11211 GROUP BY doesn't work correctly
Submitted: 9 Jun 2005 16:10 Modified: 23 Jun 2005 5:07
Reporter: Francois MASUREL Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S1 (Critical)
Version:5.0.6 OS:Microsoft Windows (Windows XP)
Assigned to: Timour Katchaounov CPU Architecture:Any

[9 Jun 2005 16:10] Francois MASUREL
Description:
A GROUP BY query sends duplicate results.

When I remove the 'object_guid' field in the usergroup_parent table, everything works fine.

Quite strange...

How to repeat:
-- MySQL Administrator dump 1.4
--
-- ------------------------------------------------------
-- Server version	5.0.6-beta-nt

/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;

/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;

--
-- Create schema test
--

CREATE DATABASE /*!32312 IF NOT EXISTS*/ test;
USE test;

--
-- Table structure for table `test`.`object_acl`
--

DROP TABLE IF EXISTS `object_acl`;
CREATE TABLE `object_acl` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `id_obj_acl` int(10) unsigned NOT NULL default '0',
  `id_group` int(10) unsigned NOT NULL default '0',
  PRIMARY KEY  (`id`),
  UNIQUE KEY `object_group` (`id_obj_acl`,`id_group`),
  KEY `id_obj_acl` (`id_obj_acl`),
  KEY `id_group` (`id_group`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

--
-- Dumping data for table `test`.`object_acl`
--

/*!40000 ALTER TABLE `object_acl` DISABLE KEYS */;
INSERT INTO `object_acl` (`id`,`id_obj_acl`,`id_group`) VALUES 
 (6671,3253,3276),
 (6675,3255,3276),
 (6742,3278,3276);
/*!40000 ALTER TABLE `object_acl` ENABLE KEYS */;

--
-- Table structure for table `test`.`objects`
--

DROP TABLE IF EXISTS `objects`;
CREATE TABLE `objects` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `id_parent` int(10) unsigned default NULL,
  `guid` varchar(21) NOT NULL default '',
  PRIMARY KEY  (`id`),
  KEY `id_parent` (`id_parent`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

--
-- Dumping data for table `test`.`objects`
--

/*!40000 ALTER TABLE `objects` DISABLE KEYS */;
INSERT INTO `objects` (`id`,`id_parent`,`guid`) VALUES 
 (3253,1,'3253_3912'),
 (3255,1,'3255_3912'),
 (3278,1,'3278_3912');
/*!40000 ALTER TABLE `objects` ENABLE KEYS */;

--
-- Table structure for table `test`.`user`
--

DROP TABLE IF EXISTS `user`;
CREATE TABLE `user` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `id_object` int(10) unsigned default NULL,
  `login` varchar(20) NOT NULL default '',
  PRIMARY KEY  (`id`),
  UNIQUE KEY `login` (`login`),
  UNIQUE KEY `id_object` (`id_object`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

--
-- Dumping data for table `test`.`user`
--

/*!40000 ALTER TABLE `user` DISABLE KEYS */;
INSERT INTO `user` (`id`,`id_object`,`login`) VALUES 
 (1,3253,'admin'),
 (5,3278,'anonymous'),
 (6,3255,'f.masurel');
/*!40000 ALTER TABLE `user` ENABLE KEYS */;

--
-- Table structure for table `test`.`usergroup_parent`
--

DROP TABLE IF EXISTS `usergroup_parent`;
CREATE TABLE `usergroup_parent` (
  `object_id` int(10) unsigned NOT NULL default '0',
  `object_id_parent` int(10) unsigned default NULL,
  `object_guid` varchar(21) NOT NULL default '',
  `usergroup_parent_id_parent` int(10) unsigned default NULL,
  `usergroup_parent_id_child` int(10) unsigned default NULL,
  PRIMARY KEY  (`object_id`),
  KEY `object_id_parent` (`object_id_parent`),
  KEY `usergroup_parent_id_parent` (`usergroup_parent_id_parent`),
  KEY `usergroup_parent_id_child` (`usergroup_parent_id_child`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

--
-- Dumping data for table `test`.`usergroup_parent`
--

/*!40000 ALTER TABLE `usergroup_parent` DISABLE KEYS */;
INSERT INTO `usergroup_parent` (`object_id`,`object_id_parent`,`object_guid`,`usergroup_parent_id_parent`,`usergroup_parent_id_child`) VALUES 
 (3386,3255,'3386_3916',3255,3255),
 (3391,3253,'3391_3916',3253,3253),
 (3393,3278,'3393_3916',3278,3278),
 (30315,3253,'30315_3916',3276,3253),
 (30316,3278,'30316_3916',3275,3278),
 (30322,3255,'30322_3916',3276,3255);
/*!40000 ALTER TABLE `usergroup_parent` ENABLE KEYS */;

--
-- Table structure for table `test`.`usergroup_tree`
--

DROP TABLE IF EXISTS `usergroup_tree`;
CREATE TABLE `usergroup_tree` (
  `id` int(10) unsigned NOT NULL auto_increment,
  `id_parent` int(10) unsigned NOT NULL default '0',
  `id_child` int(10) unsigned NOT NULL default '0',
  PRIMARY KEY  (`id`),
  UNIQUE KEY `parent_child` (`id_parent`,`id_child`),
  KEY `id_child` (`id_child`),
  KEY `id_parent` (`id_parent`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

--
-- Dumping data for table `test`.`usergroup_tree`
--

/*!40000 ALTER TABLE `usergroup_tree` DISABLE KEYS */;
INSERT INTO `usergroup_tree` (`id`,`id_parent`,`id_child`) VALUES 
 (69,3253,3253),
 (66,3255,3255),
 (71,3275,3253),
 (68,3275,3255),
 (73,3275,3278),
 (70,3276,3253),
 (96,3276,3255);
/*!40000 ALTER TABLE `usergroup_tree` ENABLE KEYS */;

/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;

SELECT object.guid AS object_guid, user.login AS user_login FROM objects AS object
INNER JOIN user AS user ON (user.id_object = object.id)
INNER JOIN object_acl ON object_acl.id_obj_acl = object.id
INNER JOIN usergroup_tree AS usergrouptree ON usergrouptree.id_parent = object_acl.id_group
LEFT JOIN usergroup_parent AS c3916 ON (c3916.object_id_parent = object.id)
LEFT JOIN usergroup_tree AS usergroup_tree_c3916 ON (c3916.usergroup_parent_id_parent = usergroup_tree_c3916.id_child)
GROUP BY object_guid, user_login ORDER BY object_guid;
[9 Jun 2005 16:54] Miguel Solorzano
The duplicates I got using the columns alias in GROUP BY, however
without alias works as expected:

mysql> SELECT object.guid AS object_guid, user.login AS user_login FROM
    -> objects AS object
    -> INNER JOIN user AS user ON (user.id_object = object.id)
    -> INNER JOIN object_acl ON object_acl.id_obj_acl = object.id
    -> INNER JOIN usergroup_tree AS usergrouptree ON usergrouptree.id_parent =
    -> object_acl.id_group
    -> LEFT JOIN usergroup_parent AS c3916 ON (c3916.object_id_parent =
    -> object.id)
    -> LEFT JOIN usergroup_tree AS usergroup_tree_c3916 ON
    -> (c3916.usergroup_parent_id_parent = usergroup_tree_c3916.id_child)
    -> GROUP BY object.guid, user.login ORDER BY object_guid;
+-------------+------------+
| object_guid | user_login |
+-------------+------------+
| 3253_3912   | admin      |
| 3255_3912   | f.masurel  |
| 3278_3912   | anonymous  |
+-------------+------------+
3 rows in set (0.00 sec)
[14 Jun 2005 12:24] Timour Katchaounov
This is not a bug, but expected behavior. Detailed explanation:

The problem is that the test query groups by a column that is ambiguous from
user's perspective. It seems that the user wanted to group by a column alias,
but didn't notice that there is another column in a different table that has the
same name.

Notice that  "object_guid" is a column in table "usergroup_parent". It is not clear
what is the intent of the query - group by "objects.guid" which is aliased as "object_guid"
or by "usergroup_parent.object_guid" which is aliased as "c3916.object_guid".

The current name resolution algorithm for GROUP BY searches first the FROM clause
for column references, and only if a column reference is not found there, it searches
the SELECT clause. As a result, in the test query we actually group by column
"c3916.object_guid" which is an alias for "usergroup_parent.object_guid". If one selects
that column, one will see that indeed things are grouped correctly. So in this case the
alias "object_guid" in the SELECT clause is not used for grouping at all and there
is no bug.

On the other hand, as Miguel wrote, there is another query, where we can group by
the column "object.guid", which is an alias for "objects.guid", and then we get the
expected result. Notice that this query is *NOT* equivalent to the previous one!
From the bug report I assume that the initial intent was to get the result of the
second query, but the user has made a mistake by using an alias named as a
column from a different table.

It is an open question whether we should detect such dangerous cases and
issue an error or a warning.

As a side note - I noteced that in many cases in the submitted database the
user have named different things with the same names. This is generally a bad
idea as it may lead to similar problems where things work, but not as expected
due to one name overshadowing another.
[14 Jun 2005 13:21] Timour Katchaounov
Ok, after all this is not so simple since we resolve first in the SELECT list, and only then
check in the FROM clause. I will investigate this a bit more. However, my previous comment
is valid in that the two queries are not equivalent, and that the "object_guid" in both queries
is resolved to a column from a different table. This is the reason why the two queries produce
different result - because they are not equivalent.
[14 Jun 2005 14:23] Francois MASUREL
You should notice that it works perfectly fine on mysql 4.1.12.

Francois
[14 Jun 2005 16:38] Bugs System
A patch for this bug has been committed. After review, it may
be pushed to the relevant source trees for release in the next
version. You can access the patch from:

  http://lists.mysql.com/internals/25986
[14 Jun 2005 17:03] Timour Katchaounov
Francois, you are perfectly right - it works as you say in 4.1. The reason is that in 4.1 we only
check whether GROUP columns are in the SELECT list, but we don't check the FROM clause.

Our goal is to make 5.0 SQL standard compliant, and the standard says:
"
1) Each <grouping column reference> shall unambiguously reference a column of the table resulting from the <from clause>. A column referenced in a <group by clause> is a grouping column.
"

On the other hand we preserve the MySQL extension that allows to group by column
references in the SELECT clause. However, in case there is a name resolution conflict,
we preserve the standard behavior.

The patch I suggest issues a warning in such cases.
[15 Jun 2005 7:13] Bugs System
A patch for this bug has been committed. After review, it may
be pushed to the relevant source trees for release in the next
version. You can access the patch from:

  http://lists.mysql.com/internals/26009
[16 Jun 2005 15:06] Bugs System
A patch for this bug has been committed. After review, it may
be pushed to the relevant source trees for release in the next
version. You can access the patch from:

  http://lists.mysql.com/internals/26060
[22 Jun 2005 9:00] Timour Katchaounov
Pushed in version 5.0.8.
[23 Jun 2005 5:07] Jon Stephens
Thank you for your bug report. This issue has been committed to our
source repository of that product and will be incorporated into the
next release.

If necessary, you can access the source repository and build the latest
available version, including the bugfix, yourself. More information 
about accessing the source trees is available at
    http://www.mysql.com/doc/en/Installing_source_tree.html

Additional info:

Documented in 5.0.8 changelog; maked as Closed.