Bug #28936 Group by returning incorrect values when sub-query is involved
Submitted: 7 Jun 2007 0:33 Modified: 7 Jun 2007 7:51
Reporter: Mike Benoit Email Updates:
Status: Can't repeat Impact on me:
None 
Category:MySQL Server: General Severity:S1 (Critical)
Version:5.0.41-Max OS:Linux
Assigned to: CPU Architecture:Any

[7 Jun 2007 0:33] Mike Benoit
Description:
When I add a group by clause to this query it starts behaving strange. Adding NULL values where they didn't exist, etc...

How to repeat:
CREATE TABLE user_date (
    id integer AUTO_INCREMENT NOT NULL,
    user_id integer NOT NULL,
    pay_period_id integer NOT NULL,
    date_stamp date NOT NULL,
	PRIMARY KEY(id)
) ENGINE=InnoDB;

CREATE TABLE user_date_total (
    id integer AUTO_INCREMENT NOT NULL,
    user_date_id integer NOT NULL,
    status_id integer NOT NULL,
    type_id integer NOT NULL,
    punch_control_id integer,
    over_time_policy_id integer,
    absence_policy_id integer,
    premium_policy_id integer,
    total_time integer DEFAULT 0 NOT NULL,
	PRIMARY KEY(id)
) ENGINE=InnoDB;

CREATE TABLE user_wage (
    id integer AUTO_INCREMENT NOT NULL,
    user_id integer NOT NULL,
    type_id integer NOT NULL,
    wage numeric(9,2) NOT NULL,
    effective_date date,
	PRIMARY KEY(id)
) ENGINE=InnoDB;

insert into user_date VALUES(147,40,1103,"2006-01-01");
insert into user_date VALUES(148,40,1103,"2006-01-02");

insert into user_wage VALUES(DEFAULT,40,10,19.50,"2004-01-01");
insert into user_wage VALUES(DEFAULT,40,10,19.75,"2004-06-01");
insert into user_wage VALUES(DEFAULT,40,10,20.15,"2005-06-01");
insert into user_wage VALUES(DEFAULT,40,10,21.50,"2006-01-01");

insert into user_date_total VALUES(DEFAULT, 147,10,20,0,0,0,0,28800);
insert into user_date_total VALUES(DEFAULT, 147,10,30,0,120,0,0,3600);
insert into user_date_total VALUES(DEFAULT, 147,10,40,0,0,0,8,10800);
insert into user_date_total VALUES(DEFAULT, 147,10,40,0,0,0,8,7200);
insert into user_date_total VALUES(DEFAULT, 147,10,40,0,0,0,8,14400);
insert into user_date_total VALUES(DEFAULT, 147,10,10,0,0,0,0,32400);
insert into user_date_total VALUES(DEFAULT, 148,10,20,0,0,0,0,28800);
insert into user_date_total VALUES(DEFAULT, 148,10,30,0,120,0,0,3600);
insert into user_date_total VALUES(DEFAULT, 148,10,10,0,0,0,0,32400);

--Regular Query, everything seems fine, no NULL values at all.
select  a.status_id as status_id,
a.type_id as type_id,
a.over_time_policy_id as over_time_policy_id,
z.id as user_wage_id,
z.effective_date as user_wage_effective_date
from    user_date_total as a
LEFT JOIN user_date as b ON a.user_date_id = b.id
LEFT JOIN user_wage as z ON z.id = (select z.id
	from user_wage as z
	where z.user_id = b.user_id
	and z.effective_date <= b.date_stamp
	order by z.effective_date desc limit 1)
where
b.user_id = '40'
AND b.pay_period_id = '1103'
AND b.date_stamp <= '2006-01-14'
AND a.status_id in (10,30)

+-----------+---------+---------------------+--------------+--------------------------+
| status_id | type_id | over_time_policy_id | user_wage_id | user_wage_effective_date |
+-----------+---------+---------------------+--------------+--------------------------+
|        10 |      20 |                   0 |            4 | 2006-01-01               |
|        10 |      30 |                 120 |            4 | 2006-01-01               |
|        10 |      40 |                   0 |            4 | 2006-01-01               |
|        10 |      40 |                   0 |            4 | 2006-01-01               |
|        10 |      40 |                   0 |            4 | 2006-01-01               |
|        10 |      10 |                   0 |            4 | 2006-01-01               |
|        10 |      20 |                   0 |            4 | 2006-01-01               |
|        10 |      30 |                 120 |            4 | 2006-01-01               |
|        10 |      10 |                   0 |            4 | 2006-01-01               |
+-----------+---------+---------------------+--------------+--------------------------+
9 rows in set (0.00 sec)

-- Same query only with group BY added: Where did the NULL values come from?
select  a.status_id as status_id,
a.type_id as type_id,
a.over_time_policy_id as over_time_policy_id,
z.id as user_wage_id,
z.effective_date as user_wage_effective_date,
sum(total_time) as total_time
from    user_date_total as a
LEFT JOIN user_date as b ON a.user_date_id = b.id
LEFT JOIN user_wage as z ON z.id = (select z.id
	from user_wage as z
	where z.user_id = b.user_id
	and z.effective_date <= b.date_stamp
	order by z.effective_date desc limit 1)
where
b.user_id = '40'
AND b.pay_period_id = '1103'
AND b.date_stamp <= '2006-01-14'
AND a.status_id in (10,30)
group by user_wage_id;

+-----------+---------+---------------------+--------------+--------------------------+------------+
| status_id | type_id | over_time_policy_id | user_wage_id | user_wage_effective_date | total_time |
+-----------+---------+---------------------+--------------+--------------------------+------------+
|        10 |      20 |                   0 |         NULL | NULL                     |      97200 |
|        10 |      20 |                   0 |            4 | 2006-01-01               |      64800 |
+-----------+---------+---------------------+--------------+--------------------------+------------+
2 rows in set (0.00 sec)

--2nd Group By Query, even more NULL values??
select  a.status_id as status_id,
a.type_id as type_id,
a.over_time_policy_id as over_time_policy_id,
z.id as user_wage_id,
z.effective_date as user_wage_effective_date,
sum(total_time) as total_time
from    user_date_total as a
LEFT JOIN user_date as b ON a.user_date_id = b.id
LEFT JOIN user_wage as z ON z.id = (select z.id
	from user_wage as z
	where z.user_id = b.user_id
	and z.effective_date <= b.date_stamp
	order by z.effective_date desc limit 1)
where
b.user_id = '40'
AND b.pay_period_id = '1103'
AND b.date_stamp <= '2006-01-14'
AND a.status_id in (10,30)
group by user_wage_id, user_wage_effective_date, a.status_id

+-----------+---------+---------------------+--------------+--------------------------+------------+
| status_id | type_id | over_time_policy_id | user_wage_id | user_wage_effective_date | total_time |
+-----------+---------+---------------------+--------------+--------------------------+------------+
|        10 |      20 |                   0 |         NULL | NULL                     |     162000 |
+-----------+---------+---------------------+--------------+--------------------------+------------+
1 row in set (0.00 sec)
[7 Jun 2007 7:51] Sveta Smirnova
Thank you for the report.

I can not repeat described behaviour with current development sources, but can with MySQL 5.0.41.

Please wait next release in which fix will be incorporated.