Bug #80088 Where in subquery referencing md5 computed column in view causes duplicate rows
Submitted: 21 Jan 2016 0:46 Modified: 26 Jan 2016 20:54
Reporter: Michael Thomas Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: DML Severity:S3 (Non-critical)
Version:5.6/5.7 OS:Other (Amazon RDS and OSX)
Assigned to: CPU Architecture:Any

[21 Jan 2016 0:46] Michael Thomas
Description:
When I create a view that transforms a column of a table using the MD5 function then reference that column in a WHERE IN subquery, the results are duplicated as many times as there are rows in the view.

The "how to repeat" explains much more clearly.

How to repeat:
drop table if exists account;
create table account as 
select 1 as id
union select 2 as id
union select 3 as id;

drop table if exists account_item;
create table account_item as 
select 1 as id, 1 as account_id
union select 1 as id, 1 as account_id
union select 2 as id, 2 as account_id
union select 3 as id, 2 as account_id
union select 4 as id, 3 as account_id;

drop view if exists account_view;
create view account_view as 
select md5(id) as id
from account;

select *
FROM account_item item
where md5(item.account_id) in (select id from account_view);

-- Expect to get "4" as the result but instead get "12"
[21 Jan 2016 0:57] Michael Thomas
This is not limited to the MD5 function. When any function in the view (even addition) causes the same issue.

For example:

drop table if exists account;
create table account as 
select 1 as id
union select 2 as id
union select 3 as id;

drop table if exists account_item;
create table account_item as 
select 1 as id, 1 as account_id
union select 1 as id, 1 as account_id
union select 2 as id, 2 as account_id
union select 3 as id, 2 as account_id
union select 4 as id, 3 as account_id;

drop view if exists account_view;
create view account_view as 
select id + 1 as id
from account;

select *
FROM account_item as item 
where (item.account_id + 1) in (select id from account_view);
[22 Jan 2016 14:28] Michael Thomas
The last part of the steps to reproduce should have been

select count(*)
FROM account_item item
where md5(item.account_id) in (select id from account_view);

-- Expect to get "4" as the result but instead get "12"
[26 Jan 2016 20:51] MySQL Verification Team
C:\dbs>c:\dbs\5.0\bin\mysql -uroot --port=3500 --prompt="mysql 5.0 > "
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.0.97-Win X64 Source distribution

Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.0 > use test
Database changed
mysql 5.0 > create table account as
    -> select 1 as id
    -> union select 2 as id
    -> union select 3 as id;
Query OK, 3 rows affected (0.03 sec)
Records: 3  Duplicates: 0  Warnings: 0

mysql 5.0 >
mysql 5.0 > drop table if exists account_item;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.0 > create table account_item as
    -> select 1 as id, 1 as account_id
    -> union select 1 as id, 1 as account_id
    -> union select 2 as id, 2 as account_id
    -> union select 3 as id, 2 as account_id
    -> union select 4 as id, 3 as account_id;
Query OK, 4 rows affected (0.01 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql 5.0 >
mysql 5.0 > drop view if exists account_view;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.0 > create view account_view as
    -> select md5(id) as id
    -> from account;
Query OK, 0 rows affected (0.01 sec)

mysql 5.0 >
mysql 5.0 > select *
    -> FROM account_item item
    -> where md5(item.account_id) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  2 |          2 |
|  3 |          2 |
|  4 |          3 |
+----+------------+
4 rows in set (0.00 sec)

C:\dbs>c:\dbs\5.1\bin\mysql -uroot --port=3510 --debug-info --prompt="mysql 5.1 > "
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.1.74-Win X64 Source distribution

Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.1 > use test
Database changed
mysql 5.1 > create table account as
    -> select 1 as id
    -> union select 2 as id
    -> union select 3 as id;
Query OK, 3 rows affected (0.01 sec)
Records: 3  Duplicates: 0  Warnings: 0

mysql 5.1 >
mysql 5.1 > drop table if exists account_item;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.1 > create table account_item as
    -> select 1 as id, 1 as account_id
    -> union select 1 as id, 1 as account_id
    -> union select 2 as id, 2 as account_id
    -> union select 3 as id, 2 as account_id
    -> union select 4 as id, 3 as account_id;
Query OK, 4 rows affected (0.01 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql 5.1 >
mysql 5.1 > drop view if exists account_view;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.1 > create view account_view as
    -> select md5(id) as id
    -> from account;
Query OK, 0 rows affected (0.01 sec)

mysql 5.1 >
mysql 5.1 > select *
    -> FROM account_item item
    -> where md5(item.account_id) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  2 |          2 |
|  3 |          2 |
|  4 |          3 |
+----+------------+
4 rows in set (0.00 sec)

C:\dbs>c:\dbs\5.5\bin\mysql -uroot --port=3550 --prompt="mysql 5.5 > "
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.5.49 Source distribution PULL: 2016-JAN-14

Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.5 > use test
Database changed
mysql 5.5 > create table account as
    -> select 1 as id
    -> union select 2 as id
    -> union select 3 as id;
Query OK, 3 rows affected (0.03 sec)
Records: 3  Duplicates: 0  Warnings: 0

mysql 5.5 >
mysql 5.5 > drop table if exists account_item;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.5 > create table account_item as
    -> select 1 as id, 1 as account_id
    -> union select 1 as id, 1 as account_id
    -> union select 2 as id, 2 as account_id
    -> union select 3 as id, 2 as account_id
    -> union select 4 as id, 3 as account_id;
Query OK, 4 rows affected (0.01 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql 5.5 >
mysql 5.5 > drop view if exists account_view;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.5 > create view account_view as
    -> select md5(id) as id
    -> from account;
Query OK, 0 rows affected (0.01 sec)

mysql 5.5 >
mysql 5.5 > select *
    -> FROM account_item item
    -> where md5(item.account_id) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  2 |          2 |
|  3 |          2 |
|  4 |          3 |
+----+------------+
4 rows in set (0.00 sec)
[26 Jan 2016 20:54] MySQL Verification Team
Thank you for the bug report.

C:\dbs>56

C:\dbs>c:\dbs\5.6\bin\mysql -uroot --port=3560 -p --prompt="mysql 5.6 > "
Enter password:
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.6.30 Source distribution PULL: 2016-JAN-14

Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.6 > use test
Database changed
mysql 5.6 > drop table if exists account;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.6 > create table account as
    -> select 1 as id
    -> union select 2 as id
    -> union select 3 as id;
Query OK, 3 rows affected (0.39 sec)
Records: 3  Duplicates: 0  Warnings: 0

mysql 5.6 >
mysql 5.6 > drop table if exists account_item;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.6 > create table account_item as
    -> select 1 as id, 1 as account_id
    -> union select 1 as id, 1 as account_id
    -> union select 2 as id, 2 as account_id
    -> union select 3 as id, 2 as account_id
    -> union select 4 as id, 3 as account_id;
Query OK, 4 rows affected (0.05 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql 5.6 >
mysql 5.6 > drop view if exists account_view;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql 5.6 > create view account_view as
    -> select md5(id) as id
    -> from account;
Query OK, 0 rows affected (0.05 sec)

mysql 5.6 >
mysql 5.6 > select *
    -> FROM account_item item
    -> where md5(item.account_id) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  1 |          1 |
|  1 |          1 |
|  2 |          2 |
|  2 |          2 |
|  2 |          2 |
|  3 |          2 |
|  3 |          2 |
|  3 |          2 |
|  4 |          3 |
|  4 |          3 |
|  4 |          3 |
+----+------------+
12 rows in set (0.06 sec)

mysql 5.6 > exit
Bye

C:\dbs>57

C:\dbs>c:\dbs\5.7\bin\mysql -uroot -p --port=3570 --prompt="mysql 5.7 > "
Enter password: ******
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.7.12 Source distribution PULL: 2016-JAN-14

Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.7 > use test
Database changed
mysql 5.7 > drop table if exists account;
Query OK, 0 rows affected (0.14 sec)

mysql 5.7 > create table account as
    -> select 1 as id
    -> union select 2 as id
    -> union select 3 as id;
Query OK, 3 rows affected (0.05 sec)
Records: 3  Duplicates: 0  Warnings: 0

mysql 5.7 >
mysql 5.7 > drop table if exists account_item;
Query OK, 0 rows affected (0.03 sec)

mysql 5.7 > create table account_item as
    -> select 1 as id, 1 as account_id
    -> union select 1 as id, 1 as account_id
    -> union select 2 as id, 2 as account_id
    -> union select 3 as id, 2 as account_id
    -> union select 4 as id, 3 as account_id;
Query OK, 4 rows affected (0.03 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql 5.7 >
mysql 5.7 > drop view if exists account_view;
Query OK, 0 rows affected (0.00 sec)

mysql 5.7 > create view account_view as
    -> select md5(id) as id
    -> from account;
Query OK, 0 rows affected (0.02 sec)

mysql 5.7 >
mysql 5.7 > select *
    -> FROM account_item item
    -> where md5(item.account_id) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  1 |          1 |
|  1 |          1 |
|  2 |          2 |
|  2 |          2 |
|  2 |          2 |
|  3 |          2 |
|  3 |          2 |
|  3 |          2 |
|  4 |          3 |
|  4 |          3 |
|  4 |          3 |
+----+------------+
12 rows in set (0.01 sec)

mysql 5.7 >
[27 Jan 2016 8:29] Øystein Grøvlen
Posted by developer:
 
A work-around is to turn off Semijoin Materialization:

mysql> select * FROM account_item as item where (item.account_id + 1) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  1 |          1 |
|  1 |          1 |
|  2 |          2 |
|  2 |          2 |
|  2 |          2 |
|  3 |          2 |
|  3 |          2 |
|  3 |          2 |
|  4 |          3 |
|  4 |          3 |
|  4 |          3 |
+----+------------+
12 rows in set (0,00 sec)

mysql> set optimizer_switch='materialization=off';
Query OK, 0 rows affected (0,00 sec)

mysql> select * FROM account_item as item where (item.account_id + 1) in (select id from account_view);
+----+------------+
| id | account_id |
+----+------------+
|  1 |          1 |
|  2 |          2 |
|  3 |          2 |
|  4 |          3 |
+----+------------+
4 rows in set (0,00 sec)