Bug #82308 use of DISTINCT with BIT and LONGTEXT leads to erroneous results
Submitted: 21 Jul 2016 14:54 Modified: 21 Jul 2016 17:39
Reporter: Andreas Heinzel Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: MyISAM storage engine Severity:S1 (Critical)
Version:5.7 OS:Any
Assigned to: CPU Architecture:Any
Tags: bit, distinct, longtext

[21 Jul 2016 14:54] Andreas Heinzel
Description:
Using DISTINCT in a subquery creating a projection of BIT and LONGTEXT columns leads to erroneous results (bits become active in the result set). 

eq:
table bit_test has two columns attr_bit(BIT) and attr_text (LONGTEXT)

SELECT CAST(attr_bit AS UNSIGNED) AS attr_bit_as_uint, attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test
) a;

result set: column attr_bit_as_uint contains only numerical ones.

------------------------------------------------
environment:
mysql client:
#mysql --version
mysql  Ver 14.14 Distrib 5.7.12, for Linux (x86_64) using  EditLine wrapper

mysql server:
#SHOW VARIABLES LIKE '%version%';
+-------------------------+------------------------------+
| Variable_name           | Value                        |
+-------------------------+------------------------------+
| innodb_version          | 5.7.12                       |
| protocol_version        | 10                           |
| slave_type_conversions  |                              |
| tls_version             | TLSv1,TLSv1.1                |
| version                 | 5.7.12                       |
| version_comment         | MySQL Community Server (GPL) |
| version_compile_machine | x86_64                       |
| version_compile_os      | Linux                        |
+-------------------------+------------------------------+

server host os:
Ubuntu 14.04.4 LTS 
Linux xxx 3.13.0-32-generic #57-Ubuntu SMP Tue Jul 15 03:51:08 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux

How to repeat:
CREATE TABLE bit_test(
       attr_bit BIT,
       attr_text LONGTEXT
);

INSERT INTO bit_test
VALUES
(0, 't1'),
(1, 't2'),
(1, 't3'),
(1, 't3'),
(0, 't3');

SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test
) a;

#expected:
#0, t1
#1, t2
#1, t3
#0, t3

#actual:
#+----------------------------+-----------+
#| CAST(attr_bit AS UNSIGNED) | attr_text |
#+----------------------------+-----------+
#|                          1 | t1        |
#|                          1 | t2        |
#|                          1 | t3        |
#|                          1 | t3        |
#+----------------------------+-----------+

####################################
#     some additional interesting observations   #
####################################

#selection on attr_bit in the outer query fails:
SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test
) a
WHERE a.attr_bit = 0; #alternatively a.attr_bit = (0) OR a.attr_bit = b'0'
#returns the four rows as the query above

SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test
) a
WHERE a.attr_bit = 1;
#empty result set

####################
#selection issue was also observed on a similar table table with VARCHAR(255) instead of LONGTEXT
CREATE TABLE bit_test_varchar(
       attr_bit BIT,
       attr_text VARCHAR(255)
);

INSERT INTO bit_test_varchar
VALUES
(0, 't1'),
(1, 't2'),
(1, 't3'),
(1, 't3'),
(0, 't3');

#works
SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test_varchar
) a;

#+----------------------------+-----------+
#| CAST(attr_bit AS UNSIGNED) | attr_text |
#+----------------------------+-----------+
#|                          0 | t1        |
#|                          1 | t2        |
#|                          1 | t3        |
#|                          0 | t3        |
#+----------------------------+-----------+

#however selection on attr_bit in outer select still produces unexpected result
SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test_varchar
) a
WHERE a.attr_bit = b'0';

#+----------------------------+-----------+
#| CAST(attr_bit AS UNSIGNED) | attr_text |
#+----------------------------+-----------+
#|                          0 | t1        |
#+----------------------------+-----------+

#even though
SELECT a.attr_bit = b'0', attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test_varchar
) a;

#+-------------------+-----------+
#| a.attr_bit = b'0' | attr_text |
#+-------------------+-----------+
#|                 1 | t1        |
#|                 0 | t2        |
#|                 0 | t3        |
#|                 1 | t3        |
#+-------------------+-----------+
[21 Jul 2016 15:18] Andreas Heinzel
actually the error is not limited to subqueries (in the example the sq is only necessary to ensure that elimination of duplicates is performed on bit values)

How to repeat:
mysql>
CREATE TABLE bit_test(
       attr_bit BIT,
       attr_text LONGTEXT
);

INSERT INTO bit_test
VALUES
(0, 't1'),
(1, 't2'),
(1, 't3'),
(1, 't3'),
(0, 't3');

shell>
mysql -e"SELECT DISTINCT * FROM bit_test" TEST_DB | cat -evt
#expected:
attr_bit^Iattr_text$
#\0^It1$
#^A^It2$
#^A^It3$
#\0^It3$

#actual
#attr_bit^Iattr_text$
#^A^It1$
#^A^It2$
#^A^It3$
#^A^It3$
[21 Jul 2016 15:24] MySQL Verification Team
mysql 5.7 > use test
Database changed
mysql 5.7 > CREATE TABLE bit_test(
    ->        attr_bit BIT,
    ->        attr_text LONGTEXT
    -> );
Query OK, 0 rows affected (0.35 sec)

mysql 5.7 >
mysql 5.7 > INSERT INTO bit_test
    -> VALUES
    -> (0, 't1'),
    -> (1, 't2'),
    -> (1, 't3'),
    -> (1, 't3'),
    -> (0, 't3');
Query OK, 5 rows affected (0.06 sec)
Records: 5  Duplicates: 0  Warnings: 0

mysql 5.7 >
mysql 5.7 > SELECT CAST(attr_bit AS UNSIGNED), attr_text
    -> FROM(
    ->  SELECT DISTINCT *
    ->  FROM bit_test
    -> ) a;
+----------------------------+-----------+
| CAST(attr_bit AS UNSIGNED) | attr_text |
+----------------------------+-----------+
|                          0 | t1        |
|                          1 | t2        |
|                          1 | t3        |
|                          0 | t3        |
+----------------------------+-----------+
4 rows in set (0.03 sec)

mysql 5.7 > show variables like "%version%";
+-------------------------+---------------------------------+
| Variable_name           | Value                           |
+-------------------------+---------------------------------+
| innodb_version          | 5.7.15                          |
| protocol_version        | 10                              |
| slave_type_conversions  |                                 |
| tls_version             | TLSv1,TLSv1.1                   |
| version                 | 5.7.15                          |
| version_comment         | Source distribution 2016-JUL-09 |
| version_compile_machine | x86_64                          |
| version_compile_os      | Win64                           |
+-------------------------+---------------------------------+
8 rows in set (0.06 sec)
[21 Jul 2016 15:29] MySQL Verification Team
Are the prior and below results the expected ones?

mysql 5.7 > SELECT CAST(attr_bit AS UNSIGNED), attr_text
    -> FROM(
    ->  SELECT DISTINCT *
    ->  FROM bit_test
    -> ) a
    -> WHERE a.attr_bit = 0; #alternatively a.attr_bit = (0) OR a.attr_bit = b'0';
+----------------------------+-----------+
| CAST(attr_bit AS UNSIGNED) | attr_text |
+----------------------------+-----------+
|                          0 | t1        |
|                          0 | t3        |
+----------------------------+-----------+
2 rows in set (0.03 sec)

mysql 5.7 > SELECT CAST(attr_bit AS UNSIGNED), attr_text
    -> FROM(
    ->  SELECT DISTINCT *
    ->  FROM bit_test
    -> ) a
    -> WHERE a.attr_bit = 1;
+----------------------------+-----------+
| CAST(attr_bit AS UNSIGNED) | attr_text |
+----------------------------+-----------+
|                          1 | t2        |
|                          1 | t3        |
+----------------------------+-----------+
2 rows in set (0.00 sec)

Thanks.
[21 Jul 2016 16:15] Andreas Heinzel
Thank you Miguel for looking into this.
Yes the above results are the expected ones.

In the meantime I upgraded one of our test environments to 5.7.13 and also executed "the test" on a fresh install of 5.7.13. While, on the fresh install everything is fine, we still see the same issue in our test environment. I assume it is caused by our configuration - will look into it and report back.
[21 Jul 2016 16:29] MySQL Verification Team
Thank you for the feedback. Please comment when you have a repeatable test case. Thanks.
[21 Jul 2016 16:39] Andreas Heinzel
I'm sorry I forgot a very important detail - we are still using MyISAM as default storage engine.

Explicitly specifying the storage engine allows me to reproduce the issue. Can you please rerun the test with ENGINE = MyISAM
CREATE TABLE bit_test(
       attr_bit BIT,
       attr_text LONGTEXT
) ENGINE = MyISAM;

INSERT INTO bit_test
VALUES
(0, 't1'),
(1, 't2'),
(1, 't3'),
(1, 't3'),
(0, 't3');

SELECT CAST(attr_bit AS UNSIGNED), attr_text
FROM(
	SELECT DISTINCT *
	FROM bit_test
) a;
[21 Jul 2016 17:39] MySQL Verification Team
Thank you for the feedback.

C:\dbs>c:\dbs\5.7\bin\mysql -uroot -p --port=3570 --prompt="mysql 5.7 > "
Enter password: ******
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.7.15 Source distribution 2016-JUL-09

Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.7 > USE test
Database changed
mysql 5.7 > SHOW CREATE TABLE bit_test;
+----------+---------------------------------------------------------------------------------------------------------------------------+
| Table    | Create Table                                                                                                              |
+----------+---------------------------------------------------------------------------------------------------------------------------+
| bit_test | CREATE TABLE `bit_test` (
  `attr_bit` bit(1) DEFAULT NULL,
  `attr_text` longtext
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
+----------+---------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.07 sec)

mysql 5.7 > SELECT CAST(attr_bit AS UNSIGNED), attr_text
    -> FROM(
    ->  SELECT DISTINCT *
    ->  FROM bit_test
    -> ) a;
+----------------------------+-----------+
| CAST(attr_bit AS UNSIGNED) | attr_text |
+----------------------------+-----------+
|                          0 | t1        |
|                          1 | t2        |
|                          1 | t3        |
|                          0 | t3        |
+----------------------------+-----------+
4 rows in set (0.05 sec)

mysql 5.7 > ALTER TABLE bit_test ENGINE MyISAM;
Query OK, 5 rows affected (0.63 sec)
Records: 5  Duplicates: 0  Warnings: 0

mysql 5.7 > SELECT CAST(attr_bit AS UNSIGNED), attr_text
    -> FROM(
    ->  SELECT DISTINCT *
    ->  FROM bit_test
    -> ) a;
+----------------------------+-----------+
| CAST(attr_bit AS UNSIGNED) | attr_text |
+----------------------------+-----------+
|                          1 | t1        |
|                          1 | t2        |
|                          1 | t3        |
|                          1 | t3        |
+----------------------------+-----------+
4 rows in set (0.00 sec)

mysql 5.7 >
[21 Jul 2016 17:54] MySQL Verification Team
Just FYI, it works fine with InnoDB storage engine ....