MySQL Bugs: #97630: SELECT DISTINCT returns wrong result when there is WITH ROLLUP

Bug #97630	SELECT DISTINCT returns wrong result when there is WITH ROLLUP
Submitted:	14 Nov 2019 9:08	Modified:	6 Jan 2020 5:58
Reporter:	Hope Lee (OCA)	Email Updates:
Status:	Closed	Impact on me:	None
Category:	MySQL Server: Optimizer	Severity:	S3 (Non-critical)
Version:	8.0.16, 8.0.18	OS:	CentOS
Assigned to:		CPU Architecture:	Any

Description:
With distinct, the rollup result is wrong.

How to repeat:
CREATE TABLE `random_groupby_0_tab1` (
  `col0` int(11) DEFAULT NULL,
  `col1` int(11) DEFAULT NULL,
  `col2` int(11) DEFAULT NULL
);
INSERT INTO random_groupby_0_tab1 VALUES (22, 6, 8), (28, 57, 45), (82, 44, 71);
CREATE TABLE `random_groupby_0_tab0` (
  `col0` int(11) DEFAULT NULL,
  `col1` int(11) DEFAULT NULL,
  `col2` int(11) DEFAULT NULL
);
INSERT INTO random_groupby_0_tab0 VALUES (83, 0, 38), (26, 0, 79), (43, 81, 24);

SELECT -(-cor0.col1) AS col0 FROM random_groupby_0_tab1, random_groupby_0_tab0 cor0 GROUP BY cor0.col1 WITH ROLLUP;
SELECT DISTINCT -(-cor0.col1) AS col0 FROM random_groupby_0_tab1, random_groupby_0_tab0 cor0 GROUP BY cor0.col1 WITH ROLLUP;

root@localhost:test 8.0.16-rds-dev-debug> SELECT -(-cor0.col1) AS col0 FROM random_groupby_0_tab1, random_groupby_0_tab0 cor0 GROUP BY cor0.col1 WITH ROLLUP;
+------+
| col0 |
+------+
|    0 |
|   81 |
| NULL |
+------+
3 rows in set (0.00 sec)

root@localhost:test 8.0.16-rds-dev-debug> SELECT DISTINCT -(-cor0.col1) AS col0 FROM random_groupby_0_tab1, random_groupby_0_tab0 cor0 GROUP BY cor0.col1 WITH ROLLUP;
+------+
| col0 |
+------+
|    0 |
|   81 |
+------+
2 rows in set (0.00 sec)

Hello Lee,

Thank you for the report and test case.

regards,
Umesh

--- a/sql/sql_executor.cc
+++ b/sql/sql_executor.cc
@@ -6711,7 +6711,8 @@ bool QEP_TAB::remove_duplicates() {
                           field_count * sizeof(*field_lengths), MYF(MY_WME));
   if (field_lengths == nullptr) DBUG_RETURN(true);

-  size_t key_length = compute_field_lengths(first_field, field_lengths);
+  size_t key_length = compute_field_lengths(first_field, field_lengths) +
+      field_count * sizeof((*first_field)->null_bit);

   free_io_cache(tbl);  // Safety
   tbl->file->info(HA_STATUS_VARIABLE);
@@ -6845,6 +6846,12 @@ static bool remove_dup_with_hash_index(THD *thd, TABLE *table,
     /* copy fields to key buffer */
     const size_t *field_length = field_lengths;
     for (Field **ptr = first_field; *ptr; ++ptr, ++field_length) {
+       /* Whether the field is NULL should be considered */
+      *key_pos = (*ptr)->is_real_null() ? (*ptr)->null_bit : ~(*ptr)->null_bit;
+      key_pos += sizeof((*ptr)->null_bit);
+
+      *key_pos = (*ptr)->is_real_null() ?  (*ptr)->null_bit : ~(*ptr)->null_bit;
+      key_pos += 1;
       if ((*ptr)->sort_key_is_varlen()) {
         size_t len = (*ptr)->make_sort_key(key_pos + sizeof(uint32),
                                            *field_length - sizeof(uint32));

Sorry, there is one mistake in the above comment. This is the correct one:

--- a/sql/sql_executor.cc
+++ b/sql/sql_executor.cc
@@ -6711,7 +6711,8 @@ bool QEP_TAB::remove_duplicates() {
                           field_count * sizeof(*field_lengths), MYF(MY_WME));
   if (field_lengths == nullptr) DBUG_RETURN(true);

-  size_t key_length = compute_field_lengths(first_field, field_lengths);
+  size_t key_length = compute_field_lengths(first_field, field_lengths) +
+      field_count * sizeof((*first_field)->null_bit);

   free_io_cache(tbl);  // Safety
   tbl->file->info(HA_STATUS_VARIABLE);
@@ -6845,6 +6846,12 @@ static bool remove_dup_with_hash_index(THD *thd, TABLE *table,
     /* copy fields to key buffer */
     const size_t *field_length = field_lengths;
     for (Field **ptr = first_field; *ptr; ++ptr, ++field_length) {
+       /* Whether the field is NULL should be considered */
+      *key_pos = (*ptr)->is_real_null() ? (*ptr)->null_bit : ~(*ptr)->null_bit;
+      key_pos += sizeof((*ptr)->null_bit);
+
       if ((*ptr)->sort_key_is_varlen()) {
         size_t len = (*ptr)->make_sort_key(key_pos + sizeof(uint32),
                                            *field_length - sizeof(uint32));

Thank you, but please note that in order to submit contributions you must first sign the Oracle Contribution Agreement (OCA). For additional information please check http://www.oracle.com/technetwork/community/oca-486395.html 
If you have any questions, please contact the MySQL community team (mailto:mysql-community_ww@oracle.com) or by visiting https://dev.mysql.com/community/

Fixed in 8.0.20 under the heading of

Bug#27549694 DISTINCT REMOVES ROWS INCORRECTY WHEN "NULLS" ARE PRESENT

Note that the contributed code was not used, since the bug had already been fixed.