Bug #99459 SQL run with GROUP_MIN_MAX may infinite loop and never return
Submitted: 6 May 2020 8:17 Modified: 8 May 2020 13:21
Reporter: Ze Yang (OCA) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.6,5.7,8.0 OS:Any
Assigned to: CPU Architecture:Any

[6 May 2020 8:17] Ze Yang
Description:
SQL run with GROUP_MIN_MAX may infinite loop and never return.

For QUICK_GROUP_MIN_MAX_SELECT, MySQL get the last prefix value firstly. Code `int QUICK_GROUP_MIN_MAX_SELECT::reset(void)`.
And when get_next, MySQL will get next_prefix and  check whether this is the last prefix.
When have_min or have_max, MySQL will try to get next_min or next_max. And the result will be assigned with min_res or max_res. Code  in `QUICK_GROUP_MIN_MAX_SELECT::get_next()` .

The InnoDB code ha_innobase::index_read only return 0, HA_ERR_KEY_NOT_FOUND, or error number, not return HA_ERR_END_OF_FILE.

One situation is that:
When a query run with min/max, and the is_index_scan of QUICK_GROUP_MIN_MAX_SELECT is false, if the records of last_prefix is deleted by other session, the session run with 'READ-UNCOMMITTED' will never get the last_prefix. If the next_min/next_max return HA_ERR_KEY_NOT_FOUND before check last_prefix, the is_last_prefix will be -1 and result of next_prefix always HA_ERR_KEY_NOT_FOUND.

```
do {
    result = next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    if (!result) {
      is_last_prefix =
          key_cmp(index_info->key_part, last_prefix, group_prefix_len);
      DBUG_ASSERT(is_last_prefix <= 0);
    } else {
      if (result == HA_ERR_KEY_NOT_FOUND) continue; // The ha_innobase::index_read never return HA_ERR_END_OF_FILE, if the is_last_prefix is 0, the query will infinite loop.
      break;
    }

    if (have_min || have_max) {
      // Call next_min() when we have MIN() funcs or desc keyparts. In
      // latter case max values will be at the beginning of group.
      min_res = (have_min || (has_desc_keyparts && have_max)) ? next_min() : 0;
      if (min_res == 0) {
        // Reset and update funcs' values
        if (have_min) update_min_result(true);
        if (have_max) update_max_result(true);
      }
      // Call next_max() when we have MAX() funcs or desc keyparts. In
      // latter case min values will be at the end of group.
      max_res = (have_max || (has_desc_keyparts && have_min)) ? next_max() : 0;
      if (max_res == 0) {
        // Only update funcs' values
        if (have_min) update_min_result(false);
        if (have_max) update_max_result(false);
      }
    }
    if (!have_min && !have_max && key_infix_len > 0)
      result = head->file->ha_index_read_map(
          record, group_prefix, make_prev_keypart_map(real_key_parts),
          HA_READ_KEY_EXACT);

    result = have_min ? min_res : have_max ? max_res : result; // If get HA_ERR_KEY_NOT_FOUND here before last_prefix
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);
```

How to repeat:
1. Add DEBUG_SYNC point to make it easy to repeat.

```
#include "sql/current_thd.h"
+ #include "sql/debug_sync.h"

static int index_next_different(bool is_index_scan, handler *file,
                                KEY_PART_INFO *key_part, uchar *record,
                                const uchar *group_prefix,
                                uint group_prefix_len, uint group_key_parts) {
+  DEBUG_SYNC(current_thd, "before_index_next_different");
```

test_group_min_max.test

```
create table t(c1 int, c2 int, c3 int, c4 int, key(c1, c2, c3))ENGINE=InnoDB;
insert into t values(1, 1, 1, 1), (2, 2, 2,2), (3, 3, 3, 3), (4, 4, 4, 4), (5, 5, 5, 5);
analyze table t;
set transaction_isolation='read-uncommitted';
set debug = '+d,force_lis_for_group_by';
explain select c1, max(c3) from t where c2 = 6 group by c1;
set debug_sync='before_index_next_different wait_for sig';
--send select c1, max(c3) from t where c2 = 6 group by c1;
connect(con1,localhost,root,,test,,);
connection con1;
--echo # Switch to connection con1
delete from t where c1 = 5;
set debug_sync='now signal sig';

connection default;
--reap
```

session state

```
mysql> show processlist;
+----+-----------------+-----------------+------+---------+------+------------------------+----------------------------------------------------+
| Id | User            | Host            | db   | Command | Time | State                  | Info                                               |
+----+-----------------+-----------------+------+---------+------+------------------------+----------------------------------------------------+
|  4 | event_scheduler | localhost       | NULL | Daemon  |   39 | Waiting on empty queue | NULL                                               |
|  9 | root            | localhost       | test | Query   |   35 | Sending data           | select c1, max(c3) from t where c2 = 6 group by c1 |
| 10 | root            | localhost       | test | Sleep   |   35 |                        | NULL                                               |
| 11 | root            | localhost:23128 | test | Query   |    0 | starting               | show processlist                                   |
+----+-----------------+-----------------+------+---------+------+------------------------+----------------------------------------------------+
```
[6 May 2020 13:51] MySQL Verification Team
Hi Mr. Yang,

Thank you for your bug report.

However, we do have additional questions and requests.

First of all, have you been able to repeat this behaviour without DEBUG_SYNC. If you are able to, we would like to see a simple test case where that GROUP BY query takes a very, very long time to execute. If it would require to run the tests 10 times, or with some other query in parallel, please let us know. Simply, our users do not add DEBUG_SYNC, so we need a repeatable test case without it.

Second, have you tried latest 8.0 release, 8.0.20 ???

We are eagerly awaiting your answers.
[7 May 2020 6:05] Ze Yang
This is the testcase without add DEBUG_SYNC

```
create table t(c1 int, c2 int, c3 int, c4 int, key(c1, c2, c3));
delimiter //;

CREATE PROCEDURE BatchInsert(IN begin INT, IN end INT, IN name VARCHAR(10))
  BEGIN
  SET @insert_stmt = concat('INSERT INTO ', name, ' VALUES(?, ?, ?, ?);');
  PREPARE stmt from @insert_stmt;
  WHILE begin <= end DO
    SET @ID1 = begin;
    EXECUTE stmt using @ID1, @ID1, @ID1, @ID1;
    SET begin = begin + 1;
  END WHILE;
  END;
//
delimiter ;//

CALL BatchInsert(1, 20000, 't');
analyze table t;
set transaction_isolation='read-uncommitted';
set debug = '+d,force_lis_for_group_by';
explain select c1, max(c3) from t where c2 = 6 group by c1;
--send select c1, max(c3) from t where c2 = 6 group by c1;
connect(con1,localhost,root,,test,,);
connection con1;
--echo # Switch to connection con1
select sleep(0.03);
delete from t where c1 = 20000;

connection default;
--reap
```
[7 May 2020 6:06] Ze Yang
And the latest 8.0 release(8.0.20) version still have this problem.
[7 May 2020 13:41] MySQL Verification Team
Hi Mr. Yang,

I have ran your test and it ran beautifully.

The only change I did is to comment out the debug line. Again, we need a test case without such intervention, since we can not verify a bug where we need to raise some control flags, which does not happen in the near world.

This does not look like a bug to me.
[8 May 2020 8:17] Ze Yang
This is the test case without DEBUG line. And you can run it with release version.

```
create table t(c1 int, c2 int, c3 int, c4 int, key(c1, c2, c3));
delimiter //;

CREATE PROCEDURE BatchInsert(IN begin INT, IN end INT, IN name VARCHAR(10))
  BEGIN
  SET @insert_stmt = concat('INSERT INTO ', name, ' VALUES(?/20, ?, ?, ?);');
  PREPARE stmt from @insert_stmt;
  WHILE begin <= end DO
    SET @ID1 = begin;
    EXECUTE stmt using @ID1, @ID1, @ID1, @ID1;
    SET begin = begin + 1;
  END WHILE;
  END;
//
delimiter ;//

CALL BatchInsert(1, 100000, 't');
insert into t select * from t;
insert into t select * from t;
analyze table t;
set transaction_isolation='read-uncommitted';
explain select c1, max(c3) from t where c2 = 600000 group by c1;
--send select c1, max(c3) from t where c2 = 600000 group by c1;
connect(con1,localhost,root,,test,,);
connection con1;
--echo # Switch to connection con1
select sleep(0.01);
delete from t where c1 = 100000/20;

connection default;
--reap
```

By the way, in your opinion what's the meaning of debug code in MySQL source code and DEBUG line in MySQL mtr cases.
[8 May 2020 13:21] MySQL Verification Team
Hi Mr. Yang,

I have run your test case. I have changed debug sync timeout to 3600 seconds and I ran your test case. After one hour, timeout has expired.

Hence, I conclude that there is definitely a problem in the code, so I am verifying this report, alas, with lower severity.

Debug code is there for testing only, or for debugging with debug binaries. This is not a pice of code that is utilised in any manner in the production.

Verified as an optimiser bug.