MySQL Bugs: #99444: New HASH JOIN order problem

Bug #99444	New HASH JOIN order problem
Submitted:	5 May 2020 9:28	Modified:	13 May 2020 15:43
Reporter:	Gabor Aron	Email Updates:
Status:	Not a Bug	Impact on me:	None
Category:	MySQL Server: Optimizer	Severity:	S1 (Critical)
Version:	8.0.20	OS:	CentOS (64G RAM)
Assigned to:		CPU Architecture:	x86 (Intel(R) Xeon(R) CPU E5-2665 0 @ 2.40GHz)
Tags:	hash join, regression

Description:
The derived table t2 has "b", "a" column which is order what it should be selected as best match 

then it should left join if "a" IN "m", 0
(it have exact or use the default 0)

so 
m = 2 should have b = 123 and not 1 the generic 0
m = 1 should have b = 12 and not 1 the generic 0

How to repeat:
*** Table schema and data ***

CREATE TABLE `t1` (
  `m` int(11) NOT NULL
);

INSERT INTO `t1` (`m`) VALUES
(1),
(2),
(3);

CREATE TABLE `t2` (
  `a` int(11) NOT NULL,
  `b` int(11) NOT NULL
);

INSERT INTO `t2` (`a`, `b`) VALUES
(2,	123),
(1,	12),
(0,	1);

*** Correct result on mysql 8.0.18 and anything prio of that ***

SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.18    |
+-----------+

SELECT *
FROM t2
ORDER BY b DESC, a DESC
LIMIT 10;
+---+-----+
| a | b   |
+---+-----+
| 2 | 123 |
| 1 |  12 |
| 0 |   1 |
+---+-----+

SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10;
) t2 ON a IN (m, 0);
+---+------+------+
| m | a    | b    |
+---+------+------+
| 2 |    2 |  123 |
| 1 |    1 |   12 |
| 1 |    0 |    1 |
| 2 |    0 |    1 |
| 3 |    0 |    1 |
+---+------+------+

SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m;
+---+------+------+
| m | a    | b    |
+---+------+------+
| 2 |    2 |  123 |
| 1 |    1 |   12 |
| 3 |    0 |    1 |
+---+------+------+

EXPLAIN 
SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m;
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| id | select_type | table      | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                              |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
|  1 | PRIMARY     | t1         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using temporary                                    |
|  1 | PRIMARY     | <derived2> | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using where; Using join buffer (Block Nested Loop) |
|  2 | DERIVED     | t2         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using filesort                                     |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+

*** Bad result due forced using hash join and unable to disable the hash join *** 

SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.20    |
+-----------+

SELECT *
FROM t2
ORDER BY b DESC, a DESC
LIMIT 10;
+---+-----+
| a | b   |
+---+-----+
| 2 | 123 |
| 1 |  12 |
| 0 |   1 |
+---+-----+

SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0);
+---+------+------+
| m | a    | b    |
+---+------+------+
| 1 |    0 |    1 |
| 1 |    1 |   12 |
| 2 |    0 |    1 |
| 2 |    2 |  123 |
| 3 |    0 |    1 |
+---+------+------+
// here its looks like orderd by m which was not requested at all
// so when i use group to filter out duplicates the final result is bad

SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m;
+---+------+------+
| m | a    | b    |
+---+------+------+
| 1 |    0 |    1 |
| 2 |    0 |    1 |
| 3 |    0 |    1 |
+---+------+------+

EXPLAIN
SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m;
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table      | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                      |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
|  1 | PRIMARY     | t1         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using temporary                            |
|  1 | PRIMARY     | <derived2> | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using where; Using join buffer (hash join) |
|  2 | DERIVED     | t2         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    3 |   100.00 | Using filesort                             |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+

Hello Gabor Aron,

Thank you for the report and test case.

regards,
Umesh

Hi,

If you don't specify ORDER BY in a query, the ordering of rows is undefined.
This goes even if you are joining two tables and one of them is a subquery
with ORDER BY -- it probably worked by accident in previous MySQL versions,
but was never guaranteed. Are there any other issues besides this?

Another issue:
SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m;
is bound to return random results, as it groups several rows having the same value of 'm' into one - how can it choose which values to keep for other columns.
In the value of session variable 'sql_mode', mode 'only_full_group_by' is enabled by default and it makes this query return this:
ERROR 1055 (42000): Expression #2 of SELECT list is not in GROUP BY clause and contains nonaggregated column 't2.a' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by

I recommend keeping this mode on when troubleshooting queries.

Hi Steinar Gunderson

| A. Steinar Gunderson
| If you don't specify ORDER BY in a query, the ordering of rows is undefined.

Not matter if you add ORDER to the main query, the result is still broken

SELECT *
FROM t1
LEFT JOIN (
    SELECT *
    FROM t2
    ORDER BY b DESC, a DESC
    LIMIT 10
) t2 ON a IN (m, 0)
GROUP BY m
ORDER BY b DESC, a DESC; 

+---+------+------+
| m | a    | b    |
+---+------+------+
| 1 |    0 |    1 |
| 2 |    0 |    1 |
| 3 |    0 |    1 |
+---+------+------+

|A. Steinar Gunderson
| -- it probably worked by accident in previous MySQL versions,

Q. you mean all prio version 5.x.x 8.x.x <= 8.0.19 where accident solutions ?

Hi,

Your query with ORDER BY appears to be correctly ordered, so I don't see the bug.

As Guilhem points out, your query as-is produces undefined results as per the SQL standard. MySQL versions <= 8.0.19 had an implementation that happened to give the result you wanted most of the time, but this does not make it a MySQL bug if MySQL 8.0.20 has a different implementation yielding different results.

Specifically, there is no SQL concept of “best match” in a join or group; it is a concept you have invented for yourself, and which MySQL does not attempt to follow.

Both the old and the new results are valid results for the given query.  As Guilhem writes, the query does not specify which values to choose for a and b if the are multiple rows in the same group.   This means that the result will be depend on in which order the rows are accessed.  With hash join, the rows will be accessed in a different order than before, and one may end up with different values for a and b.

In general, changes in order of access may happen whenever a different query plan is used (e.g., due to adding another index or when statistics are changed).  As Guilhem, writes, to avoid relying on assumptions about order of access, one should make sure ONLY_FULL_GROUP_BY is set.

Ok lets try to take another approach to show the "HASH JOIN order problem"

So lets also agree at last on one thing if a table has 2 rows b=2 and b=1 and you order that column DESC the first is 2 and second is 1 due 2 is greater then 1

*** Table schema and data ***

CREATE TABLE `t1` (`m` int NOT NULL, `n` int NOT NULL); 
INSERT INTO `t1` (`m`, `n`) VALUES (1, 0);
CREATE TABLE `t2` (`a` int NOT NULL, `b` int NOT NULL);
INSERT INTO `t2` (`a`, `b`) VALUES (1, 2), (1, 1);

*** On MySQL 8.0.20 ***

mysql> SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.20    |
+-----------+
1 row in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
| 1 | 0 |    1 |    1 |
+---+---+------+------+
2 rows in set (0.00 sec)

# so far so good the MySQL-8.0.20 put the b=2 on top
# now i put group by m to eliminate duplicates to remain only the first row

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) GROUP BY t1.m ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    1 |
+---+---+------+------+
1 row in set (0.00 sec)

# and the bug happens MySQL-8.0.20 not care any more the order

mysql> EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                      |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
|  1 | SIMPLE      | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary; Using filesort            |
|  1 | SIMPLE      | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using where; Using join buffer (hash join) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
2 rows in set, 1 warning (0.00 sec)

*** On MySQL <= 8.0.19 the results are correct as the "ORDER BY b DESC" is applied *** 

mysql> SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.19    |
+-----------+
1 row in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
| 1 | 0 |    1 |    1 |
+---+---+------+------+
2 rows in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) GROUP BY t1.m ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
+---+---+------+------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                              |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
|  1 | SIMPLE      | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary; Using filesort                    |
|  1 | SIMPLE      | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+

And the sad thing it says i have no way to disable the HASH_JOIN

https://dev.mysql.com/doc/refman/8.0/en/switchable-optimizations.html
Controls hash joins (MySQL 8.0.18 only; has no effect in MySQL 8.0.19 or later). 

*** MySQL 5.7.12 the results are correct as the "ORDER BY b DESC" is applied *** 

mysql> SELECT @@version;
+------------+
| @@version  |
+------------+
| 5.7.12-log |
+------------+
1 row in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
| 1 | 0 |    1 |    1 |
+---+---+------+------+
2 rows in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) GROUP BY t1.m ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
+---+---+------+------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                              |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
|  1 | SIMPLE      | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary; Using filesort                    |
|  1 | SIMPLE      | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
2 rows in set, 1 warning (0.00 sec)

*** MySQL 5.1.40 the results are correct as the "ORDER BY b DESC" is applied *** 

mysql> SELECT @@version;
+------------+
| @@version  |
+------------+
| 5.1.40-log |
+------------+
1 row in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;

+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
| 1 | 0 |    1 |    1 |
+---+---+------+------+
2 rows in set (0.03 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) GROUP BY t1.m ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
+---+---+------+------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+----+-------------+-------+------+---------------+------+---------+------+------+---------------------------------+
| id | select_type | table | type | possible_keys | key  | key_len | ref  | rows | Extra                           |
+----+-------------+-------+------+---------------+------+---------+------+------+---------------------------------+
|  1 | SIMPLE      | t1    | ALL  | NULL          | NULL | NULL    | NULL |    1 | Using temporary; Using filesort |
|  1 | SIMPLE      | t2    | ALL  | NULL          | NULL | NULL    | NULL |    2 |                                 |
+----+-------------+-------+------+---------------+------+---------+------+------+---------------------------------+
2 rows in set (0.00 sec)

There is still no problem here.  According to SQL, grouping is applied before ordering.  The ORDER BY clause will order the final groups, not the rows to be grouped.

So all for all cant disable new hash join so i am stuck with <= 8.0.19

What Øystein is saying is entirely correct. If you want to pick out a given rows from each group, GROUP BY is the wrong tool; GROUP BY is for aggregation. (Standard SQL simply does not allow you to write GROUP BY queries with such unpredictable results; as Guilhem says, MySQL enforces this unless you have turned off the default ONLY_FULL_GROUP_BY. You should generally not turn it off.)

You can probably do what you are trying to do with a window function, or optionally, a set of scalar subqueries (but the former is likely to be faster). In any case, the fact that hash join reorders rows is not a bug. Thus, closing this bug report as invalid.

Any idea of explaing this when no hash join is used

due i added index on t2.a works again as it was before <= 8.0.19

the silly thing if the hash join is enforced the results are different (but i can't call this a bug)
( as I have to understand "Both the old and the new results are valid results for the given query" even if the results are different )

mysql> DROP TABLE IF EXISTS t1, t2;
Query OK, 0 rows affected, 2 warnings (0.02 sec)

mysql> CREATE TABLE `t1` (`m` int NOT NULL, `n` int NOT NULL);
Query OK, 0 rows affected (0.01 sec)

mysql> INSERT INTO `t1` (`m`, `n`) VALUES (1, 0);
Query OK, 1 row affected (0.00 sec)

mysql> CREATE TABLE `t2` (`a` int NOT NULL, `b` int NOT NULL, KEY (`a`));
Query OK, 0 rows affected (0.00 sec)

mysql> INSERT INTO `t2` (`a`, `b`) VALUES (1, 2), (1, 1);
Query OK, 2 rows affected (0.01 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.20    |
+-----------+
1 row in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
| 1 | 0 |    1 |    1 |
+---+---+------+------+
2 rows in set (0.00 sec)

mysql> SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) GROUP BY t1.m ORDER BY b DESC;
+---+---+------+------+
| m | n | a    | b    |
+---+---+------+------+
| 1 | 0 |    1 |    2 |
+---+---+------+------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON t2.a IN (t1.m, t1.n) ORDER BY b DESC;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                          |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+------------------------------------------------+
|  1 | SIMPLE      | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary; Using filesort                |
|  1 | SIMPLE      | t2    | NULL       | ALL  | a             | NULL | NULL    | NULL |    2 |   100.00 | Range checked for each record (index map: 0x1) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+------------------------------------------------+
2 rows in set, 1 warning (0.00 sec)

Hi Steinar Gunderson 
| You can probably do what you are trying to do with a window function

Can you give me a bit more info about your window suggestion. Ty

Hi

Please look this

this all run on MySQL-8.0.20

mainly i put this to force that MySQL to do a HASH JOIN t2.a IN (t1.m, 0)

DROP TABLE IF EXISTS t1, t2;
CREATE TABLE `t1` (`m` int NOT NULL);
INSERT INTO `t1` (`m`) VALUES (1);
CREATE TABLE `t2` (`a` int NOT NULL, `b` int NOT NULL);
INSERT INTO `t2` (`a`, `b`) VALUES (1, 2), (1, 1);

SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.20    |
+-----------+

I know there is no way to disable the hash join with optimizer_switch as that is removed from 8.0.20
So using this optimizer switch off it will fall back to do WHERE instead the new HASH JOIN and the result again are correct as before <= 8.0.19

SET @@optimizer_switch = 'block_nested_loop=off';
SELECT *  FROM t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2 ON t2.a IN (t1.m, 0) GROUP BY t1.m;
+---+------+------+
| m | a    | b    |
+---+------+------+
| 1 |    1 |    2 |
+---+------+------+

EXPLAIN SELECT *  FROM t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2 ON t2.a IN (t1.m, 0) GROUP BY t1.m;
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+-----------------+
| id | select_type | table      | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra           |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+-----------------+
|  1 | PRIMARY     | t1         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary |
|  1 | PRIMARY     | <derived2> | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using where     |
|  2 | DERIVED     | t2         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using filesort  |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+-----------------+

And if i trun back and use the new HASH JOIN again the result are not as it was before <= 8.0.19

SET @@optimizer_switch = 'block_nested_loop=on';
SELECT *  FROM t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2 ON t2.a IN (t1.m, 0) GROUP BY t1.m;
+---+------+------+
| m | a    | b    |
+---+------+------+
| 1 |    1 |    1 |
+---+------+------+

EXPLAIN SELECT *  FROM t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2 ON t2.a IN (t1.m, 0) GROUP BY t1.m;
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
| id | select_type | table      | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                                      |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
|  1 | PRIMARY     | t1         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    1 |   100.00 | Using temporary                            |
|  1 | PRIMARY     | <derived2> | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using where; Using join buffer (hash join) |
|  2 | DERIVED     | t2         | NULL       | ALL  | NULL          | NULL | NULL    | NULL |    2 |   100.00 | Using filesort                             |
+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+

Hello, Gabor Aron.

Thank you for having explained your problem in a very detailed way, with steps which are easy to reproduce.

There are several points to consider in the query:
SELECT *  FROM
t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2
ON t2.a IN (t1.m, 0)
GROUP BY t1.m;

1) It calculates a temporary set of rows: 10 rows from t2, biggest values of t2.b first.
2) For each row in t1, it finds all rows in this set which match.
3) Then it wants to keep only one match per row of t1,
4) and, more precisely, it wants the biggest match.
I hope I figured out the intention right :-)

Then there are two things:
- how MySQL is implemented: the algorithms which it uses.
- what MySQL must respect: the ISO SQL standard.
The algorithms used in MySQL sometimes change, because we replace them with better ones over time.
As long as the queries' results keep respecting the SQL standard, it's ok to change the algorithms. If we never change them, we cannot make any progress.
For example, if a table contains 3 rows: 1,2,3, and a SELECT returns 1,2,3 in this order, it's ok if a new MySQL version starts returning 3,1,2: the SQL standard says that without ORDER BY, the DBMS can return in random order.

You know all that, I wrote it just as a recap.

What happened here is that, for years, MySQL had only one join algorithm called "nested-loop join". Then, ~10 years ago, we added "block nested-loop join". And, recently, we replaced the latter with "hash join". We did that because, in many queries, hash join gives big performance improvements; you can see the numbers at the bottom of:
https://mysqlserverteam.com/hash-join-in-mysql-8/

With block nested-loop join, rows of t1 and rows of the temporary set where joined in "intuitive order".
But it was just an effect of block nested-loop join. The SQL standard doesn't impose this, and never has. To be clear, in:

SELECT *  FROM
t1 LEFT JOIN (SELECT * FROM t2 ORDER BY t2.b DESC LIMIT 10) t2
ON t2.a IN (t1.m, 0)
GROUP BY t1.m;

- the SQL standard doesn't impose that the rows of the temporary set come out in order (even if there is ORDER in this set: ORDER BY is used when writing to the tmp set, not when later reading the tmp set).
- the SQL standard doesn't impose that rows of t1, or of the tmp set, should be iterated over in a particular order
- all it cares about is that the final result respects the join condition.

That's why hash join is free to output rows in any order.

Then there is another issue.
In a second phase, this output of the join algorithm is sent to GROUP BY.
GROUP BY has to form one single row from an entire group (of all rows having the same value for t1.m).
How could it choose between these rows: if two rows of the same group have different values of t2.a, which value of t2.a should it pick?
The SQL standard says this is an invalid query for this reason.
For a long time, MySQL has been less strict than the standard, and its GROUP BY algorithm has picked the first row emitted by the join phase.
But it had a "configuration setting" for people who wanted strictness (and safety) as in the standard: flag "only_full_group_by" in session variable "sql_mode".
Then, in 2014 we made this flag on by default:
mysqlserverteam.com/mysql-5-7-only_full_group_by-improved-recognizing-functional-dependencies-enabled-by-default/
because lots of people were running into the same issue as you: they had a query which was giving varying results, depending on version/algorithm or even depending on how many rows the tables had (as the number of rows influences the cost-based choice of algorithms by the optimizer).
So it has been enabled by default since 2014 (MySQL 5.7.5) and I advise to  research why, in your instance of MySQL, it's disabled. There must be an explicit setting which disables it. It may be just a historical leftover (old configuration file, my.cnf, my.ini?). If you enable the flag, the query will get an error message.
Back to the algorithm of GROUP BY. The fact of picking the first row of the group has been documented as subject to change for a long time, and then we removed this behaviour. Again, this allows more performance, and simpler code in MySQL.

To sum up, the query results which you observe before hash join, are easily explainable by a human, but they're not required by the SQL standard and are only an effect of the algorithms MySQL had.

What is left now, in my opinion, is to rewrite this query so that you get the same results as before, and (bonus!) in a way which is guaranteed by the SQL standard to always give these results, forever.

Based on the query in your initial post, I propose two solutions.

select m, max(a), max(b) from
(SELECT t1.m, first_value(t2.a) over w as a, first_value(t2.b) over w as b
FROM t1 LEFT JOIN t2 ON a IN (m, 0)
window w as (partition by t1.m order by t2.b desc, t2.a desc)) as dt
group by m;
+---+--------+--------+
| m | max(a) | max(b) |
+---+--------+--------+
| 1 |      1 |     12 |
| 2 |      2 |    123 |
| 3 |      0 |      1 |
+---+--------+--------+
This joins t1 and t2; for each row of t1, it orders the matching rows in t2 (WINDOW clause), and picks the first values of t2 in this order (FIRST_VALUE); finally, it collapses all rows having the same t1.m into one group (the MAX() isn't really doing anything: all rows in the group have the same value of t2.a).
https://mysqlserverteam.com/mysql-8-0-2-introducing-window-functions/

With a lateral table:

SELECT * from t1
left join lateral (select t2.a, t2.b from t2 where t2.a IN (m, 0) order by t2.b desc, t2.a desc limit 1) as dt on true;
+---+---+-----+
| m | a | b   |
+---+---+-----+
| 1 | 1 |  12 |
| 2 | 2 | 123 |
| 3 | 0 |   1 |
+---+---+-----+
For each row of t1, we compute one single row (the matching one with the biggest values). We use a left join, in case there's no matching row at all.
https://mysqlserverteam.com/support-for-lateral-derived-tables-added-to-mysql-8-0-14/

I hope that this clarifies the situation and gives you a path to solving the problem.

Hi Guilhem Bichot 

First of all thank you for this detailed clear explanation secondly I can't tell you in words how happy I am.

Big kudos to you !!! and to all MySQL developers.

Thanks for your kind words!

Hi

so i not moan anymore about the hash join it self as i understand 

but still my problem is that even with the WINDOW or LATERAL i have performance loss which still hurt me that i am stuck with 8.0.19

now probably what you will read about how i trick to insert random number or measure query duration will shock you or not

so this test is with about 60k sample rows but in our production there are around 52 million number its about telecommunication numbers from different providers
which i have to query in real time and find so say best match from all providers, when let say 100 calls per second come in and i have to query and respond to every sip request with it 0~30 ms 
every -1ms optimization helps me but every +1ms hurt me

 ... Anyway, long story short i have learn from it in a good way ... 

the WINDOW and LATER are not performing as my new solution or maybe the WINDOW or LATERAL query can be improved with SQL syntax ?

WINDOW  0.383932
LATERAL 0.226572
OTHER   0.152198

DELIMITER ;;

DROP FUNCTION IF EXISTS `r`;;
CREATE FUNCTION `r`(`r` float) RETURNS tinyint
    DETERMINISTIC
BEGIN
    INSERT IGNORE INTO t2 SELECT @l:=1+FLOOR(RAND()*10), SUBSTR(RAND(), 3, @l);
    RETURN 1;
END;;

DELIMITER ;

DROP TABLE IF EXISTS `t1`, `t2`;
CREATE TABLE `t1` (`m` int NOT NULL, PRIMARY KEY (`m`));
CREATE TABLE `t2` (`a` int NOT NULL,`b` bigint NOT NULL,PRIMARY KEY (`a`,`b`));
INSERT INTO `t1` (`m`) VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
# this will insert around 60k random numbers
SELECT BENCHMARK(100000, (SELECT r(RAND())));

SELECT @@version;
+-----------+
| @@version |
+-----------+
| 8.0.20    |
+-----------+

# now lets compare all query should return same result

SELECT m, MAX(a) a, MAX(b) b
FROM (
    SELECT t1.m
        , first_value(t2.a) OVER w AS a
        , first_value(t2.b) OVER w AS b
    FROM t1
    LEFT JOIN t2 ON t2.a IN (t1.m, 0)
    WINDOW w AS (PARTITION BY t1.m ORDER BY t2.b DESC, t2.a DESC)
) AS dt
GROUP BY m;
+----+------+------------+
| m  | a    | b          |
+----+------+------------+
|  1 |    1 |          9 |
|  2 |    2 |         99 |
|  3 |    3 |        999 |
|  4 |    4 |       9997 |
|  5 |    5 |      99992 |
|  6 |    6 |     999878 |
|  7 |    7 |    9997112 |
|  8 |    8 |   99966739 |
|  9 |    9 |  999966694 |
| 10 |   10 | 9999084956 |
+----+------+------------+

SELECT *
FROM t1
    LEFT JOIN LATERAL (
        SELECT t2.a, t2.b
        FROM t2 WHERE t2.a IN (m, 0)
        ORDER BY t2.b DESC, t2.a DESC
        LIMIT 1
    ) AS dt ON true;
+----+------+------------+
| m  | a    | b          |
+----+------+------------+
|  1 |    1 |          9 |
|  2 |    2 |         99 |
|  3 |    3 |        999 |
|  4 |    4 |       9997 |
|  5 |    5 |      99992 |
|  6 |    6 |     999878 |
|  7 |    7 |    9997112 |
|  8 |    8 |   99966739 |
|  9 |    9 |  999966694 |
| 10 |   10 | 9999084956 |
+----+------+------------+    

# i know its a bit silly solution but its faster then WINDOW and LATERAL and when there are no index even more difference

SELECT @n:='';
SELECT *, @n:=CONCAT(@n,m,',') n
FROM (
    SELECT *
    FROM t1
    LEFT JOIN t2 ON a IN (m, 0)
    ORDER BY b DESC
) s
WHERE ! FIND_IN_SET(m, @n);
+----+------+------------+-----------------------+
| m  | a    | b          | n                     |
+----+------+------------+-----------------------+
| 10 |   10 | 9999084956 | 10,                   |
|  9 |    9 |  999966694 | 10,9,                 |
|  8 |    8 |   99966739 | 10,9,8,               |
|  7 |    7 |    9997112 | 10,9,8,7,             |
|  6 |    6 |     999878 | 10,9,8,7,6,           |
|  5 |    5 |      99992 | 10,9,8,7,6,5,         |
|  4 |    4 |       9997 | 10,9,8,7,6,5,4,       |
|  3 |    3 |        999 | 10,9,8,7,6,5,4,3,     |
|  2 |    2 |         99 | 10,9,8,7,6,5,4,3,2,   |
|  1 |    1 |          9 | 10,9,8,7,6,5,4,3,2,1, |
+----+------+------------+-----------------------+

# now lets measure the query duration

SELECT @s:=UNIX_TIMESTAMP(SYSDATE(6)) s, (SELECT SUM(b) FROM (
    SELECT m, MAX(a) a, MAX(b) b
    FROM (
        SELECT t1.m
            , first_value(t2.a) OVER w AS a
            , first_value(t2.b) OVER w AS b
        FROM t1
        LEFT JOIN t2 ON t2.a IN (t1.m, 0)
        WINDOW w AS (PARTITION BY t1.m ORDER BY t2.b DESC, t2.a DESC)
    ) AS dt
    GROUP BY m
) s) crc, ROUND(UNIX_TIMESTAMP(SYSDATE(6)) - @s, 6) duration;
+-------------------+-------------+----------+
| s                 | crc         | duration |
+-------------------+-------------+----------+
| 1588935536.436847 | 11110126475 | 0.383932 |
+-------------------+-------------+----------+

SELECT @s:=UNIX_TIMESTAMP(SYSDATE(6)) s, (SELECT SUM(b) FROM (
    SELECT *
    FROM t1
        LEFT JOIN LATERAL (
            SELECT t2.a, t2.b
            FROM t2 WHERE t2.a IN (m, 0)
            ORDER BY t2.b DESC, t2.a DESC
            LIMIT 1
        ) AS dt ON true
) s) crc, ROUND(UNIX_TIMESTAMP(SYSDATE(6)) - @s, 6) duration;
+-------------------+-------------+----------+
| s                 | crc         | duration |
+-------------------+-------------+----------+
| 1588935536.823265 | 11110126475 | 0.226572 |
+-------------------+-------------+----------+

SELECT @n:='';
SELECT @s:=UNIX_TIMESTAMP(SYSDATE(6)) s, (SELECT SUM(b) FROM (
    SELECT *, @n:=CONCAT(@n,m,',') n
    FROM (
        SELECT *
        FROM t1
        LEFT JOIN (
            SELECT *
            FROM t2
        ) t2 ON a IN (m, 0)
        ORDER BY b DESC
    ) s
    WHERE ! FIND_IN_SET(m, @n)
) s) crc, ROUND(UNIX_TIMESTAMP(SYSDATE(6)) - @s, 6) duration;
+-------------------+-------------+----------+
| s                 | crc         | duration |
+-------------------+-------------+----------+
| 1588935537.051649 | 11110126475 | 0.152198 |
+-------------------+-------------+----------+

On my machine (ordinary 4-core Linux workstation), I get 0.21s for window functions, 0.12 for LATERAL, and the same 0.12 for the other query (the one using @n).

Plan for LATERAL:

explain format=tree SELECT * FROM t1     LEFT JOIN LATERAL <etc>

| -> Nested loop left join
    -> Invalidate materialized tables (row from t1)  (cost=1.25 rows=10)
        -> Index scan on t1 using PRIMARY  (cost=1.25 rows=10)
    -> Table scan on dt
        -> Materialize (invalidate on row from t1)
            -> Limit: 1 row(s)
                -> Sort: t2.b DESC, t2.a DESC, limit input to 1 row(s) per chunk  (cost=1296.67 rows=61221)
                    -> Filter: (t2.a in (t1.m,0))
                        -> Index scan on t2 using PRIMARY

Plan for the query with @n:

explain format=tree SELECT *, @n:=CONCAT(@n,m,',') <etc>
 -> Filter: (0 = find_in_set(s.m,(@n)))
    -> Table scan on s
        -> Materialize
            -> Sort: t2.b DESC
                -> Stream results
                    -> Nested loop left join  (cost=61294.51 rows=612210)
                        -> Index scan on t1 using PRIMARY  (cost=1.25 rows=10)
                        -> Filter: (t2.a in (t1.m,0))  (cost=619.44 rows=61221)
                            -> Index range scan on t2 (re-planned for each iteration)  (cost=619.44 rows=61221)

There are 10 rows in t1, 66726 in t2.

If I add another index:
 alter table t2 add index new_index(b,a);
LATERAL goes down to 0.09s.

So, alas I'm not able to reproduce your performance problem: for me LATERAL performs as good as the original query and can even be made faster.
I suggest you explore the plans which are used by your queries, to see if they differ from mine, if they look reasonable, if adding some index could help...

On my end the old way still perform much faster 0.063758 vs best possible 0.117089 which is almost twice

now with a modification to the WINDOW over idea got better results vs Lateral but still the problem with Window the order by use filesort 
is there any way to create index what the WINDOW can use as order ?

screen shoot https://ibb.co/kG5KVFB

the db schema is the same as before with modified index as you suggested 

There are 10 rows in t1, 66,726 in t2.

CREATE TABLE `t1` (
  `m` int NOT NULL,
  PRIMARY KEY (`m`)
);

CREATE TABLE `t2` (
  `a` int NOT NULL,
  `b` bigint NOT NULL,
  PRIMARY KEY (`b`,`a`)
);

#mysql 8.0.20 defaults with lateral 0.138540
SET sql_mode = "only_full_group_by"; 
SET optimizer_switch = "block_nested_loop=on,derived_merge=on"; 
SELECT a, 
	b 
FROM test.t1 
	LEFT JOIN LATERAL (
		SELECT * 
		FROM test.t2 
		WHERE t2.a = t1.m 
		ORDER BY b DESC, 
			a DESC 
		LIMIT 1
	) AS dt ON true 
ORDER BY a, 
	b

#mysql 8.0.20 defaults with window 0.117089
SET sql_mode = "only_full_group_by"; 
SET optimizer_switch = "block_nested_loop=on,derived_merge=on"; 
SELECT a, 
	b 
FROM (
		SELECT *, 
			ROW_NUMBER() OVER (
				PARTITION BY a 
				ORDER BY b DESC, 
					a DESC
			) rn 
		FROM test.t2
	) dt 
WHERE rn = 1 
ORDER BY a, 
	b

#mysql 8.0.20 modification to work the old way 0.063758
SET sql_mode = ""; 
SET optimizer_switch = "block_nested_loop=off,derived_merge=off"; 
SELECT a, 
	b 
FROM (
		SELECT * 
		FROM test.t2 
		ORDER BY b DESC, 
			a DESC
	) t 
GROUP BY a 
ORDER BY a, 
	b

#mysql 8.0.20 modified with window 0.167144
SET sql_mode = ""; 
SET optimizer_switch = "block_nested_loop=on,derived_merge=on"; 
SELECT a, 
	b 
FROM (
		SELECT FIRST_VALUE(t2.a) OVER w AS a, 
			FIRST_VALUE(t2.b) OVER w AS b 
		FROM test.t2 WINDOW w as (
				PARTITION BY a 
				ORDER BY b DESC, 
					a DESC
			)
	) dt 
GROUP BY a 
ORDER BY a, 
	b

If I'm not mistaken, in the queries in your last post, only the one with LATERAL mentions t1 and t2; others mention only t2. So it's impossible to compare the performance of the LATERAL one with that of others.