Bug #99499 Incorrect result when constant equailty expression is used in LEF JOIN condition
Submitted: 9 May 2020 22:13 Modified: 12 May 2020 8:48
Reporter: Marcos Albe (OCA) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.7.29 OS:Any
Assigned to: CPU Architecture:Any

[9 May 2020 22:13] Marcos Albe
Description:
Adding a constant equality expression to a LEFT JOIN's ON condition can lead to bogus results; Will attach outputs.

How to repeat:
Here's how to reproduce:

CREATE TABLE local (
       id bigint(20) NOT NULL AUTO_INCREMENT,
  base_id int(11),
  
  PRIMARY KEY (id),
  KEY local_m1 (base_id)
) ENGINE=InnoDB;

CREATE TABLE intersection (
   base_id int(11) NOT NULL,
  child_id int(11) NOT NULL,
  
  UNIQUE KEY intersection_u1 (base_id,child_id),
  KEY intersection_m1 (child_id)
) ENGINE=InnoDB;

CREATE TABLE base (
  id int(11) NOT NULL AUTO_INCREMENT,
  PRIMARY KEY (id)
) ENGINE=InnoDB;

INSERT INTO local VALUES (1, null);
INSERT INTO intersection VALUES (1, 1);
INSERT INTO base VALUES (1);

-- Original problem query
   SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   
     FROM local AS l 
LEFT JOIN intersection AS i
       ON l.base_id = i.child_id 
LEFT JOIN base AS b
       ON b.id = i.base_id
      AND b.id = 1 
    WHERE l.id = 1;

-- All queries below are the same as above, except for the WHERE condition. Some will show bogus value "1" for b.id in the results.
SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id = 1 OR l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id = 0 OR l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id = 1 OR l.id = 0;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id = 1 AND l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id IN (1);
    
SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id IN (1,1);

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id IN (0,1);

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id
WHERE l.id IN (1,0);

-- "Fixed" query which drops the constant expression 'AND b.id = 1' in the ON for "base" table LEFT JOIN 
   SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   
     FROM local AS l 
LEFT JOIN intersection AS i
       ON l.base_id = i.child_id 
LEFT JOIN base AS b
       ON b.id = i.base_id
    WHERE l.id = 1;

-- None of these produces the error
SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id = 1 OR l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id = 0 OR l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id = 1 OR l.id = 0;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id = 1 AND l.id = 1;

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id IN (1);
    
SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id IN (1,1);

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id IN (0,1);

SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = i.base_id
WHERE l.id IN (1,0);

/*
I also flipped optimizer_switch with this bash snippet to prove they have no effect
#!/bin/bash
switches="index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,mrr=off,mrr_cost_based=off,block_nested_loop=off,batched_key_access=on,materialization=off,semijoin=off,loosescan=off,firstmatch=off,duplicateweedout=off,subquery_materialization_cost_based=off,use_index_extensions=off,condition_fanout_filter=off,derived_merge=off"
sql="SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   FROM local AS l LEFT JOIN intersection AS i ON l.base_id = i.child_id LEFT JOIN base AS b ON b.id = 1 AND b.id = i.base_id WHERE l.id = 1 OR l.id = 1";

tr "," "\n" <<< $switches |while read switch; do {
  echo -e "==========================\n${switch}\n=========================";
  ./use -BNe "SET optimizer_switch=\"${switch}\"; ${sql};" test;
} done
*/

Suggested fix:
Honor the AND in the ON condition for the LEFT JOIN which should prevent the row from showing up.
[9 May 2020 22:14] Marcos Albe
Test outputs on my instance

Attachment: test.output (application/octet-stream, text), 15.89 KiB.

[9 May 2020 22:14] Marcos Albe
Test script

Attachment: test.sql (application/sql, text), 5.88 KiB.

[9 May 2020 22:20] Marcos Albe
As a curiosity: it happens with any WHERE condition if you use MyISAM

Attachment: test.myisam.output (application/octet-stream, text), 16.35 KiB.

[9 May 2020 22:23] Marcos Albe
It does not affect 8.0.19
[10 May 2020 12:08] MySQL Verification Team
Thank you for the bug report. Please print here the offended query, real result and expected result. Thanks.
[12 May 2020 2:40] Marcos Albe
Hello Miguel,
The problem query is the following:

   SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   
     FROM local AS l 
LEFT JOIN intersection AS i
       ON l.base_id = i.child_id 
LEFT JOIN base AS b
       ON b.id = i.base_id
      AND b.id = 1 
    WHERE l.id = 1;

mysql- [localhost:5730] {msandbox} (test) >    SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*   
    ->      FROM local AS l 
    -> LEFT JOIN intersection AS i
    ->        ON l.base_id = i.child_id 
    -> LEFT JOIN base AS b
    ->        ON b.id = i.base_id
    ->       AND b.id = 1 
    ->     WHERE l.id = 1;
+----------+----+---------+-----------------+---------+----------+---------+------+
| t1       | id | base_id | t2              | base_id | child_id | t3      | id   |
+----------+----+---------+-----------------+---------+----------+---------+------+
| local -> |  1 |    NULL | intersection -> |    NULL |     NULL | base -> |    1 |
+----------+----+---------+-----------------+---------+----------+---------+------+

The expected output would be 
+----------+----+---------+-----------------+---------+----------+---------+------+
| t1       | id | base_id | t2              | base_id | child_id | t3      | id   |
+----------+----+---------+-----------------+---------+----------+---------+------+
| local -> |  1 |    NULL | intersection -> |    NULL |     NULL | base -> | NULL |
+----------+----+---------+-----------------+---------+----------+---------+------+

The "base" table has no matching tuple, so this condition does not hold true:

       ON b.id = i.base_id
      AND b.id = 1 

If  b.id would be equal to i.base_id because both are NULL, then b.id would not be equal to 1... It would be NULL.
[12 May 2020 5:19] Nikolai Ikhalainen
docker run -d --name m5647 -e MYSQL_ROOT_PASSWORD=secret mysql/mysql-server:5.6.47
docker exec -it m5647 mysql -uroot -psecret test

Execute following sql:
CREATE TABLE b (id int(11) NOT NULL AUTO_INCREMENT, PRIMARY KEY (id)) ENGINE=InnoDB;
CREATE TABLE i (b_id int(11) NOT NULL, c_id int(11) NOT NULL, UNIQUE KEY i_u1 (b_id,c_id), KEY i_m1 (c_id)) ENGINE=InnoDB;
CREATE TABLE l (id bigint(20) NOT NULL AUTO_INCREMENT, b_id int(11), PRIMARY KEY (id), KEY l_m1 (b_id)) ENGINE=InnoDB;

insert into b values (1);
insert into i values (1,1);
insert into l values (1,null);

select l.id, b.id from l left join i on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);
select l.id, b.id from l left join i ignore key (i_m1,i_u1) on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);

As you can see exactly the same query (the only difference is index hints, returns different results:
mysql> select l.id, b.id from l left join i on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);
+----+----+
| id | id |
+----+----+
|  1 |  1 |
+----+----+
1 row in set (0.00 sec)

mysql> select l.id, b.id from l left join i ignore key (i_m1,i_u1) on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);
+----+------+
| id | id   |
+----+------+
|  1 | NULL |
+----+------+
1 row in set (0.00 sec)

mysql> explain select l.id, b.id from l left join i on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);
+----+-------------+-------+-------+---------------+---------+---------+-------+------+-------------+
| id | select_type | table | type  | possible_keys | key     | key_len | ref   | rows | Extra       |
+----+-------------+-------+-------+---------------+---------+---------+-------+------+-------------+
|  1 | SIMPLE      | l     | const | PRIMARY       | PRIMARY | 8       | const |    1 | NULL        |
|  1 | SIMPLE      | i     | const | i_m1          | NULL    | NULL    | NULL  |    1 | Using index |
|  1 | SIMPLE      | b     | const | PRIMARY       | PRIMARY | 4       | const |    1 | Using index |
+----+-------------+-------+-------+---------------+---------+---------+-------+------+-------------+
3 rows in set (0.00 sec)

mysql> explain select l.id, b.id from l left join i ignore key (i_m1,i_u1) on (l.b_id = i.c_id) left join b on (b.id = 1 and b.id = i.b_id) where l.id in (1);
+----+-------------+-------+-------+---------------+---------+---------+-------+------+--------------------------+
| id | select_type | table | type  | possible_keys | key     | key_len | ref   | rows | Extra                    |
+----+-------------+-------+-------+---------------+---------+---------+-------+------+--------------------------+
|  1 | SIMPLE      | l     | const | PRIMARY       | PRIMARY | 8       | const |    1 | NULL                     |
|  1 | SIMPLE      | i     | ALL   | NULL          | NULL    | NULL    | NULL  |    1 | Using where              |
|  1 | SIMPLE      | b     | const | PRIMARY       | PRIMARY | 4       | const |    1 | Using where; Using index |
+----+-------------+-------+-------+---------------+---------+---------+-------+------+--------------------------+
3 rows in set (0.00 sec)
[12 May 2020 8:48] MySQL Verification Team
Hello Marcos,
Thank you for the feedback. Verified with version 5.7. Version 8.0 not affected:

Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.7.31 Source distribution BUILT: 2020-MAY-01

Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 5.7 > CREATE DATABASE kk;
Query OK, 1 row affected (0,00 sec)

mysql 5.7 > USE kk;
Database changed
mysql 5.7 > CREATE TABLE local (
    ->        id bigint(20) NOT NULL AUTO_INCREMENT,
    ->   base_id int(11),
    ->
    ->   PRIMARY KEY (id),
    ->   KEY local_m1 (base_id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected (0,30 sec)

mysql 5.7 > CREATE TABLE intersection (
    ->    base_id int(11) NOT NULL,
    ->   child_id int(11) NOT NULL,
    ->
    ->   UNIQUE KEY intersection_u1 (base_id,child_id),
    ->   KEY intersection_m1 (child_id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected (0,30 sec)

mysql 5.7 > CREATE TABLE base (
    ->   id int(11) NOT NULL AUTO_INCREMENT,
    ->   PRIMARY KEY (id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected (0,27 sec)

mysql 5.7 > INSERT INTO local VALUES (1, null);
Query OK, 1 row affected (0,08 sec)

mysql 5.7 > INSERT INTO intersection VALUES (1, 1);
Query OK, 1 row affected (0,04 sec)

mysql 5.7 > INSERT INTO base VALUES (1);
Query OK, 1 row affected (0,05 sec)

mysql 5.7 >   SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*
    ->      FROM local AS l
    -> LEFT JOIN intersection AS i
    ->        ON l.base_id = i.child_id
    -> LEFT JOIN base AS b
    ->        ON b.id = i.base_id
    ->       AND b.id = 1
    ->     WHERE l.id = 1;
+----------+----+---------+-----------------+---------+----------+---------+------+
| t1       | id | base_id | t2              | base_id | child_id | t3      | id   |
+----------+----+---------+-----------------+---------+----------+---------+------+
| local -> |  1 |    NULL | intersection -> |    NULL |     NULL | base -> |    1 |
+----------+----+---------+-----------------+---------+----------+---------+------+
1 row in set (0,03 sec)

mysql 5.7 >

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 8
Server version: 8.0.21 Source distribution BUILT: 2020-MAY-01

Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql 8.0 > CREATE DATABASE kk;
Query OK, 1 row affected (0,14 sec)

mysql 8.0 > USE kk;
Database changed
mysql 8.0 > CREATE TABLE local (
    ->        id bigint(20) NOT NULL AUTO_INCREMENT,
    ->   base_id int(11),
    ->
    ->   PRIMARY KEY (id),
    ->   KEY local_m1 (base_id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected, 2 warnings (0,88 sec)

mysql 8.0 > CREATE TABLE intersection (
    ->    base_id int(11) NOT NULL,
    ->   child_id int(11) NOT NULL,
    ->
    ->   UNIQUE KEY intersection_u1 (base_id,child_id),
    ->   KEY intersection_m1 (child_id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected, 2 warnings (1,82 sec)

mysql 8.0 > CREATE TABLE base (
    ->   id int(11) NOT NULL AUTO_INCREMENT,
    ->   PRIMARY KEY (id)
    -> ) ENGINE=InnoDB;
Query OK, 0 rows affected, 1 warning (0,68 sec)

mysql 8.0 > INSERT INTO local VALUES (1, null);
Query OK, 1 row affected (0,12 sec)

mysql 8.0 > INSERT INTO intersection VALUES (1, 1);
Query OK, 1 row affected (0,13 sec)

mysql 8.0 > INSERT INTO base VALUES (1);
Query OK, 1 row affected (0,10 sec)

mysql 8.0 >   SELECT 'local ->' as 't1',l.*, 'intersection ->' as 't2', i.*, 'base ->' as 't3', b.*
    ->      FROM local AS l
    -> LEFT JOIN intersection AS i
    ->        ON l.base_id = i.child_id
    -> LEFT JOIN base AS b
    ->        ON b.id = i.base_id
    ->       AND b.id = 1
    ->     WHERE l.id = 1;
+----------+----+---------+-----------------+---------+----------+---------+------+
| t1       | id | base_id | t2              | base_id | child_id | t3      | id   |
+----------+----+---------+-----------------+---------+----------+---------+------+
| local -> |  1 |    NULL | intersection -> |    NULL |     NULL | base -> | NULL |
+----------+----+---------+-----------------+---------+----------+---------+------+
1 row in set (0,00 sec)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[25 May 2020 11:25] Marcelo Altmann
This has been fixed on 8.0.17 via https://github.com/mysql/mysql-server/commit/035fc0f6600253294834482fcad00050db55864c

The sql/sql_optimizer.cc and sql/sql_executor.cc changes apply directly to 5.7.
On 5.6 there is a slightly difference. The path will look like:

diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc
index f654e5a04f2..d64205299ab 100644
--- a/sql/sql_optimizer.cc
+++ b/sql/sql_optimizer.cc
@@ -3509,6 +3509,7 @@ const_table_extraction_done:
           {
             s->type= JT_CONST;
             mark_as_null_row(table);
+           table->const_table= 1;
             join->found_const_table_map|= table->map;
            join->const_table_map|= table->map;
            set_position(join, const_count++, s, NULL);

In case you consider ^^ a contribution:

(*) I confirm the code being submitted is offered under the terms of the OCA, and that I am authorized to contribute it.