Bug #120275 DATE Index Lookup Loses Existing JOIN Matches Even with Explicit CAST(MEDIUMTEXT AS DATE), Causing S1/S2 Divergence
Submitted: 16 Apr 7:10 Modified: 20 Apr 6:50
Reporter: Y F Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S2 (Serious)
Version:8.0.45 OS:Ubuntu (Ubuntu 20.04.4 LTS)
Assigned to: CPU Architecture:x86 (Intel(R) Xeon(R) Platinum 8358P @ 2.60GHz (64 Cores, 96MB L3 Cache))

[16 Apr 7:10] Y F
Description:
This issue can be described using two temporal states of the same tables.

In the baseline state S1, after creating the tables and inserting a small
amount of data, the query returns 13 rows:

------------------------------------
mysql> SELECT 'S1' AS stage, COUNT(*) AS cnt
    -> FROM s1_result;
+-------+-----+
| stage | cnt |
+-------+-----+
| S1    | 13  |
+-------+-----+
1 row in set
------------------------------------

Then I transform S1 into S2 by inserting 64 additional rows into the DATE
table, followed by ANALYZE TABLE, and execute the same query again. In S2,
the query returns 49 rows:

------------------------------------
mysql> SELECT 'S2' AS stage, COUNT(*) AS cnt
    -> FROM s2_result;
+-------+-----+
| stage | cnt |
+-------+-----+
| S2    | 49  |
+-------+-----+
1 row in set
------------------------------------

However, because S2 is obtained from S1 by append-only inserts on the left
side of the JOIN, every row already present in S1 should still be present in
S2. That monotonicity property is violated: 12 rows from S1 disappear in S2.

------------------------------------
mysql> SELECT COUNT(*) AS missing_rows
    -> FROM s1_result s1
    -> LEFT JOIN s2_result s2
    ->   ON s1.k <=> s2.k
    ->  AND s1.mt <=> s2.mt
    -> WHERE s2.k IS NULL;
+--------------+
| missing_rows |
+--------------+
| 12           |
+--------------+
1 row in set
------------------------------------

The missing rows correspond to previously matched MEDIUMTEXT values such as:

------------------------------------
0t5e72qrwlv74pmf
gf_d5p9ccs
sample_h
sample_q
------------------------------------

This case is distinguishable from the earlier report Bug #120253 in an important way:

1. Forcing a non-index plan with `IGNORE INDEX (idx_d)` restores correctness.
2. Rewriting the JOIN as `m.d = CAST(r.mt AS DATE)` does not restore
   correctness.

Observed summary:

------------------------------------
COUNT(S1)                     = 13
COUNT(S2)                     = 49
MISSING(S1 -> S2)             = 12
COUNT(S2_force_no_index)      = 125
MISSING(S1 -> S2_force_no_index) = 0
COUNT(S2_explicit_cast)       = 49
MISSING(S1 -> S2_explicit_cast)  = 12
------------------------------------

This suggests a wrong-result bug in the DATE index-lookup path for
text-to-DATE comparisons that is broader than, or different from, the
implicit-CAST omission described in the earlier report. Here, even an
explicit `CAST(... AS DATE)` still produces the wrong result under the
index-lookup plan.

How to repeat:
SET SESSION sql_mode = '';
SET SESSION cte_max_recursion_depth = 1000;

DROP TABLE IF EXISTS t_main;
DROP TABLE IF EXISTS t_ref;
DROP TABLE IF EXISTS s1_result;
DROP TABLE IF EXISTS s2_result;
DROP TABLE IF EXISTS s2_no_index_result;
DROP TABLE IF EXISTS s2_cast_result;

CREATE TABLE t_main (
    id INT NOT NULL AUTO_INCREMENT,
    k  INT NULL,
    d  DATE NOT NULL,
    PRIMARY KEY (id),
    KEY idx_d (d)
);

CREATE TABLE t_ref (
    id INT NOT NULL AUTO_INCREMENT,
    mt MEDIUMTEXT NULL,
    f  ENUM('value1','value2','value3') NULL,
    PRIMARY KEY (id)
);

-- S1: baseline state
INSERT INTO t_main (k, d) VALUES
(-1,   '0000-00-00'),
( 3,   '0000-00-00'),
(-3,   '0000-00-00'),
(-598, '2023-01-01');

INSERT INTO t_ref (mt, f) VALUES
('2023-01-01 00:00:00', 'value1'),
('sample_h',            'value3'),
('sample_q',            'value3'),
('gf_d5p9ccs',          'value3'),
('0t5e72qrwlv74pmf',    'value2');

ANALYZE TABLE t_main;
ANALYZE TABLE t_ref;

CREATE TABLE s1_result AS
SELECT m.k, r.mt
FROM t_main AS m
JOIN t_ref  AS r
  ON m.d = r.mt
WHERE r.f >= 'val51';

SELECT 'S1' AS stage, COUNT(*) AS cnt
FROM s1_result;

-- S2: append-only expansion of S1
INSERT INTO t_main (k, d)
WITH RECURSIVE seq(n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1 FROM seq WHERE n < 64
)
SELECT
    CASE MOD(n, 5)
        WHEN 0 THEN -3
        WHEN 1 THEN -1
        WHEN 2 THEN 3
        WHEN 3 THEN -598
        ELSE 166
    END,
    CASE
        WHEN MOD(n, 4) = 0 THEN '0000-00-00'
        ELSE '2023-01-01'
    END
FROM seq;

ANALYZE TABLE t_main;

CREATE TABLE s2_result AS
SELECT m.k, r.mt
FROM t_main AS m
JOIN t_ref  AS r
  ON m.d = r.mt
WHERE r.f >= 'val51';

CREATE TABLE s2_no_index_result AS
SELECT m.k, r.mt
FROM t_main AS m IGNORE INDEX (idx_d)
JOIN t_ref  AS r
  ON m.d = r.mt
WHERE r.f >= 'val51';

CREATE TABLE s2_cast_result AS
SELECT m.k, r.mt
FROM t_main AS m
JOIN t_ref  AS r
  ON m.d = CAST(r.mt AS DATE)
WHERE r.f >= 'val51';

SELECT 'S2' AS stage, COUNT(*) AS cnt
FROM s2_result;

SELECT 'S2_force_no_index' AS stage, COUNT(*) AS cnt
FROM s2_no_index_result;

SELECT 'S2_explicit_cast' AS stage, COUNT(*) AS cnt
FROM s2_cast_result;

SELECT COUNT(*) AS missing_rows
FROM s1_result s1
LEFT JOIN s2_result s2
  ON s1.k  <=> s2.k
 AND s1.mt <=> s2.mt
WHERE s2.k IS NULL;

SELECT COUNT(*) AS missing_rows
FROM s1_result s1
LEFT JOIN s2_no_index_result s2
  ON s1.k  <=> s2.k
 AND s1.mt <=> s2.mt
WHERE s2.k IS NULL;

SELECT COUNT(*) AS missing_rows
FROM s1_result s1
LEFT JOIN s2_cast_result s2
  ON s1.k  <=> s2.k
 AND s1.mt <=> s2.mt
WHERE s2.k IS NULL;

SELECT DISTINCT s1.mt AS missing_text_value
FROM s1_result s1
LEFT JOIN s2_result s2
  ON s1.k  <=> s2.k
 AND s1.mt <=> s2.mt
WHERE s2.k IS NULL
ORDER BY missing_text_value;

-- Optional diagnostics
EXPLAIN
SELECT m.k, r.mt
FROM t_main AS m
JOIN t_ref  AS r
  ON m.d = r.mt
WHERE r.f >= 'val51';

EXPLAIN
SELECT m.k, r.mt
FROM t_main AS m IGNORE INDEX (idx_d)
JOIN t_ref  AS r
  ON m.d = r.mt
WHERE r.f >= 'val51';

EXPLAIN
SELECT m.k, r.mt
FROM t_main AS m
JOIN t_ref  AS r
  ON m.d = CAST(r.mt AS DATE)
WHERE r.f >= 'val51';
[20 Apr 6:50] Roy Lyseng
Thank you for the bug report.
Verified as described.