Bug #120254 Equivalent queries show different behavior due to short-circuit optimization failure
Submitted: 13 Apr 10:04 Modified: 14 Apr 8:48
Reporter: Guo Yuxiao Email Updates:
Status: Not a Bug Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S1 (Critical)
Version:9.6.0 OS:Any
Assigned to: CPU Architecture:Any

[13 Apr 10:04] Guo Yuxiao
Description:
Two equivalent queries exhibit different execution behaviors due to an issue with short circuit optimization.

I have the following two queries:

-- queryA
WITH q AS (
SELECT
  t_lhs.c0 AS _col_0,
  t_rhs.c0 AS _col_1
FROM t1 AS t_lhs
LEFT JOIN t1 AS t_rhs
  ON (
    JSON_CONTAINS(JSON_OBJECT('k', t_rhs.c0), JSON_OBJECT('k', JSON_VALID('ebCUQ7d'))) <=> JSON_LENGTH('gi4G3pphe')
  )
)
SELECT * FROM q WHERE FALSE;

-- queryB
WITH q AS (
SELECT
  t_lhs.c0 AS _col_0,
  t_rhs.c0 AS _col_1
FROM t1 AS t_lhs
LEFT JOIN t1 AS t_rhs
  ON (
    JSON_CONTAINS(JSON_OBJECT('k', t_rhs.c0), JSON_OBJECT('k', JSON_VALID('ebCUQ7d'))) <=> JSON_LENGTH('gi4G3pphe')
  )
)

SELECT
  t2._col_2 AS _col_2
FROM (
  SELECT
    t_branch.c0 AS _col_2
  FROM (
    SELECT
      q._col_0 AS c0,
      q._col_1 AS _col_5
    from q
    WHERE
      (q._col_0 >= q._col_1) IN (
        ('b' AND q._col_0) IN (
          CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
        ),
        q._col_0,
        NOT q._col_0
      )

    UNION ALL

    SELECT
      q._col_0 AS c0,
      q._col_1 AS _col_5
    from q
    WHERE
      NOT (
        (q._col_0 >= q._col_1) IN (
          ('b' AND q._col_0) IN (
            CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
          ),
          q._col_0,
          NOT q._col_0
        )
      )
  ) AS t_branch

  UNION ALL

  SELECT
    q._col_0 AS _col_2
  from q
  WHERE
    (
      (q._col_0 >= q._col_1) IN (
        ('b' AND q._col_0) IN (
          CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
        ),
        q._col_0,
        NOT q._col_0
      )
    ) IS NULL
) AS t2
WHERE FALSE;

The structure of queryB is:

SELECT ...
(
  SELECT ... FROM q WHERE P
  UNION ALL
  SELECT ... FROM q WHERE NOT P
  UNION ALL
  SELECT ... FROM q WHERE P IS NULL
)
WHERE FALSE

where P is:

(q._col_0 >= q._col_1) IN (
  ('b' AND q._col_0) IN (
    CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
  ),
  q._col_0,
  NOT q._col_0
)

The results of queryA and queryB are as follows:

-- result of queryA
Empty Set

-- result of queryB
ERROR 3141 (22032): Invalid JSON text in argument 1 to function json_length: "Invalid value." at position 0.

It can be seen that the short circuit optimization fails for queryB.

How to repeat:
-- create table
DROP TABLE IF EXISTS t0;
CREATE TABLE t0 ( c0 float unsigned  DEFAULT NULL COMMENT 'asdf', UNIQUE KEY c0 (c0) ) CHARSET=utf8mb4;
INSERT INTO t0 VALUES (NULL),(000000000000),(0000.0510917),(00000.082687),(00000.167716),(000000.19299),(00000.246631),(00000.459718),(00000.644608),(001397980000);
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 ( c0 float unsigned DEFAULT NULL, UNIQUE KEY c0 (c0) ) CHARSET=utf8mb4;
INSERT INTO t1 VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL);

-- queryA
WITH q AS (
SELECT
  t_lhs.c0 AS _col_0,
  t_rhs.c0 AS _col_1
FROM t1 AS t_lhs
LEFT JOIN t1 AS t_rhs
  ON (
    JSON_CONTAINS(JSON_OBJECT('k', t_rhs.c0), JSON_OBJECT('k', JSON_VALID('ebCUQ7d'))) <=> JSON_LENGTH('gi4G3pphe')
  )
)
SELECT * FROM q WHERE FALSE;

-- queryB
WITH q AS (
SELECT
  t_lhs.c0 AS _col_0,
  t_rhs.c0 AS _col_1
FROM t1 AS t_lhs
LEFT JOIN t1 AS t_rhs
  ON (
    JSON_CONTAINS(JSON_OBJECT('k', t_rhs.c0), JSON_OBJECT('k', JSON_VALID('ebCUQ7d'))) <=> JSON_LENGTH('gi4G3pphe')
  )
)

SELECT
  t2._col_2 AS _col_2
FROM (
  SELECT
    t_branch.c0 AS _col_2
  FROM (
    SELECT
      q._col_0 AS c0,
      q._col_1 AS _col_5
    from q
    WHERE
      (q._col_0 >= q._col_1) IN (
        ('b' AND q._col_0) IN (
          CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
        ),
        q._col_0,
        NOT q._col_0
      )

    UNION ALL

    SELECT
      q._col_0 AS c0,
      q._col_1 AS _col_5
    from q
    WHERE
      NOT (
        (q._col_0 >= q._col_1) IN (
          ('b' AND q._col_0) IN (
            CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
          ),
          q._col_0,
          NOT q._col_0
        )
      )
  ) AS t_branch

  UNION ALL

  SELECT
    q._col_0 AS _col_2
  from q
  WHERE
    (
      (q._col_0 >= q._col_1) IN (
        ('b' AND q._col_0) IN (
          CAST(67.23 AS DECIMAL(65, 30)) - CAST(q._col_1 AS DECIMAL(65, 30))
        ),
        q._col_0,
        NOT q._col_0
      )
    ) IS NULL
) AS t2
WHERE FALSE;
[14 Apr 8:48] Roy Lyseng
Thank you for the bu report.
However, we do not think this is a bug.
The optimizer is free to short circuit evaluations in some cases but not in others.