-- Minimized and amplified reproducer for MySQL DISTINCT early-stop / unused-table duplicate expansion
-- Original query shape:
--   SELECT DISTINCTROW t2.c2, t1.c1
--   FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT(t0.c0));
--
-- Key idea:
--   t0 is not used in the DISTINCT output.  Matching t0 rows only duplicate
--   the same (t2.c2, t1.c1) keys.  A good plan should be able to stop after
--   finding one matching t0 row, but a bad plan may expand all t0 matches and
--   deduplicate later.

DROP DATABASE IF EXISTS mysql_distinct_early_stop_min;
CREATE DATABASE mysql_distinct_early_stop_min;
USE mysql_distinct_early_stop_min;

-- Tuning knobs.
-- Expected final DISTINCT rows = @t1_rows * @t2_rows.
-- A non-early-stop plan may generate roughly:
--   @t0_zero_rows * @t1_rows * @t2_rows
-- rows before temporary-table deduplication.
SET @t0_zero_rows    := 200;  -- amplification factor: matching rows for NOT(t0.c0)
SET @t0_nonzero_rows := 20;   -- non-matching rows, prevents degenerate single-value table
SET @t1_rows         := 600;
SET @t2_rows         := 600;

CREATE TABLE t0(
  c0 BIGINT
) ENGINE=InnoDB;

CREATE TABLE t1(
  c1 BIGINT
) ENGINE=InnoDB;

CREATE TABLE t2(
  c2 DOUBLE
) ENGINE=InnoDB;

-- Use a normal helper table, not a TEMPORARY table.
-- MySQL can raise "Can't reopen table" when the same temporary table is
-- referenced multiple times in one query.
CREATE TABLE digits_helper(
  d INT NOT NULL PRIMARY KEY
) ENGINE=InnoDB;

INSERT INTO digits_helper(d) VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

CREATE TABLE seq_helper(
  n INT NOT NULL PRIMARY KEY
) ENGINE=InnoDB;

INSERT INTO seq_helper(n)
SELECT ones.d + 10 * tens.d + 100 * hundreds.d + 1000 * thousands.d + 1 AS n
FROM digits_helper AS ones
CROSS JOIN digits_helper AS tens
CROSS JOIN digits_helper AS hundreds
CROSS JOIN digits_helper AS thousands
WHERE ones.d + 10 * tens.d + 100 * hundreds.d + 1000 * thousands.d + 1 <= 10000;

-- t0: many rows where NOT(c0) is true, i.e. c0 = 0.
-- These rows do not affect the DISTINCT key and should be collapsible to an
-- existence check / inner LIMIT 1.
INSERT INTO t0(c0)
SELECT 0
FROM seq_helper
WHERE n <= @t0_zero_rows;

-- Add non-matching rows: NOT(c0) is false for nonzero c0.
INSERT INTO t0(c0)
SELECT 1 + (n % 9)
FROM seq_helper
WHERE n <= @t0_nonzero_rows;

-- t1 and t2 have unique projected values, so the final DISTINCT cardinality is
-- exactly @t1_rows * @t2_rows.
INSERT INTO t1(c1)
SELECT 300000 + 3 * n
FROM seq_helper
WHERE n <= @t1_rows;

INSERT INTO t2(c2)
SELECT CAST(200000 + 3 * n AS DOUBLE)
FROM seq_helper
WHERE n <= @t2_rows;

-- Keep only the three relevant indexes observed in the original case.
CREATE INDEX i0 ON t0(c0);
CREATE INDEX i4 ON t1(c1);
CREATE INDEX i7 ON t2(c2);

SET SESSION information_schema_stats_expiry = 0;
SET SESSION optimizer_prune_level = 0;
ANALYZE TABLE t0, t1, t2;

SELECT
  (SELECT COUNT(*) FROM t0 WHERE NOT c0) AS t0_matching_rows,
  (SELECT COUNT(*) FROM t1) AS t1_rows,
  (SELECT COUNT(*) FROM t2) AS t2_rows,
  (SELECT COUNT(*) FROM t1) * (SELECT COUNT(*) FROM t2) AS expected_distinct_rows,
  (SELECT COUNT(*) FROM t0 WHERE NOT c0) * (SELECT COUNT(*) FROM t1) * (SELECT COUNT(*) FROM t2) AS duplicate_expanded_rows;

-- Baseline query: the potentially bad plan may expand all matching t0 rows
-- before temporary-table deduplication.
EXPLAIN ANALYZE
SELECT DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1
FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT (t0.c0));

-- Hinted query from your tool.  In the interesting plan, look for:
--   Limit: 1 row(s)
-- on the t0 lookup.
EXPLAIN ANALYZE
SELECT /*+ JOIN_ORDER(t2, t1) */ DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1
FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT (t0.c0));

-- Semantic rewrite: t0 only proves existence and does not contribute to the
-- DISTINCT key.
EXPLAIN ANALYZE
SELECT DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1
FROM t2, t1
WHERE EXISTS (
  SELECT 1 FROM t0 WHERE NOT t0.c0
);

-- Optional cleanup of helper tables if you want the database to contain only
-- the three reproducer tables after loading.  Leave commented if you want to
-- inspect the generated data.
-- DROP TABLE seq_helper;
-- DROP TABLE digits_helper;