-- Minimized and amplified reproducer for MySQL DISTINCT early-stop / unused-table duplicate expansion -- Original query shape: -- SELECT DISTINCTROW t2.c2, t1.c1 -- FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT(t0.c0)); -- -- Key idea: -- t0 is not used in the DISTINCT output. Matching t0 rows only duplicate -- the same (t2.c2, t1.c1) keys. A good plan should be able to stop after -- finding one matching t0 row, but a bad plan may expand all t0 matches and -- deduplicate later. DROP DATABASE IF EXISTS mysql_distinct_early_stop_min; CREATE DATABASE mysql_distinct_early_stop_min; USE mysql_distinct_early_stop_min; -- Tuning knobs. -- Expected final DISTINCT rows = @t1_rows * @t2_rows. -- A non-early-stop plan may generate roughly: -- @t0_zero_rows * @t1_rows * @t2_rows -- rows before temporary-table deduplication. SET @t0_zero_rows := 200; -- amplification factor: matching rows for NOT(t0.c0) SET @t0_nonzero_rows := 20; -- non-matching rows, prevents degenerate single-value table SET @t1_rows := 600; SET @t2_rows := 600; CREATE TABLE t0( c0 BIGINT ) ENGINE=InnoDB; CREATE TABLE t1( c1 BIGINT ) ENGINE=InnoDB; CREATE TABLE t2( c2 DOUBLE ) ENGINE=InnoDB; -- Use a normal helper table, not a TEMPORARY table. -- MySQL can raise "Can't reopen table" when the same temporary table is -- referenced multiple times in one query. CREATE TABLE digits_helper( d INT NOT NULL PRIMARY KEY ) ENGINE=InnoDB; INSERT INTO digits_helper(d) VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); CREATE TABLE seq_helper( n INT NOT NULL PRIMARY KEY ) ENGINE=InnoDB; INSERT INTO seq_helper(n) SELECT ones.d + 10 * tens.d + 100 * hundreds.d + 1000 * thousands.d + 1 AS n FROM digits_helper AS ones CROSS JOIN digits_helper AS tens CROSS JOIN digits_helper AS hundreds CROSS JOIN digits_helper AS thousands WHERE ones.d + 10 * tens.d + 100 * hundreds.d + 1000 * thousands.d + 1 <= 10000; -- t0: many rows where NOT(c0) is true, i.e. c0 = 0. -- These rows do not affect the DISTINCT key and should be collapsible to an -- existence check / inner LIMIT 1. INSERT INTO t0(c0) SELECT 0 FROM seq_helper WHERE n <= @t0_zero_rows; -- Add non-matching rows: NOT(c0) is false for nonzero c0. INSERT INTO t0(c0) SELECT 1 + (n % 9) FROM seq_helper WHERE n <= @t0_nonzero_rows; -- t1 and t2 have unique projected values, so the final DISTINCT cardinality is -- exactly @t1_rows * @t2_rows. INSERT INTO t1(c1) SELECT 300000 + 3 * n FROM seq_helper WHERE n <= @t1_rows; INSERT INTO t2(c2) SELECT CAST(200000 + 3 * n AS DOUBLE) FROM seq_helper WHERE n <= @t2_rows; -- Keep only the three relevant indexes observed in the original case. CREATE INDEX i0 ON t0(c0); CREATE INDEX i4 ON t1(c1); CREATE INDEX i7 ON t2(c2); SET SESSION information_schema_stats_expiry = 0; SET SESSION optimizer_prune_level = 0; ANALYZE TABLE t0, t1, t2; SELECT (SELECT COUNT(*) FROM t0 WHERE NOT c0) AS t0_matching_rows, (SELECT COUNT(*) FROM t1) AS t1_rows, (SELECT COUNT(*) FROM t2) AS t2_rows, (SELECT COUNT(*) FROM t1) * (SELECT COUNT(*) FROM t2) AS expected_distinct_rows, (SELECT COUNT(*) FROM t0 WHERE NOT c0) * (SELECT COUNT(*) FROM t1) * (SELECT COUNT(*) FROM t2) AS duplicate_expanded_rows; -- Baseline query: the potentially bad plan may expand all matching t0 rows -- before temporary-table deduplication. EXPLAIN ANALYZE SELECT DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1 FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT (t0.c0)); -- Hinted query from your tool. In the interesting plan, look for: -- Limit: 1 row(s) -- on the t0 lookup. EXPLAIN ANALYZE SELECT /*+ JOIN_ORDER(t2, t1) */ DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1 FROM t2, t1 STRAIGHT_JOIN t0 ON (NOT (t0.c0)); -- Semantic rewrite: t0 only proves existence and does not contribute to the -- DISTINCT key. EXPLAIN ANALYZE SELECT DISTINCTROW t2.c2 AS ref0, t1.c1 AS ref1 FROM t2, t1 WHERE EXISTS ( SELECT 1 FROM t0 WHERE NOT t0.c0 ); -- Optional cleanup of helper tables if you want the database to contain only -- the three reproducer tables after loading. Leave commented if you want to -- inspect the generated data. -- DROP TABLE seq_helper; -- DROP TABLE digits_helper;