-- MySQL DISTINCT early-out / hash-join Cartesian-product DROP DATABASE IF EXISTS mysql971phd9039_scaled_v3; CREATE DATABASE mysql971phd9039_scaled_v3; USE mysql971phd9039_scaled_v3; SET SESSION information_schema_stats_expiry = 0; SET SESSION optimizer_prune_level = 0; SET @T0_ROWS := 2000; SET @T1_ROWS := 20000; SET @THRESHOLD_STR := '1000000'; CREATE TABLE t0( c0 BIGINT, c1 BIGINT, c2 DOUBLE, c3 VARCHAR(64), c4 BIGINT, c5 DOUBLE ) ENGINE=InnoDB; CREATE TABLE t1( c0 BIGINT, c1 BIGINT, c2 DOUBLE, c3 VARCHAR(64), c4 BIGINT ) ENGINE=InnoDB; CREATE TABLE seq(n INT NOT NULL PRIMARY KEY) ENGINE=InnoDB; INSERT INTO seq VALUES (0); INSERT INTO seq SELECT n + 1 FROM seq; INSERT INTO seq SELECT n + 2 FROM seq; INSERT INTO seq SELECT n + 4 FROM seq; INSERT INTO seq SELECT n + 8 FROM seq; INSERT INTO seq SELECT n + 16 FROM seq; INSERT INTO seq SELECT n + 32 FROM seq; INSERT INTO seq SELECT n + 64 FROM seq; INSERT INTO seq SELECT n + 128 FROM seq; INSERT INTO seq SELECT n + 256 FROM seq; INSERT INTO seq SELECT n + 512 FROM seq; INSERT INTO seq SELECT n + 1024 FROM seq; INSERT INTO seq SELECT n + 2048 FROM seq; INSERT INTO seq SELECT n + 4096 FROM seq; INSERT INTO seq SELECT n + 8192 FROM seq; INSERT INTO seq SELECT n + 16384 FROM seq; INSERT INTO seq SELECT n + 32768 FROM seq; SELECT COUNT(*) AS seq_count, MIN(n) AS seq_min, MAX(n) AS seq_max FROM seq; INSERT INTO t0(c0, c1, c2, c3, c4, c5) SELECT 1000000 + n AS c0, 10 + 2 * n AS c1, CAST((MOD(n * 1103515245 + 12345, 200001) - 100000) AS DOUBLE) AS c2, CONCAT('gaussian_0_3_', 250 + MOD(n * 17, 120)) AS c3, 5 * n AS c4, 1 + MOD(n * 31, 30) AS c5 FROM seq WHERE n < @T0_ROWS; INSERT INTO t1(c0, c1, c2, c3, c4) SELECT 2000000 + n AS c0, 3000000 + 3 * n AS c1, MOD(n, 5) AS c2, @THRESHOLD_STR AS c3, 7 * n AS c4 FROM seq WHERE n < @T1_ROWS; CREATE INDEX i0 ON t0(c0); CREATE INDEX i1 ON t0(c1); CREATE INDEX i2 ON t1(c0); CREATE INDEX i3 ON t1(c0, c1); ANALYZE TABLE t0, t1; SELECT @T0_ROWS AS t0_rows, @T1_ROWS AS t1_rows, @T0_ROWS * @T1_ROWS AS default_candidate_pairs, @THRESHOLD_STR AS threshold_str; SELECT COUNT(*) AS t0_count, MIN(c1) AS t0_min_c1, MAX(c1) AS t0_max_c1 FROM t0; SELECT COUNT(*) AS t1_count, MIN(c3) AS t1_min_c3, MAX(c3) AS t1_max_c3 FROM t1; SELECT COUNT(*) AS expected_distinct_rows FROM ( SELECT DISTINCTROW t1.c4 AS ref0, t1.c3 AS ref1, t1.c2 AS ref2 FROM t1, t0 WHERE (t0.c1) < (CAST(GREATEST(t1.c3, '0.8727017201037127') AS SIGNED)) ) AS q; SHOW WARNINGS LIMIT 5; -- Baseline. Bad plan if it appears: -- Inner hash join (no condition) -> huge Cartesian product -> filter -> temp dedup. EXPLAIN ANALYZE SELECT DISTINCTROW t1.c4 AS ref0, t1.c3 AS ref1, t1.c2 AS ref2 FROM t1, t0 WHERE (t0.c1) < (CAST(GREATEST(t1.c3, '0.8727017201037127') AS SIGNED)); SHOW WARNINGS LIMIT 5; -- Hinted. Good plan if it appears: -- t1 outer -> t0 index range scan, re-planned per row -> Limit: 1 row(s). EXPLAIN ANALYZE SELECT /*+ JOIN_ORDER(t1, t0) */ DISTINCTROW t1.c4 AS ref0, t1.c3 AS ref1, t1.c2 AS ref2 FROM t1, t0 WHERE (t0.c1) < (CAST(GREATEST(t1.c3, '0.8727017201037127') AS SIGNED)); SHOW WARNINGS LIMIT 5; -- More direct join-method test: NO_BNL disables hash join / BNL-style join use in MySQL 8+/9.x. EXPLAIN ANALYZE SELECT /*+ NO_BNL(t0,t1) */ DISTINCTROW t1.c4 AS ref0, t1.c3 AS ref1, t1.c2 AS ref2 FROM t1, t0 WHERE (t0.c1) < (CAST(GREATEST(t1.c3, '0.8727017201037127') AS SIGNED)); SHOW WARNINGS LIMIT 5; -- Semantic rewrite: clean existence-style form. EXPLAIN ANALYZE SELECT DISTINCTROW t1.c4 AS ref0, t1.c3 AS ref1, t1.c2 AS ref2 FROM t1 WHERE EXISTS ( SELECT 1 FROM t0 WHERE t0.c1 < CAST(GREATEST(t1.c3, '0.8727017201037127') AS SIGNED) ); SHOW WARNINGS LIMIT 5;