Description:
We observed a severe performance issue with a query involving multiple joins (including ON TRUE / ON FALSE conditions) combined with a window function.
The query executes and returns results quickly on PostgreSQL and DuckDB, but on MySQL and MariaDB it does not return any result in reasonable time unless a LIMIT clause is added.
This suggests a potential optimizer limitation or inefficient execution strategy when handling join-heavy queries together with window functions.
- On MySQL:
- Query does not return (or takes extremely long time)
- Adding LIMIT (e.g. LIMIT 1000) makes it return immediately
- On PostgreSQL / DuckDB / TiDB:
- Query returns quickly without LIMIT
- The final result size is approximately 160,000 rows
How to repeat:
```SQL
-- SCHEMA
CREATE TABLE users (
id INT,
username VARCHAR(100),
email VARCHAR(255),
age INT,
status VARCHAR(20),
created_at TIMESTAMP NULL,
score DOUBLE
);
CREATE TABLE posts (
id INT,
user_id INT,
title VARCHAR(255),
content VARCHAR(1000),
views INT,
likes INT,
created_at TIMESTAMP NULL,
rating DOUBLE
);
CREATE TABLE comments (
id INT,
post_id INT,
user_id INT,
content VARCHAR(1000),
is_spam INT,
created_at TIMESTAMP NULL
);
CREATE TABLE orders (
id INT,
user_id INT,
amount DOUBLE,
status VARCHAR(20),
created_at TIMESTAMP NULL
);
INSERT INTO users VALUES
(1, 'alice', 'alice@test.com', 20, 'active', '2022-01-01 10:00:00', 88.5),
(2, 'bob', 'bob@test.com', 30, 'active', '2022-01-02 11:00:00', 92.3),
(3, 'carol', NULL, NULL, 'banned','2022-01-03 12:00:00', NULL),
(4, 'dave', 'dave@test.com', 45, 'active', '2022-01-04 13:00:00', 65.2),
(5, NULL, 'null@test.com', 18, 'inactive','2022-01-05 14:00:00', 70.0);
INSERT INTO posts VALUES
(1, 1, 'Hello World', 'First post', 100, 10, '2022-01-10 10:00:00', 4.5),
(2, 1, 'Another Post', NULL, 150, 20, '2022-01-11 11:00:00', 3.0),
(3, 2, 'Bob Post', 'Content', NULL, 5, '2022-01-12 12:00:00', NULL),
(4, 3, NULL, 'Empty', 50, 2, '2022-01-13 13:00:00', 5.0),
(5, 4, 'Last Post', 'Last', 300, 30,'2022-01-14 14:00:00', 4.9);
INSERT INTO comments VALUES
(1, 1, 2, 'Nice post', 0, '2022-01-20 10:00:00'),
(2, 1, 3, 'Spam here', 1, '2022-01-21 11:00:00'),
(3, 2, 1, 'Thanks', 0, '2022-01-22 12:00:00'),
(4, 4, 5, NULL, 0, '2022-01-23 13:00:00');
INSERT INTO orders VALUES
(1, 1, 100.00, 'paid', '2022-02-01 09:00:00'),
(2, 1, 200.50, 'shipped', '2022-02-02 10:00:00'),
(3, 2, NULL, 'failed', '2022-02-03 11:00:00'),
(4, 3, 50.00, 'paid', '2022-02-04 12:00:00'),
(5, 5, 999.99, 'paid', '2022-02-05 13:00:00');
-- TRIGGER SQLs:
SELECT
COALESCE(
(SELECT VAR_POP(id) FROM users),
(SELECT VAR_SAMP(id) FROM users)
) AS c0,
FLOOR(
CEIL(
STDDEV_POP(ref_6.id) OVER (
PARTITION BY subq_0.c0
ORDER BY ref_10.age, ref_11.post_id, subq_0.c0
)
)
) AS c1,
ref_8.status AS c2
FROM orders AS ref_0
INNER JOIN comments AS ref_3
INNER JOIN comments AS ref_4
INNER JOIN orders AS ref_5
ON (TRUE)
INNER JOIN users AS ref_6
ON (TRUE)
ON (TRUE)
ON (LPAD('xei', ref_4.id, ' ') <= 'y3')
LEFT JOIN orders AS ref_8
LEFT JOIN (
SELECT ref_9.age AS c0
FROM users AS ref_9
WHERE TRUE
) AS subq_0
ON (FALSE)
RIGHT JOIN users AS ref_10
ON (TRUE)
INNER JOIN comments AS ref_11
ON (ref_10.id != ref_11.id)
ON (TRUE)
WHERE TRUE;
```
Description: We observed a severe performance issue with a query involving multiple joins (including ON TRUE / ON FALSE conditions) combined with a window function. The query executes and returns results quickly on PostgreSQL and DuckDB, but on MySQL and MariaDB it does not return any result in reasonable time unless a LIMIT clause is added. This suggests a potential optimizer limitation or inefficient execution strategy when handling join-heavy queries together with window functions. - On MySQL: - Query does not return (or takes extremely long time) - Adding LIMIT (e.g. LIMIT 1000) makes it return immediately - On PostgreSQL / DuckDB / TiDB: - Query returns quickly without LIMIT - The final result size is approximately 160,000 rows How to repeat: ```SQL -- SCHEMA CREATE TABLE users ( id INT, username VARCHAR(100), email VARCHAR(255), age INT, status VARCHAR(20), created_at TIMESTAMP NULL, score DOUBLE ); CREATE TABLE posts ( id INT, user_id INT, title VARCHAR(255), content VARCHAR(1000), views INT, likes INT, created_at TIMESTAMP NULL, rating DOUBLE ); CREATE TABLE comments ( id INT, post_id INT, user_id INT, content VARCHAR(1000), is_spam INT, created_at TIMESTAMP NULL ); CREATE TABLE orders ( id INT, user_id INT, amount DOUBLE, status VARCHAR(20), created_at TIMESTAMP NULL ); INSERT INTO users VALUES (1, 'alice', 'alice@test.com', 20, 'active', '2022-01-01 10:00:00', 88.5), (2, 'bob', 'bob@test.com', 30, 'active', '2022-01-02 11:00:00', 92.3), (3, 'carol', NULL, NULL, 'banned','2022-01-03 12:00:00', NULL), (4, 'dave', 'dave@test.com', 45, 'active', '2022-01-04 13:00:00', 65.2), (5, NULL, 'null@test.com', 18, 'inactive','2022-01-05 14:00:00', 70.0); INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post', 100, 10, '2022-01-10 10:00:00', 4.5), (2, 1, 'Another Post', NULL, 150, 20, '2022-01-11 11:00:00', 3.0), (3, 2, 'Bob Post', 'Content', NULL, 5, '2022-01-12 12:00:00', NULL), (4, 3, NULL, 'Empty', 50, 2, '2022-01-13 13:00:00', 5.0), (5, 4, 'Last Post', 'Last', 300, 30,'2022-01-14 14:00:00', 4.9); INSERT INTO comments VALUES (1, 1, 2, 'Nice post', 0, '2022-01-20 10:00:00'), (2, 1, 3, 'Spam here', 1, '2022-01-21 11:00:00'), (3, 2, 1, 'Thanks', 0, '2022-01-22 12:00:00'), (4, 4, 5, NULL, 0, '2022-01-23 13:00:00'); INSERT INTO orders VALUES (1, 1, 100.00, 'paid', '2022-02-01 09:00:00'), (2, 1, 200.50, 'shipped', '2022-02-02 10:00:00'), (3, 2, NULL, 'failed', '2022-02-03 11:00:00'), (4, 3, 50.00, 'paid', '2022-02-04 12:00:00'), (5, 5, 999.99, 'paid', '2022-02-05 13:00:00'); -- TRIGGER SQLs: SELECT COALESCE( (SELECT VAR_POP(id) FROM users), (SELECT VAR_SAMP(id) FROM users) ) AS c0, FLOOR( CEIL( STDDEV_POP(ref_6.id) OVER ( PARTITION BY subq_0.c0 ORDER BY ref_10.age, ref_11.post_id, subq_0.c0 ) ) ) AS c1, ref_8.status AS c2 FROM orders AS ref_0 INNER JOIN comments AS ref_3 INNER JOIN comments AS ref_4 INNER JOIN orders AS ref_5 ON (TRUE) INNER JOIN users AS ref_6 ON (TRUE) ON (TRUE) ON (LPAD('xei', ref_4.id, ' ') <= 'y3') LEFT JOIN orders AS ref_8 LEFT JOIN ( SELECT ref_9.age AS c0 FROM users AS ref_9 WHERE TRUE ) AS subq_0 ON (FALSE) RIGHT JOIN users AS ref_10 ON (TRUE) INNER JOIN comments AS ref_11 ON (ref_10.id != ref_11.id) ON (TRUE) WHERE TRUE; ```