-- ensure that myisam_stats_method is set to nulls_equal SET @@myisam_stats_method = 'nulls_equal'; SELECT @@myisam_stats_method; -- create a small table with ID values 1-10 DROP TABLE IF EXISTS small; CREATE TABLE small (id int primary key auto_increment) ENGINE=MYISAM; INSERT INTO small VALUES (), (), (), (), (), (), (), (), (), (); -- create a much bigger table with ~800k rows, with an equal split of null and non-null values referencing small.id DROP TABLE IF EXISTS big; CREATE TABLE big (id int primary key auto_increment, small_id int, key (small_id)) ENGINE=MyISAM; INSERT INTO big (small_id) SELECT small.id FROM small JOIN small small2; INSERT INTO big (small_id) SELECT NULL FROM small JOIN small small2; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; INSERT INTO big (small_id) SELECT small_id FROM big; -- create an even bigger table with ~4000k rows, with a copy of half of the medium IDs for each of the small IDs DROP TABLE IF EXISTS huge; CREATE TABLE huge (id int primary key auto_increment, small_id int, big_id int, key (small_id, big_id)) ENGINE=MyISAM; INSERT INTO huge (small_id, big_id) SELECT small.id, big.id FROM small JOIN big WHERE big.id % 2 = 0; -- ensure the index stats are up to date for all three tables ANALYZE TABLE small; ANALYZE TABLE big; ANALYZE TABLE huge; -- check the cardinality is sane for the index on big.small_id as per nulls_equal method SHOW INDEX FROM big WHERE Key_name = 'small_id' \G -- show a plan for joining from the huge table to the small one, filtered by a condition on the huge table EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1 GROUP BY small.id; -- run the query to see how long it takes SELECT small.id, count(small.id) FROM huge, big, small WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1 GROUP BY small.id; -- perform a harmless update to all three tables so their index stats become out of date INSERT INTO small VALUES (); DELETE FROM small WHERE id = LAST_INSERT_ID(); INSERT INTO big VALUES (); DELETE FROM big WHERE id = LAST_INSERT_ID(); INSERT INTO huge VALUES (); DELETE FROM huge WHERE id = LAST_INSERT_ID(); -- optimize all three tables OPTIMIZE TABLE small; OPTIMIZE TABLE big; OPTIMIZE TABLE huge; -- check the cardinality is sane for the index on big.small_id (now changed because OPTIMIZE TABLE ignores nulls_equal) SHOW INDEX FROM big WHERE Key_name = 'small_id' \G -- show the plan for the same query as before (now goes in the opposite direction) EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1 GROUP BY small.id; -- run the query to see how long it takes (twice as long as previously, due to now-suboptimal query plan) SELECT small.id, count(small.id) FROM huge, big, small WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1 GROUP BY small.id; -- cleanup DROP TABLE IF EXISTS small; DROP TABLE IF EXISTS big; DROP TABLE IF EXISTS huge;