-- ensure that myisam_stats_method is set to nulls_equal
SET @@myisam_stats_method = 'nulls_equal';
SELECT @@myisam_stats_method;

-- create a small table with ID values 1-10
DROP TABLE IF EXISTS small;
CREATE TABLE small (id int primary key auto_increment) ENGINE=MYISAM;
INSERT INTO small VALUES (), (), (), (), (), (), (), (), (), ();

-- create a much bigger table with ~800k rows, with an equal split of null and non-null values referencing small.id
DROP TABLE IF EXISTS big;
CREATE TABLE big (id int primary key auto_increment, small_id int, key (small_id)) ENGINE=MyISAM;
INSERT INTO big (small_id) SELECT small.id FROM small JOIN small small2;
INSERT INTO big (small_id) SELECT NULL FROM small JOIN small small2;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;
INSERT INTO big (small_id) SELECT small_id FROM big;

-- create an even bigger table with ~4000k rows, with a copy of half of the medium IDs for each of the small IDs
DROP TABLE IF EXISTS huge;
CREATE TABLE huge (id int primary key auto_increment, small_id int, big_id int, key (small_id, big_id)) ENGINE=MyISAM;
INSERT INTO huge (small_id, big_id) SELECT small.id, big.id FROM small JOIN big WHERE big.id % 2 = 0;

-- ensure the index stats are up to date for all three tables 
ANALYZE TABLE small;
ANALYZE TABLE big;
ANALYZE TABLE huge;

-- check the cardinality is sane for the index on big.small_id as per nulls_equal method
SHOW INDEX FROM big WHERE Key_name = 'small_id' \G

-- show a plan for joining from the huge table to the small one, filtered by a condition on the huge table
EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small
WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
GROUP BY small.id;

-- run the query to see how long it takes
SELECT small.id, count(small.id) FROM huge, big, small
WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
GROUP BY small.id;

-- perform a harmless update to all three tables so their index stats become out of date
INSERT INTO small VALUES ();
DELETE FROM small WHERE id = LAST_INSERT_ID();
INSERT INTO big VALUES ();
DELETE FROM big WHERE id = LAST_INSERT_ID();
INSERT INTO huge VALUES ();
DELETE FROM huge WHERE id = LAST_INSERT_ID();

-- optimize all three tables 
OPTIMIZE TABLE small;
OPTIMIZE TABLE big;
OPTIMIZE TABLE huge;

-- check the cardinality is sane for the index on big.small_id (now changed because OPTIMIZE TABLE ignores nulls_equal)
SHOW INDEX FROM big WHERE Key_name = 'small_id' \G

-- show the plan for the same query as before (now goes in the opposite direction)
EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small
WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
GROUP BY small.id;

-- run the query to see how long it takes (twice as long as previously, due to now-suboptimal query plan)
SELECT small.id, count(small.id) FROM huge, big, small
WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
GROUP BY small.id;

-- cleanup
DROP TABLE IF EXISTS small;
DROP TABLE IF EXISTS big;
DROP TABLE IF EXISTS huge;