Bug #78609 myisam_stats_method not respected for optimize and repair table
Submitted: 28 Sep 2015 20:22 Modified: 19 Apr 2017 16:36
Reporter: Joe Kislo Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: MyISAM storage engine Severity:S3 (Non-critical)
Version:5.7.16 OS:Any
Assigned to: CPU Architecture:Any

[28 Sep 2015 20:22] Joe Kislo
Description:
In MySQL 5.6.26 variable myisam_stats_method defaults to nulls_unequal.  If you change the setting to nulls_equal, several table operation commands (specifically optimize table, repair table) do not respect  this setting and will cause the indexes to be rebuilt using nulls_unequal.  We have tested setting myisam_stats_method both in the server config as well as as a session variable.  Neither solve the problem, leaving us with no way of optimizing or repairing tables.

How to repeat:
# Initial Setup
create database mysqlbug;
set variable myisam_stats_method='nulls_equal';
show variables like 'myisam_stats_method';
+---------------------+-------------+
| Variable_name       | Value       |
+---------------------+-------------+
| myisam_stats_method | nulls_equal |
+---------------------+-------------+

#Testing Optimize Table
create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
optimize table MySQLBugTable;
show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |           9 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           9 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
2 rows in set (0.00 sec)

# ---> Incorrect indexes: Cardinality of LotsOfNullsIndex should be 3 because there are 3 unique values in the table if all nulls are equal.

#Testing Repair table
drop table MySQLBugTable;
create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
repair table MySQLBugTable;
show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |           9 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           9 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+

# ---> Incorrect indexes: Cardinality of LotsOfNullsIndex should be 3 because there are 3 unique values in the table if all nulls are equal.

#Testing Check table
drop table MySQLBugTable;
create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
check table MySQLBugTable;
show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |           9 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           3 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+

# ---> Correct indexes: LotsOfNulls shows cardinality of 3.

#Testing Analyze table
drop table MySQLBugTable;
create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
analyze table MySQLBugTable;
show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |           9 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           3 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+

# ---> Correct indexes: LotsOfNulls shows cardinality of 3.

Suggested fix:
Make Optimize table and Repair table properly respect the myisam_stats_method system setting.

For possible work arounds, does anybody know if an "optimize" can be simulated by doing:

alter table MySQLBugTable Engine=MyISAM;
analyze table MySQLBugTable;

I can't find in the documentation that this is the same operation; specifically I do not believe the above will sort indexes, which optimize table does.
[6 Oct 2015 17:32] MySQL Verification Team
Hi,

Thank you for your bug report, however it is NOT a bug at all.

If you read our manual carefully, you will read that this variable is used by the optimizer. Whenever optimizer needs to have value distribution data for some index, this variable determines how will NULL values be treated.

Optimizer is not used AT ALL, for the OPTIMIZE and REPAIR operations, as there is no need what so ever to optimize these operations.

There could be a feature request for the report issued from these operations, which would use this option for providing the report. However, I do not see any practical use for such a feature.
[8 Oct 2015 19:09] Joe Kislo
I think maybe I wasn't clear, or maybe I'm not understanding what you're saying.  My actual issue is with statistics collection, not the query optimizer.  We ran into this issue because one our queries was running for 40 minutes, when it should only run for .04 seconds. When we optimize the table, we get 40 minute execution times, and when we analyze the table we get .04 second execution times.  So this is more than just show index showing some bad numbers, we really do see the query optimizer making a bad decision based on the cardinalities stored for the index.

After a bunch of trying to figure out what was going on, we discovered the cardinalities shown in 'show index' are radically different depending on if you use optimize or analyze.  Since we know the actual contents of the table, we can figure out what the "right" statistics are, and it's the optimize table that's out putting wrong cardinalities.  The optimizer is actually making the right decision in both cases since it bases it's decision on what it sees in the statistics stored for the index.  Using the test case we assembled above, 
you can see that optimize is the one not collecting the statistics correctly based on the myisam_stats_method setting.
[9 Oct 2015 15:41] MySQL Verification Team
I fail to see what is the problem.

Analyze is the only statement that will bring MyISAM indices up to date, regarding both cardinalities and distribution. That is how this storage engine is designed.

Again, optimize and repair table do not query for cardinality and distribution, hence they will not be affected by startup variable.
[14 Oct 2015 23:39] Joe Kislo
My understanding has always been that optimize also performs what an analyze would do.  That's why once you optimize a table, the system won't let you run
an analyze on the table:

mysql> create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
Query OK, 0 rows affected (0.00 sec)

mysql> insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
Query OK, 9 rows affected (0.00 sec)
Records: 9  Duplicates: 0  Warnings: 0

mysql> optimize table MySQLBugTable;
+------------------------+----------+----------+----------+
| Table                  | Op       | Msg_type | Msg_text |
+------------------------+----------+----------+----------+
| mysqlbug.MySQLBugTable | optimize | status   | OK       |
+------------------------+----------+----------+----------+
1 row in set (0.00 sec)

mysql> analyze table MySQLBugTable;
+------------------------+---------+----------+-----------------------------+
| Table                  | Op      | Msg_type | Msg_text                    |
+------------------------+---------+----------+-----------------------------+
| mysqlbug.MySQLBugTable | analyze | status   | Table is already up to date |
+------------------------+---------+----------+-----------------------------+
1 row in set (0.01 sec)

The opposite is not true though, you can analyze a table, then it will let you optimize it.

Also the MySQL manual seems to imply it does update the table's index statistics:

http://dev.mysql.com/doc/refman/5.6/en/optimize-table.html

3. If the table's statistics are not up to date (and the repair could not be accomplished by sorting the index), update them.

Perhaps there is some nuance in what it's doing with the statistics collection vs. analyze, however in practical terms, it will rebuild the table index statistics with null_unequal (which the the bug we're running into).  If optimize table didn't mess with the indexes, then you should be able to:
 1) Create the MySQLBugTable
 2) Analyze it
 3) Insert a row
 4) Optimize it

And the index statistics/cardinailties would be the same as after #3.  However that's not the case.  Everything is great, until the optimize hits it, and it rebuilds using null_unquals, which leads to extremely poor decisions made by the optimizer for the table going forward (but make perfect sense based on the cardinalities now listed for the table):

mysql> set myisam_stats_method='nulls_equal';
Query OK, 0 rows affected (0.00 sec)

mysql> create table MySQLBugTable (NoNulls int not null auto_increment, LotsOfNulls int null, primary key (NoNulls), index LotsOfNullsIndex (LotsOfNulls)) engine=MyISAM;
Query OK, 0 rows affected (0.00 sec)

mysql> insert into MySQLBugTable values (null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, null),(null, 55),(null, 44);
Query OK, 9 rows affected (0.00 sec)
Records: 9  Duplicates: 0  Warnings: 0

mysql> analyze table MySQLBugTable;
+------------------------+---------+----------+----------+
| Table                  | Op      | Msg_type | Msg_text |
+------------------------+---------+----------+----------+
| mysqlbug.MySQLBugTable | analyze | status   | OK       |
+------------------------+---------+----------+----------+
1 row in set (0.00 sec)

mysql> show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |           9 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           3 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
2 rows in set (0.00 sec)

mysql> # Shows good cardinality for null_equals

mysql> insert into MySQLBugTable values (null,null);
Query OK, 1 row affected (0.00 sec)

mysql> show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |          10 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |           3 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
2 rows in set (0.00 sec)

mysql> # Still shows good Statistics, but with 1 more row now

mysql> optimize table MySQLBugTable;
+------------------------+----------+----------+----------+
| Table                  | Op       | Msg_type | Msg_text |
+------------------------+----------+----------+----------+
| mysqlbug.MySQLBugTable | optimize | status   | OK       |
+------------------------+----------+----------+----------+
1 row in set (0.00 sec)

mysql> show index from MySQLBugTable;
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table         | Non_unique | Key_name         | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| MySQLBugTable |          0 | PRIMARY          |            1 | NoNulls     | A         |          10 |     NULL | NULL   |      | BTREE      |         |               |
| MySQLBugTable |          1 | LotsOfNullsIndex |            1 | LotsOfNulls | A         |          10 |     NULL | NULL   | YES  | BTREE      |         |               |
+---------------+------------+------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
2 rows in set (0.00 sec)

mysql> # LotsOfNulls index shows cardinality appropriate for nulls_unequal, and wrong for null_equals

Once you've called optimize on the table, how would you get a table out of this state?  Analyze won't run on the table anymore, it says it's 'Already up to date'.  What command would you recommend we run to get the table working again for us if we run optimize?
[15 Oct 2015 13:21] MySQL Verification Team
As I wrote before, this is only about reporting after command has been issued. But, let us check that. Issue after ANALYZE and then after OPTIMIZE, the following query:

SELECT count(*) FROM MySQLBugTable WHERE LotsOfNulls IS NULL;

If the results are the same, then it is all only about the reporting after the maintenance command, which is not important.

If the results differ, then it is most probably the bug, but would require some other checks on our side.
[16 Nov 2015 1:00] Bugs System
No feedback was provided for this bug for over a month, so it is
being suspended automatically. If you are able to provide the
information that was originally requested, please do so and change
the status of the bug back to "Open".
[12 Apr 2017 14:48] Liam McBrien
This issue also affects MySQL versions 5.5.52 and 5.7.16.

I cannot understand how this is not being treated as a serious bug. As it stands, issuing an OPTIMIZE TABLE command will change query plans if you are using myisam_stats_method = nulls_equal. In my production database, this causes a query which executes in tens of seconds to start taking half an hour.

Here is a query plan for a contrived example, immediately after an ANALYZE TABLE:

mysql> EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small
    -> WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
    -> GROUP BY small.id;
+----+-------------+-------+--------+------------------+----------+---------+------------------------+--------+-----------------------------------------------------------+
| id | select_type | table | type   | possible_keys    | key      | key_len | ref                    | rows   | Extra                                                     |
+----+-------------+-------+--------+------------------+----------+---------+------------------------+--------+-----------------------------------------------------------+
|  1 | SIMPLE      | huge  | ref    | small_id         | small_id | 5       | const                  | 405978 | Using where; Using index; Using temporary; Using filesort |
|  1 | SIMPLE      | big   | eq_ref | PRIMARY,small_id | PRIMARY  | 4       | test_list.huge.big_id  |      1 |                                                           |
|  1 | SIMPLE      | small | eq_ref | PRIMARY          | PRIMARY  | 4       | test_list.big.small_id |      1 | Using index                                               |
+----+-------------+-------+--------+------------------+----------+---------+------------------------+--------+-----------------------------------------------------------+
3 rows in set (0.00 sec)

We then issue a harmless update and deletion so that the index stats become out-of-date, and issue an OPTIMIZE TABLE command:

mysql> INSERT INTO big VALUES ();
Query OK, 1 row affected (0.00 sec)

mysql> DELETE FROM big WHERE id = LAST_INSERT_ID();
Query OK, 1 row affected (0.00 sec)

mysql> OPTIMIZE TABLE big;
+---------------+----------+----------+----------+
| Table         | Op       | Msg_type | Msg_text |
+---------------+----------+----------+----------+
| test_list.big | optimize | status   | OK       |
+---------------+----------+----------+----------+
1 row in set (1.88 sec)

Now, the query plan completely changes direction, because the index cardinality has been recalculated using nulls_unequal:

mysql> EXPLAIN SELECT small.id, count(small.id) FROM huge, big, small
    -> WHERE huge.big_id = big.id AND big.small_id = small.id AND huge.small_id = 1
    -> GROUP BY small.id;
+----+-------------+-------+-------+------------------+----------+---------+------------------------+------+--------------------------+
| id | select_type | table | type  | possible_keys    | key      | key_len | ref                    | rows | Extra                    |
+----+-------------+-------+-------+------------------+----------+---------+------------------------+------+--------------------------+
|  1 | SIMPLE      | small | index | PRIMARY          | PRIMARY  | 4       | NULL                   |    5 | Using index              |
|  1 | SIMPLE      | big   | ref   | PRIMARY,small_id | small_id | 5       | test_list.small.id     |    2 | Using where              |
|  1 | SIMPLE      | huge  | ref   | small_id         | small_id | 10      | const,test_list.big.id |    1 | Using where; Using index |
+----+-------------+-------+-------+------------------+----------+---------+------------------------+------+--------------------------+
3 rows in set (0.01 sec)

The calculation of index cardinality when running OPTIMIZE TABLE should respect the myisam_stats_method setting, just like ANALYZE TABLE. As it stands, the index stats are corrupted by OPTIMIZE TABLE if myisam_stats_method is not set to the default. The workaround is to perform a table update and issue an ANALYZE TABLE immediately after running OPTIMIZE TABLE.

I will attach my contrived example to this bug report in case it helps.
[12 Apr 2017 14:50] Liam McBrien
Contrived example to illustrate OPTIMIZE TABLE changing index cardinality and query plans

Attachment: mysql_bug_78609_example.sql (text/plain), 3.53 KiB.

[12 Apr 2017 15:34] Joe Kislo
I can't agree more.  This is a  serious issue, especially for people who don't know about it.  We worked around this by rewriting all of of our maintenance scripts to detect the version of MySQL, and if it's an affected version we:

alter table TableName engine=MyISAM;
analyze table TableName;

Instead of ever calling optimize.

But we spent a long time before we tracked down why performance was so abysmal on the new version of MySQL.
[12 Apr 2017 16:20] MySQL Verification Team
Hi!

If you read our chapter 17.2 or our manual (for the 5.7 server version) you will see that after changing myisam_stats_method, you should run ANALYZE command as it is not run automatically by any other command.

If you run ANALYZE after that method is changed, then and only then, you can expect reliable results.

Your comments also indicate that myisam_stats_method changed all by itself during the operations. If that is true, we would very much like to have a test case.

For your information, MyISAM is the storage engine that will be deprecated in the future. We do not know yet when will this be decided. We will, of course, fix all known MyISAM-related security bugs and will verify the other, verifiable, bugs, for future reference.

We are slowly moving towards the InnoDB engine. We are recommending that you start planning for conversion to the InnoDB storage engine, as well, which is far superior and does not have none of these problems.

Thank you for your interest in our products.
[13 Apr 2017 8:26] Liam McBrien
I'm aware that MyISAM is deprecated and we're planning our migration to InnoDB - this is a work in progress, though, and in the short term, this bug is a severe annoyance. I am also aware of the fact that table stats are not updated unless ANALYZE/OPTIMIZE TABLE is run when the table stats are out of date, and this is a contributory factor to this bug.

The myisam_stats_method setting does not change when OPTIMIZE TABLE is run. It's just ignored by that command, meaning that the index stats become corrupted because they've been calculated using the wrong method. Worse still, we need to force a table update to fix the problem afterwards, since ANALYZE TABLE only works when the table stats are out of date.

Example:

mysql> show variables like 'myisam_stats_method';
+---------------------+-------------+
| Variable_name       | Value       |
+---------------------+-------------+
| myisam_stats_method | nulls_equal |
+---------------------+-------------+
1 row in set (0.00 sec)

mysql> analyze table big;
+---------------+---------+----------+----------+
| Table         | Op      | Msg_type | Msg_text |
+---------------+---------+----------+----------+
| test_list.big | analyze | status   | OK       |
+---------------+---------+----------+----------+
1 row in set (0.19 sec)

mysql> SHOW INDEX FROM big WHERE Key_name = 'small_id' \G
*************************** 1. row ***************************
        Table: big
   Non_unique: 1
     Key_name: small_id
 Seq_in_index: 1
  Column_name: small_id
    Collation: A
  Cardinality: 10        -- correct nulls_equal cardinality
     Sub_part: NULL
       Packed: NULL
         Null: YES
   Index_type: BTREE
      Comment: 
Index_comment: 
1 row in set (0.01 sec)

mysql> INSERT INTO big VALUES ();
Query OK, 1 row affected (0.00 sec)

mysql> DELETE FROM big WHERE id = LAST_INSERT_ID();
Query OK, 1 row affected (0.00 sec)

mysql> OPTIMIZE TABLE big;
+---------------+----------+----------+----------+
| Table         | Op       | Msg_type | Msg_text |
+---------------+----------+----------+----------+
| test_list.big | optimize | status   | OK       |
+---------------+----------+----------+----------+
1 row in set (1.45 sec)

mysql> show variables like 'myisam_stats_method';
+---------------------+-------------+
| Variable_name       | Value       |
+---------------------+-------------+
| myisam_stats_method | nulls_equal |
+---------------------+-------------+
1 row in set (0.00 sec)

mysql> SHOW INDEX FROM big WHERE Key_name = 'small_id' \G
*************************** 1. row ***************************
        Table: big
   Non_unique: 1
     Key_name: small_id
 Seq_in_index: 1
  Column_name: small_id
    Collation: A
  Cardinality: 409600        -- incorrect; nulls_unequal cardinality
     Sub_part: NULL
       Packed: NULL
         Null: YES
   Index_type: BTREE
      Comment: 
Index_comment: 
1 row in set (0.00 sec)

mysql> ANALYZE TABLE big;
+---------------+---------+----------+-----------------------------+
| Table         | Op      | Msg_type | Msg_text                    |
+---------------+---------+----------+-----------------------------+
| test_list.big | analyze | status   | Table is already up to date |
+---------------+---------+----------+-----------------------------+
1 row in set (0.00 sec)

mysql> INSERT INTO big VALUES ();
Query OK, 1 row affected (0.00 sec)

mysql> DELETE FROM big WHERE id = LAST_INSERT_ID();
Query OK, 1 row affected (0.00 sec)

mysql> ANALYZE TABLE big;
+---------------+---------+----------+----------+
| Table         | Op      | Msg_type | Msg_text |
+---------------+---------+----------+----------+
| test_list.big | analyze | status   | OK       |
+---------------+---------+----------+----------+
1 row in set (0.21 sec)

mysql> SHOW INDEX FROM big WHERE Key_name = 'small_id' \G
*************************** 1. row ***************************
        Table: big
   Non_unique: 1
     Key_name: small_id
 Seq_in_index: 1
  Column_name: small_id
    Collation: A
  Cardinality: 10        -- correct nulls_equal cardinality
     Sub_part: NULL
       Packed: NULL
         Null: YES
   Index_type: BTREE
      Comment: 
Index_comment: 
1 row in set (0.00 sec)
[13 Apr 2017 14:29] MySQL Verification Team
Hi!

I have read carefully your last contribution and I must admit that this looks like a small bug in MyISAM.

Hence, please upload the dump of the table `big` in this bug, as I would like to verify this bug.

Please, upload the file, by using the tab "Files" and follow ALL instructions there.

Also, upload the file AFTER the Easter holidays as the file will be removed after several days automatically.

Thanks in advance.
[13 Apr 2017 14:51] Liam McBrien
The file I uploaded yesterday (mysql_bug_78609_example.sql) contains all of the commands required to create and populate the tables in my example, as well as a number of subsequent commands which illustrate the issue.
[13 Apr 2017 15:35] MySQL Verification Team
Hi!

I have downloaded the file that you have uploaded. Thank you on that.

Please, do confirm that you have been able to repeat the issue with the test case uploaded, since in your comments you have worked with a table that has over 400 K rows.
[13 Apr 2017 17:58] Liam McBrien
Yes, the uploaded test case includes the commands required to create the 'big' table from my examples, which contains roughly 800k rows of which half have null values for the small_id column.
[19 Apr 2017 16:36] MySQL Verification Team
Hi!

I managed to repeat your test case. Hence, this is now a fully verified bug. Here is the output from the running of your test case:

---------------------------------------------------------------------------

@@myisam_stats_method
nulls_equal
Table	Op	Msg_type	Msg_text
pomocni.small	analyze	status	OK
Table	Op	Msg_type	Msg_text
pomocni.big	analyze	status	OK
Table	Op	Msg_type	Msg_text
pomocni.huge	analyze	status	Table is already up to date
*************************** 1. row ***************************
        Table: big
   Non_unique: 1
     Key_name: small_id
 Seq_in_index: 1
  Column_name: small_id
    Collation: A
  Cardinality: 11
     Sub_part: NULL
       Packed: NULL
         Null: YES
   Index_type: BTREE
      Comment: 
Index_comment: 
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	filtered	Extra
1	SIMPLE	huge	NULL	ref	small_id	small_id	5	const	405978	100.00	Using where; Using index; Using temporary; Using filesort
1	SIMPLE	big	NULL	eq_ref	PRIMARY,small_id	PRIMARY	4	pomocni.huge.big_id	1	100.00	Using where
1	SIMPLE	small	NULL	eq_ref	PRIMARY	PRIMARY	4	pomocni.big.small_id	1	100.00	Using index
id	count(small.id)
1	20475
2	20485
3	20475
4	20485
5	20475
6	20485
7	20475
8	20485
9	20475
10	20485
Table	Op	Msg_type	Msg_text
pomocni.small	optimize	status	OK
Table	Op	Msg_type	Msg_text
pomocni.big	optimize	status	OK
Table	Op	Msg_type	Msg_text
pomocni.huge	optimize	status	OK
*************************** 1. row ***************************
        Table: big
   Non_unique: 1
     Key_name: small_id
 Seq_in_index: 1
  Column_name: small_id
    Collation: A
  Cardinality: 409600
     Sub_part: NULL
       Packed: NULL
         Null: YES
   Index_type: BTREE
      Comment: 
Index_comment: 
id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	filtered	Extra
1	SIMPLE	small	NULL	index	PRIMARY	PRIMARY	4	NULL	10	100.00	Using index
1	SIMPLE	big	NULL	ref	PRIMARY,small_id	small_id	5	pomocni.small.id	2	100.00	NULL
1	SIMPLE	huge	NULL	ref	small_id	small_id	10	const,pomocni.big.id	1	100.00	Using index
id	count(small.id)
1	20475
2	20485
3	20475
4	20485
5	20475
6	20485
7	20475
8	20485
9	20475
10	20485
--------------------------------------------------