From 783bd8d2c8934b21c9dcd7312826277f615bfcef Mon Sep 17 00:00:00 2001 From: casazhang Date: Fri, 17 Sep 2021 14:05:33 +0800 Subject: [PATCH] Bug#104108 CUMULATIVE FREQUENCY MAY GROW OVER 1.0 DUE TO FLOAT ERRORS When singleton histogram is constructed, cumulative frequency is calculated by adding frequencies of previous buckets and the current bucket. This may lead to accumulated float errors, and the last cumulative frequency may be over 1.0 (cf > 1.0) and cause abnormal behaviors. This patch accumulates with integer type to avoid float errors. --- mysql-test/r/histograms.result | 11 +++++++++++ mysql-test/suite/innodb/r/histogram-debug.result | 10 +++++----- mysql-test/t/histograms.test | 10 ++++++++++ sql/histograms/singleton.cc | 7 ++++--- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/mysql-test/r/histograms.result b/mysql-test/r/histograms.result index 757464b2f23..5a5ba1f0a75 100644 --- a/mysql-test/r/histograms.result +++ b/mysql-test/r/histograms.result @@ -1969,6 +1969,17 @@ AND OUTR . col_varchar_key IS NULL HAVING x <= 3 ORDER BY OUTR . pk , OUTR . pk; x +# +# Bug#104108 CUMULATIVE FREQUENCY MAY GROW OVER 1.0 DUE TO FLOAT ERRORS +# +ANALYZE TABLE t2 UPDATE HISTOGRAM ON col_datetime WITH 100 BUCKETS; +Table Op Msg_type Msg_text +test.t2 histogram status Histogram statistics created for column 'col_datetime'. +SELECT schema_name, table_name, column_name, +JSON_REMOVE(histogram, '$."last-updated"') +FROM information_schema.COLUMN_STATISTICS WHERE table_name = 't2' and column_name = 'col_datetime'; +SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') +test t2 col_datetime {"buckets": [["1900-01-01 00:00:00.000000", 0.3], ["2001-01-20 12:47:23.000000", 0.35], ["2001-07-25 08:40:24.000000", 0.4], ["2001-10-22 11:13:24.000000", 0.45], ["2002-08-25 20:35:06.000000", 0.5], ["2002-12-08 11:34:58.000000", 0.55], ["2003-03-12 02:00:34.000000", 0.6], ["2003-12-04 11:14:26.000000", 0.65], ["2005-08-15 00:00:00.000000", 0.7], ["2006-09-11 18:25:21.000000", 0.75], ["2007-04-10 12:16:04.000000", 0.8], ["2008-05-16 08:09:06.000000", 0.85], ["2008-07-02 00:00:00.000000", 0.9], ["2009-06-07 13:48:58.000000", 0.95], ["2009-11-07 00:00:00.000000", 1.0]], "data-type": "datetime", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 100} DROP TABLE t1, t2; SET @@SESSION.sql_mode=DEFAULT; # diff --git a/mysql-test/suite/innodb/r/histogram-debug.result b/mysql-test/suite/innodb/r/histogram-debug.result index 4515dc26286..4bee1432db6 100644 --- a/mysql-test/suite/innodb/r/histogram-debug.result +++ b/mysql-test/suite/innodb/r/histogram-debug.result @@ -32,7 +32,7 @@ Table Op Msg_type Msg_text test.t1 histogram status Histogram statistics created for column 'a'. SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, JSON_REMOVE(histogram, '$."last-updated"') FROM information_schema.column_statistics WHERE COLUMN_NAME = 'a'; SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') -test t1 a {"buckets": [[10, 0.1], [20, 0.2], [30, 0.30000000000000004], [40, 0.4], [50, 0.5], [60, 0.6], [70, 0.7], [80, 0.7999999999999999], [90, 0.8999999999999999], [100, 0.9999999999999999]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} +test t1 a {"buckets": [[10, 0.1], [20, 0.2], [30, 0.3], [40, 0.4], [50, 0.5], [60, 0.6], [70, 0.7], [80, 0.8], [90, 0.9], [100, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} DROP TABLE t1; SELECT NAME,COUNT FROM INFORMATION_SCHEMA.INNODB_METRICS WHERE SUBSYSTEM='sampling'; NAME COUNT @@ -97,7 +97,7 @@ Table Op Msg_type Msg_text test.t1 histogram status Histogram statistics created for column 'a'. SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, JSON_REMOVE(histogram, '$."last-updated"') FROM information_schema.column_statistics WHERE COLUMN_NAME = 'a'; SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') -test t1 a {"buckets": [[10, 0.2], [20, 0.4], [30, 0.6000000000000001], [40, 0.8], [50, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 50} +test t1 a {"buckets": [[10, 0.2], [20, 0.4], [30, 0.6], [40, 0.8], [50, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 50} set global innodb_limit_optimistic_insert_debug = 0; DROP TABLE t1; SELECT NAME,COUNT FROM INFORMATION_SCHEMA.INNODB_METRICS WHERE SUBSYSTEM='sampling'; @@ -167,7 +167,7 @@ Table Op Msg_type Msg_text test.t1 histogram status Histogram statistics created for column 'a'. SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, JSON_REMOVE(histogram, '$."last-updated"') FROM information_schema.column_statistics WHERE COLUMN_NAME = 'a'; SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') -test t1 a {"buckets": [[40, 0.2], [50, 0.4], [80, 0.6000000000000001], [90, 0.8], [100, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} +test t1 a {"buckets": [[40, 0.2], [50, 0.4], [80, 0.6], [90, 0.8], [100, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} SELECT NAME,COUNT FROM INFORMATION_SCHEMA.INNODB_METRICS WHERE SUBSYSTEM='sampling'; NAME COUNT sampled_pages_read 3 @@ -183,7 +183,7 @@ Table Op Msg_type Msg_text test.t1 histogram status Histogram statistics created for column 'a'. SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, JSON_REMOVE(histogram, '$."last-updated"') FROM information_schema.column_statistics WHERE COLUMN_NAME = 'a'; SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') -test t1 a {"buckets": [[40, 0.2], [50, 0.4], [80, 0.6000000000000001], [90, 0.8], [100, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} +test t1 a {"buckets": [[40, 0.2], [50, 0.4], [80, 0.6], [90, 0.8], [100, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 0.5, "histogram-type": "singleton", "number-of-buckets-specified": 50} DROP TABLE t1; set global innodb_limit_optimistic_insert_debug = 0; SET DEBUG='-d,histogram_force_sampling'; @@ -273,7 +273,7 @@ Table Op Msg_type Msg_text test.t1 histogram status Histogram statistics created for column 'id'. SELECT SCHEMA_NAME, TABLE_NAME, COLUMN_NAME, JSON_REMOVE(histogram, '$."last-updated"') FROM information_schema.column_statistics WHERE COLUMN_NAME = 'id'; SCHEMA_NAME TABLE_NAME COLUMN_NAME JSON_REMOVE(histogram, '$."last-updated"') -test t1 id {"buckets": [[1, 0.2], [2, 0.4], [3, 0.6000000000000001], [4, 0.8], [5, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 50} +test t1 id {"buckets": [[1, 0.2], [2, 0.4], [3, 0.6], [4, 0.8], [5, 1.0]], "data-type": "int", "null-values": 0.0, "collation-id": 8, "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 50} SET DEBUG = "-d, sample_read_sample_half"; DROP TABLE t1; # diff --git a/mysql-test/t/histograms.test b/mysql-test/t/histograms.test index 6d6254ae4a5..6087be23bd9 100644 --- a/mysql-test/t/histograms.test +++ b/mysql-test/t/histograms.test @@ -1558,6 +1558,16 @@ AND OUTR . col_varchar_key IS NULL HAVING x <= 3 ORDER BY OUTR . pk , OUTR . pk; + +--echo # +--echo # Bug#104108 CUMULATIVE FREQUENCY MAY GROW OVER 1.0 DUE TO FLOAT ERRORS +--echo # + +ANALYZE TABLE t2 UPDATE HISTOGRAM ON col_datetime WITH 100 BUCKETS; +SELECT schema_name, table_name, column_name, + JSON_REMOVE(histogram, '$."last-updated"') +FROM information_schema.COLUMN_STATISTICS WHERE table_name = 't2' and column_name = 'col_datetime'; + DROP TABLE t1, t2; SET @@SESSION.sql_mode=DEFAULT; diff --git a/sql/histograms/singleton.cc b/sql/histograms/singleton.cc index d80abb4c2ff..e7bd1092b70 100644 --- a/sql/histograms/singleton.cc +++ b/sql/histograms/singleton.cc @@ -117,7 +117,7 @@ bool Singleton::build_histogram(const Value_map &value_map, value_map.get_num_null_values() / static_cast(total_count); // Create buckets with relative frequency, and not absolute frequency. - double cumulative_frequency = 0.0; + ha_rows cumulative_sum = 0; /* Since we are using a std::map with Mem_root_allocator, we are forced to wrap @@ -126,8 +126,9 @@ bool Singleton::build_histogram(const Value_map &value_map, */ try { for (const auto &node : value_map) { - const double frequency = node.second / static_cast(total_count); - cumulative_frequency += frequency; + cumulative_sum += node.second; + const double cumulative_frequency = + cumulative_sum / static_cast(total_count); m_buckets.emplace(node.first, cumulative_frequency); } -- 2.26.2