From 12ea7e15b0b5bf79cae76fc0c41bd307958bda6a Mon Sep 17 00:00:00 2001 From: davidYichengWei Date: Mon, 26 Feb 2024 21:16:45 -0500 Subject: [PATCH] Bug#113587 SELECT DISTINCT makes ORDER BY ... DESC sort data to ascending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: MySQL builds a temporary table for deduplication. When there is only one column in the ORDER BY clause which has an index, the optimizer will use a backward index scan for sorting, and just perform a sequential scan on the temporary table. This is ok when the temporary table fits in memory, which will return elements in insertion order. When tmp_table_size is exceeded and the temporary table spills to disk to use the InnoDB storage engine, it forces the elements to be sorted based on the primary index in ascending order. This makes the final sequential scan produce data in ascending order. Fix: Before test_skip_sort() in query optimization, we check for the following two conditions: 1. The query has a DISTINCT clause 2. The ORDER BY clause specifies DESC order If they both hold, we will not apply the skip sort optimization, and a filesort will be applied after reading from the temporary table to sort items in the desired order. Potential improvements: There is a third condition we could check before skipping the optimization: the worst-case estimated size of the temporary table exceeds the tmp_table_size. I tried using (number of rows) *(row size) to estimate the size of the temporary table, but it doesn’t work well. The actual size is far larger than the estimate, and doesn’t grow linearly with the number of rows. We can apply this third condition check if we can find a way to get a good estimation, as this would allow the skip sort optimization when it won’t cause the order problem. --- .../r/optimizer_disable_skip_sort.result | 28 ++++++++++++++ mysql-test/t/optimizer_disable_skip_sort.test | 28 ++++++++++++++ sql/sql_optimizer.cc | 26 +++++++++++++ sql/sql_optimizer.h | 37 +++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 mysql-test/r/optimizer_disable_skip_sort.result create mode 100644 mysql-test/t/optimizer_disable_skip_sort.test diff --git a/mysql-test/r/optimizer_disable_skip_sort.result b/mysql-test/r/optimizer_disable_skip_sort.result new file mode 100644 index 00000000000..2c83b6f3438 --- /dev/null +++ b/mysql-test/r/optimizer_disable_skip_sort.result @@ -0,0 +1,28 @@ +SET SESSION tmp_table_size=1024; +DROP TABLE IF EXISTS t1, t2; +CREATE TABLE t1 ( +order_id int(10) unsigned NOT NULL AUTO_INCREMENT, +order_revision_id int(10) unsigned DEFAULT NULL, +external_order_id varchar(255) DEFAULT NULL, +PRIMARY KEY (order_id), +KEY order_revision_id (order_revision_id) +) ENGINE=InnoDB AUTO_INCREMENT=555292701 DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES (1,1,'1'),(2,2,'2'),(3,3,'3'),(4,4,'4'),(5,5,'5'),(6,6,'6'),(7,7,'7'),(8,8,'8'),(9,9,'9'),(10,10,'10'); +CREATE TABLE t2 ( +order_revision_id int(10) unsigned NOT NULL AUTO_INCREMENT, +state varchar(255) NOT NULL, +PRIMARY KEY (order_revision_id) +) ENGINE=InnoDB AUTO_INCREMENT=958993 DEFAULT CHARSET=utf8mb4; +INSERT INTO t2 VALUES (1,'NEW'), (2,'NEW'), (3,'NEW'), (4,'NEW'), (5,'NEW'), (6,'NEW'), (7,'NEW'), (8,'NEW'), (9,'NEW'), (10,'NEW'); +SELECT DISTINCT t1.order_id, t1.external_order_id FROM t1 INNER JOIN t2 USING (order_revision_id) ORDER BY t1.order_id DESC; +order_id external_order_id +10 10 +9 9 +8 8 +7 7 +6 6 +5 5 +4 4 +3 3 +2 2 +1 1 diff --git a/mysql-test/t/optimizer_disable_skip_sort.test b/mysql-test/t/optimizer_disable_skip_sort.test new file mode 100644 index 00000000000..fb18ecfdd40 --- /dev/null +++ b/mysql-test/t/optimizer_disable_skip_sort.test @@ -0,0 +1,28 @@ +# Bug #113587 SELECT DISTINCT makes ORDER BY ... DESC sort data to ascending +# Test that after spilling the temporary table for deduplication to disk, the DESC order is preserved +# By limiting tmp_table_size, we force the temporary table to be spilled to disk + +--disable_warnings +SET SESSION tmp_table_size=1024; + +DROP TABLE IF EXISTS t1, t2; +CREATE TABLE t1 ( + order_id int(10) unsigned NOT NULL AUTO_INCREMENT, + order_revision_id int(10) unsigned DEFAULT NULL, + external_order_id varchar(255) DEFAULT NULL, + PRIMARY KEY (order_id), + KEY order_revision_id (order_revision_id) +) ENGINE=InnoDB AUTO_INCREMENT=555292701 DEFAULT CHARSET=utf8mb4; + +INSERT INTO t1 VALUES (1,1,'1'),(2,2,'2'),(3,3,'3'),(4,4,'4'),(5,5,'5'),(6,6,'6'),(7,7,'7'),(8,8,'8'),(9,9,'9'),(10,10,'10'); + +CREATE TABLE t2 ( + order_revision_id int(10) unsigned NOT NULL AUTO_INCREMENT, + state varchar(255) NOT NULL, + PRIMARY KEY (order_revision_id) +) ENGINE=InnoDB AUTO_INCREMENT=958993 DEFAULT CHARSET=utf8mb4; + +INSERT INTO t2 VALUES (1,'NEW'), (2,'NEW'), (3,'NEW'), (4,'NEW'), (5,'NEW'), (6,'NEW'), (7,'NEW'), (8,'NEW'), (9,'NEW'), (10,'NEW'); +--enable_warnings + +SELECT DISTINCT t1.order_id, t1.external_order_id FROM t1 INNER JOIN t2 USING (order_revision_id) ORDER BY t1.order_id DESC; \ No newline at end of file diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc index 77995398c98..163e62c2403 100644 --- a/sql/sql_optimizer.cc +++ b/sql/sql_optimizer.cc @@ -1651,6 +1651,8 @@ bool JOIN::optimize_distinct_group_order() { void JOIN::test_skip_sort() { DBUG_TRACE; + if (disable_skip_sort_optimization()) return; + ASSERT_BEST_REF_IN_JOIN_ORDER(this); JOIN_TAB *const tab = best_ref[const_tables]; @@ -1721,6 +1723,30 @@ void JOIN::test_skip_sort() { } } +bool JOIN::disable_skip_sort_optimization() { + if (!select_distinct) { + return false; + } + + if (order.empty()) { + return false; + } + ORDER *ord = order.order; + bool has_desc_order = false; + while (ord) { + if (ord->direction == ORDER_DESC) { + has_desc_order = true; + break; + } + ord = ord->next; + } + if (!has_desc_order) { + return false; + } + + return true; +} + /** Test if ORDER BY is a single MATCH function(ORDER BY MATCH) and sort order is descending. diff --git a/sql/sql_optimizer.h b/sql/sql_optimizer.h index a4743509c24..d8c4d5d36ba 100644 --- a/sql/sql_optimizer.h +++ b/sql/sql_optimizer.h @@ -1030,6 +1030,43 @@ class JOIN { */ void test_skip_sort(); + /** + * @brief Test if the test_skip_sort() optimization should be disabled. + * + * @details + * This is related to Bug #113587 where if we have a query like: + * @code + * SELECT DISTINCT t1.order_id, t1.external_order_id + * FROM t1 INNER JOIN t1 USING (order_revision_id) + * ORDER BY t1.order_id DESC; + * @endcode + * The optimizer will create a temp table for deduplication, and then + * perform a sequential scan on the temp table if an index can be used + * for ordering. + * This is ok when the temp table can fit in memory (i.e. tmp_table_size + * not exceeded), because we can read from an in-memory temp table in insertion + * order. + * The problem arises when the temp table spills to disk to create an + * InnoDB table, which is always ordered by the primary key in ascending order. + * In this case, the final sequantial scan will return the rows in ascending order, + * even though the ORDER BY clause specifies descending order. + * + * To fix this, we disable the test_skip_sort() optimization if: + * 1. The query has a DISTINCT clause + * 2. The ORDER BY clause specifies DESC order + * 3. The worst-case estimated size of the temp table exceeds the tmp_table_size + * (Note: the third condition is not implemented yet, because it's difficult to + * get a good estimate at the optimization stage. Using number of rows * row size + * doesn't give a good estimate) + * + * so that a filesort will be used at the last step to sort the result in the + * correct order. + * + * @return true if the test_skip_sort() optimization should be disabled + * @return false otherwise + */ + bool disable_skip_sort_optimization(); + bool alloc_indirection_slices(); /** -- 2.39.1