Description:
When use temptable engine to build tmp table, it may lead to performance degradation for IN-Subs.
How to repeat:
Repeat as follows:
```
create table trade (
`trade_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
`pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`trade_id`),
KEY `pay_time` (`pay_time`),
KEY `seller_id` (`seller_id`,`create_time`),
KEY `idx_doudian_open_id` (`doudian_open_id`));
-- t1_in_range:
CREATE TABLE t1_in_range like trade;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2023-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
INSERT INTO t1_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_in_range;
--t1_not_in_range:
CREATE TABLE t1_not_in_range like trade;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:26', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:29:27', '2023-11-15 20:29:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) VALUES (6656, '2022-11-15 20:30:26', '2023-11-15 20:30:30', CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)));
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
INSERT INTO t1_not_in_range (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, CONCAT('1@#xC8', substring(md5(rand()), 1, 72), substring(md5(rand()), 1, 72)) from t1_not_in_range;
create table trade_000001(
`trade_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
`pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`trade_id`),
KEY `pay_time` (`pay_time`),
KEY `seller_id` (`seller_id`,`create_time`),
KEY `idx_doudian_open_id` (`doudian_open_id`));
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range;
INSERT INTO trade_000001 (seller_id, create_time, pay_time, doudian_open_id) SELECT seller_id, create_time, pay_time, doudian_open_id from t1_not_in_range;
create table shop_buyer_000001(
`shop_buyer_id` bigint NOT NULL AUTO_INCREMENT,
`seller_id` bigint DEFAULT NULL,
`buyer_frist_pay_time` datetime DEFAULT NULL,
`doudian_open_id` varchar(256) DEFAULT NULL,
PRIMARY KEY (`shop_buyer_id`),
KEY `idx_doudian_open_id` (`doudian_open_id`)
);
INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, pay_time, doudian_open_id from t1_in_range;
INSERT INTO shop_buyer_000001 (seller_id, buyer_frist_pay_time, doudian_open_id) SELECT seller_id, create_time, doudian_open_id from t1_not_in_range;
mysql> show variables like '%tmp%';
+---------------------------------+----------------------------+
| Variable_name | Value |
+---------------------------------+----------------------------+
| default_tmp_storage_engine | InnoDB |
| innodb_tmpdir | |
| internal_tmp_mem_storage_engine | TempTable |
| replica_load_tmpdir | /home/xychen/tools/run/tmp |
| slave_load_tmpdir | /home/xychen/tools/run/tmp |
| tmp_table_size | 1073741824 |
| tmpdir | /home/xychen/tools/run/tmp |
+---------------------------------+----------------------------+
7 rows in set (0.00 sec)
mysql>
mysql> set tmp_table_size = 64 * 16777216;
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (5 min 24.07 sec)
mysql> set internal_tmp_mem_storage_engine = MEMORY;
Query OK, 0 rows affected (0.00 sec)
mysql>
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (1 min 8.43 sec)
```
Suggested fix:
Cleary, the MEMORY (ie., ha_heap) engine is faster than Temptable engine. However, with the perf data we find that "memset" wastes too may cpus.
```
+ 95.11% 0.00% connection mysqld [.] mysql_execute_command
+ 95.11% 0.00% connection mysqld [.] Sql_cmd_dml::execute
+ 95.06% 0.00% connection mysqld [.] Query_expression::execute
+ 95.06% 0.00% connection mysqld [.] Query_expression::ExecuteIteratorQuery
+ 94.85% 0.01% connection mysqld [.] AggregateIterator::Read
+ 93.81% 0.01% connection mysqld [.] NestedLoopIterator::Read
+ 93.79% 0.01% connection mysqld [.] FilterIterator::Read
+ 91.57% 0.02% connection mysqld [.] Item_cond_and::val_int
+ 91.57% 0.02% connection mysqld [.] Item::val_bool
+ 91.49% 0.00% connection mysqld [.] Item_in_optimizer::val_int
+ 91.47% 0.01% connection mysqld [.] Item_in_subselect::val_bool_naked
+ 91.30% 0.01% connection mysqld [.] Item_subselect::exec
+ 91.29% 0.00% connection mysqld [.] subselect_hash_sj_engine::exec
+ 91.28% 0.02% connection mysqld [.] ExecuteExistsQuery
+ 90.82% 0.01% connection mysqld [.] TABLE::empty_result_table
+ 90.81% 0.00% connection mysqld [.] Query_expression::ClearForExecution
+ 90.80% 0.01% connection mysqld [.] temptable::Handler::delete_all_rows
+ 90.79% 0.01% connection mysqld [.] JOIN::reset
+ 90.72% 90.72% connection libc-2.24.so [.] __memset_avx2_erms
+ 2.32% 0.03% connection mysqld [.] RefIterator<false>::Read
+ 1.92% 0.12% connection mysqld [.] row_search_mvcc
+ 1.87% 0.01% connection mysqld [.] handler::ha_index_read_map
+ 1.65% 0.01% connection mysqld [.] ha_innobase::index_read
```
Check the source code, we find that
```
inline void Table::truncate() {
if (!m_all_columns_are_fixed_size) {
for (auto element : m_rows) {
Row *row = static_cast<Row *>(element);
row->~Row();
}
}
m_rows.clear();
/* Truncate indexes even if `m_indexes_are_enabled` is false. Somebody may
* use truncate() before enabling indexes and we don't want an empty m_rows
* with some stale data inside the indexes. */
for (auto &entry : m_index_entries) {
entry.m_index->truncate();
}
}
void Hash_unique::truncate() { m_hash_table.clear(); }
```
m_hash_table.clear() is called too many times.
Just fix:
```
diff --git a/storage/temptable/src/index.cc b/storage/temptable/src/index.cc
index a39e3d4f8..20455a25d 100644
--- a/storage/temptable/src/index.cc
+++ b/storage/temptable/src/index.cc
@@ -264,7 +264,9 @@ void Hash_unique::erase(const Cursor &target) {
m_hash_table.erase(target.hash_iterator());
}
-void Hash_unique::truncate() { m_hash_table.clear(); }
+void Hash_unique::truncate() {
+ if (!m_hash_table.empty()) m_hash_table.clear();
+}
```
With this patch, the execution time is reduced to 17s.
```
mysql> SELECT
-> count( DISTINCT doudian_open_id )
-> FROM
-> (
-> SELECT
-> doudian_open_id
-> FROM
-> (
-> (
-> SELECT
-> buyer.doudian_open_id AS doudian_open_id
-> FROM
-> trade_000001 trade,
-> shop_buyer_000001 buyer
-> WHERE
-> trade.doudian_open_id = buyer.doudian_open_id
-> AND trade.seller_id = 6656
-> AND buyer.seller_id = 6656
-> AND buyer.buyer_frist_pay_time >= '2023-01-01 00:00:00'
-> AND buyer.buyer_frist_pay_time <= '2023-11-30 23:59:59' AND trade.doudian_open_id IN (
-> SELECT doudian_open_id FROM trade_000001 trade WHERE seller_id = 6656 AND trade.pay_time >= '2022-11-14'
-> GROUP BY
-> trade.doudian_open_id
-> HAVING
-> count( 1 ) >= 1
-> )
-> )
-> ) x2
-> ) tmp;
+-----------------------------------+
| count( DISTINCT doudian_open_id ) |
+-----------------------------------+
| 524288 |
+-----------------------------------+
1 row in set (17.16s)
```