commit 55468726b5e61da927f003f89a47ca62b1c6f8ce Author: Dmitry Lenev Date: Mon Jul 24 19:34:30 2023 +0200 Bug #96236 "Big intrinsic tmp InnoDB table make InnoDB engine get totally stuck". Some queries that used a big intrinsic temporary InnoDB table might have led to InnoDB SE stall (especially when InnoDB buffer pool was small). Also concurrent execution of those queries could have led to the same problem or degraded performance. The problem occured for queries that scanned through such intrinsic tables using auto-created secondary key rather than clustered key. What happened was that in such cases some of the pages belonging to intrinsic table and accessed by the query stayed fixed in buffer pool until the end of statement (and not until the end of operation on specific row as it happens for normal InnoDB tables or only while they are inspected as it happens for scans of intrinsic tables over clustered key). As result these buffer fixed pages might have consumed the whole buffer pool at some point and further statement progress was blocked. Such behavior was result of combination of the following factors: 1) operations on intrinsic temporary tables are optimized and use single mini-transaction for all row operations in the statement (while for normal tables commit of mini-transaction happens after processing of each record. 2) normally, all buffer fixed pages are unfixed from memory only at commit of mini-transaction. 3) process of scanning over a key is optimized to immediately unfix pages once the cursor leaves them. However, when scan happens over a secondary key additional lookups on the clustered key are performed. And buffer pages which are fixed by these additional lookups stay fixed until the end of mini-transaction. This fix solves the problem by ensuring that we commit mini-transaction which is used for access to initrinsic temporary table after performing operations on each 100 rows. This unfixes buffer pages touched by mini-transaction making them available for further use by other or this statements. This patch might lead to 1-3% performance reduction in some of scenarios which heavily use intrinsic temporary tables. OTOH we also have seen significant performance improvements in other scenarios where big intrinsic tables were used. So we consider this acceptable. diff --git a/mysql-test/suite/innodb/r/optimizer_temporary_table_2.result b/mysql-test/suite/innodb/r/optimizer_temporary_table_2.result new file mode 100644 index 00000000000..8e0db301901 --- /dev/null +++ b/mysql-test/suite/innodb/r/optimizer_temporary_table_2.result @@ -0,0 +1,55 @@ +# +# Test for bug#96236 "Big intrinsic tmp InnoDB table make InnoDB +# engine get totally stuck". +# +# Check that InnoDB buffer pool size is set to minimum/5Mb and thus is +# small enough to trigger original problem. If it ever changes the size +# of source table below needs to be changed proportionately. +include/assert.inc [InnoDB Buffer Pool has minimal size (5Mb).] +CREATE TABLE source (id INT PRIMARY KEY AUTO_INCREMENT, a VARCHAR(100), b VARCHAR(100), c VARCHAR(100), d VARCHAR(100), e VARCHAR(100), hdl_source_id INT, hdl_created_date DATETIME); +CREATE TABLE dest LIKE source; +# Populate source table with 64K records. +SELECT count(*) FROM source; +count(*) +65536 +# Run query that creates big intrinsic temporary table. Prior to the +# fix it has exhausted buffer pool by keeping all its pages buffer +# fixed in memory, so query stalled. +# +# Note that to trigger problematic scenario query must perform scan +# over secondary key on intrinsic temporary table +# (and auxiliary lookups to confirm record status on clustered key). +# In this case the secondary key scan fixes and unfixes pages +# through which it iterates, while pages accessed during auxiliary +# lookups on clustered key stay buffer fixed until mtr commit. +# +# In cases when intrinsic temporary table is read using clustered +# key only the problem didn't occur, as similary to previous case +# key scan fixes and unfixes pages through which it iterates, but +# there are no auxiliary lookups to buffer fix additional pages. +EXPLAIN INSERT INTO dest SELECT id,a,b,c,d,e,hdl_source_id,hdl_created_date FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) a WHERE rn=1; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 INSERT dest NULL ALL NULL NULL NULL NULL # NULL NULL +1 PRIMARY NULL ref 8 const # 100.00 NULL +2 DERIVED source NULL ALL NULL NULL NULL NULL # 100.00 Using filesort +Warnings: +Note 3598 To get information about window functions use EXPLAIN FORMAT=JSON +Note 1003 insert into `test`.`dest` /* select#1 */ select `a`.`id` AS `id`,`a`.`a` AS `a`,`a`.`b` AS `b`,`a`.`c` AS `c`,`a`.`d` AS `d`,`a`.`e` AS `e`,`a`.`hdl_source_id` AS `hdl_source_id`,`a`.`hdl_created_date` AS `hdl_created_date` from (/* select#2 */ select `test`.`source`.`id` AS `id`,`test`.`source`.`a` AS `a`,`test`.`source`.`b` AS `b`,`test`.`source`.`c` AS `c`,`test`.`source`.`d` AS `d`,`test`.`source`.`e` AS `e`,`test`.`source`.`hdl_source_id` AS `hdl_source_id`,`test`.`source`.`hdl_created_date` AS `hdl_created_date`,row_number() OVER (PARTITION BY `test`.`source`.`id` ORDER BY `test`.`source`.`hdl_source_id` desc,`test`.`source`.`hdl_created_date` desc ) AS `rn` from `test`.`source`) `a` where (`a`.`rn` = 1) +INSERT INTO dest SELECT id,a,b,c,d,e,hdl_source_id,hdl_created_date FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) a WHERE rn=1; +# Queries that used several cursors over intrinsic temporary tables +# were also affected by the same problem. Again problem was visible +# only if scan over secondary key was used. +EXPLAIN WITH qn AS (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) SELECT (SELECT COUNT(*) FROM qn WHERE rn=1), (SELECT COUNT(*) FROM qn WHERE rn=2); +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 PRIMARY NULL NULL NULL NULL NULL NULL NULL # NULL No tables used +4 SUBQUERY NULL ref 8 const # 100.00 NULL +2 SUBQUERY NULL ref 8 const # 100.00 NULL +3 DERIVED source NULL ALL NULL NULL NULL NULL # 100.00 Using filesort +Warnings: +Note 3598 To get information about window functions use EXPLAIN FORMAT=JSON +Note 1003 with `qn` as (/* select#3 */ select `test`.`source`.`id` AS `id`,`test`.`source`.`a` AS `a`,`test`.`source`.`b` AS `b`,`test`.`source`.`c` AS `c`,`test`.`source`.`d` AS `d`,`test`.`source`.`e` AS `e`,`test`.`source`.`hdl_source_id` AS `hdl_source_id`,`test`.`source`.`hdl_created_date` AS `hdl_created_date`,row_number() OVER (PARTITION BY `test`.`source`.`id` ORDER BY `test`.`source`.`hdl_source_id` desc,`test`.`source`.`hdl_created_date` desc ) AS `rn` from `test`.`source`) /* select#1 */ select (/* select#2 */ select count(0) from `qn` where (`qn`.`rn` = 1)) AS `(SELECT COUNT(*) FROM qn WHERE rn=1)`,(/* select#4 */ select count(0) from `qn` where (`qn`.`rn` = 2)) AS `(SELECT COUNT(*) FROM qn WHERE rn=2)` +WITH qn AS (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) SELECT (SELECT COUNT(*) FROM qn WHERE rn=1), (SELECT COUNT(*) FROM qn WHERE rn=2); +(SELECT COUNT(*) FROM qn WHERE rn=1) (SELECT COUNT(*) FROM qn WHERE rn=2) +65536 0 +# Clean-up. +DROP TABLES dest, source; diff --git a/mysql-test/suite/innodb/t/optimizer_temporary_table_2-master.opt b/mysql-test/suite/innodb/t/optimizer_temporary_table_2-master.opt new file mode 100644 index 00000000000..67c8423cf2a --- /dev/null +++ b/mysql-test/suite/innodb/t/optimizer_temporary_table_2-master.opt @@ -0,0 +1 @@ +--innodb_buffer_pool_size=5M diff --git a/mysql-test/suite/innodb/t/optimizer_temporary_table_2.test b/mysql-test/suite/innodb/t/optimizer_temporary_table_2.test new file mode 100644 index 00000000000..f333cd4a24c --- /dev/null +++ b/mysql-test/suite/innodb/t/optimizer_temporary_table_2.test @@ -0,0 +1,58 @@ +--echo # +--echo # Test for bug#96236 "Big intrinsic tmp InnoDB table make InnoDB +--echo # engine get totally stuck". +--echo # + +--echo # Check that InnoDB buffer pool size is set to minimum/5Mb and thus is +--echo # small enough to trigger original problem. If it ever changes the size +--echo # of source table below needs to be changed proportionately. +--let $innodb_buffer_pool_size = `SELECT @@global.innodb_buffer_pool_size` +--let $assert_text = InnoDB Buffer Pool has minimal size (5Mb). +--let $assert_cond = $innodb_buffer_pool_size = 5242880 +--source include/assert.inc + +CREATE TABLE source (id INT PRIMARY KEY AUTO_INCREMENT, a VARCHAR(100), b VARCHAR(100), c VARCHAR(100), d VARCHAR(100), e VARCHAR(100), hdl_source_id INT, hdl_created_date DATETIME); +CREATE TABLE dest LIKE source; + +--echo # Populate source table with 64K records. +--disable_query_log +INSERT INTO source (a, b, c, d, e, hdl_source_id, hdl_created_date) values ('aaaaaaaaa', 'bbbbbbbbbb', 'cccccccccccccccc', 'dddddddddddddddddddddddddddd', 'eeeeeeeeeeeeeeeeee', 1, NOW()); +--let $i= 0 +while ($i < 16) +{ + INSERT INTO source (a, b, c, d, e, hdl_source_id, hdl_created_date) SELECT a, b, c, d, e, hdl_source_id + 1, NOW() FROM source; + --inc $i +} +--enable_query_log +SELECT count(*) FROM source; + +--echo # Run query that creates big intrinsic temporary table. Prior to the +--echo # fix it has exhausted buffer pool by keeping all its pages buffer +--echo # fixed in memory, so query stalled. +--echo # +--echo # Note that to trigger problematic scenario query must perform scan +--echo # over secondary key on intrinsic temporary table +--echo # (and auxiliary lookups to confirm record status on clustered key). +--echo # In this case the secondary key scan fixes and unfixes pages +--echo # through which it iterates, while pages accessed during auxiliary +--echo # lookups on clustered key stay buffer fixed until mtr commit. +--echo # +--echo # In cases when intrinsic temporary table is read using clustered +--echo # key only the problem didn't occur, as similary to previous case +--echo # key scan fixes and unfixes pages through which it iterates, but +--echo # there are no auxiliary lookups to buffer fix additional pages. +--let $query = INSERT INTO dest SELECT id,a,b,c,d,e,hdl_source_id,hdl_created_date FROM (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) a WHERE rn=1 +--replace_column 10 # +--eval EXPLAIN $query +--eval $query + +--echo # Queries that used several cursors over intrinsic temporary tables +--echo # were also affected by the same problem. Again problem was visible +--echo # only if scan over secondary key was used. +--let $query = WITH qn AS (SELECT *, ROW_NUMBER() OVER(PARTITION BY id ORDER BY hdl_source_id DESC, hdl_created_date DESC) rn FROM source) SELECT (SELECT COUNT(*) FROM qn WHERE rn=1), (SELECT COUNT(*) FROM qn WHERE rn=2) +--replace_column 10 # +--eval EXPLAIN $query +--eval $query + +--echo # Clean-up. +DROP TABLES dest, source; diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 02502a1cdae..6d8e83fd4b5 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -984,14 +984,20 @@ struct rec_cache_t { size_t nullable_cols{0}; }; +/** Maximum number of records we insert or select from intrinsic table +before committing mtr. */ +constexpr uint32_t MAX_INTRINSIC_MTR_RECORDS = 100; + /** Cache position of last inserted or selected record by caching record and holding reference to the block where record resides. -Note: We don't commit mtr and hold it beyond a transaction lifetime as this is -a special case (intrinsic table) that are not shared across connection. */ +Note: We don't commit mtr (unless mtr_records reaches MAX_INTRINSIC_MTR_RECORDS +limit) and hold it beyond a transaction lifetime as this is a special case +(intrinsic table) that are not shared across connection. */ class last_ops_cur_t { public: /** Constructor */ - last_ops_cur_t() : rec(), block(), mtr(), disable_caching(), invalid() { + last_ops_cur_t() + : rec(), block(), mtr(), disable_caching(), invalid(), mtr_records(0) { /* Do Nothing. */ } @@ -1003,6 +1009,7 @@ class last_ops_cur_t { rec = nullptr; block = nullptr; invalid = false; + mtr_records = 0; } public: @@ -1022,6 +1029,11 @@ class last_ops_cur_t { split then invalidate the cached position as it would be no more remain valid. Will be re-cached on post-split insert. */ bool invalid; + + /** Number of records which were inserted or selected into from + intrinsic table within this mtr. Needed to limit number of intrinsic + table records inserted/selected within single mtr. */ + uint32_t mtr_records; }; /** "GEN_CLUST_INDEX" is the name reserved for InnoDB default diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index d4d8dcb0507..fe97bb9f2aa 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2626,9 +2626,15 @@ static dberr_t row_ins_sorted_clust_index_entry(ulint mode, dict_index_t *index, mtr = &index->last_ins_cur->mtr; /* Search for position if tree needs to be split or if last position - is not cached. */ + is not cached. Also commit mtr and do the search if we have inserted + more than MAX_INTRINSIC_MTR_RECORDS within this mtr. The latter is + done to unfix buffer pages used by the mtr. Otherwise statements + (even single statement in extreme case) using intrinsic tables can + consume the whole buffer pool by their buffer fixed pages causing + performance problems or even stalls. */ if (mode == BTR_MODIFY_TREE || index->last_ins_cur->rec == nullptr || - index->last_ins_cur->disable_caching) { + index->last_ins_cur->disable_caching || + index->last_ins_cur->mtr_records > MAX_INTRINSIC_MTR_RECORDS) { /* Commit the previous mtr. */ index->last_ins_cur->release(); @@ -2700,6 +2706,7 @@ static dberr_t row_ins_sorted_clust_index_entry(ulint mode, dict_index_t *index, index->last_ins_cur->rec = insert_rec; index->last_ins_cur->block = cursor.page_cur.block; + index->last_ins_cur->mtr_records++; } else { index->last_ins_cur->release(); } diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index b3917453c6f..f7701e5a5c1 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3966,7 +3966,8 @@ dberr_t row_search_no_mvcc(byte *buf, page_cur_mode_t mode, If search key is specified, cursor is open using the key else cursor is open to return all the records. */ if (direction != 0) { - if (prebuilt->m_temp_read_shared && !prebuilt->m_temp_tree_modified) { + if (prebuilt->m_temp_read_shared && !prebuilt->m_temp_tree_modified && + !(index->last_sel_cur->mtr_records > MAX_INTRINSIC_MTR_RECORDS)) { if (!mtr->is_active()) { mtr_start(mtr); @@ -3992,10 +3993,17 @@ dberr_t row_search_no_mvcc(byte *buf, page_cur_mode_t mode, return (err); /* purecov: inspected */ } - } else if (index->last_sel_cur->invalid || prebuilt->m_temp_tree_modified) { + } else if (index->last_sel_cur->invalid || prebuilt->m_temp_tree_modified || + index->last_sel_cur->mtr_records > MAX_INTRINSIC_MTR_RECORDS) { block_relocated: /* Index tree has changed and so active cached cursor is no more valid. - Re-set it based on the last selected position. */ + Re-set it based on the last selected position. + Also commit mtr and reset the cursor if we have selected more than + MAX_INTRINSIC_MTR_RECORDS within this mtr. The latter is done to unfix + buffer pages used by the mtr. Otherwise statements (even single + statement in extreme case) using intrinsic tables can consume the whole + buffer pool by their buffer fixed pages causing performance problems + or even stalls. */ index->last_sel_cur->release(); prebuilt->m_temp_tree_modified = false; @@ -4143,6 +4151,7 @@ dberr_t row_search_no_mvcc(byte *buf, page_cur_mode_t mode, ut_ad(err == DB_SUCCESS); index->last_sel_cur->rec = pcur->get_rec(); index->last_sel_cur->block = pcur->get_block(); + index->last_sel_cur->mtr_records++; /* This is needed in order to restore the cursor if index structure changes while SELECT is still active. */