From 400f4f3bf4ebc282de84a40ea3b5b58f904c3073 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Fri, 21 Dec 2012 14:14:07 -0800 Subject: [PATCH] InnoDB wastes 62 of every 16,384 pages in XDES/IBUF_BITMAP extent The problem is that once the segments of a tablespace are bigger than 32 pages, fragment pages are no longer allocated for use, yet they are still reserved whenever a new fragment extent is allocated. This is a direct consequence of mainly two facts: whenever a new descriptor page is needed (every 16384 pages), the extent that contains the descriptor page cannot be assigned to a segment and is instead used as a fragment extent; and a segment can only allocate up to 32 fragment pages since the array used to track fragment pages belonging to a segment is limited to 32 entries per segment. The solution is to allow for fragment extents to be leased to segments whenever there are free fragment extents available. A fragment extent is considered available if the only used pages in the extent are the extent descriptor and ibuf bitmap pages. A new extent state is used to tag leased extents and to ensure that they are returned to the space free fragment list once no longer being used by a segment. Additionally, a new system variable named innodb_lease_fragment_extents is introduced to control whether free fragment extents are allocated to segments. This is an incompatible change. Once a fragment extent is allocated to a segment, the table that contains the segment is no longer compatible with earlier MySQL versions. Also, the two reserved pages per leased extent are counted towards the size of the segment. This means that index and table size information provided by statements such as SHOW TABLE STATUS will include the size of the two additional pages per leased extent. 
--- client/mysqltest.cc | 6 + mysql-test/include/have_innodb_ruby.inc | 11 + .../suite/innodb/r/innodb_fragment_extent.result | 60 +++++ .../suite/innodb/t/innodb_fragment_extent.test | 81 ++++++ .../r/innodb_lease_fragment_extents_basic.result | 92 +++++++ .../t/innodb_lease_fragment_extents_basic.test | 65 +++++ storage/innobase/fsp/fsp0fsp.c | 275 +++++++++++++++++---- storage/innobase/handler/ha_innodb.cc | 6 + storage/innobase/include/srv0srv.h | 1 + storage/innobase/srv/srv0srv.c | 4 + 10 files changed, 549 insertions(+), 52 deletions(-) create mode 100644 mysql-test/include/have_innodb_ruby.inc create mode 100644 mysql-test/suite/innodb/r/innodb_fragment_extent.result create mode 100644 mysql-test/suite/innodb/t/innodb_fragment_extent.test create mode 100644 mysql-test/suite/sys_vars/r/innodb_lease_fragment_extents_basic.result create mode 100644 mysql-test/suite/sys_vars/t/innodb_lease_fragment_extents_basic.test diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 7bb0da8..82e7489 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -3052,6 +3052,8 @@ void do_exec(struct st_command *command) command->first_argument, ds_res.str); } + var_set_int("$exec_exit_status", status); + DBUG_PRINT("info", ("error: %d, status: %d", error, status)); @@ -3078,6 +3080,10 @@ void do_exec(struct st_command *command) die("command \"%s\" succeeded - should have failed with errno %d...", command->first_argument, command->expected_errors.err[0].code.errnum); } + else + { + var_set_int("$exec_exit_status", error); + } dynstr_free(&ds_cmd); DBUG_VOID_RETURN; diff --git a/mysql-test/include/have_innodb_ruby.inc b/mysql-test/include/have_innodb_ruby.inc new file mode 100644 index 0000000..71bfd59 --- /dev/null +++ b/mysql-test/include/have_innodb_ruby.inc @@ -0,0 +1,11 @@ +disable_query_log; +disable_result_log; +disable_abort_on_error; +exec innodb_space --help; +enable_abort_on_error; +if ($exec_exit_status != 0) +{ + skip Test requires innodb_ruby; +} 
+enable_result_log; +enable_query_log; diff --git a/mysql-test/suite/innodb/r/innodb_fragment_extent.result b/mysql-test/suite/innodb/r/innodb_fragment_extent.result new file mode 100644 index 0000000..ec407d0 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_fragment_extent.result @@ -0,0 +1,60 @@ +# +# Test that fragment extents can be fully used. +# + +# Setup. +SET @old_innodb_file_per_table = @@GLOBAL.innodb_file_per_table; +SET @old_innodb_lease_fragment_extents = @@GLOBAL.innodb_lease_fragment_extents; +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_lease_fragment_extents = ON; +CREATE VIEW buffer_pool AS +SELECT CONVERT(VARIABLE_VALUE, UNSIGNED) AS PAGES_DIRTY FROM +INFORMATION_SCHEMA.GLOBAL_STATUS WHERE +VARIABLE_NAME = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; +CREATE TABLE t1 (a BIGINT PRIMARY KEY, b VARCHAR(1024), c VARCHAR(1024)) +ENGINE=InnoDB; +CREATE PROCEDURE p1(k BIGINT, c BIGINT) +BEGIN +SET autocommit = OFF; +WHILE c > 0 DO +INSERT INTO t1 VALUES (k, REPEAT('b', 1024), REPEAT('c', 1024)); +IF (k MOD 1024 = 0) THEN COMMIT; END IF; +SET k = k + 1; +SET c = c - 1; +END WHILE; +SET autocommit = ON; +END| +# Populate table so that more than 16384 pages are used. +CALL p1(0, 130000); +# The free_frag list should now only contain one extent. +start_page bitmap +0 ##########################################################...... +# Show that the fragment extent was leased and its pages used. +start end count type +0 0 1 FSP_HDR +1 1 1 IBUF_BITMAP +2 2 1 INODE +3 57 55 INDEX +58 63 6 ALLOCATED +64 16383 16320 INDEX +16384 16384 1 XDES +16385 16385 1 IBUF_BITMAP +16386 18605 2220 INDEX +18606 19455 850 ALLOCATED +# Delete rows so that the fragment extent becomes unused. +DELETE FROM t1 WHERE a >= 114400; +# Show that the fragment extent is returned to the free_frag list. +start_page bitmap +0 ########################################################........ +16384 ##.............................................................. +# Repopulate table. 
+CALL p1(114400, 15600); +# Show that the fragment extent can be reused. +start_page bitmap +0 ##########################################################...... +# Cleanup. +DROP TABLE t1; +DROP PROCEDURE p1; +DROP VIEW buffer_pool; +SET @@GLOBAL.innodb_file_per_table = @old_innodb_file_per_table; +SET @@GLOBAL.innodb_lease_fragment_extents = @old_innodb_lease_fragment_extents; diff --git a/mysql-test/suite/innodb/t/innodb_fragment_extent.test b/mysql-test/suite/innodb/t/innodb_fragment_extent.test new file mode 100644 index 0000000..7c2e338 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_fragment_extent.test @@ -0,0 +1,81 @@ +--source include/have_innodb.inc +--source include/have_innodb_ruby.inc + +--echo # +--echo # Test that fragment extents can be fully used. +--echo # +--echo + +--echo # Setup. +SET @old_innodb_file_per_table = @@GLOBAL.innodb_file_per_table; +SET @old_innodb_lease_fragment_extents = @@GLOBAL.innodb_lease_fragment_extents; + +let $MYSQLD_DATADIR= `select @@datadir`; + +SET GLOBAL innodb_file_per_table = ON; +SET GLOBAL innodb_lease_fragment_extents = ON; + +CREATE VIEW buffer_pool AS + SELECT CONVERT(VARIABLE_VALUE, UNSIGNED) AS PAGES_DIRTY FROM + INFORMATION_SCHEMA.GLOBAL_STATUS WHERE + VARIABLE_NAME = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; + +CREATE TABLE t1 (a BIGINT PRIMARY KEY, b VARCHAR(1024), c VARCHAR(1024)) + ENGINE=InnoDB; + +delimiter |; + +CREATE PROCEDURE p1(k BIGINT, c BIGINT) +BEGIN + SET autocommit = OFF; + WHILE c > 0 DO + INSERT INTO t1 VALUES (k, REPEAT('b', 1024), REPEAT('c', 1024)); + IF (k MOD 1024 = 0) THEN COMMIT; END IF; + SET k = k + 1; + SET c = c - 1; + END WHILE; + SET autocommit = ON; +END| + +delimiter ;| + +--echo # Populate table so that more than 16384 pages are used. +CALL p1(0, 130000); + +# Wait for dirty pages to be flushed to disk. +let $wait_condition = SELECT PAGES_DIRTY = 0 FROM buffer_pool; +--source include/wait_condition.inc + +--echo # The free_frag list should now only contain one extent. 
+--exec innodb_space -f $MYSQLD_DATADIR/test/t1.ibd list-summary -L free_frag + +--echo # Show that the fragment extent was leased and its pages used. +--exec innodb_space -f $MYSQLD_DATADIR/test/t1.ibd space-page-type-regions + +--echo # Delete rows so that the fragment extent becomes unused. +DELETE FROM t1 WHERE a >= 114400; + +# Wait for dirty pages to be flushed to disk. +let $wait_condition = SELECT PAGES_DIRTY = 0 FROM buffer_pool; +--source include/wait_condition.inc + +--echo # Show that the fragment extent is returned to the free_frag list. +--exec innodb_space -f $MYSQLD_DATADIR/test/t1.ibd list-summary -L free_frag + +--echo # Repopulate table. +CALL p1(114400, 15600); + +# Wait for dirty pages to be flushed to disk. +let $wait_condition = SELECT PAGES_DIRTY = 0 FROM buffer_pool; +--source include/wait_condition.inc + +--echo # Show that the fragment extent can be reused. +--exec innodb_space -f $MYSQLD_DATADIR/test/t1.ibd list-summary -L free_frag + +--echo # Cleanup. +DROP TABLE t1; +DROP PROCEDURE p1; +DROP VIEW buffer_pool; + +SET @@GLOBAL.innodb_file_per_table = @old_innodb_file_per_table; +SET @@GLOBAL.innodb_lease_fragment_extents = @old_innodb_lease_fragment_extents; diff --git a/mysql-test/suite/sys_vars/r/innodb_lease_fragment_extents_basic.result b/mysql-test/suite/sys_vars/r/innodb_lease_fragment_extents_basic.result new file mode 100644 index 0000000..bfe9170 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_lease_fragment_extents_basic.result @@ -0,0 +1,92 @@ +SET @start_global_value = @@global.innodb_lease_fragment_extents; +SELECT @start_global_value; +@start_global_value +0 +Valid values are 'ON' and 'OFF' +SELECT @@GLOBAL.innodb_lease_fragment_extents IN (0, 1); +@@GLOBAL.innodb_lease_fragment_extents IN (0, 1) +1 +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +0 +SELECT @@SESSION.innodb_lease_fragment_extents; +ERROR HY000: Variable 'innodb_lease_fragment_extents' is a GLOBAL variable 
+SHOW GLOBAL VARIABLES LIKE 'innodb_lease_fragment_extents'; +Variable_name Value +innodb_lease_fragment_extents OFF +SHOW SESSION VARIABLES LIKE 'innodb_lease_fragment_extents'; +Variable_name Value +innodb_lease_fragment_extents OFF +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS OFF +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS OFF +SET GLOBAL innodb_lease_fragment_extents='OFF'; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +0 +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS OFF +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS OFF +SET @@GLOBAL.innodb_lease_fragment_extents=1; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +1 +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SET GLOBAL innodb_lease_fragment_extents=0; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +0 +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS OFF +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS 
OFF +SET @@GLOBAL.innodb_lease_fragment_extents='ON'; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +1 +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SET SESSION innodb_lease_fragment_extents='OFF'; +ERROR HY000: Variable 'innodb_lease_fragment_extents' is a GLOBAL variable and should be set with SET GLOBAL +SET @@SESSION.innodb_lease_fragment_extents='ON'; +ERROR HY000: Variable 'innodb_lease_fragment_extents' is a GLOBAL variable and should be set with SET GLOBAL +SET GLOBAL innodb_lease_fragment_extents=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_lease_fragment_extents' +SET GLOBAL innodb_lease_fragment_extents=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_lease_fragment_extents' +SET GLOBAL innodb_lease_fragment_extents=2; +ERROR 42000: Variable 'innodb_lease_fragment_extents' can't be set to the value of '2' +NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643) +SET GLOBAL innodb_lease_fragment_extents=-3; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +@@GLOBAL.innodb_lease_fragment_extents +1 +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_LEASE_FRAGMENT_EXTENTS ON +SET GLOBAL innodb_lease_fragment_extents='AUTO'; +ERROR 42000: Variable 'innodb_lease_fragment_extents' can't be set to the value of 'AUTO' +SET @@GLOBAL.innodb_lease_fragment_extents = @start_global_value; +SELECT @@GLOBAL.innodb_lease_fragment_extents; 
+@@GLOBAL.innodb_lease_fragment_extents +0 diff --git a/mysql-test/suite/sys_vars/t/innodb_lease_fragment_extents_basic.test b/mysql-test/suite/sys_vars/t/innodb_lease_fragment_extents_basic.test new file mode 100644 index 0000000..e1ff5d5 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_lease_fragment_extents_basic.test @@ -0,0 +1,65 @@ +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_lease_fragment_extents; +SELECT @start_global_value; + +# +# Global scope only +# +--echo Valid values are 'ON' and 'OFF' +SELECT @@GLOBAL.innodb_lease_fragment_extents IN (0, 1); +SELECT @@GLOBAL.innodb_lease_fragment_extents; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.innodb_lease_fragment_extents; +SHOW GLOBAL VARIABLES LIKE 'innodb_lease_fragment_extents'; +SHOW SESSION VARIABLES LIKE 'innodb_lease_fragment_extents'; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; + +# +# Read-write variable +# +SET GLOBAL innodb_lease_fragment_extents='OFF'; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +SET @@GLOBAL.innodb_lease_fragment_extents=1; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +SET GLOBAL innodb_lease_fragment_extents=0; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE 
variable_name='innodb_lease_fragment_extents'; +SET @@GLOBAL.innodb_lease_fragment_extents='ON'; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +--error ER_GLOBAL_VARIABLE +SET SESSION innodb_lease_fragment_extents='OFF'; +--error ER_GLOBAL_VARIABLE +SET @@SESSION.innodb_lease_fragment_extents='ON'; + +# +# Boolean type. +# +--error ER_WRONG_TYPE_FOR_VAR +SET GLOBAL innodb_lease_fragment_extents=1.1; +--error ER_WRONG_TYPE_FOR_VAR +SET GLOBAL innodb_lease_fragment_extents=1e1; +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_lease_fragment_extents=2; +--echo NOTE: The following should fail with ER_WRONG_VALUE_FOR_VAR (BUG#50643) +SET GLOBAL innodb_lease_fragment_extents=-3; +SELECT @@GLOBAL.innodb_lease_fragment_extents; +SELECT * FROM information_schema.global_variables WHERE variable_name='innodb_lease_fragment_extents'; +SELECT * FROM information_schema.session_variables WHERE variable_name='innodb_lease_fragment_extents'; +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_lease_fragment_extents='AUTO'; + +# +# Cleanup +# + +SET @@GLOBAL.innodb_lease_fragment_extents = @start_global_value; +SELECT @@GLOBAL.innodb_lease_fragment_extents; diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c index 72d1559..fd21e61 100644 --- a/storage/innobase/fsp/fsp0fsp.c +++ b/storage/innobase/fsp/fsp0fsp.c @@ -151,6 +151,8 @@ the extent are free and which contain old tuple version to clean. */ #define XDES_FULL_FRAG 3 /* extent is in full fragment list of space */ #define XDES_FSEG 4 /* extent belongs to a segment */ +#define XDES_FSEG_FRAG 5 /* fragment extent belongs to a + segment */ /* File extent data structure size in bytes. */ #define XDES_SIZE \ @@ -159,6 +161,9 @@ the extent are free and which contain old tuple version to clean. 
*/ /* Offset of the descriptor array on a descriptor page */ #define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) +/* The number of reserved pages in a fragment extent. */ +#define XDES_FRAG_N_USED 2 + #ifndef UNIV_HOTBACKUP /* Flag to indicate if we have printed the tablespace full error. */ static ibool fsp_tbs_full_error_printed = FALSE; @@ -242,6 +247,15 @@ fsp_fill_free_list( mtr_t* mtr) /*!< in/out: mini-transaction */ __attribute__((nonnull)); /**********************************************************************//** +Initializes a fragment extent and puts it into the free fragment list. */ +static +void +fsp_init_xdes_frag( +/*===============*/ + fsp_header_t* header, /*!< in/out: tablespace header */ + xdes_t* descr, /*!< in/out: extent descriptor */ + mtr_t* mtr); /*!< in/out: mini-transaction */ +/**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. @@ -528,7 +542,7 @@ xdes_set_state( { ut_ad(descr && mtr); ut_ad(state >= XDES_FREE); - ut_ad(state <= XDES_FSEG); + ut_ad(state <= XDES_FSEG_FRAG); ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr); @@ -550,11 +564,31 @@ xdes_get_state( ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr); - ut_ad(state - 1 < XDES_FSEG); + ut_ad(state - 1 < XDES_FSEG_FRAG); return(state); } /**********************************************************************//** +Returns true if extent belongs to a given segment. 
+@return TRUE if extent is part of the segment, FALSE otherwise */ +UNIV_INLINE +ibool +xdes_in_segment( +/*============*/ + const xdes_t* descr, /*!< in: descriptor */ + ib_id_t seg_id, /*!< in: segment id */ + mtr_t* mtr) /*!< in: mtr */ +{ + ulint state = xdes_get_state(descr, mtr); + + if (state != XDES_FSEG && state != XDES_FSEG_FRAG) { + return(FALSE); + } + + return(mach_read_from_8(descr + XDES_ID) == seg_id); +} + +/**********************************************************************//** Inits an extent descriptor to the free and clean state. */ UNIV_INLINE void @@ -1296,7 +1330,6 @@ fsp_fill_free_list( ulint zip_size; xdes_t* descr; ulint count = 0; - ulint frag_n_used; ulint actual_increase; ulint i; mtr_t ibuf_mtr; @@ -1411,22 +1444,7 @@ fsp_fill_free_list( #endif if (UNIV_UNLIKELY(init_xdes)) { - - /* The first page in the extent is a descriptor page - and the second is an ibuf bitmap page: mark them - used */ - - xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr); - xdes_set_bit(descr, XDES_FREE_BIT, - FSP_IBUF_BITMAP_OFFSET, FALSE, mtr); - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - - flst_add_last(header + FSP_FREE_FRAG, - descr + XDES_FLST_NODE, mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used + 2, MLOG_4BYTES, mtr); + fsp_init_xdes_frag(header, descr, mtr); } else { flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); @@ -1488,6 +1506,121 @@ fsp_alloc_free_extent( } /**********************************************************************//** +Returns the last fragment extent descriptor from the free_frag list. 
+@return the fragment extent descriptor, or NULL if none */ +static +xdes_t* +fsp_get_last_free_frag_extent( +/*==========================*/ + fsp_header_t* header, /*!< in: tablespace header */ + ulint zip_size, /*!< in: compressed page size */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + ulint space; + fil_addr_t node; + xdes_t* descr; + + ut_ad(header && mtr); + + node = flst_get_last(header + FSP_FREE_FRAG, mtr); + + if (fil_addr_is_null(node)) { + return(NULL); + } + + space = mach_read_from_4(header + FSP_SPACE_ID); + descr = xdes_lst_get_descriptor(space, zip_size, node, mtr); + ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); + + return(descr); +} + +/**********************************************************************//** +Leases an essentially free fragment extent to a segment. +@return extent descriptor, or NULL if no fragment extent can be leased */ +static +xdes_t* +fsp_lease_frag_extent( +/*==================*/ + fsp_header_t* header, /*!< in/out: tablespace header */ + fseg_inode_t* inode, /*!< in/out: segment inode */ + ulint zip_size, /*!< in: compressed page size */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + xdes_t* descr; + ib_id_t seg_id; + ulint n_used; + ulint page_size; + ibool has_xdes_page; + + /* If available, take an extent from the free_frag list. */ + if (!(descr = fsp_get_last_free_frag_extent(header, zip_size, mtr))) { + return(NULL); + } + + /* Used to check if the fragment extent can be leased. */ + n_used = xdes_get_n_used(descr, mtr); + page_size = zip_size ? zip_size : UNIV_PAGE_SIZE; + has_xdes_page = !ut_2pow_remainder(xdes_get_offset(descr), page_size); + + /* A fragment extent can be leased if it is the special kind that has a + descriptor page and no other pages are being used except the descriptor + and ibuf bitmap pages. */ + if (has_xdes_page != TRUE || n_used != XDES_FRAG_N_USED) { + return(NULL); + } + + /* Transition the extent (and its ownership) to the segment.
*/ + xdes_set_state(descr, XDES_FSEG_FRAG, mtr); + + seg_id = mach_read_from_8(inode + FSEG_ID); + mlog_write_ull(descr + XDES_ID, seg_id, mtr); + + /* Move extent from the space free_frag list to the segment not_full + list and update counters to reflect the already used (descriptor and + ibuf bitmap) pages. */ + flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr); + n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, n_used - XDES_FRAG_N_USED, + MLOG_4BYTES, mtr); + + flst_add_last(inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr); + n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); + mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, + n_used + XDES_FRAG_N_USED, MLOG_4BYTES, mtr); + + return(descr); +} + +/**********************************************************************//** +Initializes a fragment extent and puts it into the free fragment list. */ +static +void +fsp_init_xdes_frag( +/*===============*/ + fsp_header_t* header, /*!< in/out: tablespace header */ + xdes_t* descr, /*!< in/out: extent descriptor */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + ulint n_used; + + /* The first page in the extent is a descriptor page + and the second is an ibuf bitmap page: mark them + used */ + xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr); + xdes_set_bit(descr, XDES_FREE_BIT, FSP_IBUF_BITMAP_OFFSET, + FALSE, mtr); + + xdes_set_state(descr, XDES_FREE_FRAG, mtr); + mlog_write_ull(descr + XDES_ID, 0, mtr); + flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr); + + n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, n_used + XDES_FRAG_N_USED, + MLOG_4BYTES, mtr); +} + +/**********************************************************************//** Allocates a single free page from a space. 
*/ static __attribute__((nonnull)) void @@ -1799,6 +1932,7 @@ fsp_free_extent( { fsp_header_t* header; xdes_t* descr; + ulint state; ut_ad(mtr); @@ -1806,17 +1940,21 @@ fsp_free_extent( descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); - if (xdes_get_state(descr, mtr) == XDES_FREE) { + state = xdes_get_state(descr, mtr); + + if (state == XDES_FREE) { ut_print_buf(stderr, (byte*)descr - 500, 1000); putc('\n', stderr); ut_error; + } else if (state == XDES_FSEG_FRAG) { + xdes_init(descr, mtr); + fsp_init_xdes_frag(header, descr, mtr); + } else { + xdes_init(descr, mtr); + flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); } - - xdes_init(descr, mtr); - - flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); } /**********************************************************************//** @@ -2654,8 +2792,7 @@ fseg_alloc_free_page_low( /* In the big if-else below we look for ret_page and ret_descr */ /*-------------------------------------------------------------*/ - if ((xdes_get_state(descr, mtr) == XDES_FSEG) - && mach_read_from_8(descr + XDES_ID) == seg_id + if (xdes_in_segment(descr, seg_id, mtr) && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { take_hinted_page: @@ -2690,6 +2827,26 @@ take_hinted_page: hint + FSP_EXTENT_SIZE, mtr); goto take_hinted_page; /*-----------------------------------------------------------*/ + } else if (srv_lease_fragment_extents + && (fill_factor >= srv_segment_fill_factor) + && (used >= FSEG_FRAG_LIMIT) + && (!!(ret_descr + = fsp_lease_frag_extent(space_header, seg_inode, + zip_size, mtr)))) { + + /* 3. We take any fragment extent whose pages are essentially + ============================================================= + unused (except for the descriptor and ibuf pages). 
The extent + ============================================================= + is now leased to the segment and the hinted page can be taken + ============================================================= + if free + =======*/ + + ret_page = xdes_get_offset(ret_descr) + + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, + hint % FSP_EXTENT_SIZE, mtr); + /*-----------------------------------------------------------*/ } else if ((direction != FSP_NO_DIR) && (fill_factor >= srv_segment_fill_factor) && (used >= FSEG_FRAG_LIMIT) @@ -2697,7 +2854,7 @@ take_hinted_page: = fseg_alloc_free_extent(seg_inode, space, zip_size, mtr)))) { - /* 3. We take any free extent (which was already assigned above + /* 4. We take any free extent (which was already assigned above =============================================================== in the if-condition to ret_descr) and take the lowest or ======================================================== @@ -2709,11 +2866,10 @@ take_hinted_page: ret_page += FSP_EXTENT_SIZE - 1; } /*-----------------------------------------------------------*/ - } else if ((xdes_get_state(descr, mtr) == XDES_FSEG) - && mach_read_from_8(descr + XDES_ID) == seg_id + } else if (xdes_in_segment(descr, seg_id, mtr) && (!xdes_is_full(descr, mtr))) { - /* 4. We can take the page from the same extent as the + /* 5. We can take the page from the same extent as the ====================================================== hinted page (and the extent already belongs to the ================================================== @@ -2725,7 +2881,7 @@ take_hinted_page: hint % FSP_EXTENT_SIZE, mtr); /*-----------------------------------------------------------*/ } else if (reserved - used > 0) { - /* 5. We take any unused page from the segment + /* 6. 
We take any unused page from the segment ==============================================*/ fil_addr_t first; @@ -2746,7 +2902,7 @@ take_hinted_page: 0, mtr); /*-----------------------------------------------------------*/ } else if (used < FSEG_FRAG_LIMIT) { - /* 6. We allocate an individual page from the space + /* 7. We allocate an individual page from the space ===================================================*/ buf_block_t* block = fsp_alloc_free_page( space, zip_size, hint, mtr, init_mtr); @@ -2767,7 +2923,7 @@ take_hinted_page: return(block); /*-----------------------------------------------------------*/ } else { - /* 7. We allocate a new extent and take its first page + /* 8. We allocate a new extent and take its first page ======================================================*/ ret_descr = fseg_alloc_free_extent(seg_inode, space, zip_size, mtr); @@ -3298,6 +3454,7 @@ fseg_free_page_low( xdes_t* descr; ulint not_full_n_used; ulint state; + ulint n_used; ib_id_t descr_id; ib_id_t seg_id; ulint i; @@ -3338,7 +3495,7 @@ crash: state = xdes_get_state(descr, mtr); - if (state != XDES_FSEG) { + if (state != XDES_FSEG && state != XDES_FSEG_FRAG) { /* The page is in the fragment pages of the segment */ for (i = 0;; i++) { @@ -3397,19 +3554,28 @@ crash: descr + XDES_FLST_NODE, mtr); flst_add_last(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used + FSP_EXTENT_SIZE - 1, - MLOG_4BYTES, mtr); + not_full_n_used += FSP_EXTENT_SIZE - 1; } else { ut_a(not_full_n_used > 0); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - 1, MLOG_4BYTES, mtr); + not_full_n_used -= 1; } xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - if (xdes_is_free(descr, mtr)) { + n_used = xdes_get_n_used(descr, mtr); + + /* A leased fragment extent might have no more pages belonging to + the segment.*/ + if 
(state == XDES_FSEG_FRAG && n_used == XDES_FRAG_N_USED) { + n_used = 0; + not_full_n_used -= XDES_FRAG_N_USED; + } + + mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used, + MLOG_4BYTES, mtr); + + if (n_used == 0) { /* The extent has become free: free it to space */ flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr); @@ -3475,7 +3641,7 @@ fseg_free_extent( descr = xdes_get_descriptor(space, zip_size, page, mtr); - ut_a(xdes_get_state(descr, mtr) == XDES_FSEG); + ut_a(xdes_get_state(descr, mtr) >= XDES_FSEG); ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); @@ -3516,7 +3682,8 @@ fseg_free_extent( fsp_free_extent(space, zip_size, page, mtr); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - for (i = 0; i < FSP_EXTENT_SIZE; i++) { + i = xdes_get_state(descr, mtr) == XDES_FREE ? 0 : XDES_FRAG_N_USED; + for (; i < FSP_EXTENT_SIZE; i++) { buf_page_set_file_page_was_freed(space, first_page_in_extent + i); @@ -3732,7 +3899,8 @@ static ibool fseg_validate_low( /*==============*/ - fseg_inode_t* inode, /*!< in: segment inode */ + fseg_inode_t* inode, /*!< in: segment inode */ + ulint* n_frag, /*!< in/out: number of frag extents */ mtr_t* mtr2) /*!< in: mtr */ { ulint space; @@ -3794,10 +3962,10 @@ fseg_validate_low( ut_a(xdes_get_n_used(descr, &mtr) > 0); ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(mach_read_from_8(descr + XDES_ID) == seg_id); + ut_a(xdes_in_segment(descr, seg_id, &mtr)); n_used2 += xdes_get_n_used(descr, &mtr); + *n_frag += (xdes_get_state(descr, &mtr) == XDES_FSEG_FRAG); node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); mtr_commit(&mtr); @@ -3819,8 +3987,9 @@ fseg_validate_low( node_addr, &mtr); ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(mach_read_from_8(descr + XDES_ID) == seg_id); +
ut_a(xdes_in_segment(descr, seg_id, &mtr)); + + *n_frag += (xdes_get_state(descr, &mtr) == XDES_FSEG_FRAG); node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); mtr_commit(&mtr); @@ -3847,6 +4016,7 @@ fseg_validate( ulint space; ulint flags; ulint zip_size; + ulint n_frag = 0; /* fseg_validate_low() adds to this */ space = page_get_space_id(page_align(header)); @@ -3855,7 +4025,7 @@ fseg_validate( inode = fseg_inode_get(header, space, zip_size, mtr); - ret = fseg_validate_low(inode, mtr); + ret = fseg_validate_low(inode, &n_frag, mtr); return(ret); } @@ -3962,6 +4132,7 @@ fsp_validate( ulint n_used = 0; ulint n_used2 = 0; ulint n_full_frag_pages; + ulint n_fseg_frag = 0; ulint n; ulint seg_inode_len_free; ulint seg_inode_len_full; @@ -4104,7 +4275,7 @@ fsp_validate( seg_inode = fsp_seg_inode_page_get_nth_inode( seg_inode_page, n, zip_size, &mtr); ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0); - fseg_validate_low(seg_inode, &mtr); + fseg_validate_low(seg_inode, &n_fseg_frag, &mtr); descr_count += flst_get_len(seg_inode + FSEG_FREE, &mtr); @@ -4149,7 +4320,7 @@ fsp_validate( seg_inode = fsp_seg_inode_page_get_nth_inode( seg_inode_page, n, zip_size, &mtr); if (mach_read_from_8(seg_inode + FSEG_ID)) { - fseg_validate_low(seg_inode, &mtr); + fseg_validate_low(seg_inode, &n_fseg_frag, &mtr); descr_count += flst_get_len( seg_inode + FSEG_FREE, &mtr); @@ -4171,12 +4342,12 @@ fsp_validate( ut_a(descr_count * FSP_EXTENT_SIZE == free_limit); if (!zip_size) { - ut_a(n_used + n_full_frag_pages + ut_a(n_used + n_full_frag_pages + (n_fseg_frag * 2) == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE) + seg_inode_len_full + seg_inode_len_free); } else { - ut_a(n_used + n_full_frag_pages + ut_a(n_used + n_full_frag_pages + (n_fseg_frag * 2) == n_used2 + 2 * ((free_limit + (zip_size - 1)) / zip_size) + seg_inode_len_full + seg_inode_len_free); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d5e7b33..1c1d988 100644 --- 
a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -12028,6 +12028,11 @@ static MYSQL_SYSVAR_UINT(index_fill_factor, srv_index_fill_factor, "of free space. Only applies to clustered indexes.", NULL, NULL, 94, 0, 100, 0); +static MYSQL_SYSVAR_BOOL(lease_fragment_extents, srv_lease_fragment_extents, + PLUGIN_VAR_NOCMDARG, + "If a free fragment extent is available, allow it to be allocated to " + "a segment.", NULL, NULL, FALSE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(autoextend_increment), @@ -12107,6 +12112,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(segment_fill_factor), MYSQL_SYSVAR(index_fill_factor), MYSQL_SYSVAR(index_page_split_mode), + MYSQL_SYSVAR(lease_fragment_extents), NULL }; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index aca2506..99cc283 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -134,6 +134,7 @@ extern ulong srv_auto_extend_increment; extern ulong srv_segment_fill_factor; extern uint srv_index_fill_factor; +extern my_bool srv_lease_fragment_extents; extern ibool srv_created_new_raw; diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 1341602..182c6d6 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -178,6 +178,10 @@ UNIV_INTERN ulong srv_segment_fill_factor = 88; remaining space for record growth (e.g. updates). */ UNIV_INTERN uint srv_index_fill_factor = 94; +/* If TRUE, a free fragment extent may be leased to a segment before an +extent is taken from the segment/space free list. */ +UNIV_INTERN my_bool srv_lease_fragment_extents = FALSE; + /* If the following is TRUE we do not allow inserts etc. This protects the user from forgetting the 'newraw' keyword to my.cnf */ -- 1.8.0