diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 309b93c..50c72e4 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -325,6 +325,7 @@ static PSI_mutex_info all_innodb_mutexes[] = { PSI_KEY(ibuf_mutex), PSI_KEY(ibuf_pessimistic_insert_mutex), PSI_KEY(log_sys_mutex), + PSI_KEY(log_sys_w_mutex), PSI_KEY(page_zip_stat_per_index_mutex), PSI_KEY(purge_sys_pq_mutex), PSI_KEY(recv_sys_mutex), diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 006f863..64e1d13 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -644,7 +644,7 @@ struct log_t{ buffer */ #ifndef UNIV_HOTBACKUP LogSysMutex mutex; /*!< mutex protecting the log */ - + LogSysMutex w_mutex; /*!< mutex to protect writing to log file */ FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to the flush list when we are putting dirty blocks in the list. The idea @@ -654,7 +654,8 @@ struct log_t{ insertions in the flush_list happen in the LSN order. */ #endif /* !UNIV_HOTBACKUP */ - byte* buf_ptr; /* unaligned log buffer */ + byte* buf_pair_ptr[2];/*!< unaligned allocations backing buf_pair[] */ + byte* buf_pair[2]; /*!< two aligned log buffers: one takes new redo while the other is written out */ byte* buf; /*!< log buffer */ ulint buf_size; /*!< log buffer size in bytes */ ulint max_buf_free; /*!< recommended maximum value of @@ -782,13 +783,21 @@ struct log_t{ } while (0) /** Test if log sys mutex is owned. */ -#define log_mutex_own() mutex_own(&log_sys->mutex) +#define log_mutex_own() (mutex_own(&log_sys->mutex) || mutex_own(&(log_sys->w_mutex))) /** Acquire the log sys mutex. */ #define log_mutex_enter() mutex_enter(&log_sys->mutex) +#define log_mutex_enter_all() do { \ + mutex_enter(&log_sys->w_mutex); \ + mutex_enter(&log_sys->mutex); \ +} while (0) /** Release the log sys mutex. 
*/ #define log_mutex_exit() mutex_exit(&log_sys->mutex) +#define log_mutex_exit_all() do { \ + mutex_exit(&log_sys->w_mutex); \ + mutex_exit(&log_sys->mutex); \ +} while (0) #ifndef UNIV_NONINL #include "log0log.ic" diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 8928841..db8f9a0 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -72,6 +72,7 @@ extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; extern mysql_pfs_key_t log_sys_mutex_key; extern mysql_pfs_key_t log_cmdq_mutex_key; extern mysql_pfs_key_t log_flush_order_mutex_key; +extern mysql_pfs_key_t log_sys_w_mutex_key; extern mysql_pfs_key_t recalc_pool_mutex_key; extern mysql_pfs_key_t page_cleaner_mutex_key; extern mysql_pfs_key_t purge_sys_pq_mutex_key; diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h index 71e86d0..b5beaae 100644 --- a/storage/innobase/include/sync0types.h +++ b/storage/innobase/include/sync0types.h @@ -236,6 +236,7 @@ enum latch_level_t { SYNC_RECV, SYNC_LOG_FLUSH_ORDER, SYNC_LOG, + SYNC_W_LOG, SYNC_PAGE_CLEANER, SYNC_PURGE_QUEUE, SYNC_TRX_SYS_HEADER, diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index daa5706..f0d5b42 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -161,20 +161,20 @@ log_buffer_extend( ulint move_end; byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE]; - log_mutex_enter(); + log_mutex_enter_all(); while (log_sys->is_extending) { /* Another thread is trying to extend already. Needs to wait for. 
*/ - log_mutex_exit(); + log_mutex_exit_all(); log_buffer_flush_to_disk(); - log_mutex_enter(); + log_mutex_enter_all(); if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) { /* Already extended enough by the others */ - log_mutex_exit(); + log_mutex_exit_all(); return; } } @@ -196,11 +196,11 @@ log_buffer_extend( != ut_calc_align_down(log_sys->buf_next_to_write, OS_FILE_LOG_BLOCK_SIZE)) { /* Buffer might have >1 blocks to write still. */ - log_mutex_exit(); + log_mutex_exit_all(); log_buffer_flush_to_disk(); - log_mutex_enter(); + log_mutex_enter_all(); } move_start = ut_calc_align_down( @@ -217,11 +217,21 @@ log_buffer_extend( /* reallocate log buffer */ srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1; - ut_free(log_sys->buf_ptr); - log_sys->buf_ptr = static_cast<byte*>( + ut_free(log_sys->buf_pair_ptr[0]); + ut_free(log_sys->buf_pair_ptr[1]); + + log_sys->buf_pair_ptr[0] = static_cast<byte*>( + ut_zalloc_nokey(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); + log_sys->buf_pair_ptr[1] = static_cast<byte*>( ut_zalloc_nokey(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - log_sys->buf = static_cast<byte*>( - ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); + + log_sys->buf_pair[0] = static_cast<byte*>( + ut_align(log_sys->buf_pair_ptr[0], OS_FILE_LOG_BLOCK_SIZE)); + log_sys->buf_pair[1] = static_cast<byte*>( + ut_align(log_sys->buf_pair_ptr[1], OS_FILE_LOG_BLOCK_SIZE)); + + log_sys->buf = log_sys->buf_pair[0]; + log_sys->buf_size = LOG_BUFFER_SIZE; log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; @@ -232,7 +242,7 @@ log_buffer_extend( ut_ad(log_sys->is_extending); log_sys->is_extending = false; - log_mutex_exit(); + log_mutex_exit_all(); ib::info() << "innodb_log_buffer_size was extended to " << LOG_BUFFER_SIZE << "."; @@ -738,6 +748,7 @@ log_init(void) log_sys = static_cast<log_t*>(ut_zalloc_nokey(sizeof(log_t))); mutex_create("log_sys", &log_sys->mutex); + mutex_create("log_sys_w", &log_sys->w_mutex); mutex_create("log_flush_order", &log_sys->log_flush_order_mutex); @@ -749,11 
+760,17 @@ log_init(void) ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE); ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); - log_sys->buf_ptr = static_cast<byte*>( + log_sys->buf_pair_ptr[0] = static_cast<byte*>( ut_zalloc_nokey(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); + log_sys->buf_pair_ptr[1] = static_cast<byte*>( + ut_zalloc_nokey(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); + + log_sys->buf_pair[0] = static_cast<byte*>( + ut_align(log_sys->buf_pair_ptr[0], OS_FILE_LOG_BLOCK_SIZE)); + log_sys->buf_pair[1] = static_cast<byte*>( + ut_align(log_sys->buf_pair_ptr[1], OS_FILE_LOG_BLOCK_SIZE)); - log_sys->buf = static_cast<byte*>( - ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); + log_sys->buf = log_sys->buf_pair[0]; log_sys->buf_size = LOG_BUFFER_SIZE; @@ -853,39 +870,6 @@ log_group_init( } /******************************************************//** -Update log_sys after write completion. */ -static -void -log_sys_write_completion(void) -/*==========================*/ -{ - ulint move_start; - ulint move_end; - - ut_ad(log_mutex_own()); - - log_sys->write_lsn = log_sys->lsn; - log_sys->buf_next_to_write = log_sys->write_end_offset; - - if (log_sys->write_end_offset > log_sys->max_buf_free / 2) { - /* Move the log buffer content to the start of the - buffer */ - - move_start = ut_calc_align_down( - log_sys->write_end_offset, - OS_FILE_LOG_BLOCK_SIZE); - move_end = ut_calc_align(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); - - ut_memmove(log_sys->buf, log_sys->buf + move_start, - move_end - move_start); - log_sys->buf_free -= move_start; - - log_sys->buf_next_to_write -= move_start; - } -} - -/******************************************************//** Completes an i/o to a log file. 
*/ void log_io_complete( @@ -1149,6 +1133,8 @@ log_write_up_to( #ifdef UNIV_DEBUG ulint loop_count = 0; #endif /* UNIV_DEBUG */ + byte* write_buf; + lsn_t write_lsn; ut_ad(!srv_read_only_mode); @@ -1174,7 +1160,7 @@ loop: } #endif - log_mutex_enter(); + mutex_enter(&(log_sys->w_mutex)); ut_ad(!recv_no_log_write); lsn_t limit_lsn = flush_to_disk @@ -1182,7 +1168,7 @@ loop: : log_sys->write_lsn; if (limit_lsn >= lsn) { - log_mutex_exit(); + mutex_exit(&(log_sys->w_mutex)); return; } @@ -1190,7 +1176,7 @@ loop: /* write requests during fil_flush() might not be good for Windows */ if (log_sys->n_pending_flushes > 0 || !os_event_is_set(log_sys->flush_event)) { - log_mutex_exit(); + mutex_exit(&(log_sys->w_mutex)); os_event_wait(log_sys->flush_event); goto loop; } @@ -1208,7 +1194,7 @@ loop: for us. */ bool work_done = log_sys->current_flush_lsn >= lsn; - log_mutex_exit(); + mutex_exit(&(log_sys->w_mutex)); os_event_wait(log_sys->flush_event); @@ -1219,9 +1205,11 @@ loop: } } + log_mutex_enter(); if (!flush_to_disk && log_sys->buf_free == log_sys->buf_next_to_write) { /* Nothing to write and no flush to disk requested */ + mutex_exit(&(log_sys->w_mutex)); log_mutex_exit(); return; } @@ -1246,7 +1234,7 @@ loop: if (log_sys->buf_free == log_sys->buf_next_to_write) { /* Nothing to write, flush only */ - log_mutex_exit(); + log_mutex_exit_all(); log_write_flush_to_disk_low(); return; } @@ -1267,6 +1255,27 @@ loop: log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, log_sys->next_checkpoint_no); + write_lsn = log_sys->lsn; + write_buf = log_sys->buf; + + if (log_sys->buf == log_sys->buf_pair[0]) { + log_sys->buf = log_sys->buf_pair[1]; + } else { + log_sys->buf = log_sys->buf_pair[0]; + } + + ut_ad(log_sys->buf != write_buf); + + /* Copy the last block to new buf */ + ut_memcpy(log_sys->buf, + write_buf + area_end - OS_FILE_LOG_BLOCK_SIZE, + OS_FILE_LOG_BLOCK_SIZE); + + log_sys->buf_free %= OS_FILE_LOG_BLOCK_SIZE; + log_sys->buf_next_to_write = log_sys->buf_free; + + 
log_mutex_exit(); + group = UT_LIST_GET_FIRST(log_sys->log_groups); /* Calculate pad_size if needed. */ @@ -1292,13 +1301,13 @@ loop: pad_size = log_sys->buf_size - area_end; } - ::memset(log_sys->buf + area_end, 0, pad_size); + ::memset(write_buf + area_end, 0, pad_size); } } /* Do the write to the log files */ log_group_write_buf( - group, log_sys->buf + area_start, + group, write_buf + area_start, area_end - area_start + pad_size, #ifdef UNIV_DEBUG pad_size, @@ -1313,8 +1322,7 @@ loop: log_group_set_fields(group, log_sys->write_lsn); - log_sys_write_completion(); - + log_sys->write_lsn = write_lsn; #ifndef _WIN32 if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { /* O_SYNC means the OS did not buffer the log file at all: @@ -1323,7 +1331,7 @@ loop: } #endif /* !_WIN32 */ - log_mutex_exit(); + mutex_exit(&(log_sys->w_mutex)); if (flush_to_disk) { log_write_flush_to_disk_low(); @@ -1491,7 +1499,7 @@ log_io_complete_checkpoint(void) { MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE); - log_mutex_enter(); + log_mutex_enter_all(); ut_ad(log_sys->n_pending_checkpoint_writes > 0); @@ -1499,7 +1507,7 @@ log_io_complete_checkpoint(void) log_complete_checkpoint(); } - log_mutex_exit(); + log_mutex_exit_all(); } /*******************************************************************//** @@ -1721,7 +1729,7 @@ log_write_checkpoint_info( } } - log_mutex_exit(); + log_mutex_exit_all(); MONITOR_INC(MONITOR_NUM_CHECKPOINT); @@ -1739,10 +1747,10 @@ mtr_buf_t* log_append_on_checkpoint( mtr_buf_t* buf) { - log_mutex_enter(); + log_mutex_enter_all(); mtr_buf_t* old = log_sys->append_on_checkpoint; log_sys->append_on_checkpoint = buf; - log_mutex_exit(); + log_mutex_exit_all(); return(old); } @@ -1780,7 +1788,7 @@ log_checkpoint( } #endif /* !_WIN32 */ - log_mutex_enter(); + log_mutex_enter_all(); ut_ad(!recv_no_log_write); oldest_lsn = log_buf_pool_get_oldest_modification(); @@ -1798,7 +1806,7 @@ log_checkpoint( == log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) { /* Do 
nothing, because nothing was logged (other than a MLOG_CHECKPOINT marker) since the previous checkpoint. */ - log_mutex_exit(); + log_mutex_exit_all(); return(true); } @@ -1824,15 +1832,15 @@ log_checkpoint( flush_lsn = log_sys->lsn; } - log_mutex_exit(); + log_mutex_exit_all(); log_write_up_to(flush_lsn, true); - log_mutex_enter(); + log_mutex_enter_all(); if (!write_always && log_sys->last_checkpoint_lsn >= oldest_lsn) { - log_mutex_exit(); + log_mutex_exit_all(); return(true); } @@ -1840,7 +1848,7 @@ log_checkpoint( if (log_sys->n_pending_checkpoint_writes > 0) { /* A checkpoint write is running */ - log_mutex_exit(); + log_mutex_exit_all(); if (sync) { /* Wait for the checkpoint write to complete */ @@ -2457,8 +2465,13 @@ log_shutdown(void) { log_group_close_all(); - ut_free(log_sys->buf_ptr); - log_sys->buf_ptr = NULL; + ut_free(log_sys->buf_pair_ptr[0]); + ut_free(log_sys->buf_pair_ptr[1]); + log_sys->buf_pair_ptr[0] = NULL; + log_sys->buf_pair_ptr[1] = NULL; + /* buf_pair[] aliased the allocations freed above; clear them as well. */ + log_sys->buf_pair[0] = NULL; + log_sys->buf_pair[1] = NULL; log_sys->buf = NULL; ut_free(log_sys->checkpoint_buf_ptr); log_sys->checkpoint_buf_ptr = NULL; @@ -2469,6 +2482,7 @@ log_shutdown(void) rw_lock_free(&log_sys->checkpoint_lock); mutex_free(&log_sys->mutex); + mutex_free(&(log_sys->w_mutex)); mutex_free(&log_sys->log_flush_order_mutex); recv_sys_close(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index f189d45..6c07a20 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -755,7 +755,7 @@ recv_synchronize_groups(void) checkpoint info on disk certain */ log_write_checkpoint_info(true); - log_mutex_enter(); + log_mutex_enter_all(); } #endif /* !UNIV_HOTBACKUP */ @@ -2050,7 +2050,7 @@ loop: ut_d(recv_no_log_write = true); mutex_exit(&(recv_sys->mutex)); - log_mutex_exit(); + log_mutex_exit_all(); /* Stop the recv_writer thread from issuing any LRU flush batches. */ @@ -2069,7 +2069,7 @@ loop: /* Allow batches from recv_writer thread. 
*/ mutex_exit(&recv_sys->writer_mutex); - log_mutex_enter(); + log_mutex_enter_all(); mutex_enter(&(recv_sys->mutex)); ut_d(recv_no_log_write = false); @@ -3282,7 +3282,7 @@ recv_recovery_from_checkpoint_start( recv_recovery_on = true; - log_mutex_enter(); + log_mutex_enter_all(); /* Look for the latest checkpoint from any of the log groups */ @@ -3290,7 +3290,7 @@ recv_recovery_from_checkpoint_start( if (err != DB_SUCCESS) { - log_mutex_exit(); + log_mutex_exit_all(); return(err); } @@ -3314,7 +3314,7 @@ recv_recovery_from_checkpoint_start( (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { if (srv_read_only_mode) { - log_mutex_exit(); + log_mutex_exit_all(); ib::error() << "Cannot restore from mysqlbackup," " InnoDB running in read-only mode!"; @@ -3361,7 +3361,7 @@ recv_recovery_from_checkpoint_start( ut_ad(!recv_sys->found_corrupt_fs); if (recv_sys->found_corrupt_log && !srv_force_recovery) { - log_mutex_exit(); + log_mutex_exit_all(); return(DB_ERROR); } @@ -3383,7 +3383,7 @@ recv_recovery_from_checkpoint_start( if ((recv_sys->found_corrupt_log && !srv_force_recovery) || recv_sys->found_corrupt_fs) { - log_mutex_exit(); + log_mutex_exit_all(); return(DB_ERROR); } } @@ -3413,7 +3413,7 @@ recv_recovery_from_checkpoint_start( if (srv_read_only_mode) { ib::error() << "Can't initiate database" " recovery, running in read-only-mode."; - log_mutex_exit(); + log_mutex_exit_all(); return(DB_READ_ONLY); } @@ -3427,7 +3427,7 @@ recv_recovery_from_checkpoint_start( err = recv_init_crash_recovery_spaces(); if (err != DB_SUCCESS) { - log_mutex_exit(); + log_mutex_exit_all(); return(err); } @@ -3438,7 +3438,7 @@ recv_recovery_from_checkpoint_start( if ((recv_sys->found_corrupt_log && !srv_force_recovery) || recv_sys->found_corrupt_fs) { - log_mutex_exit(); + log_mutex_exit_all(); return(DB_ERROR); } } @@ -3459,7 +3459,7 @@ recv_recovery_from_checkpoint_start( } if (recv_sys->recovered_lsn < checkpoint_lsn) { - log_mutex_exit(); + log_mutex_exit_all(); /* No harm in trying to do 
RO access. */ if (!srv_read_only_mode) { @@ -3508,7 +3508,7 @@ recv_recovery_from_checkpoint_start( mutex_exit(&recv_sys->mutex); - log_mutex_exit(); + log_mutex_exit_all(); recv_lsn_checks_on = true; @@ -3665,13 +3665,13 @@ recv_reset_logs( MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, (log_sys->lsn - log_sys->last_checkpoint_lsn)); - log_mutex_exit(); + log_mutex_exit_all(); /* Reset the checkpoint fields in logs */ log_make_checkpoint_at(LSN_MAX, TRUE); - log_mutex_enter(); + log_mutex_enter_all(); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 775f28c..b06252d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -457,10 +457,10 @@ create_log_files( fil_open_log_and_system_tablespace_files(); /* Create a log checkpoint. */ - log_mutex_enter(); + log_mutex_enter_all(); ut_d(recv_no_log_write = false); recv_reset_logs(lsn); - log_mutex_exit(); + log_mutex_exit_all(); return(DB_SUCCESS); } diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc index be1bd5d..52c99e0 100644 --- a/storage/innobase/sync/sync0debug.cc +++ b/storage/innobase/sync/sync0debug.cc @@ -600,6 +600,7 @@ SyncDebug::check_order(const latch_t* latch) case SYNC_FTS_CACHE_INIT: case SYNC_PAGE_CLEANER: case SYNC_LOG: + case SYNC_W_LOG: case SYNC_LOG_FLUSH_ORDER: case SYNC_ANY_LATCH: case SYNC_FILE_FORMAT_TAG: @@ -962,6 +963,10 @@ sync_latch_meta_init() SYNC_LOG, log_sys_mutex_key); + LATCH_ADD(SrvLatches, "log_sys_w", + SYNC_W_LOG, + log_sys_w_mutex_key); + LATCH_ADD(SrvLatches, "log_flush_order", SYNC_LOG_FLUSH_ORDER, log_flush_order_mutex_key); diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index b2e0271..a20248c 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -58,6 +58,7 @@ mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; mysql_pfs_key_t log_sys_mutex_key; mysql_pfs_key_t 
log_cmdq_mutex_key; mysql_pfs_key_t log_flush_order_mutex_key; +mysql_pfs_key_t log_sys_w_mutex_key; mysql_pfs_key_t recalc_pool_mutex_key; mysql_pfs_key_t page_cleaner_mutex_key; mysql_pfs_key_t purge_sys_pq_mutex_key;