From 1071d225a6ca043477e14be8777f2dcef86931a3 Mon Sep 17 00:00:00 2001 From: sensssz Date: Mon, 12 Dec 2016 20:27:50 -0500 Subject: [PATCH 1/6] New implementation. --- storage/innobase/handler/ha_innodb.cc | 31 ++- storage/innobase/include/lock0lock.h | 11 + storage/innobase/include/trx0trx.h | 2 + storage/innobase/lock/lock0lock.cc | 378 +++++++++++++++++++++++++++++++--- storage/innobase/trx/trx0trx.cc | 3 + 5 files changed, 400 insertions(+), 25 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 1a65c5f..7b87493 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -300,6 +300,22 @@ static TYPELIB innodb_default_row_format_typelib = { NULL }; +/** Possible values of the parameter innodb_lock_schedule_algorithm */ +static const char* innodb_lock_schedule_algorithm_names[] = { + "fcfs", + "vats", + NullS +}; + +/** Used to define an enumerate type of the system variable +innodb_lock_schedule_algorithm. */ +static TYPELIB innodb_lock_schedule_algorithm_typelib = { + array_elements(innodb_lock_schedule_algorithm_names) - 1, + "innodb_lock_schedule_algorithm_typelib", + innodb_lock_schedule_algorithm_names, + NULL +}; + /* The following counter is used to convey information to InnoDB about server activity: in case of normal DML ops it is not sensible to call srv_active_wake_master_thread after each @@ -19467,6 +19483,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size, NULL, NULL, 120, 1, 127, 0); #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ +static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm, + PLUGIN_VAR_RQCMDARG, + "The algorithm Innodb uses for deciding which locks to grant next when" + " a lock is released. 
Possible values are" + " FCFS" + " grant the locks in First-Come-First-Served order;" + " VATS" + " use the Variance-Aware-Transaction-Scheduling algorithm, which" + " uses an Eldest-Transaction-First heuristic.", + NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, + &innodb_lock_schedule_algorithm_typelib); + static MYSQL_SYSVAR_ULONG(buffer_pool_instances, srv_buf_pool_instances, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of buffer pool instances, set to higher value on high-end machines to increase scalability", @@ -20069,7 +20097,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(ft_num_word_optimize), MYSQL_SYSVAR(ft_sort_pll_degree), MYSQL_SYSVAR(large_prefix), - MYSQL_SYSVAR(force_load_corrupted), + MYSQL_SYSVAR(force_load_corrupted), + MYSQL_SYSVAR(lock_schedule_algorithm), MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(deadlock_detect), diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 8db09e5..7c167c4 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -45,6 +45,17 @@ class ReadView; extern my_bool innobase_deadlock_detect; +/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by + setting innodb_lock_schedule_algorithm. */ +enum innodb_lock_schedule_algorithm_t { + /*!< First Come First Served */ + INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, + /*!< Variance-Aware-Transaction-Scheduling */ + INNODB_LOCK_SCHEDULE_ALGORITHM_VATS +}; + +extern ulong innodb_lock_schedule_algorithm; + /*********************************************************************//** Gets the size of a lock struct. 
@return size in bytes */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 45e567e..5a71bd3 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1089,6 +1089,8 @@ struct trx_t { time_t start_time; /*!< time the state last time became TRX_STATE_ACTIVE */ + long dep_size; + bool size_updated; lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation == TRX_DICT_OP_TABLE, or 0. */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 540bb61..1c968ee 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -49,11 +49,16 @@ Created 5/7/1996 Heikki Tuuri #include "row0mysql.h" #include "pars0pars.h" +#include <algorithm> #include <set> +#include <vector> /* Flag to enable/disable deadlock detector. */ my_bool innobase_deadlock_detect = TRUE; +/** Lock scheduling algorithm */ +ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_VATS; + /** Total number of cached record locks */ static const ulint REC_LOCK_CACHE = 8; @@ -1488,6 +1493,196 @@ RecLock::lock_alloc( return(lock); } +/*********************************************************************//** +Check if lock1 has higher priority than lock2. +NULL has lowest priority. +If either is a high priority transaction, the lock has higher priority. +If neither of them is wait lock, the first one has higher priority. +If only one of them is a wait lock, it has lower priority. +Otherwise, the one with an older transaction has higher priority. +@returns true if lock1 has higher priority, false otherwise.
*/ +static +bool +has_higher_priority( + lock_t *lock1, + lock_t *lock2) +{ + if (lock1 == NULL) { + return false; + } else if (lock2 == NULL) { + return true; + } + if (!lock_get_wait(lock1)) { + return true; + } else if (!lock_get_wait(lock2)) { + return false; + } + if (trx_is_high_priority(lock1->trx)) { + return true; + } + if (trx_is_high_priority(lock2->trx)) { + return false; + } + return lock1->trx->dep_size > lock2->trx->dep_size; +} + +static +bool +use_vats( + trx_t *trx) +{ + return innodb_lock_schedule_algorithm == + INNODB_LOCK_SCHEDULE_ALGORITHM_VATS + && !thd_is_replication_slave_thread(trx->mysql_thd); +} + +static +lock_t * +lock_rec_get_first( + hash_table_t *hash, + ulint space, + ulint page_no, + ulint heap_no) +{ + lock_t *lock; + + lock = lock_rec_get_first_on_page_addr(hash, space, page_no); + if (lock != NULL && !lock_rec_get_nth_bit(lock, heap_no)) { + lock = lock_rec_get_next(heap_no, lock); + } + + return lock; +} + +static +void +lock_rec_insert_to_head( + hash_table_t *lock_hash, + lock_t *lock, + ulint rec_fold) +{ + lock_t *next; + hash_cell_t* cell; + + // Move the target lock to the head of the list + cell = hash_get_nth_cell(lock_hash, hash_calc_hash(rec_fold, lock_hash)); + if (lock != cell->node) { + next = (lock_t *) cell->node; + cell->node = lock; + lock->hash = next; + } +} + +static +void +reset_trx_size_updated() +{ + trx_t *trx; + for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + trx->size_updated = false; + } + for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + trx != NULL; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + trx->size_updated = false; + } +} + +static +void +update_dep_size( + trx_t *trx, + long size_delta, + long depth=1) +{ + ulint space; + ulint page_no; + ulint heap_no; + lock_t *lock; + lock_t *wait_lock; + hash_table_t *lock_hash; + + if (!use_vats(trx) || trx->size_updated || size_delta == 0) { + return; + } + + trx->size_updated = 
true; + trx->dep_size += size_delta; + if (trx->dep_size < 0) { + trx->dep_size = 0; + } + wait_lock = trx->lock.wait_lock; + if (trx->state != TRX_STATE_ACTIVE + || wait_lock == NULL) { + if (depth == 1) { + reset_trx_size_updated(); + } + return; + } + + space = wait_lock->un_member.rec_lock.space; + page_no = wait_lock->un_member.rec_lock.page_no; + heap_no = lock_rec_find_set_bit(wait_lock); + lock_hash = lock_hash_get(wait_lock->type_mode); + for (lock = lock_rec_get_first(lock_hash, space, page_no, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (!lock_get_wait(lock) + && trx != lock->trx) { + update_dep_size(lock->trx, size_delta, depth + 1); + } + } + if (depth == 1) { + reset_trx_size_updated(); + } +} + +static +void +update_dep_size( + lock_t *in_lock, + ulint heap_no, + bool wait) +{ + lock_t *lock; + ulint space; + ulint page_no; + long total_size_delta; + hash_table_t *lock_hash; + + if (!use_vats(in_lock->trx)) { + return; + } + + space = in_lock->un_member.rec_lock.space; + page_no = in_lock->un_member.rec_lock.page_no; + lock_hash = lock_hash_get(in_lock->type_mode); + + if (wait) { + for (lock = lock_rec_get_first(lock_hash, space, page_no, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (!lock_get_wait(lock) + && in_lock->trx != lock->trx) { + update_dep_size(lock->trx, in_lock->trx->dep_size + 1); + } + } + } else { + total_size_delta = 0; + for (lock = lock_rec_get_first(lock_hash, space, page_no, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (lock_get_wait(lock) + && in_lock->trx != lock->trx) { + total_size_delta += lock->trx->dep_size + 1; + } + } + update_dep_size(in_lock->trx, total_size_delta); + } +} + /** Add the lock to the record lock hash and the transaction's lock list @param[in,out] lock Newly created record lock to add to the rec hash @@ -1498,19 +1693,28 @@ RecLock::lock_add(lock_t* lock, bool add_to_hash) ut_ad(lock_mutex_own()); 
ut_ad(trx_mutex_own(lock->trx)); + bool wait = m_mode & LOCK_WAIT; + if (add_to_hash) { ulint key = m_rec_id.fold(); + hash_table_t *lock_hash = lock_hash_get(m_mode); ++lock->index->table->n_rec_locks; - HASH_INSERT(lock_t, hash, lock_hash_get(m_mode), key, lock); + if (use_vats(lock->trx) && !wait) { + lock_rec_insert_to_head(lock_hash, lock, key); + } else { + HASH_INSERT(lock_t, hash, lock_hash, key, lock); + } } - if (m_mode & LOCK_WAIT) { + UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock); + + if (wait) { lock_set_lock_and_trx_wait(lock, lock->trx); + } else { + update_dep_size(lock, lock_rec_find_set_bit(lock), false); } - - UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock); } /** @@ -1733,6 +1937,8 @@ RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt) ut_ad(trx_mutex_own(m_trx)); + update_dep_size(lock, lock_rec_find_set_bit(lock), err == DB_LOCK_WAIT || err == DB_DEADLOCK); + /* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */ if (err == DB_LOCK_WAIT) { thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd); @@ -1833,6 +2039,7 @@ lock_rec_add_to_queue( if (lock != NULL) { lock_rec_set_nth_bit(lock, heap_no); + update_dep_size(lock, heap_no, false); return; } @@ -1916,6 +2123,7 @@ lock_rec_lock_fast( if (!lock_rec_get_nth_bit(lock, heap_no)) { lock_rec_set_nth_bit(lock, heap_no); status = LOCK_REC_SUCCESS_CREATED; + update_dep_size(lock, heap_no, false); } } @@ -2423,6 +2631,111 @@ lock_rec_cancel( trx_mutex_exit(lock->trx); } +/*********************************************************************//** +Checks if a waiting record lock request still has to wait for granted locks.
+@return lock that is causing the wait */ +static +const lock_t* +lock_rec_has_to_wait_granted( +/*==========================*/ + const lock_t* wait_lock, /*!< in: waiting record lock */ + std::vector<lock_t *> &granted_locks) /*!< in: granted record lock */ +{ + ulint i; + lock_t *lock; + for (i = 0; i < granted_locks.size(); ++i) { + lock = granted_locks[i]; + if (lock_has_to_wait(wait_lock, lock)) { + return lock; + } + } + return NULL; +} + +static +void +vats_grant( + hash_table_t *lock_hash, + lock_t *released_lock, + ulint heap_no) +{ + ulint space; + ulint page_no; + ulint rec_fold; + ulint i; + ulint j; + long sub_dep_size_total; + long add_dep_size_total; + long dep_size_compsensate; + lock_t* lock; + lock_t* wait_lock; + lock_t* new_granted_lock; + std::vector<lock_t *> wait_locks; + std::vector<lock_t *> granted_locks; + std::vector<lock_t *> new_granted; + + sub_dep_size_total = 0; + add_dep_size_total = 0; + space = released_lock->un_member.rec_lock.space; + page_no = released_lock->un_member.rec_lock.page_no; + rec_fold = lock_rec_fold(space, page_no); + for (lock = lock_rec_get_first(lock_hash, space, page_no, heap_no); + lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (!lock_get_wait(lock)) { + granted_locks.push_back(lock); + } else { + wait_locks.push_back(lock); + } + } + + std::sort(wait_locks.begin(), wait_locks.end(), has_higher_priority); + for (i = 0; i < wait_locks.size(); ++i) { + lock = wait_locks[i]; + if (!lock_rec_has_to_wait_granted(lock, granted_locks) + && !lock_rec_has_to_wait_granted(lock, new_granted)) { + lock_grant(lock); + HASH_DELETE(lock_t, hash, lock_hash, + rec_fold, lock); + lock_rec_insert_to_head(lock_hash, lock, rec_fold); + new_granted.push_back(lock); + sub_dep_size_total -= lock->trx->dep_size + 1; + } else { + add_dep_size_total += lock->trx->dep_size + 1; + } + } + if (lock_get_wait(released_lock)) { + sub_dep_size_total -= released_lock->trx->dep_size + 1; + } + for (i = 0; i < granted_locks.size(); ++i) { + lock = granted_locks[i]; +
dep_size_compsensate = 0; + for (j = 0; j < new_granted.size(); ++j) { + new_granted_lock = new_granted[j]; + if (lock->trx == new_granted_lock->trx) { + dep_size_compsensate += lock->trx->dep_size + 1; + } + } + if (lock->trx != released_lock->trx) { + update_dep_size(lock->trx, sub_dep_size_total + dep_size_compsensate); + } + } + for (i = 0; i < new_granted.size(); ++i) { + lock = new_granted[i]; + dep_size_compsensate = 0; + for (j = 0; j < wait_locks.size(); ++j) { + wait_lock = wait_locks[j]; + if (lock_get_wait(wait_lock) + && lock->trx == wait_lock->trx) { + dep_size_compsensate -= lock->trx->dep_size + 1; + } + } + if (lock->trx != released_lock->trx) { + update_dep_size(lock->trx, add_dep_size_total + dep_size_compsensate); + } + } +} + /*************************************************************//** Removes a record lock request, waiting or granted, from the queue and grants locks to other transactions in the queue if they now are entitled @@ -2439,6 +2752,7 @@ lock_rec_dequeue_from_page( { ulint space; ulint page_no; + ulint heap_no; lock_t* lock; trx_lock_t* trx_lock; hash_table_t* lock_hash; @@ -2465,20 +2779,31 @@ lock_rec_dequeue_from_page( MONITOR_INC(MONITOR_RECLOCK_REMOVED); MONITOR_DEC(MONITOR_NUM_RECLOCK); - /* Check if waiting locks in the queue can now be granted: grant - locks if there are no conflicting locks ahead. Stop at the first - X lock that is waiting or has been granted. */ + if (!use_vats(in_lock->trx)) { - for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no); - lock != NULL; - lock = lock_rec_get_next_on_page(lock)) { + /* Check if waiting locks in the queue can now be granted: + grant locks if there are no conflicting locks ahead. Stop at + the first X lock that is waiting or has been granted. 
*/ - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { + for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, + page_no); + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { - /* Grant the lock */ - ut_ad(lock->trx != in_lock->trx); - lock_grant(lock); + if (lock_get_wait(lock) + && !lock_rec_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + ut_ad(lock->trx != in_lock->trx); + lock_grant(lock); + } + } + } else { + for (heap_no = 0; heap_no < lock_rec_get_n_bits(in_lock); ++heap_no) { + if (!lock_rec_get_nth_bit(in_lock, heap_no)) { + continue; + } + vats_grant(lock_hash, in_lock, heap_no); } } } @@ -4269,17 +4594,22 @@ lock_rec_unlock( ut_a(!lock_get_wait(lock)); lock_rec_reset_nth_bit(lock, heap_no); - /* Check if we can now grant waiting lock requests */ + if (!use_vats(trx)) { - for (lock = first_lock; lock != NULL; - lock = lock_rec_get_next(heap_no, lock)) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { + /* Check if we can now grant waiting lock requests */ - /* Grant the lock */ - ut_ad(trx != lock->trx); - lock_grant(lock); + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (lock_get_wait(lock) + && !lock_rec_has_to_wait_in_queue(lock)) { + + /* Grant the lock */ + ut_ad(trx != lock->trx); + lock_grant(lock, false); + } } + } else { + vats_grant(lock_sys->rec_hash, lock, heap_no); } lock_mutex_exit(); @@ -7221,7 +7551,7 @@ DeadlockChecker::get_first_lock(ulint* heap_no) const /* Must find at least two locks, otherwise there cannot be a waiting lock, secondly the first lock cannot be the wait_lock. */ ut_a(lock != NULL); - ut_a(lock != m_wait_lock); + ut_a(lock != m_wait_lock || use_vats(lock->trx)); /* Check that the lock type doesn't change. 
*/ ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock)); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7f89df7..83d660a 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1462,6 +1462,9 @@ trx_start_low( trx->start_time = ut_time(); } + trx->dep_size = 0; + trx->size_updated = false; + ut_a(trx->error_state == DB_SUCCESS); MONITOR_INC(MONITOR_TRX_ACTIVE); From d19982cbbae383cc3d2f99d87044abf821582c48 Mon Sep 17 00:00:00 2001 From: sensssz Date: Mon, 12 Dec 2016 20:32:16 -0500 Subject: [PATCH 2/6] Error fix for lock_grant. --- storage/innobase/lock/lock0lock.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 1c968ee..e904355 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4605,7 +4605,7 @@ lock_rec_unlock( /* Grant the lock */ ut_ad(trx != lock->trx); - lock_grant(lock, false); + lock_grant(lock); } } } else { From 27b4cdad775684b682e55e6f8c2e3358d1438cc6 Mon Sep 17 00:00:00 2001 From: sensssz Date: Mon, 12 Dec 2016 23:49:34 -0500 Subject: [PATCH 3/6] Add test for variable lock_schedule_algorithm and fix result of perfschema.show_sanity. 
--- mysql-test/suite/perfschema/r/show_sanity.result | 2 ++ .../r/innodb_lock_schedule_algorithm_basic.result | 12 ++++++++++++ .../sys_vars/t/innodb_lock_schedule_algorithm_basic.test | 16 ++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 mysql-test/suite/sys_vars/r/innodb_lock_schedule_algorithm_basic.result create mode 100644 mysql-test/suite/sys_vars/t/innodb_lock_schedule_algorithm_basic.test diff --git a/mysql-test/suite/perfschema/r/show_sanity.result b/mysql-test/suite/perfschema/r/show_sanity.result index a10b908..dea3230 100644 --- a/mysql-test/suite/perfschema/r/show_sanity.result +++ b/mysql-test/suite/perfschema/r/show_sanity.result @@ -409,6 +409,7 @@ order by show_mode, source, variable_name; SHOW_MODE SOURCE VARIABLE_NAME 5.6 I_S.SESSION_VARIABLES GTID_EXECUTED 5.6 I_S.SESSION_VARIABLES INNODB_DEADLOCK_DETECT +5.6 I_S.SESSION_VARIABLES INNODB_LOCK_SCHEDULE_ALGORITHM 5.6 I_S.SESSION_VARIABLES LOG_STATEMENTS_UNSAFE_FOR_BINLOG 5.6 I_S.SESSION_VARIABLES TLS_VERSION @@ -433,6 +434,7 @@ order by show_mode, source, variable_name; SHOW_MODE SOURCE VARIABLE_NAME 5.6 I_S.SESSION_VARIABLES GTID_EXECUTED 5.6 I_S.SESSION_VARIABLES INNODB_DEADLOCK_DETECT +5.6 I_S.SESSION_VARIABLES INNODB_LOCK_SCHEDULE_ALGORITHM 5.6 I_S.SESSION_VARIABLES LOG_STATEMENTS_UNSAFE_FOR_BINLOG 5.6 I_S.SESSION_VARIABLES TLS_VERSION diff --git a/mysql-test/suite/sys_vars/r/innodb_lock_schedule_algorithm_basic.result b/mysql-test/suite/sys_vars/r/innodb_lock_schedule_algorithm_basic.result new file mode 100644 index 0000000..74a64e3 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_lock_schedule_algorithm_basic.result @@ -0,0 +1,12 @@ +SELECT @@global.innodb_lock_schedule_algorithm; +@@global.innodb_lock_schedule_algorithm +fcfs +SET GLOBAL innodb_lock_schedule_algorithm = 'FCFS'; +SELECT @@global.innodb_lock_schedule_algorithm; +@@global.innodb_lock_schedule_algorithm +fcfs +SET GLOBAL innodb_lock_schedule_algorithm = 'VATS'; +SELECT 
@@global.innodb_lock_schedule_algorithm; +@@global.innodb_lock_schedule_algorithm +vats +SET GLOBAL innodb_lock_schedule_algorithm = 'FCFS'; diff --git a/mysql-test/suite/sys_vars/t/innodb_lock_schedule_algorithm_basic.test b/mysql-test/suite/sys_vars/t/innodb_lock_schedule_algorithm_basic.test new file mode 100644 index 0000000..83873a6 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_lock_schedule_algorithm_basic.test @@ -0,0 +1,16 @@ +-- source include/have_innodb.inc +# This is a debug variable for now +-- source include/have_debug.inc + +SELECT @@global.innodb_lock_schedule_algorithm; + +SET GLOBAL innodb_lock_schedule_algorithm = 'FCFS'; + +SELECT @@global.innodb_lock_schedule_algorithm; + +SET GLOBAL innodb_lock_schedule_algorithm = 'VATS'; + +SELECT @@global.innodb_lock_schedule_algorithm; + +# Reset state +SET GLOBAL innodb_lock_schedule_algorithm = 'FCFS'; From e3f913ac1865eb13a13b52df0cf3a6c7d2b17854 Mon Sep 17 00:00:00 2001 From: sensssz Date: Mon, 12 Dec 2016 23:55:01 -0500 Subject: [PATCH 4/6] Set default value of lock_schedule_algorithm to FCFS. --- storage/innobase/lock/lock0lock.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index e904355..bb41c03 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -57,7 +57,7 @@ Created 5/7/1996 Heikki Tuuri my_bool innobase_deadlock_detect = TRUE; /** Lock scheduling algorithm */ -ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_VATS; +ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS; /** Total number of cached record locks */ static const ulint REC_LOCK_CACHE = 8; From 5dcb8ea751d6352cad954833b4d063c77d340978 Mon Sep 17 00:00:00 2001 From: sensssz Date: Mon, 12 Dec 2016 23:59:56 -0500 Subject: [PATCH 5/6] Handle NULL pointer in is_replication_slave. 
--- storage/innobase/handler/ha_innodb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 7b87493..de1dcef 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1380,7 +1380,7 @@ thd_is_replication_slave_thread( /*============================*/ THD* thd) /*!< in: thread handle */ { - return((ibool) thd_slave_thread(thd)); + return(thd && (ibool) thd_slave_thread(thd)); } /******************************************************************//** From 02e10b027866e47429b70fa0f791c2af6f83b02e Mon Sep 17 00:00:00 2001 From: sensssz Date: Tue, 13 Dec 2016 10:54:31 -0500 Subject: [PATCH 6/6] Preserve FIFO order for high priority transactions. --- storage/innobase/include/trx0trx.h | 1 + storage/innobase/lock/lock0lock.cc | 10 +++++++++- storage/innobase/trx/trx0trx.cc | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 5a71bd3..5e0f778 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1091,6 +1091,7 @@ struct trx_t { TRX_STATE_ACTIVE */ long dep_size; bool size_updated; + long seq; lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation == TRX_DICT_OP_TABLE, or 0. */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index bb41c03..5827eb9 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1496,9 +1496,11 @@ RecLock::lock_alloc( /*********************************************************************//** Check if lock1 has higher priority than lock2. NULL has lowest priority. -If either is a high priority transaction, the lock has higher priority. If neither of them is wait lock, the first one has higher priority. 
If only one of them is a wait lock, it has lower priority. +If both are high priority transactions, the one with a lower seq +number has higher priority. +High priority transaction has higher priority. Otherwise, the one with an older transaction has higher priority. @returns true if lock1 has higher priority, false otherwise. */ static @@ -1517,6 +1519,10 @@ has_higher_priority( } else if (!lock_get_wait(lock2)) { return false; } + if (trx_is_high_priority(lock1->trx) + && trx_is_high_priority(lock2->trx)) { + return lock1->trx->seq < lock2->trx->seq; + } if (trx_is_high_priority(lock1->trx)) { return true; } @@ -2674,6 +2680,7 @@ vats_grant( std::vector<lock_t *> granted_locks; std::vector<lock_t *> new_granted; + i = 0; sub_dep_size_total = 0; add_dep_size_total = 0; space = released_lock->un_member.rec_lock.space; @@ -2685,6 +2692,7 @@ vats_grant( if (!lock_get_wait(lock)) { granted_locks.push_back(lock); } else { + lock->trx->seq = i++; wait_locks.push_back(lock); } } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 83d660a..40ab69d 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1464,6 +1464,7 @@ trx_start_low( trx->dep_size = 0; trx->size_updated = false; + trx->seq = 0; ut_a(trx->error_state == DB_SUCCESS);