*** storage/innobase/handler/ha_innodb.cc.orig Wed Apr 16 06:05:10 2008 --- storage/innobase/handler/ha_innodb.cc Fri Aug 8 09:01:19 2008 *************** *** 1819,1824 **** --- 1819,1825 ---- FALSE - the current SQL statement ended */ { trx_t* trx; + bool saved_flush_log_later; DBUG_ENTER("innobase_commit"); DBUG_PRINT("trans", ("ending transaction")); *************** *** 1865,1870 **** --- 1866,1897 ---- /* We need current binlog position for ibbackup to work. Note, the position is current because of prepare_commit_mutex */ + + /* TODO: the above is not true if something other than + InnoDB has written to the binlog file, so this needs + to be corrected. */ + + trx->mysql_log_file_name = mysql_bin_log_file_name(); + trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); + + if (trx->active_trans == 2) { + /* we are holding the prepare_commit_mutex so + do a commit without a flush here, to reserve + our place in the log and ensure the same order + as the binlog file, then release the lock to + allow other threads to progress. We do this + before checking srv_commit_concurrency to avoid + mixing the lock ordering of prepare_commit_mutex + and commit_cond_m. We will do the actual flush + to disk after checking srv_commit_concurrency. */ + + saved_flush_log_later = trx->flush_log_later; + trx->flush_log_later = true; + innobase_commit_low(trx); + trx->flush_log_later = saved_flush_log_later; + pthread_mutex_unlock(&prepare_commit_mutex); + } + retry: if (srv_commit_concurrency > 0) { pthread_mutex_lock(&commit_cond_m); *************** *** 1882,1892 **** } } ! trx->mysql_log_file_name = mysql_bin_log_file_name(); ! trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); - innobase_commit_low(trx); - if (srv_commit_concurrency > 0) { pthread_mutex_lock(&commit_cond_m); commit_threads--; --- 1909,1922 ---- } } ! if (trx->active_trans == 2) { ! /* We already did a commit without a flush, ! so now we need to do the actual flush. */ ! trx_commit_complete_for_mysql(trx); ! } else { ! innobase_commit_low(trx); ! } if (srv_commit_concurrency > 0) { pthread_mutex_lock(&commit_cond_m); commit_threads--; *************** *** 1894,1904 **** pthread_mutex_unlock(&commit_cond_m); } - if (trx->active_trans == 2) { - - pthread_mutex_unlock(&prepare_commit_mutex); - } - trx->active_trans = 0; } else { --- 1924,1929 ---- *************** *** 7699,7731 **** int error = 0; trx_t* trx = check_trx_exists(thd); - if (thd_sql_command(thd) != SQLCOM_XA_PREPARE && - (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { - - /* For ibbackup to work the order of transactions in binlog - and InnoDB must be the same. Consider the situation - - thread1> prepare; write to binlog; ... - - thread2> prepare; write to binlog; commit - thread1> ... commit - - To ensure this will not happen we're taking the mutex on - prepare, and releasing it on commit. - - Note: only do it for normal commits, done via ha_commit_trans. - If 2pc protocol is executed by external transaction - coordinator, it will be just a regular MySQL client - executing XA PREPARE and XA COMMIT commands. - In this case we cannot know how many minutes or hours - will be between XA PREPARE and XA COMMIT, and we don't want - to block for undefined period of time. - */ - pthread_mutex_lock(&prepare_commit_mutex); - trx->active_trans = 2; - } - if (!THDVAR(thd, support_xa)) { return(0); --- 7724,7729 ---- *************** *** 7754,7759 **** --- 7752,7781 ---- ut_ad(trx->active_trans); error = (int) trx_prepare_for_mysql(trx); + if (thd_sql_command(thd) != SQLCOM_XA_PREPARE) { + + /* For ibbackup to work the order of transactions in binlog + and InnoDB must be the same. Consider the situation + + thread1> prepare; write to binlog; ... + + thread2> prepare; write to binlog; commit + thread1> ... commit + + To ensure this will not happen we're taking the mutex on + prepare, and releasing it on commit. + + Note: only do it for normal commits, done via ha_commit_trans. + If 2pc protocol is executed by external transaction + coordinator, it will be just a regular MySQL client + executing XA PREPARE and XA COMMIT commands. + In this case we cannot know how many minutes or hours + will be between XA PREPARE and XA COMMIT, and we don't want + to block for undefined period of time. + */ + pthread_mutex_lock(&prepare_commit_mutex); + trx->active_trans = 2; + } } else { /* We just mark the SQL statement ended and do not do a transaction prepare */