Index: storage/innobase/trx/trx0trx.cc =================================================================== --- storage/innobase/trx/trx0trx.cc (revision 5767) +++ storage/innobase/trx/trx0trx.cc (working copy) @@ -1649,6 +1649,10 @@ flushed. */ trx_t* trx) /*!< in/out: transaction */ { + if (thd_enhanced_sync(trx->mysql_thd) + && trx->state == TRX_STATE_PREPARED) + return; + trx->op_info = "flushing log"; trx_flush_log_if_needed_low(lsn); trx->op_info = ""; Index: storage/innobase/handler/ha_innodb.cc =================================================================== --- storage/innobase/handler/ha_innodb.cc (revision 5767) +++ storage/innobase/handler/ha_innodb.cc (working copy) @@ -922,7 +922,8 @@ bool innobase_flush_logs( /*================*/ - handlerton* hton); /*!< in: InnoDB handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ + bool enhanced_sync); /*!< in: enable enhanced sync strategy if true*/ /************************************************************************//** Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the @@ -1130,6 +1131,17 @@ } /******************************************************************//** +Check if session variable sql_log_bin and binlog_enhanced_sync are both enabled. +@return true if enabled */ +ibool +thd_enhanced_sync( +/*=================*/ + THD* thd) /*!< in: thread handle */ +{ + return(thd != 0 && thd_sync_enhanced(thd)); +} + +/******************************************************************//** Save some CPU by testing the value of srv_thread_concurrency in inline functions. 
*/ static inline @@ -3318,15 +3330,37 @@ bool innobase_flush_logs( /*================*/ - handlerton* hton) /*!< in/out: InnoDB handlerton */ + handlerton* hton, /*!< in/out: InnoDB handlerton */ + bool enhanced_sync) /*!< in: enable enhanced sync strategy if true*/ { bool result = 0; DBUG_ENTER("innobase_flush_logs"); DBUG_ASSERT(hton == innodb_hton_ptr); - if (!srv_read_only_mode) { - log_buffer_flush_to_disk(); + /* + If enhanced_sync is false, then this means innobase_flush_logs is called + by operations like FLUSH LOGS, BINLOG ROTATE, etc. + If enhanced_sync is true, we know binlog_enhanced_sync is enabled. We have to sync + the redo log if srv_flush_log_at_trx_commit = 1, or just write redo log if + srv_flush_log_at_trx_commit = 2. */ + if (!srv_read_only_mode + && (!enhanced_sync + ||(enhanced_sync && srv_flush_log_at_trx_commit != 0))) { + + bool sync = true; + + if (enhanced_sync + && srv_flush_log_at_trx_commit == 2) + sync = false; + + mutex_enter(&(log_sys->mutex)); + + lsn_t lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + log_write_up_to(lsn, sync); } DBUG_RETURN(result); Index: storage/innobase/handler/ha_innodb.h =================================================================== --- storage/innobase/handler/ha_innodb.h (revision 5767) +++ storage/innobase/handler/ha_innodb.h (working copy) @@ -422,8 +422,16 @@ @return True if sql_mode has strict mode (all or trans), false otherwise. */ bool thd_is_strict_mode(const MYSQL_THD thd) __attribute__((nonnull)); + +/** Is enhanced flush enabled. +@param thd Thread object +@return True if enhanced sync strategy is enabled, false otherwise. 
+*/ +bool thd_sync_enhanced(const MYSQL_THD thd) +__attribute__((nonnull)); } /* extern "C" */ + struct trx_t; extern const struct _ft_vft ft_vft_result; Index: storage/innobase/include/ha_prototypes.h =================================================================== --- storage/innobase/include/ha_prototypes.h (revision 5767) +++ storage/innobase/include/ha_prototypes.h (working copy) @@ -135,6 +135,14 @@ /*===========================*/ THD* thd); /*!< in: thread handle */ + /******************************************************************//** +Check if binlog_enhanced_sync and sql_log_bin are both enabled. +@return true if binlog_enhanced_sync and sql_log_bin are both enabled */ +ibool +thd_enhanced_sync( +/*=================*/ + THD* thd); /*!< in: thread handle */ + /*************************************************************//** Prints info of a THD object (== user session thread) to the given file. */ Index: sql/ha_ndbcluster_binlog.cc =================================================================== --- sql/ha_ndbcluster_binlog.cc (revision 5767) +++ sql/ha_ndbcluster_binlog.cc (working copy) @@ -904,7 +904,7 @@ ndb data to be logged has made it to the binary log to get a deterministic behavior on the rotation of the log. 
*/ -static bool ndbcluster_flush_logs(handlerton *hton) +static bool ndbcluster_flush_logs(handlerton *hton, bool enhanced_sync) { ndbcluster_binlog_wait(current_thd); return FALSE; Index: sql/log_event.cc =================================================================== --- sql/log_event.cc (revision 5767) +++ sql/log_event.cc (working copy) @@ -5281,7 +5281,7 @@ { sql_print_information("Crashing crash_after_commit_and_update_pos."); rli->flush_info(true); - ha_flush_logs(0); + ha_flush_logs(0, 0); DBUG_SUICIDE(); } ); Index: sql/binlog.cc =================================================================== --- sql/binlog.cc (revision 5767) +++ sql/binlog.cc (working copy) @@ -66,6 +66,7 @@ static handlerton *binlog_hton; bool opt_binlog_order_commits= true; +bool opt_binlog_enhanced_sync= false; const char *log_bin_index= 0; const char *log_bin_basename= 0; @@ -4036,7 +4037,7 @@ Flush logs for storage engines, so that the last transaction is fsynced inside storage engines. */ - if (ha_flush_logs(NULL)) + if (ha_flush_logs(NULL, 0)) DBUG_RETURN(1); ha_reset_logs(thd); @@ -5141,7 +5142,7 @@ mysql_mutex_lock(&LOCK_index); if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1) - && (error= ha_flush_logs(NULL))) + && (error= ha_flush_logs(NULL, 0))) goto end; mysql_mutex_assert_owner(&LOCK_log); @@ -5701,7 +5702,7 @@ Flush logs for storage engines, so that the last transaction is fsynced inside storage engines. */ - ha_flush_logs(NULL); + ha_flush_logs(NULL, 0); purge_logs_before_date(purge_time, true); } } @@ -6880,6 +6881,7 @@ my_atomic_rwlock_rdunlock(&opt_binlog_max_flush_queue_time_lock); const ulonglong start_utime= max_udelay > 0 ? my_micro_time() : 0; + bool enhanced_sync= opt_binlog_enhanced_sync; /* First we read the queue until it either is empty or the difference between the time we started and the current time is too large. 
@@ -6892,16 +6894,22 @@ while ((max_udelay == 0 || my_micro_time() < start_utime + max_udelay) && has_more) { std::pair current= stage_manager.pop_front(Stage_manager::FLUSH_STAGE); - std::pair result= flush_thread_caches(current.second); has_more= current.first; - total_bytes+= result.second; - if (flush_error == 1) - flush_error= result.first; + if (first_seen == NULL) first_seen= current.second; #ifndef DBUG_OFF no_flushes++; #endif + + /* Flush thread cache later if enhanced sync is enabled*/ + if (!enhanced_sync) + { + std::pair result= flush_thread_caches(current.second); + total_bytes+= result.second; + if (flush_error == 1) + flush_error= result.first; + } } /* @@ -6912,18 +6920,36 @@ if (has_more) { THD *queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE); - for (THD *head= queue ; head ; head = head->next_to_commit) + if (first_seen == NULL) + first_seen= queue; + + if (!enhanced_sync) { + for (THD *head= queue ; head ; head = head->next_to_commit) + { + std::pair result= flush_thread_caches(head); + total_bytes+= result.second; + if (flush_error == 1) + flush_error= result.first; +#ifndef DBUG_OFF + no_flushes++; +#endif + } + } + } + + if (enhanced_sync) + { + /* Tell the engine layer to sync the log*/ + ha_flush_logs(NULL, 1); + /* Flush thread cache to binlog file*/ + for (THD *head= first_seen ; head ; head = head->next_to_commit) + { std::pair result= flush_thread_caches(head); total_bytes+= result.second; if (flush_error == 1) flush_error= result.first; -#ifndef DBUG_OFF - no_flushes++; -#endif } - if (first_seen == NULL) - first_seen= queue; } *out_queue_var= first_seen; Index: sql/sql_class.cc =================================================================== --- sql/sql_class.cc (revision 5767) +++ sql/sql_class.cc (working copy) @@ -4089,6 +4089,11 @@ *inc = thd->variables.auto_increment_increment; } +extern "C" bool thd_sync_enhanced(const MYSQL_THD thd) +{ + return (opt_binlog_enhanced_sync + &&thd->variables.sql_log_bin); +} 
/** Is strict sql_mode set. @@ -4103,7 +4108,6 @@ return thd->is_strict_mode(); } - #ifndef EMBEDDED_LIBRARY extern "C" void thd_pool_wait_begin(MYSQL_THD thd, int wait_type); extern "C" void thd_pool_wait_end(MYSQL_THD thd); Index: sql/binlog.h =================================================================== --- sql/binlog.h (revision 5767) +++ sql/binlog.h (working copy) @@ -854,7 +854,7 @@ extern const char *log_bin_index; extern const char *log_bin_basename; extern bool opt_binlog_order_commits; - +extern bool opt_binlog_enhanced_sync; /** Turns a relative log binary log path into a full path, based on the opt_bin_logname or opt_relay_logname. Also trims the cr-lf at the Index: sql/handler.h =================================================================== --- sql/handler.h (revision 5767) +++ sql/handler.h (working copy) @@ -785,7 +785,7 @@ void (*drop_database)(handlerton *hton, char* path); int (*panic)(handlerton *hton, enum ha_panic_function flag); int (*start_consistent_snapshot)(handlerton *hton, THD *thd); - bool (*flush_logs)(handlerton *hton); + bool (*flush_logs)(handlerton *hton, bool enhanced_sync); bool (*show_status)(handlerton *hton, THD *thd, stat_print_fn *print, enum ha_stat_type stat); uint (*partition_flags)(); uint (*alter_table_flags)(uint flags); @@ -3457,7 +3457,7 @@ int ha_panic(enum ha_panic_function flag); void ha_close_connection(THD* thd); void ha_kill_connection(THD *thd); -bool ha_flush_logs(handlerton *db_type); +bool ha_flush_logs(handlerton *db_type, bool enhanced_sync); void ha_drop_database(char* path); int ha_create_table(THD *thd, const char *path, const char *db, const char *table_name, Index: sql/sql_reload.cc =================================================================== --- sql/sql_reload.cc (revision 5767) +++ sql/sql_reload.cc (working copy) @@ -136,7 +136,7 @@ query_logger.reopen_log_file(QUERY_LOG_GENERAL); if (options & REFRESH_ENGINE_LOG) - if (ha_flush_logs(NULL)) + if (ha_flush_logs(NULL, 0)) result= 
1; if (options & REFRESH_BINARY_LOG) Index: sql/sys_vars.cc =================================================================== --- sql/sys_vars.cc (revision 5767) +++ sql/sys_vars.cc (working copy) @@ -1070,6 +1070,14 @@ GLOBAL_VAR(opt_binlog_order_commits), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); +static Sys_var_mybool Sys_binlog_enhanced_sync( + "binlog_enhanced_sync", + "enhanced innodb flush stragey to improve the perofrmance of" + " group commit. if enabled, the redo log write/sync will be" + " delayed until binlog binlog flush stage", + GLOBAL_VAR(opt_binlog_enhanced_sync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + static Sys_var_ulong Sys_bulk_insert_buff_size( "bulk_insert_buffer_size", "Size of tree cache used in bulk " "insert optimisation. Note that this is a limit per thread!", Index: sql/handler.cc =================================================================== --- sql/handler.cc (revision 5767) +++ sql/handler.cc (working copy) @@ -2026,25 +2026,26 @@ void *arg) { handlerton *hton= plugin_data(plugin, handlerton *); - if (hton->state == SHOW_OPTION_YES && hton->flush_logs && - hton->flush_logs(hton)) + if (hton->state == SHOW_OPTION_YES && hton->flush_logs && + hton->flush_logs(hton, *((bool *)arg))) return TRUE; return FALSE; } -bool ha_flush_logs(handlerton *db_type) +bool ha_flush_logs(handlerton *db_type, bool enhanced_sync) { if (db_type == NULL) { if (plugin_foreach(NULL, flush_handlerton, - MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + MYSQL_STORAGE_ENGINE_PLUGIN, + (void *)(&enhanced_sync))) return TRUE; } else { if (db_type->state != SHOW_OPTION_YES || - (db_type->flush_logs && db_type->flush_logs(db_type))) + (db_type->flush_logs && db_type->flush_logs(db_type, enhanced_sync))) return TRUE; } return FALSE; Index: sql/sql_table.cc =================================================================== --- sql/sql_table.cc (revision 5767) +++ sql/sql_table.cc (working copy) @@ -8804,7 +8804,7 @@ else sql_print_warning("Could not open table %s.%s after 
rename\n", alter_ctx.new_db, alter_ctx.table_name); - ha_flush_logs(old_db_type); + ha_flush_logs(old_db_type, 0); } table_list->table= NULL; // For query cache query_cache.invalidate(thd, table_list, FALSE);