Index: storage/innobase/trx/trx0trx.cc =================================================================== --- storage/innobase/trx/trx0trx.cc (revision 6927) +++ storage/innobase/trx/trx0trx.cc (working copy) @@ -2630,7 +2630,8 @@ trx_sys_mutex_exit(); /*--------------------------------------*/ - if (lsn > 0) { + if (lsn > 0 + && (!thd_enhanced_sync(trx->mysql_thd))) { /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the prepared state of the transaction durable if the OS does not crash. We may also Index: storage/innobase/handler/ha_innodb.cc =================================================================== --- storage/innobase/handler/ha_innodb.cc (revision 6927) +++ storage/innobase/handler/ha_innodb.cc (working copy) @@ -922,7 +922,8 @@ bool innobase_flush_logs( /*================*/ - handlerton* hton); /*!< in: InnoDB handlerton */ + handlerton* hton, /*!< in: InnoDB handlerton */ + bool enhanced_sync); /*!< in: enable enhanced sync strategy if true */ /************************************************************************//** Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the @@ -1130,6 +1131,17 @@ } /******************************************************************//** +Check if session variable sql_log_bin and global variable binlog_enhanced_sync are both enabled. +@return true if thd is non-NULL and both are enabled */ +ibool +thd_enhanced_sync( +/*=================*/ + THD* thd) /*!< in: thread handle */ +{ + return(thd != 0 && thd_sync_enhanced(thd)); +} + +/******************************************************************//** Save some CPU by testing the value of srv_thread_concurrency in inline functions. 
*/ static inline @@ -3318,15 +3330,35 @@ bool innobase_flush_logs( /*================*/ - handlerton* hton) /*!< in/out: InnoDB handlerton */ + handlerton* hton, /*!< in/out: InnoDB handlerton */ + bool enhanced_sync) /*!< in: enable enhanced sync strategy if true */ { bool result = 0; DBUG_ENTER("innobase_flush_logs"); DBUG_ASSERT(hton == innodb_hton_ptr); + /* + If enhanced_sync is false, innobase_flush_logs is being called + by operations like FLUSH LOGS, binlog rotation, etc. + If enhanced_sync is true, we know binlog_enhanced_sync is enabled. We have to sync + the redo log if srv_flush_log_at_trx_commit = 1, or just write the redo log if + srv_flush_log_at_trx_commit = 2; if it is 0, nothing is done here. */ if (!srv_read_only_mode) { - log_buffer_flush_to_disk(); + if (!enhanced_sync) + log_buffer_flush_to_disk(); + else { + if (srv_flush_log_at_trx_commit == 0) + DBUG_RETURN(result); + + bool sync = (srv_flush_log_at_trx_commit == 1); + + mutex_enter(&(log_sys->mutex)); + lsn_t lsn = log_sys->lsn; + mutex_exit(&(log_sys->mutex)); + + log_write_up_to(lsn, sync); + } } DBUG_RETURN(result); Index: storage/innobase/handler/ha_innodb.h =================================================================== --- storage/innobase/handler/ha_innodb.h (revision 6927) +++ storage/innobase/handler/ha_innodb.h (working copy) @@ -422,8 +422,16 @@ @return True if sql_mode has strict mode (all or trans), false otherwise. */ bool thd_is_strict_mode(const MYSQL_THD thd) __attribute__((nonnull)); + +/** Is enhanced flush enabled. +@param thd Thread object +@return True if enhanced sync strategy is enabled, false otherwise. 
+*/ +bool thd_sync_enhanced(const MYSQL_THD thd) +__attribute__((nonnull)); } /* extern "C" */ + struct trx_t; extern const struct _ft_vft ft_vft_result; Index: storage/innobase/include/ha_prototypes.h =================================================================== --- storage/innobase/include/ha_prototypes.h (revision 6927) +++ storage/innobase/include/ha_prototypes.h (working copy) @@ -135,6 +135,14 @@ /*===========================*/ THD* thd); /*!< in: thread handle */ + /******************************************************************//** +Check if binlog_enhanced_sync and sql_log_bin are both enabled. +@return true if binlog_enhanced_sync and sql_log_bin are both enabled */ +ibool +thd_enhanced_sync( +/*=================*/ + THD* thd); /*!< in: thread handle */ + /*************************************************************//** Prints info of a THD object (== user session thread) to the given file. */ Index: sql/ha_ndbcluster_binlog.cc =================================================================== --- sql/ha_ndbcluster_binlog.cc (revision 6927) +++ sql/ha_ndbcluster_binlog.cc (working copy) @@ -904,7 +904,7 @@ ndb data to be logged has made it to the binary log to get a deterministic behavior on the rotation of the log. */ -static bool ndbcluster_flush_logs(handlerton *hton) +static bool ndbcluster_flush_logs(handlerton *hton, bool enhanced_sync) { ndbcluster_binlog_wait(current_thd); return FALSE; Index: sql/binlog.cc =================================================================== --- sql/binlog.cc (revision 6927) +++ sql/binlog.cc (working copy) @@ -66,6 +66,7 @@ static handlerton *binlog_hton; bool opt_binlog_order_commits= true; +bool opt_binlog_enhanced_sync= false; const char *log_bin_index= 0; const char *log_bin_basename= 0; @@ -6880,6 +6881,7 @@ my_atomic_rwlock_rdunlock(&opt_binlog_max_flush_queue_time_lock); const ulonglong start_utime= max_udelay > 0 ? 
my_micro_time() : 0; + bool enhanced_sync= opt_binlog_enhanced_sync; /* First we read the queue until it either is empty or the difference between the time we started and the current time is too large. @@ -6889,7 +6891,9 @@ */ bool has_more= true; THD *first_seen= NULL; - while ((max_udelay == 0 || my_micro_time() < start_utime + max_udelay) && has_more) + while (!enhanced_sync + && (max_udelay == 0 || my_micro_time() < start_utime + max_udelay) + && has_more) { std::pair<bool,THD*> current= stage_manager.pop_front(Stage_manager::FLUSH_STAGE); std::pair<int,my_off_t> result= flush_thread_caches(current.second); @@ -6912,18 +6916,36 @@ if (has_more) { THD *queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE); - for (THD *head= queue ; head ; head = head->next_to_commit) + if (first_seen == NULL) + first_seen= queue; + + if (!enhanced_sync) { + for (THD *head= queue ; head ; head = head->next_to_commit) + { + std::pair<int,my_off_t> result= flush_thread_caches(head); + total_bytes+= result.second; + if (flush_error == 1) + flush_error= result.first; +#ifndef DBUG_OFF + no_flushes++; +#endif + } + } + } + + if (enhanced_sync) + { + /* Tell the engine layer to write/sync its log */ + ha_flush_logs(NULL, true); + /* Flush thread caches to the binlog file */ + for (THD *head= first_seen ; head ; head = head->next_to_commit) + { std::pair<int,my_off_t> result= flush_thread_caches(head); total_bytes+= result.second; if (flush_error == 1) flush_error= result.first; -#ifndef DBUG_OFF - no_flushes++; -#endif } - if (first_seen == NULL) - first_seen= queue; } *out_queue_var= first_seen; Index: sql/sql_class.cc =================================================================== --- sql/sql_class.cc (revision 6927) +++ sql/sql_class.cc (working copy) @@ -4089,6 +4089,11 @@ *inc = thd->variables.auto_increment_increment; } +extern "C" bool thd_sync_enhanced(const MYSQL_THD thd) +{ + return (opt_binlog_enhanced_sync + && thd->variables.sql_log_bin); +} /** Is strict sql_mode set. 
@@ -4103,7 +4108,6 @@ return thd->is_strict_mode(); } - #ifndef EMBEDDED_LIBRARY extern "C" void thd_pool_wait_begin(MYSQL_THD thd, int wait_type); extern "C" void thd_pool_wait_end(MYSQL_THD thd); Index: sql/binlog.h =================================================================== --- sql/binlog.h (revision 6927) +++ sql/binlog.h (working copy) @@ -854,7 +854,7 @@ extern const char *log_bin_index; extern const char *log_bin_basename; extern bool opt_binlog_order_commits; - +extern bool opt_binlog_enhanced_sync; /** Turns a relative log binary log path into a full path, based on the opt_bin_logname or opt_relay_logname. Also trims the cr-lf at the Index: sql/handler.h =================================================================== --- sql/handler.h (revision 6927) +++ sql/handler.h (working copy) @@ -785,7 +785,7 @@ void (*drop_database)(handlerton *hton, char* path); int (*panic)(handlerton *hton, enum ha_panic_function flag); int (*start_consistent_snapshot)(handlerton *hton, THD *thd); - bool (*flush_logs)(handlerton *hton); + bool (*flush_logs)(handlerton *hton, bool enhanced_sync); bool (*show_status)(handlerton *hton, THD *thd, stat_print_fn *print, enum ha_stat_type stat); uint (*partition_flags)(); uint (*alter_table_flags)(uint flags); @@ -3457,7 +3457,7 @@ int ha_panic(enum ha_panic_function flag); void ha_close_connection(THD* thd); void ha_kill_connection(THD *thd); -bool ha_flush_logs(handlerton *db_type); +bool ha_flush_logs(handlerton *db_type, bool enhanced_sync = false); void ha_drop_database(char* path); int ha_create_table(THD *thd, const char *path, const char *db, const char *table_name, Index: sql/sys_vars.cc =================================================================== --- sql/sys_vars.cc (revision 6927) +++ sql/sys_vars.cc (working copy) @@ -1070,6 +1070,14 @@ GLOBAL_VAR(opt_binlog_order_commits), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); +static Sys_var_mybool Sys_binlog_enhanced_sync( + "binlog_enhanced_sync", + "enhanced innodb 
flush strategy to improve the performance of" + " group commit. if enabled, the redo log write/sync will be" + " delayed until binlog flush stage", + GLOBAL_VAR(opt_binlog_enhanced_sync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + static Sys_var_ulong Sys_bulk_insert_buff_size( "bulk_insert_buffer_size", "Size of tree cache used in bulk " "insert optimisation. Note that this is a limit per thread!", Index: sql/handler.cc =================================================================== --- sql/handler.cc (revision 6927) +++ sql/handler.cc (working copy) @@ -2026,25 +2026,26 @@ void *arg) { handlerton *hton= plugin_data(plugin, handlerton *); - if (hton->state == SHOW_OPTION_YES && hton->flush_logs && - hton->flush_logs(hton)) + if (hton->state == SHOW_OPTION_YES && hton->flush_logs && + hton->flush_logs(hton, *((bool *)arg))) return TRUE; return FALSE; } -bool ha_flush_logs(handlerton *db_type) +bool ha_flush_logs(handlerton *db_type, bool enhanced_sync) { if (db_type == NULL) { if (plugin_foreach(NULL, flush_handlerton, - MYSQL_STORAGE_ENGINE_PLUGIN, 0)) + MYSQL_STORAGE_ENGINE_PLUGIN, + (void *)(&enhanced_sync))) return TRUE; } else { if (db_type->state != SHOW_OPTION_YES || - (db_type->flush_logs && db_type->flush_logs(db_type))) + (db_type->flush_logs && db_type->flush_logs(db_type, enhanced_sync))) return TRUE; } return FALSE;