diff -Naur mysql-5.0.37/sql/log.cc mysql-5.0.37.fixed/sql/log.cc --- mysql-5.0.37/sql/log.cc 2008-09-29 12:57:44.000000000 -0700 +++ mysql-5.0.37.fixed/sql/log.cc 2008-09-29 13:04:35.000000000 -0700 @@ -1045,36 +1045,22 @@ DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name)); pthread_mutex_lock(&LOCK_index); - pthread_mutex_lock(&rli->log_space_lock); - rli->relay_log.purge_logs(rli->group_relay_log_name, included, - 0, 0, &rli->log_space_total); - // Tell the I/O thread to take the relay_log_space_limit into account - rli->ignore_log_space_limit= 0; - pthread_mutex_unlock(&rli->log_space_lock); /* - Ok to broadcast after the critical region as there is no risk of - the mutex being destroyed by this thread later - this helps save - context switches + Need to flush_relay_log_info() before the call to purge_logs. The + calls used to be in the other order, but if the server dies + after the purge, but before the flush, replication will stop + with an error initializing the master info struct once the + server is restarted. */ - pthread_cond_broadcast(&rli->log_space_cond); - - /* - Read the next log file name from the index file and pass it back to - the caller - If included is true, we want the first relay log; - otherwise we want the one after event_relay_log_name. 
- */ - if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) || - (!included && - ((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) || - (error=find_next_log(&rli->linfo, 0))))) + if ((error = find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) || + (error = find_next_log(&rli->linfo, 0))) { char buff[22]; sql_print_error("next log error: %d offset: %s log: %s included: %d", error, llstr(rli->linfo.index_file_offset,buff), - rli->group_relay_log_name, + rli->event_relay_log_name, included); goto err; } @@ -1102,6 +1088,49 @@ /* Store where we are in the new file for the execution thread */ flush_relay_log_info(rli); + /* + If we crash here before purging the logs, the logs will initially + be left around on the machine at crash recovery time, and will be + listed in the index file. However, the next time the SQL thread + completes processing of a relay log, and thus this function gets + called, those logs from before the crash will also get purged. + */ + DBUG_EXECUTE_IF("crash_purge_first_log", abort();); + + /* Now it is safe to purge the log */ + pthread_mutex_lock(&rli->log_space_lock); + rli->relay_log.purge_logs(rli->group_relay_log_name, FALSE, + 0, 0, &rli->log_space_total); + // Tell the I/O thread to take the relay_log_space_limit into account + rli->ignore_log_space_limit= 0; + pthread_mutex_unlock(&rli->log_space_lock); + + /* + Ok to broadcast after the critical region as there is no risk of + the mutex being destroyed by this thread later - this helps save + context switches + */ + pthread_cond_broadcast(&rli->log_space_cond); + + /* + We have to repeat some work in order to make sure that rli->linfo + has the correct offsets since we have purged log files and updated + the index after getting rli->linfo the first time around. 
+ */ + if ((error = find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))) + { + char buff[22]; + sql_print_error("next log error: %d offset: %s log: %s included: %d", + error, + llstr(rli->linfo.index_file_offset,buff), + rli->event_relay_log_name, + included); + goto err; + } + + /* If included is true, rli->linfo should be the first entry in the index file. */ + DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0); + err: pthread_mutex_unlock(&LOCK_index); DBUG_RETURN(error);