=== modified file 'BUILD/SETUP.sh' --- BUILD/SETUP.sh 2008-04-28 16:24:05 +0000 +++ BUILD/SETUP.sh 2008-06-13 07:53:29 +0000 @@ -60,7 +60,7 @@ prefix="/usr/local/mysql" just_print= just_configure= -full_debug= +full_debug="=full" warning_mode= parse_options "$@" === modified file 'storage/maria/ma_bitmap.c' --- storage/maria/ma_bitmap.c 2008-04-03 13:40:25 +0000 +++ storage/maria/ma_bitmap.c 2008-09-25 12:36:38 +0000 @@ -1,3 +1,4 @@ +#define GBDELAY 20 /* Copyright (C) 2007 Michael Widenius This program is free software; you can redistribute it and/or modify @@ -145,7 +146,7 @@ DBUG_ENTER("write_changed_bitmap"); DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); DBUG_ASSERT(bitmap->file.write_callback != 0); - DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); + DBUG_PRINT("info", ("bitmap %p bitmap->non_flushable: %u", bitmap, bitmap->non_flushable)); if ((bitmap->non_flushable == 0) #ifdef WRONG_BITMAP_FLUSH @@ -259,12 +260,15 @@ my_bool _ma_bitmap_end(MARIA_SHARE *share) { + MARIA_FILE_BITMAP *bitmap= &share->bitmap; my_bool res= _ma_bitmap_flush(share); - pthread_mutex_destroy(&share->bitmap.bitmap_lock); - pthread_cond_destroy(&share->bitmap.bitmap_cond); - delete_dynamic(&share->bitmap.pinned_pages); - my_free((uchar*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR)); - share->bitmap.map= 0; + DBUG_ASSERT(bitmap->non_flushable == 0); + DBUG_ASSERT(bitmap->flush_all_requested == 0); + pthread_mutex_destroy(&bitmap->bitmap_lock); + pthread_cond_destroy(&bitmap->bitmap_cond); + delete_dynamic(&bitmap->pinned_pages); + my_free((uchar*) bitmap->map, MYF(MY_ALLOW_ZERO_PTR)); + bitmap->map= 0; return res; } @@ -311,20 +315,25 @@ @note This is used by reader threads which don't unpin things */ - +// todo change name my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share) { my_bool res= 0; MARIA_FILE_BITMAP *bitmap= &share->bitmap; DBUG_ENTER("_ma_bitmap_flush"); + /* + We need to flush what's in memory (bitmap.map) to page cache 
otherwise, as + we are going to read bitmaps from page cache in table scan, we may miss + recently inserted rows. This matters only for committed rows, that is, + rows for which there was a commit before our transaction started; as + commit and start are protected by the same mutex, we see memory at least + as new as at commit time, so if the committed rows caused bitmap->changed + to be true, we see it; if we see 0 it really means a flush happened since + then. i.e. read without mutex is safe here. + */ if (bitmap->changed) { pthread_mutex_lock(&bitmap->bitmap_lock); - while (bitmap->non_flushable && bitmap->changed) - { - DBUG_PRINT("info", ("waiting for bitmap to be flushable")); - pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); - } if (bitmap->changed) { bitmap->changed= 0; @@ -372,22 +381,43 @@ pthread_mutex_lock(&bitmap->bitmap_lock); if (bitmap->changed) { - bitmap->flush_all_requested= TRUE; + bitmap->flush_all_requested++; #ifndef WRONG_BITMAP_FLUSH while (bitmap->non_flushable > 0) { - DBUG_PRINT("info", ("waiting for bitmap to be flushable")); - pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); +// fprintf(stderr,"%u waiting for bitmap %p to be flushable\n", __LINE__, bitmap); + DBUG_PRINT("info", ("waiting for bitmap %p to be flushable", bitmap)); + { + struct timeval now; + struct timespec timeout; + struct timezone tz; + int rc; + gettimeofday(&now, &tz); + timeout.tv_sec= now.tv_sec + GBDELAY; + timeout.tv_nsec= now.tv_usec * 1000; + rc= pthread_cond_timedwait(&bitmap->bitmap_cond, &bitmap->bitmap_lock, &timeout); + if (rc == ETIMEDOUT || rc == ETIME) + { + DBUG_PRINT("info",("waited too long dying")); + DBUG_ASSERT(0); + } + } +// fprintf(stderr,"%u ended waiting for bitmap %p to be flushable\n", +// __LINE__, bitmap); } #endif + bitmap->flush_all_requested--; /* Bitmap is in a flushable state: its contents in memory are reflected by log records (complete REDO-UNDO groups) and all bitmap pages are unpinned. 
We keep the mutex to preserve this situation, and flush to the file. */ - res= write_changed_bitmap(share, bitmap); - bitmap->changed= FALSE; + if (bitmap->changed) + { + res= write_changed_bitmap(share, bitmap); + bitmap->changed= FALSE; + } /* We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap pages have been flushed. That's a condition of correctness of @@ -404,12 +434,12 @@ &bitmap->pages_covered) & PCFLUSH_PINNED_AND_ERROR) res= TRUE; - bitmap->flush_all_requested= FALSE; /* Some well-behaved threads may be waiting for flush_all_requested to become false, wake them up. */ - DBUG_PRINT("info", ("bitmap flusher waking up others")); + DBUG_PRINT("info", ("bitmap %p flusher waking up others", bitmap)); +// fprintf(stderr,"%u bitmap %p waking up\n", __LINE__, bitmap); pthread_cond_broadcast(&bitmap->bitmap_cond); } pthread_mutex_unlock(&bitmap->bitmap_lock); @@ -2139,8 +2169,8 @@ function first waits for the flush to be done. @note - info->non_flushable_state is set to 1 if we have incremented - bitmap->info->non_flushable and not yet decremented it. + this sets info->non_flushable_state to 1 if we have incremented + bitmap->non_flushable and not yet decremented it. @param share Table's share @param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable @@ -2151,20 +2181,21 @@ { MARIA_SHARE *share= info->s; MARIA_FILE_BITMAP *bitmap; + DBUG_ENTER("_ma_bitmap_flushable"); /* Not transactional tables are never automaticly flushed and needs no protection */ if (!share->now_transactional) - return; + DBUG_VOID_RETURN; bitmap= &share->bitmap; if (non_flushable_inc == -1) { pthread_mutex_lock(&bitmap->bitmap_lock); - DBUG_ASSERT((int) bitmap->non_flushable > 0 && - info->non_flushable_state == 1); + DBUG_ASSERT((int) bitmap->non_flushable > 0); + DBUG_ASSERT(info->non_flushable_state == 1); info->non_flushable_state= 0; if (--bitmap->non_flushable == 0) { @@ -2172,20 +2203,27 @@ We unlock and unpin pages locked and pinned by other threads. 
It does not seem to be an issue as all bitmap changes are serialized with the bitmap's mutex. + It is an issue in debug builds, see the assertion in pagecache in remove_pin(): + it checks that the unpinner is one of the pinners. */ _ma_bitmap_unpin_all(share); if (unlikely(bitmap->flush_all_requested)) { - DBUG_PRINT("info", ("bitmap flushable waking up flusher")); + DBUG_PRINT("info", ("bitmap %p flushable waking up flusher", bitmap)); +// fprintf(stderr,"%u bitmap %p waking up\n", __LINE__, bitmap); pthread_cond_broadcast(&bitmap->bitmap_cond); } } - DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); +// fprintf(stderr,"%u bitmap %p bitmap->non_flushable: %u\n", __LINE__, bitmap, bitmap->non_flushable); + DBUG_PRINT("info", ("bitmap %p bitmap->non_flushable: %u", bitmap, bitmap->non_flushable)); pthread_mutex_unlock(&bitmap->bitmap_lock); - return; + DBUG_VOID_RETURN; } - DBUG_ASSERT(non_flushable_inc == 1 && info->non_flushable_state == 0); + DBUG_ASSERT(non_flushable_inc == 1); + DBUG_ASSERT(info->non_flushable_state == 0); /* It is a read without mutex because only an optimization */ + // hangs in the cond wait sometimes +#if 1 if (unlikely(bitmap->flush_all_requested)) { /* @@ -2202,18 +2240,37 @@ pthread_mutex_lock(&bitmap->bitmap_lock); while (bitmap->flush_all_requested) { - DBUG_PRINT("info", ("waiting for bitmap flusher")); - pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock); +// fprintf(stderr,"%u waiting for bitmap %p flusher\n", __LINE__, bitmap); + DBUG_PRINT("info", ("waiting for bitmap %p flusher", bitmap)); + { + struct timeval now; + struct timespec timeout; + struct timezone tz; + int rc; + gettimeofday(&now, &tz); + timeout.tv_sec= now.tv_sec + GBDELAY; + timeout.tv_nsec= now.tv_usec * 1000; + rc= pthread_cond_timedwait(&bitmap->bitmap_cond, &bitmap->bitmap_lock, &timeout); + if (rc == ETIMEDOUT || rc == ETIME) + { + DBUG_PRINT("info",("waited too long dying")); + DBUG_ASSERT(0); + } + } +// fprintf(stderr,"%u ended 
waiting for bitmap %p flusher\n", __LINE__, bitmap); } pthread_mutex_unlock(&bitmap->bitmap_lock); } +#endif /* Ok to set without mutex: we didn't touch the bitmap's content yet; when we - touch it we will take the mutex. + touch it we will take the mutex. TODO: verify this is really safe. */ bitmap->non_flushable++; info->non_flushable_state= 1; - DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); +// fprintf(stderr,"%u bitmap %p bitmap->non_flushable: %u\n", __LINE__, bitmap, bitmap->non_flushable); + DBUG_PRINT("info", ("bitmap %p bitmap->non_flushable: %u", bitmap, bitmap->non_flushable)); + DBUG_VOID_RETURN; } @@ -2321,22 +2378,24 @@ goto err; } - if (info->s->now_transactional) + // this repeats the non_flushable_inc == -1 branch of _ma_bitmap_flushable(), except that the mutex is already held + if (info->non_flushable_state) { - DBUG_ASSERT((int) bitmap->non_flushable >= 0 && - info->non_flushable_state); + DBUG_ASSERT((int) bitmap->non_flushable >= 0); info->non_flushable_state= 0; if (--bitmap->non_flushable == 0) { _ma_bitmap_unpin_all(info->s); if (unlikely(bitmap->flush_all_requested)) { - DBUG_PRINT("info", ("bitmap flushable waking up flusher")); + DBUG_PRINT("info", ("bitmap %p flushable waking up flusher", bitmap)); +// fprintf(stderr,"%u bitmap %p waking up\n", __LINE__, bitmap); pthread_cond_broadcast(&bitmap->bitmap_cond); } } } - DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); +// fprintf(stderr,"%u bitmap %p bitmap->non_flushable: %u\n", __LINE__, bitmap, bitmap->non_flushable); + DBUG_PRINT("info", ("bitmap %p bitmap->non_flushable: %u", bitmap, bitmap->non_flushable)); pthread_mutex_unlock(&bitmap->bitmap_lock); DBUG_RETURN(0); === modified file 'storage/maria/ma_blockrec.c' --- storage/maria/ma_blockrec.c 2008-08-28 18:52:23 +0000 +++ storage/maria/ma_blockrec.c 2008-09-25 12:28:41 +0000 @@ -5141,7 +5141,9 @@ if (end_of_data > info->scan.dir_end || offset < PAGE_HEADER_SIZE || length < share->base.min_block_length) { - DBUG_ASSERT(0); + DBUG_ASSERT(!(end_of_data > 
info->scan.dir_end)); + DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE)); + DBUG_ASSERT(!(length < share->base.min_block_length)); goto err; } #endif === modified file 'storage/maria/ma_checkpoint.c' --- storage/maria/ma_checkpoint.c 2008-08-28 18:52:23 +0000 +++ storage/maria/ma_checkpoint.c 2008-09-18 15:17:42 +0000 @@ -160,6 +160,7 @@ DBUG_PRINT("enter", ("level: %d", checkpoint_in_progress)); bzero(&record_pieces, sizeof(record_pieces)); + ulonglong start_time=my_getsystime(); /* STEP 1: record current end-of-log position using log's lock. It is critical for the correctness of Checkpoint (related to memory visibility @@ -296,6 +297,7 @@ checkpoints_total++; checkpoints_ok_total+= !error; pthread_mutex_unlock(&LOCK_checkpoint); + fprintf(stderr, "checkpoint took %lld\n", my_getsystime()-start_time); DBUG_RETURN(error); } === modified file 'storage/maria/ma_close.c' --- storage/maria/ma_close.c 2008-08-26 12:34:57 +0000 +++ storage/maria/ma_close.c 2008-09-25 12:19:50 +0000 @@ -107,7 +107,7 @@ File must be synced as it is going out of the maria_open_list and so becoming unknown to future Checkpoints. 
*/ - if (!share->temporary && my_sync(share->kfile.file, MYF(MY_WME))) + if (share->now_transactional && my_sync(share->kfile.file, MYF(MY_WME))) error= my_errno; if (my_close(share->kfile.file, MYF(0))) error= my_errno; === modified file 'storage/maria/ma_pagecache.c' --- storage/maria/ma_pagecache.c 2008-08-25 18:26:50 +0000 +++ storage/maria/ma_pagecache.c 2008-09-25 13:12:32 +0000 @@ -2162,9 +2162,12 @@ #ifndef DBUG_OFF { PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); + if (info) + { DBUG_ASSERT(info != 0); info_unlink(info); my_free((uchar*) info, MYF(0)); + } } #endif DBUG_VOID_RETURN; @@ -2184,9 +2187,12 @@ PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, my_thread_var); + if (info) + { DBUG_ASSERT(info != 0); info_unlink((PAGECACHE_PIN_INFO *)info); my_free((uchar*)info, MYF(0)); + } } static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { === modified file 'storage/maria/maria_def.h' --- storage/maria/maria_def.h 2008-08-28 18:52:23 +0000 +++ storage/maria/maria_def.h 2008-09-18 11:42:35 +0000 @@ -242,7 +242,7 @@ pgcache_page_no_t page; /* Page number for current bitmap */ uint used_size; /* Size of bitmap head that is not 0 */ my_bool changed; /* 1 if page needs to be flushed */ - my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */ + uint flush_all_requested; /**< count waiters on bitmap flush */ uint non_flushable; /**< 0 if bitmap and log are in sync */ PAGECACHE_FILE file; /* datafile where bitmap is stored */