From b056295e74a2820d4e7c63e6307b91751fe95389 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Thu, 8 Feb 2018 10:13:07 +0100 Subject: [PATCH] PS-314: issue with 65536+ threads and mdl locks MDL uses the LF_PINS structure for maintaining a global map, which had a 16-bit limitation. After reaching 65535 threads, new threads couldn't access the MDL structures until older threads disconnected. This limitation also triggered assertion failures in debug builds, stopping the server. This patch raises the 16-bit limit of the LF_PINS structure to 32 bits. While this solves the problem, it does so at a high memory cost: as the LF_PINS structure uses the LF_DYNARRAY for storing its data, a 1GB array will be allocated when reaching 65536+256+1 threads. 99% of that memory will never be used, as MySQL has a connection limit of 100000, while this array would be large enough for 4 billion entries. As servers with 65k+ connections will use more than 40GB of address space anyway, this shouldn't be an issue, especially as most of it will never be accessed.
--- include/lf.h | 12 ++++++------ mysys/lf_alloc-pin.c | 27 +++++++++++++-------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/lf.h b/include/lf.h index d6b73f3f69d..ffc10875f2d 100644 --- a/include/lf.h +++ b/include/lf.h @@ -59,19 +59,19 @@ typedef struct { lf_pinbox_free_func *free_func; void *free_func_arg; uint free_ptr_offset; - uint32 volatile pinstack_top_ver; /* this is a versioned pointer */ - uint32 volatile pins_in_array; /* number of elements in array */ + uint64 volatile pinstack_top_ver; /* this is a versioned pointer */ + uint64 volatile pins_in_array; /* number of elements in array */ } LF_PINBOX; typedef struct st_lf_pins { void * volatile pin[LF_PINBOX_PINS]; LF_PINBOX *pinbox; void *purgatory; - uint32 purgatory_count; - uint32 volatile link; + uint64 purgatory_count; + uint64 volatile link; /* we want sizeof(LF_PINS) to be 64 to avoid false sharing */ -#if SIZEOF_INT*2+SIZEOF_CHARP*(LF_PINBOX_PINS+2) != 64 - char pad[64-sizeof(uint32)*2-sizeof(void*)*(LF_PINBOX_PINS+2)]; +#if 2*8+SIZEOF_CHARP*(LF_PINBOX_PINS+2) != 64 + char pad[64-sizeof(uint64)*2-sizeof(void*)*(LF_PINBOX_PINS+2)]; #endif } LF_PINS; diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 4f5dbeda89d..d5fe7ef079c 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -92,13 +92,13 @@ Pins are given away from a "pinbox". Pinbox is stack-based allocator. It used dynarray for storing pins, new elements are allocated by dynarray as necessary, old are pushed in the stack for reuse. ABA is solved by - versioning a pointer - because we use an array, a pointer to pins is 16 bit, - upper 16 bits are used for a version. + versioning a pointer - because we use an array, a pointer to pins is 32 bit, + upper 32 bits are used for a version. 
*/ #include "lf.h" #include "mysys_priv.h" /* key_memory_lf_node */ -#define LF_PINBOX_MAX_PINS 65536 +#define LF_PINBOX_MAX_PINS (65536ULL*65536ULL) static void lf_pinbox_real_free(LF_PINS *pins); @@ -136,15 +136,14 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox) */ LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox) { - uint32 pins, next, top_ver; + uint64 pins, next, top_ver; LF_PINS *el; /* - We have an array of max. 64k elements. The highest index currently allocated is pinbox->pins_in_array. Freed elements are in a lifo stack, pinstack_top_ver. - pinstack_top_ver is 32 bits; 16 low bits are the index in the - array, to the first element of the list. 16 high bits are a version - (every time the 16 low bits are updated, the 16 high bits are + pinstack_top_ver is 64 bits; 32 low bits are the index in the + array, to the first element of the list. 32 high bits are a version + (every time the 32 low bits are updated, the 32 high bits are incremented). Versioning prevents the ABA problem. */ top_ver= pinbox->pinstack_top_ver; @@ -153,7 +152,7 @@ LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox) if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) { /* the stack of free elements is empty */ - pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1; + pins= my_atomic_add64((int64 volatile*) &pinbox->pins_in_array, 1)+1; if (unlikely(pins >= LF_PINBOX_MAX_PINS)) return 0; /* @@ -167,8 +166,8 @@ LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox) } el= (LF_PINS *)lf_dynarray_value(&pinbox->pinarray, pins); next= el->link; - } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver, - (int32*) &top_ver, + } while (!my_atomic_cas64((int64 volatile*) &pinbox->pinstack_top_ver, + (int64*) &top_ver, top_ver-pins+next+LF_PINBOX_MAX_PINS)); /* set el->link to the index of el in the dynarray (el->link has two usages: @@ -191,7 +190,7 @@ LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox) void lf_pinbox_put_pins(LF_PINS *pins) { LF_PINBOX *pinbox= pins->pinbox; - uint32 top_ver, nr; 
+ uint64 top_ver, nr; nr= pins->link; #ifndef DBUG_OFF @@ -221,8 +220,8 @@ void lf_pinbox_put_pins(LF_PINS *pins) do { pins->link= top_ver % LF_PINBOX_MAX_PINS; - } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver, - (int32*) &top_ver, + } while (!my_atomic_cas64((int64 volatile*) &pinbox->pinstack_top_ver, + (int64*) &top_ver, top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); }