diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 064c533fb21..3d36c9c1d8b 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1551,11 +1551,20 @@ String *Item_str_conv::val_str(String *str) { } else res = copy_if_not_alloced(str, res, res->length()); + orig_res.copy(res->ptr(), res->length(), res->charset()); len = converter(collation.collation, res->ptr(), res->length(), res->ptr(), res->length()); + + if (len == 0) { + ++multiply; + res->swap(orig_res); + goto multiplyx; + } + assert(len <= res->length()); res->length(len); } else { +multiplyx: size_t len = res->length() * multiply; tmp_value.alloc(len); tmp_value.set_charset(collation.collation); diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 5d608b04ed6..fa3fdecb933 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -480,6 +480,7 @@ class Item_str_conv : public Item_str_func { uint multiply; my_charset_conv_case converter; String tmp_value; + String orig_res; public: Item_str_conv(const POS &pos, Item *item) : Item_str_func(pos, item) {} diff --git a/strings/ctype-utf8.cc b/strings/ctype-utf8.cc index 2745cdd4f47..a2d412cfe69 100644 --- a/strings/ctype-utf8.cc +++ b/strings/ctype-utf8.cc @@ -7306,16 +7306,24 @@ static size_t my_caseup_utf8mb4(const CHARSET_INFO *cs, char *src, char *srcend = src + srclen, *dstend = dst + dstlen, *dst0 = dst; const MY_UNICASE_INFO *uni_plane = cs->caseinfo; assert(src != dst || cs->caseup_multiply == 1); + bool inplace = (src == dst); while ((src < srcend) && (srcres = my_mb_wc_utf8mb4(&wc, pointer_cast(src), pointer_cast(srcend))) > 0) { my_toupper_utf8mb4(uni_plane, &wc); - if ((dstres = my_wc_mb_utf8mb4(cs, wc, pointer_cast(dst), + if (((dstres = my_wc_mb_utf8mb4(cs, wc, pointer_cast(dst), pointer_cast(dstend))) <= 0) + && !inplace) break; - src += srcres; - dst += dstres; + if (srcres == dstres || !inplace) { + src += srcres; + dst += dstres; + } else { + // srcres have to equal to dstres in in-place case conversion(multiply == 1), 
if not, retry with a bigger buffer. + // Not enough space to do the case conversion in place, which means the byte length of some Unicode character changed in utf8mb4. + return 0; + } } return (size_t)(dst - dst0); } @@ -7396,16 +7404,22 @@ static size_t my_casedn_utf8mb4(const CHARSET_INFO *cs, char *src, char *srcend = src + srclen, *dstend = dst + dstlen, *dst0 = dst; const MY_UNICASE_INFO *uni_plane = cs->caseinfo; assert(src != dst || cs->casedn_multiply == 1); + bool inplace = (src == dst); while ((src < srcend) && (srcres = my_mb_wc_utf8mb4(&wc, pointer_cast(src), pointer_cast(srcend))) > 0) { my_tolower_utf8mb4(uni_plane, &wc); - if ((dstres = my_wc_mb_utf8mb4(cs, wc, pointer_cast(dst), + if (((dstres = my_wc_mb_utf8mb4(cs, wc, pointer_cast(dst), pointer_cast(dstend))) <= 0) + && !inplace) break; - src += srcres; - dst += dstres; + if (srcres == dstres || !inplace) { + src += srcres; + dst += dstres; + } else { + return 0; + } } return (size_t)(dst - dst0); }