diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index b7224117a24..e194150002b 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1433,11 +1433,20 @@ String *Item_str_conv::val_str(String *str) { } else res = copy_if_not_alloced(str, res, res->length()); + orig_res.copy(res->ptr(), res->length(), res->charset()); len = converter(collation.collation, res->ptr(), res->length(), res->ptr(), res->length()); + + if (len == 0) { + ++multiply; + res->swap(orig_res); + goto multiplyx; + } + assert(len <= res->length()); res->length(len); } else { +multiplyx: size_t len = res->length() * multiply; tmp_value.alloc(len); tmp_value.set_charset(collation.collation); diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 86d2b265f46..3f2e7bf4914 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -411,6 +411,7 @@ class Item_str_conv : public Item_str_func { uint multiply; my_charset_conv_case converter; String tmp_value; + String orig_res; public: Item_str_conv(const POS &pos, Item *item) : Item_str_func(pos, item) {} diff --git a/strings/ctype-utf8.cc b/strings/ctype-utf8.cc index 35876bec091..4d37bbae65c 100644 --- a/strings/ctype-utf8.cc +++ b/strings/ctype-utf8.cc @@ -7302,14 +7302,21 @@ static size_t my_caseup_utf8mb4(const CHARSET_INFO *cs, char *src, char *srcend = src + srclen, *dstend = dst + dstlen, *dst0 = dst; const MY_UNICASE_INFO *uni_plane = cs->caseinfo; assert(src != dst || cs->caseup_multiply == 1); + bool inplace = (src == dst); while ((src < srcend) && (srcres = my_mb_wc_utf8mb4(&wc, (uchar *)src, (uchar *)srcend)) > 0) { my_toupper_utf8mb4(uni_plane, &wc); - if ((dstres = my_wc_mb_utf8mb4(cs, wc, (uchar *)dst, (uchar *)dstend)) <= 0) + if (((dstres = my_wc_mb_utf8mb4(cs, wc, (uchar *)dst, (uchar *)dstend)) <= 0) && !inplace) break; - src += srcres; - dst += dstres; + if (srcres == dstres || !inplace) { + src += srcres; + dst += dstres; + } else { + // In-place case conversion (multiply == 1) requires srcres to equal dstres; if they differ, 
the caller must retry with a bigger buffer. + // There is not enough space to convert in place because the character's encoded byte length changed in utf8mb4. + return 0; + } } return (size_t)(dst - dst0); } @@ -7388,14 +7395,19 @@ static size_t my_casedn_utf8mb4(const CHARSET_INFO *cs, char *src, char *srcend = src + srclen, *dstend = dst + dstlen, *dst0 = dst; const MY_UNICASE_INFO *uni_plane = cs->caseinfo; assert(src != dst || cs->casedn_multiply == 1); + bool inplace = (src == dst); while ((src < srcend) && (srcres = my_mb_wc_utf8mb4(&wc, (uchar *)src, (uchar *)srcend)) > 0) { my_tolower_utf8mb4(uni_plane, &wc); - if ((dstres = my_wc_mb_utf8mb4(cs, wc, (uchar *)dst, (uchar *)dstend)) <= 0) + if (((dstres = my_wc_mb_utf8mb4(cs, wc, (uchar *)dst, (uchar *)dstend)) <= 0) && !inplace) break; - src += srcres; - dst += dstres; + if (srcres == dstres || !inplace) { + src += srcres; + dst += dstres; + } else { + return 0; + } } return (size_t)(dst - dst0); }