Bug #99190 | my_tosort_unicode: unnecessary max_char check while utf8-mb3 processing | ||
---|---|---|---|
Submitted: | 6 Apr 2020 11:51 | Modified: | 6 Apr 2020 13:23 |
Reporter: | Georgy Kirichenko | Email Updates: | |
Status: | Verified | Impact on me: | |
Category: | MySQL Server: Charsets | Severity: | S5 (Performance) |
Version: | 8.0 | OS: | Any |
Assigned to: | CPU Architecture: | Any |
[6 Apr 2020 11:51]
Georgy Kirichenko
[6 Apr 2020 11:57]
Georgy Kirichenko
diff --git a/strings/ctype-utf8.cc b/strings/ctype-utf8.cc index 37bae420ad3..13c7f291331 100644 --- a/strings/ctype-utf8.cc +++ b/strings/ctype-utf8.cc @@ -5002,13 +5002,25 @@ const MY_UNICASE_CHARACTER *my_unicase_pages_unicode520[4352] = { MY_UNICASE_INFO my_unicase_unicode520 = {0x10FFFF, my_unicase_pages_unicode520}; -static inline void my_tosort_unicode(const MY_UNICASE_INFO *uni_plane, - my_wc_t *wc, uint flags) { +static inline void +my_tosort_unicode_convert(const MY_UNICASE_INFO *uni_plane, my_wc_t *wc, + uint flags) +{ + const MY_UNICASE_CHARACTER *page; + + DBUG_ASSERT(*wc <= uni_plane->maxchar); + + if ((page= uni_plane->page[*wc >> 8])) + *wc= (flags & MY_CS_LOWER_SORT) ? + page[*wc & 0xFF].tolower : + page[*wc & 0xFF].sort; +} + +static inline void +my_tosort_unicode(const MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags) +{ if (*wc <= uni_plane->maxchar) { - const MY_UNICASE_CHARACTER *page; - if ((page = uni_plane->page[*wc >> 8])) - *wc = (flags & MY_CS_LOWER_SORT) ? page[*wc & 0xFF].tolower - : page[*wc & 0xFF].sort; + my_tosort_unicode_convert(uni_plane, wc, flags); } else { *wc = MY_CS_REPLACEMENT_CHARACTER; } @@ -5238,7 +5250,10 @@ static size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend) { one wide character). This is inlined because the call overhead of mb_wc() would otherwise be quite large. */ -template <class Mb_wc> + +typedef void (*tosort_unicode_t)(const MY_UNICASE_INFO *, my_wc_t *, uint); + +template <tosort_unicode_t tosort_unicode, class Mb_wc> static inline size_t my_strnxfrm_unicode_tmpl(const CHARSET_INFO *cs, Mb_wc mb_wc, uchar *dst, size_t dstlen, uint nweights, @@ -5286,7 +5301,7 @@ static inline size_t my_strnxfrm_unicode_tmpl(const CHARSET_INFO *cs, goto pad; src += res; - my_tosort_unicode(uni_plane, &wc, cs->state); + tosort_unicode(uni_plane, &wc, cs->state); dst = store16be(dst, wc); } @@ -5296,7 +5311,7 @@ static inline size_t my_strnxfrm_unicode_tmpl(const CHARSET_INFO *cs, my_wc_t wc; int res = mb_wc(&wc, src, se); if (res > 0) { - my_tosort_unicode(uni_plane, &wc, cs->state); + tosort_unicode(uni_plane, &wc, cs->state); src += res; *dst++ = (uchar)(wc >> 8); } @@ -5327,14 +5342,14 @@ size_t my_strnxfrm_unicode(const CHARSET_INFO *cs, uchar *dst, size_t dstlen, // my_mb_wc_utf8 is so common that we special-case it; short-circuit away // the thunk, and get it inlined. if (cs->cset->mb_wc == my_mb_wc_utf8_thunk) { - return my_strnxfrm_unicode_tmpl(cs, Mb_wc_utf8(), dst, dstlen, nweights, - src, srclen, flags); + return my_strnxfrm_unicode_tmpl<my_tosort_unicode_convert>(cs, Mb_wc_utf8(), + dst, dstlen, nweights, src, srclen, flags); } else { // Fallback using a function pointer (which the compiler is unlikely // to be able to optimize away). Mb_wc_through_function_pointer mb_wc(cs); - return my_strnxfrm_unicode_tmpl(cs, mb_wc, dst, dstlen, nweights, src, - srclen, flags); + return my_strnxfrm_unicode_tmpl<my_tosort_unicode>(cs, mb_wc, dst, dstlen, + nweights, src, srclen, flags); } } @@ -5696,8 +5711,8 @@ static int my_strnncoll_utf8(const CHARSET_INFO *cs, const uchar *s, return bincmp(s, se, t, te); } - my_tosort_unicode(uni_plane, &s_wc, cs->state); - my_tosort_unicode(uni_plane, &t_wc, cs->state); + my_tosort_unicode_convert(uni_plane, &s_wc, cs->state); + my_tosort_unicode_convert(uni_plane, &t_wc, cs->state); if (s_wc != t_wc) { return s_wc > t_wc ? 1 : -1; @@ -5752,8 +5767,8 @@ static int my_strnncollsp_utf8(const CHARSET_INFO *cs, const uchar *s, return bincmp(s, se, t, te); } - my_tosort_unicode(uni_plane, &s_wc, cs->state); - my_tosort_unicode(uni_plane, &t_wc, cs->state); + my_tosort_unicode_convert(uni_plane, &s_wc, cs->state); + my_tosort_unicode_convert(uni_plane, &t_wc, cs->state); if (s_wc != t_wc) { return s_wc > t_wc ? 1 : -1;
[6 Apr 2020 13:23]
MySQL Verification Team
Hi Mr. Kirichenko, Thank you for your performance bug report. I found out that your patch improves performance, although not much. Still, any improvement is welcome. Thank you very much for your contribution. Verified as reported.