From fc397a87b70a21bafc2e219c2a97e2dd243cf1ab Mon Sep 17 00:00:00 2001 From: Olernov Date: Thu, 18 May 2017 18:42:05 +0300 Subject: [PATCH] Bug #85588: When using rlike operator to detect characters with hex values between 80 and FF, it does not work when converting a binary string that starts with hexadecimal '00' using unhex function. --- client/mysqltest.cc | 6 +++--- mysql-test/r/func_regexp.result | 3 +++ mysql-test/t/func_regexp.test | 10 ++++++++++ regex/engine.c | 5 +++-- regex/engine.ih | 4 ++-- regex/main.c | 4 ++-- regex/my_regex.h | 2 +- regex/regexec.c | 7 ++++--- sql/item_cmpfunc.cc | 2 +- 9 files changed, 29 insertions(+), 14 deletions(-) diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 1ac2ea3f395..ef5c261963e 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -8325,7 +8325,7 @@ int match_re(my_regex_t *re, char *str) str= comm_end + 2; } - int err= my_regexec(re, str, (size_t)0, NULL, 0); + int err= my_regexec(re, str, strlen(str), (size_t)0, NULL, 0); if (err == 0) return 1; @@ -9827,8 +9827,8 @@ int reg_replace(char** buf_p, int* buf_len_p, char *pattern, while (!err_code) { /* find the match */ - err_code= my_regexec(&r,str_p, r.re_nsub+1, subs, - (str_p == string) ? MY_REG_NOTBOL : 0); + err_code= my_regexec(&r,str_p, strlen(str_p), r.re_nsub+1, subs, + (str_p != string) ? MY_REG_NOTBOL : 0); /* if regular expression error (eg. bad syntax, or out of memory) */ if (err_code && err_code != MY_REG_NOMATCH) diff --git a/mysql-test/r/func_regexp.result b/mysql-test/r/func_regexp.result index 5045e36f9a5..4e0a1f7cf23 100644 --- a/mysql-test/r/func_regexp.result +++ b/mysql-test/r/func_regexp.result @@ -157,3 +157,6 @@ SELECT ' ' REGEXP '[[:space:]]'; SELECT '\t' REGEXP '[[:space:]]'; '\t' REGEXP '[[:space:]]' 1 +SELECT (CONVERT(UNHEX('00149D5554') USING BINARY) RLIKE CONCAT('[', UNHEX('80'), '-', UNHEX('FF'), ']')); +(CONVERT(UNHEX('00149D5554') USING BINARY) RLIKE CONCAT('[', UNHEX('80'), '-', UNHEX('FF'), ']')) +1 diff --git a/mysql-test/t/func_regexp.test b/mysql-test/t/func_regexp.test index 5b7aa299a52..f5cdf1633b5 100644 --- a/mysql-test/t/func_regexp.test +++ b/mysql-test/t/func_regexp.test @@ -106,3 +106,13 @@ SELECT '\t' REGEXP '[[:blank:]]'; SELECT ' ' REGEXP '[[:space:]]'; SELECT '\t' REGEXP '[[:space:]]'; + +# +# Bug #85588: When using rlike operator to detect characters with hex values +# between 80 and FF, it does not work when converting a binary string that +# starts with hexadecimal '00' using unhex function. +# This test verifies that binary string having hexadecimal 00 byte is +# processed correctly. + +SELECT (CONVERT(UNHEX('00149D5554') USING BINARY) RLIKE CONCAT('[', UNHEX('80'), '-', UNHEX('FF'), ']')); + diff --git a/regex/engine.c b/regex/engine.c index 5e6fed23495..0d83ee71a86 100644 --- a/regex/engine.c +++ b/regex/engine.c @@ -63,10 +63,11 @@ struct match { == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, MY_REG_NOMATCH failure */ -matcher(charset,g, str, nmatch, pmatch, eflags) +matcher(charset,g, str, strlength, nmatch, pmatch, eflags) const CHARSET_INFO *charset; register struct re_guts *g; char *str; +size_t strlength; size_t nmatch; my_regmatch_t pmatch[]; int eflags; @@ -89,7 +90,7 @@ int eflags; stop = str + pmatch[0].rm_eo; } else { start = str; - stop = start + strlen(start); + stop = start + strlength; } if (stop < start) return(MY_REG_INVARG); diff --git a/regex/engine.ih b/regex/engine.ih index 993c725c952..181c16722e6 100644 --- a/regex/engine.ih +++ b/regex/engine.ih @@ -4,8 +4,8 @@ extern "C" { #endif /* === engine.c === */ -static int matcher(const CHARSET_INFO *charset,register struct re_guts *g, - char *string, size_t nmatch, my_regmatch_t pmatch[], +static int matcher(const CHARSET_INFO *charset,struct re_guts *g, + char *string, size_t strlength, size_t nmatch, my_regmatch_t pmatch[], int eflags); static char *dissect(const CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst); diff --git a/regex/main.c b/regex/main.c index 8eb11fbeccc..12d3d31dcab 100644 --- a/regex/main.c +++ b/regex/main.c @@ -149,7 +149,7 @@ char *argv[]; subs[0].rm_so = startoff; subs[0].rm_eo = strlen(argv[optind]) - endoff; } - err = my_regexec(&re, argv[optind], (size_t)NS, subs, eopts); + err = my_regexec(&re, argv[optind], strlen(argv[optind]), (size_t)NS, subs, eopts); if (err) { len = my_regerror(err, &re, erbuf, sizeof(erbuf)); fprintf(stderr, "error %s, %d/%d `%s'\n", @@ -329,7 +329,7 @@ int opts; /* may not match f1 */ subs[0].rm_so = strchr(f2, '(') - f2 + 1; subs[0].rm_eo = strchr(f2, ')') - f2; } - err = my_regexec(&re, f2copy, NSUBS, subs, options('e', f1)); + err = my_regexec(&re, f2copy, strlen(f2copy), NSUBS, subs, options('e', f1)); if (err != 0 && (f3 != NULL || err != MY_REG_NOMATCH)) { /* unexpected error or wrong error */ diff --git a/regex/my_regex.h b/regex/my_regex.h index c0282844cf2..c0bb52513e5 100644 --- a/regex/my_regex.h +++ b/regex/my_regex.h @@ -66,7 +66,7 @@ extern size_t my_regerror(int, const my_regex_t *, char *, size_t); /* === regexec.c === */ -extern int my_regexec(const my_regex_t *, const char *, size_t, my_regmatch_t [], int); +extern int my_regexec(const my_regex_t *, const char *, size_t, size_t, my_regmatch_t [], int); #define MY_REG_NOTBOL 00001 #define MY_REG_NOTEOL 00002 #define MY_REG_STARTEND 00004 diff --git a/regex/regexec.c b/regex/regexec.c index a25fffe3838..3fc59ea0f76 100644 --- a/regex/regexec.c +++ b/regex/regexec.c @@ -110,9 +110,10 @@ * have been prototyped. */ int /* 0 success, MY_REG_NOMATCH failure */ -my_regexec(preg, str, nmatch, pmatch, eflags) +my_regexec(preg, str, strlength, nmatch, pmatch, eflags) const my_regex_t *preg; const char *str; +size_t strlength; size_t nmatch; my_regmatch_t pmatch[]; int eflags; @@ -134,7 +135,7 @@ int eflags; if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&MY_REG_LARGE)) - return(smatcher(preg->charset, g, pstr, nmatch, pmatch, eflags)); + return(smatcher(preg->charset, g, pstr, strlength, nmatch, pmatch, eflags)); else - return(lmatcher(preg->charset, g, pstr, nmatch, pmatch, eflags)); + return(lmatcher(preg->charset, g, pstr, strlength, nmatch, pmatch, eflags)); } diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index b854729fcfe..5fba2e77808 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5577,7 +5577,7 @@ longlong Item_func_regex::val_int() } res= &conv; } - return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; + return my_regexec(&preg, res->c_ptr_safe(), res->length(), 0, (my_regmatch_t*) 0, 0) ? 0 : 1; }