From 8811bc77bccec9e62def9e85fb8431715a872370 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Fri, 9 Feb 2024 20:43:57 +0100 Subject: [PATCH] Fix offset check for insert position overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multibyte characters are used, the behavior of the insert function is incorrect. It inserts a character even if the given position is beyond the end of the input string. See the following example: mysql> select insert('Å', 2, 1, 'a'); +--------------------------+ | insert('Å', 2, 1, 'a') | +--------------------------+ | Åa | +--------------------------+ 1 row in set (0.00 sec) According to the documentation, and consistent with the behavior for single-byte characters, this should not have inserted the lowercase 'a'. The reason for this is that the code first compares the start position against the byte length of the string with the following check: if ((start < 1) || (start > orig_len)) return res; // Wrong param; skip insert In the case of a multibyte character, the start position won't be beyond the length since the byte length of the input string here is 3 bytes. This check is only a shortcut, though; after it, the correct offset is computed with charpos. One thing overlooked here, though, is that the following changes the meaning of the start value: --start; // Internal start from '0' It's no longer a value starting with 1, but now with 0. This means that we can't reuse the same start > orig_len check, because start is now one less. Hence the check after the multibyte-aware position is calculated should use start >= orig_len instead of start > orig_len. A test for this bug is also added. 
--- mysql-test/r/func_str.result | 7 +++++++ mysql-test/t/func_str.test | 6 ++++++ sql/item_strfunc.cc | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result index 72fdea0a3459..e36caf4ebc6c 100644 --- a/mysql-test/r/func_str.result +++ b/mysql-test/r/func_str.result @@ -5822,3 +5822,10 @@ SELECT QUOTE(x'80'); ERROR HY000: Cannot convert string '\x80' from binary to utf8mb4 SELECT QUOTE(_utf8mb4 x'80'); ERROR HY000: Invalid utf8mb4 character string: '80' +# +# Bug: The insert function does handle inserting beyond the string for multibyte characters +# +set names utf8mb4; +select insert('Å', 2, 1, 'a'); +insert('Å', 2, 1, 'a') +Å diff --git a/mysql-test/t/func_str.test b/mysql-test/t/func_str.test index 58c5944c44e6..fa64d4a0282d 100644 --- a/mysql-test/t/func_str.test +++ b/mysql-test/t/func_str.test @@ -2675,3 +2675,9 @@ SELECT QUOTE(NULL); SELECT QUOTE(x'80'); --error ER_INVALID_CHARACTER_STRING SELECT QUOTE(_utf8mb4 x'80'); + +--echo # +--echo # Bug: The insert function does handle inserting beyond the string for multibyte characters +--echo # +set names utf8mb4; +select insert('Å', 2, 1, 'a'); diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 289895b99b71..c1638f33716e 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1355,7 +1355,7 @@ String *Item_func_insert::val_str(String *str) { res->charpos(static_cast(length), static_cast(start)); /* Re-testing with corrected params */ - if (start > orig_len) + if (start >= orig_len) return res; /* purecov: inspected */ // Wrong param; skip insert if (length > orig_len - start) length = orig_len - start;