Description:
SUBSTRING_INDEX is documented (and, from the code comments, clearly intended) to return the whole string if its third argument is out of range (too positive or too negative). However, due to a programming error, its val_str() method actually truncates a BIGINT parameter to 32 bits before comparing it against 0.
The fix is pretty easy; just postpone the truncation until after you've compared against 0, and if the truncation had any effect, return the original string --- on the reasonable assumption that no string will contain more than 2 billion matches.
How to repeat:
CREATE TABLE t (i BIGINT);
INSERT INTO t VALUES (0);
INSERT INTO t VALUES (-1);
INSERT INTO t VALUES (-2);
INSERT INTO t VALUES (-3);
INSERT INTO t VALUES (-4);
INSERT INTO t VALUES (-9223372036854775803);
INSERT INTO t VALUES (-9223372036854775804);
INSERT INTO t VALUES (-9223372036854775805);
INSERT INTO t VALUES (-9223372036854775806);
INSERT INTO t VALUES (-9223372036854775807);
INSERT INTO t VALUES (-9223372036854775808);
SELECT i, SUBSTRING_INDEX("wAxAyAz", "A", i) FROM t;
+----------------------+------------------------------------+
| i | SUBSTRING_INDEX("wAxAyAz", "A", i) |
+----------------------+------------------------------------+
| 0 | |
| -1 | z |
| -2 | yAz |
| -3 | xAyAz |
| -4 | wAxAyAz |
| -9223372036854775803 | wAxAyAz |
| -9223372036854775804 | wAxAyAz |
| -9223372036854775805 | wAxAy |
| -9223372036854775806 | wAx |
| -9223372036854775807 | w |
| -9223372036854775808 | |
+----------------------+------------------------------------+
Suggested fix:
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -1509,7 +1509,7 @@ String *Item_func_substr_index::val_str(String *str)
DBUG_ASSERT(fixed == 1);
String *res= args[0]->val_str(str);
String *delimiter= args[1]->val_str(&tmp_value);
- int32 count= (int32) args[2]->val_int();
+ longlong count= args[2]->val_int();
uint offset;
if (args[0]->null_value || args[1]->null_value || args[2]->null_value)
@@ -1524,6 +1524,11 @@ String *Item_func_substr_index::val_str(String *str)
res->set_charset(collation.collation);
+ if (count < INT_MIN || count > INT_MAX)
+ {
+ return res;
+ }
+
#ifdef USE_MB
if (use_mb(res->charset()))
{