diff --git a/mysql-test/r/loaddata.result b/mysql-test/r/loaddata.result index ef4d45849e1..625db3f2f15 100644 --- a/mysql-test/r/loaddata.result +++ b/mysql-test/r/loaddata.result @@ -532,3 +532,24 @@ FIELDS TERMINATED BY 't' LINES TERMINATED BY ''; Got one of the listed errors SET @@sql_mode= @old_mode; DROP TABLE t1; +# +# Bug #25147988: LOAD DATA INFILE FAILS WITH AN ESCAPE CHARACTER +# FOLLOWED BY A MULTI-BYTE ONE +# +CREATE TABLE t1(a VARCHAR(20)) CHARSET utf8mb4; +LOAD DATA INFILE '../../std_data/loaddata_utf8.dat' INTO TABLE t1 CHARACTER SET utf8mb4; +SELECT HEX(a) FROM t1; +HEX(a) +E4B880E4BA8CE4B889 +E59B9BE4BA94E585AD +E4B883E585ABE4B99D +E4B880E4BA8CE4B889 +E59B9BE4BA94E585AD +E4B883E585ABE4B99D0A +DROP TABLE t1; +CREATE TABLE t1(a VARCHAR(20)) CHARSET gbk; +LOAD DATA INFILE '../../std_data/loaddata7.dat' INTO TABLE t1 CHARACTER SET gbk; +SELECT HEX(a) FROM t1; +HEX(a) +815C825C +DROP TABLE t1; diff --git a/mysql-test/std_data/loaddata7.dat b/mysql-test/std_data/loaddata7.dat new file mode 100644 index 00000000000..20408bcbf3c --- /dev/null +++ b/mysql-test/std_data/loaddata7.dat @@ -0,0 +1 @@ +\\\\ diff --git a/mysql-test/std_data/loaddata_utf8.dat b/mysql-test/std_data/loaddata_utf8.dat index fc7a28229d4..ce8c668dc87 100644 --- a/mysql-test/std_data/loaddata_utf8.dat +++ b/mysql-test/std_data/loaddata_utf8.dat @@ -1,3 +1,6 @@ 一二三 四五六 七八九 +\一二三 +四\五六 +七八九\ diff --git a/mysql-test/suite/rpl/r/rpl_loaddata_charset.result b/mysql-test/suite/rpl/r/rpl_loaddata_charset.result index 07c0b743e51..89070bd20ca 100644 --- a/mysql-test/suite/rpl/r/rpl_loaddata_charset.result +++ b/mysql-test/suite/rpl/r/rpl_loaddata_charset.result @@ -49,6 +49,9 @@ hex(cl) E4B880E4BA8CE4B889 E59B9BE4BA94E585AD E4B883E585ABE4B99D +E4B880E4BA8CE4B889 +E59B9BE4BA94E585AD +E4B883E585ABE4B99D0A ----------content on slave---------- USE mysqltest; SELECT hex(cl) FROM t; @@ -56,6 +59,9 @@ hex(cl) E4B880E4BA8CE4B889 E59B9BE4BA94E585AD E4B883E585ABE4B99D +E4B880E4BA8CE4B889 
+E59B9BE4BA94E585AD +E4B883E585ABE4B99D0A DROP DATABASE mysqltest; DROP DATABASE IF EXISTS mysqltest; CREATE DATABASE mysqltest CHARSET UTF8; @@ -69,6 +75,9 @@ hex(cl) E4B880E4BA8CE4B889 E59B9BE4BA94E585AD E4B883E585ABE4B99D +E4B880E4BA8CE4B889 +E59B9BE4BA94E585AD +E4B883E585ABE4B99D0A ----------content on slave---------- USE mysqltest; SELECT hex(cl) FROM t; @@ -76,5 +85,8 @@ hex(cl) E4B880E4BA8CE4B889 E59B9BE4BA94E585AD E4B883E585ABE4B99D +E4B880E4BA8CE4B889 +E59B9BE4BA94E585AD +E4B883E585ABE4B99D0A DROP DATABASE mysqltest; include/rpl_end.inc diff --git a/mysql-test/t/loaddata.test b/mysql-test/t/loaddata.test index 9006e277dc0..58b954ad1ae 100644 --- a/mysql-test/t/loaddata.test +++ b/mysql-test/t/loaddata.test @@ -657,3 +657,18 @@ SET @@sql_mode= @old_mode; --remove_file $MYSQLTEST_VARDIR/mysql DROP TABLE t1; + +--echo # +--echo # Bug #25147988: LOAD DATA INFILE FAILS WITH AN ESCAPE CHARACTER +--echo # FOLLOWED BY A MULTI-BYTE ONE +--echo # +# Test escape mark followed by multibyte character +CREATE TABLE t1(a VARCHAR(20)) CHARSET utf8mb4; +LOAD DATA INFILE '../../std_data/loaddata_utf8.dat' INTO TABLE t1 CHARACTER SET utf8mb4; +SELECT HEX(a) FROM t1; +DROP TABLE t1; +# Test multibyte character whose second byte is 0x5C +CREATE TABLE t1(a VARCHAR(20)) CHARSET gbk; +LOAD DATA INFILE '../../std_data/loaddata7.dat' INTO TABLE t1 CHARACTER SET gbk; +SELECT HEX(a) FROM t1; +DROP TABLE t1; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 5b2affaddc1..1b5e24abad5 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -1545,6 +1545,7 @@ int READ_INFO::read_field() for (;;) { + bool escaped_mb= false; while ( to < end_of_buff) { chr = GET; @@ -1566,7 +1567,22 @@ int READ_INFO::read_field() */ if (escape_char != enclosed_char || chr == escape_char) { - *to++ = (uchar) unescape((char) chr); + uint ml= my_mbcharlen(read_charset, chr); + /* + For escaped multibyte character, push back the first byte, + and will handle it below. 
+ Because the second byte of a multibyte character may be + 0x5C, Query_result_export::send_data escapes both the head byte + and the tail byte of such characters. So record that the head + byte was escaped; the tail byte is unescaped below. + */ + if (ml == 1) + *to++= (uchar) unescape((char) chr); + else + { + escaped_mb= true; + PUSH(chr); + } continue; } PUSH(chr); @@ -1635,7 +1651,10 @@ int READ_INFO::read_field() uint ml= my_mbcharlen(read_charset, chr); if (ml == 0) { - error= 1; + *to= '\0'; + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), + read_charset->csname, buffer); + error= true; return 1; } @@ -1645,13 +1664,6 @@ int READ_INFO::read_field() uchar* p= to; *to++ = chr; - ml= my_mbcharlen(read_charset, chr); - if (ml == 0) - { - error= 1; - return 1; - } - for (uint i= 1; i < ml; i++) { chr= GET; @@ -1664,8 +1676,16 @@ int READ_INFO::read_field() to-= i; goto found_eof; } + else if (chr == escape_char && escaped_mb) + { + // Unescape the second byte if it is escaped. + chr= GET; + chr= (uchar) unescape((char) chr); + } *to++ = chr; } + if (escaped_mb) + escaped_mb= false; if (my_ismbchar(read_charset, (const char *)p, (const char *)to)) @@ -1675,6 +1695,12 @@ int READ_INFO::read_field() chr= GET; } #endif + else if (ml > 1) + { + // Buffer is too small, exit while loop, and reallocate. + PUSH(chr); + break; + } *to++ = (uchar) chr; } /* @@ -1682,7 +1708,7 @@ int READ_INFO::read_field() */ if (!(new_buffer=(uchar*) my_realloc((char*) buffer,buff_length+1+IO_SIZE, MYF(MY_WME)))) - return (error=1); + return (error=true); to=new_buffer + (to-buffer); buffer=new_buffer; buff_length+=IO_SIZE;